xref: /linux/kernel/trace/trace_kprobe.c (revision b7ad6108484221f431372b94763b74e550d16c93)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Kprobes-based tracing events
4  *
5  * Created by Masami Hiramatsu <mhiramat@redhat.com>
6  *
7  */
8 #define pr_fmt(fmt)	"trace_kprobe: " fmt
9 
10 #include <linux/module.h>
11 #include <linux/uaccess.h>
12 #include <linux/rculist.h>
13 #include <linux/error-injection.h>
14 
15 #include <asm/setup.h>  /* for COMMAND_LINE_SIZE */
16 
17 #include "trace_dynevent.h"
18 #include "trace_kprobe_selftest.h"
19 #include "trace_probe.h"
20 #include "trace_probe_tmpl.h"
21 
22 #define KPROBE_EVENT_SYSTEM "kprobes"
23 #define KRETPROBE_MAXACTIVE_MAX 4096
24 #define MAX_KPROBE_CMDLINE_SIZE 1024
25 
26 /* Kprobe early definition from command line */
27 static char kprobe_boot_events_buf[COMMAND_LINE_SIZE] __initdata;
28 static bool kprobe_boot_events_enabled __initdata;
29 
30 static int __init set_kprobe_boot_events(char *str)
31 {
32 	strlcpy(kprobe_boot_events_buf, str, COMMAND_LINE_SIZE);
33 	return 0;
34 }
35 __setup("kprobe_event=", set_kprobe_boot_events);
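/*
 * For example, booting with
 *
 *   kprobe_event=p,vfs_read;r,vfs_read,$retval
 *
 * defines a kprobe and a kretprobe event on vfs_read at boot time: ','
 * stands in for the spaces of the usual kprobe_events syntax and ';'
 * separates probe definitions (see setup_boot_kprobe_events() below).
 */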
36 
37 static int trace_kprobe_create(int argc, const char **argv);
38 static int trace_kprobe_show(struct seq_file *m, struct dyn_event *ev);
39 static int trace_kprobe_release(struct dyn_event *ev);
40 static bool trace_kprobe_is_busy(struct dyn_event *ev);
41 static bool trace_kprobe_match(const char *system, const char *event,
42 			int argc, const char **argv, struct dyn_event *ev);
43 
44 static struct dyn_event_operations trace_kprobe_ops = {
45 	.create = trace_kprobe_create,
46 	.show = trace_kprobe_show,
47 	.is_busy = trace_kprobe_is_busy,
48 	.free = trace_kprobe_release,
49 	.match = trace_kprobe_match,
50 };
51 
52 /*
53  * Kprobe event core functions
54  */
55 struct trace_kprobe {
56 	struct dyn_event	devent;
57 	struct kretprobe	rp;	/* Use rp.kp for kprobe use */
58 	unsigned long __percpu *nhit;
59 	const char		*symbol;	/* symbol name */
60 	struct trace_probe	tp;
61 };
62 
63 static bool is_trace_kprobe(struct dyn_event *ev)
64 {
65 	return ev->ops == &trace_kprobe_ops;
66 }
67 
68 static struct trace_kprobe *to_trace_kprobe(struct dyn_event *ev)
69 {
70 	return container_of(ev, struct trace_kprobe, devent);
71 }
72 
73 /**
74  * for_each_trace_kprobe - iterate over the trace_kprobe list
75  * @pos:	the struct trace_kprobe * for each entry
76  * @dpos:	the struct dyn_event * to use as a loop cursor
77  */
78 #define for_each_trace_kprobe(pos, dpos)	\
79 	for_each_dyn_event(dpos)		\
80 		if (is_trace_kprobe(dpos) && (pos = to_trace_kprobe(dpos)))
81 
82 #define SIZEOF_TRACE_KPROBE(n)				\
83 	(offsetof(struct trace_kprobe, tp.args) +	\
84 	(sizeof(struct probe_arg) * (n)))
85 
86 static nokprobe_inline bool trace_kprobe_is_return(struct trace_kprobe *tk)
87 {
88 	return tk->rp.handler != NULL;
89 }
90 
91 static nokprobe_inline const char *trace_kprobe_symbol(struct trace_kprobe *tk)
92 {
93 	return tk->symbol ? tk->symbol : "unknown";
94 }
95 
96 static nokprobe_inline unsigned long trace_kprobe_offset(struct trace_kprobe *tk)
97 {
98 	return tk->rp.kp.offset;
99 }
100 
101 static nokprobe_inline bool trace_kprobe_has_gone(struct trace_kprobe *tk)
102 {
103 	return !!(kprobe_gone(&tk->rp.kp));
104 }
105 
106 static nokprobe_inline bool trace_kprobe_within_module(struct trace_kprobe *tk,
107 						 struct module *mod)
108 {
109 	int len = strlen(mod->name);
110 	const char *name = trace_kprobe_symbol(tk);
111 	return strncmp(mod->name, name, len) == 0 && name[len] == ':';
112 }
113 
114 static nokprobe_inline bool trace_kprobe_module_exist(struct trace_kprobe *tk)
115 {
116 	char *p;
117 	bool ret;
118 
119 	if (!tk->symbol)
120 		return false;
121 	p = strchr(tk->symbol, ':');
122 	if (!p)
123 		return true;
124 	*p = '\0';
125 	mutex_lock(&module_mutex);
126 	ret = !!find_module(tk->symbol);
127 	mutex_unlock(&module_mutex);
128 	*p = ':';
129 
130 	return ret;
131 }
132 
133 static bool trace_kprobe_is_busy(struct dyn_event *ev)
134 {
135 	struct trace_kprobe *tk = to_trace_kprobe(ev);
136 
137 	return trace_probe_is_enabled(&tk->tp);
138 }
139 
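/*
 * Match the optional arguments of a delete command (e.g. "-:EVENT LOC ARGS")
 * against this probe's definition: argv[0] is compared with the probed
 * location (address, SYM or SYM+offset) and any remaining argv entries with
 * the probe's fetch arguments.
 */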
140 static bool trace_kprobe_match_command_head(struct trace_kprobe *tk,
141 					    int argc, const char **argv)
142 {
143 	char buf[MAX_ARGSTR_LEN + 1];
144 
145 	if (!argc)
146 		return true;
147 
148 	if (!tk->symbol)
149 		snprintf(buf, sizeof(buf), "0x%p", tk->rp.kp.addr);
150 	else if (tk->rp.kp.offset)
151 		snprintf(buf, sizeof(buf), "%s+%u",
152 			 trace_kprobe_symbol(tk), tk->rp.kp.offset);
153 	else
154 		snprintf(buf, sizeof(buf), "%s", trace_kprobe_symbol(tk));
155 	if (strcmp(buf, argv[0]))
156 		return false;
157 	argc--; argv++;
158 
159 	return trace_probe_match_command_args(&tk->tp, argc, argv);
160 }
161 
162 static bool trace_kprobe_match(const char *system, const char *event,
163 			int argc, const char **argv, struct dyn_event *ev)
164 {
165 	struct trace_kprobe *tk = to_trace_kprobe(ev);
166 
167 	return strcmp(trace_probe_name(&tk->tp), event) == 0 &&
168 	    (!system || strcmp(trace_probe_group_name(&tk->tp), system) == 0) &&
169 	    trace_kprobe_match_command_head(tk, argc, argv);
170 }
171 
172 static nokprobe_inline unsigned long trace_kprobe_nhit(struct trace_kprobe *tk)
173 {
174 	unsigned long nhit = 0;
175 	int cpu;
176 
177 	for_each_possible_cpu(cpu)
178 		nhit += *per_cpu_ptr(tk->nhit, cpu);
179 
180 	return nhit;
181 }
182 
183 static nokprobe_inline bool trace_kprobe_is_registered(struct trace_kprobe *tk)
184 {
185 	return !(list_empty(&tk->rp.kp.list) &&
186 		 hlist_unhashed(&tk->rp.kp.hlist));
187 }
188 
189 /* Return 0 if it fails to find the symbol address */
190 static nokprobe_inline
191 unsigned long trace_kprobe_address(struct trace_kprobe *tk)
192 {
193 	unsigned long addr;
194 
195 	if (tk->symbol) {
196 		addr = (unsigned long)
197 			kallsyms_lookup_name(trace_kprobe_symbol(tk));
198 		if (addr)
199 			addr += tk->rp.kp.offset;
200 	} else {
201 		addr = (unsigned long)tk->rp.kp.addr;
202 	}
203 	return addr;
204 }
205 
206 static nokprobe_inline struct trace_kprobe *
207 trace_kprobe_primary_from_call(struct trace_event_call *call)
208 {
209 	struct trace_probe *tp;
210 
211 	tp = trace_probe_primary_from_call(call);
212 	if (WARN_ON_ONCE(!tp))
213 		return NULL;
214 
215 	return container_of(tp, struct trace_kprobe, tp);
216 }
217 
218 bool trace_kprobe_on_func_entry(struct trace_event_call *call)
219 {
220 	struct trace_kprobe *tk = trace_kprobe_primary_from_call(call);
221 
222 	return tk ? kprobe_on_func_entry(tk->rp.kp.addr,
223 			tk->rp.kp.addr ? NULL : tk->rp.kp.symbol_name,
224 			tk->rp.kp.addr ? 0 : tk->rp.kp.offset) : false;
225 }
226 
227 bool trace_kprobe_error_injectable(struct trace_event_call *call)
228 {
229 	struct trace_kprobe *tk = trace_kprobe_primary_from_call(call);
230 
231 	return tk ? within_error_injection_list(trace_kprobe_address(tk)) :
232 	       false;
233 }
234 
235 static int register_kprobe_event(struct trace_kprobe *tk);
236 static int unregister_kprobe_event(struct trace_kprobe *tk);
237 
238 static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs);
239 static int kretprobe_dispatcher(struct kretprobe_instance *ri,
240 				struct pt_regs *regs);
241 
242 static void free_trace_kprobe(struct trace_kprobe *tk)
243 {
244 	if (tk) {
245 		trace_probe_cleanup(&tk->tp);
246 		kfree(tk->symbol);
247 		free_percpu(tk->nhit);
248 		kfree(tk);
249 	}
250 }
251 
252 /*
253  * Allocate new trace_probe and initialize it (including kprobes).
254  */
255 static struct trace_kprobe *alloc_trace_kprobe(const char *group,
256 					     const char *event,
257 					     void *addr,
258 					     const char *symbol,
259 					     unsigned long offs,
260 					     int maxactive,
261 					     int nargs, bool is_return)
262 {
263 	struct trace_kprobe *tk;
264 	int ret = -ENOMEM;
265 
266 	tk = kzalloc(SIZEOF_TRACE_KPROBE(nargs), GFP_KERNEL);
267 	if (!tk)
268 		return ERR_PTR(ret);
269 
270 	tk->nhit = alloc_percpu(unsigned long);
271 	if (!tk->nhit)
272 		goto error;
273 
274 	if (symbol) {
275 		tk->symbol = kstrdup(symbol, GFP_KERNEL);
276 		if (!tk->symbol)
277 			goto error;
278 		tk->rp.kp.symbol_name = tk->symbol;
279 		tk->rp.kp.offset = offs;
280 	} else
281 		tk->rp.kp.addr = addr;
282 
283 	if (is_return)
284 		tk->rp.handler = kretprobe_dispatcher;
285 	else
286 		tk->rp.kp.pre_handler = kprobe_dispatcher;
287 
288 	tk->rp.maxactive = maxactive;
289 	INIT_HLIST_NODE(&tk->rp.kp.hlist);
290 	INIT_LIST_HEAD(&tk->rp.kp.list);
291 
292 	ret = trace_probe_init(&tk->tp, event, group);
293 	if (ret < 0)
294 		goto error;
295 
296 	dyn_event_init(&tk->devent, &trace_kprobe_ops);
297 	return tk;
298 error:
299 	free_trace_kprobe(tk);
300 	return ERR_PTR(ret);
301 }
302 
303 static struct trace_kprobe *find_trace_kprobe(const char *event,
304 					      const char *group)
305 {
306 	struct dyn_event *pos;
307 	struct trace_kprobe *tk;
308 
309 	for_each_trace_kprobe(tk, pos)
310 		if (strcmp(trace_probe_name(&tk->tp), event) == 0 &&
311 		    strcmp(trace_probe_group_name(&tk->tp), group) == 0)
312 			return tk;
313 	return NULL;
314 }
315 
316 static inline int __enable_trace_kprobe(struct trace_kprobe *tk)
317 {
318 	int ret = 0;
319 
320 	if (trace_kprobe_is_registered(tk) && !trace_kprobe_has_gone(tk)) {
321 		if (trace_kprobe_is_return(tk))
322 			ret = enable_kretprobe(&tk->rp);
323 		else
324 			ret = enable_kprobe(&tk->rp.kp);
325 	}
326 
327 	return ret;
328 }
329 
330 static void __disable_trace_kprobe(struct trace_probe *tp)
331 {
332 	struct trace_probe *pos;
333 	struct trace_kprobe *tk;
334 
335 	list_for_each_entry(pos, trace_probe_probe_list(tp), list) {
336 		tk = container_of(pos, struct trace_kprobe, tp);
337 		if (!trace_kprobe_is_registered(tk))
338 			continue;
339 		if (trace_kprobe_is_return(tk))
340 			disable_kretprobe(&tk->rp);
341 		else
342 			disable_kprobe(&tk->rp.kp);
343 	}
344 }
345 
346 /*
347  * Enable trace_probe.
348  * If the file is NULL, enable the "perf" handler; otherwise enable the "trace" handler.
349  */
350 static int enable_trace_kprobe(struct trace_event_call *call,
351 				struct trace_event_file *file)
352 {
353 	struct trace_probe *pos, *tp;
354 	struct trace_kprobe *tk;
355 	bool enabled;
356 	int ret = 0;
357 
358 	tp = trace_probe_primary_from_call(call);
359 	if (WARN_ON_ONCE(!tp))
360 		return -ENODEV;
361 	enabled = trace_probe_is_enabled(tp);
362 
363 	/* This also changes "enabled" state */
364 	if (file) {
365 		ret = trace_probe_add_file(tp, file);
366 		if (ret)
367 			return ret;
368 	} else
369 		trace_probe_set_flag(tp, TP_FLAG_PROFILE);
370 
371 	if (enabled)
372 		return 0;
373 
374 	list_for_each_entry(pos, trace_probe_probe_list(tp), list) {
375 		tk = container_of(pos, struct trace_kprobe, tp);
376 		if (trace_kprobe_has_gone(tk))
377 			continue;
378 		ret = __enable_trace_kprobe(tk);
379 		if (ret)
380 			break;
381 		enabled = true;
382 	}
383 
384 	if (ret) {
385 		/* Failed to enable one of them. Roll back all */
386 		if (enabled)
387 			__disable_trace_kprobe(tp);
388 		if (file)
389 			trace_probe_remove_file(tp, file);
390 		else
391 			trace_probe_clear_flag(tp, TP_FLAG_PROFILE);
392 	}
393 
394 	return ret;
395 }
396 
397 /*
398  * Disable trace_probe.
399  * If the file is NULL, disable the "perf" handler; otherwise disable the "trace" handler.
400  */
401 static int disable_trace_kprobe(struct trace_event_call *call,
402 				struct trace_event_file *file)
403 {
404 	struct trace_probe *tp;
405 
406 	tp = trace_probe_primary_from_call(call);
407 	if (WARN_ON_ONCE(!tp))
408 		return -ENODEV;
409 
410 	if (file) {
411 		if (!trace_probe_get_file_link(tp, file))
412 			return -ENOENT;
413 		if (!trace_probe_has_single_file(tp))
414 			goto out;
415 		trace_probe_clear_flag(tp, TP_FLAG_TRACE);
416 	} else
417 		trace_probe_clear_flag(tp, TP_FLAG_PROFILE);
418 
419 	if (!trace_probe_is_enabled(tp))
420 		__disable_trace_kprobe(tp);
421 
422  out:
423 	if (file)
424 		/*
425 		 * Synchronization is done in the function below. For perf
426 		 * events, file == NULL and perf_trace_event_unreg() calls
427 		 * tracepoint_synchronize_unregister() to synchronize the
428 		 * event itself, so we don't need to care about that here.
429 		 */
430 		trace_probe_remove_file(tp, file);
431 
432 	return 0;
433 }
434 
435 #if defined(CONFIG_KPROBES_ON_FTRACE) && \
436 	!defined(CONFIG_KPROBE_EVENTS_ON_NOTRACE)
437 static bool within_notrace_func(struct trace_kprobe *tk)
438 {
439 	unsigned long offset, size, addr;
440 
441 	addr = trace_kprobe_address(tk);
442 	if (!addr || !kallsyms_lookup_size_offset(addr, &size, &offset))
443 		return false;
444 
445 	/* Get the entry address of the target function */
446 	addr -= offset;
447 
448 	/*
449 	 * Since ftrace_location_range() does inclusive range check, we need
450 	 * to subtract 1 byte from the end address.
451 	 */
452 	return !ftrace_location_range(addr, addr + size - 1);
453 }
454 #else
455 #define within_notrace_func(tk)	(false)
456 #endif
457 
458 /* Internal register function - just handle k*probes and flags */
459 static int __register_trace_kprobe(struct trace_kprobe *tk)
460 {
461 	int i, ret;
462 
463 	if (trace_kprobe_is_registered(tk))
464 		return -EINVAL;
465 
466 	if (within_notrace_func(tk)) {
467 		pr_warn("Could not probe notrace function %s\n",
468 			trace_kprobe_symbol(tk));
469 		return -EINVAL;
470 	}
471 
472 	for (i = 0; i < tk->tp.nr_args; i++) {
473 		ret = traceprobe_update_arg(&tk->tp.args[i]);
474 		if (ret)
475 			return ret;
476 	}
477 
478 	/* Set/clear the disabled flag according to the probe's enabled state */
479 	if (trace_probe_is_enabled(&tk->tp))
480 		tk->rp.kp.flags &= ~KPROBE_FLAG_DISABLED;
481 	else
482 		tk->rp.kp.flags |= KPROBE_FLAG_DISABLED;
483 
484 	if (trace_kprobe_is_return(tk))
485 		ret = register_kretprobe(&tk->rp);
486 	else
487 		ret = register_kprobe(&tk->rp.kp);
488 
489 	return ret;
490 }
491 
492 /* Internal unregister function - just handle k*probes and flags */
493 static void __unregister_trace_kprobe(struct trace_kprobe *tk)
494 {
495 	if (trace_kprobe_is_registered(tk)) {
496 		if (trace_kprobe_is_return(tk))
497 			unregister_kretprobe(&tk->rp);
498 		else
499 			unregister_kprobe(&tk->rp.kp);
500 		/* Cleanup kprobe for reuse and mark it unregistered */
501 		INIT_HLIST_NODE(&tk->rp.kp.hlist);
502 		INIT_LIST_HEAD(&tk->rp.kp.list);
503 		if (tk->rp.kp.symbol_name)
504 			tk->rp.kp.addr = NULL;
505 	}
506 }
507 
508 /* Unregister a trace_probe and probe_event */
509 static int unregister_trace_kprobe(struct trace_kprobe *tk)
510 {
511 	/* If other probes are on the event, just unregister kprobe */
512 	if (trace_probe_has_sibling(&tk->tp))
513 		goto unreg;
514 
515 	/* Enabled event can not be unregistered */
516 	if (trace_probe_is_enabled(&tk->tp))
517 		return -EBUSY;
518 
519 	/* Will fail if probe is being used by ftrace or perf */
520 	if (unregister_kprobe_event(tk))
521 		return -EBUSY;
522 
523 unreg:
524 	__unregister_trace_kprobe(tk);
525 	dyn_event_remove(&tk->devent);
526 	trace_probe_unlink(&tk->tp);
527 
528 	return 0;
529 }
530 
531 static bool trace_kprobe_has_same_kprobe(struct trace_kprobe *orig,
532 					 struct trace_kprobe *comp)
533 {
534 	struct trace_probe_event *tpe = orig->tp.event;
535 	struct trace_probe *pos;
536 	int i;
537 
538 	list_for_each_entry(pos, &tpe->probes, list) {
539 		orig = container_of(pos, struct trace_kprobe, tp);
540 		if (strcmp(trace_kprobe_symbol(orig),
541 			   trace_kprobe_symbol(comp)) ||
542 		    trace_kprobe_offset(orig) != trace_kprobe_offset(comp))
543 			continue;
544 
545 		/*
546 		 * trace_probe_compare_arg_type() ensured that nr_args and
547 		 * each argument name and type are the same. Let's compare comm.
548 		 */
549 		for (i = 0; i < orig->tp.nr_args; i++) {
550 			if (strcmp(orig->tp.args[i].comm,
551 				   comp->tp.args[i].comm))
552 				break;
553 		}
554 
555 		if (i == orig->tp.nr_args)
556 			return true;
557 	}
558 
559 	return false;
560 }
561 
562 static int append_trace_kprobe(struct trace_kprobe *tk, struct trace_kprobe *to)
563 {
564 	int ret;
565 
566 	ret = trace_probe_compare_arg_type(&tk->tp, &to->tp);
567 	if (ret) {
568 		/* Note that argument starts index = 2 */
569 		/* Note that the arguments start at index 2 */
570 		trace_probe_log_err(0, DIFF_ARG_TYPE);
571 		return -EEXIST;
572 	}
573 	if (trace_kprobe_has_same_kprobe(to, tk)) {
574 		trace_probe_log_set_index(0);
575 		trace_probe_log_err(0, SAME_PROBE);
576 		return -EEXIST;
577 	}
578 
579 	/* Append to existing event */
580 	ret = trace_probe_append(&tk->tp, &to->tp);
581 	if (ret)
582 		return ret;
583 
584 	/* Register k*probe */
585 	ret = __register_trace_kprobe(tk);
586 	if (ret == -ENOENT && !trace_kprobe_module_exist(tk)) {
587 		pr_warn("This probe might be able to register after the target module is loaded. Continue.\n");
588 		ret = 0;
589 	}
590 
591 	if (ret)
592 		trace_probe_unlink(&tk->tp);
593 	else
594 		dyn_event_add(&tk->devent);
595 
596 	return ret;
597 }
598 
599 /* Register a trace_probe and probe_event */
600 static int register_trace_kprobe(struct trace_kprobe *tk)
601 {
602 	struct trace_kprobe *old_tk;
603 	int ret;
604 
605 	mutex_lock(&event_mutex);
606 
607 	old_tk = find_trace_kprobe(trace_probe_name(&tk->tp),
608 				   trace_probe_group_name(&tk->tp));
609 	if (old_tk) {
610 		if (trace_kprobe_is_return(tk) != trace_kprobe_is_return(old_tk)) {
611 			trace_probe_log_set_index(0);
612 			trace_probe_log_err(0, DIFF_PROBE_TYPE);
613 			ret = -EEXIST;
614 		} else {
615 			ret = append_trace_kprobe(tk, old_tk);
616 		}
617 		goto end;
618 	}
619 
620 	/* Register new event */
621 	ret = register_kprobe_event(tk);
622 	if (ret) {
623 		pr_warn("Failed to register probe event(%d)\n", ret);
624 		goto end;
625 	}
626 
627 	/* Register k*probe */
628 	ret = __register_trace_kprobe(tk);
629 	if (ret == -ENOENT && !trace_kprobe_module_exist(tk)) {
630 		pr_warn("This probe might be able to register after the target module is loaded. Continue.\n");
631 		ret = 0;
632 	}
633 
634 	if (ret < 0)
635 		unregister_kprobe_event(tk);
636 	else
637 		dyn_event_add(&tk->devent);
638 
639 end:
640 	mutex_unlock(&event_mutex);
641 	return ret;
642 }
643 
644 /* Module notifier call back, checking event on the module */
645 static int trace_kprobe_module_callback(struct notifier_block *nb,
646 				       unsigned long val, void *data)
647 {
648 	struct module *mod = data;
649 	struct dyn_event *pos;
650 	struct trace_kprobe *tk;
651 	int ret;
652 
653 	if (val != MODULE_STATE_COMING)
654 		return NOTIFY_DONE;
655 
656 	/* Update probes on coming module */
657 	mutex_lock(&event_mutex);
658 	for_each_trace_kprobe(tk, pos) {
659 		if (trace_kprobe_within_module(tk, mod)) {
660 			/* No need to check busy - the underlying kprobe should have gone. */
661 			__unregister_trace_kprobe(tk);
662 			ret = __register_trace_kprobe(tk);
663 			if (ret)
664 				pr_warn("Failed to re-register probe %s on %s: %d\n",
665 					trace_probe_name(&tk->tp),
666 					mod->name, ret);
667 		}
668 	}
669 	mutex_unlock(&event_mutex);
670 
671 	return NOTIFY_DONE;
672 }
673 
674 static struct notifier_block trace_kprobe_module_nb = {
675 	.notifier_call = trace_kprobe_module_callback,
676 	.priority = 1	/* Invoked after kprobe module callback */
677 };
678 
679 /* Convert characters not allowed in event names (such as ':' and '.') into '_' */
680 static inline void sanitize_event_name(char *name)
681 {
682 	while (*name++ != '\0')
683 		if (*name == ':' || *name == '.')
684 			*name = '_';
685 }
686 
687 static int trace_kprobe_create(int argc, const char *argv[])
688 {
689 	/*
690 	 * Argument syntax:
691 	 *  - Add kprobe:
692 	 *      p[:[GRP/]EVENT] [MOD:]KSYM[+OFFS]|KADDR [FETCHARGS]
693 	 *  - Add kretprobe:
694 	 *      r[MAXACTIVE][:[GRP/]EVENT] [MOD:]KSYM[+0] [FETCHARGS]
695 	 * Fetch args:
696 	 *  $retval	: fetch return value
697 	 *  $stack	: fetch stack address
698 	 *  $stackN	: fetch Nth of stack (N:0-)
699 	 *  $comm       : fetch current task comm
700 	 *  @ADDR	: fetch memory at ADDR (ADDR should be in kernel)
701 	 *  @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol)
702 	 *  %REG	: fetch register REG
703 	 * Dereferencing memory fetch:
704 	 *  +|-offs(ARG) : fetch memory at ARG +|- offs address.
705 	 * Alias name of args:
706 	 *  NAME=FETCHARG : set NAME as alias of FETCHARG.
707 	 * Type of args:
708 	 *  FETCHARG:TYPE : use TYPE instead of unsigned long.
709 	 */
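	/*
	 * For example, writing
	 *
	 *   p:myprobe do_sys_open dfd=%ax mode=+4($stack)
	 *   r:myretprobe do_sys_open $retval
	 *
	 * to <tracefs>/kprobe_events creates a kprobe event "myprobe" and a
	 * kretprobe event "myretprobe" in the "kprobes" group (register names
	 * are architecture specific; see Documentation/trace/kprobetrace.rst).
	 */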
710 	struct trace_kprobe *tk = NULL;
711 	int i, len, ret = 0;
712 	bool is_return = false;
713 	char *symbol = NULL, *tmp = NULL;
714 	const char *event = NULL, *group = KPROBE_EVENT_SYSTEM;
715 	int maxactive = 0;
716 	long offset = 0;
717 	void *addr = NULL;
718 	char buf[MAX_EVENT_NAME_LEN];
719 	unsigned int flags = TPARG_FL_KERNEL;
720 
721 	switch (argv[0][0]) {
722 	case 'r':
723 		is_return = true;
724 		flags |= TPARG_FL_RETURN;
725 		break;
726 	case 'p':
727 		break;
728 	default:
729 		return -ECANCELED;
730 	}
731 	if (argc < 2)
732 		return -ECANCELED;
733 
734 	trace_probe_log_init("trace_kprobe", argc, argv);
735 
736 	event = strchr(&argv[0][1], ':');
737 	if (event)
738 		event++;
739 
740 	if (isdigit(argv[0][1])) {
741 		if (!is_return) {
742 			trace_probe_log_err(1, MAXACT_NO_KPROBE);
743 			goto parse_error;
744 		}
745 		if (event)
746 			len = event - &argv[0][1] - 1;
747 		else
748 			len = strlen(&argv[0][1]);
749 		if (len > MAX_EVENT_NAME_LEN - 1) {
750 			trace_probe_log_err(1, BAD_MAXACT);
751 			goto parse_error;
752 		}
753 		memcpy(buf, &argv[0][1], len);
754 		buf[len] = '\0';
755 		ret = kstrtouint(buf, 0, &maxactive);
756 		if (ret || !maxactive) {
757 			trace_probe_log_err(1, BAD_MAXACT);
758 			goto parse_error;
759 		}
760 		/* kretprobe instances are iterated over via a list. The
761 		 * maximum should stay reasonable.
762 		 */
763 		if (maxactive > KRETPROBE_MAXACTIVE_MAX) {
764 			trace_probe_log_err(1, MAXACT_TOO_BIG);
765 			goto parse_error;
766 		}
767 	}
768 
769 	/* try to parse an address. if that fails, try to read the
770 	 * input as a symbol. */
771 	if (kstrtoul(argv[1], 0, (unsigned long *)&addr)) {
772 		trace_probe_log_set_index(1);
773 		/* Check whether uprobe event specified */
774 		if (strchr(argv[1], '/') && strchr(argv[1], ':')) {
775 			ret = -ECANCELED;
776 			goto error;
777 		}
778 		/* a symbol specified */
779 		symbol = kstrdup(argv[1], GFP_KERNEL);
780 		if (!symbol)
781 			return -ENOMEM;
782 		/* TODO: support .init module functions */
783 		ret = traceprobe_split_symbol_offset(symbol, &offset);
784 		if (ret || offset < 0 || offset > UINT_MAX) {
785 			trace_probe_log_err(0, BAD_PROBE_ADDR);
786 			goto parse_error;
787 		}
788 		if (kprobe_on_func_entry(NULL, symbol, offset))
789 			flags |= TPARG_FL_FENTRY;
790 		if (offset && is_return && !(flags & TPARG_FL_FENTRY)) {
791 			trace_probe_log_err(0, BAD_RETPROBE);
792 			goto parse_error;
793 		}
794 	}
795 
796 	trace_probe_log_set_index(0);
797 	if (event) {
798 		ret = traceprobe_parse_event_name(&event, &group, buf,
799 						  event - argv[0]);
800 		if (ret)
801 			goto parse_error;
802 	} else {
803 		/* Make a new event name */
804 		if (symbol)
805 			snprintf(buf, MAX_EVENT_NAME_LEN, "%c_%s_%ld",
806 				 is_return ? 'r' : 'p', symbol, offset);
807 		else
808 			snprintf(buf, MAX_EVENT_NAME_LEN, "%c_0x%p",
809 				 is_return ? 'r' : 'p', addr);
810 		sanitize_event_name(buf);
811 		event = buf;
812 	}
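	/*
	 * With the auto-generated names above, "p vfs_read+4" would, for
	 * instance, get the event name "p_vfs_read_4", and a module symbol
	 * such as "p btrfs:btrfs_sync_file" becomes
	 * "p_btrfs_btrfs_sync_file_0" after sanitization.
	 */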
813 
814 	/* setup a probe */
815 	tk = alloc_trace_kprobe(group, event, addr, symbol, offset, maxactive,
816 			       argc - 2, is_return);
817 	if (IS_ERR(tk)) {
818 		ret = PTR_ERR(tk);
819 		/* This must return -ENOMEM, else there is a bug */
820 		WARN_ON_ONCE(ret != -ENOMEM);
821 		goto out;	/* We know tk is not allocated */
822 	}
823 	argc -= 2; argv += 2;
824 
825 	/* parse arguments */
826 	for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
827 		tmp = kstrdup(argv[i], GFP_KERNEL);
828 		if (!tmp) {
829 			ret = -ENOMEM;
830 			goto error;
831 		}
832 
833 		trace_probe_log_set_index(i + 2);
834 		ret = traceprobe_parse_probe_arg(&tk->tp, i, tmp, flags);
835 		kfree(tmp);
836 		if (ret)
837 			goto error;	/* This can be -ENOMEM */
838 	}
839 
840 	ret = traceprobe_set_print_fmt(&tk->tp, is_return);
841 	if (ret < 0)
842 		goto error;
843 
844 	ret = register_trace_kprobe(tk);
845 	if (ret) {
846 		trace_probe_log_set_index(1);
847 		if (ret == -EILSEQ)
848 			trace_probe_log_err(0, BAD_INSN_BNDRY);
849 		else if (ret == -ENOENT)
850 			trace_probe_log_err(0, BAD_PROBE_ADDR);
851 		else if (ret != -ENOMEM && ret != -EEXIST)
852 			trace_probe_log_err(0, FAIL_REG_PROBE);
853 		goto error;
854 	}
855 
856 out:
857 	trace_probe_log_clear();
858 	kfree(symbol);
859 	return ret;
860 
861 parse_error:
862 	ret = -EINVAL;
863 error:
864 	free_trace_kprobe(tk);
865 	goto out;
866 }
867 
868 static int create_or_delete_trace_kprobe(int argc, char **argv)
869 {
870 	int ret;
871 
872 	if (argv[0][0] == '-')
873 		return dyn_event_release(argc, argv, &trace_kprobe_ops);
874 
875 	ret = trace_kprobe_create(argc, (const char **)argv);
876 	return ret == -ECANCELED ? -EINVAL : ret;
877 }
878 
879 static int trace_kprobe_release(struct dyn_event *ev)
880 {
881 	struct trace_kprobe *tk = to_trace_kprobe(ev);
882 	int ret = unregister_trace_kprobe(tk);
883 
884 	if (!ret)
885 		free_trace_kprobe(tk);
886 	return ret;
887 }
888 
889 static int trace_kprobe_show(struct seq_file *m, struct dyn_event *ev)
890 {
891 	struct trace_kprobe *tk = to_trace_kprobe(ev);
892 	int i;
893 
894 	seq_putc(m, trace_kprobe_is_return(tk) ? 'r' : 'p');
895 	seq_printf(m, ":%s/%s", trace_probe_group_name(&tk->tp),
896 				trace_probe_name(&tk->tp));
897 
898 	if (!tk->symbol)
899 		seq_printf(m, " 0x%p", tk->rp.kp.addr);
900 	else if (tk->rp.kp.offset)
901 		seq_printf(m, " %s+%u", trace_kprobe_symbol(tk),
902 			   tk->rp.kp.offset);
903 	else
904 		seq_printf(m, " %s", trace_kprobe_symbol(tk));
905 
906 	for (i = 0; i < tk->tp.nr_args; i++)
907 		seq_printf(m, " %s=%s", tk->tp.args[i].name, tk->tp.args[i].comm);
908 	seq_putc(m, '\n');
909 
910 	return 0;
911 }
912 
913 static int probes_seq_show(struct seq_file *m, void *v)
914 {
915 	struct dyn_event *ev = v;
916 
917 	if (!is_trace_kprobe(ev))
918 		return 0;
919 
920 	return trace_kprobe_show(m, ev);
921 }
922 
923 static const struct seq_operations probes_seq_op = {
924 	.start  = dyn_event_seq_start,
925 	.next   = dyn_event_seq_next,
926 	.stop   = dyn_event_seq_stop,
927 	.show   = probes_seq_show
928 };
929 
930 static int probes_open(struct inode *inode, struct file *file)
931 {
932 	int ret;
933 
934 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
935 		ret = dyn_events_release_all(&trace_kprobe_ops);
936 		if (ret < 0)
937 			return ret;
938 	}
939 
940 	return seq_open(file, &probes_seq_op);
941 }
942 
943 static ssize_t probes_write(struct file *file, const char __user *buffer,
944 			    size_t count, loff_t *ppos)
945 {
946 	return trace_parse_run_command(file, buffer, count, ppos,
947 				       create_or_delete_trace_kprobe);
948 }
949 
950 static const struct file_operations kprobe_events_ops = {
951 	.owner          = THIS_MODULE,
952 	.open           = probes_open,
953 	.read           = seq_read,
954 	.llseek         = seq_lseek,
955 	.release        = seq_release,
956 	.write		= probes_write,
957 };
958 
959 /* Probes profiling interfaces */
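/*
 * Each line of kprobe_profile shows the event name, the total hit count and
 * the number of missed executions, e.g.:
 *
 *   myprobe                                                   5               0
 */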
960 static int probes_profile_seq_show(struct seq_file *m, void *v)
961 {
962 	struct dyn_event *ev = v;
963 	struct trace_kprobe *tk;
964 
965 	if (!is_trace_kprobe(ev))
966 		return 0;
967 
968 	tk = to_trace_kprobe(ev);
969 	seq_printf(m, "  %-44s %15lu %15lu\n",
970 		   trace_probe_name(&tk->tp),
971 		   trace_kprobe_nhit(tk),
972 		   tk->rp.kp.nmissed);
973 
974 	return 0;
975 }
976 
977 static const struct seq_operations profile_seq_op = {
978 	.start  = dyn_event_seq_start,
979 	.next   = dyn_event_seq_next,
980 	.stop   = dyn_event_seq_stop,
981 	.show   = probes_profile_seq_show
982 };
983 
984 static int profile_open(struct inode *inode, struct file *file)
985 {
986 	return seq_open(file, &profile_seq_op);
987 }
988 
989 static const struct file_operations kprobe_profile_ops = {
990 	.owner          = THIS_MODULE,
991 	.open           = profile_open,
992 	.read           = seq_read,
993 	.llseek         = seq_lseek,
994 	.release        = seq_release,
995 };
996 
997 /* Kprobe specific fetch functions */
998 
999 /* Return the length of the string -- including the terminating NUL byte */
1000 static nokprobe_inline int
1001 fetch_store_strlen(unsigned long addr)
1002 {
1003 	int ret, len = 0;
1004 	u8 c;
1005 
1006 	do {
1007 		ret = probe_kernel_read(&c, (u8 *)addr + len, 1);
1008 		len++;
1009 	} while (c && ret == 0 && len < MAX_STRING_SIZE);
1010 
1011 	return (ret < 0) ? ret : len;
1012 }
1013 
1014 /* Return the length of a user-space string -- including the terminating NUL byte */
1015 static nokprobe_inline int
1016 fetch_store_strlen_user(unsigned long addr)
1017 {
1018 	const void __user *uaddr =  (__force const void __user *)addr;
1019 
1020 	return strnlen_unsafe_user(uaddr, MAX_STRING_SIZE);
1021 }
1022 
1023 /*
1024  * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with the max
1025  * length and relative data location.
1026  */
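/*
 * The u32 at *dest is a "data location" word: the upper 16 bits hold the
 * maximum (and, on return, actual) string length and the lower 16 bits the
 * offset of the string data relative to @base (see get_loc_len(),
 * get_loc_data() and make_data_loc() in trace_probe.h).
 */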
1027 static nokprobe_inline int
1028 fetch_store_string(unsigned long addr, void *dest, void *base)
1029 {
1030 	int maxlen = get_loc_len(*(u32 *)dest);
1031 	void *__dest;
1032 	long ret;
1033 
1034 	if (unlikely(!maxlen))
1035 		return -ENOMEM;
1036 
1037 	__dest = get_loc_data(dest, base);
1038 
1039 	/*
1040 	 * Try to get string again, since the string can be changed while
1041 	 * probing.
1042 	 */
1043 	ret = strncpy_from_unsafe(__dest, (void *)addr, maxlen);
1044 	if (ret >= 0)
1045 		*(u32 *)dest = make_data_loc(ret, __dest - base);
1046 
1047 	return ret;
1048 }
1049 
1050 /*
1051  * Fetch a null-terminated string from user space. Caller MUST set *(u32 *)dest
1052  * with the max length and relative data location.
1053  */
1054 static nokprobe_inline int
1055 fetch_store_string_user(unsigned long addr, void *dest, void *base)
1056 {
1057 	const void __user *uaddr =  (__force const void __user *)addr;
1058 	int maxlen = get_loc_len(*(u32 *)dest);
1059 	void *__dest;
1060 	long ret;
1061 
1062 	if (unlikely(!maxlen))
1063 		return -ENOMEM;
1064 
1065 	__dest = get_loc_data(dest, base);
1066 
1067 	ret = strncpy_from_unsafe_user(__dest, uaddr, maxlen);
1068 	if (ret >= 0)
1069 		*(u32 *)dest = make_data_loc(ret, __dest - base);
1070 
1071 	return ret;
1072 }
1073 
1074 static nokprobe_inline int
1075 probe_mem_read(void *dest, void *src, size_t size)
1076 {
1077 	return probe_kernel_read(dest, src, size);
1078 }
1079 
1080 static nokprobe_inline int
1081 probe_mem_read_user(void *dest, void *src, size_t size)
1082 {
1083 	const void __user *uaddr =  (__force const void __user *)src;
1084 
1085 	return probe_user_read(dest, uaddr, size);
1086 }
1087 
1088 /* Note that we don't verify the fetch code, since it does not come from user space */
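/*
 * Each probe argument has been compiled into a small fetch_insn program; for
 * instance "+8(%ax):u32" would roughly translate to
 *
 *   FETCH_OP_REG(ax), FETCH_OP_DEREF(+8), FETCH_OP_ST_MEM(4), FETCH_OP_END
 *
 * The first stage below only resolves the initial value; the dereference and
 * store stages are handled by process_fetch_insn_bottom().
 */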
1089 static int
1090 process_fetch_insn(struct fetch_insn *code, struct pt_regs *regs, void *dest,
1091 		   void *base)
1092 {
1093 	unsigned long val;
1094 
1095 retry:
1096 	/* 1st stage: get value from context */
1097 	switch (code->op) {
1098 	case FETCH_OP_REG:
1099 		val = regs_get_register(regs, code->param);
1100 		break;
1101 	case FETCH_OP_STACK:
1102 		val = regs_get_kernel_stack_nth(regs, code->param);
1103 		break;
1104 	case FETCH_OP_STACKP:
1105 		val = kernel_stack_pointer(regs);
1106 		break;
1107 	case FETCH_OP_RETVAL:
1108 		val = regs_return_value(regs);
1109 		break;
1110 	case FETCH_OP_IMM:
1111 		val = code->immediate;
1112 		break;
1113 	case FETCH_OP_COMM:
1114 		val = (unsigned long)current->comm;
1115 		break;
1116 	case FETCH_OP_DATA:
1117 		val = (unsigned long)code->data;
1118 		break;
1119 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
1120 	case FETCH_OP_ARG:
1121 		val = regs_get_kernel_argument(regs, code->param);
1122 		break;
1123 #endif
1124 	case FETCH_NOP_SYMBOL:	/* Ignore a placeholder */
1125 		code++;
1126 		goto retry;
1127 	default:
1128 		return -EILSEQ;
1129 	}
1130 	code++;
1131 
1132 	return process_fetch_insn_bottom(code, val, dest, base);
1133 }
1134 NOKPROBE_SYMBOL(process_fetch_insn)
1135 
1136 /* Kprobe handler */
1137 static nokprobe_inline void
1138 __kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs,
1139 		    struct trace_event_file *trace_file)
1140 {
1141 	struct kprobe_trace_entry_head *entry;
1142 	struct ring_buffer_event *event;
1143 	struct ring_buffer *buffer;
1144 	int size, dsize, pc;
1145 	unsigned long irq_flags;
1146 	struct trace_event_call *call = trace_probe_event_call(&tk->tp);
1147 
1148 	WARN_ON(call != trace_file->event_call);
1149 
1150 	if (trace_trigger_soft_disabled(trace_file))
1151 		return;
1152 
1153 	local_save_flags(irq_flags);
1154 	pc = preempt_count();
1155 
1156 	dsize = __get_data_size(&tk->tp, regs);
1157 	size = sizeof(*entry) + tk->tp.size + dsize;
1158 
1159 	event = trace_event_buffer_lock_reserve(&buffer, trace_file,
1160 						call->event.type,
1161 						size, irq_flags, pc);
1162 	if (!event)
1163 		return;
1164 
1165 	entry = ring_buffer_event_data(event);
1166 	entry->ip = (unsigned long)tk->rp.kp.addr;
1167 	store_trace_args(&entry[1], &tk->tp, regs, sizeof(*entry), dsize);
1168 
1169 	event_trigger_unlock_commit_regs(trace_file, buffer, event,
1170 					 entry, irq_flags, pc, regs);
1171 }
1172 
1173 static void
1174 kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs)
1175 {
1176 	struct event_file_link *link;
1177 
1178 	trace_probe_for_each_link_rcu(link, &tk->tp)
1179 		__kprobe_trace_func(tk, regs, link->file);
1180 }
1181 NOKPROBE_SYMBOL(kprobe_trace_func);
1182 
1183 /* Kretprobe handler */
1184 static nokprobe_inline void
1185 __kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
1186 		       struct pt_regs *regs,
1187 		       struct trace_event_file *trace_file)
1188 {
1189 	struct kretprobe_trace_entry_head *entry;
1190 	struct ring_buffer_event *event;
1191 	struct ring_buffer *buffer;
1192 	int size, pc, dsize;
1193 	unsigned long irq_flags;
1194 	struct trace_event_call *call = trace_probe_event_call(&tk->tp);
1195 
1196 	WARN_ON(call != trace_file->event_call);
1197 
1198 	if (trace_trigger_soft_disabled(trace_file))
1199 		return;
1200 
1201 	local_save_flags(irq_flags);
1202 	pc = preempt_count();
1203 
1204 	dsize = __get_data_size(&tk->tp, regs);
1205 	size = sizeof(*entry) + tk->tp.size + dsize;
1206 
1207 	event = trace_event_buffer_lock_reserve(&buffer, trace_file,
1208 						call->event.type,
1209 						size, irq_flags, pc);
1210 	if (!event)
1211 		return;
1212 
1213 	entry = ring_buffer_event_data(event);
1214 	entry->func = (unsigned long)tk->rp.kp.addr;
1215 	entry->ret_ip = (unsigned long)ri->ret_addr;
1216 	store_trace_args(&entry[1], &tk->tp, regs, sizeof(*entry), dsize);
1217 
1218 	event_trigger_unlock_commit_regs(trace_file, buffer, event,
1219 					 entry, irq_flags, pc, regs);
1220 }
1221 
1222 static void
1223 kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
1224 		     struct pt_regs *regs)
1225 {
1226 	struct event_file_link *link;
1227 
1228 	trace_probe_for_each_link_rcu(link, &tk->tp)
1229 		__kretprobe_trace_func(tk, ri, regs, link->file);
1230 }
1231 NOKPROBE_SYMBOL(kretprobe_trace_func);
1232 
1233 /* Event entry printers */
1234 static enum print_line_t
1235 print_kprobe_event(struct trace_iterator *iter, int flags,
1236 		   struct trace_event *event)
1237 {
1238 	struct kprobe_trace_entry_head *field;
1239 	struct trace_seq *s = &iter->seq;
1240 	struct trace_probe *tp;
1241 
1242 	field = (struct kprobe_trace_entry_head *)iter->ent;
1243 	tp = trace_probe_primary_from_call(
1244 		container_of(event, struct trace_event_call, event));
1245 	if (WARN_ON_ONCE(!tp))
1246 		goto out;
1247 
1248 	trace_seq_printf(s, "%s: (", trace_probe_name(tp));
1249 
1250 	if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
1251 		goto out;
1252 
1253 	trace_seq_putc(s, ')');
1254 
1255 	if (print_probe_args(s, tp->args, tp->nr_args,
1256 			     (u8 *)&field[1], field) < 0)
1257 		goto out;
1258 
1259 	trace_seq_putc(s, '\n');
1260  out:
1261 	return trace_handle_return(s);
1262 }
1263 
1264 static enum print_line_t
1265 print_kretprobe_event(struct trace_iterator *iter, int flags,
1266 		      struct trace_event *event)
1267 {
1268 	struct kretprobe_trace_entry_head *field;
1269 	struct trace_seq *s = &iter->seq;
1270 	struct trace_probe *tp;
1271 
1272 	field = (struct kretprobe_trace_entry_head *)iter->ent;
1273 	tp = trace_probe_primary_from_call(
1274 		container_of(event, struct trace_event_call, event));
1275 	if (WARN_ON_ONCE(!tp))
1276 		goto out;
1277 
1278 	trace_seq_printf(s, "%s: (", trace_probe_name(tp));
1279 
1280 	if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET))
1281 		goto out;
1282 
1283 	trace_seq_puts(s, " <- ");
1284 
1285 	if (!seq_print_ip_sym(s, field->func, flags & ~TRACE_ITER_SYM_OFFSET))
1286 		goto out;
1287 
1288 	trace_seq_putc(s, ')');
1289 
1290 	if (print_probe_args(s, tp->args, tp->nr_args,
1291 			     (u8 *)&field[1], field) < 0)
1292 		goto out;
1293 
1294 	trace_seq_putc(s, '\n');
1295 
1296  out:
1297 	return trace_handle_return(s);
1298 }
1299 
1300 
1301 static int kprobe_event_define_fields(struct trace_event_call *event_call)
1302 {
1303 	int ret;
1304 	struct kprobe_trace_entry_head field;
1305 	struct trace_probe *tp;
1306 
1307 	tp = trace_probe_primary_from_call(event_call);
1308 	if (WARN_ON_ONCE(!tp))
1309 		return -ENOENT;
1310 
1311 	DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
1312 
1313 	return traceprobe_define_arg_fields(event_call, sizeof(field), tp);
1314 }
1315 
1316 static int kretprobe_event_define_fields(struct trace_event_call *event_call)
1317 {
1318 	int ret;
1319 	struct kretprobe_trace_entry_head field;
1320 	struct trace_probe *tp;
1321 
1322 	tp = trace_probe_primary_from_call(event_call);
1323 	if (WARN_ON_ONCE(!tp))
1324 		return -ENOENT;
1325 
1326 	DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0);
1327 	DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0);
1328 
1329 	return traceprobe_define_arg_fields(event_call, sizeof(field), tp);
1330 }
1331 
1332 #ifdef CONFIG_PERF_EVENTS
1333 
1334 /* Kprobe profile handler */
1335 static int
1336 kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
1337 {
1338 	struct trace_event_call *call = trace_probe_event_call(&tk->tp);
1339 	struct kprobe_trace_entry_head *entry;
1340 	struct hlist_head *head;
1341 	int size, __size, dsize;
1342 	int rctx;
1343 
1344 	if (bpf_prog_array_valid(call)) {
1345 		unsigned long orig_ip = instruction_pointer(regs);
1346 		int ret;
1347 
1348 		ret = trace_call_bpf(call, regs);
1349 
1350 		/*
1351 		 * We need to check and see if we modified the pc of the
1352 		 * pt_regs, and if so return 1 so that we don't do the
1353 		 * single stepping.
1354 		 */
1355 		if (orig_ip != instruction_pointer(regs))
1356 			return 1;
1357 		if (!ret)
1358 			return 0;
1359 	}
1360 
1361 	head = this_cpu_ptr(call->perf_events);
1362 	if (hlist_empty(head))
1363 		return 0;
1364 
1365 	dsize = __get_data_size(&tk->tp, regs);
1366 	__size = sizeof(*entry) + tk->tp.size + dsize;
1367 	size = ALIGN(__size + sizeof(u32), sizeof(u64));
1368 	size -= sizeof(u32);
1369 
1370 	entry = perf_trace_buf_alloc(size, NULL, &rctx);
1371 	if (!entry)
1372 		return 0;
1373 
1374 	entry->ip = (unsigned long)tk->rp.kp.addr;
1375 	memset(&entry[1], 0, dsize);
1376 	store_trace_args(&entry[1], &tk->tp, regs, sizeof(*entry), dsize);
1377 	perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs,
1378 			      head, NULL);
1379 	return 0;
1380 }
1381 NOKPROBE_SYMBOL(kprobe_perf_func);
1382 
1383 /* Kretprobe profile handler */
1384 static void
1385 kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
1386 		    struct pt_regs *regs)
1387 {
1388 	struct trace_event_call *call = trace_probe_event_call(&tk->tp);
1389 	struct kretprobe_trace_entry_head *entry;
1390 	struct hlist_head *head;
1391 	int size, __size, dsize;
1392 	int rctx;
1393 
1394 	if (bpf_prog_array_valid(call) && !trace_call_bpf(call, regs))
1395 		return;
1396 
1397 	head = this_cpu_ptr(call->perf_events);
1398 	if (hlist_empty(head))
1399 		return;
1400 
1401 	dsize = __get_data_size(&tk->tp, regs);
1402 	__size = sizeof(*entry) + tk->tp.size + dsize;
1403 	size = ALIGN(__size + sizeof(u32), sizeof(u64));
1404 	size -= sizeof(u32);
1405 
1406 	entry = perf_trace_buf_alloc(size, NULL, &rctx);
1407 	if (!entry)
1408 		return;
1409 
1410 	entry->func = (unsigned long)tk->rp.kp.addr;
1411 	entry->ret_ip = (unsigned long)ri->ret_addr;
1412 	store_trace_args(&entry[1], &tk->tp, regs, sizeof(*entry), dsize);
1413 	perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs,
1414 			      head, NULL);
1415 }
1416 NOKPROBE_SYMBOL(kretprobe_perf_func);
1417 
1418 int bpf_get_kprobe_info(const struct perf_event *event, u32 *fd_type,
1419 			const char **symbol, u64 *probe_offset,
1420 			u64 *probe_addr, bool perf_type_tracepoint)
1421 {
1422 	const char *pevent = trace_event_name(event->tp_event);
1423 	const char *group = event->tp_event->class->system;
1424 	struct trace_kprobe *tk;
1425 
1426 	if (perf_type_tracepoint)
1427 		tk = find_trace_kprobe(pevent, group);
1428 	else
1429 		tk = event->tp_event->data;
1430 	if (!tk)
1431 		return -EINVAL;
1432 
1433 	*fd_type = trace_kprobe_is_return(tk) ? BPF_FD_TYPE_KRETPROBE
1434 					      : BPF_FD_TYPE_KPROBE;
1435 	if (tk->symbol) {
1436 		*symbol = tk->symbol;
1437 		*probe_offset = tk->rp.kp.offset;
1438 		*probe_addr = 0;
1439 	} else {
1440 		*symbol = NULL;
1441 		*probe_offset = 0;
1442 		*probe_addr = (unsigned long)tk->rp.kp.addr;
1443 	}
1444 	return 0;
1445 }
1446 #endif	/* CONFIG_PERF_EVENTS */
1447 
1448 /*
1449  * Called by perf_trace_init() or __ftrace_set_clr_event() under event_mutex.
1450  *
1451  * kprobe_trace_self_tests_init() calls enable_trace_kprobe()/disable_trace_kprobe()
1452  * without locking, but it cannot race with this __init function.
1453  */
1454 static int kprobe_register(struct trace_event_call *event,
1455 			   enum trace_reg type, void *data)
1456 {
1457 	struct trace_event_file *file = data;
1458 
1459 	switch (type) {
1460 	case TRACE_REG_REGISTER:
1461 		return enable_trace_kprobe(event, file);
1462 	case TRACE_REG_UNREGISTER:
1463 		return disable_trace_kprobe(event, file);
1464 
1465 #ifdef CONFIG_PERF_EVENTS
1466 	case TRACE_REG_PERF_REGISTER:
1467 		return enable_trace_kprobe(event, NULL);
1468 	case TRACE_REG_PERF_UNREGISTER:
1469 		return disable_trace_kprobe(event, NULL);
1470 	case TRACE_REG_PERF_OPEN:
1471 	case TRACE_REG_PERF_CLOSE:
1472 	case TRACE_REG_PERF_ADD:
1473 	case TRACE_REG_PERF_DEL:
1474 		return 0;
1475 #endif
1476 	}
1477 	return 0;
1478 }
1479 
1480 static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
1481 {
1482 	struct trace_kprobe *tk = container_of(kp, struct trace_kprobe, rp.kp);
1483 	int ret = 0;
1484 
1485 	raw_cpu_inc(*tk->nhit);
1486 
1487 	if (trace_probe_test_flag(&tk->tp, TP_FLAG_TRACE))
1488 		kprobe_trace_func(tk, regs);
1489 #ifdef CONFIG_PERF_EVENTS
1490 	if (trace_probe_test_flag(&tk->tp, TP_FLAG_PROFILE))
1491 		ret = kprobe_perf_func(tk, regs);
1492 #endif
1493 	return ret;
1494 }
1495 NOKPROBE_SYMBOL(kprobe_dispatcher);
1496 
1497 static int
1498 kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
1499 {
1500 	struct trace_kprobe *tk = container_of(ri->rp, struct trace_kprobe, rp);
1501 
1502 	raw_cpu_inc(*tk->nhit);
1503 
1504 	if (trace_probe_test_flag(&tk->tp, TP_FLAG_TRACE))
1505 		kretprobe_trace_func(tk, ri, regs);
1506 #ifdef CONFIG_PERF_EVENTS
1507 	if (trace_probe_test_flag(&tk->tp, TP_FLAG_PROFILE))
1508 		kretprobe_perf_func(tk, ri, regs);
1509 #endif
1510 	return 0;	/* We don't tweak the kernel, so just return 0 */
1511 }
1512 NOKPROBE_SYMBOL(kretprobe_dispatcher);
1513 
1514 static struct trace_event_functions kretprobe_funcs = {
1515 	.trace		= print_kretprobe_event
1516 };
1517 
1518 static struct trace_event_functions kprobe_funcs = {
1519 	.trace		= print_kprobe_event
1520 };
1521 
1522 static inline void init_trace_event_call(struct trace_kprobe *tk)
1523 {
1524 	struct trace_event_call *call = trace_probe_event_call(&tk->tp);
1525 
1526 	if (trace_kprobe_is_return(tk)) {
1527 		call->event.funcs = &kretprobe_funcs;
1528 		call->class->define_fields = kretprobe_event_define_fields;
1529 	} else {
1530 		call->event.funcs = &kprobe_funcs;
1531 		call->class->define_fields = kprobe_event_define_fields;
1532 	}
1533 
1534 	call->flags = TRACE_EVENT_FL_KPROBE;
1535 	call->class->reg = kprobe_register;
1536 }
1537 
1538 static int register_kprobe_event(struct trace_kprobe *tk)
1539 {
1540 	init_trace_event_call(tk);
1541 
1542 	return trace_probe_register_event_call(&tk->tp);
1543 }
1544 
1545 static int unregister_kprobe_event(struct trace_kprobe *tk)
1546 {
1547 	return trace_probe_unregister_event_call(&tk->tp);
1548 }
1549 
1550 #ifdef CONFIG_PERF_EVENTS
1551 /* create a trace_kprobe, but don't add it to global lists */
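/*
 * These are typically created on behalf of perf_event_open() (e.g. via
 * perf_kprobe_init() in trace_event_perf.c) rather than through the
 * kprobe_events text interface.
 */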
1552 struct trace_event_call *
1553 create_local_trace_kprobe(char *func, void *addr, unsigned long offs,
1554 			  bool is_return)
1555 {
1556 	struct trace_kprobe *tk;
1557 	int ret;
1558 	char *event;
1559 
1560 	/*
1561 	 * local trace_kprobes are not added to dyn_event, so they are never
1562 	 * searched in find_trace_kprobe(). Therefore, there is no concern about
1563 	 * duplicate names here.
1564 	 */
1565 	event = func ? func : "DUMMY_EVENT";
1566 
1567 	tk = alloc_trace_kprobe(KPROBE_EVENT_SYSTEM, event, (void *)addr, func,
1568 				offs, 0 /* maxactive */, 0 /* nargs */,
1569 				is_return);
1570 
1571 	if (IS_ERR(tk)) {
1572 		pr_info("Failed to allocate trace_probe.(%d)\n",
1573 			(int)PTR_ERR(tk));
1574 		return ERR_CAST(tk);
1575 	}
1576 
1577 	init_trace_event_call(tk);
1578 
1579 	if (traceprobe_set_print_fmt(&tk->tp, trace_kprobe_is_return(tk)) < 0) {
1580 		ret = -ENOMEM;
1581 		goto error;
1582 	}
1583 
1584 	ret = __register_trace_kprobe(tk);
1585 	if (ret < 0)
1586 		goto error;
1587 
1588 	return trace_probe_event_call(&tk->tp);
1589 error:
1590 	free_trace_kprobe(tk);
1591 	return ERR_PTR(ret);
1592 }
1593 
1594 void destroy_local_trace_kprobe(struct trace_event_call *event_call)
1595 {
1596 	struct trace_kprobe *tk;
1597 
1598 	tk = trace_kprobe_primary_from_call(event_call);
1599 	if (unlikely(!tk))
1600 		return;
1601 
1602 	if (trace_probe_is_enabled(&tk->tp)) {
1603 		WARN_ON(1);
1604 		return;
1605 	}
1606 
1607 	__unregister_trace_kprobe(tk);
1608 
1609 	free_trace_kprobe(tk);
1610 }
1611 #endif /* CONFIG_PERF_EVENTS */
1612 
1613 static __init void enable_boot_kprobe_events(void)
1614 {
1615 	struct trace_array *tr = top_trace_array();
1616 	struct trace_event_file *file;
1617 	struct trace_kprobe *tk;
1618 	struct dyn_event *pos;
1619 
1620 	mutex_lock(&event_mutex);
1621 	for_each_trace_kprobe(tk, pos) {
1622 		list_for_each_entry(file, &tr->events, list)
1623 			if (file->event_call == trace_probe_event_call(&tk->tp))
1624 				trace_event_enable_disable(file, 1, 0);
1625 	}
1626 	mutex_unlock(&event_mutex);
1627 }
1628 
1629 static __init void setup_boot_kprobe_events(void)
1630 {
1631 	char *p, *cmd = kprobe_boot_events_buf;
1632 	int ret;
1633 
1634 	strreplace(kprobe_boot_events_buf, ',', ' ');
1635 
1636 	while (cmd && *cmd != '\0') {
1637 		p = strchr(cmd, ';');
1638 		if (p)
1639 			*p++ = '\0';
1640 
1641 		ret = trace_run_command(cmd, create_or_delete_trace_kprobe);
1642 		if (ret)
1643 			pr_warn("Failed to add event(%d): %s\n", ret, cmd);
1644 		else
1645 			kprobe_boot_events_enabled = true;
1646 
1647 		cmd = p;
1648 	}
1649 
1650 	enable_boot_kprobe_events();
1651 }
1652 
1653 /* Make a tracefs interface for controlling probe points */
1654 static __init int init_kprobe_trace(void)
1655 {
1656 	struct dentry *d_tracer;
1657 	struct dentry *entry;
1658 	int ret;
1659 
1660 	ret = dyn_event_register(&trace_kprobe_ops);
1661 	if (ret)
1662 		return ret;
1663 
1664 	if (register_module_notifier(&trace_kprobe_module_nb))
1665 		return -EINVAL;
1666 
1667 	d_tracer = tracing_init_dentry();
1668 	if (IS_ERR(d_tracer))
1669 		return 0;
1670 
1671 	entry = tracefs_create_file("kprobe_events", 0644, d_tracer,
1672 				    NULL, &kprobe_events_ops);
1673 
1674 	/* Event list interface */
1675 	if (!entry)
1676 		pr_warn("Could not create tracefs 'kprobe_events' entry\n");
1677 
1678 	/* Profile interface */
1679 	entry = tracefs_create_file("kprobe_profile", 0444, d_tracer,
1680 				    NULL, &kprobe_profile_ops);
1681 
1682 	if (!entry)
1683 		pr_warn("Could not create tracefs 'kprobe_profile' entry\n");
1684 
1685 	setup_boot_kprobe_events();
1686 
1687 	return 0;
1688 }
1689 fs_initcall(init_kprobe_trace);
1690 
1691 
1692 #ifdef CONFIG_FTRACE_STARTUP_TEST
1693 static __init struct trace_event_file *
1694 find_trace_probe_file(struct trace_kprobe *tk, struct trace_array *tr)
1695 {
1696 	struct trace_event_file *file;
1697 
1698 	list_for_each_entry(file, &tr->events, list)
1699 		if (file->event_call == trace_probe_event_call(&tk->tp))
1700 			return file;
1701 
1702 	return NULL;
1703 }
1704 
1705 /*
1706  * Nobody but us can call enable_trace_kprobe/disable_trace_kprobe at this
1707  * stage, so we can do this without locking.
1708  */
1709 static __init int kprobe_trace_self_tests_init(void)
1710 {
1711 	int ret, warn = 0;
1712 	int (*target)(int, int, int, int, int, int);
1713 	struct trace_kprobe *tk;
1714 	struct trace_event_file *file;
1715 
1716 	if (tracing_is_disabled())
1717 		return -ENODEV;
1718 
1719 	if (kprobe_boot_events_enabled) {
1720 		pr_info("Skipping kprobe tests due to kprobe_event on cmdline\n");
1721 		return 0;
1722 	}
1723 
1724 	target = kprobe_trace_selftest_target;
1725 
1726 	pr_info("Testing kprobe tracing: ");
1727 
1728 	ret = trace_run_command("p:testprobe kprobe_trace_selftest_target $stack $stack0 +0($stack)",
1729 				create_or_delete_trace_kprobe);
1730 	if (WARN_ON_ONCE(ret)) {
1731 		pr_warn("error on probing function entry.\n");
1732 		warn++;
1733 	} else {
1734 		/* Enable trace point */
1735 		tk = find_trace_kprobe("testprobe", KPROBE_EVENT_SYSTEM);
1736 		if (WARN_ON_ONCE(tk == NULL)) {
1737 			pr_warn("error on getting new probe.\n");
1738 			warn++;
1739 		} else {
1740 			file = find_trace_probe_file(tk, top_trace_array());
1741 			if (WARN_ON_ONCE(file == NULL)) {
1742 				pr_warn("error on getting probe file.\n");
1743 				warn++;
1744 			} else
1745 				enable_trace_kprobe(
1746 					trace_probe_event_call(&tk->tp), file);
1747 		}
1748 	}
1749 
1750 	ret = trace_run_command("r:testprobe2 kprobe_trace_selftest_target $retval",
1751 				create_or_delete_trace_kprobe);
1752 	if (WARN_ON_ONCE(ret)) {
1753 		pr_warn("error on probing function return.\n");
1754 		warn++;
1755 	} else {
1756 		/* Enable trace point */
1757 		tk = find_trace_kprobe("testprobe2", KPROBE_EVENT_SYSTEM);
1758 		if (WARN_ON_ONCE(tk == NULL)) {
1759 			pr_warn("error on getting 2nd new probe.\n");
1760 			warn++;
1761 		} else {
1762 			file = find_trace_probe_file(tk, top_trace_array());
1763 			if (WARN_ON_ONCE(file == NULL)) {
1764 				pr_warn("error on getting probe file.\n");
1765 				warn++;
1766 			} else
1767 				enable_trace_kprobe(
1768 					trace_probe_event_call(&tk->tp), file);
1769 		}
1770 	}
1771 
1772 	if (warn)
1773 		goto end;
1774 
1775 	ret = target(1, 2, 3, 4, 5, 6);
1776 
1777 	/*
1778 	 * Not expecting an error here: the check only prevents the
1779 	 * optimizer from removing the call to target(), as otherwise there
1780 	 * would be no side effects and the call would never be performed.
1781 	 */
1782 	if (ret != 21)
1783 		warn++;
1784 
1785 	/* Disable trace points before removing it */
1786 	tk = find_trace_kprobe("testprobe", KPROBE_EVENT_SYSTEM);
1787 	if (WARN_ON_ONCE(tk == NULL)) {
1788 		pr_warn("error on getting test probe.\n");
1789 		warn++;
1790 	} else {
1791 		if (trace_kprobe_nhit(tk) != 1) {
1792 			pr_warn("incorrect number of testprobe hits\n");
1793 			warn++;
1794 		}
1795 
1796 		file = find_trace_probe_file(tk, top_trace_array());
1797 		if (WARN_ON_ONCE(file == NULL)) {
1798 			pr_warn("error on getting probe file.\n");
1799 			warn++;
1800 		} else
1801 			disable_trace_kprobe(
1802 				trace_probe_event_call(&tk->tp), file);
1803 	}
1804 
1805 	tk = find_trace_kprobe("testprobe2", KPROBE_EVENT_SYSTEM);
1806 	if (WARN_ON_ONCE(tk == NULL)) {
1807 		pr_warn("error on getting 2nd test probe.\n");
1808 		warn++;
1809 	} else {
1810 		if (trace_kprobe_nhit(tk) != 1) {
1811 			pr_warn("incorrect number of testprobe2 hits\n");
1812 			warn++;
1813 		}
1814 
1815 		file = find_trace_probe_file(tk, top_trace_array());
1816 		if (WARN_ON_ONCE(file == NULL)) {
1817 			pr_warn("error on getting probe file.\n");
1818 			warn++;
1819 		} else
1820 			disable_trace_kprobe(
1821 				trace_probe_event_call(&tk->tp), file);
1822 	}
1823 
1824 	ret = trace_run_command("-:testprobe", create_or_delete_trace_kprobe);
1825 	if (WARN_ON_ONCE(ret)) {
1826 		pr_warn("error on deleting a probe.\n");
1827 		warn++;
1828 	}
1829 
1830 	ret = trace_run_command("-:testprobe2", create_or_delete_trace_kprobe);
1831 	if (WARN_ON_ONCE(ret)) {
1832 		pr_warn("error on deleting a probe.\n");
1833 		warn++;
1834 	}
1835 
1836 end:
1837 	ret = dyn_events_release_all(&trace_kprobe_ops);
1838 	if (WARN_ON_ONCE(ret)) {
1839 		pr_warn("error on cleaning up probes.\n");
1840 		warn++;
1841 	}
1842 	/*
1843 	 * Wait for the optimizer work to finish. Otherwise it might fiddle
1844 	 * with probes in already freed __init text.
1845 	 */
1846 	wait_for_kprobe_optimizer();
1847 	if (warn)
1848 		pr_cont("NG: Some tests failed. Please check them.\n");
1849 	else
1850 		pr_cont("OK\n");
1851 	return 0;
1852 }
1853 
1854 late_initcall(kprobe_trace_self_tests_init);
1855 
1856 #endif
1857