xref: /linux/kernel/trace/trace_kprobe.c (revision d39d0ed196aa1685bb24771e92f78633c66ac9cb)
1 /*
2  * Kprobes-based tracing events
3  *
4  * Created by Masami Hiramatsu <mhiramat@redhat.com>
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 as
8  * published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18  */
19 
20 #include <linux/module.h>
21 #include <linux/uaccess.h>
22 #include <linux/kprobes.h>
23 #include <linux/seq_file.h>
24 #include <linux/slab.h>
25 #include <linux/smp.h>
26 #include <linux/debugfs.h>
27 #include <linux/types.h>
28 #include <linux/string.h>
29 #include <linux/ctype.h>
30 #include <linux/ptrace.h>
31 #include <linux/perf_event.h>
32 #include <linux/stringify.h>
33 #include <linux/limits.h>
34 #include <linux/uaccess.h>
35 #include <asm/bitsperlong.h>
36 
37 #include "trace.h"
38 #include "trace_output.h"
39 
/* Limits applied to probe definitions, and the default event group */
#define MAX_TRACE_ARGS		128		/* max fetch args parsed per probe */
#define MAX_ARGSTR_LEN		63		/* max length of one fetch-arg string */
#define MAX_EVENT_NAME_LEN	64		/* buffer size for generated event names */
#define MAX_STRING_SIZE		PATH_MAX	/* cap used when measuring strings */
#define KPROBE_EVENT_SYSTEM	"kprobes"	/* group used when none is given */
45 
/* Field names reserved for the probe itself */
#define FIELD_STRING_IP		"__probe_ip"
#define FIELD_STRING_RETIP	"__probe_ret_ip"
#define FIELD_STRING_FUNC	"__probe_func"

/*
 * Names that a probe argument must not use; conflict_field_name() checks
 * every new argument name against this list.
 */
const char *reserved_field_names[] = {
	/* common_* fields */
	"common_type",
	"common_flags",
	"common_preempt_count",
	"common_pid",
	"common_tgid",
	"common_lock_depth",
	/* probe-specific fields */
	FIELD_STRING_IP,
	FIELD_STRING_RETIP,
	FIELD_STRING_FUNC,
};
62 
63 /* Printing function type */
64 typedef int (*print_type_func_t)(struct trace_seq *, const char *, void *,
65 				 void *);
66 #define PRINT_TYPE_FUNC_NAME(type)	print_type_##type
67 #define PRINT_TYPE_FMT_NAME(type)	print_type_format_##type
68 
69 /* Printing  in basic type function template */
70 #define DEFINE_BASIC_PRINT_TYPE_FUNC(type, fmt, cast)			\
71 static __kprobes int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s,	\
72 						const char *name,	\
73 						void *data, void *ent)\
74 {									\
75 	return trace_seq_printf(s, " %s=" fmt, name, (cast)*(type *)data);\
76 }									\
77 static const char PRINT_TYPE_FMT_NAME(type)[] = fmt;
78 
79 DEFINE_BASIC_PRINT_TYPE_FUNC(u8, "%x", unsigned int)
80 DEFINE_BASIC_PRINT_TYPE_FUNC(u16, "%x", unsigned int)
81 DEFINE_BASIC_PRINT_TYPE_FUNC(u32, "%lx", unsigned long)
82 DEFINE_BASIC_PRINT_TYPE_FUNC(u64, "%llx", unsigned long long)
83 DEFINE_BASIC_PRINT_TYPE_FUNC(s8, "%d", int)
84 DEFINE_BASIC_PRINT_TYPE_FUNC(s16, "%d", int)
85 DEFINE_BASIC_PRINT_TYPE_FUNC(s32, "%ld", long)
86 DEFINE_BASIC_PRINT_TYPE_FUNC(s64, "%lld", long long)
87 
/*
 * data_rloc: data relative location, compatible with u32.
 * The upper 16 bits hold the length, the lower 16 bits hold the offset.
 */
#define make_data_rloc(len, roffs)	\
	(((u32)(len) << 16) | ((u32)(roffs) & 0xffff))
#define get_rloc_len(dl)		((u32)(dl) >> 16)
#define get_rloc_offs(dl)		((u32)(dl) & 0xffff)
93 
94 static inline void *get_rloc_data(u32 *dl)
95 {
96 	return (u8 *)dl + get_rloc_offs(*dl);
97 }
98 
99 /* For data_loc conversion */
100 static inline void *get_loc_data(u32 *dl, void *ent)
101 {
102 	return (u8 *)ent + get_rloc_offs(*dl);
103 }
104 
/*
 * Convert data_rloc to data_loc:
 *  a data_rloc holds an offset relative to the data_rloc word itself, while
 *  a data_loc holds an offset relative to the event entry, so adding @offs
 *  to the whole word rebases the offset part.
 */
#define convert_rloc_to_loc(dl, offs)	((u32)(dl) + (offs))
111 
112 /* For defining macros, define string/string_size types */
113 typedef u32 string;
114 typedef u32 string_size;
115 
116 /* Print type function for string type */
117 static __kprobes int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s,
118 						  const char *name,
119 						  void *data, void *ent)
120 {
121 	int len = *(u32 *)data >> 16;
122 
123 	if (!len)
124 		return trace_seq_printf(s, " %s=(fault)", name);
125 	else
126 		return trace_seq_printf(s, " %s=\"%s\"", name,
127 					(const char *)get_loc_data(data, ent));
128 }
129 static const char PRINT_TYPE_FMT_NAME(string)[] = "\\\"%s\\\"";
130 
/*
 * Data fetch function type: takes the probed registers, the method-private
 * data and the destination to write the fetched value to.
 */
typedef	void (*fetch_func_t)(struct pt_regs *, void *, void *);

/* A fetch function bundled with its private data */
struct fetch_param {
	fetch_func_t	fn;	/* fetch method */
	void		*data;	/* passed to fn as its second argument */
};
138 
139 static __kprobes void call_fetch(struct fetch_param *fprm,
140 				 struct pt_regs *regs, void *dest)
141 {
142 	return fprm->fn(regs, fprm->data, dest);
143 }
144 
/* Name of the fetch function for a given method and type */
#define FETCH_FUNC_NAME(method, type)	fetch_##method##_##type

/*
 * Instantiate a fetch method for every basic width.  Signed (s*) variants
 * are not needed: only the bit width matters at recording time.
 */
#define DEFINE_BASIC_FETCH_FUNCS(method) \
DEFINE_FETCH_##method(u8)		\
DEFINE_FETCH_##method(u16)		\
DEFINE_FETCH_##method(u32)		\
DEFINE_FETCH_##method(u64)

/* True if @fn is a non-NULL fetch function belonging to @method */
#define CHECK_FETCH_FUNCS(method, fn)			\
	((fn != NULL) &&				\
	 ((FETCH_FUNC_NAME(method, u8) == fn) ||	\
	  (FETCH_FUNC_NAME(method, u16) == fn) ||	\
	  (FETCH_FUNC_NAME(method, u32) == fn) ||	\
	  (FETCH_FUNC_NAME(method, u64) == fn) ||	\
	  (FETCH_FUNC_NAME(method, string) == fn) ||	\
	  (FETCH_FUNC_NAME(method, string_size) == fn)))
164 
165 /* Data fetch function templates */
166 #define DEFINE_FETCH_reg(type)						\
167 static __kprobes void FETCH_FUNC_NAME(reg, type)(struct pt_regs *regs,	\
168 					void *offset, void *dest)	\
169 {									\
170 	*(type *)dest = (type)regs_get_register(regs,			\
171 				(unsigned int)((unsigned long)offset));	\
172 }
173 DEFINE_BASIC_FETCH_FUNCS(reg)
174 /* No string on the register */
175 #define fetch_reg_string NULL
176 #define fetch_reg_string_size NULL
177 
178 #define DEFINE_FETCH_stack(type)					\
179 static __kprobes void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,\
180 					  void *offset, void *dest)	\
181 {									\
182 	*(type *)dest = (type)regs_get_kernel_stack_nth(regs,		\
183 				(unsigned int)((unsigned long)offset));	\
184 }
185 DEFINE_BASIC_FETCH_FUNCS(stack)
186 /* No string on the stack entry */
187 #define fetch_stack_string NULL
188 #define fetch_stack_string_size NULL
189 
190 #define DEFINE_FETCH_retval(type)					\
191 static __kprobes void FETCH_FUNC_NAME(retval, type)(struct pt_regs *regs,\
192 					  void *dummy, void *dest)	\
193 {									\
194 	*(type *)dest = (type)regs_return_value(regs);			\
195 }
196 DEFINE_BASIC_FETCH_FUNCS(retval)
197 /* No string on the retval */
198 #define fetch_retval_string NULL
199 #define fetch_retval_string_size NULL
200 
201 #define DEFINE_FETCH_memory(type)					\
202 static __kprobes void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,\
203 					  void *addr, void *dest)	\
204 {									\
205 	type retval;							\
206 	if (probe_kernel_address(addr, retval))				\
207 		*(type *)dest = 0;					\
208 	else								\
209 		*(type *)dest = retval;					\
210 }
211 DEFINE_BASIC_FETCH_FUNCS(memory)
212 /*
213  * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max
214  * length and relative data location.
215  */
216 static __kprobes void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs,
217 						      void *addr, void *dest)
218 {
219 	long ret;
220 	int maxlen = get_rloc_len(*(u32 *)dest);
221 	u8 *dst = get_rloc_data(dest);
222 	u8 *src = addr;
223 	mm_segment_t old_fs = get_fs();
224 	if (!maxlen)
225 		return;
226 	/*
227 	 * Try to get string again, since the string can be changed while
228 	 * probing.
229 	 */
230 	set_fs(KERNEL_DS);
231 	pagefault_disable();
232 	do
233 		ret = __copy_from_user_inatomic(dst++, src++, 1);
234 	while (dst[-1] && ret == 0 && src - (u8 *)addr < maxlen);
235 	dst[-1] = '\0';
236 	pagefault_enable();
237 	set_fs(old_fs);
238 
239 	if (ret < 0) {	/* Failed to fetch string */
240 		((u8 *)get_rloc_data(dest))[0] = '\0';
241 		*(u32 *)dest = make_data_rloc(0, get_rloc_offs(*(u32 *)dest));
242 	} else
243 		*(u32 *)dest = make_data_rloc(src - (u8 *)addr,
244 					      get_rloc_offs(*(u32 *)dest));
245 }
246 /* Return the length of string -- including null terminal byte */
247 static __kprobes void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs,
248 							void *addr, void *dest)
249 {
250 	int ret, len = 0;
251 	u8 c;
252 	mm_segment_t old_fs = get_fs();
253 
254 	set_fs(KERNEL_DS);
255 	pagefault_disable();
256 	do {
257 		ret = __copy_from_user_inatomic(&c, (u8 *)addr + len, 1);
258 		len++;
259 	} while (c && ret == 0 && len < MAX_STRING_SIZE);
260 	pagefault_enable();
261 	set_fs(old_fs);
262 
263 	if (ret < 0)	/* Failed to check the length */
264 		*(u32 *)dest = 0;
265 	else
266 		*(u32 *)dest = len;
267 }
268 
/* Memory fetching by symbol: a symbol name with its resolved address */
struct symbol_cache {
	char		*symbol;	/* symbol name (owned copy) */
	long		offset;		/* offset added to the symbol address */
	unsigned long	addr;		/* resolved address, 0 if not found */
};
275 
276 static unsigned long update_symbol_cache(struct symbol_cache *sc)
277 {
278 	sc->addr = (unsigned long)kallsyms_lookup_name(sc->symbol);
279 	if (sc->addr)
280 		sc->addr += sc->offset;
281 	return sc->addr;
282 }
283 
284 static void free_symbol_cache(struct symbol_cache *sc)
285 {
286 	kfree(sc->symbol);
287 	kfree(sc);
288 }
289 
290 static struct symbol_cache *alloc_symbol_cache(const char *sym, long offset)
291 {
292 	struct symbol_cache *sc;
293 
294 	if (!sym || strlen(sym) == 0)
295 		return NULL;
296 	sc = kzalloc(sizeof(struct symbol_cache), GFP_KERNEL);
297 	if (!sc)
298 		return NULL;
299 
300 	sc->symbol = kstrdup(sym, GFP_KERNEL);
301 	if (!sc->symbol) {
302 		kfree(sc);
303 		return NULL;
304 	}
305 	sc->offset = offset;
306 
307 	update_symbol_cache(sc);
308 	return sc;
309 }
310 
311 #define DEFINE_FETCH_symbol(type)					\
312 static __kprobes void FETCH_FUNC_NAME(symbol, type)(struct pt_regs *regs,\
313 					  void *data, void *dest)	\
314 {									\
315 	struct symbol_cache *sc = data;					\
316 	if (sc->addr)							\
317 		fetch_memory_##type(regs, (void *)sc->addr, dest);	\
318 	else								\
319 		*(type *)dest = 0;					\
320 }
321 DEFINE_BASIC_FETCH_FUNCS(symbol)
322 DEFINE_FETCH_symbol(string)
323 DEFINE_FETCH_symbol(string_size)
324 
325 /* Dereference memory access function */
326 struct deref_fetch_param {
327 	struct fetch_param orig;
328 	long offset;
329 };
330 
331 #define DEFINE_FETCH_deref(type)					\
332 static __kprobes void FETCH_FUNC_NAME(deref, type)(struct pt_regs *regs,\
333 					    void *data, void *dest)	\
334 {									\
335 	struct deref_fetch_param *dprm = data;				\
336 	unsigned long addr;						\
337 	call_fetch(&dprm->orig, regs, &addr);				\
338 	if (addr) {							\
339 		addr += dprm->offset;					\
340 		fetch_memory_##type(regs, (void *)addr, dest);		\
341 	} else								\
342 		*(type *)dest = 0;					\
343 }
344 DEFINE_BASIC_FETCH_FUNCS(deref)
345 DEFINE_FETCH_deref(string)
346 DEFINE_FETCH_deref(string_size)
347 
348 static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data)
349 {
350 	if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
351 		free_deref_fetch_param(data->orig.data);
352 	else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
353 		free_symbol_cache(data->orig.data);
354 	kfree(data);
355 }
356 
/*
 * Default (unsigned long) fetch type: expands to u<BITS_PER_LONG>.
 * The two helper levels make sure BITS_PER_LONG is expanded before pasting.
 */
#define __DEFAULT_FETCH_TYPE(t)	u##t
#define _DEFAULT_FETCH_TYPE(t)	__DEFAULT_FETCH_TYPE(t)
#define DEFAULT_FETCH_TYPE	_DEFAULT_FETCH_TYPE(BITS_PER_LONG)
#define DEFAULT_FETCH_TYPE_STR	__stringify(DEFAULT_FETCH_TYPE)
362 
/* Fetch methods: indexes into the fetch[] array of struct fetch_type */
enum {
	FETCH_MTD_reg = 0,	/* register */
	FETCH_MTD_stack,	/* Nth stack entry */
	FETCH_MTD_retval,	/* return value */
	FETCH_MTD_memory,	/* memory at an address */
	FETCH_MTD_symbol,	/* memory at a symbol */
	FETCH_MTD_deref,	/* dereference of another fetch */
	FETCH_MTD_END,		/* number of methods */
};
373 
/* Designated initializer for one fetch[] slot */
#define ASSIGN_FETCH_FUNC(method, type)	\
	[FETCH_MTD_##method] = FETCH_FUNC_NAME(method, type)

/*
 * Build one fetch_type_table entry: @ptype selects the print function and
 * format, @ftype selects the fetch functions for every method.
 */
#define __ASSIGN_FETCH_TYPE(_name, ptype, ftype, _size, sign, _fmttype)	\
	{.name = _name,				\
	 .size = _size,					\
	 .is_signed = sign,				\
	 .print = PRINT_TYPE_FUNC_NAME(ptype),		\
	 .fmt = PRINT_TYPE_FMT_NAME(ptype),		\
	 .fmttype = _fmttype,				\
	 .fetch = {					\
ASSIGN_FETCH_FUNC(reg, ftype),				\
ASSIGN_FETCH_FUNC(stack, ftype),			\
ASSIGN_FETCH_FUNC(retval, ftype),			\
ASSIGN_FETCH_FUNC(memory, ftype),			\
ASSIGN_FETCH_FUNC(symbol, ftype),			\
ASSIGN_FETCH_FUNC(deref, ftype),			\
	  }						\
	}

/* Entry for a basic type: name, format-file name and size derive from it */
#define ASSIGN_FETCH_TYPE(ptype, ftype, sign)			\
	__ASSIGN_FETCH_TYPE(#ptype, ptype, ftype, sizeof(ftype), sign, #ptype)

/* Fixed table indexes of the special types */
#define FETCH_TYPE_STRING 0
#define FETCH_TYPE_STRSIZE 1
399 
400 /* Fetch type information table */
401 static const struct fetch_type {
402 	const char	*name;		/* Name of type */
403 	size_t		size;		/* Byte size of type */
404 	int		is_signed;	/* Signed flag */
405 	print_type_func_t	print;	/* Print functions */
406 	const char	*fmt;		/* Fromat string */
407 	const char	*fmttype;	/* Name in format file */
408 	/* Fetch functions */
409 	fetch_func_t	fetch[FETCH_MTD_END];
410 } fetch_type_table[] = {
411 	/* Special types */
412 	[FETCH_TYPE_STRING] = __ASSIGN_FETCH_TYPE("string", string, string,
413 					sizeof(u32), 1, "__data_loc char[]"),
414 	[FETCH_TYPE_STRSIZE] = __ASSIGN_FETCH_TYPE("string_size", u32,
415 					string_size, sizeof(u32), 0, "u32"),
416 	/* Basic types */
417 	ASSIGN_FETCH_TYPE(u8,  u8,  0),
418 	ASSIGN_FETCH_TYPE(u16, u16, 0),
419 	ASSIGN_FETCH_TYPE(u32, u32, 0),
420 	ASSIGN_FETCH_TYPE(u64, u64, 0),
421 	ASSIGN_FETCH_TYPE(s8,  u8,  1),
422 	ASSIGN_FETCH_TYPE(s16, u16, 1),
423 	ASSIGN_FETCH_TYPE(s32, u32, 1),
424 	ASSIGN_FETCH_TYPE(s64, u64, 1),
425 };
426 
427 static const struct fetch_type *find_fetch_type(const char *type)
428 {
429 	int i;
430 
431 	if (!type)
432 		type = DEFAULT_FETCH_TYPE_STR;
433 
434 	for (i = 0; i < ARRAY_SIZE(fetch_type_table); i++)
435 		if (strcmp(type, fetch_type_table[i].name) == 0)
436 			return &fetch_type_table[i];
437 	return NULL;
438 }
439 
440 /* Special function : only accept unsigned long */
441 static __kprobes void fetch_stack_address(struct pt_regs *regs,
442 					  void *dummy, void *dest)
443 {
444 	*(unsigned long *)dest = kernel_stack_pointer(regs);
445 }
446 
447 static fetch_func_t get_fetch_size_function(const struct fetch_type *type,
448 					    fetch_func_t orig_fn)
449 {
450 	int i;
451 
452 	if (type != &fetch_type_table[FETCH_TYPE_STRING])
453 		return NULL;	/* Only string type needs size function */
454 	for (i = 0; i < FETCH_MTD_END; i++)
455 		if (type->fetch[i] == orig_fn)
456 			return fetch_type_table[FETCH_TYPE_STRSIZE].fetch[i];
457 
458 	WARN_ON(1);	/* This should not happen */
459 	return NULL;
460 }
461 
/*
 * Kprobe event core functions
 */
465 
466 struct probe_arg {
467 	struct fetch_param	fetch;
468 	struct fetch_param	fetch_size;
469 	unsigned int		offset;	/* Offset from argument entry */
470 	const char		*name;	/* Name of this argument */
471 	const char		*comm;	/* Command of this argument */
472 	const struct fetch_type	*type;	/* Type of this argument */
473 };
474 
/* Bit flags stored in trace_probe.flags */
#define TP_FLAG_TRACE		1
#define TP_FLAG_PROFILE		2
478 
479 struct trace_probe {
480 	struct list_head	list;
481 	struct kretprobe	rp;	/* Use rp.kp for kprobe use */
482 	unsigned long 		nhit;
483 	unsigned int		flags;	/* For TP_FLAG_* */
484 	const char		*symbol;	/* symbol name */
485 	struct ftrace_event_class	class;
486 	struct ftrace_event_call	call;
487 	ssize_t			size;		/* trace entry size */
488 	unsigned int		nr_args;
489 	struct probe_arg	args[];
490 };
491 
/* Allocation size of a trace_probe that carries @n arguments */
#define SIZEOF_TRACE_PROBE(n)				\
	(offsetof(struct trace_probe, args) +		\
	 (sizeof(struct probe_arg) * (n)))
495 
496 
497 static __kprobes int probe_is_return(struct trace_probe *tp)
498 {
499 	return tp->rp.handler != NULL;
500 }
501 
502 static __kprobes const char *probe_symbol(struct trace_probe *tp)
503 {
504 	return tp->symbol ? tp->symbol : "unknown";
505 }
506 
/* Event registration helpers */
static int register_probe_event(struct trace_probe *tp);
static void unregister_probe_event(struct trace_probe *tp);

/* List of registered probes; probe_lock is taken around accesses to it */
static DEFINE_MUTEX(probe_lock);
static LIST_HEAD(probe_list);

/* Handlers installed into the kprobe / kretprobe of each trace_probe */
static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs);
static int kretprobe_dispatcher(struct kretprobe_instance *ri,
				struct pt_regs *regs);
516 
/*
 * Check the name is good for event/group: it must start with a letter or
 * '_' and continue with letters, digits or '_' only.
 * Returns 1 if the name is acceptable, 0 otherwise (including for "").
 */
static int check_event_name(const char *name)
{
	const char *p = name;

	/* First character: no digit allowed */
	if (*p != '_' && !isalpha(*p))
		return 0;

	for (p++; *p != '\0'; p++) {
		if (*p == '_' || isalpha(*p) || isdigit(*p))
			continue;
		return 0;
	}
	return 1;
}
528 
529 /*
530  * Allocate new trace_probe and initialize it (including kprobes).
531  */
532 static struct trace_probe *alloc_trace_probe(const char *group,
533 					     const char *event,
534 					     void *addr,
535 					     const char *symbol,
536 					     unsigned long offs,
537 					     int nargs, int is_return)
538 {
539 	struct trace_probe *tp;
540 	int ret = -ENOMEM;
541 
542 	tp = kzalloc(SIZEOF_TRACE_PROBE(nargs), GFP_KERNEL);
543 	if (!tp)
544 		return ERR_PTR(ret);
545 
546 	if (symbol) {
547 		tp->symbol = kstrdup(symbol, GFP_KERNEL);
548 		if (!tp->symbol)
549 			goto error;
550 		tp->rp.kp.symbol_name = tp->symbol;
551 		tp->rp.kp.offset = offs;
552 	} else
553 		tp->rp.kp.addr = addr;
554 
555 	if (is_return)
556 		tp->rp.handler = kretprobe_dispatcher;
557 	else
558 		tp->rp.kp.pre_handler = kprobe_dispatcher;
559 
560 	if (!event || !check_event_name(event)) {
561 		ret = -EINVAL;
562 		goto error;
563 	}
564 
565 	tp->call.class = &tp->class;
566 	tp->call.name = kstrdup(event, GFP_KERNEL);
567 	if (!tp->call.name)
568 		goto error;
569 
570 	if (!group || !check_event_name(group)) {
571 		ret = -EINVAL;
572 		goto error;
573 	}
574 
575 	tp->class.system = kstrdup(group, GFP_KERNEL);
576 	if (!tp->class.system)
577 		goto error;
578 
579 	INIT_LIST_HEAD(&tp->list);
580 	return tp;
581 error:
582 	kfree(tp->call.name);
583 	kfree(tp->symbol);
584 	kfree(tp);
585 	return ERR_PTR(ret);
586 }
587 
588 static void free_probe_arg(struct probe_arg *arg)
589 {
590 	if (CHECK_FETCH_FUNCS(deref, arg->fetch.fn))
591 		free_deref_fetch_param(arg->fetch.data);
592 	else if (CHECK_FETCH_FUNCS(symbol, arg->fetch.fn))
593 		free_symbol_cache(arg->fetch.data);
594 	kfree(arg->name);
595 	kfree(arg->comm);
596 }
597 
598 static void free_trace_probe(struct trace_probe *tp)
599 {
600 	int i;
601 
602 	for (i = 0; i < tp->nr_args; i++)
603 		free_probe_arg(&tp->args[i]);
604 
605 	kfree(tp->call.class->system);
606 	kfree(tp->call.name);
607 	kfree(tp->symbol);
608 	kfree(tp);
609 }
610 
611 static struct trace_probe *find_probe_event(const char *event,
612 					    const char *group)
613 {
614 	struct trace_probe *tp;
615 
616 	list_for_each_entry(tp, &probe_list, list)
617 		if (strcmp(tp->call.name, event) == 0 &&
618 		    strcmp(tp->call.class->system, group) == 0)
619 			return tp;
620 	return NULL;
621 }
622 
623 /* Unregister a trace_probe and probe_event: call with locking probe_lock */
624 static void unregister_trace_probe(struct trace_probe *tp)
625 {
626 	if (probe_is_return(tp))
627 		unregister_kretprobe(&tp->rp);
628 	else
629 		unregister_kprobe(&tp->rp.kp);
630 	list_del(&tp->list);
631 	unregister_probe_event(tp);
632 }
633 
634 /* Register a trace_probe and probe_event */
635 static int register_trace_probe(struct trace_probe *tp)
636 {
637 	struct trace_probe *old_tp;
638 	int ret;
639 
640 	mutex_lock(&probe_lock);
641 
642 	/* register as an event */
643 	old_tp = find_probe_event(tp->call.name, tp->call.class->system);
644 	if (old_tp) {
645 		/* delete old event */
646 		unregister_trace_probe(old_tp);
647 		free_trace_probe(old_tp);
648 	}
649 	ret = register_probe_event(tp);
650 	if (ret) {
651 		pr_warning("Faild to register probe event(%d)\n", ret);
652 		goto end;
653 	}
654 
655 	tp->rp.kp.flags |= KPROBE_FLAG_DISABLED;
656 	if (probe_is_return(tp))
657 		ret = register_kretprobe(&tp->rp);
658 	else
659 		ret = register_kprobe(&tp->rp.kp);
660 
661 	if (ret) {
662 		pr_warning("Could not insert probe(%d)\n", ret);
663 		if (ret == -EILSEQ) {
664 			pr_warning("Probing address(0x%p) is not an "
665 				   "instruction boundary.\n",
666 				   tp->rp.kp.addr);
667 			ret = -EINVAL;
668 		}
669 		unregister_probe_event(tp);
670 	} else
671 		list_add_tail(&tp->list, &probe_list);
672 end:
673 	mutex_unlock(&probe_lock);
674 	return ret;
675 }
676 
677 /* Split symbol and offset. */
678 static int split_symbol_offset(char *symbol, unsigned long *offset)
679 {
680 	char *tmp;
681 	int ret;
682 
683 	if (!offset)
684 		return -EINVAL;
685 
686 	tmp = strchr(symbol, '+');
687 	if (tmp) {
688 		/* skip sign because strict_strtol doesn't accept '+' */
689 		ret = strict_strtoul(tmp + 1, 0, offset);
690 		if (ret)
691 			return ret;
692 		*tmp = '\0';
693 	} else
694 		*offset = 0;
695 	return 0;
696 }
697 
#define PARAM_MAX_ARGS	16
/* Largest N accepted for $stackN: number of longs in a thread stack */
#define PARAM_MAX_STACK	(THREAD_SIZE / sizeof(unsigned long))
700 
701 static int parse_probe_vars(char *arg, const struct fetch_type *t,
702 			    struct fetch_param *f, int is_return)
703 {
704 	int ret = 0;
705 	unsigned long param;
706 
707 	if (strcmp(arg, "retval") == 0) {
708 		if (is_return)
709 			f->fn = t->fetch[FETCH_MTD_retval];
710 		else
711 			ret = -EINVAL;
712 	} else if (strncmp(arg, "stack", 5) == 0) {
713 		if (arg[5] == '\0') {
714 			if (strcmp(t->name, DEFAULT_FETCH_TYPE_STR) == 0)
715 				f->fn = fetch_stack_address;
716 			else
717 				ret = -EINVAL;
718 		} else if (isdigit(arg[5])) {
719 			ret = strict_strtoul(arg + 5, 10, &param);
720 			if (ret || param > PARAM_MAX_STACK)
721 				ret = -EINVAL;
722 			else {
723 				f->fn = t->fetch[FETCH_MTD_stack];
724 				f->data = (void *)param;
725 			}
726 		} else
727 			ret = -EINVAL;
728 	} else
729 		ret = -EINVAL;
730 	return ret;
731 }
732 
733 /* Recursive argument parser */
734 static int __parse_probe_arg(char *arg, const struct fetch_type *t,
735 			     struct fetch_param *f, int is_return)
736 {
737 	int ret = 0;
738 	unsigned long param;
739 	long offset;
740 	char *tmp;
741 
742 	switch (arg[0]) {
743 	case '$':
744 		ret = parse_probe_vars(arg + 1, t, f, is_return);
745 		break;
746 	case '%':	/* named register */
747 		ret = regs_query_register_offset(arg + 1);
748 		if (ret >= 0) {
749 			f->fn = t->fetch[FETCH_MTD_reg];
750 			f->data = (void *)(unsigned long)ret;
751 			ret = 0;
752 		}
753 		break;
754 	case '@':	/* memory or symbol */
755 		if (isdigit(arg[1])) {
756 			ret = strict_strtoul(arg + 1, 0, &param);
757 			if (ret)
758 				break;
759 			f->fn = t->fetch[FETCH_MTD_memory];
760 			f->data = (void *)param;
761 		} else {
762 			ret = split_symbol_offset(arg + 1, &offset);
763 			if (ret)
764 				break;
765 			f->data = alloc_symbol_cache(arg + 1, offset);
766 			if (f->data)
767 				f->fn = t->fetch[FETCH_MTD_symbol];
768 		}
769 		break;
770 	case '+':	/* deref memory */
771 	case '-':
772 		tmp = strchr(arg, '(');
773 		if (!tmp)
774 			break;
775 		*tmp = '\0';
776 		ret = strict_strtol(arg + 1, 0, &offset);
777 		if (ret)
778 			break;
779 		if (arg[0] == '-')
780 			offset = -offset;
781 		arg = tmp + 1;
782 		tmp = strrchr(arg, ')');
783 		if (tmp) {
784 			struct deref_fetch_param *dprm;
785 			const struct fetch_type *t2 = find_fetch_type(NULL);
786 			*tmp = '\0';
787 			dprm = kzalloc(sizeof(struct deref_fetch_param),
788 				       GFP_KERNEL);
789 			if (!dprm)
790 				return -ENOMEM;
791 			dprm->offset = offset;
792 			ret = __parse_probe_arg(arg, t2, &dprm->orig,
793 						is_return);
794 			if (ret)
795 				kfree(dprm);
796 			else {
797 				f->fn = t->fetch[FETCH_MTD_deref];
798 				f->data = (void *)dprm;
799 			}
800 		}
801 		break;
802 	}
803 	if (!ret && !f->fn) {	/* Parsed, but do not find fetch method */
804 		pr_info("%s type has no corresponding fetch method.\n",
805 			t->name);
806 		ret = -EINVAL;
807 	}
808 	return ret;
809 }
810 
811 /* String length checking wrapper */
812 static int parse_probe_arg(char *arg, struct trace_probe *tp,
813 			   struct probe_arg *parg, int is_return)
814 {
815 	const char *t;
816 	int ret;
817 
818 	if (strlen(arg) > MAX_ARGSTR_LEN) {
819 		pr_info("Argument is too long.: %s\n",  arg);
820 		return -ENOSPC;
821 	}
822 	parg->comm = kstrdup(arg, GFP_KERNEL);
823 	if (!parg->comm) {
824 		pr_info("Failed to allocate memory for command '%s'.\n", arg);
825 		return -ENOMEM;
826 	}
827 	t = strchr(parg->comm, ':');
828 	if (t) {
829 		arg[t - parg->comm] = '\0';
830 		t++;
831 	}
832 	parg->type = find_fetch_type(t);
833 	if (!parg->type) {
834 		pr_info("Unsupported type: %s\n", t);
835 		return -EINVAL;
836 	}
837 	parg->offset = tp->size;
838 	tp->size += parg->type->size;
839 	ret = __parse_probe_arg(arg, parg->type, &parg->fetch, is_return);
840 	if (ret >= 0) {
841 		parg->fetch_size.fn = get_fetch_size_function(parg->type,
842 							      parg->fetch.fn);
843 		parg->fetch_size.data = parg->fetch.data;
844 	}
845 	return ret;
846 }
847 
848 /* Return 1 if name is reserved or already used by another argument */
849 static int conflict_field_name(const char *name,
850 			       struct probe_arg *args, int narg)
851 {
852 	int i;
853 	for (i = 0; i < ARRAY_SIZE(reserved_field_names); i++)
854 		if (strcmp(reserved_field_names[i], name) == 0)
855 			return 1;
856 	for (i = 0; i < narg; i++)
857 		if (strcmp(args[i].name, name) == 0)
858 			return 1;
859 	return 0;
860 }
861 
862 static int create_trace_probe(int argc, char **argv)
863 {
864 	/*
865 	 * Argument syntax:
866 	 *  - Add kprobe: p[:[GRP/]EVENT] KSYM[+OFFS]|KADDR [FETCHARGS]
867 	 *  - Add kretprobe: r[:[GRP/]EVENT] KSYM[+0] [FETCHARGS]
868 	 * Fetch args:
869 	 *  $retval	: fetch return value
870 	 *  $stack	: fetch stack address
871 	 *  $stackN	: fetch Nth of stack (N:0-)
872 	 *  @ADDR	: fetch memory at ADDR (ADDR should be in kernel)
873 	 *  @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol)
874 	 *  %REG	: fetch register REG
875 	 * Dereferencing memory fetch:
876 	 *  +|-offs(ARG) : fetch memory at ARG +|- offs address.
877 	 * Alias name of args:
878 	 *  NAME=FETCHARG : set NAME as alias of FETCHARG.
879 	 * Type of args:
880 	 *  FETCHARG:TYPE : use TYPE instead of unsigned long.
881 	 */
882 	struct trace_probe *tp;
883 	int i, ret = 0;
884 	int is_return = 0, is_delete = 0;
885 	char *symbol = NULL, *event = NULL, *group = NULL;
886 	char *arg, *tmp;
887 	unsigned long offset = 0;
888 	void *addr = NULL;
889 	char buf[MAX_EVENT_NAME_LEN];
890 
891 	/* argc must be >= 1 */
892 	if (argv[0][0] == 'p')
893 		is_return = 0;
894 	else if (argv[0][0] == 'r')
895 		is_return = 1;
896 	else if (argv[0][0] == '-')
897 		is_delete = 1;
898 	else {
899 		pr_info("Probe definition must be started with 'p', 'r' or"
900 			" '-'.\n");
901 		return -EINVAL;
902 	}
903 
904 	if (argv[0][1] == ':') {
905 		event = &argv[0][2];
906 		if (strchr(event, '/')) {
907 			group = event;
908 			event = strchr(group, '/') + 1;
909 			event[-1] = '\0';
910 			if (strlen(group) == 0) {
911 				pr_info("Group name is not specified\n");
912 				return -EINVAL;
913 			}
914 		}
915 		if (strlen(event) == 0) {
916 			pr_info("Event name is not specified\n");
917 			return -EINVAL;
918 		}
919 	}
920 	if (!group)
921 		group = KPROBE_EVENT_SYSTEM;
922 
923 	if (is_delete) {
924 		if (!event) {
925 			pr_info("Delete command needs an event name.\n");
926 			return -EINVAL;
927 		}
928 		mutex_lock(&probe_lock);
929 		tp = find_probe_event(event, group);
930 		if (!tp) {
931 			mutex_unlock(&probe_lock);
932 			pr_info("Event %s/%s doesn't exist.\n", group, event);
933 			return -ENOENT;
934 		}
935 		/* delete an event */
936 		unregister_trace_probe(tp);
937 		free_trace_probe(tp);
938 		mutex_unlock(&probe_lock);
939 		return 0;
940 	}
941 
942 	if (argc < 2) {
943 		pr_info("Probe point is not specified.\n");
944 		return -EINVAL;
945 	}
946 	if (isdigit(argv[1][0])) {
947 		if (is_return) {
948 			pr_info("Return probe point must be a symbol.\n");
949 			return -EINVAL;
950 		}
951 		/* an address specified */
952 		ret = strict_strtoul(&argv[1][0], 0, (unsigned long *)&addr);
953 		if (ret) {
954 			pr_info("Failed to parse address.\n");
955 			return ret;
956 		}
957 	} else {
958 		/* a symbol specified */
959 		symbol = argv[1];
960 		/* TODO: support .init module functions */
961 		ret = split_symbol_offset(symbol, &offset);
962 		if (ret) {
963 			pr_info("Failed to parse symbol.\n");
964 			return ret;
965 		}
966 		if (offset && is_return) {
967 			pr_info("Return probe must be used without offset.\n");
968 			return -EINVAL;
969 		}
970 	}
971 	argc -= 2; argv += 2;
972 
973 	/* setup a probe */
974 	if (!event) {
975 		/* Make a new event name */
976 		if (symbol)
977 			snprintf(buf, MAX_EVENT_NAME_LEN, "%c_%s_%ld",
978 				 is_return ? 'r' : 'p', symbol, offset);
979 		else
980 			snprintf(buf, MAX_EVENT_NAME_LEN, "%c_0x%p",
981 				 is_return ? 'r' : 'p', addr);
982 		event = buf;
983 	}
984 	tp = alloc_trace_probe(group, event, addr, symbol, offset, argc,
985 			       is_return);
986 	if (IS_ERR(tp)) {
987 		pr_info("Failed to allocate trace_probe.(%d)\n",
988 			(int)PTR_ERR(tp));
989 		return PTR_ERR(tp);
990 	}
991 
992 	/* parse arguments */
993 	ret = 0;
994 	for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
995 		/* Parse argument name */
996 		arg = strchr(argv[i], '=');
997 		if (arg)
998 			*arg++ = '\0';
999 		else
1000 			arg = argv[i];
1001 
1002 		tp->args[i].name = kstrdup(argv[i], GFP_KERNEL);
1003 		if (!tp->args[i].name) {
1004 			pr_info("Failed to allocate argument%d name '%s'.\n",
1005 				i, argv[i]);
1006 			ret = -ENOMEM;
1007 			goto error;
1008 		}
1009 		tmp = strchr(tp->args[i].name, ':');
1010 		if (tmp)
1011 			*tmp = '_';	/* convert : to _ */
1012 
1013 		if (conflict_field_name(tp->args[i].name, tp->args, i)) {
1014 			pr_info("Argument%d name '%s' conflicts with "
1015 				"another field.\n", i, argv[i]);
1016 			ret = -EINVAL;
1017 			goto error;
1018 		}
1019 
1020 		/* Parse fetch argument */
1021 		ret = parse_probe_arg(arg, tp, &tp->args[i], is_return);
1022 		if (ret) {
1023 			pr_info("Parse error at argument%d. (%d)\n", i, ret);
1024 			kfree(tp->args[i].name);
1025 			goto error;
1026 		}
1027 
1028 		tp->nr_args++;
1029 	}
1030 
1031 	ret = register_trace_probe(tp);
1032 	if (ret)
1033 		goto error;
1034 	return 0;
1035 
1036 error:
1037 	free_trace_probe(tp);
1038 	return ret;
1039 }
1040 
1041 static void cleanup_all_probes(void)
1042 {
1043 	struct trace_probe *tp;
1044 
1045 	mutex_lock(&probe_lock);
1046 	/* TODO: Use batch unregistration */
1047 	while (!list_empty(&probe_list)) {
1048 		tp = list_entry(probe_list.next, struct trace_probe, list);
1049 		unregister_trace_probe(tp);
1050 		free_trace_probe(tp);
1051 	}
1052 	mutex_unlock(&probe_lock);
1053 }
1054 
1055 
1056 /* Probes listing interfaces */
1057 static void *probes_seq_start(struct seq_file *m, loff_t *pos)
1058 {
1059 	mutex_lock(&probe_lock);
1060 	return seq_list_start(&probe_list, *pos);
1061 }
1062 
1063 static void *probes_seq_next(struct seq_file *m, void *v, loff_t *pos)
1064 {
1065 	return seq_list_next(v, &probe_list, pos);
1066 }
1067 
1068 static void probes_seq_stop(struct seq_file *m, void *v)
1069 {
1070 	mutex_unlock(&probe_lock);
1071 }
1072 
1073 static int probes_seq_show(struct seq_file *m, void *v)
1074 {
1075 	struct trace_probe *tp = v;
1076 	int i;
1077 
1078 	seq_printf(m, "%c", probe_is_return(tp) ? 'r' : 'p');
1079 	seq_printf(m, ":%s/%s", tp->call.class->system, tp->call.name);
1080 
1081 	if (!tp->symbol)
1082 		seq_printf(m, " 0x%p", tp->rp.kp.addr);
1083 	else if (tp->rp.kp.offset)
1084 		seq_printf(m, " %s+%u", probe_symbol(tp), tp->rp.kp.offset);
1085 	else
1086 		seq_printf(m, " %s", probe_symbol(tp));
1087 
1088 	for (i = 0; i < tp->nr_args; i++)
1089 		seq_printf(m, " %s=%s", tp->args[i].name, tp->args[i].comm);
1090 	seq_printf(m, "\n");
1091 
1092 	return 0;
1093 }
1094 
1095 static const struct seq_operations probes_seq_op = {
1096 	.start  = probes_seq_start,
1097 	.next   = probes_seq_next,
1098 	.stop   = probes_seq_stop,
1099 	.show   = probes_seq_show
1100 };
1101 
1102 static int probes_open(struct inode *inode, struct file *file)
1103 {
1104 	if ((file->f_mode & FMODE_WRITE) &&
1105 	    (file->f_flags & O_TRUNC))
1106 		cleanup_all_probes();
1107 
1108 	return seq_open(file, &probes_seq_op);
1109 }
1110 
1111 static int command_trace_probe(const char *buf)
1112 {
1113 	char **argv;
1114 	int argc = 0, ret = 0;
1115 
1116 	argv = argv_split(GFP_KERNEL, buf, &argc);
1117 	if (!argv)
1118 		return -ENOMEM;
1119 
1120 	if (argc)
1121 		ret = create_trace_probe(argc, argv);
1122 
1123 	argv_free(argv);
1124 	return ret;
1125 }
1126 
1127 #define WRITE_BUFSIZE 128
1128 
1129 static ssize_t probes_write(struct file *file, const char __user *buffer,
1130 			    size_t count, loff_t *ppos)
1131 {
1132 	char *kbuf, *tmp;
1133 	int ret;
1134 	size_t done;
1135 	size_t size;
1136 
1137 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
1138 	if (!kbuf)
1139 		return -ENOMEM;
1140 
1141 	ret = done = 0;
1142 	while (done < count) {
1143 		size = count - done;
1144 		if (size >= WRITE_BUFSIZE)
1145 			size = WRITE_BUFSIZE - 1;
1146 		if (copy_from_user(kbuf, buffer + done, size)) {
1147 			ret = -EFAULT;
1148 			goto out;
1149 		}
1150 		kbuf[size] = '\0';
1151 		tmp = strchr(kbuf, '\n');
1152 		if (tmp) {
1153 			*tmp = '\0';
1154 			size = tmp - kbuf + 1;
1155 		} else if (done + size < count) {
1156 			pr_warning("Line length is too long: "
1157 				   "Should be less than %d.", WRITE_BUFSIZE);
1158 			ret = -EINVAL;
1159 			goto out;
1160 		}
1161 		done += size;
1162 		/* Remove comments */
1163 		tmp = strchr(kbuf, '#');
1164 		if (tmp)
1165 			*tmp = '\0';
1166 
1167 		ret = command_trace_probe(kbuf);
1168 		if (ret)
1169 			goto out;
1170 	}
1171 	ret = done;
1172 out:
1173 	kfree(kbuf);
1174 	return ret;
1175 }
1176 
1177 static const struct file_operations kprobe_events_ops = {
1178 	.owner          = THIS_MODULE,
1179 	.open           = probes_open,
1180 	.read           = seq_read,
1181 	.llseek         = seq_lseek,
1182 	.release        = seq_release,
1183 	.write		= probes_write,
1184 };
1185 
1186 /* Probes profiling interfaces */
1187 static int probes_profile_seq_show(struct seq_file *m, void *v)
1188 {
1189 	struct trace_probe *tp = v;
1190 
1191 	seq_printf(m, "  %-44s %15lu %15lu\n", tp->call.name, tp->nhit,
1192 		   tp->rp.kp.nmissed);
1193 
1194 	return 0;
1195 }
1196 
1197 static const struct seq_operations profile_seq_op = {
1198 	.start  = probes_seq_start,
1199 	.next   = probes_seq_next,
1200 	.stop   = probes_seq_stop,
1201 	.show   = probes_profile_seq_show
1202 };
1203 
1204 static int profile_open(struct inode *inode, struct file *file)
1205 {
1206 	return seq_open(file, &profile_seq_op);
1207 }
1208 
1209 static const struct file_operations kprobe_profile_ops = {
1210 	.owner          = THIS_MODULE,
1211 	.open           = profile_open,
1212 	.read           = seq_read,
1213 	.llseek         = seq_lseek,
1214 	.release        = seq_release,
1215 };
1216 
1217 /* Sum up total data length for dynamic arraies (strings) */
1218 static __kprobes int __get_data_size(struct trace_probe *tp,
1219 				     struct pt_regs *regs)
1220 {
1221 	int i, ret = 0;
1222 	u32 len;
1223 
1224 	for (i = 0; i < tp->nr_args; i++)
1225 		if (unlikely(tp->args[i].fetch_size.fn)) {
1226 			call_fetch(&tp->args[i].fetch_size, regs, &len);
1227 			ret += len;
1228 		}
1229 
1230 	return ret;
1231 }
1232 
1233 /* Store the value of each argument */
1234 static __kprobes void store_trace_args(int ent_size, struct trace_probe *tp,
1235 				       struct pt_regs *regs,
1236 				       u8 *data, int maxlen)
1237 {
1238 	int i;
1239 	u32 end = tp->size;
1240 	u32 *dl;	/* Data (relative) location */
1241 
1242 	for (i = 0; i < tp->nr_args; i++) {
1243 		if (unlikely(tp->args[i].fetch_size.fn)) {
1244 			/*
1245 			 * First, we set the relative location and
1246 			 * maximum data length to *dl
1247 			 */
1248 			dl = (u32 *)(data + tp->args[i].offset);
1249 			*dl = make_data_rloc(maxlen, end - tp->args[i].offset);
1250 			/* Then try to fetch string or dynamic array data */
1251 			call_fetch(&tp->args[i].fetch, regs, dl);
1252 			/* Reduce maximum length */
1253 			end += get_rloc_len(*dl);
1254 			maxlen -= get_rloc_len(*dl);
1255 			/* Trick here, convert data_rloc to data_loc */
1256 			*dl = convert_rloc_to_loc(*dl,
1257 				 ent_size + tp->args[i].offset);
1258 		} else
1259 			/* Just fetching data normally */
1260 			call_fetch(&tp->args[i].fetch, regs,
1261 				   data + tp->args[i].offset);
1262 	}
1263 }
1264 
1265 /* Kprobe handler */
1266 static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
1267 {
1268 	struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
1269 	struct kprobe_trace_entry_head *entry;
1270 	struct ring_buffer_event *event;
1271 	struct ring_buffer *buffer;
1272 	int size, dsize, pc;
1273 	unsigned long irq_flags;
1274 	struct ftrace_event_call *call = &tp->call;
1275 
1276 	tp->nhit++;
1277 
1278 	local_save_flags(irq_flags);
1279 	pc = preempt_count();
1280 
1281 	dsize = __get_data_size(tp, regs);
1282 	size = sizeof(*entry) + tp->size + dsize;
1283 
1284 	event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
1285 						  size, irq_flags, pc);
1286 	if (!event)
1287 		return;
1288 
1289 	entry = ring_buffer_event_data(event);
1290 	entry->ip = (unsigned long)kp->addr;
1291 	store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
1292 
1293 	if (!filter_current_check_discard(buffer, call, entry, event))
1294 		trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
1295 }
1296 
1297 /* Kretprobe handler */
1298 static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri,
1299 					  struct pt_regs *regs)
1300 {
1301 	struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
1302 	struct kretprobe_trace_entry_head *entry;
1303 	struct ring_buffer_event *event;
1304 	struct ring_buffer *buffer;
1305 	int size, pc, dsize;
1306 	unsigned long irq_flags;
1307 	struct ftrace_event_call *call = &tp->call;
1308 
1309 	local_save_flags(irq_flags);
1310 	pc = preempt_count();
1311 
1312 	dsize = __get_data_size(tp, regs);
1313 	size = sizeof(*entry) + tp->size + dsize;
1314 
1315 	event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
1316 						  size, irq_flags, pc);
1317 	if (!event)
1318 		return;
1319 
1320 	entry = ring_buffer_event_data(event);
1321 	entry->func = (unsigned long)tp->rp.kp.addr;
1322 	entry->ret_ip = (unsigned long)ri->ret_addr;
1323 	store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
1324 
1325 	if (!filter_current_check_discard(buffer, call, entry, event))
1326 		trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
1327 }
1328 
1329 /* Event entry printers */
1330 enum print_line_t
1331 print_kprobe_event(struct trace_iterator *iter, int flags,
1332 		   struct trace_event *event)
1333 {
1334 	struct kprobe_trace_entry_head *field;
1335 	struct trace_seq *s = &iter->seq;
1336 	struct trace_probe *tp;
1337 	u8 *data;
1338 	int i;
1339 
1340 	field = (struct kprobe_trace_entry_head *)iter->ent;
1341 	tp = container_of(event, struct trace_probe, call.event);
1342 
1343 	if (!trace_seq_printf(s, "%s: (", tp->call.name))
1344 		goto partial;
1345 
1346 	if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
1347 		goto partial;
1348 
1349 	if (!trace_seq_puts(s, ")"))
1350 		goto partial;
1351 
1352 	data = (u8 *)&field[1];
1353 	for (i = 0; i < tp->nr_args; i++)
1354 		if (!tp->args[i].type->print(s, tp->args[i].name,
1355 					     data + tp->args[i].offset, field))
1356 			goto partial;
1357 
1358 	if (!trace_seq_puts(s, "\n"))
1359 		goto partial;
1360 
1361 	return TRACE_TYPE_HANDLED;
1362 partial:
1363 	return TRACE_TYPE_PARTIAL_LINE;
1364 }
1365 
1366 enum print_line_t
1367 print_kretprobe_event(struct trace_iterator *iter, int flags,
1368 		      struct trace_event *event)
1369 {
1370 	struct kretprobe_trace_entry_head *field;
1371 	struct trace_seq *s = &iter->seq;
1372 	struct trace_probe *tp;
1373 	u8 *data;
1374 	int i;
1375 
1376 	field = (struct kretprobe_trace_entry_head *)iter->ent;
1377 	tp = container_of(event, struct trace_probe, call.event);
1378 
1379 	if (!trace_seq_printf(s, "%s: (", tp->call.name))
1380 		goto partial;
1381 
1382 	if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET))
1383 		goto partial;
1384 
1385 	if (!trace_seq_puts(s, " <- "))
1386 		goto partial;
1387 
1388 	if (!seq_print_ip_sym(s, field->func, flags & ~TRACE_ITER_SYM_OFFSET))
1389 		goto partial;
1390 
1391 	if (!trace_seq_puts(s, ")"))
1392 		goto partial;
1393 
1394 	data = (u8 *)&field[1];
1395 	for (i = 0; i < tp->nr_args; i++)
1396 		if (!tp->args[i].type->print(s, tp->args[i].name,
1397 					     data + tp->args[i].offset, field))
1398 			goto partial;
1399 
1400 	if (!trace_seq_puts(s, "\n"))
1401 		goto partial;
1402 
1403 	return TRACE_TYPE_HANDLED;
1404 partial:
1405 	return TRACE_TYPE_PARTIAL_LINE;
1406 }
1407 
1408 static int probe_event_enable(struct ftrace_event_call *call)
1409 {
1410 	struct trace_probe *tp = (struct trace_probe *)call->data;
1411 
1412 	tp->flags |= TP_FLAG_TRACE;
1413 	if (probe_is_return(tp))
1414 		return enable_kretprobe(&tp->rp);
1415 	else
1416 		return enable_kprobe(&tp->rp.kp);
1417 }
1418 
1419 static void probe_event_disable(struct ftrace_event_call *call)
1420 {
1421 	struct trace_probe *tp = (struct trace_probe *)call->data;
1422 
1423 	tp->flags &= ~TP_FLAG_TRACE;
1424 	if (!(tp->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE))) {
1425 		if (probe_is_return(tp))
1426 			disable_kretprobe(&tp->rp);
1427 		else
1428 			disable_kprobe(&tp->rp.kp);
1429 	}
1430 }
1431 
/*
 * DEFINE_FIELD - register one entry-head member as an event field.
 *
 * Expands in a function that has locals named 'ret', 'event_call' and
 * 'field' in scope.  On a non-zero result from trace_define_field() the
 * enclosing function returns that result immediately.
 */
#undef DEFINE_FIELD
#define DEFINE_FIELD(type, item, name, is_signed)			\
	do {								\
		ret = trace_define_field(event_call, #type, name,	\
				offsetof(typeof(field), item),		\
				sizeof(field.item), is_signed,		\
				FILTER_OTHER);				\
		if (ret != 0)						\
			return ret;					\
	} while (0)
1442 
1443 static int kprobe_event_define_fields(struct ftrace_event_call *event_call)
1444 {
1445 	int ret, i;
1446 	struct kprobe_trace_entry_head field;
1447 	struct trace_probe *tp = (struct trace_probe *)event_call->data;
1448 
1449 	DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
1450 	/* Set argument names as fields */
1451 	for (i = 0; i < tp->nr_args; i++) {
1452 		ret = trace_define_field(event_call, tp->args[i].type->fmttype,
1453 					 tp->args[i].name,
1454 					 sizeof(field) + tp->args[i].offset,
1455 					 tp->args[i].type->size,
1456 					 tp->args[i].type->is_signed,
1457 					 FILTER_OTHER);
1458 		if (ret)
1459 			return ret;
1460 	}
1461 	return 0;
1462 }
1463 
1464 static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
1465 {
1466 	int ret, i;
1467 	struct kretprobe_trace_entry_head field;
1468 	struct trace_probe *tp = (struct trace_probe *)event_call->data;
1469 
1470 	DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0);
1471 	DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0);
1472 	/* Set argument names as fields */
1473 	for (i = 0; i < tp->nr_args; i++) {
1474 		ret = trace_define_field(event_call, tp->args[i].type->fmttype,
1475 					 tp->args[i].name,
1476 					 sizeof(field) + tp->args[i].offset,
1477 					 tp->args[i].type->size,
1478 					 tp->args[i].type->is_signed,
1479 					 FILTER_OTHER);
1480 		if (ret)
1481 			return ret;
1482 	}
1483 	return 0;
1484 }
1485 
1486 static int __set_print_fmt(struct trace_probe *tp, char *buf, int len)
1487 {
1488 	int i;
1489 	int pos = 0;
1490 
1491 	const char *fmt, *arg;
1492 
1493 	if (!probe_is_return(tp)) {
1494 		fmt = "(%lx)";
1495 		arg = "REC->" FIELD_STRING_IP;
1496 	} else {
1497 		fmt = "(%lx <- %lx)";
1498 		arg = "REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP;
1499 	}
1500 
1501 	/* When len=0, we just calculate the needed length */
1502 #define LEN_OR_ZERO (len ? len - pos : 0)
1503 
1504 	pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", fmt);
1505 
1506 	for (i = 0; i < tp->nr_args; i++) {
1507 		pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=%s",
1508 				tp->args[i].name, tp->args[i].type->fmt);
1509 	}
1510 
1511 	pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg);
1512 
1513 	for (i = 0; i < tp->nr_args; i++) {
1514 		if (strcmp(tp->args[i].type->name, "string") == 0)
1515 			pos += snprintf(buf + pos, LEN_OR_ZERO,
1516 					", __get_str(%s)",
1517 					tp->args[i].name);
1518 		else
1519 			pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s",
1520 					tp->args[i].name);
1521 	}
1522 
1523 #undef LEN_OR_ZERO
1524 
1525 	/* return the length of print_fmt */
1526 	return pos;
1527 }
1528 
1529 static int set_print_fmt(struct trace_probe *tp)
1530 {
1531 	int len;
1532 	char *print_fmt;
1533 
1534 	/* First: called with 0 length to calculate the needed length */
1535 	len = __set_print_fmt(tp, NULL, 0);
1536 	print_fmt = kmalloc(len + 1, GFP_KERNEL);
1537 	if (!print_fmt)
1538 		return -ENOMEM;
1539 
1540 	/* Second: actually write the @print_fmt */
1541 	__set_print_fmt(tp, print_fmt, len + 1);
1542 	tp->call.print_fmt = print_fmt;
1543 
1544 	return 0;
1545 }
1546 
1547 #ifdef CONFIG_PERF_EVENTS
1548 
1549 /* Kprobe profile handler */
1550 static __kprobes void kprobe_perf_func(struct kprobe *kp,
1551 					 struct pt_regs *regs)
1552 {
1553 	struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
1554 	struct ftrace_event_call *call = &tp->call;
1555 	struct kprobe_trace_entry_head *entry;
1556 	struct hlist_head *head;
1557 	int size, __size, dsize;
1558 	int rctx;
1559 
1560 	dsize = __get_data_size(tp, regs);
1561 	__size = sizeof(*entry) + tp->size + dsize;
1562 	size = ALIGN(__size + sizeof(u32), sizeof(u64));
1563 	size -= sizeof(u32);
1564 	if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
1565 		     "profile buffer not large enough"))
1566 		return;
1567 
1568 	entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
1569 	if (!entry)
1570 		return;
1571 
1572 	entry->ip = (unsigned long)kp->addr;
1573 	memset(&entry[1], 0, dsize);
1574 	store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
1575 
1576 	head = this_cpu_ptr(call->perf_events);
1577 	perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head);
1578 }
1579 
1580 /* Kretprobe profile handler */
1581 static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,
1582 					    struct pt_regs *regs)
1583 {
1584 	struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
1585 	struct ftrace_event_call *call = &tp->call;
1586 	struct kretprobe_trace_entry_head *entry;
1587 	struct hlist_head *head;
1588 	int size, __size, dsize;
1589 	int rctx;
1590 
1591 	dsize = __get_data_size(tp, regs);
1592 	__size = sizeof(*entry) + tp->size + dsize;
1593 	size = ALIGN(__size + sizeof(u32), sizeof(u64));
1594 	size -= sizeof(u32);
1595 	if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
1596 		     "profile buffer not large enough"))
1597 		return;
1598 
1599 	entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
1600 	if (!entry)
1601 		return;
1602 
1603 	entry->func = (unsigned long)tp->rp.kp.addr;
1604 	entry->ret_ip = (unsigned long)ri->ret_addr;
1605 	store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
1606 
1607 	head = this_cpu_ptr(call->perf_events);
1608 	perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, regs, head);
1609 }
1610 
1611 static int probe_perf_enable(struct ftrace_event_call *call)
1612 {
1613 	struct trace_probe *tp = (struct trace_probe *)call->data;
1614 
1615 	tp->flags |= TP_FLAG_PROFILE;
1616 
1617 	if (probe_is_return(tp))
1618 		return enable_kretprobe(&tp->rp);
1619 	else
1620 		return enable_kprobe(&tp->rp.kp);
1621 }
1622 
1623 static void probe_perf_disable(struct ftrace_event_call *call)
1624 {
1625 	struct trace_probe *tp = (struct trace_probe *)call->data;
1626 
1627 	tp->flags &= ~TP_FLAG_PROFILE;
1628 
1629 	if (!(tp->flags & TP_FLAG_TRACE)) {
1630 		if (probe_is_return(tp))
1631 			disable_kretprobe(&tp->rp);
1632 		else
1633 			disable_kprobe(&tp->rp.kp);
1634 	}
1635 }
1636 #endif	/* CONFIG_PERF_EVENTS */
1637 
1638 static __kprobes
1639 int kprobe_register(struct ftrace_event_call *event, enum trace_reg type)
1640 {
1641 	switch (type) {
1642 	case TRACE_REG_REGISTER:
1643 		return probe_event_enable(event);
1644 	case TRACE_REG_UNREGISTER:
1645 		probe_event_disable(event);
1646 		return 0;
1647 
1648 #ifdef CONFIG_PERF_EVENTS
1649 	case TRACE_REG_PERF_REGISTER:
1650 		return probe_perf_enable(event);
1651 	case TRACE_REG_PERF_UNREGISTER:
1652 		probe_perf_disable(event);
1653 		return 0;
1654 #endif
1655 	}
1656 	return 0;
1657 }
1658 
1659 static __kprobes
1660 int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
1661 {
1662 	struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
1663 
1664 	if (tp->flags & TP_FLAG_TRACE)
1665 		kprobe_trace_func(kp, regs);
1666 #ifdef CONFIG_PERF_EVENTS
1667 	if (tp->flags & TP_FLAG_PROFILE)
1668 		kprobe_perf_func(kp, regs);
1669 #endif
1670 	return 0;	/* We don't tweek kernel, so just return 0 */
1671 }
1672 
1673 static __kprobes
1674 int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
1675 {
1676 	struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
1677 
1678 	if (tp->flags & TP_FLAG_TRACE)
1679 		kretprobe_trace_func(ri, regs);
1680 #ifdef CONFIG_PERF_EVENTS
1681 	if (tp->flags & TP_FLAG_PROFILE)
1682 		kretprobe_perf_func(ri, regs);
1683 #endif
1684 	return 0;	/* We don't tweek kernel, so just return 0 */
1685 }
1686 
1687 static struct trace_event_functions kretprobe_funcs = {
1688 	.trace		= print_kretprobe_event
1689 };
1690 
1691 static struct trace_event_functions kprobe_funcs = {
1692 	.trace		= print_kprobe_event
1693 };
1694 
1695 static int register_probe_event(struct trace_probe *tp)
1696 {
1697 	struct ftrace_event_call *call = &tp->call;
1698 	int ret;
1699 
1700 	/* Initialize ftrace_event_call */
1701 	INIT_LIST_HEAD(&call->class->fields);
1702 	if (probe_is_return(tp)) {
1703 		call->event.funcs = &kretprobe_funcs;
1704 		call->class->define_fields = kretprobe_event_define_fields;
1705 	} else {
1706 		call->event.funcs = &kprobe_funcs;
1707 		call->class->define_fields = kprobe_event_define_fields;
1708 	}
1709 	if (set_print_fmt(tp) < 0)
1710 		return -ENOMEM;
1711 	ret = register_ftrace_event(&call->event);
1712 	if (!ret) {
1713 		kfree(call->print_fmt);
1714 		return -ENODEV;
1715 	}
1716 	call->flags = 0;
1717 	call->class->reg = kprobe_register;
1718 	call->data = tp;
1719 	ret = trace_add_event_call(call);
1720 	if (ret) {
1721 		pr_info("Failed to register kprobe event: %s\n", call->name);
1722 		kfree(call->print_fmt);
1723 		unregister_ftrace_event(&call->event);
1724 	}
1725 	return ret;
1726 }
1727 
1728 static void unregister_probe_event(struct trace_probe *tp)
1729 {
1730 	/* tp->event is unregistered in trace_remove_event_call() */
1731 	trace_remove_event_call(&tp->call);
1732 	kfree(tp->call.print_fmt);
1733 }
1734 
1735 /* Make a debugfs interface for controling probe points */
1736 static __init int init_kprobe_trace(void)
1737 {
1738 	struct dentry *d_tracer;
1739 	struct dentry *entry;
1740 
1741 	d_tracer = tracing_init_dentry();
1742 	if (!d_tracer)
1743 		return 0;
1744 
1745 	entry = debugfs_create_file("kprobe_events", 0644, d_tracer,
1746 				    NULL, &kprobe_events_ops);
1747 
1748 	/* Event list interface */
1749 	if (!entry)
1750 		pr_warning("Could not create debugfs "
1751 			   "'kprobe_events' entry\n");
1752 
1753 	/* Profile interface */
1754 	entry = debugfs_create_file("kprobe_profile", 0444, d_tracer,
1755 				    NULL, &kprobe_profile_ops);
1756 
1757 	if (!entry)
1758 		pr_warning("Could not create debugfs "
1759 			   "'kprobe_profile' entry\n");
1760 	return 0;
1761 }
1762 fs_initcall(init_kprobe_trace);
1763 
1764 
1765 #ifdef CONFIG_FTRACE_STARTUP_TEST
1766 
/*
 * Probe target for the startup self test: returns the sum of its six
 * arguments.
 */
static int kprobe_trace_selftest_target(int a1, int a2, int a3,
					int a4, int a5, int a6)
{
	int sum = a1;

	/* Accumulate left to right, same order as a1 + a2 + ... + a6 */
	sum += a2;
	sum += a3;
	sum += a4;
	sum += a5;
	sum += a6;

	return sum;
}
1772 
1773 static __init int kprobe_trace_self_tests_init(void)
1774 {
1775 	int ret, warn = 0;
1776 	int (*target)(int, int, int, int, int, int);
1777 	struct trace_probe *tp;
1778 
1779 	target = kprobe_trace_selftest_target;
1780 
1781 	pr_info("Testing kprobe tracing: ");
1782 
1783 	ret = command_trace_probe("p:testprobe kprobe_trace_selftest_target "
1784 				  "$stack $stack0 +0($stack)");
1785 	if (WARN_ON_ONCE(ret)) {
1786 		pr_warning("error on probing function entry.\n");
1787 		warn++;
1788 	} else {
1789 		/* Enable trace point */
1790 		tp = find_probe_event("testprobe", KPROBE_EVENT_SYSTEM);
1791 		if (WARN_ON_ONCE(tp == NULL)) {
1792 			pr_warning("error on getting new probe.\n");
1793 			warn++;
1794 		} else
1795 			probe_event_enable(&tp->call);
1796 	}
1797 
1798 	ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target "
1799 				  "$retval");
1800 	if (WARN_ON_ONCE(ret)) {
1801 		pr_warning("error on probing function return.\n");
1802 		warn++;
1803 	} else {
1804 		/* Enable trace point */
1805 		tp = find_probe_event("testprobe2", KPROBE_EVENT_SYSTEM);
1806 		if (WARN_ON_ONCE(tp == NULL)) {
1807 			pr_warning("error on getting new probe.\n");
1808 			warn++;
1809 		} else
1810 			probe_event_enable(&tp->call);
1811 	}
1812 
1813 	if (warn)
1814 		goto end;
1815 
1816 	ret = target(1, 2, 3, 4, 5, 6);
1817 
1818 	ret = command_trace_probe("-:testprobe");
1819 	if (WARN_ON_ONCE(ret)) {
1820 		pr_warning("error on deleting a probe.\n");
1821 		warn++;
1822 	}
1823 
1824 	ret = command_trace_probe("-:testprobe2");
1825 	if (WARN_ON_ONCE(ret)) {
1826 		pr_warning("error on deleting a probe.\n");
1827 		warn++;
1828 	}
1829 
1830 end:
1831 	cleanup_all_probes();
1832 	if (warn)
1833 		pr_cont("NG: Some tests are failed. Please check them.\n");
1834 	else
1835 		pr_cont("OK\n");
1836 	return 0;
1837 }
1838 
1839 late_initcall(kprobe_trace_self_tests_init);
1840 
1841 #endif
1842