xref: /linux/kernel/trace/trace_events.c (revision 273b281fa22c293963ee3e6eec418f5dda2dbc83)
1 /*
2  * event tracer
3  *
4  * Copyright (C) 2008 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
5  *
6  *  - Added format output of fields of the trace point.
7  *    This was based off of work by Tom Zanussi <tzanussi@gmail.com>.
8  *
9  */
10 
11 #include <linux/workqueue.h>
12 #include <linux/spinlock.h>
13 #include <linux/kthread.h>
14 #include <linux/debugfs.h>
15 #include <linux/uaccess.h>
16 #include <linux/module.h>
17 #include <linux/ctype.h>
18 #include <linux/delay.h>
19 
20 #include <asm/setup.h>
21 
22 #include "trace_output.h"
23 
/*
 * Placeholder subsystem name.  Events whose system string equals this
 * literal are treated in this file as having no subsystem: they are
 * listed without a "<system>:" prefix and get no subsystem directory.
 */
24 #undef TRACE_SYSTEM
25 #define TRACE_SYSTEM "TRACE_SYSTEM"
26 
/*
 * Taken by the code in this file around walks of ftrace_events and
 * around enabling/disabling individual events.
 */
27 DEFINE_MUTEX(event_mutex);
28 
/* List of registered struct ftrace_event_call, linked through ->list. */
29 LIST_HEAD(ftrace_events);
30 
31 int trace_define_field(struct ftrace_event_call *call, const char *type,
32 		       const char *name, int offset, int size, int is_signed,
33 		       int filter_type)
34 {
35 	struct ftrace_event_field *field;
36 
37 	field = kzalloc(sizeof(*field), GFP_KERNEL);
38 	if (!field)
39 		goto err;
40 
41 	field->name = kstrdup(name, GFP_KERNEL);
42 	if (!field->name)
43 		goto err;
44 
45 	field->type = kstrdup(type, GFP_KERNEL);
46 	if (!field->type)
47 		goto err;
48 
49 	if (filter_type == FILTER_OTHER)
50 		field->filter_type = filter_assign_type(type);
51 	else
52 		field->filter_type = filter_type;
53 
54 	field->offset = offset;
55 	field->size = size;
56 	field->is_signed = is_signed;
57 
58 	list_add(&field->link, &call->fields);
59 
60 	return 0;
61 
62 err:
63 	if (field) {
64 		kfree(field->name);
65 		kfree(field->type);
66 	}
67 	kfree(field);
68 
69 	return -ENOMEM;
70 }
71 EXPORT_SYMBOL_GPL(trace_define_field);
72 
73 #define __common_field(type, item)					\
74 	ret = trace_define_field(call, #type, "common_" #item,		\
75 				 offsetof(typeof(ent), item),		\
76 				 sizeof(ent.item),			\
77 				 is_signed_type(type), FILTER_OTHER);	\
78 	if (ret)							\
79 		return ret;
80 
81 int trace_define_common_fields(struct ftrace_event_call *call)
82 {
83 	int ret;
84 	struct trace_entry ent;
85 
86 	__common_field(unsigned short, type);
87 	__common_field(unsigned char, flags);
88 	__common_field(unsigned char, preempt_count);
89 	__common_field(int, pid);
90 	__common_field(int, lock_depth);
91 
92 	return ret;
93 }
94 EXPORT_SYMBOL_GPL(trace_define_common_fields);
95 
96 void trace_destroy_fields(struct ftrace_event_call *call)
97 {
98 	struct ftrace_event_field *field, *next;
99 
100 	list_for_each_entry_safe(field, next, &call->fields, link) {
101 		list_del(&field->link);
102 		kfree(field->type);
103 		kfree(field->name);
104 		kfree(field);
105 	}
106 }
107 
108 static void ftrace_event_enable_disable(struct ftrace_event_call *call,
109 					int enable)
110 {
111 	switch (enable) {
112 	case 0:
113 		if (call->enabled) {
114 			call->enabled = 0;
115 			tracing_stop_cmdline_record();
116 			call->unregfunc(call);
117 		}
118 		break;
119 	case 1:
120 		if (!call->enabled) {
121 			call->enabled = 1;
122 			tracing_start_cmdline_record();
123 			call->regfunc(call);
124 		}
125 		break;
126 	}
127 }
128 
129 static void ftrace_clear_events(void)
130 {
131 	struct ftrace_event_call *call;
132 
133 	mutex_lock(&event_mutex);
134 	list_for_each_entry(call, &ftrace_events, list) {
135 		ftrace_event_enable_disable(call, 0);
136 	}
137 	mutex_unlock(&event_mutex);
138 }
139 
140 /*
141  * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events.
142  */
143 static int __ftrace_set_clr_event(const char *match, const char *sub,
144 				  const char *event, int set)
145 {
146 	struct ftrace_event_call *call;
147 	int ret = -EINVAL;
148 
149 	mutex_lock(&event_mutex);
150 	list_for_each_entry(call, &ftrace_events, list) {
151 
152 		if (!call->name || !call->regfunc)
153 			continue;
154 
155 		if (match &&
156 		    strcmp(match, call->name) != 0 &&
157 		    strcmp(match, call->system) != 0)
158 			continue;
159 
160 		if (sub && strcmp(sub, call->system) != 0)
161 			continue;
162 
163 		if (event && strcmp(event, call->name) != 0)
164 			continue;
165 
166 		ftrace_event_enable_disable(call, set);
167 
168 		ret = 0;
169 	}
170 	mutex_unlock(&event_mutex);
171 
172 	return ret;
173 }
174 
175 static int ftrace_set_clr_event(char *buf, int set)
176 {
177 	char *event = NULL, *sub = NULL, *match;
178 
179 	/*
180 	 * The buf format can be <subsystem>:<event-name>
181 	 *  *:<event-name> means any event by that name.
182 	 *  :<event-name> is the same.
183 	 *
184 	 *  <subsystem>:* means all events in that subsystem
185 	 *  <subsystem>: means the same.
186 	 *
187 	 *  <name> (no ':') means all events in a subsystem with
188 	 *  the name <name> or any event that matches <name>
189 	 */
190 
191 	match = strsep(&buf, ":");
192 	if (buf) {
193 		sub = match;
194 		event = buf;
195 		match = NULL;
196 
197 		if (!strlen(sub) || strcmp(sub, "*") == 0)
198 			sub = NULL;
199 		if (!strlen(event) || strcmp(event, "*") == 0)
200 			event = NULL;
201 	}
202 
203 	return __ftrace_set_clr_event(match, sub, event, set);
204 }
205 
206 /**
207  * trace_set_clr_event - enable or disable an event
208  * @system: system name to match (NULL for any system)
209  * @event: event name to match (NULL for all events, within system)
210  * @set: 1 to enable, 0 to disable
211  *
212  * This is a way for other parts of the kernel to enable or disable
213  * event recording.
214  *
215  * Returns 0 on success, -EINVAL if the parameters do not match any
216  * registered events.
217  */
218 int trace_set_clr_event(const char *system, const char *event, int set)
219 {
220 	return __ftrace_set_clr_event(NULL, system, event, set);
221 }
222 
223 /* 128 should be much more than enough */
224 #define EVENT_BUF_SIZE		127
225 
226 static ssize_t
227 ftrace_event_write(struct file *file, const char __user *ubuf,
228 		   size_t cnt, loff_t *ppos)
229 {
230 	struct trace_parser parser;
231 	ssize_t read, ret;
232 
233 	if (!cnt)
234 		return 0;
235 
236 	ret = tracing_update_buffers();
237 	if (ret < 0)
238 		return ret;
239 
240 	if (trace_parser_get_init(&parser, EVENT_BUF_SIZE + 1))
241 		return -ENOMEM;
242 
243 	read = trace_get_user(&parser, ubuf, cnt, ppos);
244 
245 	if (read >= 0 && trace_parser_loaded((&parser))) {
246 		int set = 1;
247 
248 		if (*parser.buffer == '!')
249 			set = 0;
250 
251 		parser.buffer[parser.idx] = 0;
252 
253 		ret = ftrace_set_clr_event(parser.buffer + !set, set);
254 		if (ret)
255 			goto out_put;
256 	}
257 
258 	ret = read;
259 
260  out_put:
261 	trace_parser_put(&parser);
262 
263 	return ret;
264 }
265 
266 static void *
267 t_next(struct seq_file *m, void *v, loff_t *pos)
268 {
269 	struct ftrace_event_call *call = v;
270 
271 	(*pos)++;
272 
273 	list_for_each_entry_continue(call, &ftrace_events, list) {
274 		/*
275 		 * The ftrace subsystem is for showing formats only.
276 		 * They can not be enabled or disabled via the event files.
277 		 */
278 		if (call->regfunc)
279 			return call;
280 	}
281 
282 	return NULL;
283 }
284 
285 static void *t_start(struct seq_file *m, loff_t *pos)
286 {
287 	struct ftrace_event_call *call;
288 	loff_t l;
289 
290 	mutex_lock(&event_mutex);
291 
292 	call = list_entry(&ftrace_events, struct ftrace_event_call, list);
293 	for (l = 0; l <= *pos; ) {
294 		call = t_next(m, call, &l);
295 		if (!call)
296 			break;
297 	}
298 	return call;
299 }
300 
301 static void *
302 s_next(struct seq_file *m, void *v, loff_t *pos)
303 {
304 	struct ftrace_event_call *call = v;
305 
306 	(*pos)++;
307 
308 	list_for_each_entry_continue(call, &ftrace_events, list) {
309 		if (call->enabled)
310 			return call;
311 	}
312 
313 	return NULL;
314 }
315 
316 static void *s_start(struct seq_file *m, loff_t *pos)
317 {
318 	struct ftrace_event_call *call;
319 	loff_t l;
320 
321 	mutex_lock(&event_mutex);
322 
323 	call = list_entry(&ftrace_events, struct ftrace_event_call, list);
324 	for (l = 0; l <= *pos; ) {
325 		call = s_next(m, call, &l);
326 		if (!call)
327 			break;
328 	}
329 	return call;
330 }
331 
332 static int t_show(struct seq_file *m, void *v)
333 {
334 	struct ftrace_event_call *call = v;
335 
336 	if (strcmp(call->system, TRACE_SYSTEM) != 0)
337 		seq_printf(m, "%s:", call->system);
338 	seq_printf(m, "%s\n", call->name);
339 
340 	return 0;
341 }
342 
343 static void t_stop(struct seq_file *m, void *p)
344 {
345 	mutex_unlock(&event_mutex);
346 }
347 
348 static int
349 ftrace_event_seq_open(struct inode *inode, struct file *file)
350 {
351 	const struct seq_operations *seq_ops;
352 
353 	if ((file->f_mode & FMODE_WRITE) &&
354 	    (file->f_flags & O_TRUNC))
355 		ftrace_clear_events();
356 
357 	seq_ops = inode->i_private;
358 	return seq_open(file, seq_ops);
359 }
360 
361 static ssize_t
362 event_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
363 		  loff_t *ppos)
364 {
365 	struct ftrace_event_call *call = filp->private_data;
366 	char *buf;
367 
368 	if (call->enabled)
369 		buf = "1\n";
370 	else
371 		buf = "0\n";
372 
373 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
374 }
375 
376 static ssize_t
377 event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
378 		   loff_t *ppos)
379 {
380 	struct ftrace_event_call *call = filp->private_data;
381 	char buf[64];
382 	unsigned long val;
383 	int ret;
384 
385 	if (cnt >= sizeof(buf))
386 		return -EINVAL;
387 
388 	if (copy_from_user(&buf, ubuf, cnt))
389 		return -EFAULT;
390 
391 	buf[cnt] = 0;
392 
393 	ret = strict_strtoul(buf, 10, &val);
394 	if (ret < 0)
395 		return ret;
396 
397 	ret = tracing_update_buffers();
398 	if (ret < 0)
399 		return ret;
400 
401 	switch (val) {
402 	case 0:
403 	case 1:
404 		mutex_lock(&event_mutex);
405 		ftrace_event_enable_disable(call, val);
406 		mutex_unlock(&event_mutex);
407 		break;
408 
409 	default:
410 		return -EINVAL;
411 	}
412 
413 	*ppos += cnt;
414 
415 	return cnt;
416 }
417 
418 static ssize_t
419 system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
420 		   loff_t *ppos)
421 {
422 	const char set_to_char[4] = { '?', '0', '1', 'X' };
423 	const char *system = filp->private_data;
424 	struct ftrace_event_call *call;
425 	char buf[2];
426 	int set = 0;
427 	int ret;
428 
429 	mutex_lock(&event_mutex);
430 	list_for_each_entry(call, &ftrace_events, list) {
431 		if (!call->name || !call->regfunc)
432 			continue;
433 
434 		if (system && strcmp(call->system, system) != 0)
435 			continue;
436 
437 		/*
438 		 * We need to find out if all the events are set
439 		 * or if all events or cleared, or if we have
440 		 * a mixture.
441 		 */
442 		set |= (1 << !!call->enabled);
443 
444 		/*
445 		 * If we have a mixture, no need to look further.
446 		 */
447 		if (set == 3)
448 			break;
449 	}
450 	mutex_unlock(&event_mutex);
451 
452 	buf[0] = set_to_char[set];
453 	buf[1] = '\n';
454 
455 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
456 
457 	return ret;
458 }
459 
460 static ssize_t
461 system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
462 		    loff_t *ppos)
463 {
464 	const char *system = filp->private_data;
465 	unsigned long val;
466 	char buf[64];
467 	ssize_t ret;
468 
469 	if (cnt >= sizeof(buf))
470 		return -EINVAL;
471 
472 	if (copy_from_user(&buf, ubuf, cnt))
473 		return -EFAULT;
474 
475 	buf[cnt] = 0;
476 
477 	ret = strict_strtoul(buf, 10, &val);
478 	if (ret < 0)
479 		return ret;
480 
481 	ret = tracing_update_buffers();
482 	if (ret < 0)
483 		return ret;
484 
485 	if (val != 0 && val != 1)
486 		return -EINVAL;
487 
488 	ret = __ftrace_set_clr_event(NULL, system, NULL, val);
489 	if (ret)
490 		goto out;
491 
492 	ret = cnt;
493 
494 out:
495 	*ppos += cnt;
496 
497 	return ret;
498 }
499 
/*
 * Only declared here; no definition is visible in this file.
 * NOTE(review): presumably a size mismatch in FIELD() leaves a
 * reference to this symbol that fails at link time -- confirm.
 */
500 extern char *__bad_type_size(void);
501 
/*
 * FIELD(type, name) expands to FIVE printf arguments for one member of
 * the local 'struct trace_entry field':
 *   type string, "common_<name>", offset, size, signedness.
 * The first argument is a conditional expression: when sizeof(type)
 * differs from the member's size it evaluates __bad_type_size()
 * instead of the type string.
 */
502 #undef FIELD
503 #define FIELD(type, name)						\
504 	sizeof(type) != sizeof(field.name) ? __bad_type_size() :	\
505 	#type, "common_" #name, offsetof(typeof(field), name),		\
506 		sizeof(field.name), is_signed_type(type)
507 
/*
 * Print the layout of the common struct trace_entry members (type,
 * flags, preempt_count, pid, lock_depth) into @s, one "field:" line
 * each, followed by an empty line.
 *
 * Returns the value of trace_seq_printf().
 */
508 static int trace_write_header(struct trace_seq *s)
509 {
	/* Only used for sizeof()/offsetof() inside FIELD(); never read. */
510 	struct trace_entry field;
511 
512 	/* struct trace_entry */
513 	return trace_seq_printf(s,
514 			"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
515 			"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
516 			"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
517 			"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
518 			"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
519 			"\n",
520 			FIELD(unsigned short, type),
521 			FIELD(unsigned char, flags),
522 			FIELD(unsigned char, preempt_count),
523 			FIELD(int, pid),
524 			FIELD(int, lock_depth));
525 }
526 
527 static ssize_t
528 event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
529 		  loff_t *ppos)
530 {
531 	struct ftrace_event_call *call = filp->private_data;
532 	struct trace_seq *s;
533 	char *buf;
534 	int r;
535 
536 	if (*ppos)
537 		return 0;
538 
539 	s = kmalloc(sizeof(*s), GFP_KERNEL);
540 	if (!s)
541 		return -ENOMEM;
542 
543 	trace_seq_init(s);
544 
545 	/* If any of the first writes fail, so will the show_format. */
546 
547 	trace_seq_printf(s, "name: %s\n", call->name);
548 	trace_seq_printf(s, "ID: %d\n", call->id);
549 	trace_seq_printf(s, "format:\n");
550 	trace_write_header(s);
551 
552 	r = call->show_format(call, s);
553 	if (!r) {
554 		/*
555 		 * ug!  The format output is bigger than a PAGE!!
556 		 */
557 		buf = "FORMAT TOO BIG\n";
558 		r = simple_read_from_buffer(ubuf, cnt, ppos,
559 					      buf, strlen(buf));
560 		goto out;
561 	}
562 
563 	r = simple_read_from_buffer(ubuf, cnt, ppos,
564 				    s->buffer, s->len);
565  out:
566 	kfree(s);
567 	return r;
568 }
569 
570 static ssize_t
571 event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
572 {
573 	struct ftrace_event_call *call = filp->private_data;
574 	struct trace_seq *s;
575 	int r;
576 
577 	if (*ppos)
578 		return 0;
579 
580 	s = kmalloc(sizeof(*s), GFP_KERNEL);
581 	if (!s)
582 		return -ENOMEM;
583 
584 	trace_seq_init(s);
585 	trace_seq_printf(s, "%d\n", call->id);
586 
587 	r = simple_read_from_buffer(ubuf, cnt, ppos,
588 				    s->buffer, s->len);
589 	kfree(s);
590 	return r;
591 }
592 
593 static ssize_t
594 event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
595 		  loff_t *ppos)
596 {
597 	struct ftrace_event_call *call = filp->private_data;
598 	struct trace_seq *s;
599 	int r;
600 
601 	if (*ppos)
602 		return 0;
603 
604 	s = kmalloc(sizeof(*s), GFP_KERNEL);
605 	if (!s)
606 		return -ENOMEM;
607 
608 	trace_seq_init(s);
609 
610 	print_event_filter(call, s);
611 	r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
612 
613 	kfree(s);
614 
615 	return r;
616 }
617 
618 static ssize_t
619 event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
620 		   loff_t *ppos)
621 {
622 	struct ftrace_event_call *call = filp->private_data;
623 	char *buf;
624 	int err;
625 
626 	if (cnt >= PAGE_SIZE)
627 		return -EINVAL;
628 
629 	buf = (char *)__get_free_page(GFP_TEMPORARY);
630 	if (!buf)
631 		return -ENOMEM;
632 
633 	if (copy_from_user(buf, ubuf, cnt)) {
634 		free_page((unsigned long) buf);
635 		return -EFAULT;
636 	}
637 	buf[cnt] = '\0';
638 
639 	err = apply_event_filter(call, buf);
640 	free_page((unsigned long) buf);
641 	if (err < 0)
642 		return err;
643 
644 	*ppos += cnt;
645 
646 	return cnt;
647 }
648 
649 static ssize_t
650 subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
651 		      loff_t *ppos)
652 {
653 	struct event_subsystem *system = filp->private_data;
654 	struct trace_seq *s;
655 	int r;
656 
657 	if (*ppos)
658 		return 0;
659 
660 	s = kmalloc(sizeof(*s), GFP_KERNEL);
661 	if (!s)
662 		return -ENOMEM;
663 
664 	trace_seq_init(s);
665 
666 	print_subsystem_event_filter(system, s);
667 	r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
668 
669 	kfree(s);
670 
671 	return r;
672 }
673 
674 static ssize_t
675 subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
676 		       loff_t *ppos)
677 {
678 	struct event_subsystem *system = filp->private_data;
679 	char *buf;
680 	int err;
681 
682 	if (cnt >= PAGE_SIZE)
683 		return -EINVAL;
684 
685 	buf = (char *)__get_free_page(GFP_TEMPORARY);
686 	if (!buf)
687 		return -ENOMEM;
688 
689 	if (copy_from_user(buf, ubuf, cnt)) {
690 		free_page((unsigned long) buf);
691 		return -EFAULT;
692 	}
693 	buf[cnt] = '\0';
694 
695 	err = apply_subsystem_event_filter(system, buf);
696 	free_page((unsigned long) buf);
697 	if (err < 0)
698 		return err;
699 
700 	*ppos += cnt;
701 
702 	return cnt;
703 }
704 
705 static ssize_t
706 show_header(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
707 {
708 	int (*func)(struct trace_seq *s) = filp->private_data;
709 	struct trace_seq *s;
710 	int r;
711 
712 	if (*ppos)
713 		return 0;
714 
715 	s = kmalloc(sizeof(*s), GFP_KERNEL);
716 	if (!s)
717 		return -ENOMEM;
718 
719 	trace_seq_init(s);
720 
721 	func(s);
722 	r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
723 
724 	kfree(s);
725 
726 	return r;
727 }
728 
/*
 * Iterator over every event that has a regfunc; installed as the
 * private data of the "available_events" file.
 */
729 static const struct seq_operations show_event_seq_ops = {
730 	.start = t_start,
731 	.next = t_next,
732 	.show = t_show,
733 	.stop = t_stop,
734 };
735 
/*
 * Iterator over the currently enabled events; installed as the private
 * data of the "set_event" file.  Shares ->show and ->stop with the
 * table above.
 */
736 static const struct seq_operations show_set_event_seq_ops = {
737 	.start = s_start,
738 	.next = s_next,
739 	.show = t_show,
740 	.stop = t_stop,
741 };
742 
/* "available_events": read-only seq_file listing. */
743 static const struct file_operations ftrace_avail_fops = {
744 	.open = ftrace_event_seq_open,
745 	.read = seq_read,
746 	.llseek = seq_lseek,
747 	.release = seq_release,
748 };
749 
/* "set_event": seq_file listing plus a write handler to set/clear events. */
750 static const struct file_operations ftrace_set_event_fops = {
751 	.open = ftrace_event_seq_open,
752 	.read = seq_read,
753 	.write = ftrace_event_write,
754 	.llseek = seq_lseek,
755 	.release = seq_release,
756 };
757 
/*
 * The tables below all open through tracing_open_generic(), which is
 * not defined in this file; the handlers read filp->private_data, so it
 * presumably copies the private pointer given at file creation there --
 * confirm against its definition.
 */

/* Per-event "enable" file. */
758 static const struct file_operations ftrace_enable_fops = {
759 	.open = tracing_open_generic,
760 	.read = event_enable_read,
761 	.write = event_enable_write,
762 };
763 
/* Per-event "format" file (read-only). */
764 static const struct file_operations ftrace_event_format_fops = {
765 	.open = tracing_open_generic,
766 	.read = event_format_read,
767 };
768 
/* Per-event "id" file (read-only). */
769 static const struct file_operations ftrace_event_id_fops = {
770 	.open = tracing_open_generic,
771 	.read = event_id_read,
772 };
773 
/* Per-event "filter" file. */
774 static const struct file_operations ftrace_event_filter_fops = {
775 	.open = tracing_open_generic,
776 	.read = event_filter_read,
777 	.write = event_filter_write,
778 };
779 
/* Per-subsystem "filter" file. */
780 static const struct file_operations ftrace_subsystem_filter_fops = {
781 	.open = tracing_open_generic,
782 	.read = subsystem_filter_read,
783 	.write = subsystem_filter_write,
784 };
785 
/* Per-subsystem "enable" file, also used for the top-level events/enable. */
786 static const struct file_operations ftrace_system_enable_fops = {
787 	.open = tracing_open_generic,
788 	.read = system_enable_read,
789 	.write = system_enable_write,
790 };
791 
/* "header_page" / "header_event" files (read-only). */
792 static const struct file_operations ftrace_show_header_fops = {
793 	.open = tracing_open_generic,
794 	.read = show_header,
795 };
796 
797 static struct dentry *event_trace_events_dir(void)
798 {
799 	static struct dentry *d_tracer;
800 	static struct dentry *d_events;
801 
802 	if (d_events)
803 		return d_events;
804 
805 	d_tracer = tracing_init_dentry();
806 	if (!d_tracer)
807 		return NULL;
808 
809 	d_events = debugfs_create_dir("events", d_tracer);
810 	if (!d_events)
811 		pr_warning("Could not create debugfs "
812 			   "'events' directory\n");
813 
814 	return d_events;
815 }
816 
817 static LIST_HEAD(event_subsystems);
818 
819 static struct dentry *
820 event_subsystem_dir(const char *name, struct dentry *d_events)
821 {
822 	struct event_subsystem *system;
823 	struct dentry *entry;
824 
825 	/* First see if we did not already create this dir */
826 	list_for_each_entry(system, &event_subsystems, list) {
827 		if (strcmp(system->name, name) == 0) {
828 			system->nr_events++;
829 			return system->entry;
830 		}
831 	}
832 
833 	/* need to create new entry */
834 	system = kmalloc(sizeof(*system), GFP_KERNEL);
835 	if (!system) {
836 		pr_warning("No memory to create event subsystem %s\n",
837 			   name);
838 		return d_events;
839 	}
840 
841 	system->entry = debugfs_create_dir(name, d_events);
842 	if (!system->entry) {
843 		pr_warning("Could not create event subsystem %s\n",
844 			   name);
845 		kfree(system);
846 		return d_events;
847 	}
848 
849 	system->nr_events = 1;
850 	system->name = kstrdup(name, GFP_KERNEL);
851 	if (!system->name) {
852 		debugfs_remove(system->entry);
853 		kfree(system);
854 		return d_events;
855 	}
856 
857 	list_add(&system->list, &event_subsystems);
858 
859 	system->filter = NULL;
860 
861 	system->filter = kzalloc(sizeof(struct event_filter), GFP_KERNEL);
862 	if (!system->filter) {
863 		pr_warning("Could not allocate filter for subsystem "
864 			   "'%s'\n", name);
865 		return system->entry;
866 	}
867 
868 	entry = debugfs_create_file("filter", 0644, system->entry, system,
869 				    &ftrace_subsystem_filter_fops);
870 	if (!entry) {
871 		kfree(system->filter);
872 		system->filter = NULL;
873 		pr_warning("Could not create debugfs "
874 			   "'%s/filter' entry\n", name);
875 	}
876 
877 	trace_create_file("enable", 0644, system->entry,
878 			  (void *)system->name,
879 			  &ftrace_system_enable_fops);
880 
881 	return system->entry;
882 }
883 
884 static int
885 event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
886 		 const struct file_operations *id,
887 		 const struct file_operations *enable,
888 		 const struct file_operations *filter,
889 		 const struct file_operations *format)
890 {
891 	int ret;
892 
893 	/*
894 	 * If the trace point header did not define TRACE_SYSTEM
895 	 * then the system would be called "TRACE_SYSTEM".
896 	 */
897 	if (strcmp(call->system, TRACE_SYSTEM) != 0)
898 		d_events = event_subsystem_dir(call->system, d_events);
899 
900 	call->dir = debugfs_create_dir(call->name, d_events);
901 	if (!call->dir) {
902 		pr_warning("Could not create debugfs "
903 			   "'%s' directory\n", call->name);
904 		return -1;
905 	}
906 
907 	if (call->regfunc)
908 		trace_create_file("enable", 0644, call->dir, call,
909 				  enable);
910 
911 	if (call->id && call->profile_enable)
912 		trace_create_file("id", 0444, call->dir, call,
913 		 		  id);
914 
915 	if (call->define_fields) {
916 		ret = call->define_fields(call);
917 		if (ret < 0) {
918 			pr_warning("Could not initialize trace point"
919 				   " events/%s\n", call->name);
920 			return ret;
921 		}
922 		trace_create_file("filter", 0644, call->dir, call,
923 				  filter);
924 	}
925 
926 	/* A trace may not want to export its format */
927 	if (!call->show_format)
928 		return 0;
929 
930 	trace_create_file("format", 0444, call->dir, call,
931 			  format);
932 
933 	return 0;
934 }
935 
936 static int __trace_add_event_call(struct ftrace_event_call *call)
937 {
938 	struct dentry *d_events;
939 	int ret;
940 
941 	if (!call->name)
942 		return -EINVAL;
943 
944 	if (call->raw_init) {
945 		ret = call->raw_init(call);
946 		if (ret < 0) {
947 			if (ret != -ENOSYS)
948 				pr_warning("Could not initialize trace "
949 				"events/%s\n", call->name);
950 			return ret;
951 		}
952 	}
953 
954 	d_events = event_trace_events_dir();
955 	if (!d_events)
956 		return -ENOENT;
957 
958 	ret = event_create_dir(call, d_events, &ftrace_event_id_fops,
959 				&ftrace_enable_fops, &ftrace_event_filter_fops,
960 				&ftrace_event_format_fops);
961 	if (!ret)
962 		list_add(&call->list, &ftrace_events);
963 
964 	return ret;
965 }
966 
967 /* Add an additional event_call dynamically */
968 int trace_add_event_call(struct ftrace_event_call *call)
969 {
970 	int ret;
971 	mutex_lock(&event_mutex);
972 	ret = __trace_add_event_call(call);
973 	mutex_unlock(&event_mutex);
974 	return ret;
975 }
976 
977 static void remove_subsystem_dir(const char *name)
978 {
979 	struct event_subsystem *system;
980 
981 	if (strcmp(name, TRACE_SYSTEM) == 0)
982 		return;
983 
984 	list_for_each_entry(system, &event_subsystems, list) {
985 		if (strcmp(system->name, name) == 0) {
986 			if (!--system->nr_events) {
987 				struct event_filter *filter = system->filter;
988 
989 				debugfs_remove_recursive(system->entry);
990 				list_del(&system->list);
991 				if (filter) {
992 					kfree(filter->filter_string);
993 					kfree(filter);
994 				}
995 				kfree(system->name);
996 				kfree(system);
997 			}
998 			break;
999 		}
1000 	}
1001 }
1002 
/*
 * Tear down one registered event: disable it, unregister its output
 * event (if it has one), remove its debugfs directory, unlink it from
 * ftrace_events, free its field descriptions and filter predicates,
 * and drop its reference on the subsystem directory.
 */
1003 /*
1004  * Must be called under locking both of event_mutex and trace_event_mutex.
1005  */
1006 static void __trace_remove_event_call(struct ftrace_event_call *call)
1007 {
	/* Disable first, before anything belonging to the event is removed. */
1008 	ftrace_event_enable_disable(call, 0);
1009 	if (call->event)
1010 		__unregister_ftrace_event(call->event);
1011 	debugfs_remove_recursive(call->dir);
1012 	list_del(&call->list);
1013 	trace_destroy_fields(call);
1014 	destroy_preds(call);
	/* May remove the whole subsystem directory if this was its last event. */
1015 	remove_subsystem_dir(call->system);
1016 }
1017 
1018 /* Remove an event_call */
1019 void trace_remove_event_call(struct ftrace_event_call *call)
1020 {
1021 	mutex_lock(&event_mutex);
1022 	down_write(&trace_event_mutex);
1023 	__trace_remove_event_call(call);
1024 	up_write(&trace_event_mutex);
1025 	mutex_unlock(&event_mutex);
1026 }
1027 
/*
 * for_each_event - iterate over an array of events
 * @event: loop cursor (a pointer lvalue)
 * @start: first element
 * @end:   one past the last element
 *
 * The bounds are compared as addresses.  Every argument is
 * parenthesized so that an expression such as "base + count" can be
 * passed as @start or @end without the cast binding to only part of it.
 */
#define for_each_event(event, start, end)			\
	for ((event) = (start);					\
	     (unsigned long)(event) < (unsigned long)(end);	\
	     (event)++)
1032 
1033 #ifdef CONFIG_MODULES
1034 
/* One ftrace_module_file_ops entry per module that has trace events. */
1035 static LIST_HEAD(ftrace_module_file_list);
1036 
1037 /*
1038  * Modules must own their file_operations to keep up with
1039  * reference counting.
1040  */
1041 struct ftrace_module_file_ops {
	/* link on ftrace_module_file_list */
1042 	struct list_head		list;
	/* module these copies belong to; also set as their ->owner */
1043 	struct module			*mod;
	/* per-module copies of the id/enable/format/filter file operations */
1044 	struct file_operations		id;
1045 	struct file_operations		enable;
1046 	struct file_operations		format;
1047 	struct file_operations		filter;
1048 };
1049 
1050 static struct ftrace_module_file_ops *
1051 trace_create_file_ops(struct module *mod)
1052 {
1053 	struct ftrace_module_file_ops *file_ops;
1054 
1055 	/*
1056 	 * This is a bit of a PITA. To allow for correct reference
1057 	 * counting, modules must "own" their file_operations.
1058 	 * To do this, we allocate the file operations that will be
1059 	 * used in the event directory.
1060 	 */
1061 
1062 	file_ops = kmalloc(sizeof(*file_ops), GFP_KERNEL);
1063 	if (!file_ops)
1064 		return NULL;
1065 
1066 	file_ops->mod = mod;
1067 
1068 	file_ops->id = ftrace_event_id_fops;
1069 	file_ops->id.owner = mod;
1070 
1071 	file_ops->enable = ftrace_enable_fops;
1072 	file_ops->enable.owner = mod;
1073 
1074 	file_ops->filter = ftrace_event_filter_fops;
1075 	file_ops->filter.owner = mod;
1076 
1077 	file_ops->format = ftrace_event_format_fops;
1078 	file_ops->format.owner = mod;
1079 
1080 	list_add(&file_ops->list, &ftrace_module_file_list);
1081 
1082 	return file_ops;
1083 }
1084 
1085 static void trace_module_add_events(struct module *mod)
1086 {
1087 	struct ftrace_module_file_ops *file_ops = NULL;
1088 	struct ftrace_event_call *call, *start, *end;
1089 	struct dentry *d_events;
1090 	int ret;
1091 
1092 	start = mod->trace_events;
1093 	end = mod->trace_events + mod->num_trace_events;
1094 
1095 	if (start == end)
1096 		return;
1097 
1098 	d_events = event_trace_events_dir();
1099 	if (!d_events)
1100 		return;
1101 
1102 	for_each_event(call, start, end) {
1103 		/* The linker may leave blanks */
1104 		if (!call->name)
1105 			continue;
1106 		if (call->raw_init) {
1107 			ret = call->raw_init(call);
1108 			if (ret < 0) {
1109 				if (ret != -ENOSYS)
1110 					pr_warning("Could not initialize trace "
1111 					"point events/%s\n", call->name);
1112 				continue;
1113 			}
1114 		}
1115 		/*
1116 		 * This module has events, create file ops for this module
1117 		 * if not already done.
1118 		 */
1119 		if (!file_ops) {
1120 			file_ops = trace_create_file_ops(mod);
1121 			if (!file_ops)
1122 				return;
1123 		}
1124 		call->mod = mod;
1125 		ret = event_create_dir(call, d_events,
1126 				       &file_ops->id, &file_ops->enable,
1127 				       &file_ops->filter, &file_ops->format);
1128 		if (!ret)
1129 			list_add(&call->list, &ftrace_events);
1130 	}
1131 }
1132 
1133 static void trace_module_remove_events(struct module *mod)
1134 {
1135 	struct ftrace_module_file_ops *file_ops;
1136 	struct ftrace_event_call *call, *p;
1137 	bool found = false;
1138 
1139 	down_write(&trace_event_mutex);
1140 	list_for_each_entry_safe(call, p, &ftrace_events, list) {
1141 		if (call->mod == mod) {
1142 			found = true;
1143 			__trace_remove_event_call(call);
1144 		}
1145 	}
1146 
1147 	/* Now free the file_operations */
1148 	list_for_each_entry(file_ops, &ftrace_module_file_list, list) {
1149 		if (file_ops->mod == mod)
1150 			break;
1151 	}
1152 	if (&file_ops->list != &ftrace_module_file_list) {
1153 		list_del(&file_ops->list);
1154 		kfree(file_ops);
1155 	}
1156 
1157 	/*
1158 	 * It is safest to reset the ring buffer if the module being unloaded
1159 	 * registered any events.
1160 	 */
1161 	if (found)
1162 		tracing_reset_current_online_cpus();
1163 	up_write(&trace_event_mutex);
1164 }
1165 
1166 static int trace_module_notify(struct notifier_block *self,
1167 			       unsigned long val, void *data)
1168 {
1169 	struct module *mod = data;
1170 
1171 	mutex_lock(&event_mutex);
1172 	switch (val) {
1173 	case MODULE_STATE_COMING:
1174 		trace_module_add_events(mod);
1175 		break;
1176 	case MODULE_STATE_GOING:
1177 		trace_module_remove_events(mod);
1178 		break;
1179 	}
1180 	mutex_unlock(&event_mutex);
1181 
1182 	return 0;
1183 }
1184 #else
/* Without CONFIG_MODULES there are no module events: do nothing. */
1185 static int trace_module_notify(struct notifier_block *self,
1186 			       unsigned long val, void *data)
1187 {
1188 	return 0;
1189 }
1190 #endif /* CONFIG_MODULES */
1191 
/* Notifier block registered by event_trace_init() for module state changes. */
1192 static struct notifier_block trace_module_nb = {
1193 	.notifier_call = trace_module_notify,
1194 	.priority = 0,
1195 };
1196 
/*
 * Bounds of the array of built-in events walked by event_trace_init().
 * NOTE(review): presumably provided by the linker script -- confirm.
 */
1197 extern struct ftrace_event_call __start_ftrace_events[];
1198 extern struct ftrace_event_call __stop_ftrace_events[];
1199 
/* Copy of the "trace_event=" boot parameter; parsed in event_trace_init(). */
1200 static char bootup_event_buf[COMMAND_LINE_SIZE] __initdata;
1201 
1202 static __init int setup_trace_event(char *str)
1203 {
1204 	strlcpy(bootup_event_buf, str, COMMAND_LINE_SIZE);
1205 	ring_buffer_expanded = 1;
1206 	tracing_selftest_disabled = 1;
1207 
1208 	return 1;
1209 }
1210 __setup("trace_event=", setup_trace_event);
1211 
1212 static __init int event_trace_init(void)
1213 {
1214 	struct ftrace_event_call *call;
1215 	struct dentry *d_tracer;
1216 	struct dentry *entry;
1217 	struct dentry *d_events;
1218 	int ret;
1219 	char *buf = bootup_event_buf;
1220 	char *token;
1221 
1222 	d_tracer = tracing_init_dentry();
1223 	if (!d_tracer)
1224 		return 0;
1225 
1226 	entry = debugfs_create_file("available_events", 0444, d_tracer,
1227 				    (void *)&show_event_seq_ops,
1228 				    &ftrace_avail_fops);
1229 	if (!entry)
1230 		pr_warning("Could not create debugfs "
1231 			   "'available_events' entry\n");
1232 
1233 	entry = debugfs_create_file("set_event", 0644, d_tracer,
1234 				    (void *)&show_set_event_seq_ops,
1235 				    &ftrace_set_event_fops);
1236 	if (!entry)
1237 		pr_warning("Could not create debugfs "
1238 			   "'set_event' entry\n");
1239 
1240 	d_events = event_trace_events_dir();
1241 	if (!d_events)
1242 		return 0;
1243 
1244 	/* ring buffer internal formats */
1245 	trace_create_file("header_page", 0444, d_events,
1246 			  ring_buffer_print_page_header,
1247 			  &ftrace_show_header_fops);
1248 
1249 	trace_create_file("header_event", 0444, d_events,
1250 			  ring_buffer_print_entry_header,
1251 			  &ftrace_show_header_fops);
1252 
1253 	trace_create_file("enable", 0644, d_events,
1254 			  NULL, &ftrace_system_enable_fops);
1255 
1256 	for_each_event(call, __start_ftrace_events, __stop_ftrace_events) {
1257 		/* The linker may leave blanks */
1258 		if (!call->name)
1259 			continue;
1260 		if (call->raw_init) {
1261 			ret = call->raw_init(call);
1262 			if (ret < 0) {
1263 				if (ret != -ENOSYS)
1264 					pr_warning("Could not initialize trace "
1265 					"point events/%s\n", call->name);
1266 				continue;
1267 			}
1268 		}
1269 		ret = event_create_dir(call, d_events, &ftrace_event_id_fops,
1270 				       &ftrace_enable_fops,
1271 				       &ftrace_event_filter_fops,
1272 				       &ftrace_event_format_fops);
1273 		if (!ret)
1274 			list_add(&call->list, &ftrace_events);
1275 	}
1276 
1277 	while (true) {
1278 		token = strsep(&buf, ",");
1279 
1280 		if (!token)
1281 			break;
1282 		if (!*token)
1283 			continue;
1284 
1285 		ret = ftrace_set_clr_event(token, 1);
1286 		if (ret)
1287 			pr_warning("Failed to enable trace event: %s\n", token);
1288 	}
1289 
1290 	ret = register_module_notifier(&trace_module_nb);
1291 	if (ret)
1292 		pr_warning("Failed to register trace events module notifier\n");
1293 
1294 	return 0;
1295 }
1296 fs_initcall(event_trace_init);
1297 
1298 #ifdef CONFIG_FTRACE_STARTUP_TEST
1299 
1300 static DEFINE_SPINLOCK(test_spinlock);
1301 static DEFINE_SPINLOCK(test_spinlock_irq);
1302 static DEFINE_MUTEX(test_mutex);
1303 
1304 static __init void test_work(struct work_struct *dummy)
1305 {
1306 	spin_lock(&test_spinlock);
1307 	spin_lock_irq(&test_spinlock_irq);
1308 	udelay(1);
1309 	spin_unlock_irq(&test_spinlock_irq);
1310 	spin_unlock(&test_spinlock);
1311 
1312 	mutex_lock(&test_mutex);
1313 	msleep(1);
1314 	mutex_unlock(&test_mutex);
1315 }
1316 
1317 static __init int event_test_thread(void *unused)
1318 {
1319 	void *test_malloc;
1320 
1321 	test_malloc = kmalloc(1234, GFP_KERNEL);
1322 	if (!test_malloc)
1323 		pr_info("failed to kmalloc\n");
1324 
1325 	schedule_on_each_cpu(test_work);
1326 
1327 	kfree(test_malloc);
1328 
1329 	set_current_state(TASK_INTERRUPTIBLE);
1330 	while (!kthread_should_stop())
1331 		schedule();
1332 
1333 	return 0;
1334 }
1335 
1336 /*
1337  * Do various things that may trigger events.
1338  */
1339 static __init void event_test_stuff(void)
1340 {
1341 	struct task_struct *test_thread;
1342 
1343 	test_thread = kthread_run(event_test_thread, NULL, "test-events");
1344 	msleep(1);
1345 	kthread_stop(test_thread);
1346 }
1347 
1348 /*
1349  * For every trace event defined, we will test each trace point separately,
1350  * and then by groups, and finally all trace points.
1351  */
1352 static __init void event_trace_self_tests(void)
1353 {
1354 	struct ftrace_event_call *call;
1355 	struct event_subsystem *system;
1356 	int ret;
1357 
1358 	pr_info("Running tests on trace events:\n");
1359 
1360 	list_for_each_entry(call, &ftrace_events, list) {
1361 
1362 		/* Only test those that have a regfunc */
1363 		if (!call->regfunc)
1364 			continue;
1365 
1366 /*
1367  * Testing syscall events here is pretty useless, but
1368  * we still do it if configured. But this is time consuming.
1369  * What we really need is a user thread to perform the
1370  * syscalls as we test.
1371  */
1372 #ifndef CONFIG_EVENT_TRACE_TEST_SYSCALLS
1373 		if (call->system &&
1374 		    strcmp(call->system, "syscalls") == 0)
1375 			continue;
1376 #endif
1377 
1378 		pr_info("Testing event %s: ", call->name);
1379 
1380 		/*
1381 		 * If an event is already enabled, someone is using
1382 		 * it and the self test should not be on.
1383 		 */
1384 		if (call->enabled) {
1385 			pr_warning("Enabled event during self test!\n");
1386 			WARN_ON_ONCE(1);
1387 			continue;
1388 		}
1389 
1390 		ftrace_event_enable_disable(call, 1);
1391 		event_test_stuff();
1392 		ftrace_event_enable_disable(call, 0);
1393 
1394 		pr_cont("OK\n");
1395 	}
1396 
1397 	/* Now test at the sub system level */
1398 
1399 	pr_info("Running tests on trace event systems:\n");
1400 
1401 	list_for_each_entry(system, &event_subsystems, list) {
1402 
1403 		/* the ftrace system is special, skip it */
1404 		if (strcmp(system->name, "ftrace") == 0)
1405 			continue;
1406 
1407 		pr_info("Testing event system %s: ", system->name);
1408 
1409 		ret = __ftrace_set_clr_event(NULL, system->name, NULL, 1);
1410 		if (WARN_ON_ONCE(ret)) {
1411 			pr_warning("error enabling system %s\n",
1412 				   system->name);
1413 			continue;
1414 		}
1415 
1416 		event_test_stuff();
1417 
1418 		ret = __ftrace_set_clr_event(NULL, system->name, NULL, 0);
1419 		if (WARN_ON_ONCE(ret))
1420 			pr_warning("error disabling system %s\n",
1421 				   system->name);
1422 
1423 		pr_cont("OK\n");
1424 	}
1425 
1426 	/* Test with all events enabled */
1427 
1428 	pr_info("Running tests on all trace events:\n");
1429 	pr_info("Testing all events: ");
1430 
1431 	ret = __ftrace_set_clr_event(NULL, NULL, NULL, 1);
1432 	if (WARN_ON_ONCE(ret)) {
1433 		pr_warning("error enabling all events\n");
1434 		return;
1435 	}
1436 
1437 	event_test_stuff();
1438 
1439 	/* reset sysname */
1440 	ret = __ftrace_set_clr_event(NULL, NULL, NULL, 0);
1441 	if (WARN_ON_ONCE(ret)) {
1442 		pr_warning("error disabling all events\n");
1443 		return;
1444 	}
1445 
1446 	pr_cont("OK\n");
1447 }
1448 
1449 #ifdef CONFIG_FUNCTION_TRACER
1450 
1451 static DEFINE_PER_CPU(atomic_t, ftrace_test_event_disable);
1452 
1453 static void
1454 function_test_events_call(unsigned long ip, unsigned long parent_ip)
1455 {
1456 	struct ring_buffer_event *event;
1457 	struct ring_buffer *buffer;
1458 	struct ftrace_entry *entry;
1459 	unsigned long flags;
1460 	long disabled;
1461 	int resched;
1462 	int cpu;
1463 	int pc;
1464 
1465 	pc = preempt_count();
1466 	resched = ftrace_preempt_disable();
1467 	cpu = raw_smp_processor_id();
1468 	disabled = atomic_inc_return(&per_cpu(ftrace_test_event_disable, cpu));
1469 
1470 	if (disabled != 1)
1471 		goto out;
1472 
1473 	local_save_flags(flags);
1474 
1475 	event = trace_current_buffer_lock_reserve(&buffer,
1476 						  TRACE_FN, sizeof(*entry),
1477 						  flags, pc);
1478 	if (!event)
1479 		goto out;
1480 	entry	= ring_buffer_event_data(event);
1481 	entry->ip			= ip;
1482 	entry->parent_ip		= parent_ip;
1483 
1484 	trace_nowake_buffer_unlock_commit(buffer, event, flags, pc);
1485 
1486  out:
1487 	atomic_dec(&per_cpu(ftrace_test_event_disable, cpu));
1488 	ftrace_preempt_enable(resched);
1489 }
1490 
1491 static struct ftrace_ops trace_ops __initdata  =
1492 {
1493 	.func = function_test_events_call,
1494 };
1495 
1496 static __init void event_trace_self_test_with_function(void)
1497 {
1498 	register_ftrace_function(&trace_ops);
1499 	pr_info("Running tests again, along with the function tracer\n");
1500 	event_trace_self_tests();
1501 	unregister_ftrace_function(&trace_ops);
1502 }
1503 #else
1504 static __init void event_trace_self_test_with_function(void)
1505 {
1506 }
1507 #endif
1508 
1509 static __init int event_trace_self_tests_init(void)
1510 {
1511 	if (!tracing_selftest_disabled) {
1512 		event_trace_self_tests();
1513 		event_trace_self_test_with_function();
1514 	}
1515 
1516 	return 0;
1517 }
1518 
1519 late_initcall(event_trace_self_tests_init);
1520 
1521 #endif
1522