xref: /linux/kernel/trace/trace_events.c (revision fd589a8f0a13f53a2dd580b1fe170633cf6b095f)
1 /*
2  * event tracer
3  *
4  * Copyright (C) 2008 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
5  *
6  *  - Added format output of fields of the trace point.
7  *    This was based off of work by Tom Zanussi <tzanussi@gmail.com>.
8  *
9  */
10 
11 #include <linux/workqueue.h>
12 #include <linux/spinlock.h>
13 #include <linux/kthread.h>
14 #include <linux/debugfs.h>
15 #include <linux/uaccess.h>
16 #include <linux/module.h>
17 #include <linux/ctype.h>
18 #include <linux/delay.h>
19 
20 #include <asm/setup.h>
21 
22 #include "trace_output.h"
23 
/*
 * Placeholder subsystem name. t_show() and event_create_dir() compare an
 * event's ->system against this string to tell whether the event belongs
 * to a named subsystem.
 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM "TRACE_SYSTEM"

/* Held in this file around walks of ftrace_events and event enable/disable. */
DEFINE_MUTEX(event_mutex);

/* List of event calls added by this file, linked through ->list. */
LIST_HEAD(ftrace_events);
30 
31 int trace_define_field(struct ftrace_event_call *call, const char *type,
32 		       const char *name, int offset, int size, int is_signed,
33 		       int filter_type)
34 {
35 	struct ftrace_event_field *field;
36 
37 	field = kzalloc(sizeof(*field), GFP_KERNEL);
38 	if (!field)
39 		goto err;
40 
41 	field->name = kstrdup(name, GFP_KERNEL);
42 	if (!field->name)
43 		goto err;
44 
45 	field->type = kstrdup(type, GFP_KERNEL);
46 	if (!field->type)
47 		goto err;
48 
49 	if (filter_type == FILTER_OTHER)
50 		field->filter_type = filter_assign_type(type);
51 	else
52 		field->filter_type = filter_type;
53 
54 	field->offset = offset;
55 	field->size = size;
56 	field->is_signed = is_signed;
57 
58 	list_add(&field->link, &call->fields);
59 
60 	return 0;
61 
62 err:
63 	if (field) {
64 		kfree(field->name);
65 		kfree(field->type);
66 	}
67 	kfree(field);
68 
69 	return -ENOMEM;
70 }
71 EXPORT_SYMBOL_GPL(trace_define_field);
72 
73 #define __common_field(type, item)					\
74 	ret = trace_define_field(call, #type, "common_" #item,		\
75 				 offsetof(typeof(ent), item),		\
76 				 sizeof(ent.item),			\
77 				 is_signed_type(type), FILTER_OTHER);	\
78 	if (ret)							\
79 		return ret;
80 
81 int trace_define_common_fields(struct ftrace_event_call *call)
82 {
83 	int ret;
84 	struct trace_entry ent;
85 
86 	__common_field(unsigned short, type);
87 	__common_field(unsigned char, flags);
88 	__common_field(unsigned char, preempt_count);
89 	__common_field(int, pid);
90 	__common_field(int, lock_depth);
91 
92 	return ret;
93 }
94 EXPORT_SYMBOL_GPL(trace_define_common_fields);
95 
96 #ifdef CONFIG_MODULES
97 
98 static void trace_destroy_fields(struct ftrace_event_call *call)
99 {
100 	struct ftrace_event_field *field, *next;
101 
102 	list_for_each_entry_safe(field, next, &call->fields, link) {
103 		list_del(&field->link);
104 		kfree(field->type);
105 		kfree(field->name);
106 		kfree(field);
107 	}
108 }
109 
110 #endif /* CONFIG_MODULES */
111 
112 static void ftrace_event_enable_disable(struct ftrace_event_call *call,
113 					int enable)
114 {
115 	switch (enable) {
116 	case 0:
117 		if (call->enabled) {
118 			call->enabled = 0;
119 			tracing_stop_cmdline_record();
120 			call->unregfunc(call->data);
121 		}
122 		break;
123 	case 1:
124 		if (!call->enabled) {
125 			call->enabled = 1;
126 			tracing_start_cmdline_record();
127 			call->regfunc(call->data);
128 		}
129 		break;
130 	}
131 }
132 
133 static void ftrace_clear_events(void)
134 {
135 	struct ftrace_event_call *call;
136 
137 	mutex_lock(&event_mutex);
138 	list_for_each_entry(call, &ftrace_events, list) {
139 		ftrace_event_enable_disable(call, 0);
140 	}
141 	mutex_unlock(&event_mutex);
142 }
143 
144 /*
145  * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events.
146  */
147 static int __ftrace_set_clr_event(const char *match, const char *sub,
148 				  const char *event, int set)
149 {
150 	struct ftrace_event_call *call;
151 	int ret = -EINVAL;
152 
153 	mutex_lock(&event_mutex);
154 	list_for_each_entry(call, &ftrace_events, list) {
155 
156 		if (!call->name || !call->regfunc)
157 			continue;
158 
159 		if (match &&
160 		    strcmp(match, call->name) != 0 &&
161 		    strcmp(match, call->system) != 0)
162 			continue;
163 
164 		if (sub && strcmp(sub, call->system) != 0)
165 			continue;
166 
167 		if (event && strcmp(event, call->name) != 0)
168 			continue;
169 
170 		ftrace_event_enable_disable(call, set);
171 
172 		ret = 0;
173 	}
174 	mutex_unlock(&event_mutex);
175 
176 	return ret;
177 }
178 
179 static int ftrace_set_clr_event(char *buf, int set)
180 {
181 	char *event = NULL, *sub = NULL, *match;
182 
183 	/*
184 	 * The buf format can be <subsystem>:<event-name>
185 	 *  *:<event-name> means any event by that name.
186 	 *  :<event-name> is the same.
187 	 *
188 	 *  <subsystem>:* means all events in that subsystem
189 	 *  <subsystem>: means the same.
190 	 *
191 	 *  <name> (no ':') means all events in a subsystem with
192 	 *  the name <name> or any event that matches <name>
193 	 */
194 
195 	match = strsep(&buf, ":");
196 	if (buf) {
197 		sub = match;
198 		event = buf;
199 		match = NULL;
200 
201 		if (!strlen(sub) || strcmp(sub, "*") == 0)
202 			sub = NULL;
203 		if (!strlen(event) || strcmp(event, "*") == 0)
204 			event = NULL;
205 	}
206 
207 	return __ftrace_set_clr_event(match, sub, event, set);
208 }
209 
210 /**
211  * trace_set_clr_event - enable or disable an event
212  * @system: system name to match (NULL for any system)
213  * @event: event name to match (NULL for all events, within system)
214  * @set: 1 to enable, 0 to disable
215  *
216  * This is a way for other parts of the kernel to enable or disable
217  * event recording.
218  *
219  * Returns 0 on success, -EINVAL if the parameters do not match any
220  * registered events.
221  */
222 int trace_set_clr_event(const char *system, const char *event, int set)
223 {
224 	return __ftrace_set_clr_event(NULL, system, event, set);
225 }
226 
227 /* 128 should be much more than enough */
228 #define EVENT_BUF_SIZE		127
229 
230 static ssize_t
231 ftrace_event_write(struct file *file, const char __user *ubuf,
232 		   size_t cnt, loff_t *ppos)
233 {
234 	struct trace_parser parser;
235 	size_t read = 0;
236 	ssize_t ret;
237 
238 	if (!cnt || cnt < 0)
239 		return 0;
240 
241 	ret = tracing_update_buffers();
242 	if (ret < 0)
243 		return ret;
244 
245 	if (trace_parser_get_init(&parser, EVENT_BUF_SIZE + 1))
246 		return -ENOMEM;
247 
248 	read = trace_get_user(&parser, ubuf, cnt, ppos);
249 
250 	if (trace_parser_loaded((&parser))) {
251 		int set = 1;
252 
253 		if (*parser.buffer == '!')
254 			set = 0;
255 
256 		parser.buffer[parser.idx] = 0;
257 
258 		ret = ftrace_set_clr_event(parser.buffer + !set, set);
259 		if (ret)
260 			goto out_put;
261 	}
262 
263 	ret = read;
264 
265  out_put:
266 	trace_parser_put(&parser);
267 
268 	return ret;
269 }
270 
271 static void *
272 t_next(struct seq_file *m, void *v, loff_t *pos)
273 {
274 	struct list_head *list = m->private;
275 	struct ftrace_event_call *call;
276 
277 	(*pos)++;
278 
279 	for (;;) {
280 		if (list == &ftrace_events)
281 			return NULL;
282 
283 		call = list_entry(list, struct ftrace_event_call, list);
284 
285 		/*
286 		 * The ftrace subsystem is for showing formats only.
287 		 * They can not be enabled or disabled via the event files.
288 		 */
289 		if (call->regfunc)
290 			break;
291 
292 		list = list->next;
293 	}
294 
295 	m->private = list->next;
296 
297 	return call;
298 }
299 
300 static void *t_start(struct seq_file *m, loff_t *pos)
301 {
302 	struct ftrace_event_call *call = NULL;
303 	loff_t l;
304 
305 	mutex_lock(&event_mutex);
306 
307 	m->private = ftrace_events.next;
308 	for (l = 0; l <= *pos; ) {
309 		call = t_next(m, NULL, &l);
310 		if (!call)
311 			break;
312 	}
313 	return call;
314 }
315 
316 static void *
317 s_next(struct seq_file *m, void *v, loff_t *pos)
318 {
319 	struct list_head *list = m->private;
320 	struct ftrace_event_call *call;
321 
322 	(*pos)++;
323 
324  retry:
325 	if (list == &ftrace_events)
326 		return NULL;
327 
328 	call = list_entry(list, struct ftrace_event_call, list);
329 
330 	if (!call->enabled) {
331 		list = list->next;
332 		goto retry;
333 	}
334 
335 	m->private = list->next;
336 
337 	return call;
338 }
339 
340 static void *s_start(struct seq_file *m, loff_t *pos)
341 {
342 	struct ftrace_event_call *call = NULL;
343 	loff_t l;
344 
345 	mutex_lock(&event_mutex);
346 
347 	m->private = ftrace_events.next;
348 	for (l = 0; l <= *pos; ) {
349 		call = s_next(m, NULL, &l);
350 		if (!call)
351 			break;
352 	}
353 	return call;
354 }
355 
356 static int t_show(struct seq_file *m, void *v)
357 {
358 	struct ftrace_event_call *call = v;
359 
360 	if (strcmp(call->system, TRACE_SYSTEM) != 0)
361 		seq_printf(m, "%s:", call->system);
362 	seq_printf(m, "%s\n", call->name);
363 
364 	return 0;
365 }
366 
/* seq_file ->stop: drop the event_mutex taken by t_start()/s_start(). */
static void t_stop(struct seq_file *m, void *p)
{
	mutex_unlock(&event_mutex);
}
371 
372 static int
373 ftrace_event_seq_open(struct inode *inode, struct file *file)
374 {
375 	const struct seq_operations *seq_ops;
376 
377 	if ((file->f_mode & FMODE_WRITE) &&
378 	    (file->f_flags & O_TRUNC))
379 		ftrace_clear_events();
380 
381 	seq_ops = inode->i_private;
382 	return seq_open(file, seq_ops);
383 }
384 
385 static ssize_t
386 event_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
387 		  loff_t *ppos)
388 {
389 	struct ftrace_event_call *call = filp->private_data;
390 	char *buf;
391 
392 	if (call->enabled)
393 		buf = "1\n";
394 	else
395 		buf = "0\n";
396 
397 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
398 }
399 
400 static ssize_t
401 event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
402 		   loff_t *ppos)
403 {
404 	struct ftrace_event_call *call = filp->private_data;
405 	char buf[64];
406 	unsigned long val;
407 	int ret;
408 
409 	if (cnt >= sizeof(buf))
410 		return -EINVAL;
411 
412 	if (copy_from_user(&buf, ubuf, cnt))
413 		return -EFAULT;
414 
415 	buf[cnt] = 0;
416 
417 	ret = strict_strtoul(buf, 10, &val);
418 	if (ret < 0)
419 		return ret;
420 
421 	ret = tracing_update_buffers();
422 	if (ret < 0)
423 		return ret;
424 
425 	switch (val) {
426 	case 0:
427 	case 1:
428 		mutex_lock(&event_mutex);
429 		ftrace_event_enable_disable(call, val);
430 		mutex_unlock(&event_mutex);
431 		break;
432 
433 	default:
434 		return -EINVAL;
435 	}
436 
437 	*ppos += cnt;
438 
439 	return cnt;
440 }
441 
442 static ssize_t
443 system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
444 		   loff_t *ppos)
445 {
446 	const char set_to_char[4] = { '?', '0', '1', 'X' };
447 	const char *system = filp->private_data;
448 	struct ftrace_event_call *call;
449 	char buf[2];
450 	int set = 0;
451 	int ret;
452 
453 	mutex_lock(&event_mutex);
454 	list_for_each_entry(call, &ftrace_events, list) {
455 		if (!call->name || !call->regfunc)
456 			continue;
457 
458 		if (system && strcmp(call->system, system) != 0)
459 			continue;
460 
461 		/*
462 		 * We need to find out if all the events are set
463 		 * or if all events or cleared, or if we have
464 		 * a mixture.
465 		 */
466 		set |= (1 << !!call->enabled);
467 
468 		/*
469 		 * If we have a mixture, no need to look further.
470 		 */
471 		if (set == 3)
472 			break;
473 	}
474 	mutex_unlock(&event_mutex);
475 
476 	buf[0] = set_to_char[set];
477 	buf[1] = '\n';
478 
479 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
480 
481 	return ret;
482 }
483 
484 static ssize_t
485 system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
486 		    loff_t *ppos)
487 {
488 	const char *system = filp->private_data;
489 	unsigned long val;
490 	char buf[64];
491 	ssize_t ret;
492 
493 	if (cnt >= sizeof(buf))
494 		return -EINVAL;
495 
496 	if (copy_from_user(&buf, ubuf, cnt))
497 		return -EFAULT;
498 
499 	buf[cnt] = 0;
500 
501 	ret = strict_strtoul(buf, 10, &val);
502 	if (ret < 0)
503 		return ret;
504 
505 	ret = tracing_update_buffers();
506 	if (ret < 0)
507 		return ret;
508 
509 	if (val != 0 && val != 1)
510 		return -EINVAL;
511 
512 	ret = __ftrace_set_clr_event(NULL, system, NULL, val);
513 	if (ret)
514 		goto out;
515 
516 	ret = cnt;
517 
518 out:
519 	*ppos += cnt;
520 
521 	return ret;
522 }
523 
/*
 * Only declared here and only referenced from FIELD() when a size check
 * fails.
 * NOTE(review): presumably never defined, so that a mismatch turns into a
 * link error - confirm.
 */
extern char *__bad_type_size(void);

/*
 * Expands to the four trace_seq_printf() arguments describing one member
 * of the local 'field': type string, "common_<name>" string, offset and
 * size. If sizeof(type) differs from the size of the member, the type
 * string is replaced by a call to __bad_type_size().
 */
#undef FIELD
#define FIELD(type, name)						\
	sizeof(type) != sizeof(field.name) ? __bad_type_size() :	\
	#type, "common_" #name, offsetof(typeof(field), name),		\
		sizeof(field.name)

/*
 * Print the description of the struct trace_entry members (type, flags,
 * preempt_count, pid, lock_depth) into @s, one "field:" line each,
 * followed by a blank line. Returns what trace_seq_printf() returns.
 */
static int trace_write_header(struct trace_seq *s)
{
	/* Never read at run time; only used by FIELD() for sizeof/offsetof. */
	struct trace_entry field;

	/* struct trace_entry */
	return trace_seq_printf(s,
				"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
				"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
				"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
				"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
				"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
				"\n",
				FIELD(unsigned short, type),
				FIELD(unsigned char, flags),
				FIELD(unsigned char, preempt_count),
				FIELD(int, pid),
				FIELD(int, lock_depth));
}
550 
551 static ssize_t
552 event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
553 		  loff_t *ppos)
554 {
555 	struct ftrace_event_call *call = filp->private_data;
556 	struct trace_seq *s;
557 	char *buf;
558 	int r;
559 
560 	if (*ppos)
561 		return 0;
562 
563 	s = kmalloc(sizeof(*s), GFP_KERNEL);
564 	if (!s)
565 		return -ENOMEM;
566 
567 	trace_seq_init(s);
568 
569 	/* If any of the first writes fail, so will the show_format. */
570 
571 	trace_seq_printf(s, "name: %s\n", call->name);
572 	trace_seq_printf(s, "ID: %d\n", call->id);
573 	trace_seq_printf(s, "format:\n");
574 	trace_write_header(s);
575 
576 	r = call->show_format(call, s);
577 	if (!r) {
578 		/*
579 		 * ug!  The format output is bigger than a PAGE!!
580 		 */
581 		buf = "FORMAT TOO BIG\n";
582 		r = simple_read_from_buffer(ubuf, cnt, ppos,
583 					      buf, strlen(buf));
584 		goto out;
585 	}
586 
587 	r = simple_read_from_buffer(ubuf, cnt, ppos,
588 				    s->buffer, s->len);
589  out:
590 	kfree(s);
591 	return r;
592 }
593 
594 static ssize_t
595 event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
596 {
597 	struct ftrace_event_call *call = filp->private_data;
598 	struct trace_seq *s;
599 	int r;
600 
601 	if (*ppos)
602 		return 0;
603 
604 	s = kmalloc(sizeof(*s), GFP_KERNEL);
605 	if (!s)
606 		return -ENOMEM;
607 
608 	trace_seq_init(s);
609 	trace_seq_printf(s, "%d\n", call->id);
610 
611 	r = simple_read_from_buffer(ubuf, cnt, ppos,
612 				    s->buffer, s->len);
613 	kfree(s);
614 	return r;
615 }
616 
617 static ssize_t
618 event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
619 		  loff_t *ppos)
620 {
621 	struct ftrace_event_call *call = filp->private_data;
622 	struct trace_seq *s;
623 	int r;
624 
625 	if (*ppos)
626 		return 0;
627 
628 	s = kmalloc(sizeof(*s), GFP_KERNEL);
629 	if (!s)
630 		return -ENOMEM;
631 
632 	trace_seq_init(s);
633 
634 	print_event_filter(call, s);
635 	r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
636 
637 	kfree(s);
638 
639 	return r;
640 }
641 
642 static ssize_t
643 event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
644 		   loff_t *ppos)
645 {
646 	struct ftrace_event_call *call = filp->private_data;
647 	char *buf;
648 	int err;
649 
650 	if (cnt >= PAGE_SIZE)
651 		return -EINVAL;
652 
653 	buf = (char *)__get_free_page(GFP_TEMPORARY);
654 	if (!buf)
655 		return -ENOMEM;
656 
657 	if (copy_from_user(buf, ubuf, cnt)) {
658 		free_page((unsigned long) buf);
659 		return -EFAULT;
660 	}
661 	buf[cnt] = '\0';
662 
663 	err = apply_event_filter(call, buf);
664 	free_page((unsigned long) buf);
665 	if (err < 0)
666 		return err;
667 
668 	*ppos += cnt;
669 
670 	return cnt;
671 }
672 
673 static ssize_t
674 subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
675 		      loff_t *ppos)
676 {
677 	struct event_subsystem *system = filp->private_data;
678 	struct trace_seq *s;
679 	int r;
680 
681 	if (*ppos)
682 		return 0;
683 
684 	s = kmalloc(sizeof(*s), GFP_KERNEL);
685 	if (!s)
686 		return -ENOMEM;
687 
688 	trace_seq_init(s);
689 
690 	print_subsystem_event_filter(system, s);
691 	r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
692 
693 	kfree(s);
694 
695 	return r;
696 }
697 
698 static ssize_t
699 subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
700 		       loff_t *ppos)
701 {
702 	struct event_subsystem *system = filp->private_data;
703 	char *buf;
704 	int err;
705 
706 	if (cnt >= PAGE_SIZE)
707 		return -EINVAL;
708 
709 	buf = (char *)__get_free_page(GFP_TEMPORARY);
710 	if (!buf)
711 		return -ENOMEM;
712 
713 	if (copy_from_user(buf, ubuf, cnt)) {
714 		free_page((unsigned long) buf);
715 		return -EFAULT;
716 	}
717 	buf[cnt] = '\0';
718 
719 	err = apply_subsystem_event_filter(system, buf);
720 	free_page((unsigned long) buf);
721 	if (err < 0)
722 		return err;
723 
724 	*ppos += cnt;
725 
726 	return cnt;
727 }
728 
729 static ssize_t
730 show_header(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
731 {
732 	int (*func)(struct trace_seq *s) = filp->private_data;
733 	struct trace_seq *s;
734 	int r;
735 
736 	if (*ppos)
737 		return 0;
738 
739 	s = kmalloc(sizeof(*s), GFP_KERNEL);
740 	if (!s)
741 		return -ENOMEM;
742 
743 	trace_seq_init(s);
744 
745 	func(s);
746 	r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
747 
748 	kfree(s);
749 
750 	return r;
751 }
752 
/* Iterator for "available_events": every event that has a regfunc. */
static const struct seq_operations show_event_seq_ops = {
	.start = t_start,
	.next = t_next,
	.show = t_show,
	.stop = t_stop,
};

/* Iterator for "set_event": only the events that are currently enabled. */
static const struct seq_operations show_set_event_seq_ops = {
	.start = s_start,
	.next = s_next,
	.show = t_show,
	.stop = t_stop,
};
766 
/* "available_events": read-only listing through seq_file. */
static const struct file_operations ftrace_avail_fops = {
	.open = ftrace_event_seq_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release,
};

/* "set_event": seq_file listing plus a write handler to toggle events. */
static const struct file_operations ftrace_set_event_fops = {
	.open = ftrace_event_seq_open,
	.read = seq_read,
	.write = ftrace_event_write,
	.llseek = seq_lseek,
	.release = seq_release,
};

/* Per-event "enable" file. */
static const struct file_operations ftrace_enable_fops = {
	.open = tracing_open_generic,
	.read = event_enable_read,
	.write = event_enable_write,
};

/* Per-event "format" file (read-only). */
static const struct file_operations ftrace_event_format_fops = {
	.open = tracing_open_generic,
	.read = event_format_read,
};

/* Per-event "id" file (read-only). */
static const struct file_operations ftrace_event_id_fops = {
	.open = tracing_open_generic,
	.read = event_id_read,
};

/* Per-event "filter" file. */
static const struct file_operations ftrace_event_filter_fops = {
	.open = tracing_open_generic,
	.read = event_filter_read,
	.write = event_filter_write,
};

/* Per-subsystem "filter" file. */
static const struct file_operations ftrace_subsystem_filter_fops = {
	.open = tracing_open_generic,
	.read = subsystem_filter_read,
	.write = subsystem_filter_write,
};

/* Per-subsystem "enable" file; also used for the top-level events/enable. */
static const struct file_operations ftrace_system_enable_fops = {
	.open = tracing_open_generic,
	.read = system_enable_read,
	.write = system_enable_write,
};

/* "header_page" / "header_event" files (read-only). */
static const struct file_operations ftrace_show_header_fops = {
	.open = tracing_open_generic,
	.read = show_header,
};
820 
821 static struct dentry *event_trace_events_dir(void)
822 {
823 	static struct dentry *d_tracer;
824 	static struct dentry *d_events;
825 
826 	if (d_events)
827 		return d_events;
828 
829 	d_tracer = tracing_init_dentry();
830 	if (!d_tracer)
831 		return NULL;
832 
833 	d_events = debugfs_create_dir("events", d_tracer);
834 	if (!d_events)
835 		pr_warning("Could not create debugfs "
836 			   "'events' directory\n");
837 
838 	return d_events;
839 }
840 
841 static LIST_HEAD(event_subsystems);
842 
843 static struct dentry *
844 event_subsystem_dir(const char *name, struct dentry *d_events)
845 {
846 	struct event_subsystem *system;
847 	struct dentry *entry;
848 
849 	/* First see if we did not already create this dir */
850 	list_for_each_entry(system, &event_subsystems, list) {
851 		if (strcmp(system->name, name) == 0) {
852 			system->nr_events++;
853 			return system->entry;
854 		}
855 	}
856 
857 	/* need to create new entry */
858 	system = kmalloc(sizeof(*system), GFP_KERNEL);
859 	if (!system) {
860 		pr_warning("No memory to create event subsystem %s\n",
861 			   name);
862 		return d_events;
863 	}
864 
865 	system->entry = debugfs_create_dir(name, d_events);
866 	if (!system->entry) {
867 		pr_warning("Could not create event subsystem %s\n",
868 			   name);
869 		kfree(system);
870 		return d_events;
871 	}
872 
873 	system->nr_events = 1;
874 	system->name = kstrdup(name, GFP_KERNEL);
875 	if (!system->name) {
876 		debugfs_remove(system->entry);
877 		kfree(system);
878 		return d_events;
879 	}
880 
881 	list_add(&system->list, &event_subsystems);
882 
883 	system->filter = NULL;
884 
885 	system->filter = kzalloc(sizeof(struct event_filter), GFP_KERNEL);
886 	if (!system->filter) {
887 		pr_warning("Could not allocate filter for subsystem "
888 			   "'%s'\n", name);
889 		return system->entry;
890 	}
891 
892 	entry = debugfs_create_file("filter", 0644, system->entry, system,
893 				    &ftrace_subsystem_filter_fops);
894 	if (!entry) {
895 		kfree(system->filter);
896 		system->filter = NULL;
897 		pr_warning("Could not create debugfs "
898 			   "'%s/filter' entry\n", name);
899 	}
900 
901 	entry = trace_create_file("enable", 0644, system->entry,
902 				  (void *)system->name,
903 				  &ftrace_system_enable_fops);
904 
905 	return system->entry;
906 }
907 
908 static int
909 event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
910 		 const struct file_operations *id,
911 		 const struct file_operations *enable,
912 		 const struct file_operations *filter,
913 		 const struct file_operations *format)
914 {
915 	struct dentry *entry;
916 	int ret;
917 
918 	/*
919 	 * If the trace point header did not define TRACE_SYSTEM
920 	 * then the system would be called "TRACE_SYSTEM".
921 	 */
922 	if (strcmp(call->system, TRACE_SYSTEM) != 0)
923 		d_events = event_subsystem_dir(call->system, d_events);
924 
925 	call->dir = debugfs_create_dir(call->name, d_events);
926 	if (!call->dir) {
927 		pr_warning("Could not create debugfs "
928 			   "'%s' directory\n", call->name);
929 		return -1;
930 	}
931 
932 	if (call->regfunc)
933 		entry = trace_create_file("enable", 0644, call->dir, call,
934 					  enable);
935 
936 	if (call->id && call->profile_enable)
937 		entry = trace_create_file("id", 0444, call->dir, call,
938 					  id);
939 
940 	if (call->define_fields) {
941 		ret = call->define_fields(call);
942 		if (ret < 0) {
943 			pr_warning("Could not initialize trace point"
944 				   " events/%s\n", call->name);
945 			return ret;
946 		}
947 		entry = trace_create_file("filter", 0644, call->dir, call,
948 					  filter);
949 	}
950 
951 	/* A trace may not want to export its format */
952 	if (!call->show_format)
953 		return 0;
954 
955 	entry = trace_create_file("format", 0444, call->dir, call,
956 				  format);
957 
958 	return 0;
959 }
960 
/*
 * for_each_event - walk an array of events delimited by two addresses
 * @event: pointer used as the loop cursor
 * @start: address of the first element
 * @end:   address one past the last element
 *
 * Every argument is parenthesized so that expressions such as
 * "base + count" can be passed as @end: without the parentheses the cast
 * would bind to "base" only and the walk would stop early.
 */
#define for_each_event(event, start, end)			\
	for ((event) = (start);					\
	     (unsigned long)(event) < (unsigned long)(end);	\
	     (event)++)
965 
966 #ifdef CONFIG_MODULES
967 
/* One ftrace_module_file_ops per module that has had events added. */
static LIST_HEAD(ftrace_module_file_list);

/*
 * Modules must own their file_operations to keep up with
 * reference counting.
 *
 * Each member below is a private copy of the corresponding built-in
 * file_operations, with ->owner set to @mod by trace_create_file_ops().
 */
struct ftrace_module_file_ops {
	struct list_head		list;	/* on ftrace_module_file_list */
	struct module			*mod;	/* module owning these ops */
	struct file_operations		id;
	struct file_operations		enable;
	struct file_operations		format;
	struct file_operations		filter;
};
982 
983 static void remove_subsystem_dir(const char *name)
984 {
985 	struct event_subsystem *system;
986 
987 	if (strcmp(name, TRACE_SYSTEM) == 0)
988 		return;
989 
990 	list_for_each_entry(system, &event_subsystems, list) {
991 		if (strcmp(system->name, name) == 0) {
992 			if (!--system->nr_events) {
993 				struct event_filter *filter = system->filter;
994 
995 				debugfs_remove_recursive(system->entry);
996 				list_del(&system->list);
997 				if (filter) {
998 					kfree(filter->filter_string);
999 					kfree(filter);
1000 				}
1001 				kfree(system->name);
1002 				kfree(system);
1003 			}
1004 			break;
1005 		}
1006 	}
1007 }
1008 
1009 static struct ftrace_module_file_ops *
1010 trace_create_file_ops(struct module *mod)
1011 {
1012 	struct ftrace_module_file_ops *file_ops;
1013 
1014 	/*
1015 	 * This is a bit of a PITA. To allow for correct reference
1016 	 * counting, modules must "own" their file_operations.
1017 	 * To do this, we allocate the file operations that will be
1018 	 * used in the event directory.
1019 	 */
1020 
1021 	file_ops = kmalloc(sizeof(*file_ops), GFP_KERNEL);
1022 	if (!file_ops)
1023 		return NULL;
1024 
1025 	file_ops->mod = mod;
1026 
1027 	file_ops->id = ftrace_event_id_fops;
1028 	file_ops->id.owner = mod;
1029 
1030 	file_ops->enable = ftrace_enable_fops;
1031 	file_ops->enable.owner = mod;
1032 
1033 	file_ops->filter = ftrace_event_filter_fops;
1034 	file_ops->filter.owner = mod;
1035 
1036 	file_ops->format = ftrace_event_format_fops;
1037 	file_ops->format.owner = mod;
1038 
1039 	list_add(&file_ops->list, &ftrace_module_file_list);
1040 
1041 	return file_ops;
1042 }
1043 
1044 static void trace_module_add_events(struct module *mod)
1045 {
1046 	struct ftrace_module_file_ops *file_ops = NULL;
1047 	struct ftrace_event_call *call, *start, *end;
1048 	struct dentry *d_events;
1049 	int ret;
1050 
1051 	start = mod->trace_events;
1052 	end = mod->trace_events + mod->num_trace_events;
1053 
1054 	if (start == end)
1055 		return;
1056 
1057 	d_events = event_trace_events_dir();
1058 	if (!d_events)
1059 		return;
1060 
1061 	for_each_event(call, start, end) {
1062 		/* The linker may leave blanks */
1063 		if (!call->name)
1064 			continue;
1065 		if (call->raw_init) {
1066 			ret = call->raw_init();
1067 			if (ret < 0) {
1068 				if (ret != -ENOSYS)
1069 					pr_warning("Could not initialize trace "
1070 					"point events/%s\n", call->name);
1071 				continue;
1072 			}
1073 		}
1074 		/*
1075 		 * This module has events, create file ops for this module
1076 		 * if not already done.
1077 		 */
1078 		if (!file_ops) {
1079 			file_ops = trace_create_file_ops(mod);
1080 			if (!file_ops)
1081 				return;
1082 		}
1083 		call->mod = mod;
1084 		list_add(&call->list, &ftrace_events);
1085 		event_create_dir(call, d_events,
1086 				 &file_ops->id, &file_ops->enable,
1087 				 &file_ops->filter, &file_ops->format);
1088 	}
1089 }
1090 
1091 static void trace_module_remove_events(struct module *mod)
1092 {
1093 	struct ftrace_module_file_ops *file_ops;
1094 	struct ftrace_event_call *call, *p;
1095 	bool found = false;
1096 
1097 	down_write(&trace_event_mutex);
1098 	list_for_each_entry_safe(call, p, &ftrace_events, list) {
1099 		if (call->mod == mod) {
1100 			found = true;
1101 			ftrace_event_enable_disable(call, 0);
1102 			if (call->event)
1103 				__unregister_ftrace_event(call->event);
1104 			debugfs_remove_recursive(call->dir);
1105 			list_del(&call->list);
1106 			trace_destroy_fields(call);
1107 			destroy_preds(call);
1108 			remove_subsystem_dir(call->system);
1109 		}
1110 	}
1111 
1112 	/* Now free the file_operations */
1113 	list_for_each_entry(file_ops, &ftrace_module_file_list, list) {
1114 		if (file_ops->mod == mod)
1115 			break;
1116 	}
1117 	if (&file_ops->list != &ftrace_module_file_list) {
1118 		list_del(&file_ops->list);
1119 		kfree(file_ops);
1120 	}
1121 
1122 	/*
1123 	 * It is safest to reset the ring buffer if the module being unloaded
1124 	 * registered any events.
1125 	 */
1126 	if (found)
1127 		tracing_reset_current_online_cpus();
1128 	up_write(&trace_event_mutex);
1129 }
1130 
1131 static int trace_module_notify(struct notifier_block *self,
1132 			       unsigned long val, void *data)
1133 {
1134 	struct module *mod = data;
1135 
1136 	mutex_lock(&event_mutex);
1137 	switch (val) {
1138 	case MODULE_STATE_COMING:
1139 		trace_module_add_events(mod);
1140 		break;
1141 	case MODULE_STATE_GOING:
1142 		trace_module_remove_events(mod);
1143 		break;
1144 	}
1145 	mutex_unlock(&event_mutex);
1146 
1147 	return 0;
1148 }
1149 #else
/* Without CONFIG_MODULES there are no module events: nothing to do. */
static int trace_module_notify(struct notifier_block *self,
			       unsigned long val, void *data)
{
	return 0;
}
1155 #endif /* CONFIG_MODULES */
1156 
/* Module notifier block; registered at the end of event_trace_init(). */
static struct notifier_block trace_module_nb = {
	.notifier_call = trace_module_notify,
	.priority = 0,
};

/*
 * Bounds of the built-in event array walked by event_trace_init().
 * NOTE(review): presumably provided by the linker script - confirm.
 */
extern struct ftrace_event_call __start_ftrace_events[];
extern struct ftrace_event_call __stop_ftrace_events[];

/* Copy of the "trace_event=" boot parameter, parsed by event_trace_init(). */
static char bootup_event_buf[COMMAND_LINE_SIZE] __initdata;
1166 
1167 static __init int setup_trace_event(char *str)
1168 {
1169 	strlcpy(bootup_event_buf, str, COMMAND_LINE_SIZE);
1170 	ring_buffer_expanded = 1;
1171 	tracing_selftest_disabled = 1;
1172 
1173 	return 1;
1174 }
1175 __setup("trace_event=", setup_trace_event);
1176 
1177 static __init int event_trace_init(void)
1178 {
1179 	struct ftrace_event_call *call;
1180 	struct dentry *d_tracer;
1181 	struct dentry *entry;
1182 	struct dentry *d_events;
1183 	int ret;
1184 	char *buf = bootup_event_buf;
1185 	char *token;
1186 
1187 	d_tracer = tracing_init_dentry();
1188 	if (!d_tracer)
1189 		return 0;
1190 
1191 	entry = debugfs_create_file("available_events", 0444, d_tracer,
1192 				    (void *)&show_event_seq_ops,
1193 				    &ftrace_avail_fops);
1194 	if (!entry)
1195 		pr_warning("Could not create debugfs "
1196 			   "'available_events' entry\n");
1197 
1198 	entry = debugfs_create_file("set_event", 0644, d_tracer,
1199 				    (void *)&show_set_event_seq_ops,
1200 				    &ftrace_set_event_fops);
1201 	if (!entry)
1202 		pr_warning("Could not create debugfs "
1203 			   "'set_event' entry\n");
1204 
1205 	d_events = event_trace_events_dir();
1206 	if (!d_events)
1207 		return 0;
1208 
1209 	/* ring buffer internal formats */
1210 	trace_create_file("header_page", 0444, d_events,
1211 			  ring_buffer_print_page_header,
1212 			  &ftrace_show_header_fops);
1213 
1214 	trace_create_file("header_event", 0444, d_events,
1215 			  ring_buffer_print_entry_header,
1216 			  &ftrace_show_header_fops);
1217 
1218 	trace_create_file("enable", 0644, d_events,
1219 			  NULL, &ftrace_system_enable_fops);
1220 
1221 	for_each_event(call, __start_ftrace_events, __stop_ftrace_events) {
1222 		/* The linker may leave blanks */
1223 		if (!call->name)
1224 			continue;
1225 		if (call->raw_init) {
1226 			ret = call->raw_init();
1227 			if (ret < 0) {
1228 				if (ret != -ENOSYS)
1229 					pr_warning("Could not initialize trace "
1230 					"point events/%s\n", call->name);
1231 				continue;
1232 			}
1233 		}
1234 		list_add(&call->list, &ftrace_events);
1235 		event_create_dir(call, d_events, &ftrace_event_id_fops,
1236 				 &ftrace_enable_fops, &ftrace_event_filter_fops,
1237 				 &ftrace_event_format_fops);
1238 	}
1239 
1240 	while (true) {
1241 		token = strsep(&buf, ",");
1242 
1243 		if (!token)
1244 			break;
1245 		if (!*token)
1246 			continue;
1247 
1248 		ret = ftrace_set_clr_event(token, 1);
1249 		if (ret)
1250 			pr_warning("Failed to enable trace event: %s\n", token);
1251 	}
1252 
1253 	ret = register_module_notifier(&trace_module_nb);
1254 	if (ret)
1255 		pr_warning("Failed to register trace events module notifier\n");
1256 
1257 	return 0;
1258 }
1259 fs_initcall(event_trace_init);
1260 
1261 #ifdef CONFIG_FTRACE_STARTUP_TEST
1262 
/* Locks that exist only to be taken and released by test_work(). */
static DEFINE_SPINLOCK(test_spinlock);
static DEFINE_SPINLOCK(test_spinlock_irq);
static DEFINE_MUTEX(test_mutex);

/*
 * Work item that event_test_thread() schedules on every CPU: takes two
 * nested spinlocks (the inner one with interrupts disabled) around a short
 * delay, then a mutex around a short sleep.
 * NOTE(review): presumably meant to provoke lock/irq/sched events while the
 * self tests have them enabled - confirm.
 */
static __init void test_work(struct work_struct *dummy)
{
	spin_lock(&test_spinlock);
	spin_lock_irq(&test_spinlock_irq);
	udelay(1);
	spin_unlock_irq(&test_spinlock_irq);
	spin_unlock(&test_spinlock);

	mutex_lock(&test_mutex);
	msleep(1);
	mutex_unlock(&test_mutex);
}
1279 
1280 static __init int event_test_thread(void *unused)
1281 {
1282 	void *test_malloc;
1283 
1284 	test_malloc = kmalloc(1234, GFP_KERNEL);
1285 	if (!test_malloc)
1286 		pr_info("failed to kmalloc\n");
1287 
1288 	schedule_on_each_cpu(test_work);
1289 
1290 	kfree(test_malloc);
1291 
1292 	set_current_state(TASK_INTERRUPTIBLE);
1293 	while (!kthread_should_stop())
1294 		schedule();
1295 
1296 	return 0;
1297 }
1298 
1299 /*
1300  * Do various things that may trigger events.
1301  */
1302 static __init void event_test_stuff(void)
1303 {
1304 	struct task_struct *test_thread;
1305 
1306 	test_thread = kthread_run(event_test_thread, NULL, "test-events");
1307 	msleep(1);
1308 	kthread_stop(test_thread);
1309 }
1310 
1311 /*
1312  * For every trace event defined, we will test each trace point separately,
1313  * and then by groups, and finally all trace points.
1314  */
1315 static __init void event_trace_self_tests(void)
1316 {
1317 	struct ftrace_event_call *call;
1318 	struct event_subsystem *system;
1319 	int ret;
1320 
1321 	pr_info("Running tests on trace events:\n");
1322 
1323 	list_for_each_entry(call, &ftrace_events, list) {
1324 
1325 		/* Only test those that have a regfunc */
1326 		if (!call->regfunc)
1327 			continue;
1328 
1329 /*
1330  * Testing syscall events here is pretty useless, but
1331  * we still do it if configured. But this is time consuming.
1332  * What we really need is a user thread to perform the
1333  * syscalls as we test.
1334  */
1335 #ifndef CONFIG_EVENT_TRACE_TEST_SYSCALLS
1336 		if (call->system &&
1337 		    strcmp(call->system, "syscalls") == 0)
1338 			continue;
1339 #endif
1340 
1341 		pr_info("Testing event %s: ", call->name);
1342 
1343 		/*
1344 		 * If an event is already enabled, someone is using
1345 		 * it and the self test should not be on.
1346 		 */
1347 		if (call->enabled) {
1348 			pr_warning("Enabled event during self test!\n");
1349 			WARN_ON_ONCE(1);
1350 			continue;
1351 		}
1352 
1353 		ftrace_event_enable_disable(call, 1);
1354 		event_test_stuff();
1355 		ftrace_event_enable_disable(call, 0);
1356 
1357 		pr_cont("OK\n");
1358 	}
1359 
1360 	/* Now test at the sub system level */
1361 
1362 	pr_info("Running tests on trace event systems:\n");
1363 
1364 	list_for_each_entry(system, &event_subsystems, list) {
1365 
1366 		/* the ftrace system is special, skip it */
1367 		if (strcmp(system->name, "ftrace") == 0)
1368 			continue;
1369 
1370 		pr_info("Testing event system %s: ", system->name);
1371 
1372 		ret = __ftrace_set_clr_event(NULL, system->name, NULL, 1);
1373 		if (WARN_ON_ONCE(ret)) {
1374 			pr_warning("error enabling system %s\n",
1375 				   system->name);
1376 			continue;
1377 		}
1378 
1379 		event_test_stuff();
1380 
1381 		ret = __ftrace_set_clr_event(NULL, system->name, NULL, 0);
1382 		if (WARN_ON_ONCE(ret))
1383 			pr_warning("error disabling system %s\n",
1384 				   system->name);
1385 
1386 		pr_cont("OK\n");
1387 	}
1388 
1389 	/* Test with all events enabled */
1390 
1391 	pr_info("Running tests on all trace events:\n");
1392 	pr_info("Testing all events: ");
1393 
1394 	ret = __ftrace_set_clr_event(NULL, NULL, NULL, 1);
1395 	if (WARN_ON_ONCE(ret)) {
1396 		pr_warning("error enabling all events\n");
1397 		return;
1398 	}
1399 
1400 	event_test_stuff();
1401 
1402 	/* reset sysname */
1403 	ret = __ftrace_set_clr_event(NULL, NULL, NULL, 0);
1404 	if (WARN_ON_ONCE(ret)) {
1405 		pr_warning("error disabling all events\n");
1406 		return;
1407 	}
1408 
1409 	pr_cont("OK\n");
1410 }
1411 
1412 #ifdef CONFIG_FUNCTION_TRACER
1413 
/*
 * Per-CPU nesting counter for function_test_events_call(): the callback
 * increments it on entry, decrements it on exit, and only writes a trace
 * entry when its own increment brought the counter to 1.
 */
1414 static DEFINE_PER_CPU(atomic_t, ftrace_test_event_disable);
1415 
1416 static void
1417 function_test_events_call(unsigned long ip, unsigned long parent_ip)
1418 {
1419 	struct ring_buffer_event *event;
1420 	struct ring_buffer *buffer;
1421 	struct ftrace_entry *entry;
1422 	unsigned long flags;
1423 	long disabled;
1424 	int resched;
1425 	int cpu;
1426 	int pc;
1427 
1428 	pc = preempt_count();
1429 	resched = ftrace_preempt_disable();
1430 	cpu = raw_smp_processor_id();
1431 	disabled = atomic_inc_return(&per_cpu(ftrace_test_event_disable, cpu));
1432 
1433 	if (disabled != 1)
1434 		goto out;
1435 
1436 	local_save_flags(flags);
1437 
1438 	event = trace_current_buffer_lock_reserve(&buffer,
1439 						  TRACE_FN, sizeof(*entry),
1440 						  flags, pc);
1441 	if (!event)
1442 		goto out;
1443 	entry	= ring_buffer_event_data(event);
1444 	entry->ip			= ip;
1445 	entry->parent_ip		= parent_ip;
1446 
1447 	trace_nowake_buffer_unlock_commit(buffer, event, flags, pc);
1448 
1449  out:
1450 	atomic_dec(&per_cpu(ftrace_test_event_disable, cpu));
1451 	ftrace_preempt_enable(resched);
1452 }
1453 
/*
 * ftrace_ops whose callback is function_test_events_call().  It is
 * registered and unregistered by event_trace_self_test_with_function()
 * and, being __initdata, is only meant to be used during init.
 */
1454 static struct ftrace_ops trace_ops __initdata  =
1455 {
1456 	.func = function_test_events_call,
1457 };
1458 
1459 static __init void event_trace_self_test_with_function(void)
1460 {
1461 	register_ftrace_function(&trace_ops);
1462 	pr_info("Running tests again, along with the function tracer\n");
1463 	event_trace_self_tests();
1464 	unregister_ftrace_function(&trace_ops);
1465 }
1466 #else
/*
 * Stub used when CONFIG_FUNCTION_TRACER is not set: there is no function
 * tracer to run the tests alongside, so this does nothing.
 */
1467 static __init void event_trace_self_test_with_function(void)
1468 {
1469 }
1470 #endif
1471 
1472 static __init int event_trace_self_tests_init(void)
1473 {
1474 	if (!tracing_selftest_disabled) {
1475 		event_trace_self_tests();
1476 		event_trace_self_test_with_function();
1477 	}
1478 
1479 	return 0;
1480 }
1481 
1482 late_initcall(event_trace_self_tests_init);
1483 
1484 #endif
1485