xref: /linux/kernel/trace/trace_events.c (revision fea966f7564205fcf5919af9bde031e753419c96)
1 /*
2  * event tracer
3  *
4  * Copyright (C) 2008 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
5  *
6  *  - Added format output of the trace point fields.
7  *    This was based on work by Tom Zanussi <tzanussi@gmail.com>.
8  *
9  */
10 
11 #include <linux/workqueue.h>
12 #include <linux/spinlock.h>
13 #include <linux/kthread.h>
14 #include <linux/debugfs.h>
15 #include <linux/uaccess.h>
16 #include <linux/module.h>
17 #include <linux/ctype.h>
18 #include <linux/delay.h>
19 
20 #include "trace_output.h"
21 
22 #define TRACE_SYSTEM "TRACE_SYSTEM"
23 
24 DEFINE_MUTEX(event_mutex);
25 
26 LIST_HEAD(ftrace_events);
27 
28 int trace_define_field(struct ftrace_event_call *call, char *type,
29 		       char *name, int offset, int size, int is_signed)
30 {
31 	struct ftrace_event_field *field;
32 
33 	field = kzalloc(sizeof(*field), GFP_KERNEL);
34 	if (!field)
35 		goto err;
36 
37 	field->name = kstrdup(name, GFP_KERNEL);
38 	if (!field->name)
39 		goto err;
40 
41 	field->type = kstrdup(type, GFP_KERNEL);
42 	if (!field->type)
43 		goto err;
44 
45 	field->offset = offset;
46 	field->size = size;
47 	field->is_signed = is_signed;
48 	list_add(&field->link, &call->fields);
49 
50 	return 0;
51 
52 err:
53 	if (field) {
54 		kfree(field->name);
55 		kfree(field->type);
56 	}
57 	kfree(field);
58 
59 	return -ENOMEM;
60 }
61 EXPORT_SYMBOL_GPL(trace_define_field);
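
/*
 * A minimal sketch of how this is typically used (names are illustrative,
 * not from this file): an event's define_fields callback describes each
 * member of its entry structure, e.g.
 *
 *	trace_define_field(call, "pid_t", "pid",
 *			   offsetof(struct my_entry, pid),
 *			   sizeof(((struct my_entry *)0)->pid), 1);
 *
 * where "struct my_entry" is a hypothetical entry layout and the last
 * argument marks the field as signed.
 */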
62 
63 #ifdef CONFIG_MODULES
64 
65 static void trace_destroy_fields(struct ftrace_event_call *call)
66 {
67 	struct ftrace_event_field *field, *next;
68 
69 	list_for_each_entry_safe(field, next, &call->fields, link) {
70 		list_del(&field->link);
71 		kfree(field->type);
72 		kfree(field->name);
73 		kfree(field);
74 	}
75 }
76 
77 #endif /* CONFIG_MODULES */
78 
79 static void ftrace_event_enable_disable(struct ftrace_event_call *call,
80 					int enable)
81 {
82 	switch (enable) {
83 	case 0:
84 		if (call->enabled) {
85 			call->enabled = 0;
86 			tracing_stop_cmdline_record();
87 			call->unregfunc();
88 		}
89 		break;
90 	case 1:
91 		if (!call->enabled) {
92 			call->enabled = 1;
93 			tracing_start_cmdline_record();
94 			call->regfunc();
95 		}
96 		break;
97 	}
98 }
99 
100 static void ftrace_clear_events(void)
101 {
102 	struct ftrace_event_call *call;
103 
104 	mutex_lock(&event_mutex);
105 	list_for_each_entry(call, &ftrace_events, list) {
106 		ftrace_event_enable_disable(call, 0);
107 	}
108 	mutex_unlock(&event_mutex);
109 }
110 
111 /*
112  * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events.
113  */
114 static int __ftrace_set_clr_event(const char *match, const char *sub,
115 				  const char *event, int set)
116 {
117 	struct ftrace_event_call *call;
118 	int ret = -EINVAL;
119 
120 	mutex_lock(&event_mutex);
121 	list_for_each_entry(call, &ftrace_events, list) {
122 
123 		if (!call->name || !call->regfunc)
124 			continue;
125 
126 		if (match &&
127 		    strcmp(match, call->name) != 0 &&
128 		    strcmp(match, call->system) != 0)
129 			continue;
130 
131 		if (sub && strcmp(sub, call->system) != 0)
132 			continue;
133 
134 		if (event && strcmp(event, call->name) != 0)
135 			continue;
136 
137 		ftrace_event_enable_disable(call, set);
138 
139 		ret = 0;
140 	}
141 	mutex_unlock(&event_mutex);
142 
143 	return ret;
144 }
145 
146 static int ftrace_set_clr_event(char *buf, int set)
147 {
148 	char *event = NULL, *sub = NULL, *match;
149 
150 	/*
151 	 * The buf format can be <subsystem>:<event-name>
152 	 *  *:<event-name> means any event by that name.
153 	 *  :<event-name> is the same.
154 	 *
155 	 *  <subsystem>:* means all events in that subsystem
156 	 *  <subsystem>: means the same.
157 	 *
158 	 *  <name> (no ':') means all events in a subsystem with
159 	 *  the name <name> or any event that matches <name>
160 	 */
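	/*
	 * For illustration: "sched:sched_switch" parses to sub = "sched",
	 * event = "sched_switch"; "sched:" leaves event NULL and acts on the
	 * whole subsystem; a bare "sched_switch" becomes the match argument,
	 * which is compared against both event and subsystem names.  A
	 * leading '!' (stripped in ftrace_event_write()) selects disable
	 * instead of enable.
	 */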
161 
162 	match = strsep(&buf, ":");
163 	if (buf) {
164 		sub = match;
165 		event = buf;
166 		match = NULL;
167 
168 		if (!strlen(sub) || strcmp(sub, "*") == 0)
169 			sub = NULL;
170 		if (!strlen(event) || strcmp(event, "*") == 0)
171 			event = NULL;
172 	}
173 
174 	return __ftrace_set_clr_event(match, sub, event, set);
175 }
176 
177 /**
178  * trace_set_clr_event - enable or disable an event
179  * @system: system name to match (NULL for any system)
180  * @event: event name to match (NULL for all events within the system)
181  * @set: 1 to enable, 0 to disable
182  *
183  * This is a way for other parts of the kernel to enable or disable
184  * event recording.
185  *
186  * Returns 0 on success, -EINVAL if the parameters do not match any
187  * registered events.
188  */
189 int trace_set_clr_event(const char *system, const char *event, int set)
190 {
191 	return __ftrace_set_clr_event(NULL, system, event, set);
192 }
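
/*
 * For example (illustrative), another part of the kernel could enable
 * context switch events with:
 *
 *	trace_set_clr_event("sched", "sched_switch", 1);
 *
 * and pass 0 as the last argument later to disable them again.
 */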
193 
194 /* 127 chars plus the terminating NUL (128 bytes) should be much more than enough */
195 #define EVENT_BUF_SIZE		127
196 
197 static ssize_t
198 ftrace_event_write(struct file *file, const char __user *ubuf,
199 		   size_t cnt, loff_t *ppos)
200 {
201 	size_t read = 0;
202 	int i, set = 1;
203 	ssize_t ret;
204 	char *buf;
205 	char ch;
206 
207 	if (!cnt)
208 		return 0;
209 
210 	ret = tracing_update_buffers();
211 	if (ret < 0)
212 		return ret;
213 
214 	ret = get_user(ch, ubuf++);
215 	if (ret)
216 		return ret;
217 	read++;
218 	cnt--;
219 
220 	/* skip white space */
221 	while (cnt && isspace(ch)) {
222 		ret = get_user(ch, ubuf++);
223 		if (ret)
224 			return ret;
225 		read++;
226 		cnt--;
227 	}
228 
229 	/* Only white space found? */
230 	if (isspace(ch)) {
231 		file->f_pos += read;
232 		ret = read;
233 		return ret;
234 	}
235 
236 	buf = kmalloc(EVENT_BUF_SIZE+1, GFP_KERNEL);
237 	if (!buf)
238 		return -ENOMEM;
239 
240 	if (cnt > EVENT_BUF_SIZE)
241 		cnt = EVENT_BUF_SIZE;
242 
243 	i = 0;
244 	while (cnt && !isspace(ch)) {
245 		if (!i && ch == '!')
246 			set = 0;
247 		else
248 			buf[i++] = ch;
249 
250 		ret = get_user(ch, ubuf++);
251 		if (ret)
252 			goto out_free;
253 		read++;
254 		cnt--;
255 	}
256 	buf[i] = 0;
257 
258 	file->f_pos += read;
259 
260 	ret = ftrace_set_clr_event(buf, set);
261 	if (ret)
262 		goto out_free;
263 
264 	ret = read;
265 
266  out_free:
267 	kfree(buf);
268 
269 	return ret;
270 }
271 
272 static void *
273 t_next(struct seq_file *m, void *v, loff_t *pos)
274 {
275 	struct list_head *list = m->private;
276 	struct ftrace_event_call *call;
277 
278 	(*pos)++;
279 
280 	for (;;) {
281 		if (list == &ftrace_events)
282 			return NULL;
283 
284 		call = list_entry(list, struct ftrace_event_call, list);
285 
286 		/*
287 		 * The ftrace subsystem is for showing formats only.
288 		 * Its events cannot be enabled or disabled via the event files.
289 		 */
290 		if (call->regfunc)
291 			break;
292 
293 		list = list->next;
294 	}
295 
296 	m->private = list->next;
297 
298 	return call;
299 }
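
/*
 * Note that m->private always holds the node to resume from, so repeated
 * t_next() calls walk ftrace_events in order while skipping entries that
 * have no regfunc (the format-only ftrace entries mentioned above).
 */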
300 
301 static void *t_start(struct seq_file *m, loff_t *pos)
302 {
303 	struct ftrace_event_call *call = NULL;
304 	loff_t l;
305 
306 	mutex_lock(&event_mutex);
307 
308 	m->private = ftrace_events.next;
309 	for (l = 0; l <= *pos; ) {
310 		call = t_next(m, NULL, &l);
311 		if (!call)
312 			break;
313 	}
314 	return call;
315 }
316 
317 static void *
318 s_next(struct seq_file *m, void *v, loff_t *pos)
319 {
320 	struct list_head *list = m->private;
321 	struct ftrace_event_call *call;
322 
323 	(*pos)++;
324 
325  retry:
326 	if (list == &ftrace_events)
327 		return NULL;
328 
329 	call = list_entry(list, struct ftrace_event_call, list);
330 
331 	if (!call->enabled) {
332 		list = list->next;
333 		goto retry;
334 	}
335 
336 	m->private = list->next;
337 
338 	return call;
339 }
340 
341 static void *s_start(struct seq_file *m, loff_t *pos)
342 {
343 	struct ftrace_event_call *call = NULL;
344 	loff_t l;
345 
346 	mutex_lock(&event_mutex);
347 
348 	m->private = ftrace_events.next;
349 	for (l = 0; l <= *pos; ) {
350 		call = s_next(m, NULL, &l);
351 		if (!call)
352 			break;
353 	}
354 	return call;
355 }
356 
357 static int t_show(struct seq_file *m, void *v)
358 {
359 	struct ftrace_event_call *call = v;
360 
361 	if (strcmp(call->system, TRACE_SYSTEM) != 0)
362 		seq_printf(m, "%s:", call->system);
363 	seq_printf(m, "%s\n", call->name);
364 
365 	return 0;
366 }
367 
368 static void t_stop(struct seq_file *m, void *p)
369 {
370 	mutex_unlock(&event_mutex);
371 }
372 
373 static int
374 ftrace_event_seq_open(struct inode *inode, struct file *file)
375 {
376 	const struct seq_operations *seq_ops;
377 
378 	if ((file->f_mode & FMODE_WRITE) &&
379 	    (file->f_flags & O_TRUNC))
380 		ftrace_clear_events();
381 
382 	seq_ops = inode->i_private;
383 	return seq_open(file, seq_ops);
384 }
385 
386 static ssize_t
387 event_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
388 		  loff_t *ppos)
389 {
390 	struct ftrace_event_call *call = filp->private_data;
391 	char *buf;
392 
393 	if (call->enabled)
394 		buf = "1\n";
395 	else
396 		buf = "0\n";
397 
398 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
399 }
400 
401 static ssize_t
402 event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
403 		   loff_t *ppos)
404 {
405 	struct ftrace_event_call *call = filp->private_data;
406 	char buf[64];
407 	unsigned long val;
408 	int ret;
409 
410 	if (cnt >= sizeof(buf))
411 		return -EINVAL;
412 
413 	if (copy_from_user(&buf, ubuf, cnt))
414 		return -EFAULT;
415 
416 	buf[cnt] = 0;
417 
418 	ret = strict_strtoul(buf, 10, &val);
419 	if (ret < 0)
420 		return ret;
421 
422 	ret = tracing_update_buffers();
423 	if (ret < 0)
424 		return ret;
425 
426 	switch (val) {
427 	case 0:
428 	case 1:
429 		mutex_lock(&event_mutex);
430 		ftrace_event_enable_disable(call, val);
431 		mutex_unlock(&event_mutex);
432 		break;
433 
434 	default:
435 		return -EINVAL;
436 	}
437 
438 	*ppos += cnt;
439 
440 	return cnt;
441 }
442 
443 static ssize_t
444 system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
445 		   loff_t *ppos)
446 {
447 	const char set_to_char[4] = { '?', '0', '1', 'X' };
448 	const char *system = filp->private_data;
449 	struct ftrace_event_call *call;
450 	char buf[2];
451 	int set = 0;
452 	int ret;
453 
454 	mutex_lock(&event_mutex);
455 	list_for_each_entry(call, &ftrace_events, list) {
456 		if (!call->name || !call->regfunc)
457 			continue;
458 
459 		if (system && strcmp(call->system, system) != 0)
460 			continue;
461 
462 		/*
463 		 * We need to find out if all the events are set
464 		 * or if all events are cleared, or if we have
465 		 * a mixture.
466 		 */
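		/*
		 * Concretely: bit 0 of "set" is set once a disabled event
		 * is seen, bit 1 once an enabled one is seen, so "set" ends
		 * up as 1 ('0'), 2 ('1') or 3 ('X') in the set_to_char
		 * table above.
		 */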
467 		set |= (1 << !!call->enabled);
468 
469 		/*
470 		 * If we have a mixture, no need to look further.
471 		 */
472 		if (set == 3)
473 			break;
474 	}
475 	mutex_unlock(&event_mutex);
476 
477 	buf[0] = set_to_char[set];
478 	buf[1] = '\n';
479 
480 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
481 
482 	return ret;
483 }
484 
485 static ssize_t
486 system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
487 		    loff_t *ppos)
488 {
489 	const char *system = filp->private_data;
490 	unsigned long val;
491 	char buf[64];
492 	ssize_t ret;
493 
494 	if (cnt >= sizeof(buf))
495 		return -EINVAL;
496 
497 	if (copy_from_user(&buf, ubuf, cnt))
498 		return -EFAULT;
499 
500 	buf[cnt] = 0;
501 
502 	ret = strict_strtoul(buf, 10, &val);
503 	if (ret < 0)
504 		return ret;
505 
506 	ret = tracing_update_buffers();
507 	if (ret < 0)
508 		return ret;
509 
510 	if (val != 0 && val != 1)
511 		return -EINVAL;
512 
513 	ret = __ftrace_set_clr_event(NULL, system, NULL, val);
514 	if (ret)
515 		goto out;
516 
517 	ret = cnt;
518 
519 out:
520 	*ppos += cnt;
521 
522 	return ret;
523 }
524 
525 extern char *__bad_type_size(void);
526 
527 #undef FIELD
528 #define FIELD(type, name)						\
529 	sizeof(type) != sizeof(field.name) ? __bad_type_size() :	\
530 	#type, "common_" #name, offsetof(typeof(field), name),		\
531 		sizeof(field.name)
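
/*
 * For instance, FIELD(int, pid) expands (after the size check) to the four
 * arguments "int", "common_pid", offsetof(typeof(field), pid) and
 * sizeof(field.pid), filling one %s/%s/%zu/%zu group of the format string
 * in trace_write_header() below.
 */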
532 
533 static int trace_write_header(struct trace_seq *s)
534 {
535 	struct trace_entry field;
536 
537 	/* struct trace_entry */
538 	return trace_seq_printf(s,
539 				"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
540 				"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
541 				"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
542 				"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
543 				"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
544 				"\n",
545 				FIELD(unsigned short, type),
546 				FIELD(unsigned char, flags),
547 				FIELD(unsigned char, preempt_count),
548 				FIELD(int, pid),
549 				FIELD(int, tgid));
550 }
551 
552 static ssize_t
553 event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
554 		  loff_t *ppos)
555 {
556 	struct ftrace_event_call *call = filp->private_data;
557 	struct trace_seq *s;
558 	char *buf;
559 	int r;
560 
561 	if (*ppos)
562 		return 0;
563 
564 	s = kmalloc(sizeof(*s), GFP_KERNEL);
565 	if (!s)
566 		return -ENOMEM;
567 
568 	trace_seq_init(s);
569 
570 	/* If any of the first writes fail, so will the show_format() call below. */
571 
572 	trace_seq_printf(s, "name: %s\n", call->name);
573 	trace_seq_printf(s, "ID: %d\n", call->id);
574 	trace_seq_printf(s, "format:\n");
575 	trace_write_header(s);
576 
577 	r = call->show_format(s);
578 	if (!r) {
579 		/*
580 		 * ug!  The format output is bigger than a PAGE!!
581 		 */
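		/*
		 * (The trace_seq buffer is a single page, so a zero return
		 * from show_format() means the format did not fit.)
		 */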
582 		buf = "FORMAT TOO BIG\n";
583 		r = simple_read_from_buffer(ubuf, cnt, ppos,
584 					      buf, strlen(buf));
585 		goto out;
586 	}
587 
588 	r = simple_read_from_buffer(ubuf, cnt, ppos,
589 				    s->buffer, s->len);
590  out:
591 	kfree(s);
592 	return r;
593 }
594 
595 static ssize_t
596 event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
597 {
598 	struct ftrace_event_call *call = filp->private_data;
599 	struct trace_seq *s;
600 	int r;
601 
602 	if (*ppos)
603 		return 0;
604 
605 	s = kmalloc(sizeof(*s), GFP_KERNEL);
606 	if (!s)
607 		return -ENOMEM;
608 
609 	trace_seq_init(s);
610 	trace_seq_printf(s, "%d\n", call->id);
611 
612 	r = simple_read_from_buffer(ubuf, cnt, ppos,
613 				    s->buffer, s->len);
614 	kfree(s);
615 	return r;
616 }
617 
618 static ssize_t
619 event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
620 		  loff_t *ppos)
621 {
622 	struct ftrace_event_call *call = filp->private_data;
623 	struct trace_seq *s;
624 	int r;
625 
626 	if (*ppos)
627 		return 0;
628 
629 	s = kmalloc(sizeof(*s), GFP_KERNEL);
630 	if (!s)
631 		return -ENOMEM;
632 
633 	trace_seq_init(s);
634 
635 	print_event_filter(call, s);
636 	r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
637 
638 	kfree(s);
639 
640 	return r;
641 }
642 
643 static ssize_t
644 event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
645 		   loff_t *ppos)
646 {
647 	struct ftrace_event_call *call = filp->private_data;
648 	char *buf;
649 	int err;
650 
651 	if (cnt >= PAGE_SIZE)
652 		return -EINVAL;
653 
654 	buf = (char *)__get_free_page(GFP_TEMPORARY);
655 	if (!buf)
656 		return -ENOMEM;
657 
658 	if (copy_from_user(buf, ubuf, cnt)) {
659 		free_page((unsigned long) buf);
660 		return -EFAULT;
661 	}
662 	buf[cnt] = '\0';
663 
664 	err = apply_event_filter(call, buf);
665 	free_page((unsigned long) buf);
666 	if (err < 0)
667 		return err;
668 
669 	*ppos += cnt;
670 
671 	return cnt;
672 }
673 
674 static ssize_t
675 subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
676 		      loff_t *ppos)
677 {
678 	struct event_subsystem *system = filp->private_data;
679 	struct trace_seq *s;
680 	int r;
681 
682 	if (*ppos)
683 		return 0;
684 
685 	s = kmalloc(sizeof(*s), GFP_KERNEL);
686 	if (!s)
687 		return -ENOMEM;
688 
689 	trace_seq_init(s);
690 
691 	print_subsystem_event_filter(system, s);
692 	r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
693 
694 	kfree(s);
695 
696 	return r;
697 }
698 
699 static ssize_t
700 subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
701 		       loff_t *ppos)
702 {
703 	struct event_subsystem *system = filp->private_data;
704 	char *buf;
705 	int err;
706 
707 	if (cnt >= PAGE_SIZE)
708 		return -EINVAL;
709 
710 	buf = (char *)__get_free_page(GFP_TEMPORARY);
711 	if (!buf)
712 		return -ENOMEM;
713 
714 	if (copy_from_user(buf, ubuf, cnt)) {
715 		free_page((unsigned long) buf);
716 		return -EFAULT;
717 	}
718 	buf[cnt] = '\0';
719 
720 	err = apply_subsystem_event_filter(system, buf);
721 	free_page((unsigned long) buf);
722 	if (err < 0)
723 		return err;
724 
725 	*ppos += cnt;
726 
727 	return cnt;
728 }
729 
730 static ssize_t
731 show_header(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
732 {
733 	int (*func)(struct trace_seq *s) = filp->private_data;
734 	struct trace_seq *s;
735 	int r;
736 
737 	if (*ppos)
738 		return 0;
739 
740 	s = kmalloc(sizeof(*s), GFP_KERNEL);
741 	if (!s)
742 		return -ENOMEM;
743 
744 	trace_seq_init(s);
745 
746 	func(s);
747 	r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
748 
749 	kfree(s);
750 
751 	return r;
752 }
753 
754 static const struct seq_operations show_event_seq_ops = {
755 	.start = t_start,
756 	.next = t_next,
757 	.show = t_show,
758 	.stop = t_stop,
759 };
760 
761 static const struct seq_operations show_set_event_seq_ops = {
762 	.start = s_start,
763 	.next = s_next,
764 	.show = t_show,
765 	.stop = t_stop,
766 };
767 
768 static const struct file_operations ftrace_avail_fops = {
769 	.open = ftrace_event_seq_open,
770 	.read = seq_read,
771 	.llseek = seq_lseek,
772 	.release = seq_release,
773 };
774 
775 static const struct file_operations ftrace_set_event_fops = {
776 	.open = ftrace_event_seq_open,
777 	.read = seq_read,
778 	.write = ftrace_event_write,
779 	.llseek = seq_lseek,
780 	.release = seq_release,
781 };
782 
783 static const struct file_operations ftrace_enable_fops = {
784 	.open = tracing_open_generic,
785 	.read = event_enable_read,
786 	.write = event_enable_write,
787 };
788 
789 static const struct file_operations ftrace_event_format_fops = {
790 	.open = tracing_open_generic,
791 	.read = event_format_read,
792 };
793 
794 static const struct file_operations ftrace_event_id_fops = {
795 	.open = tracing_open_generic,
796 	.read = event_id_read,
797 };
798 
799 static const struct file_operations ftrace_event_filter_fops = {
800 	.open = tracing_open_generic,
801 	.read = event_filter_read,
802 	.write = event_filter_write,
803 };
804 
805 static const struct file_operations ftrace_subsystem_filter_fops = {
806 	.open = tracing_open_generic,
807 	.read = subsystem_filter_read,
808 	.write = subsystem_filter_write,
809 };
810 
811 static const struct file_operations ftrace_system_enable_fops = {
812 	.open = tracing_open_generic,
813 	.read = system_enable_read,
814 	.write = system_enable_write,
815 };
816 
817 static const struct file_operations ftrace_show_header_fops = {
818 	.open = tracing_open_generic,
819 	.read = show_header,
820 };
821 
822 static struct dentry *event_trace_events_dir(void)
823 {
824 	static struct dentry *d_tracer;
825 	static struct dentry *d_events;
826 
827 	if (d_events)
828 		return d_events;
829 
830 	d_tracer = tracing_init_dentry();
831 	if (!d_tracer)
832 		return NULL;
833 
834 	d_events = debugfs_create_dir("events", d_tracer);
835 	if (!d_events)
836 		pr_warning("Could not create debugfs "
837 			   "'events' directory\n");
838 
839 	return d_events;
840 }
841 
842 static LIST_HEAD(event_subsystems);
843 
844 static struct dentry *
845 event_subsystem_dir(const char *name, struct dentry *d_events)
846 {
847 	struct event_subsystem *system;
848 	struct dentry *entry;
849 
850 	/* First see if we already created this dir */
851 	list_for_each_entry(system, &event_subsystems, list) {
852 		if (strcmp(system->name, name) == 0)
853 			return system->entry;
854 	}
855 
856 	/* need to create new entry */
857 	system = kmalloc(sizeof(*system), GFP_KERNEL);
858 	if (!system) {
859 		pr_warning("No memory to create event subsystem %s\n",
860 			   name);
861 		return d_events;
862 	}
863 
864 	system->entry = debugfs_create_dir(name, d_events);
865 	if (!system->entry) {
866 		pr_warning("Could not create event subsystem %s\n",
867 			   name);
868 		kfree(system);
869 		return d_events;
870 	}
871 
872 	system->name = kstrdup(name, GFP_KERNEL);
873 	if (!system->name) {
874 		debugfs_remove(system->entry);
875 		kfree(system);
876 		return d_events;
877 	}
878 
879 	list_add(&system->list, &event_subsystems);
880 
881 	system->filter = NULL;
882 
883 	system->filter = kzalloc(sizeof(struct event_filter), GFP_KERNEL);
884 	if (!system->filter) {
885 		pr_warning("Could not allocate filter for subsystem "
886 			   "'%s'\n", name);
887 		return system->entry;
888 	}
889 
890 	entry = debugfs_create_file("filter", 0644, system->entry, system,
891 				    &ftrace_subsystem_filter_fops);
892 	if (!entry) {
893 		kfree(system->filter);
894 		system->filter = NULL;
895 		pr_warning("Could not create debugfs "
896 			   "'%s/filter' entry\n", name);
897 	}
898 
899 	entry = trace_create_file("enable", 0644, system->entry,
900 				  (void *)system->name,
901 				  &ftrace_system_enable_fops);
902 
903 	return system->entry;
904 }
905 
906 static int
907 event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
908 		 const struct file_operations *id,
909 		 const struct file_operations *enable,
910 		 const struct file_operations *filter,
911 		 const struct file_operations *format)
912 {
913 	struct dentry *entry;
914 	int ret;
915 
916 	/*
917 	 * If the trace point header did not define TRACE_SYSTEM
918 	 * then the system would be called "TRACE_SYSTEM".
919 	 */
920 	if (strcmp(call->system, TRACE_SYSTEM) != 0)
921 		d_events = event_subsystem_dir(call->system, d_events);
922 
923 	if (call->raw_init) {
924 		ret = call->raw_init();
925 		if (ret < 0) {
926 			pr_warning("Could not initialize trace point"
927 				   " events/%s\n", call->name);
928 			return ret;
929 		}
930 	}
931 
932 	call->dir = debugfs_create_dir(call->name, d_events);
933 	if (!call->dir) {
934 		pr_warning("Could not create debugfs "
935 			   "'%s' directory\n", call->name);
936 		return -1;
937 	}
938 
939 	if (call->regfunc)
940 		entry = trace_create_file("enable", 0644, call->dir, call,
941 					  enable);
942 
943 	if (call->id && call->profile_enable)
944 		entry = trace_create_file("id", 0444, call->dir, call,
945 					  id);
946 
947 	if (call->define_fields) {
948 		ret = call->define_fields();
949 		if (ret < 0) {
950 			pr_warning("Could not initialize trace point"
951 				   " events/%s\n", call->name);
952 			return ret;
953 		}
954 		entry = trace_create_file("filter", 0644, call->dir, call,
955 					  filter);
956 	}
957 
958 	/* A trace may not want to export its format */
959 	if (!call->show_format)
960 		return 0;
961 
962 	entry = trace_create_file("format", 0444, call->dir, call,
963 				  format);
964 
965 	return 0;
966 }
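
/*
 * The resulting debugfs layout is roughly
 * events/<system>/<event>/{enable,id,filter,format}, with each file only
 * created when the event supplies the matching callback (regfunc,
 * profile_enable, define_fields, show_format) and the <system> directory
 * only used when the event belongs to a real subsystem.
 */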
967 
968 #define for_each_event(event, start, end)			\
969 	for (event = start;					\
970 	     (unsigned long)event < (unsigned long)end;		\
971 	     event++)
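
/*
 * The unsigned long comparison simply walks the contiguous array of
 * ftrace_event_call entries between start and end: the linker section
 * bounded by __start_ftrace_events/__stop_ftrace_events for built-in
 * events, or mod->trace_events for a module.
 */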
972 
973 #ifdef CONFIG_MODULES
974 
975 static LIST_HEAD(ftrace_module_file_list);
976 
977 /*
978  * Modules must own their file_operations to keep up with
979  * reference counting.
980  */
981 struct ftrace_module_file_ops {
982 	struct list_head		list;
983 	struct module			*mod;
984 	struct file_operations		id;
985 	struct file_operations		enable;
986 	struct file_operations		format;
987 	struct file_operations		filter;
988 };
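
/*
 * Each copy below has .owner set to the module, so an open debugfs file
 * holds a reference on the module and these file_operations cannot go
 * away while they are in use.
 */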
989 
990 static struct ftrace_module_file_ops *
991 trace_create_file_ops(struct module *mod)
992 {
993 	struct ftrace_module_file_ops *file_ops;
994 
995 	/*
996 	 * This is a bit of a PITA. To allow for correct reference
997 	 * counting, modules must "own" their file_operations.
998 	 * To do this, we allocate the file operations that will be
999 	 * used in the event directory.
1000 	 */
1001 
1002 	file_ops = kmalloc(sizeof(*file_ops), GFP_KERNEL);
1003 	if (!file_ops)
1004 		return NULL;
1005 
1006 	file_ops->mod = mod;
1007 
1008 	file_ops->id = ftrace_event_id_fops;
1009 	file_ops->id.owner = mod;
1010 
1011 	file_ops->enable = ftrace_enable_fops;
1012 	file_ops->enable.owner = mod;
1013 
1014 	file_ops->filter = ftrace_event_filter_fops;
1015 	file_ops->filter.owner = mod;
1016 
1017 	file_ops->format = ftrace_event_format_fops;
1018 	file_ops->format.owner = mod;
1019 
1020 	list_add(&file_ops->list, &ftrace_module_file_list);
1021 
1022 	return file_ops;
1023 }
1024 
1025 static void trace_module_add_events(struct module *mod)
1026 {
1027 	struct ftrace_module_file_ops *file_ops = NULL;
1028 	struct ftrace_event_call *call, *start, *end;
1029 	struct dentry *d_events;
1030 
1031 	start = mod->trace_events;
1032 	end = mod->trace_events + mod->num_trace_events;
1033 
1034 	if (start == end)
1035 		return;
1036 
1037 	d_events = event_trace_events_dir();
1038 	if (!d_events)
1039 		return;
1040 
1041 	for_each_event(call, start, end) {
1042 		/* The linker may leave blanks */
1043 		if (!call->name)
1044 			continue;
1045 
1046 		/*
1047 		 * This module has events, create file ops for this module
1048 		 * if not already done.
1049 		 */
1050 		if (!file_ops) {
1051 			file_ops = trace_create_file_ops(mod);
1052 			if (!file_ops)
1053 				return;
1054 		}
1055 		call->mod = mod;
1056 		list_add(&call->list, &ftrace_events);
1057 		event_create_dir(call, d_events,
1058 				 &file_ops->id, &file_ops->enable,
1059 				 &file_ops->filter, &file_ops->format);
1060 	}
1061 }
1062 
1063 static void trace_module_remove_events(struct module *mod)
1064 {
1065 	struct ftrace_module_file_ops *file_ops;
1066 	struct ftrace_event_call *call, *p;
1067 	bool found = false;
1068 
1069 	down_write(&trace_event_mutex);
1070 	list_for_each_entry_safe(call, p, &ftrace_events, list) {
1071 		if (call->mod == mod) {
1072 			found = true;
1073 			ftrace_event_enable_disable(call, 0);
1074 			if (call->event)
1075 				__unregister_ftrace_event(call->event);
1076 			debugfs_remove_recursive(call->dir);
1077 			list_del(&call->list);
1078 			trace_destroy_fields(call);
1079 			destroy_preds(call);
1080 		}
1081 	}
1082 
1083 	/* Now free the file_operations */
1084 	list_for_each_entry(file_ops, &ftrace_module_file_list, list) {
1085 		if (file_ops->mod == mod)
1086 			break;
1087 	}
1088 	if (&file_ops->list != &ftrace_module_file_list) {
1089 		list_del(&file_ops->list);
1090 		kfree(file_ops);
1091 	}
1092 
1093 	/*
1094 	 * It is safest to reset the ring buffer if the module being unloaded
1095 	 * registered any events.
1096 	 */
1097 	if (found)
1098 		tracing_reset_current_online_cpus();
1099 	up_write(&trace_event_mutex);
1100 }
1101 
1102 static int trace_module_notify(struct notifier_block *self,
1103 			       unsigned long val, void *data)
1104 {
1105 	struct module *mod = data;
1106 
1107 	mutex_lock(&event_mutex);
1108 	switch (val) {
1109 	case MODULE_STATE_COMING:
1110 		trace_module_add_events(mod);
1111 		break;
1112 	case MODULE_STATE_GOING:
1113 		trace_module_remove_events(mod);
1114 		break;
1115 	}
1116 	mutex_unlock(&event_mutex);
1117 
1118 	return 0;
1119 }
1120 #else
1121 static int trace_module_notify(struct notifier_block *self,
1122 			       unsigned long val, void *data)
1123 {
1124 	return 0;
1125 }
1126 #endif /* CONFIG_MODULES */
1127 
1128 struct notifier_block trace_module_nb = {
1129 	.notifier_call = trace_module_notify,
1130 	.priority = 0,
1131 };
1132 
1133 extern struct ftrace_event_call __start_ftrace_events[];
1134 extern struct ftrace_event_call __stop_ftrace_events[];
1135 
1136 static __init int event_trace_init(void)
1137 {
1138 	struct ftrace_event_call *call;
1139 	struct dentry *d_tracer;
1140 	struct dentry *entry;
1141 	struct dentry *d_events;
1142 	int ret;
1143 
1144 	d_tracer = tracing_init_dentry();
1145 	if (!d_tracer)
1146 		return 0;
1147 
1148 	entry = debugfs_create_file("available_events", 0444, d_tracer,
1149 				    (void *)&show_event_seq_ops,
1150 				    &ftrace_avail_fops);
1151 	if (!entry)
1152 		pr_warning("Could not create debugfs "
1153 			   "'available_events' entry\n");
1154 
1155 	entry = debugfs_create_file("set_event", 0644, d_tracer,
1156 				    (void *)&show_set_event_seq_ops,
1157 				    &ftrace_set_event_fops);
1158 	if (!entry)
1159 		pr_warning("Could not create debugfs "
1160 			   "'set_event' entry\n");
1161 
1162 	d_events = event_trace_events_dir();
1163 	if (!d_events)
1164 		return 0;
1165 
1166 	/* ring buffer internal formats */
1167 	trace_create_file("header_page", 0444, d_events,
1168 			  ring_buffer_print_page_header,
1169 			  &ftrace_show_header_fops);
1170 
1171 	trace_create_file("header_event", 0444, d_events,
1172 			  ring_buffer_print_entry_header,
1173 			  &ftrace_show_header_fops);
1174 
1175 	trace_create_file("enable", 0644, d_events,
1176 			  NULL, &ftrace_system_enable_fops);
1177 
1178 	for_each_event(call, __start_ftrace_events, __stop_ftrace_events) {
1179 		/* The linker may leave blanks */
1180 		if (!call->name)
1181 			continue;
1182 		list_add(&call->list, &ftrace_events);
1183 		event_create_dir(call, d_events, &ftrace_event_id_fops,
1184 				 &ftrace_enable_fops, &ftrace_event_filter_fops,
1185 				 &ftrace_event_format_fops);
1186 	}
1187 
1188 	ret = register_module_notifier(&trace_module_nb);
1189 	if (ret)
1190 		pr_warning("Failed to register trace events module notifier\n");
1191 
1192 	return 0;
1193 }
1194 fs_initcall(event_trace_init);
1195 
1196 #ifdef CONFIG_FTRACE_STARTUP_TEST
1197 
1198 static DEFINE_SPINLOCK(test_spinlock);
1199 static DEFINE_SPINLOCK(test_spinlock_irq);
1200 static DEFINE_MUTEX(test_mutex);
1201 
1202 static __init void test_work(struct work_struct *dummy)
1203 {
1204 	spin_lock(&test_spinlock);
1205 	spin_lock_irq(&test_spinlock_irq);
1206 	udelay(1);
1207 	spin_unlock_irq(&test_spinlock_irq);
1208 	spin_unlock(&test_spinlock);
1209 
1210 	mutex_lock(&test_mutex);
1211 	msleep(1);
1212 	mutex_unlock(&test_mutex);
1213 }
1214 
1215 static __init int event_test_thread(void *unused)
1216 {
1217 	void *test_malloc;
1218 
1219 	test_malloc = kmalloc(1234, GFP_KERNEL);
1220 	if (!test_malloc)
1221 		pr_info("failed to kmalloc\n");
1222 
1223 	schedule_on_each_cpu(test_work);
1224 
1225 	kfree(test_malloc);
1226 
1227 	set_current_state(TASK_INTERRUPTIBLE);
1228 	while (!kthread_should_stop())
1229 		schedule();
1230 
1231 	return 0;
1232 }
1233 
1234 /*
1235  * Do various things that may trigger events.
1236  */
1237 static __init void event_test_stuff(void)
1238 {
1239 	struct task_struct *test_thread;
1240 
1241 	test_thread = kthread_run(event_test_thread, NULL, "test-events");
1242 	msleep(1);
1243 	kthread_stop(test_thread);
1244 }
1245 
1246 /*
1247  * For every trace event defined, we will test each trace point separately,
1248  * and then by groups, and finally all trace points.
1249  */
1250 static __init void event_trace_self_tests(void)
1251 {
1252 	struct ftrace_event_call *call;
1253 	struct event_subsystem *system;
1254 	int ret;
1255 
1256 	pr_info("Running tests on trace events:\n");
1257 
1258 	list_for_each_entry(call, &ftrace_events, list) {
1259 
1260 		/* Only test those that have a regfunc */
1261 		if (!call->regfunc)
1262 			continue;
1263 
1264 		pr_info("Testing event %s: ", call->name);
1265 
1266 		/*
1267 		 * If an event is already enabled, someone is using
1268 		 * it and the self test should not be on.
1269 		 */
1270 		if (call->enabled) {
1271 			pr_warning("Enabled event during self test!\n");
1272 			WARN_ON_ONCE(1);
1273 			continue;
1274 		}
1275 
1276 		ftrace_event_enable_disable(call, 1);
1277 		event_test_stuff();
1278 		ftrace_event_enable_disable(call, 0);
1279 
1280 		pr_cont("OK\n");
1281 	}
1282 
1283 	/* Now test at the sub system level */
1284 
1285 	pr_info("Running tests on trace event systems:\n");
1286 
1287 	list_for_each_entry(system, &event_subsystems, list) {
1288 
1289 		/* the ftrace system is special, skip it */
1290 		if (strcmp(system->name, "ftrace") == 0)
1291 			continue;
1292 
1293 		pr_info("Testing event system %s: ", system->name);
1294 
1295 		ret = __ftrace_set_clr_event(NULL, system->name, NULL, 1);
1296 		if (WARN_ON_ONCE(ret)) {
1297 			pr_warning("error enabling system %s\n",
1298 				   system->name);
1299 			continue;
1300 		}
1301 
1302 		event_test_stuff();
1303 
1304 		ret = __ftrace_set_clr_event(NULL, system->name, NULL, 0);
1305 		if (WARN_ON_ONCE(ret))
1306 			pr_warning("error disabling system %s\n",
1307 				   system->name);
1308 
1309 		pr_cont("OK\n");
1310 	}
1311 
1312 	/* Test with all events enabled */
1313 
1314 	pr_info("Running tests on all trace events:\n");
1315 	pr_info("Testing all events: ");
1316 
1317 	ret = __ftrace_set_clr_event(NULL, NULL, NULL, 1);
1318 	if (WARN_ON_ONCE(ret)) {
1319 		pr_warning("error enabling all events\n");
1320 		return;
1321 	}
1322 
1323 	event_test_stuff();
1324 
1325 	/* now disable all events again */
1326 	ret = __ftrace_set_clr_event(NULL, NULL, NULL, 0);
1327 	if (WARN_ON_ONCE(ret)) {
1328 		pr_warning("error disabling all events\n");
1329 		return;
1330 	}
1331 
1332 	pr_cont("OK\n");
1333 }
1334 
1335 #ifdef CONFIG_FUNCTION_TRACER
1336 
1337 static DEFINE_PER_CPU(atomic_t, test_event_disable);
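
/*
 * Per-cpu recursion guard: function_test_events_call() below only writes
 * a trace entry when its increment brings the counter to exactly 1, so a
 * nested call on the same CPU bails out instead of recursing.
 */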
1338 
1339 static void
1340 function_test_events_call(unsigned long ip, unsigned long parent_ip)
1341 {
1342 	struct ring_buffer_event *event;
1343 	struct ftrace_entry *entry;
1344 	unsigned long flags;
1345 	long disabled;
1346 	int resched;
1347 	int cpu;
1348 	int pc;
1349 
1350 	pc = preempt_count();
1351 	resched = ftrace_preempt_disable();
1352 	cpu = raw_smp_processor_id();
1353 	disabled = atomic_inc_return(&per_cpu(test_event_disable, cpu));
1354 
1355 	if (disabled != 1)
1356 		goto out;
1357 
1358 	local_save_flags(flags);
1359 
1360 	event = trace_current_buffer_lock_reserve(TRACE_FN, sizeof(*entry),
1361 						  flags, pc);
1362 	if (!event)
1363 		goto out;
1364 	entry	= ring_buffer_event_data(event);
1365 	entry->ip			= ip;
1366 	entry->parent_ip		= parent_ip;
1367 
1368 	trace_nowake_buffer_unlock_commit(event, flags, pc);
1369 
1370  out:
1371 	atomic_dec(&per_cpu(test_event_disable, cpu));
1372 	ftrace_preempt_enable(resched);
1373 }
1374 
1375 static struct ftrace_ops trace_ops __initdata =
1376 {
1377 	.func = function_test_events_call,
1378 };
1379 
1380 static __init void event_trace_self_test_with_function(void)
1381 {
1382 	register_ftrace_function(&trace_ops);
1383 	pr_info("Running tests again, along with the function tracer\n");
1384 	event_trace_self_tests();
1385 	unregister_ftrace_function(&trace_ops);
1386 }
1387 #else
1388 static __init void event_trace_self_test_with_function(void)
1389 {
1390 }
1391 #endif
1392 
1393 static __init int event_trace_self_tests_init(void)
1394 {
1395 
1396 	event_trace_self_tests();
1397 
1398 	event_trace_self_test_with_function();
1399 
1400 	return 0;
1401 }
1402 
1403 late_initcall(event_trace_self_tests_init);
1404 
1405 #endif
1406