xref: /linux/kernel/trace/trace_events.c (revision e27ecdd94d81e5bc3d1f68591701db5adb342f0d)
1 /*
2  * event tracer
3  *
4  * Copyright (C) 2008 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
5  *
6  *  - Added format output of fields of the trace point.
7  *    This was based on work by Tom Zanussi <tzanussi@gmail.com>.
8  *
9  */
10 
11 #include <linux/workqueue.h>
12 #include <linux/spinlock.h>
13 #include <linux/kthread.h>
14 #include <linux/debugfs.h>
15 #include <linux/uaccess.h>
16 #include <linux/module.h>
17 #include <linux/ctype.h>
18 #include <linux/delay.h>
19 
20 #include "trace_output.h"
21 
22 #define TRACE_SYSTEM "TRACE_SYSTEM"
23 
24 DEFINE_MUTEX(event_mutex);
25 
26 LIST_HEAD(ftrace_events);
27 
28 int trace_define_field(struct ftrace_event_call *call, char *type,
29 		       char *name, int offset, int size, int is_signed)
30 {
31 	struct ftrace_event_field *field;
32 
33 	field = kzalloc(sizeof(*field), GFP_KERNEL);
34 	if (!field)
35 		goto err;
36 
37 	field->name = kstrdup(name, GFP_KERNEL);
38 	if (!field->name)
39 		goto err;
40 
41 	field->type = kstrdup(type, GFP_KERNEL);
42 	if (!field->type)
43 		goto err;
44 
45 	field->offset = offset;
46 	field->size = size;
47 	field->is_signed = is_signed;
48 	list_add(&field->link, &call->fields);
49 
50 	return 0;
51 
52 err:
53 	if (field) {
54 		kfree(field->name);
55 		kfree(field->type);
56 	}
57 	kfree(field);
58 
59 	return -ENOMEM;
60 }
61 EXPORT_SYMBOL_GPL(trace_define_field);
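
/*
 * Usage sketch: trace_define_field() is what an event's define_fields()
 * callback uses to describe each member of its entry record.  The
 * structure and callback below are hypothetical, not part of this file:
 *
 *	struct my_entry {
 *		struct trace_entry	ent;
 *		unsigned long		addr;
 *		int			len;
 *	};
 *
 *	static int my_event_define_fields(void)
 *	{
 *		struct my_entry field;
 *		int ret;
 *
 *		ret = trace_define_field(&my_event_call, "unsigned long", "addr",
 *					 offsetof(typeof(field), addr),
 *					 sizeof(field.addr), 0);
 *		if (ret)
 *			return ret;
 *
 *		return trace_define_field(&my_event_call, "int", "len",
 *					  offsetof(typeof(field), len),
 *					  sizeof(field.len), 1);
 *	}
 */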
62 
63 #ifdef CONFIG_MODULES
64 
65 static void trace_destroy_fields(struct ftrace_event_call *call)
66 {
67 	struct ftrace_event_field *field, *next;
68 
69 	list_for_each_entry_safe(field, next, &call->fields, link) {
70 		list_del(&field->link);
71 		kfree(field->type);
72 		kfree(field->name);
73 		kfree(field);
74 	}
75 }
76 
77 #endif /* CONFIG_MODULES */
78 
79 static void ftrace_event_enable_disable(struct ftrace_event_call *call,
80 					int enable)
81 {
82 	switch (enable) {
83 	case 0:
84 		if (call->enabled) {
85 			call->enabled = 0;
86 			tracing_stop_cmdline_record();
87 			call->unregfunc();
88 		}
89 		break;
90 	case 1:
91 		if (!call->enabled) {
92 			call->enabled = 1;
93 			tracing_start_cmdline_record();
94 			call->regfunc();
95 		}
96 		break;
97 	}
98 }
99 
100 static void ftrace_clear_events(void)
101 {
102 	struct ftrace_event_call *call;
103 
104 	mutex_lock(&event_mutex);
105 	list_for_each_entry(call, &ftrace_events, list) {
106 		ftrace_event_enable_disable(call, 0);
107 	}
108 	mutex_unlock(&event_mutex);
109 }
110 
111 /*
112  * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events.
113  */
114 static int __ftrace_set_clr_event(const char *match, const char *sub,
115 				  const char *event, int set)
116 {
117 	struct ftrace_event_call *call;
118 	int ret = -EINVAL;
119 
120 	mutex_lock(&event_mutex);
121 	list_for_each_entry(call, &ftrace_events, list) {
122 
123 		if (!call->name || !call->regfunc)
124 			continue;
125 
126 		if (match &&
127 		    strcmp(match, call->name) != 0 &&
128 		    strcmp(match, call->system) != 0)
129 			continue;
130 
131 		if (sub && strcmp(sub, call->system) != 0)
132 			continue;
133 
134 		if (event && strcmp(event, call->name) != 0)
135 			continue;
136 
137 		ftrace_event_enable_disable(call, set);
138 
139 		ret = 0;
140 	}
141 	mutex_unlock(&event_mutex);
142 
143 	return ret;
144 }
145 
146 static int ftrace_set_clr_event(char *buf, int set)
147 {
148 	char *event = NULL, *sub = NULL, *match;
149 
150 	/*
151 	 * The buf format can be <subsystem>:<event-name>
152 	 *  *:<event-name> means any event by that name.
153 	 *  :<event-name> is the same.
154 	 *
155 	 *  <subsystem>:* means all events in that subsystem
156 	 *  <subsystem>: means the same.
157 	 *
158 	 *  <name> (no ':') means all events in a subsystem with
159 	 *  the name <name> or any event that matches <name>
160 	 */
161 
162 	match = strsep(&buf, ":");
163 	if (buf) {
164 		sub = match;
165 		event = buf;
166 		match = NULL;
167 
168 		if (!strlen(sub) || strcmp(sub, "*") == 0)
169 			sub = NULL;
170 		if (!strlen(event) || strcmp(event, "*") == 0)
171 			event = NULL;
172 	}
173 
174 	return __ftrace_set_clr_event(match, sub, event, set);
175 }
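
/*
 * Parsing sketch: a few example strings (the subsystem and event names
 * are illustrative) and the __ftrace_set_clr_event() arguments they become:
 *
 *	"sched:sched_wakeup"	-> (NULL, "sched", "sched_wakeup", set)
 *	"sched:" or "sched:*"	-> (NULL, "sched", NULL, set)
 *	"*:sched_wakeup"	-> (NULL, NULL, "sched_wakeup", set)
 *	"sched_wakeup"		-> ("sched_wakeup", NULL, NULL, set)
 */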
176 
177 /**
178  * trace_set_clr_event - enable or disable an event
179  * @system: system name to match (NULL for any system)
180  * @event: event name to match (NULL for all events, within system)
181  * @set: 1 to enable, 0 to disable
182  *
183  * This is a way for other parts of the kernel to enable or disable
184  * event recording.
185  *
186  * Returns 0 on success, -EINVAL if the parameters do not match any
187  * registered events.
188  */
189 int trace_set_clr_event(const char *system, const char *event, int set)
190 {
191 	return __ftrace_set_clr_event(NULL, system, event, set);
192 }
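
/*
 * Usage sketch (the subsystem and event names are illustrative): other
 * kernel code can toggle event recording by name, for example:
 *
 *	trace_set_clr_event("sched", "sched_wakeup", 1);   enable one event
 *	trace_set_clr_event("sched", NULL, 0);              disable a whole system
 *	trace_set_clr_event(NULL, NULL, 1);                 enable every event
 */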
193 
194 /* 127 chars plus a terminating NUL should be much more than enough */
195 #define EVENT_BUF_SIZE		127
196 
197 static ssize_t
198 ftrace_event_write(struct file *file, const char __user *ubuf,
199 		   size_t cnt, loff_t *ppos)
200 {
201 	size_t read = 0;
202 	int i, set = 1;
203 	ssize_t ret;
204 	char *buf;
205 	char ch;
206 
207 	if (!cnt)
208 		return 0;
209 
210 	ret = tracing_update_buffers();
211 	if (ret < 0)
212 		return ret;
213 
214 	ret = get_user(ch, ubuf++);
215 	if (ret)
216 		return ret;
217 	read++;
218 	cnt--;
219 
220 	/* skip white space */
221 	while (cnt && isspace(ch)) {
222 		ret = get_user(ch, ubuf++);
223 		if (ret)
224 			return ret;
225 		read++;
226 		cnt--;
227 	}
228 
229 	/* Only white space found? */
230 	if (isspace(ch)) {
231 		file->f_pos += read;
232 		ret = read;
233 		return ret;
234 	}
235 
236 	buf = kmalloc(EVENT_BUF_SIZE+1, GFP_KERNEL);
237 	if (!buf)
238 		return -ENOMEM;
239 
240 	if (cnt > EVENT_BUF_SIZE)
241 		cnt = EVENT_BUF_SIZE;
242 
243 	i = 0;
244 	while (cnt && !isspace(ch)) {
245 		if (!i && ch == '!')
246 			set = 0;
247 		else
248 			buf[i++] = ch;
249 
250 		ret = get_user(ch, ubuf++);
251 		if (ret)
252 			goto out_free;
253 		read++;
254 		cnt--;
255 	}
256 	buf[i] = 0;
257 
258 	file->f_pos += read;
259 
260 	ret = ftrace_set_clr_event(buf, set);
261 	if (ret)
262 		goto out_free;
263 
264 	ret = read;
265 
266  out_free:
267 	kfree(buf);
268 
269 	return ret;
270 }
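
/*
 * Example from user space (illustrative, assuming debugfs is mounted at
 * /sys/kernel/debug):
 *
 *	echo sched:sched_wakeup >> /sys/kernel/debug/tracing/set_event
 *	echo '!sched:sched_wakeup' >> /sys/kernel/debug/tracing/set_event
 *
 * A leading '!' clears the named event instead of setting it.  Opening
 * set_event for writing without O_APPEND (plain '>') first clears all
 * events via ftrace_event_seq_open() below.
 */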
271 
272 static void *
273 t_next(struct seq_file *m, void *v, loff_t *pos)
274 {
275 	struct list_head *list = m->private;
276 	struct ftrace_event_call *call;
277 
278 	(*pos)++;
279 
280 	for (;;) {
281 		if (list == &ftrace_events)
282 			return NULL;
283 
284 		call = list_entry(list, struct ftrace_event_call, list);
285 
286 		/*
287 		 * The ftrace subsystem is for showing formats only.
288 		 * They cannot be enabled or disabled via the event files.
289 		 */
290 		if (call->regfunc)
291 			break;
292 
293 		list = list->next;
294 	}
295 
296 	m->private = list->next;
297 
298 	return call;
299 }
300 
301 static void *t_start(struct seq_file *m, loff_t *pos)
302 {
303 	mutex_lock(&event_mutex);
304 	if (*pos == 0)
305 		m->private = ftrace_events.next;
306 	return t_next(m, NULL, pos);
307 }
308 
309 static void *
310 s_next(struct seq_file *m, void *v, loff_t *pos)
311 {
312 	struct list_head *list = m->private;
313 	struct ftrace_event_call *call;
314 
315 	(*pos)++;
316 
317  retry:
318 	if (list == &ftrace_events)
319 		return NULL;
320 
321 	call = list_entry(list, struct ftrace_event_call, list);
322 
323 	if (!call->enabled) {
324 		list = list->next;
325 		goto retry;
326 	}
327 
328 	m->private = list->next;
329 
330 	return call;
331 }
332 
333 static void *s_start(struct seq_file *m, loff_t *pos)
334 {
335 	mutex_lock(&event_mutex);
336 	if (*pos == 0)
337 		m->private = ftrace_events.next;
338 	return s_next(m, NULL, pos);
339 }
340 
341 static int t_show(struct seq_file *m, void *v)
342 {
343 	struct ftrace_event_call *call = v;
344 
345 	if (strcmp(call->system, TRACE_SYSTEM) != 0)
346 		seq_printf(m, "%s:", call->system);
347 	seq_printf(m, "%s\n", call->name);
348 
349 	return 0;
350 }
351 
352 static void t_stop(struct seq_file *m, void *p)
353 {
354 	mutex_unlock(&event_mutex);
355 }
356 
357 static int
358 ftrace_event_seq_open(struct inode *inode, struct file *file)
359 {
360 	const struct seq_operations *seq_ops;
361 
362 	if ((file->f_mode & FMODE_WRITE) &&
363 	    !(file->f_flags & O_APPEND))
364 		ftrace_clear_events();
365 
366 	seq_ops = inode->i_private;
367 	return seq_open(file, seq_ops);
368 }
369 
370 static ssize_t
371 event_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
372 		  loff_t *ppos)
373 {
374 	struct ftrace_event_call *call = filp->private_data;
375 	char *buf;
376 
377 	if (call->enabled)
378 		buf = "1\n";
379 	else
380 		buf = "0\n";
381 
382 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
383 }
384 
385 static ssize_t
386 event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
387 		   loff_t *ppos)
388 {
389 	struct ftrace_event_call *call = filp->private_data;
390 	char buf[64];
391 	unsigned long val;
392 	int ret;
393 
394 	if (cnt >= sizeof(buf))
395 		return -EINVAL;
396 
397 	if (copy_from_user(&buf, ubuf, cnt))
398 		return -EFAULT;
399 
400 	buf[cnt] = 0;
401 
402 	ret = strict_strtoul(buf, 10, &val);
403 	if (ret < 0)
404 		return ret;
405 
406 	ret = tracing_update_buffers();
407 	if (ret < 0)
408 		return ret;
409 
410 	switch (val) {
411 	case 0:
412 	case 1:
413 		mutex_lock(&event_mutex);
414 		ftrace_event_enable_disable(call, val);
415 		mutex_unlock(&event_mutex);
416 		break;
417 
418 	default:
419 		return -EINVAL;
420 	}
421 
422 	*ppos += cnt;
423 
424 	return cnt;
425 }
426 
427 static ssize_t
428 system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
429 		   loff_t *ppos)
430 {
431 	const char set_to_char[4] = { '?', '0', '1', 'X' };
432 	const char *system = filp->private_data;
433 	struct ftrace_event_call *call;
434 	char buf[2];
435 	int set = 0;
436 	int ret;
437 
438 	mutex_lock(&event_mutex);
439 	list_for_each_entry(call, &ftrace_events, list) {
440 		if (!call->name || !call->regfunc)
441 			continue;
442 
443 		if (system && strcmp(call->system, system) != 0)
444 			continue;
445 
446 		/*
447 		 * We need to find out if all the events are set
448 		 * or if all events are cleared, or if we have
449 		 * a mixture.
450 		 */
451 		set |= (1 << !!call->enabled);
452 
453 		/*
454 		 * If we have a mixture, no need to look further.
455 		 */
456 		if (set == 3)
457 			break;
458 	}
459 	mutex_unlock(&event_mutex);
460 
461 	buf[0] = set_to_char[set];
462 	buf[1] = '\n';
463 
464 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
465 
466 	return ret;
467 }
468 
469 static ssize_t
470 system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
471 		    loff_t *ppos)
472 {
473 	const char *system = filp->private_data;
474 	unsigned long val;
475 	char buf[64];
476 	ssize_t ret;
477 
478 	if (cnt >= sizeof(buf))
479 		return -EINVAL;
480 
481 	if (copy_from_user(&buf, ubuf, cnt))
482 		return -EFAULT;
483 
484 	buf[cnt] = 0;
485 
486 	ret = strict_strtoul(buf, 10, &val);
487 	if (ret < 0)
488 		return ret;
489 
490 	ret = tracing_update_buffers();
491 	if (ret < 0)
492 		return ret;
493 
494 	if (val != 0 && val != 1)
495 		return -EINVAL;
496 
497 	ret = __ftrace_set_clr_event(NULL, system, NULL, val);
498 	if (ret)
499 		goto out;
500 
501 	ret = cnt;
502 
503 out:
504 	*ppos += cnt;
505 
506 	return ret;
507 }
508 
509 extern char *__bad_type_size(void);
510 
511 #undef FIELD
512 #define FIELD(type, name)						\
513 	sizeof(type) != sizeof(field.name) ? __bad_type_size() :	\
514 	#type, "common_" #name, offsetof(typeof(field), name),		\
515 		sizeof(field.name)
516 
517 static int trace_write_header(struct trace_seq *s)
518 {
519 	struct trace_entry field;
520 
521 	/* struct trace_entry */
522 	return trace_seq_printf(s,
523 				"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
524 				"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
525 				"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
526 				"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
527 				"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
528 				"\n",
529 				FIELD(unsigned short, type),
530 				FIELD(unsigned char, flags),
531 				FIELD(unsigned char, preempt_count),
532 				FIELD(int, pid),
533 				FIELD(int, tgid));
534 }
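
/*
 * The common header written here renders roughly as follows (the offsets
 * and sizes depend on struct trace_entry's actual layout; the numbers
 * below are only illustrative):
 *
 *	field:unsigned short common_type;	offset:0;	size:2;
 *	field:unsigned char common_flags;	offset:2;	size:1;
 *	field:unsigned char common_preempt_count;	offset:3;	size:1;
 *	field:int common_pid;	offset:4;	size:4;
 *	field:int common_tgid;	offset:8;	size:4;
 */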
535 
536 static ssize_t
537 event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
538 		  loff_t *ppos)
539 {
540 	struct ftrace_event_call *call = filp->private_data;
541 	struct trace_seq *s;
542 	char *buf;
543 	int r;
544 
545 	if (*ppos)
546 		return 0;
547 
548 	s = kmalloc(sizeof(*s), GFP_KERNEL);
549 	if (!s)
550 		return -ENOMEM;
551 
552 	trace_seq_init(s);
553 
554 	/* If any of the first writes fail, so will the show_format. */
555 
556 	trace_seq_printf(s, "name: %s\n", call->name);
557 	trace_seq_printf(s, "ID: %d\n", call->id);
558 	trace_seq_printf(s, "format:\n");
559 	trace_write_header(s);
560 
561 	r = call->show_format(s);
562 	if (!r) {
563 		/*
564 		 * ug!  The format output is bigger than a PAGE!!
565 		 */
566 		buf = "FORMAT TOO BIG\n";
567 		r = simple_read_from_buffer(ubuf, cnt, ppos,
568 					      buf, strlen(buf));
569 		goto out;
570 	}
571 
572 	r = simple_read_from_buffer(ubuf, cnt, ppos,
573 				    s->buffer, s->len);
574  out:
575 	kfree(s);
576 	return r;
577 }
578 
579 static ssize_t
580 event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
581 {
582 	struct ftrace_event_call *call = filp->private_data;
583 	struct trace_seq *s;
584 	int r;
585 
586 	if (*ppos)
587 		return 0;
588 
589 	s = kmalloc(sizeof(*s), GFP_KERNEL);
590 	if (!s)
591 		return -ENOMEM;
592 
593 	trace_seq_init(s);
594 	trace_seq_printf(s, "%d\n", call->id);
595 
596 	r = simple_read_from_buffer(ubuf, cnt, ppos,
597 				    s->buffer, s->len);
598 	kfree(s);
599 	return r;
600 }
601 
602 static ssize_t
603 event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
604 		  loff_t *ppos)
605 {
606 	struct ftrace_event_call *call = filp->private_data;
607 	struct trace_seq *s;
608 	int r;
609 
610 	if (*ppos)
611 		return 0;
612 
613 	s = kmalloc(sizeof(*s), GFP_KERNEL);
614 	if (!s)
615 		return -ENOMEM;
616 
617 	trace_seq_init(s);
618 
619 	print_event_filter(call, s);
620 	r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
621 
622 	kfree(s);
623 
624 	return r;
625 }
626 
627 static ssize_t
628 event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
629 		   loff_t *ppos)
630 {
631 	struct ftrace_event_call *call = filp->private_data;
632 	char *buf;
633 	int err;
634 
635 	if (cnt >= PAGE_SIZE)
636 		return -EINVAL;
637 
638 	buf = (char *)__get_free_page(GFP_TEMPORARY);
639 	if (!buf)
640 		return -ENOMEM;
641 
642 	if (copy_from_user(buf, ubuf, cnt)) {
643 		free_page((unsigned long) buf);
644 		return -EFAULT;
645 	}
646 	buf[cnt] = '\0';
647 
648 	err = apply_event_filter(call, buf);
649 	free_page((unsigned long) buf);
650 	if (err < 0)
651 		return err;
652 
653 	*ppos += cnt;
654 
655 	return cnt;
656 }
657 
658 static ssize_t
659 subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
660 		      loff_t *ppos)
661 {
662 	struct event_subsystem *system = filp->private_data;
663 	struct trace_seq *s;
664 	int r;
665 
666 	if (*ppos)
667 		return 0;
668 
669 	s = kmalloc(sizeof(*s), GFP_KERNEL);
670 	if (!s)
671 		return -ENOMEM;
672 
673 	trace_seq_init(s);
674 
675 	print_subsystem_event_filter(system, s);
676 	r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
677 
678 	kfree(s);
679 
680 	return r;
681 }
682 
683 static ssize_t
684 subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
685 		       loff_t *ppos)
686 {
687 	struct event_subsystem *system = filp->private_data;
688 	char *buf;
689 	int err;
690 
691 	if (cnt >= PAGE_SIZE)
692 		return -EINVAL;
693 
694 	buf = (char *)__get_free_page(GFP_TEMPORARY);
695 	if (!buf)
696 		return -ENOMEM;
697 
698 	if (copy_from_user(buf, ubuf, cnt)) {
699 		free_page((unsigned long) buf);
700 		return -EFAULT;
701 	}
702 	buf[cnt] = '\0';
703 
704 	err = apply_subsystem_event_filter(system, buf);
705 	free_page((unsigned long) buf);
706 	if (err < 0)
707 		return err;
708 
709 	*ppos += cnt;
710 
711 	return cnt;
712 }
713 
714 static ssize_t
715 show_header(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
716 {
717 	int (*func)(struct trace_seq *s) = filp->private_data;
718 	struct trace_seq *s;
719 	int r;
720 
721 	if (*ppos)
722 		return 0;
723 
724 	s = kmalloc(sizeof(*s), GFP_KERNEL);
725 	if (!s)
726 		return -ENOMEM;
727 
728 	trace_seq_init(s);
729 
730 	func(s);
731 	r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
732 
733 	kfree(s);
734 
735 	return r;
736 }
737 
738 static const struct seq_operations show_event_seq_ops = {
739 	.start = t_start,
740 	.next = t_next,
741 	.show = t_show,
742 	.stop = t_stop,
743 };
744 
745 static const struct seq_operations show_set_event_seq_ops = {
746 	.start = s_start,
747 	.next = s_next,
748 	.show = t_show,
749 	.stop = t_stop,
750 };
751 
752 static const struct file_operations ftrace_avail_fops = {
753 	.open = ftrace_event_seq_open,
754 	.read = seq_read,
755 	.llseek = seq_lseek,
756 	.release = seq_release,
757 };
758 
759 static const struct file_operations ftrace_set_event_fops = {
760 	.open = ftrace_event_seq_open,
761 	.read = seq_read,
762 	.write = ftrace_event_write,
763 	.llseek = seq_lseek,
764 	.release = seq_release,
765 };
766 
767 static const struct file_operations ftrace_enable_fops = {
768 	.open = tracing_open_generic,
769 	.read = event_enable_read,
770 	.write = event_enable_write,
771 };
772 
773 static const struct file_operations ftrace_event_format_fops = {
774 	.open = tracing_open_generic,
775 	.read = event_format_read,
776 };
777 
778 static const struct file_operations ftrace_event_id_fops = {
779 	.open = tracing_open_generic,
780 	.read = event_id_read,
781 };
782 
783 static const struct file_operations ftrace_event_filter_fops = {
784 	.open = tracing_open_generic,
785 	.read = event_filter_read,
786 	.write = event_filter_write,
787 };
788 
789 static const struct file_operations ftrace_subsystem_filter_fops = {
790 	.open = tracing_open_generic,
791 	.read = subsystem_filter_read,
792 	.write = subsystem_filter_write,
793 };
794 
795 static const struct file_operations ftrace_system_enable_fops = {
796 	.open = tracing_open_generic,
797 	.read = system_enable_read,
798 	.write = system_enable_write,
799 };
800 
801 static const struct file_operations ftrace_show_header_fops = {
802 	.open = tracing_open_generic,
803 	.read = show_header,
804 };
805 
806 static struct dentry *event_trace_events_dir(void)
807 {
808 	static struct dentry *d_tracer;
809 	static struct dentry *d_events;
810 
811 	if (d_events)
812 		return d_events;
813 
814 	d_tracer = tracing_init_dentry();
815 	if (!d_tracer)
816 		return NULL;
817 
818 	d_events = debugfs_create_dir("events", d_tracer);
819 	if (!d_events)
820 		pr_warning("Could not create debugfs "
821 			   "'events' directory\n");
822 
823 	return d_events;
824 }
825 
826 static LIST_HEAD(event_subsystems);
827 
828 static struct dentry *
829 event_subsystem_dir(const char *name, struct dentry *d_events)
830 {
831 	struct event_subsystem *system;
832 	struct dentry *entry;
833 
834 	/* First see if we already created this dir */
835 	list_for_each_entry(system, &event_subsystems, list) {
836 		if (strcmp(system->name, name) == 0)
837 			return system->entry;
838 	}
839 
840 	/* need to create new entry */
841 	system = kmalloc(sizeof(*system), GFP_KERNEL);
842 	if (!system) {
843 		pr_warning("No memory to create event subsystem %s\n",
844 			   name);
845 		return d_events;
846 	}
847 
848 	system->entry = debugfs_create_dir(name, d_events);
849 	if (!system->entry) {
850 		pr_warning("Could not create event subsystem %s\n",
851 			   name);
852 		kfree(system);
853 		return d_events;
854 	}
855 
856 	system->name = kstrdup(name, GFP_KERNEL);
857 	if (!system->name) {
858 		debugfs_remove(system->entry);
859 		kfree(system);
860 		return d_events;
861 	}
862 
863 	list_add(&system->list, &event_subsystems);
864 
865 	system->filter = NULL;
866 
867 	system->filter = kzalloc(sizeof(struct event_filter), GFP_KERNEL);
868 	if (!system->filter) {
869 		pr_warning("Could not allocate filter for subsystem "
870 			   "'%s'\n", name);
871 		return system->entry;
872 	}
873 
874 	entry = debugfs_create_file("filter", 0644, system->entry, system,
875 				    &ftrace_subsystem_filter_fops);
876 	if (!entry) {
877 		kfree(system->filter);
878 		system->filter = NULL;
879 		pr_warning("Could not create debugfs "
880 			   "'%s/filter' entry\n", name);
881 	}
882 
883 	entry = trace_create_file("enable", 0644, system->entry,
884 				  (void *)system->name,
885 				  &ftrace_system_enable_fops);
886 
887 	return system->entry;
888 }
889 
890 static int
891 event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
892 		 const struct file_operations *id,
893 		 const struct file_operations *enable,
894 		 const struct file_operations *filter,
895 		 const struct file_operations *format)
896 {
897 	struct dentry *entry;
898 	int ret;
899 
900 	/*
901 	 * If the trace point header did not define TRACE_SYSTEM
902 	 * then the system would be called "TRACE_SYSTEM".
903 	 */
904 	if (strcmp(call->system, TRACE_SYSTEM) != 0)
905 		d_events = event_subsystem_dir(call->system, d_events);
906 
907 	if (call->raw_init) {
908 		ret = call->raw_init();
909 		if (ret < 0) {
910 			pr_warning("Could not initialize trace point"
911 				   " events/%s\n", call->name);
912 			return ret;
913 		}
914 	}
915 
916 	call->dir = debugfs_create_dir(call->name, d_events);
917 	if (!call->dir) {
918 		pr_warning("Could not create debugfs "
919 			   "'%s' directory\n", call->name);
920 		return -1;
921 	}
922 
923 	if (call->regfunc)
924 		entry = trace_create_file("enable", 0644, call->dir, call,
925 					  enable);
926 
927 	if (call->id)
928 		entry = trace_create_file("id", 0444, call->dir, call,
929 					  id);
930 
931 	if (call->define_fields) {
932 		ret = call->define_fields();
933 		if (ret < 0) {
934 			pr_warning("Could not initialize trace point"
935 				   " events/%s\n", call->name);
936 			return ret;
937 		}
938 		entry = trace_create_file("filter", 0644, call->dir, call,
939 					  filter);
940 	}
941 
942 	/* A trace may not want to export its format */
943 	if (!call->show_format)
944 		return 0;
945 
946 	entry = trace_create_file("format", 0444, call->dir, call,
947 				  format);
948 
949 	return 0;
950 }
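
/*
 * The resulting debugfs layout (illustrative; events whose header did
 * not define TRACE_SYSTEM sit directly under events/, and some files
 * are only created when the event supports them):
 *
 *	tracing/events/<system>/enable
 *	tracing/events/<system>/filter
 *	tracing/events/<system>/<event>/enable
 *	tracing/events/<system>/<event>/id
 *	tracing/events/<system>/<event>/filter
 *	tracing/events/<system>/<event>/format
 */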
951 
952 #define for_each_event(event, start, end)			\
953 	for (event = start;					\
954 	     (unsigned long)event < (unsigned long)end;		\
955 	     event++)
956 
957 #ifdef CONFIG_MODULES
958 
959 static LIST_HEAD(ftrace_module_file_list);
960 
961 /*
962  * Modules must own their file_operations to keep up with
963  * reference counting.
964  */
965 struct ftrace_module_file_ops {
966 	struct list_head		list;
967 	struct module			*mod;
968 	struct file_operations		id;
969 	struct file_operations		enable;
970 	struct file_operations		format;
971 	struct file_operations		filter;
972 };
973 
974 static struct ftrace_module_file_ops *
975 trace_create_file_ops(struct module *mod)
976 {
977 	struct ftrace_module_file_ops *file_ops;
978 
979 	/*
980 	 * This is a bit of a PITA. To allow for correct reference
981 	 * counting, modules must "own" their file_operations.
982 	 * To do this, we allocate the file operations that will be
983 	 * used in the event directory.
984 	 */
985 
986 	file_ops = kmalloc(sizeof(*file_ops), GFP_KERNEL);
987 	if (!file_ops)
988 		return NULL;
989 
990 	file_ops->mod = mod;
991 
992 	file_ops->id = ftrace_event_id_fops;
993 	file_ops->id.owner = mod;
994 
995 	file_ops->enable = ftrace_enable_fops;
996 	file_ops->enable.owner = mod;
997 
998 	file_ops->filter = ftrace_event_filter_fops;
999 	file_ops->filter.owner = mod;
1000 
1001 	file_ops->format = ftrace_event_format_fops;
1002 	file_ops->format.owner = mod;
1003 
1004 	list_add(&file_ops->list, &ftrace_module_file_list);
1005 
1006 	return file_ops;
1007 }
1008 
1009 static void trace_module_add_events(struct module *mod)
1010 {
1011 	struct ftrace_module_file_ops *file_ops = NULL;
1012 	struct ftrace_event_call *call, *start, *end;
1013 	struct dentry *d_events;
1014 
1015 	start = mod->trace_events;
1016 	end = mod->trace_events + mod->num_trace_events;
1017 
1018 	if (start == end)
1019 		return;
1020 
1021 	d_events = event_trace_events_dir();
1022 	if (!d_events)
1023 		return;
1024 
1025 	for_each_event(call, start, end) {
1026 		/* The linker may leave blanks */
1027 		if (!call->name)
1028 			continue;
1029 
1030 		/*
1031 		 * This module has events, create file ops for this module
1032 		 * if not already done.
1033 		 */
1034 		if (!file_ops) {
1035 			file_ops = trace_create_file_ops(mod);
1036 			if (!file_ops)
1037 				return;
1038 		}
1039 		call->mod = mod;
1040 		list_add(&call->list, &ftrace_events);
1041 		event_create_dir(call, d_events,
1042 				 &file_ops->id, &file_ops->enable,
1043 				 &file_ops->filter, &file_ops->format);
1044 	}
1045 }
1046 
1047 static void trace_module_remove_events(struct module *mod)
1048 {
1049 	struct ftrace_module_file_ops *file_ops;
1050 	struct ftrace_event_call *call, *p;
1051 	bool found = false;
1052 
1053 	down_write(&trace_event_mutex);
1054 	list_for_each_entry_safe(call, p, &ftrace_events, list) {
1055 		if (call->mod == mod) {
1056 			found = true;
1057 			ftrace_event_enable_disable(call, 0);
1058 			if (call->event)
1059 				__unregister_ftrace_event(call->event);
1060 			debugfs_remove_recursive(call->dir);
1061 			list_del(&call->list);
1062 			trace_destroy_fields(call);
1063 			destroy_preds(call);
1064 		}
1065 	}
1066 
1067 	/* Now free the file_operations */
1068 	list_for_each_entry(file_ops, &ftrace_module_file_list, list) {
1069 		if (file_ops->mod == mod)
1070 			break;
1071 	}
1072 	if (&file_ops->list != &ftrace_module_file_list) {
1073 		list_del(&file_ops->list);
1074 		kfree(file_ops);
1075 	}
1076 
1077 	/*
1078 	 * It is safest to reset the ring buffer if the module being unloaded
1079 	 * registered any events.
1080 	 */
1081 	if (found)
1082 		tracing_reset_current_online_cpus();
1083 	up_write(&trace_event_mutex);
1084 }
1085 
1086 static int trace_module_notify(struct notifier_block *self,
1087 			       unsigned long val, void *data)
1088 {
1089 	struct module *mod = data;
1090 
1091 	mutex_lock(&event_mutex);
1092 	switch (val) {
1093 	case MODULE_STATE_COMING:
1094 		trace_module_add_events(mod);
1095 		break;
1096 	case MODULE_STATE_GOING:
1097 		trace_module_remove_events(mod);
1098 		break;
1099 	}
1100 	mutex_unlock(&event_mutex);
1101 
1102 	return 0;
1103 }
1104 #else
1105 static int trace_module_notify(struct notifier_block *self,
1106 			       unsigned long val, void *data)
1107 {
1108 	return 0;
1109 }
1110 #endif /* CONFIG_MODULES */
1111 
1112 struct notifier_block trace_module_nb = {
1113 	.notifier_call = trace_module_notify,
1114 	.priority = 0,
1115 };
1116 
1117 extern struct ftrace_event_call __start_ftrace_events[];
1118 extern struct ftrace_event_call __stop_ftrace_events[];
1119 
1120 static __init int event_trace_init(void)
1121 {
1122 	struct ftrace_event_call *call;
1123 	struct dentry *d_tracer;
1124 	struct dentry *entry;
1125 	struct dentry *d_events;
1126 	int ret;
1127 
1128 	d_tracer = tracing_init_dentry();
1129 	if (!d_tracer)
1130 		return 0;
1131 
1132 	entry = debugfs_create_file("available_events", 0444, d_tracer,
1133 				    (void *)&show_event_seq_ops,
1134 				    &ftrace_avail_fops);
1135 	if (!entry)
1136 		pr_warning("Could not create debugfs "
1137 			   "'available_events' entry\n");
1138 
1139 	entry = debugfs_create_file("set_event", 0644, d_tracer,
1140 				    (void *)&show_set_event_seq_ops,
1141 				    &ftrace_set_event_fops);
1142 	if (!entry)
1143 		pr_warning("Could not create debugfs "
1144 			   "'set_event' entry\n");
1145 
1146 	d_events = event_trace_events_dir();
1147 	if (!d_events)
1148 		return 0;
1149 
1150 	/* ring buffer internal formats */
1151 	trace_create_file("header_page", 0444, d_events,
1152 			  ring_buffer_print_page_header,
1153 			  &ftrace_show_header_fops);
1154 
1155 	trace_create_file("header_event", 0444, d_events,
1156 			  ring_buffer_print_entry_header,
1157 			  &ftrace_show_header_fops);
1158 
1159 	trace_create_file("enable", 0644, d_events,
1160 			  NULL, &ftrace_system_enable_fops);
1161 
1162 	for_each_event(call, __start_ftrace_events, __stop_ftrace_events) {
1163 		/* The linker may leave blanks */
1164 		if (!call->name)
1165 			continue;
1166 		list_add(&call->list, &ftrace_events);
1167 		event_create_dir(call, d_events, &ftrace_event_id_fops,
1168 				 &ftrace_enable_fops, &ftrace_event_filter_fops,
1169 				 &ftrace_event_format_fops);
1170 	}
1171 
1172 	ret = register_module_notifier(&trace_module_nb);
1173 	if (ret)
1174 		pr_warning("Failed to register trace events module notifier\n");
1175 
1176 	return 0;
1177 }
1178 fs_initcall(event_trace_init);
1179 
1180 #ifdef CONFIG_FTRACE_STARTUP_TEST
1181 
1182 static DEFINE_SPINLOCK(test_spinlock);
1183 static DEFINE_SPINLOCK(test_spinlock_irq);
1184 static DEFINE_MUTEX(test_mutex);
1185 
1186 static __init void test_work(struct work_struct *dummy)
1187 {
1188 	spin_lock(&test_spinlock);
1189 	spin_lock_irq(&test_spinlock_irq);
1190 	udelay(1);
1191 	spin_unlock_irq(&test_spinlock_irq);
1192 	spin_unlock(&test_spinlock);
1193 
1194 	mutex_lock(&test_mutex);
1195 	msleep(1);
1196 	mutex_unlock(&test_mutex);
1197 }
1198 
1199 static __init int event_test_thread(void *unused)
1200 {
1201 	void *test_malloc;
1202 
1203 	test_malloc = kmalloc(1234, GFP_KERNEL);
1204 	if (!test_malloc)
1205 		pr_info("failed to kmalloc\n");
1206 
1207 	schedule_on_each_cpu(test_work);
1208 
1209 	kfree(test_malloc);
1210 
1211 	set_current_state(TASK_INTERRUPTIBLE);
1212 	while (!kthread_should_stop())
1213 		schedule();
1214 
1215 	return 0;
1216 }
1217 
1218 /*
1219  * Do various things that may trigger events.
1220  */
1221 static __init void event_test_stuff(void)
1222 {
1223 	struct task_struct *test_thread;
1224 
1225 	test_thread = kthread_run(event_test_thread, NULL, "test-events");
1226 	msleep(1);
1227 	kthread_stop(test_thread);
1228 }
1229 
1230 /*
1231  * For every trace event defined, we will test each trace point separately,
1232  * and then by groups, and finally all trace points.
1233  */
1234 static __init void event_trace_self_tests(void)
1235 {
1236 	struct ftrace_event_call *call;
1237 	struct event_subsystem *system;
1238 	int ret;
1239 
1240 	pr_info("Running tests on trace events:\n");
1241 
1242 	list_for_each_entry(call, &ftrace_events, list) {
1243 
1244 		/* Only test those that have a regfunc */
1245 		if (!call->regfunc)
1246 			continue;
1247 
1248 		pr_info("Testing event %s: ", call->name);
1249 
1250 		/*
1251 		 * If an event is already enabled, someone is using
1252 		 * it and the self test should not be on.
1253 		 */
1254 		if (call->enabled) {
1255 			pr_warning("Enabled event during self test!\n");
1256 			WARN_ON_ONCE(1);
1257 			continue;
1258 		}
1259 
1260 		ftrace_event_enable_disable(call, 1);
1261 		event_test_stuff();
1262 		ftrace_event_enable_disable(call, 0);
1263 
1264 		pr_cont("OK\n");
1265 	}
1266 
1267 	/* Now test at the sub system level */
1268 
1269 	pr_info("Running tests on trace event systems:\n");
1270 
1271 	list_for_each_entry(system, &event_subsystems, list) {
1272 
1273 		/* the ftrace system is special, skip it */
1274 		if (strcmp(system->name, "ftrace") == 0)
1275 			continue;
1276 
1277 		pr_info("Testing event system %s: ", system->name);
1278 
1279 		ret = __ftrace_set_clr_event(NULL, system->name, NULL, 1);
1280 		if (WARN_ON_ONCE(ret)) {
1281 			pr_warning("error enabling system %s\n",
1282 				   system->name);
1283 			continue;
1284 		}
1285 
1286 		event_test_stuff();
1287 
1288 		ret = __ftrace_set_clr_event(NULL, system->name, NULL, 0);
1289 		if (WARN_ON_ONCE(ret))
1290 			pr_warning("error disabling system %s\n",
1291 				   system->name);
1292 
1293 		pr_cont("OK\n");
1294 	}
1295 
1296 	/* Test with all events enabled */
1297 
1298 	pr_info("Running tests on all trace events:\n");
1299 	pr_info("Testing all events: ");
1300 
1301 	ret = __ftrace_set_clr_event(NULL, NULL, NULL, 1);
1302 	if (WARN_ON_ONCE(ret)) {
1303 		pr_warning("error enabling all events\n");
1304 		return;
1305 	}
1306 
1307 	event_test_stuff();
1308 
1309 	/* reset all events back to their disabled state */
1310 	ret = __ftrace_set_clr_event(NULL, NULL, NULL, 0);
1311 	if (WARN_ON_ONCE(ret)) {
1312 		pr_warning("error disabling all events\n");
1313 		return;
1314 	}
1315 
1316 	pr_cont("OK\n");
1317 }
1318 
1319 #ifdef CONFIG_FUNCTION_TRACER
1320 
1321 static DEFINE_PER_CPU(atomic_t, test_event_disable);
1322 
1323 static void
1324 function_test_events_call(unsigned long ip, unsigned long parent_ip)
1325 {
1326 	struct ring_buffer_event *event;
1327 	struct ftrace_entry *entry;
1328 	unsigned long flags;
1329 	long disabled;
1330 	int resched;
1331 	int cpu;
1332 	int pc;
1333 
1334 	pc = preempt_count();
1335 	resched = ftrace_preempt_disable();
1336 	cpu = raw_smp_processor_id();
1337 	disabled = atomic_inc_return(&per_cpu(test_event_disable, cpu));
1338 
1339 	if (disabled != 1)
1340 		goto out;
1341 
1342 	local_save_flags(flags);
1343 
1344 	event = trace_current_buffer_lock_reserve(TRACE_FN, sizeof(*entry),
1345 						  flags, pc);
1346 	if (!event)
1347 		goto out;
1348 	entry	= ring_buffer_event_data(event);
1349 	entry->ip			= ip;
1350 	entry->parent_ip		= parent_ip;
1351 
1352 	trace_nowake_buffer_unlock_commit(event, flags, pc);
1353 
1354  out:
1355 	atomic_dec(&per_cpu(test_event_disable, cpu));
1356 	ftrace_preempt_enable(resched);
1357 }
1358 
1359 static struct ftrace_ops trace_ops __initdata  =
1360 {
1361 	.func = function_test_events_call,
1362 };
1363 
1364 static __init void event_trace_self_test_with_function(void)
1365 {
1366 	register_ftrace_function(&trace_ops);
1367 	pr_info("Running tests again, along with the function tracer\n");
1368 	event_trace_self_tests();
1369 	unregister_ftrace_function(&trace_ops);
1370 }
1371 #else
1372 static __init void event_trace_self_test_with_function(void)
1373 {
1374 }
1375 #endif
1376 
1377 static __init int event_trace_self_tests_init(void)
1378 {
1379 
1380 	event_trace_self_tests();
1381 
1382 	event_trace_self_test_with_function();
1383 
1384 	return 0;
1385 }
1386 
1387 late_initcall(event_trace_self_tests_init);
1388 
1389 #endif
1390