xref: /linux/kernel/trace/trace_events.c (revision 5499b45190237ca90dd2ac86395cf464fe1f4cc7)
/*
 * event tracer
 *
 * Copyright (C) 2008 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
 *
 *  - Added format output of fields of the trace point.
 *    This was based off of work by Tom Zanussi <tzanussi@gmail.com>.
 *
 */

#include <linux/workqueue.h>
#include <linux/spinlock.h>
#include <linux/kthread.h>
#include <linux/debugfs.h>
#include <linux/uaccess.h>
#include <linux/module.h>
#include <linux/ctype.h>
#include <linux/delay.h>

#include <asm/setup.h>

#include "trace_output.h"

#undef TRACE_SYSTEM
#define TRACE_SYSTEM "TRACE_SYSTEM"

DEFINE_MUTEX(event_mutex);

LIST_HEAD(ftrace_events);

int trace_define_field(struct ftrace_event_call *call, const char *type,
		       const char *name, int offset, int size, int is_signed,
		       int filter_type)
{
	struct ftrace_event_field *field;

	field = kzalloc(sizeof(*field), GFP_KERNEL);
	if (!field)
		goto err;

	field->name = kstrdup(name, GFP_KERNEL);
	if (!field->name)
		goto err;

	field->type = kstrdup(type, GFP_KERNEL);
	if (!field->type)
		goto err;

	if (filter_type == FILTER_OTHER)
		field->filter_type = filter_assign_type(type);
	else
		field->filter_type = filter_type;

	field->offset = offset;
	field->size = size;
	field->is_signed = is_signed;

	list_add(&field->link, &call->fields);

	return 0;

err:
	if (field)
		kfree(field->name);
	kfree(field);

	return -ENOMEM;
}
EXPORT_SYMBOL_GPL(trace_define_field);
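
/*
 * Example (sketch): a define_fields callback for an event whose raw
 * entry carries an "int foo" member could register that field with:
 *
 *	ret = trace_define_field(call, "int", "foo",
 *				 offsetof(struct my_entry, foo),
 *				 sizeof(int), is_signed_type(int),
 *				 FILTER_OTHER);
 *
 * ("struct my_entry" is hypothetical; the TRACE_EVENT() macros
 * normally generate these calls.)
 */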

#define __common_field(type, item)					\
	ret = trace_define_field(call, #type, "common_" #item,		\
				 offsetof(typeof(ent), item),		\
				 sizeof(ent.item),			\
				 is_signed_type(type), FILTER_OTHER);	\
	if (ret)							\
		return ret;

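/*
 * For example, __common_field(int, pid) below expands (roughly) to:
 *
 *	ret = trace_define_field(call, "int", "common_pid",
 *				 offsetof(struct trace_entry, pid),
 *				 sizeof(ent.pid), is_signed_type(int),
 *				 FILTER_OTHER);
 *	if (ret)
 *		return ret;
 */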
static int trace_define_common_fields(struct ftrace_event_call *call)
{
	int ret;
	struct trace_entry ent;

	__common_field(unsigned short, type);
	__common_field(unsigned char, flags);
	__common_field(unsigned char, preempt_count);
	__common_field(int, pid);
	__common_field(int, lock_depth);

	return ret;
}

void trace_destroy_fields(struct ftrace_event_call *call)
{
	struct ftrace_event_field *field, *next;

	list_for_each_entry_safe(field, next, &call->fields, link) {
		list_del(&field->link);
		kfree(field->type);
		kfree(field->name);
		kfree(field);
	}
}

int trace_event_raw_init(struct ftrace_event_call *call)
{
	int id;

	id = register_ftrace_event(call->event);
	if (!id)
		return -ENODEV;
	call->id = id;
	INIT_LIST_HEAD(&call->fields);

	return 0;
}
EXPORT_SYMBOL_GPL(trace_event_raw_init);

static int ftrace_event_enable_disable(struct ftrace_event_call *call,
					int enable)
{
	int ret = 0;

	switch (enable) {
	case 0:
		if (call->enabled) {
			call->enabled = 0;
			tracing_stop_cmdline_record();
			call->unregfunc(call);
		}
		break;
	case 1:
		if (!call->enabled) {
			tracing_start_cmdline_record();
			ret = call->regfunc(call);
			if (ret) {
				tracing_stop_cmdline_record();
				pr_info("event trace: Could not enable event "
					"%s\n", call->name);
				break;
			}
			call->enabled = 1;
		}
		break;
	}

	return ret;
}

static void ftrace_clear_events(void)
{
	struct ftrace_event_call *call;

	mutex_lock(&event_mutex);
	list_for_each_entry(call, &ftrace_events, list) {
		ftrace_event_enable_disable(call, 0);
	}
	mutex_unlock(&event_mutex);
}

/*
 * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events.
 */
static int __ftrace_set_clr_event(const char *match, const char *sub,
				  const char *event, int set)
{
	struct ftrace_event_call *call;
	int ret = -EINVAL;

	mutex_lock(&event_mutex);
	list_for_each_entry(call, &ftrace_events, list) {

		if (!call->name || !call->regfunc)
			continue;

		if (match &&
		    strcmp(match, call->name) != 0 &&
		    strcmp(match, call->system) != 0)
			continue;

		if (sub && strcmp(sub, call->system) != 0)
			continue;

		if (event && strcmp(event, call->name) != 0)
			continue;

		ftrace_event_enable_disable(call, set);

		ret = 0;
	}
	mutex_unlock(&event_mutex);

	return ret;
}

static int ftrace_set_clr_event(char *buf, int set)
{
	char *event = NULL, *sub = NULL, *match;

	/*
	 * The buf format can be <subsystem>:<event-name>
	 *  *:<event-name> means any event by that name.
	 *  :<event-name> is the same.
	 *
	 *  <subsystem>:* means all events in that subsystem
	 *  <subsystem>: means the same.
	 *
	 *  <name> (no ':') means all events in a subsystem with
	 *  the name <name> or any event that matches <name>
	 */

	match = strsep(&buf, ":");
	if (buf) {
		sub = match;
		event = buf;
		match = NULL;

		if (!strlen(sub) || strcmp(sub, "*") == 0)
			sub = NULL;
		if (!strlen(event) || strcmp(event, "*") == 0)
			event = NULL;
	}

	return __ftrace_set_clr_event(match, sub, event, set);
}
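
/*
 * For example, strings written to the debugfs "set_event" file are
 * parsed here (illustrative, with tracing debugfs mounted at
 * /sys/kernel/debug/tracing):
 *
 *	echo sched:sched_switch > set_event	# one event
 *	echo 'irq:*' >> set_event		# a whole subsystem
 *	echo '!sched:sched_switch' >> set_event	# the '!' prefix, stripped
 *						# in ftrace_event_write(),
 *						# disables the event
 */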

/**
 * trace_set_clr_event - enable or disable an event
 * @system: system name to match (NULL for any system)
 * @event: event name to match (NULL for all events, within system)
 * @set: 1 to enable, 0 to disable
 *
 * This is a way for other parts of the kernel to enable or disable
 * event recording.
 *
 * Returns 0 on success, -EINVAL if the parameters do not match any
 * registered events.
 */
int trace_set_clr_event(const char *system, const char *event, int set)
{
	return __ftrace_set_clr_event(NULL, system, event, set);
}
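
/*
 * Example (hypothetical in-kernel caller): enable every event in the
 * "sched" subsystem and check the result:
 *
 *	if (trace_set_clr_event("sched", NULL, 1))
 *		pr_warning("no sched events registered\n");
 */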

/* 128 should be much more than enough */
#define EVENT_BUF_SIZE		127

static ssize_t
ftrace_event_write(struct file *file, const char __user *ubuf,
		   size_t cnt, loff_t *ppos)
{
	struct trace_parser parser;
	ssize_t read, ret;

	if (!cnt)
		return 0;

	ret = tracing_update_buffers();
	if (ret < 0)
		return ret;

	if (trace_parser_get_init(&parser, EVENT_BUF_SIZE + 1))
		return -ENOMEM;

	read = trace_get_user(&parser, ubuf, cnt, ppos);

	if (read >= 0 && trace_parser_loaded(&parser)) {
		int set = 1;

		if (*parser.buffer == '!')
			set = 0;

		parser.buffer[parser.idx] = 0;

		ret = ftrace_set_clr_event(parser.buffer + !set, set);
		if (ret)
			goto out_put;
	}

	ret = read;

 out_put:
	trace_parser_put(&parser);

	return ret;
}

static void *
t_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct ftrace_event_call *call = v;

	(*pos)++;

	list_for_each_entry_continue(call, &ftrace_events, list) {
		/*
		 * The ftrace subsystem is for showing formats only.
		 * Its events cannot be enabled or disabled via the
		 * event files.
		 */
		if (call->regfunc)
			return call;
	}

	return NULL;
}

static void *t_start(struct seq_file *m, loff_t *pos)
{
	struct ftrace_event_call *call;
	loff_t l;

	mutex_lock(&event_mutex);

	call = list_entry(&ftrace_events, struct ftrace_event_call, list);
	for (l = 0; l <= *pos; ) {
		call = t_next(m, call, &l);
		if (!call)
			break;
	}
	return call;
}

static void *
s_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct ftrace_event_call *call = v;

	(*pos)++;

	list_for_each_entry_continue(call, &ftrace_events, list) {
		if (call->enabled)
			return call;
	}

	return NULL;
}

static void *s_start(struct seq_file *m, loff_t *pos)
{
	struct ftrace_event_call *call;
	loff_t l;

	mutex_lock(&event_mutex);

	call = list_entry(&ftrace_events, struct ftrace_event_call, list);
	for (l = 0; l <= *pos; ) {
		call = s_next(m, call, &l);
		if (!call)
			break;
	}
	return call;
}

static int t_show(struct seq_file *m, void *v)
{
	struct ftrace_event_call *call = v;

	if (strcmp(call->system, TRACE_SYSTEM) != 0)
		seq_printf(m, "%s:", call->system);
	seq_printf(m, "%s\n", call->name);

	return 0;
}

static void t_stop(struct seq_file *m, void *p)
{
	mutex_unlock(&event_mutex);
}

static int
ftrace_event_seq_open(struct inode *inode, struct file *file)
{
	const struct seq_operations *seq_ops;

	if ((file->f_mode & FMODE_WRITE) &&
	    (file->f_flags & O_TRUNC))
		ftrace_clear_events();

	seq_ops = inode->i_private;
	return seq_open(file, seq_ops);
}

static ssize_t
event_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
		  loff_t *ppos)
{
	struct ftrace_event_call *call = filp->private_data;
	char *buf;

	if (call->enabled)
		buf = "1\n";
	else
		buf = "0\n";

	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
}

static ssize_t
event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
		   loff_t *ppos)
{
	struct ftrace_event_call *call = filp->private_data;
	char buf[64];
	unsigned long val;
	int ret;

	if (cnt >= sizeof(buf))
		return -EINVAL;

	if (copy_from_user(&buf, ubuf, cnt))
		return -EFAULT;

	buf[cnt] = 0;

	ret = strict_strtoul(buf, 10, &val);
	if (ret < 0)
		return ret;

	ret = tracing_update_buffers();
	if (ret < 0)
		return ret;

	switch (val) {
	case 0:
	case 1:
		mutex_lock(&event_mutex);
		ret = ftrace_event_enable_disable(call, val);
		mutex_unlock(&event_mutex);
		break;

	default:
		return -EINVAL;
	}

	*ppos += cnt;

	return ret ? ret : cnt;
}

static ssize_t
system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
		   loff_t *ppos)
{
	const char set_to_char[4] = { '?', '0', '1', 'X' };
	const char *system = filp->private_data;
	struct ftrace_event_call *call;
	char buf[2];
	int set = 0;
	int ret;

	mutex_lock(&event_mutex);
	list_for_each_entry(call, &ftrace_events, list) {
		if (!call->name || !call->regfunc)
			continue;

		if (system && strcmp(call->system, system) != 0)
			continue;

		/*
		 * We need to find out if all the events are set,
		 * if all events are cleared, or if we have a mixture.
		 * Bit 0 of "set" records a disabled event, bit 1 an
		 * enabled one; the result indexes set_to_char above.
		 */
		set |= (1 << !!call->enabled);

		/*
		 * If we have a mixture, no need to look further.
		 */
		if (set == 3)
			break;
	}
	mutex_unlock(&event_mutex);

	buf[0] = set_to_char[set];
	buf[1] = '\n';

	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);

	return ret;
}

static ssize_t
system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
		    loff_t *ppos)
{
	const char *system = filp->private_data;
	unsigned long val;
	char buf[64];
	ssize_t ret;

	if (cnt >= sizeof(buf))
		return -EINVAL;

	if (copy_from_user(&buf, ubuf, cnt))
		return -EFAULT;

	buf[cnt] = 0;

	ret = strict_strtoul(buf, 10, &val);
	if (ret < 0)
		return ret;

	ret = tracing_update_buffers();
	if (ret < 0)
		return ret;

	if (val != 0 && val != 1)
		return -EINVAL;

	ret = __ftrace_set_clr_event(NULL, system, NULL, val);
	if (ret)
		goto out;

	ret = cnt;

out:
	*ppos += cnt;

	return ret;
}

static ssize_t
event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
		  loff_t *ppos)
{
	struct ftrace_event_call *call = filp->private_data;
	struct ftrace_event_field *field;
	struct trace_seq *s;
	int common_field_count = 5;
	char *buf;
	int r = 0;

	if (*ppos)
		return 0;

	s = kmalloc(sizeof(*s), GFP_KERNEL);
	if (!s)
		return -ENOMEM;

	trace_seq_init(s);

	trace_seq_printf(s, "name: %s\n", call->name);
	trace_seq_printf(s, "ID: %d\n", call->id);
	trace_seq_printf(s, "format:\n");

	list_for_each_entry_reverse(field, &call->fields, link) {
		/*
		 * Smartly shows the array type (except for dynamic
		 * arrays).
		 * Normal:
		 *	field:TYPE VAR
		 * If TYPE := TYPE[LEN], it is shown as:
		 *	field:TYPE VAR[LEN]
		 */
		const char *array_descriptor = strchr(field->type, '[');

		if (!strncmp(field->type, "__data_loc", 10))
			array_descriptor = NULL;

		if (!array_descriptor) {
			r = trace_seq_printf(s, "\tfield:%s %s;\toffset:%u;"
					"\tsize:%u;\tsigned:%d;\n",
					field->type, field->name, field->offset,
					field->size, !!field->is_signed);
		} else {
			r = trace_seq_printf(s, "\tfield:%.*s %s%s;\toffset:%u;"
					"\tsize:%u;\tsigned:%d;\n",
					(int)(array_descriptor - field->type),
					field->type, field->name,
					array_descriptor, field->offset,
					field->size, !!field->is_signed);
		}

		if (--common_field_count == 0)
			r = trace_seq_printf(s, "\n");

		if (!r)
			break;
	}

	if (r)
		r = trace_seq_printf(s, "\nprint fmt: %s\n",
				call->print_fmt);

	if (!r) {
		/*
		 * ug!  The format output is bigger than a PAGE!!
		 */
		buf = "FORMAT TOO BIG\n";
		r = simple_read_from_buffer(ubuf, cnt, ppos,
					    buf, strlen(buf));
		goto out;
	}

	r = simple_read_from_buffer(ubuf, cnt, ppos,
				    s->buffer, s->len);
 out:
	kfree(s);
	return r;
}
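
/*
 * An event's "format" file, as generated above, looks roughly like
 * this (abridged, illustrative):
 *
 *	name: sched_switch
 *	ID: <id>
 *	format:
 *		field:unsigned short common_type;	offset:0;	size:2;	signed:0;
 *		(... remaining common_* fields, a blank line, then the
 *		 event's own fields ...)
 *
 *	print fmt: "...", ...
 */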

static ssize_t
event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
{
	struct ftrace_event_call *call = filp->private_data;
	struct trace_seq *s;
	int r;

	if (*ppos)
		return 0;

	s = kmalloc(sizeof(*s), GFP_KERNEL);
	if (!s)
		return -ENOMEM;

	trace_seq_init(s);
	trace_seq_printf(s, "%d\n", call->id);

	r = simple_read_from_buffer(ubuf, cnt, ppos,
				    s->buffer, s->len);
	kfree(s);
	return r;
}

static ssize_t
event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
		  loff_t *ppos)
{
	struct ftrace_event_call *call = filp->private_data;
	struct trace_seq *s;
	int r;

	if (*ppos)
		return 0;

	s = kmalloc(sizeof(*s), GFP_KERNEL);
	if (!s)
		return -ENOMEM;

	trace_seq_init(s);

	print_event_filter(call, s);
	r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);

	kfree(s);

	return r;
}

static ssize_t
event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
		   loff_t *ppos)
{
	struct ftrace_event_call *call = filp->private_data;
	char *buf;
	int err;

	if (cnt >= PAGE_SIZE)
		return -EINVAL;

	buf = (char *)__get_free_page(GFP_TEMPORARY);
	if (!buf)
		return -ENOMEM;

	if (copy_from_user(buf, ubuf, cnt)) {
		free_page((unsigned long) buf);
		return -EFAULT;
	}
	buf[cnt] = '\0';

	err = apply_event_filter(call, buf);
	free_page((unsigned long) buf);
	if (err < 0)
		return err;

	*ppos += cnt;

	return cnt;
}
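
/*
 * Example filter strings accepted by apply_event_filter() when written
 * to an event's "filter" file (illustrative):
 *
 *	echo 'common_pid == 1' > events/sched/sched_switch/filter
 *	echo 'next_comm == "bash"' > events/sched/sched_switch/filter
 *	echo 0 > events/sched/sched_switch/filter	# clear the filter
 */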

static ssize_t
subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
		      loff_t *ppos)
{
	struct event_subsystem *system = filp->private_data;
	struct trace_seq *s;
	int r;

	if (*ppos)
		return 0;

	s = kmalloc(sizeof(*s), GFP_KERNEL);
	if (!s)
		return -ENOMEM;

	trace_seq_init(s);

	print_subsystem_event_filter(system, s);
	r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);

	kfree(s);

	return r;
}

static ssize_t
subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
		       loff_t *ppos)
{
	struct event_subsystem *system = filp->private_data;
	char *buf;
	int err;

	if (cnt >= PAGE_SIZE)
		return -EINVAL;

	buf = (char *)__get_free_page(GFP_TEMPORARY);
	if (!buf)
		return -ENOMEM;

	if (copy_from_user(buf, ubuf, cnt)) {
		free_page((unsigned long) buf);
		return -EFAULT;
	}
	buf[cnt] = '\0';

	err = apply_subsystem_event_filter(system, buf);
	free_page((unsigned long) buf);
	if (err < 0)
		return err;

	*ppos += cnt;

	return cnt;
}

static ssize_t
show_header(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
{
	int (*func)(struct trace_seq *s) = filp->private_data;
	struct trace_seq *s;
	int r;

	if (*ppos)
		return 0;

	s = kmalloc(sizeof(*s), GFP_KERNEL);
	if (!s)
		return -ENOMEM;

	trace_seq_init(s);

	func(s);
	r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);

	kfree(s);

	return r;
}

static const struct seq_operations show_event_seq_ops = {
	.start = t_start,
	.next = t_next,
	.show = t_show,
	.stop = t_stop,
};

static const struct seq_operations show_set_event_seq_ops = {
	.start = s_start,
	.next = s_next,
	.show = t_show,
	.stop = t_stop,
};

static const struct file_operations ftrace_avail_fops = {
	.open = ftrace_event_seq_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release,
};

static const struct file_operations ftrace_set_event_fops = {
	.open = ftrace_event_seq_open,
	.read = seq_read,
	.write = ftrace_event_write,
	.llseek = seq_lseek,
	.release = seq_release,
};

static const struct file_operations ftrace_enable_fops = {
	.open = tracing_open_generic,
	.read = event_enable_read,
	.write = event_enable_write,
};

static const struct file_operations ftrace_event_format_fops = {
	.open = tracing_open_generic,
	.read = event_format_read,
};

static const struct file_operations ftrace_event_id_fops = {
	.open = tracing_open_generic,
	.read = event_id_read,
};

static const struct file_operations ftrace_event_filter_fops = {
	.open = tracing_open_generic,
	.read = event_filter_read,
	.write = event_filter_write,
};

static const struct file_operations ftrace_subsystem_filter_fops = {
	.open = tracing_open_generic,
	.read = subsystem_filter_read,
	.write = subsystem_filter_write,
};

static const struct file_operations ftrace_system_enable_fops = {
	.open = tracing_open_generic,
	.read = system_enable_read,
	.write = system_enable_write,
};

static const struct file_operations ftrace_show_header_fops = {
	.open = tracing_open_generic,
	.read = show_header,
};

static struct dentry *event_trace_events_dir(void)
{
	static struct dentry *d_tracer;
	static struct dentry *d_events;

	if (d_events)
		return d_events;

	d_tracer = tracing_init_dentry();
	if (!d_tracer)
		return NULL;

	d_events = debugfs_create_dir("events", d_tracer);
	if (!d_events)
		pr_warning("Could not create debugfs "
			   "'events' directory\n");

	return d_events;
}

static LIST_HEAD(event_subsystems);

static struct dentry *
event_subsystem_dir(const char *name, struct dentry *d_events)
{
	struct event_subsystem *system;
	struct dentry *entry;

	/* First see if we already created this dir */
	list_for_each_entry(system, &event_subsystems, list) {
		if (strcmp(system->name, name) == 0) {
			system->nr_events++;
			return system->entry;
		}
	}

	/* need to create new entry */
	system = kmalloc(sizeof(*system), GFP_KERNEL);
	if (!system) {
		pr_warning("No memory to create event subsystem %s\n",
			   name);
		return d_events;
	}

	system->entry = debugfs_create_dir(name, d_events);
	if (!system->entry) {
		pr_warning("Could not create event subsystem %s\n",
			   name);
		kfree(system);
		return d_events;
	}

	system->nr_events = 1;
	system->name = kstrdup(name, GFP_KERNEL);
	if (!system->name) {
		debugfs_remove(system->entry);
		kfree(system);
		return d_events;
	}

	list_add(&system->list, &event_subsystems);

	system->filter = kzalloc(sizeof(struct event_filter), GFP_KERNEL);
	if (!system->filter) {
		pr_warning("Could not allocate filter for subsystem "
			   "'%s'\n", name);
		return system->entry;
	}

	entry = debugfs_create_file("filter", 0644, system->entry, system,
				    &ftrace_subsystem_filter_fops);
	if (!entry) {
		kfree(system->filter);
		system->filter = NULL;
		pr_warning("Could not create debugfs "
			   "'%s/filter' entry\n", name);
	}

	trace_create_file("enable", 0644, system->entry,
			  (void *)system->name,
			  &ftrace_system_enable_fops);

	return system->entry;
}

static int
event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
		 const struct file_operations *id,
		 const struct file_operations *enable,
		 const struct file_operations *filter,
		 const struct file_operations *format)
{
	int ret;

	/*
	 * If the trace point header did not define TRACE_SYSTEM
	 * then the system would be called "TRACE_SYSTEM".
	 */
	if (strcmp(call->system, TRACE_SYSTEM) != 0)
		d_events = event_subsystem_dir(call->system, d_events);

	call->dir = debugfs_create_dir(call->name, d_events);
	if (!call->dir) {
		pr_warning("Could not create debugfs "
			   "'%s' directory\n", call->name);
		return -1;
	}

	if (call->regfunc)
		trace_create_file("enable", 0644, call->dir, call,
				  enable);

	if (call->id && call->profile_enable)
		trace_create_file("id", 0444, call->dir, call,
				  id);

	if (call->define_fields) {
		ret = trace_define_common_fields(call);
		if (!ret)
			ret = call->define_fields(call);
		if (ret < 0) {
			pr_warning("Could not initialize trace point"
				   " events/%s\n", call->name);
			return ret;
		}
		trace_create_file("filter", 0644, call->dir, call,
				  filter);
	}

	trace_create_file("format", 0444, call->dir, call,
			  format);

	return 0;
}
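
/*
 * On success, the per-event debugfs layout created above is:
 *
 *	events/<system>/<event>/enable	(if the event has a regfunc)
 *	events/<system>/<event>/id	(if it has an id and profile_enable)
 *	events/<system>/<event>/filter	(if it has define_fields)
 *	events/<system>/<event>/format
 */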

static int __trace_add_event_call(struct ftrace_event_call *call)
{
	struct dentry *d_events;
	int ret;

	if (!call->name)
		return -EINVAL;

	if (call->raw_init) {
		ret = call->raw_init(call);
		if (ret < 0) {
			if (ret != -ENOSYS)
				pr_warning("Could not initialize trace "
				"events/%s\n", call->name);
			return ret;
		}
	}

	d_events = event_trace_events_dir();
	if (!d_events)
		return -ENOENT;

	ret = event_create_dir(call, d_events, &ftrace_event_id_fops,
				&ftrace_enable_fops, &ftrace_event_filter_fops,
				&ftrace_event_format_fops);
	if (!ret)
		list_add(&call->list, &ftrace_events);

	return ret;
}

/* Add an additional event_call dynamically */
int trace_add_event_call(struct ftrace_event_call *call)
{
	int ret;

	mutex_lock(&event_mutex);
	ret = __trace_add_event_call(call);
	mutex_unlock(&event_mutex);
	return ret;
}

static void remove_subsystem_dir(const char *name)
{
	struct event_subsystem *system;

	if (strcmp(name, TRACE_SYSTEM) == 0)
		return;

	list_for_each_entry(system, &event_subsystems, list) {
		if (strcmp(system->name, name) == 0) {
			if (!--system->nr_events) {
				struct event_filter *filter = system->filter;

				debugfs_remove_recursive(system->entry);
				list_del(&system->list);
				if (filter) {
					kfree(filter->filter_string);
					kfree(filter);
				}
				kfree(system->name);
				kfree(system);
			}
			break;
		}
	}
}

/*
 * Must be called with event_mutex held, and with trace_event_mutex
 * held for writing.
 */
static void __trace_remove_event_call(struct ftrace_event_call *call)
{
	ftrace_event_enable_disable(call, 0);
	if (call->event)
		__unregister_ftrace_event(call->event);
	debugfs_remove_recursive(call->dir);
	list_del(&call->list);
	trace_destroy_fields(call);
	destroy_preds(call);
	remove_subsystem_dir(call->system);
}

/* Remove an event_call */
void trace_remove_event_call(struct ftrace_event_call *call)
{
	mutex_lock(&event_mutex);
	down_write(&trace_event_mutex);
	__trace_remove_event_call(call);
	up_write(&trace_event_mutex);
	mutex_unlock(&event_mutex);
}

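/*
 * Walk a contiguous array of ftrace_event_call structures, e.g. the
 * linker-generated section bounded by __start_ftrace_events and
 * __stop_ftrace_events declared below.
 */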
#define for_each_event(event, start, end)			\
	for (event = start;					\
	     (unsigned long)event < (unsigned long)end;		\
	     event++)

#ifdef CONFIG_MODULES

static LIST_HEAD(ftrace_module_file_list);

/*
 * Modules must own their file_operations to keep up with
 * reference counting.
 */
struct ftrace_module_file_ops {
	struct list_head		list;
	struct module			*mod;
	struct file_operations		id;
	struct file_operations		enable;
	struct file_operations		format;
	struct file_operations		filter;
};

static struct ftrace_module_file_ops *
trace_create_file_ops(struct module *mod)
{
	struct ftrace_module_file_ops *file_ops;

	/*
	 * This is a bit of a PITA. To allow for correct reference
	 * counting, modules must "own" their file_operations.
	 * To do this, we allocate the file operations that will be
	 * used in the event directory.
	 */

	file_ops = kmalloc(sizeof(*file_ops), GFP_KERNEL);
	if (!file_ops)
		return NULL;

	file_ops->mod = mod;

	file_ops->id = ftrace_event_id_fops;
	file_ops->id.owner = mod;

	file_ops->enable = ftrace_enable_fops;
	file_ops->enable.owner = mod;

	file_ops->filter = ftrace_event_filter_fops;
	file_ops->filter.owner = mod;

	file_ops->format = ftrace_event_format_fops;
	file_ops->format.owner = mod;

	list_add(&file_ops->list, &ftrace_module_file_list);

	return file_ops;
}

static void trace_module_add_events(struct module *mod)
{
	struct ftrace_module_file_ops *file_ops = NULL;
	struct ftrace_event_call *call, *start, *end;
	struct dentry *d_events;
	int ret;

	start = mod->trace_events;
	end = mod->trace_events + mod->num_trace_events;

	if (start == end)
		return;

	d_events = event_trace_events_dir();
	if (!d_events)
		return;

	for_each_event(call, start, end) {
		/* The linker may leave blanks */
		if (!call->name)
			continue;
		if (call->raw_init) {
			ret = call->raw_init(call);
			if (ret < 0) {
				if (ret != -ENOSYS)
					pr_warning("Could not initialize trace "
					"point events/%s\n", call->name);
				continue;
			}
		}
		/*
		 * This module has events, create file ops for this module
		 * if not already done.
		 */
		if (!file_ops) {
			file_ops = trace_create_file_ops(mod);
			if (!file_ops)
				return;
		}
		call->mod = mod;
		ret = event_create_dir(call, d_events,
				       &file_ops->id, &file_ops->enable,
				       &file_ops->filter, &file_ops->format);
		if (!ret)
			list_add(&call->list, &ftrace_events);
	}
}

static void trace_module_remove_events(struct module *mod)
{
	struct ftrace_module_file_ops *file_ops;
	struct ftrace_event_call *call, *p;
	bool found = false;

	down_write(&trace_event_mutex);
	list_for_each_entry_safe(call, p, &ftrace_events, list) {
		if (call->mod == mod) {
			found = true;
			__trace_remove_event_call(call);
		}
	}

	/* Now free the file_operations */
	list_for_each_entry(file_ops, &ftrace_module_file_list, list) {
		if (file_ops->mod == mod)
			break;
	}
	if (&file_ops->list != &ftrace_module_file_list) {
		list_del(&file_ops->list);
		kfree(file_ops);
	}

	/*
	 * It is safest to reset the ring buffer if the module being unloaded
	 * registered any events.
	 */
	if (found)
		tracing_reset_current_online_cpus();
	up_write(&trace_event_mutex);
}

static int trace_module_notify(struct notifier_block *self,
			       unsigned long val, void *data)
{
	struct module *mod = data;

	mutex_lock(&event_mutex);
	switch (val) {
	case MODULE_STATE_COMING:
		trace_module_add_events(mod);
		break;
	case MODULE_STATE_GOING:
		trace_module_remove_events(mod);
		break;
	}
	mutex_unlock(&event_mutex);

	return 0;
}
#else
static int trace_module_notify(struct notifier_block *self,
			       unsigned long val, void *data)
{
	return 0;
}
#endif /* CONFIG_MODULES */

static struct notifier_block trace_module_nb = {
	.notifier_call = trace_module_notify,
	.priority = 0,
};

extern struct ftrace_event_call __start_ftrace_events[];
extern struct ftrace_event_call __stop_ftrace_events[];

static char bootup_event_buf[COMMAND_LINE_SIZE] __initdata;

static __init int setup_trace_event(char *str)
{
	strlcpy(bootup_event_buf, str, COMMAND_LINE_SIZE);
	ring_buffer_expanded = 1;
	tracing_selftest_disabled = 1;

	return 1;
}
__setup("trace_event=", setup_trace_event);
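
/*
 * For example, booting with:
 *
 *	trace_event=sched:sched_switch,irq:*
 *
 * stores the list in bootup_event_buf; event_trace_init() below then
 * enables each comma-separated entry.
 */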

static __init int event_trace_init(void)
{
	struct ftrace_event_call *call;
	struct dentry *d_tracer;
	struct dentry *entry;
	struct dentry *d_events;
	int ret;
	char *buf = bootup_event_buf;
	char *token;

	d_tracer = tracing_init_dentry();
	if (!d_tracer)
		return 0;

	entry = debugfs_create_file("available_events", 0444, d_tracer,
				    (void *)&show_event_seq_ops,
				    &ftrace_avail_fops);
	if (!entry)
		pr_warning("Could not create debugfs "
			   "'available_events' entry\n");

	entry = debugfs_create_file("set_event", 0644, d_tracer,
				    (void *)&show_set_event_seq_ops,
				    &ftrace_set_event_fops);
	if (!entry)
		pr_warning("Could not create debugfs "
			   "'set_event' entry\n");

	d_events = event_trace_events_dir();
	if (!d_events)
		return 0;

	/* ring buffer internal formats */
	trace_create_file("header_page", 0444, d_events,
			  ring_buffer_print_page_header,
			  &ftrace_show_header_fops);

	trace_create_file("header_event", 0444, d_events,
			  ring_buffer_print_entry_header,
			  &ftrace_show_header_fops);

	trace_create_file("enable", 0644, d_events,
			  NULL, &ftrace_system_enable_fops);

	for_each_event(call, __start_ftrace_events, __stop_ftrace_events) {
		/* The linker may leave blanks */
		if (!call->name)
			continue;
		if (call->raw_init) {
			ret = call->raw_init(call);
			if (ret < 0) {
				if (ret != -ENOSYS)
					pr_warning("Could not initialize trace "
					"point events/%s\n", call->name);
				continue;
			}
		}
		ret = event_create_dir(call, d_events, &ftrace_event_id_fops,
				       &ftrace_enable_fops,
				       &ftrace_event_filter_fops,
				       &ftrace_event_format_fops);
		if (!ret)
			list_add(&call->list, &ftrace_events);
	}

	while (true) {
		token = strsep(&buf, ",");

		if (!token)
			break;
		if (!*token)
			continue;

		ret = ftrace_set_clr_event(token, 1);
		if (ret)
			pr_warning("Failed to enable trace event: %s\n", token);
	}

	ret = register_module_notifier(&trace_module_nb);
	if (ret)
		pr_warning("Failed to register trace events module notifier\n");

	return 0;
}
fs_initcall(event_trace_init);

#ifdef CONFIG_FTRACE_STARTUP_TEST

static DEFINE_SPINLOCK(test_spinlock);
static DEFINE_SPINLOCK(test_spinlock_irq);
static DEFINE_MUTEX(test_mutex);

static __init void test_work(struct work_struct *dummy)
{
	spin_lock(&test_spinlock);
	spin_lock_irq(&test_spinlock_irq);
	udelay(1);
	spin_unlock_irq(&test_spinlock_irq);
	spin_unlock(&test_spinlock);

	mutex_lock(&test_mutex);
	msleep(1);
	mutex_unlock(&test_mutex);
}

static __init int event_test_thread(void *unused)
{
	void *test_malloc;

	test_malloc = kmalloc(1234, GFP_KERNEL);
	if (!test_malloc)
		pr_info("failed to kmalloc\n");

	schedule_on_each_cpu(test_work);

	kfree(test_malloc);

	set_current_state(TASK_INTERRUPTIBLE);
	while (!kthread_should_stop())
		schedule();

	return 0;
}

/*
 * Do various things that may trigger events.
 */
static __init void event_test_stuff(void)
{
	struct task_struct *test_thread;

	test_thread = kthread_run(event_test_thread, NULL, "test-events");
	msleep(1);
	kthread_stop(test_thread);
}

/*
 * For every trace event defined, we will test each trace point separately,
 * and then by groups, and finally all trace points.
 */
static __init void event_trace_self_tests(void)
{
	struct ftrace_event_call *call;
	struct event_subsystem *system;
	int ret;

	pr_info("Running tests on trace events:\n");

	list_for_each_entry(call, &ftrace_events, list) {

		/* Only test those that have a regfunc */
		if (!call->regfunc)
			continue;

/*
 * Testing syscall events here is pretty useless, but we still do it
 * if configured. It is time consuming, though. What we really need
 * is a user thread to perform the syscalls as we test.
 */
#ifndef CONFIG_EVENT_TRACE_TEST_SYSCALLS
		if (call->system &&
		    strcmp(call->system, "syscalls") == 0)
			continue;
#endif

		pr_info("Testing event %s: ", call->name);

		/*
		 * If an event is already enabled, someone is using
		 * it and the self test should not be on.
		 */
		if (call->enabled) {
			pr_warning("Enabled event during self test!\n");
			WARN_ON_ONCE(1);
			continue;
		}

		ftrace_event_enable_disable(call, 1);
		event_test_stuff();
		ftrace_event_enable_disable(call, 0);

		pr_cont("OK\n");
	}

	/* Now test at the sub system level */

	pr_info("Running tests on trace event systems:\n");

	list_for_each_entry(system, &event_subsystems, list) {

		/* the ftrace system is special, skip it */
		if (strcmp(system->name, "ftrace") == 0)
			continue;

		pr_info("Testing event system %s: ", system->name);

		ret = __ftrace_set_clr_event(NULL, system->name, NULL, 1);
		if (WARN_ON_ONCE(ret)) {
			pr_warning("error enabling system %s\n",
				   system->name);
			continue;
		}

		event_test_stuff();

		ret = __ftrace_set_clr_event(NULL, system->name, NULL, 0);
		if (WARN_ON_ONCE(ret))
			pr_warning("error disabling system %s\n",
				   system->name);

		pr_cont("OK\n");
	}

	/* Test with all events enabled */

	pr_info("Running tests on all trace events:\n");
	pr_info("Testing all events: ");

	ret = __ftrace_set_clr_event(NULL, NULL, NULL, 1);
	if (WARN_ON_ONCE(ret)) {
		pr_warning("error enabling all events\n");
		return;
	}

	event_test_stuff();

	/* now disable all events again */
	ret = __ftrace_set_clr_event(NULL, NULL, NULL, 0);
	if (WARN_ON_ONCE(ret)) {
		pr_warning("error disabling all events\n");
		return;
	}

	pr_cont("OK\n");
}

#ifdef CONFIG_FUNCTION_TRACER

static DEFINE_PER_CPU(atomic_t, ftrace_test_event_disable);

static void
function_test_events_call(unsigned long ip, unsigned long parent_ip)
{
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	struct ftrace_entry *entry;
	unsigned long flags;
	long disabled;
	int resched;
	int cpu;
	int pc;

	pc = preempt_count();
	resched = ftrace_preempt_disable();
	cpu = raw_smp_processor_id();
	disabled = atomic_inc_return(&per_cpu(ftrace_test_event_disable, cpu));

	if (disabled != 1)
		goto out;

	local_save_flags(flags);

	event = trace_current_buffer_lock_reserve(&buffer,
						  TRACE_FN, sizeof(*entry),
						  flags, pc);
	if (!event)
		goto out;
	entry	= ring_buffer_event_data(event);
	entry->ip			= ip;
	entry->parent_ip		= parent_ip;

	trace_nowake_buffer_unlock_commit(buffer, event, flags, pc);

 out:
	atomic_dec(&per_cpu(ftrace_test_event_disable, cpu));
	ftrace_preempt_enable(resched);
}

static struct ftrace_ops trace_ops __initdata = {
	.func = function_test_events_call,
};

static __init void event_trace_self_test_with_function(void)
{
	register_ftrace_function(&trace_ops);
	pr_info("Running tests again, along with the function tracer\n");
	event_trace_self_tests();
	unregister_ftrace_function(&trace_ops);
}
#else
static __init void event_trace_self_test_with_function(void)
{
}
#endif

static __init int event_trace_self_tests_init(void)
{
	if (!tracing_selftest_disabled) {
		event_trace_self_tests();
		event_trace_self_test_with_function();
	}

	return 0;
}

late_initcall(event_trace_self_tests_init);

#endif