xref: /linux/kernel/trace/trace_events.c (revision a33f32244d8550da8b4a26e277ce07d5c6d158b5)
1 /*
2  * event tracer
3  *
4  * Copyright (C) 2008 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
5  *
6  *  - Added format output of fields of the trace point.
7  *    This was based on work by Tom Zanussi <tzanussi@gmail.com>.
8  *
9  */
10 
11 #include <linux/workqueue.h>
12 #include <linux/spinlock.h>
13 #include <linux/kthread.h>
14 #include <linux/debugfs.h>
15 #include <linux/uaccess.h>
16 #include <linux/module.h>
17 #include <linux/ctype.h>
18 #include <linux/slab.h>
19 #include <linux/delay.h>
20 
21 #include <asm/setup.h>
22 
23 #include "trace_output.h"
24 
25 #undef TRACE_SYSTEM
26 #define TRACE_SYSTEM "TRACE_SYSTEM"
27 
28 DEFINE_MUTEX(event_mutex);
29 
30 LIST_HEAD(ftrace_events);
31 
32 int trace_define_field(struct ftrace_event_call *call, const char *type,
33 		       const char *name, int offset, int size, int is_signed,
34 		       int filter_type)
35 {
36 	struct ftrace_event_field *field;
37 
38 	field = kzalloc(sizeof(*field), GFP_KERNEL);
39 	if (!field)
40 		goto err;
41 
42 	field->name = kstrdup(name, GFP_KERNEL);
43 	if (!field->name)
44 		goto err;
45 
46 	field->type = kstrdup(type, GFP_KERNEL);
47 	if (!field->type)
48 		goto err;
49 
50 	if (filter_type == FILTER_OTHER)
51 		field->filter_type = filter_assign_type(type);
52 	else
53 		field->filter_type = filter_type;
54 
55 	field->offset = offset;
56 	field->size = size;
57 	field->is_signed = is_signed;
58 
59 	list_add(&field->link, &call->fields);
60 
61 	return 0;
62 
63 err:
64 	if (field)
65 		kfree(field->name);
66 	kfree(field);
67 
68 	return -ENOMEM;
69 }
70 EXPORT_SYMBOL_GPL(trace_define_field);
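/*
 * Illustrative sketch (not part of the upstream file): a define_fields
 * callback typically registers each field of its entry structure with
 * trace_define_field().  The entry type and field name below are
 * hypothetical.
 *
 *	struct my_entry {
 *		struct trace_entry	ent;
 *		unsigned long		addr;
 *	};
 *
 *	static int my_event_define_fields(struct ftrace_event_call *call)
 *	{
 *		return trace_define_field(call, "unsigned long", "addr",
 *					  offsetof(struct my_entry, addr),
 *					  sizeof(unsigned long), 0,
 *					  FILTER_OTHER);
 *	}
 */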
71 
72 #define __common_field(type, item)					\
73 	ret = trace_define_field(call, #type, "common_" #item,		\
74 				 offsetof(typeof(ent), item),		\
75 				 sizeof(ent.item),			\
76 				 is_signed_type(type), FILTER_OTHER);	\
77 	if (ret)							\
78 		return ret;
79 
80 static int trace_define_common_fields(struct ftrace_event_call *call)
81 {
82 	int ret;
83 	struct trace_entry ent;
84 
85 	__common_field(unsigned short, type);
86 	__common_field(unsigned char, flags);
87 	__common_field(unsigned char, preempt_count);
88 	__common_field(int, pid);
89 	__common_field(int, lock_depth);
90 
91 	return ret;
92 }
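/*
 * For reference, __common_field(int, pid) above expands (roughly) to:
 *
 *	ret = trace_define_field(call, "int", "common_pid",
 *				 offsetof(typeof(ent), pid),
 *				 sizeof(ent.pid),
 *				 is_signed_type(int), FILTER_OTHER);
 *	if (ret)
 *		return ret;
 *
 * so every event carries the same "common_*" fields ahead of its own.
 */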
93 
94 void trace_destroy_fields(struct ftrace_event_call *call)
95 {
96 	struct ftrace_event_field *field, *next;
97 
98 	list_for_each_entry_safe(field, next, &call->fields, link) {
99 		list_del(&field->link);
100 		kfree(field->type);
101 		kfree(field->name);
102 		kfree(field);
103 	}
104 }
105 
106 int trace_event_raw_init(struct ftrace_event_call *call)
107 {
108 	int id;
109 
110 	id = register_ftrace_event(call->event);
111 	if (!id)
112 		return -ENODEV;
113 	call->id = id;
114 	INIT_LIST_HEAD(&call->fields);
115 
116 	return 0;
117 }
118 EXPORT_SYMBOL_GPL(trace_event_raw_init);
119 
120 static int ftrace_event_enable_disable(struct ftrace_event_call *call,
121 					int enable)
122 {
123 	int ret = 0;
124 
125 	switch (enable) {
126 	case 0:
127 		if (call->enabled) {
128 			call->enabled = 0;
129 			tracing_stop_cmdline_record();
130 			call->unregfunc(call);
131 		}
132 		break;
133 	case 1:
134 		if (!call->enabled) {
135 			tracing_start_cmdline_record();
136 			ret = call->regfunc(call);
137 			if (ret) {
138 				tracing_stop_cmdline_record();
139 				pr_info("event trace: Could not enable event "
140 					"%s\n", call->name);
141 				break;
142 			}
143 			call->enabled = 1;
144 		}
145 		break;
146 	}
147 
148 	return ret;
149 }
150 
151 static void ftrace_clear_events(void)
152 {
153 	struct ftrace_event_call *call;
154 
155 	mutex_lock(&event_mutex);
156 	list_for_each_entry(call, &ftrace_events, list) {
157 		ftrace_event_enable_disable(call, 0);
158 	}
159 	mutex_unlock(&event_mutex);
160 }
161 
162 /*
163  * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events.
164  */
165 static int __ftrace_set_clr_event(const char *match, const char *sub,
166 				  const char *event, int set)
167 {
168 	struct ftrace_event_call *call;
169 	int ret = -EINVAL;
170 
171 	mutex_lock(&event_mutex);
172 	list_for_each_entry(call, &ftrace_events, list) {
173 
174 		if (!call->name || !call->regfunc)
175 			continue;
176 
177 		if (match &&
178 		    strcmp(match, call->name) != 0 &&
179 		    strcmp(match, call->system) != 0)
180 			continue;
181 
182 		if (sub && strcmp(sub, call->system) != 0)
183 			continue;
184 
185 		if (event && strcmp(event, call->name) != 0)
186 			continue;
187 
188 		ftrace_event_enable_disable(call, set);
189 
190 		ret = 0;
191 	}
192 	mutex_unlock(&event_mutex);
193 
194 	return ret;
195 }
196 
197 static int ftrace_set_clr_event(char *buf, int set)
198 {
199 	char *event = NULL, *sub = NULL, *match;
200 
201 	/*
202 	 * The buf format can be <subsystem>:<event-name>
203 	 *  *:<event-name> means any event by that name.
204 	 *  :<event-name> is the same.
205 	 *
206 	 *  <subsystem>:* means all events in that subsystem
207 	 *  <subsystem>: means the same.
208 	 *
209 	 *  <name> (no ':') means all events in a subsystem with
210 	 *  the name <name> or any event that matches <name>
211 	 */
212 
213 	match = strsep(&buf, ":");
214 	if (buf) {
215 		sub = match;
216 		event = buf;
217 		match = NULL;
218 
219 		if (!strlen(sub) || strcmp(sub, "*") == 0)
220 			sub = NULL;
221 		if (!strlen(event) || strcmp(event, "*") == 0)
222 			event = NULL;
223 	}
224 
225 	return __ftrace_set_clr_event(match, sub, event, set);
226 }
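/*
 * Example strings accepted by ftrace_set_clr_event() and hence by the
 * set_event file, following the format comment above (subsystem and
 * event names are examples only):
 *
 *	"sched:sched_switch"	one named event in one subsystem
 *	"sched:" or "sched:*"	every event in that subsystem
 *	"sched_switch"		any subsystem or event matching the name
 */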
227 
228 /**
229  * trace_set_clr_event - enable or disable an event
230  * @system: system name to match (NULL for any system)
231  * @event: event name to match (NULL for all events, within system)
232  * @set: 1 to enable, 0 to disable
233  *
234  * This is a way for other parts of the kernel to enable or disable
235  * event recording.
236  *
237  * Returns 0 on success, -EINVAL if the parameters do not match any
238  * registered events.
239  */
240 int trace_set_clr_event(const char *system, const char *event, int set)
241 {
242 	return __ftrace_set_clr_event(NULL, system, event, set);
243 }
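/*
 * Hypothetical in-kernel usage sketch (names are examples only):
 * enable every event in the "sched" subsystem, then disable just
 * "sched_switch":
 *
 *	trace_set_clr_event("sched", NULL, 1);
 *	trace_set_clr_event("sched", "sched_switch", 0);
 */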
244 
245 /* 127 chars plus a terminating NUL (128 total) should be much more than enough */
246 #define EVENT_BUF_SIZE		127
247 
248 static ssize_t
249 ftrace_event_write(struct file *file, const char __user *ubuf,
250 		   size_t cnt, loff_t *ppos)
251 {
252 	struct trace_parser parser;
253 	ssize_t read, ret;
254 
255 	if (!cnt)
256 		return 0;
257 
258 	ret = tracing_update_buffers();
259 	if (ret < 0)
260 		return ret;
261 
262 	if (trace_parser_get_init(&parser, EVENT_BUF_SIZE + 1))
263 		return -ENOMEM;
264 
265 	read = trace_get_user(&parser, ubuf, cnt, ppos);
266 
267 	if (read >= 0 && trace_parser_loaded((&parser))) {
268 		int set = 1;
269 
270 		if (*parser.buffer == '!')
271 			set = 0;
272 
273 		parser.buffer[parser.idx] = 0;
274 
275 		ret = ftrace_set_clr_event(parser.buffer + !set, set);
276 		if (ret)
277 			goto out_put;
278 	}
279 
280 	ret = read;
281 
282  out_put:
283 	trace_parser_put(&parser);
284 
285 	return ret;
286 }
287 
288 static void *
289 t_next(struct seq_file *m, void *v, loff_t *pos)
290 {
291 	struct ftrace_event_call *call = v;
292 
293 	(*pos)++;
294 
295 	list_for_each_entry_continue(call, &ftrace_events, list) {
296 		/*
297 		 * The ftrace subsystem is for showing formats only.
298 		 * They cannot be enabled or disabled via the event files.
299 		 */
300 		if (call->regfunc)
301 			return call;
302 	}
303 
304 	return NULL;
305 }
306 
307 static void *t_start(struct seq_file *m, loff_t *pos)
308 {
309 	struct ftrace_event_call *call;
310 	loff_t l;
311 
312 	mutex_lock(&event_mutex);
313 
314 	call = list_entry(&ftrace_events, struct ftrace_event_call, list);
315 	for (l = 0; l <= *pos; ) {
316 		call = t_next(m, call, &l);
317 		if (!call)
318 			break;
319 	}
320 	return call;
321 }
322 
323 static void *
324 s_next(struct seq_file *m, void *v, loff_t *pos)
325 {
326 	struct ftrace_event_call *call = v;
327 
328 	(*pos)++;
329 
330 	list_for_each_entry_continue(call, &ftrace_events, list) {
331 		if (call->enabled)
332 			return call;
333 	}
334 
335 	return NULL;
336 }
337 
338 static void *s_start(struct seq_file *m, loff_t *pos)
339 {
340 	struct ftrace_event_call *call;
341 	loff_t l;
342 
343 	mutex_lock(&event_mutex);
344 
345 	call = list_entry(&ftrace_events, struct ftrace_event_call, list);
346 	for (l = 0; l <= *pos; ) {
347 		call = s_next(m, call, &l);
348 		if (!call)
349 			break;
350 	}
351 	return call;
352 }
353 
354 static int t_show(struct seq_file *m, void *v)
355 {
356 	struct ftrace_event_call *call = v;
357 
358 	if (strcmp(call->system, TRACE_SYSTEM) != 0)
359 		seq_printf(m, "%s:", call->system);
360 	seq_printf(m, "%s\n", call->name);
361 
362 	return 0;
363 }
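/*
 * t_show() backs the available_events listing: each line is printed as
 * "system:event", or just the event name for entries in the default
 * TRACE_SYSTEM.  Typical output (event names are examples):
 *
 *	irq:irq_handler_entry
 *	sched:sched_switch
 */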
364 
365 static void t_stop(struct seq_file *m, void *p)
366 {
367 	mutex_unlock(&event_mutex);
368 }
369 
370 static int
371 ftrace_event_seq_open(struct inode *inode, struct file *file)
372 {
373 	const struct seq_operations *seq_ops;
374 
375 	if ((file->f_mode & FMODE_WRITE) &&
376 	    (file->f_flags & O_TRUNC))
377 		ftrace_clear_events();
378 
379 	seq_ops = inode->i_private;
380 	return seq_open(file, seq_ops);
381 }
382 
383 static ssize_t
384 event_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
385 		  loff_t *ppos)
386 {
387 	struct ftrace_event_call *call = filp->private_data;
388 	char *buf;
389 
390 	if (call->enabled)
391 		buf = "1\n";
392 	else
393 		buf = "0\n";
394 
395 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
396 }
397 
398 static ssize_t
399 event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
400 		   loff_t *ppos)
401 {
402 	struct ftrace_event_call *call = filp->private_data;
403 	char buf[64];
404 	unsigned long val;
405 	int ret;
406 
407 	if (cnt >= sizeof(buf))
408 		return -EINVAL;
409 
410 	if (copy_from_user(&buf, ubuf, cnt))
411 		return -EFAULT;
412 
413 	buf[cnt] = 0;
414 
415 	ret = strict_strtoul(buf, 10, &val);
416 	if (ret < 0)
417 		return ret;
418 
419 	ret = tracing_update_buffers();
420 	if (ret < 0)
421 		return ret;
422 
423 	switch (val) {
424 	case 0:
425 	case 1:
426 		mutex_lock(&event_mutex);
427 		ret = ftrace_event_enable_disable(call, val);
428 		mutex_unlock(&event_mutex);
429 		break;
430 
431 	default:
432 		return -EINVAL;
433 	}
434 
435 	*ppos += cnt;
436 
437 	return ret ? ret : cnt;
438 }
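/*
 * Usage sketch for the per-event "enable" file created later by
 * event_create_dir() and handled above (the debugfs path is the
 * conventional mount point, the event name an example):
 *
 *	echo 1 > /sys/kernel/debug/tracing/events/sched/sched_switch/enable
 *	echo 0 > /sys/kernel/debug/tracing/events/sched/sched_switch/enable
 *
 * Any value other than 0 or 1 is rejected with -EINVAL.
 */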
439 
440 static ssize_t
441 system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
442 		   loff_t *ppos)
443 {
444 	const char set_to_char[4] = { '?', '0', '1', 'X' };
445 	const char *system = filp->private_data;
446 	struct ftrace_event_call *call;
447 	char buf[2];
448 	int set = 0;
449 	int ret;
450 
451 	mutex_lock(&event_mutex);
452 	list_for_each_entry(call, &ftrace_events, list) {
453 		if (!call->name || !call->regfunc)
454 			continue;
455 
456 		if (system && strcmp(call->system, system) != 0)
457 			continue;
458 
459 		/*
460 		 * We need to find out if all the events are set
461 		 * or if all events are cleared, or if we have
462 		 * a mixture.
463 		 */
464 		set |= (1 << !!call->enabled);
465 
466 		/*
467 		 * If we have a mixture, no need to look further.
468 		 */
469 		if (set == 3)
470 			break;
471 	}
472 	mutex_unlock(&event_mutex);
473 
474 	buf[0] = set_to_char[set];
475 	buf[1] = '\n';
476 
477 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
478 
479 	return ret;
480 }
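/*
 * How the "set" bitmask built above maps to the reported character:
 *
 *	set == 0  ->  '?'	no matching events were found
 *	set == 1  ->  '0'	only disabled events were seen (1 << 0)
 *	set == 2  ->  '1'	only enabled events were seen  (1 << 1)
 *	set == 3  ->  'X'	a mixture of enabled and disabled events
 */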
481 
482 static ssize_t
483 system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
484 		    loff_t *ppos)
485 {
486 	const char *system = filp->private_data;
487 	unsigned long val;
488 	char buf[64];
489 	ssize_t ret;
490 
491 	if (cnt >= sizeof(buf))
492 		return -EINVAL;
493 
494 	if (copy_from_user(&buf, ubuf, cnt))
495 		return -EFAULT;
496 
497 	buf[cnt] = 0;
498 
499 	ret = strict_strtoul(buf, 10, &val);
500 	if (ret < 0)
501 		return ret;
502 
503 	ret = tracing_update_buffers();
504 	if (ret < 0)
505 		return ret;
506 
507 	if (val != 0 && val != 1)
508 		return -EINVAL;
509 
510 	ret = __ftrace_set_clr_event(NULL, system, NULL, val);
511 	if (ret)
512 		goto out;
513 
514 	ret = cnt;
515 
516 out:
517 	*ppos += cnt;
518 
519 	return ret;
520 }
521 
522 static ssize_t
523 event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
524 		  loff_t *ppos)
525 {
526 	struct ftrace_event_call *call = filp->private_data;
527 	struct ftrace_event_field *field;
528 	struct trace_seq *s;
529 	int common_field_count = 5;
530 	char *buf;
531 	int r = 0;
532 
533 	if (*ppos)
534 		return 0;
535 
536 	s = kmalloc(sizeof(*s), GFP_KERNEL);
537 	if (!s)
538 		return -ENOMEM;
539 
540 	trace_seq_init(s);
541 
542 	trace_seq_printf(s, "name: %s\n", call->name);
543 	trace_seq_printf(s, "ID: %d\n", call->id);
544 	trace_seq_printf(s, "format:\n");
545 
546 	list_for_each_entry_reverse(field, &call->fields, link) {
547 		/*
548 		 * Smartly shows the array type (except for dynamic arrays).
549 		 * Normal:
550 		 *	field:TYPE VAR
551 		 * If TYPE := TYPE[LEN], it is shown:
552 		 *	field:TYPE VAR[LEN]
553 		 */
554 		const char *array_descriptor = strchr(field->type, '[');
555 
556 		if (!strncmp(field->type, "__data_loc", 10))
557 			array_descriptor = NULL;
558 
559 		if (!array_descriptor) {
560 			r = trace_seq_printf(s, "\tfield:%s %s;\toffset:%u;"
561 					"\tsize:%u;\tsigned:%d;\n",
562 					field->type, field->name, field->offset,
563 					field->size, !!field->is_signed);
564 		} else {
565 			r = trace_seq_printf(s, "\tfield:%.*s %s%s;\toffset:%u;"
566 					"\tsize:%u;\tsigned:%d;\n",
567 					(int)(array_descriptor - field->type),
568 					field->type, field->name,
569 					array_descriptor, field->offset,
570 					field->size, !!field->is_signed);
571 		}
572 
573 		if (--common_field_count == 0)
574 			r = trace_seq_printf(s, "\n");
575 
576 		if (!r)
577 			break;
578 	}
579 
580 	if (r)
581 		r = trace_seq_printf(s, "\nprint fmt: %s\n",
582 				call->print_fmt);
583 
584 	if (!r) {
585 		/*
586 		 * ug!  The format output is bigger than a PAGE!!
587 		 */
588 		buf = "FORMAT TOO BIG\n";
589 		r = simple_read_from_buffer(ubuf, cnt, ppos,
590 					      buf, strlen(buf));
591 		goto out;
592 	}
593 
594 	r = simple_read_from_buffer(ubuf, cnt, ppos,
595 				    s->buffer, s->len);
596  out:
597 	kfree(s);
598 	return r;
599 }
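/*
 * Rough shape of what event_format_read() emits through the "format"
 * file (the first common field is shown; the rest depends on the
 * event):
 *
 *	name: <event name>
 *	ID: <event id>
 *	format:
 *		field:unsigned short common_type;	offset:0;	size:2;	signed:0;
 *		... remaining common fields, a blank line, then the
 *		... event's own fields
 *
 *	print fmt: <print_fmt string>
 */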
600 
601 static ssize_t
602 event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
603 {
604 	struct ftrace_event_call *call = filp->private_data;
605 	struct trace_seq *s;
606 	int r;
607 
608 	if (*ppos)
609 		return 0;
610 
611 	s = kmalloc(sizeof(*s), GFP_KERNEL);
612 	if (!s)
613 		return -ENOMEM;
614 
615 	trace_seq_init(s);
616 	trace_seq_printf(s, "%d\n", call->id);
617 
618 	r = simple_read_from_buffer(ubuf, cnt, ppos,
619 				    s->buffer, s->len);
620 	kfree(s);
621 	return r;
622 }
623 
624 static ssize_t
625 event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
626 		  loff_t *ppos)
627 {
628 	struct ftrace_event_call *call = filp->private_data;
629 	struct trace_seq *s;
630 	int r;
631 
632 	if (*ppos)
633 		return 0;
634 
635 	s = kmalloc(sizeof(*s), GFP_KERNEL);
636 	if (!s)
637 		return -ENOMEM;
638 
639 	trace_seq_init(s);
640 
641 	print_event_filter(call, s);
642 	r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
643 
644 	kfree(s);
645 
646 	return r;
647 }
648 
649 static ssize_t
650 event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
651 		   loff_t *ppos)
652 {
653 	struct ftrace_event_call *call = filp->private_data;
654 	char *buf;
655 	int err;
656 
657 	if (cnt >= PAGE_SIZE)
658 		return -EINVAL;
659 
660 	buf = (char *)__get_free_page(GFP_TEMPORARY);
661 	if (!buf)
662 		return -ENOMEM;
663 
664 	if (copy_from_user(buf, ubuf, cnt)) {
665 		free_page((unsigned long) buf);
666 		return -EFAULT;
667 	}
668 	buf[cnt] = '\0';
669 
670 	err = apply_event_filter(call, buf);
671 	free_page((unsigned long) buf);
672 	if (err < 0)
673 		return err;
674 
675 	*ppos += cnt;
676 
677 	return cnt;
678 }
679 
680 static ssize_t
681 subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
682 		      loff_t *ppos)
683 {
684 	struct event_subsystem *system = filp->private_data;
685 	struct trace_seq *s;
686 	int r;
687 
688 	if (*ppos)
689 		return 0;
690 
691 	s = kmalloc(sizeof(*s), GFP_KERNEL);
692 	if (!s)
693 		return -ENOMEM;
694 
695 	trace_seq_init(s);
696 
697 	print_subsystem_event_filter(system, s);
698 	r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
699 
700 	kfree(s);
701 
702 	return r;
703 }
704 
705 static ssize_t
706 subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
707 		       loff_t *ppos)
708 {
709 	struct event_subsystem *system = filp->private_data;
710 	char *buf;
711 	int err;
712 
713 	if (cnt >= PAGE_SIZE)
714 		return -EINVAL;
715 
716 	buf = (char *)__get_free_page(GFP_TEMPORARY);
717 	if (!buf)
718 		return -ENOMEM;
719 
720 	if (copy_from_user(buf, ubuf, cnt)) {
721 		free_page((unsigned long) buf);
722 		return -EFAULT;
723 	}
724 	buf[cnt] = '\0';
725 
726 	err = apply_subsystem_event_filter(system, buf);
727 	free_page((unsigned long) buf);
728 	if (err < 0)
729 		return err;
730 
731 	*ppos += cnt;
732 
733 	return cnt;
734 }
735 
736 static ssize_t
737 show_header(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
738 {
739 	int (*func)(struct trace_seq *s) = filp->private_data;
740 	struct trace_seq *s;
741 	int r;
742 
743 	if (*ppos)
744 		return 0;
745 
746 	s = kmalloc(sizeof(*s), GFP_KERNEL);
747 	if (!s)
748 		return -ENOMEM;
749 
750 	trace_seq_init(s);
751 
752 	func(s);
753 	r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
754 
755 	kfree(s);
756 
757 	return r;
758 }
759 
760 static const struct seq_operations show_event_seq_ops = {
761 	.start = t_start,
762 	.next = t_next,
763 	.show = t_show,
764 	.stop = t_stop,
765 };
766 
767 static const struct seq_operations show_set_event_seq_ops = {
768 	.start = s_start,
769 	.next = s_next,
770 	.show = t_show,
771 	.stop = t_stop,
772 };
773 
774 static const struct file_operations ftrace_avail_fops = {
775 	.open = ftrace_event_seq_open,
776 	.read = seq_read,
777 	.llseek = seq_lseek,
778 	.release = seq_release,
779 };
780 
781 static const struct file_operations ftrace_set_event_fops = {
782 	.open = ftrace_event_seq_open,
783 	.read = seq_read,
784 	.write = ftrace_event_write,
785 	.llseek = seq_lseek,
786 	.release = seq_release,
787 };
788 
789 static const struct file_operations ftrace_enable_fops = {
790 	.open = tracing_open_generic,
791 	.read = event_enable_read,
792 	.write = event_enable_write,
793 };
794 
795 static const struct file_operations ftrace_event_format_fops = {
796 	.open = tracing_open_generic,
797 	.read = event_format_read,
798 };
799 
800 static const struct file_operations ftrace_event_id_fops = {
801 	.open = tracing_open_generic,
802 	.read = event_id_read,
803 };
804 
805 static const struct file_operations ftrace_event_filter_fops = {
806 	.open = tracing_open_generic,
807 	.read = event_filter_read,
808 	.write = event_filter_write,
809 };
810 
811 static const struct file_operations ftrace_subsystem_filter_fops = {
812 	.open = tracing_open_generic,
813 	.read = subsystem_filter_read,
814 	.write = subsystem_filter_write,
815 };
816 
817 static const struct file_operations ftrace_system_enable_fops = {
818 	.open = tracing_open_generic,
819 	.read = system_enable_read,
820 	.write = system_enable_write,
821 };
822 
823 static const struct file_operations ftrace_show_header_fops = {
824 	.open = tracing_open_generic,
825 	.read = show_header,
826 };
827 
828 static struct dentry *event_trace_events_dir(void)
829 {
830 	static struct dentry *d_tracer;
831 	static struct dentry *d_events;
832 
833 	if (d_events)
834 		return d_events;
835 
836 	d_tracer = tracing_init_dentry();
837 	if (!d_tracer)
838 		return NULL;
839 
840 	d_events = debugfs_create_dir("events", d_tracer);
841 	if (!d_events)
842 		pr_warning("Could not create debugfs "
843 			   "'events' directory\n");
844 
845 	return d_events;
846 }
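/*
 * With debugfs mounted at its conventional location this creates
 *
 *	/sys/kernel/debug/tracing/events/
 *
 * under which the per-subsystem and per-event directories below are
 * placed.  The mount point itself is a convention, not something this
 * file controls.
 */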
847 
848 static LIST_HEAD(event_subsystems);
849 
850 static struct dentry *
851 event_subsystem_dir(const char *name, struct dentry *d_events)
852 {
853 	struct event_subsystem *system;
854 	struct dentry *entry;
855 
856 	/* First see if we have already created this dir */
857 	list_for_each_entry(system, &event_subsystems, list) {
858 		if (strcmp(system->name, name) == 0) {
859 			system->nr_events++;
860 			return system->entry;
861 		}
862 	}
863 
864 	/* need to create new entry */
865 	system = kmalloc(sizeof(*system), GFP_KERNEL);
866 	if (!system) {
867 		pr_warning("No memory to create event subsystem %s\n",
868 			   name);
869 		return d_events;
870 	}
871 
872 	system->entry = debugfs_create_dir(name, d_events);
873 	if (!system->entry) {
874 		pr_warning("Could not create event subsystem %s\n",
875 			   name);
876 		kfree(system);
877 		return d_events;
878 	}
879 
880 	system->nr_events = 1;
881 	system->name = kstrdup(name, GFP_KERNEL);
882 	if (!system->name) {
883 		debugfs_remove(system->entry);
884 		kfree(system);
885 		return d_events;
886 	}
887 
888 	list_add(&system->list, &event_subsystems);
889 
890 	system->filter = NULL;
891 
892 	system->filter = kzalloc(sizeof(struct event_filter), GFP_KERNEL);
893 	if (!system->filter) {
894 		pr_warning("Could not allocate filter for subsystem "
895 			   "'%s'\n", name);
896 		return system->entry;
897 	}
898 
899 	entry = debugfs_create_file("filter", 0644, system->entry, system,
900 				    &ftrace_subsystem_filter_fops);
901 	if (!entry) {
902 		kfree(system->filter);
903 		system->filter = NULL;
904 		pr_warning("Could not create debugfs "
905 			   "'%s/filter' entry\n", name);
906 	}
907 
908 	trace_create_file("enable", 0644, system->entry,
909 			  (void *)system->name,
910 			  &ftrace_system_enable_fops);
911 
912 	return system->entry;
913 }
914 
915 static int
916 event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
917 		 const struct file_operations *id,
918 		 const struct file_operations *enable,
919 		 const struct file_operations *filter,
920 		 const struct file_operations *format)
921 {
922 	int ret;
923 
924 	/*
925 	 * If the trace point header did not define TRACE_SYSTEM
926 	 * then the system would be called "TRACE_SYSTEM".
927 	 */
928 	if (strcmp(call->system, TRACE_SYSTEM) != 0)
929 		d_events = event_subsystem_dir(call->system, d_events);
930 
931 	call->dir = debugfs_create_dir(call->name, d_events);
932 	if (!call->dir) {
933 		pr_warning("Could not create debugfs "
934 			   "'%s' directory\n", call->name);
935 		return -1;
936 	}
937 
938 	if (call->regfunc)
939 		trace_create_file("enable", 0644, call->dir, call,
940 				  enable);
941 
942 	if (call->id && call->perf_event_enable)
943 		trace_create_file("id", 0444, call->dir, call,
944 				  id);
945 
946 	if (call->define_fields) {
947 		ret = trace_define_common_fields(call);
948 		if (!ret)
949 			ret = call->define_fields(call);
950 		if (ret < 0) {
951 			pr_warning("Could not initialize trace point"
952 				   " events/%s\n", call->name);
953 			return ret;
954 		}
955 		trace_create_file("filter", 0644, call->dir, call,
956 				  filter);
957 	}
958 
959 	trace_create_file("format", 0444, call->dir, call,
960 			  format);
961 
962 	return 0;
963 }
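/*
 * Resulting layout for a single event as created by event_create_dir()
 * (subsystem and event names are examples; "enable", "id" and "filter"
 * only appear when the conditions checked above hold):
 *
 *	events/sched/sched_switch/enable
 *	events/sched/sched_switch/id
 *	events/sched/sched_switch/filter
 *	events/sched/sched_switch/format
 */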
964 
965 static int __trace_add_event_call(struct ftrace_event_call *call)
966 {
967 	struct dentry *d_events;
968 	int ret;
969 
970 	if (!call->name)
971 		return -EINVAL;
972 
973 	if (call->raw_init) {
974 		ret = call->raw_init(call);
975 		if (ret < 0) {
976 			if (ret != -ENOSYS)
977 				pr_warning("Could not initialize trace "
978 				"events/%s\n", call->name);
979 			return ret;
980 		}
981 	}
982 
983 	d_events = event_trace_events_dir();
984 	if (!d_events)
985 		return -ENOENT;
986 
987 	ret = event_create_dir(call, d_events, &ftrace_event_id_fops,
988 				&ftrace_enable_fops, &ftrace_event_filter_fops,
989 				&ftrace_event_format_fops);
990 	if (!ret)
991 		list_add(&call->list, &ftrace_events);
992 
993 	return ret;
994 }
995 
996 /* Add an additional event_call dynamically */
997 int trace_add_event_call(struct ftrace_event_call *call)
998 {
999 	int ret;
1000 	mutex_lock(&event_mutex);
1001 	ret = __trace_add_event_call(call);
1002 	mutex_unlock(&event_mutex);
1003 	return ret;
1004 }
1005 
1006 static void remove_subsystem_dir(const char *name)
1007 {
1008 	struct event_subsystem *system;
1009 
1010 	if (strcmp(name, TRACE_SYSTEM) == 0)
1011 		return;
1012 
1013 	list_for_each_entry(system, &event_subsystems, list) {
1014 		if (strcmp(system->name, name) == 0) {
1015 			if (!--system->nr_events) {
1016 				struct event_filter *filter = system->filter;
1017 
1018 				debugfs_remove_recursive(system->entry);
1019 				list_del(&system->list);
1020 				if (filter) {
1021 					kfree(filter->filter_string);
1022 					kfree(filter);
1023 				}
1024 				kfree(system->name);
1025 				kfree(system);
1026 			}
1027 			break;
1028 		}
1029 	}
1030 }
1031 
1032 /*
1033  * Must be called with both event_mutex and trace_event_mutex held.
1034  */
1035 static void __trace_remove_event_call(struct ftrace_event_call *call)
1036 {
1037 	ftrace_event_enable_disable(call, 0);
1038 	if (call->event)
1039 		__unregister_ftrace_event(call->event);
1040 	debugfs_remove_recursive(call->dir);
1041 	list_del(&call->list);
1042 	trace_destroy_fields(call);
1043 	destroy_preds(call);
1044 	remove_subsystem_dir(call->system);
1045 }
1046 
1047 /* Remove an event_call */
1048 void trace_remove_event_call(struct ftrace_event_call *call)
1049 {
1050 	mutex_lock(&event_mutex);
1051 	down_write(&trace_event_mutex);
1052 	__trace_remove_event_call(call);
1053 	up_write(&trace_event_mutex);
1054 	mutex_unlock(&event_mutex);
1055 }
1056 
1057 #define for_each_event(event, start, end)			\
1058 	for (event = start;					\
1059 	     (unsigned long)event < (unsigned long)end;		\
1060 	     event++)
1061 
1062 #ifdef CONFIG_MODULES
1063 
1064 static LIST_HEAD(ftrace_module_file_list);
1065 
1066 /*
1067  * Modules must own their file_operations to keep up with
1068  * reference counting.
1069  */
1070 struct ftrace_module_file_ops {
1071 	struct list_head		list;
1072 	struct module			*mod;
1073 	struct file_operations		id;
1074 	struct file_operations		enable;
1075 	struct file_operations		format;
1076 	struct file_operations		filter;
1077 };
1078 
1079 static struct ftrace_module_file_ops *
1080 trace_create_file_ops(struct module *mod)
1081 {
1082 	struct ftrace_module_file_ops *file_ops;
1083 
1084 	/*
1085 	 * This is a bit of a PITA. To allow for correct reference
1086 	 * counting, modules must "own" their file_operations.
1087 	 * To do this, we allocate the file operations that will be
1088 	 * used in the event directory.
1089 	 */
1090 
1091 	file_ops = kmalloc(sizeof(*file_ops), GFP_KERNEL);
1092 	if (!file_ops)
1093 		return NULL;
1094 
1095 	file_ops->mod = mod;
1096 
1097 	file_ops->id = ftrace_event_id_fops;
1098 	file_ops->id.owner = mod;
1099 
1100 	file_ops->enable = ftrace_enable_fops;
1101 	file_ops->enable.owner = mod;
1102 
1103 	file_ops->filter = ftrace_event_filter_fops;
1104 	file_ops->filter.owner = mod;
1105 
1106 	file_ops->format = ftrace_event_format_fops;
1107 	file_ops->format.owner = mod;
1108 
1109 	list_add(&file_ops->list, &ftrace_module_file_list);
1110 
1111 	return file_ops;
1112 }
1113 
1114 static void trace_module_add_events(struct module *mod)
1115 {
1116 	struct ftrace_module_file_ops *file_ops = NULL;
1117 	struct ftrace_event_call *call, *start, *end;
1118 	struct dentry *d_events;
1119 	int ret;
1120 
1121 	start = mod->trace_events;
1122 	end = mod->trace_events + mod->num_trace_events;
1123 
1124 	if (start == end)
1125 		return;
1126 
1127 	d_events = event_trace_events_dir();
1128 	if (!d_events)
1129 		return;
1130 
1131 	for_each_event(call, start, end) {
1132 		/* The linker may leave blanks */
1133 		if (!call->name)
1134 			continue;
1135 		if (call->raw_init) {
1136 			ret = call->raw_init(call);
1137 			if (ret < 0) {
1138 				if (ret != -ENOSYS)
1139 					pr_warning("Could not initialize trace "
1140 					"point events/%s\n", call->name);
1141 				continue;
1142 			}
1143 		}
1144 		/*
1145 		 * This module has events, create file ops for this module
1146 		 * if not already done.
1147 		 */
1148 		if (!file_ops) {
1149 			file_ops = trace_create_file_ops(mod);
1150 			if (!file_ops)
1151 				return;
1152 		}
1153 		call->mod = mod;
1154 		ret = event_create_dir(call, d_events,
1155 				       &file_ops->id, &file_ops->enable,
1156 				       &file_ops->filter, &file_ops->format);
1157 		if (!ret)
1158 			list_add(&call->list, &ftrace_events);
1159 	}
1160 }
1161 
1162 static void trace_module_remove_events(struct module *mod)
1163 {
1164 	struct ftrace_module_file_ops *file_ops;
1165 	struct ftrace_event_call *call, *p;
1166 	bool found = false;
1167 
1168 	down_write(&trace_event_mutex);
1169 	list_for_each_entry_safe(call, p, &ftrace_events, list) {
1170 		if (call->mod == mod) {
1171 			found = true;
1172 			__trace_remove_event_call(call);
1173 		}
1174 	}
1175 
1176 	/* Now free the file_operations */
1177 	list_for_each_entry(file_ops, &ftrace_module_file_list, list) {
1178 		if (file_ops->mod == mod)
1179 			break;
1180 	}
1181 	if (&file_ops->list != &ftrace_module_file_list) {
1182 		list_del(&file_ops->list);
1183 		kfree(file_ops);
1184 	}
1185 
1186 	/*
1187 	 * It is safest to reset the ring buffer if the module being unloaded
1188 	 * registered any events.
1189 	 */
1190 	if (found)
1191 		tracing_reset_current_online_cpus();
1192 	up_write(&trace_event_mutex);
1193 }
1194 
1195 static int trace_module_notify(struct notifier_block *self,
1196 			       unsigned long val, void *data)
1197 {
1198 	struct module *mod = data;
1199 
1200 	mutex_lock(&event_mutex);
1201 	switch (val) {
1202 	case MODULE_STATE_COMING:
1203 		trace_module_add_events(mod);
1204 		break;
1205 	case MODULE_STATE_GOING:
1206 		trace_module_remove_events(mod);
1207 		break;
1208 	}
1209 	mutex_unlock(&event_mutex);
1210 
1211 	return 0;
1212 }
1213 #else
1214 static int trace_module_notify(struct notifier_block *self,
1215 			       unsigned long val, void *data)
1216 {
1217 	return 0;
1218 }
1219 #endif /* CONFIG_MODULES */
1220 
1221 static struct notifier_block trace_module_nb = {
1222 	.notifier_call = trace_module_notify,
1223 	.priority = 0,
1224 };
1225 
1226 extern struct ftrace_event_call __start_ftrace_events[];
1227 extern struct ftrace_event_call __stop_ftrace_events[];
1228 
1229 static char bootup_event_buf[COMMAND_LINE_SIZE] __initdata;
1230 
1231 static __init int setup_trace_event(char *str)
1232 {
1233 	strlcpy(bootup_event_buf, str, COMMAND_LINE_SIZE);
1234 	ring_buffer_expanded = 1;
1235 	tracing_selftest_disabled = 1;
1236 
1237 	return 1;
1238 }
1239 __setup("trace_event=", setup_trace_event);
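/*
 * Boot-time usage sketch: events named on the command line are enabled
 * from event_trace_init() below, one comma separated token at a time
 * (event names are examples):
 *
 *	trace_event=sched:sched_switch,irq:irq_handler_entry
 */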
1240 
1241 static __init int event_trace_init(void)
1242 {
1243 	struct ftrace_event_call *call;
1244 	struct dentry *d_tracer;
1245 	struct dentry *entry;
1246 	struct dentry *d_events;
1247 	int ret;
1248 	char *buf = bootup_event_buf;
1249 	char *token;
1250 
1251 	d_tracer = tracing_init_dentry();
1252 	if (!d_tracer)
1253 		return 0;
1254 
1255 	entry = debugfs_create_file("available_events", 0444, d_tracer,
1256 				    (void *)&show_event_seq_ops,
1257 				    &ftrace_avail_fops);
1258 	if (!entry)
1259 		pr_warning("Could not create debugfs "
1260 			   "'available_events' entry\n");
1261 
1262 	entry = debugfs_create_file("set_event", 0644, d_tracer,
1263 				    (void *)&show_set_event_seq_ops,
1264 				    &ftrace_set_event_fops);
1265 	if (!entry)
1266 		pr_warning("Could not create debugfs "
1267 			   "'set_event' entry\n");
1268 
1269 	d_events = event_trace_events_dir();
1270 	if (!d_events)
1271 		return 0;
1272 
1273 	/* ring buffer internal formats */
1274 	trace_create_file("header_page", 0444, d_events,
1275 			  ring_buffer_print_page_header,
1276 			  &ftrace_show_header_fops);
1277 
1278 	trace_create_file("header_event", 0444, d_events,
1279 			  ring_buffer_print_entry_header,
1280 			  &ftrace_show_header_fops);
1281 
1282 	trace_create_file("enable", 0644, d_events,
1283 			  NULL, &ftrace_system_enable_fops);
1284 
1285 	for_each_event(call, __start_ftrace_events, __stop_ftrace_events) {
1286 		/* The linker may leave blanks */
1287 		if (!call->name)
1288 			continue;
1289 		if (call->raw_init) {
1290 			ret = call->raw_init(call);
1291 			if (ret < 0) {
1292 				if (ret != -ENOSYS)
1293 					pr_warning("Could not initialize trace "
1294 					"point events/%s\n", call->name);
1295 				continue;
1296 			}
1297 		}
1298 		ret = event_create_dir(call, d_events, &ftrace_event_id_fops,
1299 				       &ftrace_enable_fops,
1300 				       &ftrace_event_filter_fops,
1301 				       &ftrace_event_format_fops);
1302 		if (!ret)
1303 			list_add(&call->list, &ftrace_events);
1304 	}
1305 
1306 	while (true) {
1307 		token = strsep(&buf, ",");
1308 
1309 		if (!token)
1310 			break;
1311 		if (!*token)
1312 			continue;
1313 
1314 		ret = ftrace_set_clr_event(token, 1);
1315 		if (ret)
1316 			pr_warning("Failed to enable trace event: %s\n", token);
1317 	}
1318 
1319 	ret = register_module_notifier(&trace_module_nb);
1320 	if (ret)
1321 		pr_warning("Failed to register trace events module notifier\n");
1322 
1323 	return 0;
1324 }
1325 fs_initcall(event_trace_init);
1326 
1327 #ifdef CONFIG_FTRACE_STARTUP_TEST
1328 
1329 static DEFINE_SPINLOCK(test_spinlock);
1330 static DEFINE_SPINLOCK(test_spinlock_irq);
1331 static DEFINE_MUTEX(test_mutex);
1332 
1333 static __init void test_work(struct work_struct *dummy)
1334 {
1335 	spin_lock(&test_spinlock);
1336 	spin_lock_irq(&test_spinlock_irq);
1337 	udelay(1);
1338 	spin_unlock_irq(&test_spinlock_irq);
1339 	spin_unlock(&test_spinlock);
1340 
1341 	mutex_lock(&test_mutex);
1342 	msleep(1);
1343 	mutex_unlock(&test_mutex);
1344 }
1345 
1346 static __init int event_test_thread(void *unused)
1347 {
1348 	void *test_malloc;
1349 
1350 	test_malloc = kmalloc(1234, GFP_KERNEL);
1351 	if (!test_malloc)
1352 		pr_info("failed to kmalloc\n");
1353 
1354 	schedule_on_each_cpu(test_work);
1355 
1356 	kfree(test_malloc);
1357 
1358 	set_current_state(TASK_INTERRUPTIBLE);
1359 	while (!kthread_should_stop())
1360 		schedule();
1361 
1362 	return 0;
1363 }
1364 
1365 /*
1366  * Do various things that may trigger events.
1367  */
1368 static __init void event_test_stuff(void)
1369 {
1370 	struct task_struct *test_thread;
1371 
1372 	test_thread = kthread_run(event_test_thread, NULL, "test-events");
1373 	msleep(1);
1374 	kthread_stop(test_thread);
1375 }
1376 
1377 /*
1378  * For every trace event defined, we will test each trace point separately,
1379  * and then by groups, and finally all trace points.
1380  */
1381 static __init void event_trace_self_tests(void)
1382 {
1383 	struct ftrace_event_call *call;
1384 	struct event_subsystem *system;
1385 	int ret;
1386 
1387 	pr_info("Running tests on trace events:\n");
1388 
1389 	list_for_each_entry(call, &ftrace_events, list) {
1390 
1391 		/* Only test those that have a regfunc */
1392 		if (!call->regfunc)
1393 			continue;
1394 
1395 /*
1396  * Testing syscall events here is pretty useless, but
1397  * we still do it if configured.  It is time consuming, though;
1398  * what we really need is a user thread to perform the
1399  * syscalls as we test.
1400  */
1401 #ifndef CONFIG_EVENT_TRACE_TEST_SYSCALLS
1402 		if (call->system &&
1403 		    strcmp(call->system, "syscalls") == 0)
1404 			continue;
1405 #endif
1406 
1407 		pr_info("Testing event %s: ", call->name);
1408 
1409 		/*
1410 		 * If an event is already enabled, someone is using
1411 		 * it and the self test should not be on.
1412 		 */
1413 		if (call->enabled) {
1414 			pr_warning("Enabled event during self test!\n");
1415 			WARN_ON_ONCE(1);
1416 			continue;
1417 		}
1418 
1419 		ftrace_event_enable_disable(call, 1);
1420 		event_test_stuff();
1421 		ftrace_event_enable_disable(call, 0);
1422 
1423 		pr_cont("OK\n");
1424 	}
1425 
1426 	/* Now test at the sub system level */
1427 
1428 	pr_info("Running tests on trace event systems:\n");
1429 
1430 	list_for_each_entry(system, &event_subsystems, list) {
1431 
1432 		/* the ftrace system is special, skip it */
1433 		if (strcmp(system->name, "ftrace") == 0)
1434 			continue;
1435 
1436 		pr_info("Testing event system %s: ", system->name);
1437 
1438 		ret = __ftrace_set_clr_event(NULL, system->name, NULL, 1);
1439 		if (WARN_ON_ONCE(ret)) {
1440 			pr_warning("error enabling system %s\n",
1441 				   system->name);
1442 			continue;
1443 		}
1444 
1445 		event_test_stuff();
1446 
1447 		ret = __ftrace_set_clr_event(NULL, system->name, NULL, 0);
1448 		if (WARN_ON_ONCE(ret))
1449 			pr_warning("error disabling system %s\n",
1450 				   system->name);
1451 
1452 		pr_cont("OK\n");
1453 	}
1454 
1455 	/* Test with all events enabled */
1456 
1457 	pr_info("Running tests on all trace events:\n");
1458 	pr_info("Testing all events: ");
1459 
1460 	ret = __ftrace_set_clr_event(NULL, NULL, NULL, 1);
1461 	if (WARN_ON_ONCE(ret)) {
1462 		pr_warning("error enabling all events\n");
1463 		return;
1464 	}
1465 
1466 	event_test_stuff();
1467 
1468 	/* reset: disable all events again */
1469 	ret = __ftrace_set_clr_event(NULL, NULL, NULL, 0);
1470 	if (WARN_ON_ONCE(ret)) {
1471 		pr_warning("error disabling all events\n");
1472 		return;
1473 	}
1474 
1475 	pr_cont("OK\n");
1476 }
1477 
1478 #ifdef CONFIG_FUNCTION_TRACER
1479 
1480 static DEFINE_PER_CPU(atomic_t, ftrace_test_event_disable);
1481 
1482 static void
1483 function_test_events_call(unsigned long ip, unsigned long parent_ip)
1484 {
1485 	struct ring_buffer_event *event;
1486 	struct ring_buffer *buffer;
1487 	struct ftrace_entry *entry;
1488 	unsigned long flags;
1489 	long disabled;
1490 	int resched;
1491 	int cpu;
1492 	int pc;
1493 
1494 	pc = preempt_count();
1495 	resched = ftrace_preempt_disable();
1496 	cpu = raw_smp_processor_id();
1497 	disabled = atomic_inc_return(&per_cpu(ftrace_test_event_disable, cpu));
1498 
1499 	if (disabled != 1)
1500 		goto out;
1501 
1502 	local_save_flags(flags);
1503 
1504 	event = trace_current_buffer_lock_reserve(&buffer,
1505 						  TRACE_FN, sizeof(*entry),
1506 						  flags, pc);
1507 	if (!event)
1508 		goto out;
1509 	entry	= ring_buffer_event_data(event);
1510 	entry->ip			= ip;
1511 	entry->parent_ip		= parent_ip;
1512 
1513 	trace_nowake_buffer_unlock_commit(buffer, event, flags, pc);
1514 
1515  out:
1516 	atomic_dec(&per_cpu(ftrace_test_event_disable, cpu));
1517 	ftrace_preempt_enable(resched);
1518 }
1519 
1520 static struct ftrace_ops trace_ops __initdata  =
1521 {
1522 	.func = function_test_events_call,
1523 };
1524 
1525 static __init void event_trace_self_test_with_function(void)
1526 {
1527 	register_ftrace_function(&trace_ops);
1528 	pr_info("Running tests again, along with the function tracer\n");
1529 	event_trace_self_tests();
1530 	unregister_ftrace_function(&trace_ops);
1531 }
1532 #else
1533 static __init void event_trace_self_test_with_function(void)
1534 {
1535 }
1536 #endif
1537 
1538 static __init int event_trace_self_tests_init(void)
1539 {
1540 	if (!tracing_selftest_disabled) {
1541 		event_trace_self_tests();
1542 		event_trace_self_test_with_function();
1543 	}
1544 
1545 	return 0;
1546 }
1547 
1548 late_initcall(event_trace_self_tests_init);
1549 
1550 #endif
1551