xref: /linux/kernel/trace/trace_events.c (revision f8324e20f8289dffc646d64366332e05eaacab25)
1 /*
2  * event tracer
3  *
4  * Copyright (C) 2008 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
5  *
6  *  - Added format output of fields of the trace point.
7  *    This was based on work by Tom Zanussi <tzanussi@gmail.com>.
8  *
9  */
10 
11 #include <linux/workqueue.h>
12 #include <linux/spinlock.h>
13 #include <linux/kthread.h>
14 #include <linux/debugfs.h>
15 #include <linux/uaccess.h>
16 #include <linux/module.h>
17 #include <linux/ctype.h>
18 #include <linux/slab.h>
19 #include <linux/delay.h>
20 
21 #include <asm/setup.h>
22 
23 #include "trace_output.h"
24 
25 #undef TRACE_SYSTEM
26 #define TRACE_SYSTEM "TRACE_SYSTEM"
27 
28 DEFINE_MUTEX(event_mutex);
29 
30 LIST_HEAD(ftrace_events);
31 
32 struct list_head *
33 trace_get_fields(struct ftrace_event_call *event_call)
34 {
35 	if (!event_call->class->get_fields)
36 		return &event_call->class->fields;
37 	return event_call->class->get_fields(event_call);
38 }
39 
40 int trace_define_field(struct ftrace_event_call *call, const char *type,
41 		       const char *name, int offset, int size, int is_signed,
42 		       int filter_type)
43 {
44 	struct ftrace_event_field *field;
45 	struct list_head *head;
46 
47 	if (WARN_ON(!call->class))
48 		return 0;
49 
50 	field = kzalloc(sizeof(*field), GFP_KERNEL);
51 	if (!field)
52 		goto err;
53 
54 	field->name = kstrdup(name, GFP_KERNEL);
55 	if (!field->name)
56 		goto err;
57 
58 	field->type = kstrdup(type, GFP_KERNEL);
59 	if (!field->type)
60 		goto err;
61 
62 	if (filter_type == FILTER_OTHER)
63 		field->filter_type = filter_assign_type(type);
64 	else
65 		field->filter_type = filter_type;
66 
67 	field->offset = offset;
68 	field->size = size;
69 	field->is_signed = is_signed;
70 
71 	head = trace_get_fields(call);
72 	list_add(&field->link, head);
73 
74 	return 0;
75 
76 err:
77 	if (field)
78 		kfree(field->name);
79 	kfree(field);
80 
81 	return -ENOMEM;
82 }
83 EXPORT_SYMBOL_GPL(trace_define_field);
84 
85 #define __common_field(type, item)					\
86 	ret = trace_define_field(call, #type, "common_" #item,		\
87 				 offsetof(typeof(ent), item),		\
88 				 sizeof(ent.item),			\
89 				 is_signed_type(type), FILTER_OTHER);	\
90 	if (ret)							\
91 		return ret;
92 
93 static int trace_define_common_fields(struct ftrace_event_call *call)
94 {
95 	int ret;
96 	struct trace_entry ent;
97 
98 	__common_field(unsigned short, type);
99 	__common_field(unsigned char, flags);
100 	__common_field(unsigned char, preempt_count);
101 	__common_field(int, pid);
102 	__common_field(int, lock_depth);
103 
104 	return ret;
105 }
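/*
 * For illustration only (not part of the build): a single
 * __common_field() use above expands to roughly the following,
 * assuming the field layout of struct trace_entry:
 *
 *	ret = trace_define_field(call, "int", "common_pid",
 *				 offsetof(struct trace_entry, pid),
 *				 sizeof(((struct trace_entry *)0)->pid),
 *				 is_signed_type(int), FILTER_OTHER);
 *	if (ret)
 *		return ret;
 */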
106 
107 void trace_destroy_fields(struct ftrace_event_call *call)
108 {
109 	struct ftrace_event_field *field, *next;
110 	struct list_head *head;
111 
112 	head = trace_get_fields(call);
113 	list_for_each_entry_safe(field, next, head, link) {
114 		list_del(&field->link);
115 		kfree(field->type);
116 		kfree(field->name);
117 		kfree(field);
118 	}
119 }
120 
121 int trace_event_raw_init(struct ftrace_event_call *call)
122 {
123 	int id;
124 
125 	id = register_ftrace_event(&call->event);
126 	if (!id)
127 		return -ENODEV;
128 
129 	return 0;
130 }
131 EXPORT_SYMBOL_GPL(trace_event_raw_init);
132 
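/*
 * Descriptive note: the enable/disable paths below flip
 * TRACE_EVENT_FL_ENABLED and (un)register the tracepoint probe.
 * The debugfs and set_event callers hold event_mutex around this
 * call, which serializes the flag test and update.
 */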
133 static int ftrace_event_enable_disable(struct ftrace_event_call *call,
134 					int enable)
135 {
136 	int ret = 0;
137 
138 	switch (enable) {
139 	case 0:
140 		if (call->flags & TRACE_EVENT_FL_ENABLED) {
141 			call->flags &= ~TRACE_EVENT_FL_ENABLED;
142 			tracing_stop_cmdline_record();
143 			if (call->class->reg)
144 				call->class->reg(call, TRACE_REG_UNREGISTER);
145 			else
146 				tracepoint_probe_unregister(call->name,
147 							    call->class->probe,
148 							    call);
149 		}
150 		break;
151 	case 1:
152 		if (!(call->flags & TRACE_EVENT_FL_ENABLED)) {
153 			tracing_start_cmdline_record();
154 			if (call->class->reg)
155 				ret = call->class->reg(call, TRACE_REG_REGISTER);
156 			else
157 				ret = tracepoint_probe_register(call->name,
158 								call->class->probe,
159 								call);
160 			if (ret) {
161 				tracing_stop_cmdline_record();
162 				pr_info("event trace: Could not enable event "
163 					"%s\n", call->name);
164 				break;
165 			}
166 			call->flags |= TRACE_EVENT_FL_ENABLED;
167 		}
168 		break;
169 	}
170 
171 	return ret;
172 }
173 
174 static void ftrace_clear_events(void)
175 {
176 	struct ftrace_event_call *call;
177 
178 	mutex_lock(&event_mutex);
179 	list_for_each_entry(call, &ftrace_events, list) {
180 		ftrace_event_enable_disable(call, 0);
181 	}
182 	mutex_unlock(&event_mutex);
183 }
184 
185 /*
186  * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events.
187  */
188 static int __ftrace_set_clr_event(const char *match, const char *sub,
189 				  const char *event, int set)
190 {
191 	struct ftrace_event_call *call;
192 	int ret = -EINVAL;
193 
194 	mutex_lock(&event_mutex);
195 	list_for_each_entry(call, &ftrace_events, list) {
196 
197 		if (!call->name || !call->class ||
198 		    (!call->class->probe && !call->class->reg))
199 			continue;
200 
201 		if (match &&
202 		    strcmp(match, call->name) != 0 &&
203 		    strcmp(match, call->class->system) != 0)
204 			continue;
205 
206 		if (sub && strcmp(sub, call->class->system) != 0)
207 			continue;
208 
209 		if (event && strcmp(event, call->name) != 0)
210 			continue;
211 
212 		ftrace_event_enable_disable(call, set);
213 
214 		ret = 0;
215 	}
216 	mutex_unlock(&event_mutex);
217 
218 	return ret;
219 }
220 
221 static int ftrace_set_clr_event(char *buf, int set)
222 {
223 	char *event = NULL, *sub = NULL, *match;
224 
225 	/*
226 	 * The buf format can be <subsystem>:<event-name>
227 	 *  *:<event-name> means any event by that name.
228 	 *  :<event-name> is the same.
229 	 *
230 	 *  <subsystem>:* means all events in that subsystem
231 	 *  <subsystem>: means the same.
232 	 *
233 	 *  <name> (no ':') means all events in a subsystem with
234 	 *  the name <name> or any event that matches <name>
235 	 */
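	/*
	 * Illustrative values only -- a few inputs this accepts:
	 *   "sched:sched_switch"   one event in the sched subsystem
	 *   "sched:"               every event in the sched subsystem
	 *   "kfree"                any event (or subsystem) named kfree
	 */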
236 
237 	match = strsep(&buf, ":");
238 	if (buf) {
239 		sub = match;
240 		event = buf;
241 		match = NULL;
242 
243 		if (!strlen(sub) || strcmp(sub, "*") == 0)
244 			sub = NULL;
245 		if (!strlen(event) || strcmp(event, "*") == 0)
246 			event = NULL;
247 	}
248 
249 	return __ftrace_set_clr_event(match, sub, event, set);
250 }
251 
252 /**
253  * trace_set_clr_event - enable or disable an event
254  * @system: system name to match (NULL for any system)
255  * @event: event name to match (NULL for all events, within system)
256  * @set: 1 to enable, 0 to disable
257  *
258  * This is a way for other parts of the kernel to enable or disable
259  * event recording.
260  *
261  * Returns 0 on success, -EINVAL if the parameters do not match any
262  * registered events.
263  */
264 int trace_set_clr_event(const char *system, const char *event, int set)
265 {
266 	return __ftrace_set_clr_event(NULL, system, event, set);
267 }
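/*
 * Sketch of in-kernel usage (hypothetical caller, for illustration):
 *
 *	if (trace_set_clr_event("sched", "sched_wakeup", 1))
 *		pr_warning("could not enable sched_wakeup\n");
 *	...
 *	trace_set_clr_event("sched", "sched_wakeup", 0);
 */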
268 
269 /* 127 chars (+ the parser's NUL) should be much more than enough */
270 #define EVENT_BUF_SIZE		127
271 
272 static ssize_t
273 ftrace_event_write(struct file *file, const char __user *ubuf,
274 		   size_t cnt, loff_t *ppos)
275 {
276 	struct trace_parser parser;
277 	ssize_t read, ret;
278 
279 	if (!cnt)
280 		return 0;
281 
282 	ret = tracing_update_buffers();
283 	if (ret < 0)
284 		return ret;
285 
286 	if (trace_parser_get_init(&parser, EVENT_BUF_SIZE + 1))
287 		return -ENOMEM;
288 
289 	read = trace_get_user(&parser, ubuf, cnt, ppos);
290 
291 	if (read >= 0 && trace_parser_loaded(&parser)) {
292 		int set = 1;
293 
294 		if (*parser.buffer == '!')
295 			set = 0;
296 
297 		parser.buffer[parser.idx] = 0;
298 
299 		ret = ftrace_set_clr_event(parser.buffer + !set, set);
300 		if (ret)
301 			goto out_put;
302 	}
303 
304 	ret = read;
305 
306  out_put:
307 	trace_parser_put(&parser);
308 
309 	return ret;
310 }
311 
312 static void *
313 t_next(struct seq_file *m, void *v, loff_t *pos)
314 {
315 	struct ftrace_event_call *call = v;
316 
317 	(*pos)++;
318 
319 	list_for_each_entry_continue(call, &ftrace_events, list) {
320 		/*
321 		 * The ftrace subsystem is for showing formats only.
322 		 * Its events cannot be enabled or disabled via the event files.
323 		 */
324 		if (call->class && (call->class->probe || call->class->reg))
325 			return call;
326 	}
327 
328 	return NULL;
329 }
330 
331 static void *t_start(struct seq_file *m, loff_t *pos)
332 {
333 	struct ftrace_event_call *call;
334 	loff_t l;
335 
336 	mutex_lock(&event_mutex);
337 
338 	call = list_entry(&ftrace_events, struct ftrace_event_call, list);
339 	for (l = 0; l <= *pos; ) {
340 		call = t_next(m, call, &l);
341 		if (!call)
342 			break;
343 	}
344 	return call;
345 }
346 
347 static void *
348 s_next(struct seq_file *m, void *v, loff_t *pos)
349 {
350 	struct ftrace_event_call *call = v;
351 
352 	(*pos)++;
353 
354 	list_for_each_entry_continue(call, &ftrace_events, list) {
355 		if (call->flags & TRACE_EVENT_FL_ENABLED)
356 			return call;
357 	}
358 
359 	return NULL;
360 }
361 
362 static void *s_start(struct seq_file *m, loff_t *pos)
363 {
364 	struct ftrace_event_call *call;
365 	loff_t l;
366 
367 	mutex_lock(&event_mutex);
368 
369 	call = list_entry(&ftrace_events, struct ftrace_event_call, list);
370 	for (l = 0; l <= *pos; ) {
371 		call = s_next(m, call, &l);
372 		if (!call)
373 			break;
374 	}
375 	return call;
376 }
377 
378 static int t_show(struct seq_file *m, void *v)
379 {
380 	struct ftrace_event_call *call = v;
381 
382 	if (strcmp(call->class->system, TRACE_SYSTEM) != 0)
383 		seq_printf(m, "%s:", call->class->system);
384 	seq_printf(m, "%s\n", call->name);
385 
386 	return 0;
387 }
388 
389 static void t_stop(struct seq_file *m, void *p)
390 {
391 	mutex_unlock(&event_mutex);
392 }
393 
394 static int
395 ftrace_event_seq_open(struct inode *inode, struct file *file)
396 {
397 	const struct seq_operations *seq_ops;
398 
399 	if ((file->f_mode & FMODE_WRITE) &&
400 	    (file->f_flags & O_TRUNC))
401 		ftrace_clear_events();
402 
403 	seq_ops = inode->i_private;
404 	return seq_open(file, seq_ops);
405 }
406 
407 static ssize_t
408 event_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
409 		  loff_t *ppos)
410 {
411 	struct ftrace_event_call *call = filp->private_data;
412 	char *buf;
413 
414 	if (call->flags & TRACE_EVENT_FL_ENABLED)
415 		buf = "1\n";
416 	else
417 		buf = "0\n";
418 
419 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
420 }
421 
422 static ssize_t
423 event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
424 		   loff_t *ppos)
425 {
426 	struct ftrace_event_call *call = filp->private_data;
427 	char buf[64];
428 	unsigned long val;
429 	int ret;
430 
431 	if (cnt >= sizeof(buf))
432 		return -EINVAL;
433 
434 	if (copy_from_user(&buf, ubuf, cnt))
435 		return -EFAULT;
436 
437 	buf[cnt] = 0;
438 
439 	ret = strict_strtoul(buf, 10, &val);
440 	if (ret < 0)
441 		return ret;
442 
443 	ret = tracing_update_buffers();
444 	if (ret < 0)
445 		return ret;
446 
447 	switch (val) {
448 	case 0:
449 	case 1:
450 		mutex_lock(&event_mutex);
451 		ret = ftrace_event_enable_disable(call, val);
452 		mutex_unlock(&event_mutex);
453 		break;
454 
455 	default:
456 		return -EINVAL;
457 	}
458 
459 	*ppos += cnt;
460 
461 	return ret ? ret : cnt;
462 }
463 
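/*
 * Reading an "enable" file (per subsystem, or the top-level one)
 * reports '1' if every matching event is enabled, '0' if every one
 * is disabled, 'X' for a mixture, and '?' if no events matched;
 * the loop below builds that summary.
 */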
464 static ssize_t
465 system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
466 		   loff_t *ppos)
467 {
468 	const char set_to_char[4] = { '?', '0', '1', 'X' };
469 	const char *system = filp->private_data;
470 	struct ftrace_event_call *call;
471 	char buf[2];
472 	int set = 0;
473 	int ret;
474 
475 	mutex_lock(&event_mutex);
476 	list_for_each_entry(call, &ftrace_events, list) {
477 		if (!call->name || !call->class ||
478 		    (!call->class->probe && !call->class->reg))
479 			continue;
480 
481 		if (system && strcmp(call->class->system, system) != 0)
482 			continue;
483 
484 		/*
485 		 * We need to find out if all the events are set,
486 		 * all are cleared, or if we have a mixture: bit 0 of
487 		 * 'set' records a disabled event, bit 1 an enabled one.
488 		 */
489 		set |= (1 << !!(call->flags & TRACE_EVENT_FL_ENABLED));
490 
491 		/*
492 		 * If we have a mixture, no need to look further.
493 		 */
494 		if (set == 3)
495 			break;
496 	}
497 	mutex_unlock(&event_mutex);
498 
499 	buf[0] = set_to_char[set];
500 	buf[1] = '\n';
501 
502 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
503 
504 	return ret;
505 }
506 
507 static ssize_t
508 system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
509 		    loff_t *ppos)
510 {
511 	const char *system = filp->private_data;
512 	unsigned long val;
513 	char buf[64];
514 	ssize_t ret;
515 
516 	if (cnt >= sizeof(buf))
517 		return -EINVAL;
518 
519 	if (copy_from_user(&buf, ubuf, cnt))
520 		return -EFAULT;
521 
522 	buf[cnt] = 0;
523 
524 	ret = strict_strtoul(buf, 10, &val);
525 	if (ret < 0)
526 		return ret;
527 
528 	ret = tracing_update_buffers();
529 	if (ret < 0)
530 		return ret;
531 
532 	if (val != 0 && val != 1)
533 		return -EINVAL;
534 
535 	ret = __ftrace_set_clr_event(NULL, system, NULL, val);
536 	if (ret)
537 		goto out;
538 
539 	ret = cnt;
540 
541 out:
542 	*ppos += cnt;
543 
544 	return ret;
545 }
546 
547 static ssize_t
548 event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
549 		  loff_t *ppos)
550 {
551 	struct ftrace_event_call *call = filp->private_data;
552 	struct ftrace_event_field *field;
553 	struct list_head *head;
554 	struct trace_seq *s;
555 	int common_field_count = 5;
556 	char *buf;
557 	int r = 0;
558 
559 	if (*ppos)
560 		return 0;
561 
562 	s = kmalloc(sizeof(*s), GFP_KERNEL);
563 	if (!s)
564 		return -ENOMEM;
565 
566 	trace_seq_init(s);
567 
568 	trace_seq_printf(s, "name: %s\n", call->name);
569 	trace_seq_printf(s, "ID: %d\n", call->event.type);
570 	trace_seq_printf(s, "format:\n");
571 
572 	head = trace_get_fields(call);
573 	list_for_each_entry_reverse(field, head, link) {
574 		/*
575 		 * Smartly shows the array type (except for dynamic arrays).
576 		 * Normal:
577 		 *	field:TYPE VAR
578 		 * If TYPE := TYPE[LEN], it is shown:
579 		 *	field:TYPE VAR[LEN]
580 		 */
581 		const char *array_descriptor = strchr(field->type, '[');
582 
583 		if (!strncmp(field->type, "__data_loc", 10))
584 			array_descriptor = NULL;
585 
586 		if (!array_descriptor) {
587 			r = trace_seq_printf(s, "\tfield:%s %s;\toffset:%u;"
588 					"\tsize:%u;\tsigned:%d;\n",
589 					field->type, field->name, field->offset,
590 					field->size, !!field->is_signed);
591 		} else {
592 			r = trace_seq_printf(s, "\tfield:%.*s %s%s;\toffset:%u;"
593 					"\tsize:%u;\tsigned:%d;\n",
594 					(int)(array_descriptor - field->type),
595 					field->type, field->name,
596 					array_descriptor, field->offset,
597 					field->size, !!field->is_signed);
598 		}
599 
600 		if (--common_field_count == 0)
601 			r = trace_seq_printf(s, "\n");
602 
603 		if (!r)
604 			break;
605 	}
606 
607 	if (r)
608 		r = trace_seq_printf(s, "\nprint fmt: %s\n",
609 				call->print_fmt);
610 
611 	if (!r) {
612 		/*
613 		 * ug!  The format output is bigger than a PAGE!!
614 		 */
615 		buf = "FORMAT TOO BIG\n";
616 		r = simple_read_from_buffer(ubuf, cnt, ppos,
617 					      buf, strlen(buf));
618 		goto out;
619 	}
620 
621 	r = simple_read_from_buffer(ubuf, cnt, ppos,
622 				    s->buffer, s->len);
623  out:
624 	kfree(s);
625 	return r;
626 }
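/*
 * Abridged example of what the "format" file produced above looks
 * like (event name, ID and fields are illustrative only):
 *
 *	name: sched_wakeup
 *	ID: 53
 *	format:
 *		field:unsigned short common_type;	offset:0;	size:2;	signed:0;
 *		...
 *
 *	print fmt: "comm=%s pid=%d prio=%d", REC->comm, REC->pid, REC->prio
 */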
627 
628 static ssize_t
629 event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
630 {
631 	struct ftrace_event_call *call = filp->private_data;
632 	struct trace_seq *s;
633 	int r;
634 
635 	if (*ppos)
636 		return 0;
637 
638 	s = kmalloc(sizeof(*s), GFP_KERNEL);
639 	if (!s)
640 		return -ENOMEM;
641 
642 	trace_seq_init(s);
643 	trace_seq_printf(s, "%d\n", call->event.type);
644 
645 	r = simple_read_from_buffer(ubuf, cnt, ppos,
646 				    s->buffer, s->len);
647 	kfree(s);
648 	return r;
649 }
650 
651 static ssize_t
652 event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
653 		  loff_t *ppos)
654 {
655 	struct ftrace_event_call *call = filp->private_data;
656 	struct trace_seq *s;
657 	int r;
658 
659 	if (*ppos)
660 		return 0;
661 
662 	s = kmalloc(sizeof(*s), GFP_KERNEL);
663 	if (!s)
664 		return -ENOMEM;
665 
666 	trace_seq_init(s);
667 
668 	print_event_filter(call, s);
669 	r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
670 
671 	kfree(s);
672 
673 	return r;
674 }
675 
676 static ssize_t
677 event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
678 		   loff_t *ppos)
679 {
680 	struct ftrace_event_call *call = filp->private_data;
681 	char *buf;
682 	int err;
683 
684 	if (cnt >= PAGE_SIZE)
685 		return -EINVAL;
686 
687 	buf = (char *)__get_free_page(GFP_TEMPORARY);
688 	if (!buf)
689 		return -ENOMEM;
690 
691 	if (copy_from_user(buf, ubuf, cnt)) {
692 		free_page((unsigned long) buf);
693 		return -EFAULT;
694 	}
695 	buf[cnt] = '\0';
696 
697 	err = apply_event_filter(call, buf);
698 	free_page((unsigned long) buf);
699 	if (err < 0)
700 		return err;
701 
702 	*ppos += cnt;
703 
704 	return cnt;
705 }
706 
707 static ssize_t
708 subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
709 		      loff_t *ppos)
710 {
711 	struct event_subsystem *system = filp->private_data;
712 	struct trace_seq *s;
713 	int r;
714 
715 	if (*ppos)
716 		return 0;
717 
718 	s = kmalloc(sizeof(*s), GFP_KERNEL);
719 	if (!s)
720 		return -ENOMEM;
721 
722 	trace_seq_init(s);
723 
724 	print_subsystem_event_filter(system, s);
725 	r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
726 
727 	kfree(s);
728 
729 	return r;
730 }
731 
732 static ssize_t
733 subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
734 		       loff_t *ppos)
735 {
736 	struct event_subsystem *system = filp->private_data;
737 	char *buf;
738 	int err;
739 
740 	if (cnt >= PAGE_SIZE)
741 		return -EINVAL;
742 
743 	buf = (char *)__get_free_page(GFP_TEMPORARY);
744 	if (!buf)
745 		return -ENOMEM;
746 
747 	if (copy_from_user(buf, ubuf, cnt)) {
748 		free_page((unsigned long) buf);
749 		return -EFAULT;
750 	}
751 	buf[cnt] = '\0';
752 
753 	err = apply_subsystem_event_filter(system, buf);
754 	free_page((unsigned long) buf);
755 	if (err < 0)
756 		return err;
757 
758 	*ppos += cnt;
759 
760 	return cnt;
761 }
762 
763 static ssize_t
764 show_header(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
765 {
766 	int (*func)(struct trace_seq *s) = filp->private_data;
767 	struct trace_seq *s;
768 	int r;
769 
770 	if (*ppos)
771 		return 0;
772 
773 	s = kmalloc(sizeof(*s), GFP_KERNEL);
774 	if (!s)
775 		return -ENOMEM;
776 
777 	trace_seq_init(s);
778 
779 	func(s);
780 	r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
781 
782 	kfree(s);
783 
784 	return r;
785 }
786 
787 static const struct seq_operations show_event_seq_ops = {
788 	.start = t_start,
789 	.next = t_next,
790 	.show = t_show,
791 	.stop = t_stop,
792 };
793 
794 static const struct seq_operations show_set_event_seq_ops = {
795 	.start = s_start,
796 	.next = s_next,
797 	.show = t_show,
798 	.stop = t_stop,
799 };
800 
801 static const struct file_operations ftrace_avail_fops = {
802 	.open = ftrace_event_seq_open,
803 	.read = seq_read,
804 	.llseek = seq_lseek,
805 	.release = seq_release,
806 };
807 
808 static const struct file_operations ftrace_set_event_fops = {
809 	.open = ftrace_event_seq_open,
810 	.read = seq_read,
811 	.write = ftrace_event_write,
812 	.llseek = seq_lseek,
813 	.release = seq_release,
814 };
815 
816 static const struct file_operations ftrace_enable_fops = {
817 	.open = tracing_open_generic,
818 	.read = event_enable_read,
819 	.write = event_enable_write,
820 };
821 
822 static const struct file_operations ftrace_event_format_fops = {
823 	.open = tracing_open_generic,
824 	.read = event_format_read,
825 };
826 
827 static const struct file_operations ftrace_event_id_fops = {
828 	.open = tracing_open_generic,
829 	.read = event_id_read,
830 };
831 
832 static const struct file_operations ftrace_event_filter_fops = {
833 	.open = tracing_open_generic,
834 	.read = event_filter_read,
835 	.write = event_filter_write,
836 };
837 
838 static const struct file_operations ftrace_subsystem_filter_fops = {
839 	.open = tracing_open_generic,
840 	.read = subsystem_filter_read,
841 	.write = subsystem_filter_write,
842 };
843 
844 static const struct file_operations ftrace_system_enable_fops = {
845 	.open = tracing_open_generic,
846 	.read = system_enable_read,
847 	.write = system_enable_write,
848 };
849 
850 static const struct file_operations ftrace_show_header_fops = {
851 	.open = tracing_open_generic,
852 	.read = show_header,
853 };
854 
855 static struct dentry *event_trace_events_dir(void)
856 {
857 	static struct dentry *d_tracer;
858 	static struct dentry *d_events;
859 
860 	if (d_events)
861 		return d_events;
862 
863 	d_tracer = tracing_init_dentry();
864 	if (!d_tracer)
865 		return NULL;
866 
867 	d_events = debugfs_create_dir("events", d_tracer);
868 	if (!d_events)
869 		pr_warning("Could not create debugfs "
870 			   "'events' directory\n");
871 
872 	return d_events;
873 }
874 
875 static LIST_HEAD(event_subsystems);
876 
877 static struct dentry *
878 event_subsystem_dir(const char *name, struct dentry *d_events)
879 {
880 	struct event_subsystem *system;
881 	struct dentry *entry;
882 
883 	/* First see if we have already created this dir */
884 	list_for_each_entry(system, &event_subsystems, list) {
885 		if (strcmp(system->name, name) == 0) {
886 			system->nr_events++;
887 			return system->entry;
888 		}
889 	}
890 
891 	/* need to create new entry */
892 	system = kmalloc(sizeof(*system), GFP_KERNEL);
893 	if (!system) {
894 		pr_warning("No memory to create event subsystem %s\n",
895 			   name);
896 		return d_events;
897 	}
898 
899 	system->entry = debugfs_create_dir(name, d_events);
900 	if (!system->entry) {
901 		pr_warning("Could not create event subsystem %s\n",
902 			   name);
903 		kfree(system);
904 		return d_events;
905 	}
906 
907 	system->nr_events = 1;
908 	system->name = kstrdup(name, GFP_KERNEL);
909 	if (!system->name) {
910 		debugfs_remove(system->entry);
911 		kfree(system);
912 		return d_events;
913 	}
914 
915 	list_add(&system->list, &event_subsystems);
916 
917 	system->filter = NULL;
918 
919 	system->filter = kzalloc(sizeof(struct event_filter), GFP_KERNEL);
920 	if (!system->filter) {
921 		pr_warning("Could not allocate filter for subsystem "
922 			   "'%s'\n", name);
923 		return system->entry;
924 	}
925 
926 	entry = debugfs_create_file("filter", 0644, system->entry, system,
927 				    &ftrace_subsystem_filter_fops);
928 	if (!entry) {
929 		kfree(system->filter);
930 		system->filter = NULL;
931 		pr_warning("Could not create debugfs "
932 			   "'%s/filter' entry\n", name);
933 	}
934 
935 	trace_create_file("enable", 0644, system->entry,
936 			  (void *)system->name,
937 			  &ftrace_system_enable_fops);
938 
939 	return system->entry;
940 }
941 
942 static int
943 event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
944 		 const struct file_operations *id,
945 		 const struct file_operations *enable,
946 		 const struct file_operations *filter,
947 		 const struct file_operations *format)
948 {
949 	struct list_head *head;
950 	int ret;
951 
952 	/*
953 	 * If the trace point header did not define TRACE_SYSTEM,
954 	 * the system is called "TRACE_SYSTEM" and no subsystem dir is made.
955 	 */
956 	if (strcmp(call->class->system, TRACE_SYSTEM) != 0)
957 		d_events = event_subsystem_dir(call->class->system, d_events);
958 
959 	call->dir = debugfs_create_dir(call->name, d_events);
960 	if (!call->dir) {
961 		pr_warning("Could not create debugfs "
962 			   "'%s' directory\n", call->name);
963 		return -1;
964 	}
965 
966 	if (call->class->probe || call->class->reg)
967 		trace_create_file("enable", 0644, call->dir, call,
968 				  enable);
969 
970 #ifdef CONFIG_PERF_EVENTS
971 	if (call->event.type && (call->class->perf_probe || call->class->reg))
972 		trace_create_file("id", 0444, call->dir, call,
973 				  id);
974 #endif
975 
976 	if (call->class->define_fields) {
977 		/*
978 		 * Other events may have the same class. Only update
979 		 * the fields if they are not already defined.
980 		 */
981 		head = trace_get_fields(call);
982 		if (list_empty(head)) {
983 			ret = trace_define_common_fields(call);
984 			if (!ret)
985 				ret = call->class->define_fields(call);
986 			if (ret < 0) {
987 				pr_warning("Could not initialize trace point"
988 					   " events/%s\n", call->name);
989 				return ret;
990 			}
991 		}
992 		trace_create_file("filter", 0644, call->dir, call,
993 				  filter);
994 	}
995 
996 	trace_create_file("format", 0444, call->dir, call,
997 			  format);
998 
999 	return 0;
1000 }
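/*
 * The directory layout this creates under debugfs (tracing/events/)
 * for a typical event:
 *
 *	events/<system>/<event>/enable	- 0/1 switch for this event
 *	events/<system>/<event>/id	- numeric event id (perf)
 *	events/<system>/<event>/filter	- per-event filter expression
 *	events/<system>/<event>/format	- field layout and print fmt
 */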
1001 
1002 static int __trace_add_event_call(struct ftrace_event_call *call)
1003 {
1004 	struct dentry *d_events;
1005 	int ret;
1006 
1007 	if (!call->name)
1008 		return -EINVAL;
1009 
1010 	if (call->class->raw_init) {
1011 		ret = call->class->raw_init(call);
1012 		if (ret < 0) {
1013 			if (ret != -ENOSYS)
1014 				pr_warning("Could not initialize trace "
1015 				"events/%s\n", call->name);
1016 			return ret;
1017 		}
1018 	}
1019 
1020 	d_events = event_trace_events_dir();
1021 	if (!d_events)
1022 		return -ENOENT;
1023 
1024 	ret = event_create_dir(call, d_events, &ftrace_event_id_fops,
1025 				&ftrace_enable_fops, &ftrace_event_filter_fops,
1026 				&ftrace_event_format_fops);
1027 	if (!ret)
1028 		list_add(&call->list, &ftrace_events);
1029 
1030 	return ret;
1031 }
1032 
1033 /* Add an additional event_call dynamically */
1034 int trace_add_event_call(struct ftrace_event_call *call)
1035 {
1036 	int ret;
1037 	mutex_lock(&event_mutex);
1038 	ret = __trace_add_event_call(call);
1039 	mutex_unlock(&event_mutex);
1040 	return ret;
1041 }
1042 
1043 static void remove_subsystem_dir(const char *name)
1044 {
1045 	struct event_subsystem *system;
1046 
1047 	if (strcmp(name, TRACE_SYSTEM) == 0)
1048 		return;
1049 
1050 	list_for_each_entry(system, &event_subsystems, list) {
1051 		if (strcmp(system->name, name) == 0) {
1052 			if (!--system->nr_events) {
1053 				struct event_filter *filter = system->filter;
1054 
1055 				debugfs_remove_recursive(system->entry);
1056 				list_del(&system->list);
1057 				if (filter) {
1058 					kfree(filter->filter_string);
1059 					kfree(filter);
1060 				}
1061 				kfree(system->name);
1062 				kfree(system);
1063 			}
1064 			break;
1065 		}
1066 	}
1067 }
1068 
1069 /*
1070  * Must be called with both event_mutex and trace_event_mutex held.
1071  */
1072 static void __trace_remove_event_call(struct ftrace_event_call *call)
1073 {
1074 	ftrace_event_enable_disable(call, 0);
1075 	if (call->event.funcs)
1076 		__unregister_ftrace_event(&call->event);
1077 	debugfs_remove_recursive(call->dir);
1078 	list_del(&call->list);
1079 	trace_destroy_fields(call);
1080 	destroy_preds(call);
1081 	remove_subsystem_dir(call->class->system);
1082 }
1083 
1084 /* Remove an event_call */
1085 void trace_remove_event_call(struct ftrace_event_call *call)
1086 {
1087 	mutex_lock(&event_mutex);
1088 	down_write(&trace_event_mutex);
1089 	__trace_remove_event_call(call);
1090 	up_write(&trace_event_mutex);
1091 	mutex_unlock(&event_mutex);
1092 }
1093 
1094 #define for_each_event(event, start, end)			\
1095 	for (event = start;					\
1096 	     (unsigned long)event < (unsigned long)end;		\
1097 	     event++)
1098 
1099 #ifdef CONFIG_MODULES
1100 
1101 static LIST_HEAD(ftrace_module_file_list);
1102 
1103 /*
1104  * Modules must own their file_operations so that module
1105  * reference counting works correctly.
1106  */
1107 struct ftrace_module_file_ops {
1108 	struct list_head		list;
1109 	struct module			*mod;
1110 	struct file_operations		id;
1111 	struct file_operations		enable;
1112 	struct file_operations		format;
1113 	struct file_operations		filter;
1114 };
1115 
1116 static struct ftrace_module_file_ops *
1117 trace_create_file_ops(struct module *mod)
1118 {
1119 	struct ftrace_module_file_ops *file_ops;
1120 
1121 	/*
1122 	 * This is a bit of a PITA. To allow for correct reference
1123 	 * counting, modules must "own" their file_operations.
1124 	 * To do this, we allocate the file operations that will be
1125 	 * used in the event directory.
1126 	 */
1127 
1128 	file_ops = kmalloc(sizeof(*file_ops), GFP_KERNEL);
1129 	if (!file_ops)
1130 		return NULL;
1131 
1132 	file_ops->mod = mod;
1133 
1134 	file_ops->id = ftrace_event_id_fops;
1135 	file_ops->id.owner = mod;
1136 
1137 	file_ops->enable = ftrace_enable_fops;
1138 	file_ops->enable.owner = mod;
1139 
1140 	file_ops->filter = ftrace_event_filter_fops;
1141 	file_ops->filter.owner = mod;
1142 
1143 	file_ops->format = ftrace_event_format_fops;
1144 	file_ops->format.owner = mod;
1145 
1146 	list_add(&file_ops->list, &ftrace_module_file_list);
1147 
1148 	return file_ops;
1149 }
1150 
1151 static void trace_module_add_events(struct module *mod)
1152 {
1153 	struct ftrace_module_file_ops *file_ops = NULL;
1154 	struct ftrace_event_call *call, *start, *end;
1155 	struct dentry *d_events;
1156 	int ret;
1157 
1158 	start = mod->trace_events;
1159 	end = mod->trace_events + mod->num_trace_events;
1160 
1161 	if (start == end)
1162 		return;
1163 
1164 	d_events = event_trace_events_dir();
1165 	if (!d_events)
1166 		return;
1167 
1168 	for_each_event(call, start, end) {
1169 		/* The linker may leave blanks */
1170 		if (!call->name)
1171 			continue;
1172 		if (call->class->raw_init) {
1173 			ret = call->class->raw_init(call);
1174 			if (ret < 0) {
1175 				if (ret != -ENOSYS)
1176 					pr_warning("Could not initialize trace "
1177 					"point events/%s\n", call->name);
1178 				continue;
1179 			}
1180 		}
1181 		/*
1182 		 * This module has events; create file ops for this
1183 		 * module if not already done.
1184 		 */
1185 		if (!file_ops) {
1186 			file_ops = trace_create_file_ops(mod);
1187 			if (!file_ops)
1188 				return;
1189 		}
1190 		call->mod = mod;
1191 		ret = event_create_dir(call, d_events,
1192 				       &file_ops->id, &file_ops->enable,
1193 				       &file_ops->filter, &file_ops->format);
1194 		if (!ret)
1195 			list_add(&call->list, &ftrace_events);
1196 	}
1197 }
1198 
1199 static void trace_module_remove_events(struct module *mod)
1200 {
1201 	struct ftrace_module_file_ops *file_ops;
1202 	struct ftrace_event_call *call, *p;
1203 	bool found = false;
1204 
1205 	down_write(&trace_event_mutex);
1206 	list_for_each_entry_safe(call, p, &ftrace_events, list) {
1207 		if (call->mod == mod) {
1208 			found = true;
1209 			__trace_remove_event_call(call);
1210 		}
1211 	}
1212 
1213 	/* Now free the file_operations */
1214 	list_for_each_entry(file_ops, &ftrace_module_file_list, list) {
1215 		if (file_ops->mod == mod)
1216 			break;
1217 	}
1218 	if (&file_ops->list != &ftrace_module_file_list) {
1219 		list_del(&file_ops->list);
1220 		kfree(file_ops);
1221 	}
1222 
1223 	/*
1224 	 * It is safest to reset the ring buffer if the module being unloaded
1225 	 * registered any events.
1226 	 */
1227 	if (found)
1228 		tracing_reset_current_online_cpus();
1229 	up_write(&trace_event_mutex);
1230 }
1231 
1232 static int trace_module_notify(struct notifier_block *self,
1233 			       unsigned long val, void *data)
1234 {
1235 	struct module *mod = data;
1236 
1237 	mutex_lock(&event_mutex);
1238 	switch (val) {
1239 	case MODULE_STATE_COMING:
1240 		trace_module_add_events(mod);
1241 		break;
1242 	case MODULE_STATE_GOING:
1243 		trace_module_remove_events(mod);
1244 		break;
1245 	}
1246 	mutex_unlock(&event_mutex);
1247 
1248 	return 0;
1249 }
1250 #else
1251 static int trace_module_notify(struct notifier_block *self,
1252 			       unsigned long val, void *data)
1253 {
1254 	return 0;
1255 }
1256 #endif /* CONFIG_MODULES */
1257 
1258 static struct notifier_block trace_module_nb = {
1259 	.notifier_call = trace_module_notify,
1260 	.priority = 0,
1261 };
1262 
1263 extern struct ftrace_event_call __start_ftrace_events[];
1264 extern struct ftrace_event_call __stop_ftrace_events[];
1265 
1266 static char bootup_event_buf[COMMAND_LINE_SIZE] __initdata;
1267 
1268 static __init int setup_trace_event(char *str)
1269 {
1270 	strlcpy(bootup_event_buf, str, COMMAND_LINE_SIZE);
1271 	ring_buffer_expanded = 1;
1272 	tracing_selftest_disabled = 1;
1273 
1274 	return 1;
1275 }
1276 __setup("trace_event=", setup_trace_event);
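/*
 * Example boot parameter (illustrative event names):
 *
 *	trace_event=sched:sched_switch,irq
 *
 * The comma-separated list is parsed by event_trace_init() below with
 * the same syntax ftrace_set_clr_event() accepts.
 */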
1277 
1278 static __init int event_trace_init(void)
1279 {
1280 	struct ftrace_event_call *call;
1281 	struct dentry *d_tracer;
1282 	struct dentry *entry;
1283 	struct dentry *d_events;
1284 	int ret;
1285 	char *buf = bootup_event_buf;
1286 	char *token;
1287 
1288 	d_tracer = tracing_init_dentry();
1289 	if (!d_tracer)
1290 		return 0;
1291 
1292 	entry = debugfs_create_file("available_events", 0444, d_tracer,
1293 				    (void *)&show_event_seq_ops,
1294 				    &ftrace_avail_fops);
1295 	if (!entry)
1296 		pr_warning("Could not create debugfs "
1297 			   "'available_events' entry\n");
1298 
1299 	entry = debugfs_create_file("set_event", 0644, d_tracer,
1300 				    (void *)&show_set_event_seq_ops,
1301 				    &ftrace_set_event_fops);
1302 	if (!entry)
1303 		pr_warning("Could not create debugfs "
1304 			   "'set_event' entry\n");
1305 
1306 	d_events = event_trace_events_dir();
1307 	if (!d_events)
1308 		return 0;
1309 
1310 	/* ring buffer internal formats */
1311 	trace_create_file("header_page", 0444, d_events,
1312 			  ring_buffer_print_page_header,
1313 			  &ftrace_show_header_fops);
1314 
1315 	trace_create_file("header_event", 0444, d_events,
1316 			  ring_buffer_print_entry_header,
1317 			  &ftrace_show_header_fops);
1318 
1319 	trace_create_file("enable", 0644, d_events,
1320 			  NULL, &ftrace_system_enable_fops);
1321 
1322 	for_each_event(call, __start_ftrace_events, __stop_ftrace_events) {
1323 		/* The linker may leave blanks */
1324 		if (!call->name)
1325 			continue;
1326 		if (call->class->raw_init) {
1327 			ret = call->class->raw_init(call);
1328 			if (ret < 0) {
1329 				if (ret != -ENOSYS)
1330 					pr_warning("Could not initialize trace "
1331 					"point events/%s\n", call->name);
1332 				continue;
1333 			}
1334 		}
1335 		ret = event_create_dir(call, d_events, &ftrace_event_id_fops,
1336 				       &ftrace_enable_fops,
1337 				       &ftrace_event_filter_fops,
1338 				       &ftrace_event_format_fops);
1339 		if (!ret)
1340 			list_add(&call->list, &ftrace_events);
1341 	}
1342 
1343 	while (true) {
1344 		token = strsep(&buf, ",");
1345 
1346 		if (!token)
1347 			break;
1348 		if (!*token)
1349 			continue;
1350 
1351 		ret = ftrace_set_clr_event(token, 1);
1352 		if (ret)
1353 			pr_warning("Failed to enable trace event: %s\n", token);
1354 	}
1355 
1356 	ret = register_module_notifier(&trace_module_nb);
1357 	if (ret)
1358 		pr_warning("Failed to register trace events module notifier\n");
1359 
1360 	return 0;
1361 }
1362 fs_initcall(event_trace_init);
1363 
1364 #ifdef CONFIG_FTRACE_STARTUP_TEST
1365 
1366 static DEFINE_SPINLOCK(test_spinlock);
1367 static DEFINE_SPINLOCK(test_spinlock_irq);
1368 static DEFINE_MUTEX(test_mutex);
1369 
1370 static __init void test_work(struct work_struct *dummy)
1371 {
1372 	spin_lock(&test_spinlock);
1373 	spin_lock_irq(&test_spinlock_irq);
1374 	udelay(1);
1375 	spin_unlock_irq(&test_spinlock_irq);
1376 	spin_unlock(&test_spinlock);
1377 
1378 	mutex_lock(&test_mutex);
1379 	msleep(1);
1380 	mutex_unlock(&test_mutex);
1381 }
1382 
1383 static __init int event_test_thread(void *unused)
1384 {
1385 	void *test_malloc;
1386 
1387 	test_malloc = kmalloc(1234, GFP_KERNEL);
1388 	if (!test_malloc)
1389 		pr_info("failed to kmalloc\n");
1390 
1391 	schedule_on_each_cpu(test_work);
1392 
1393 	kfree(test_malloc);
1394 
1395 	set_current_state(TASK_INTERRUPTIBLE);
1396 	while (!kthread_should_stop())
1397 		schedule();
1398 
1399 	return 0;
1400 }
1401 
1402 /*
1403  * Do various things that may trigger events.
1404  */
1405 static __init void event_test_stuff(void)
1406 {
1407 	struct task_struct *test_thread;
1408 
1409 	test_thread = kthread_run(event_test_thread, NULL, "test-events");
1410 	msleep(1);
1411 	kthread_stop(test_thread);
1412 }
1413 
1414 /*
1415  * point separately, then by subsystem, and finally all
1416  * trace points together.
1417  */
1418 static __init void event_trace_self_tests(void)
1419 {
1420 	struct ftrace_event_call *call;
1421 	struct event_subsystem *system;
1422 	int ret;
1423 
1424 	pr_info("Running tests on trace events:\n");
1425 
1426 	list_for_each_entry(call, &ftrace_events, list) {
1427 
1428 		/* Only test those that have a probe */
1429 		if (!call->class || !call->class->probe)
1430 			continue;
1431 
1432 /*
1433  * Testing syscall events here is pretty useless, but
1434  * we still do it if configured. It is time consuming, though;
1435  * what we really need is a user thread to perform the
1436  * syscalls as we test.
1437  */
1438 #ifndef CONFIG_EVENT_TRACE_TEST_SYSCALLS
1439 		if (call->class->system &&
1440 		    strcmp(call->class->system, "syscalls") == 0)
1441 			continue;
1442 #endif
1443 
1444 		pr_info("Testing event %s: ", call->name);
1445 
1446 		/*
1447 		 * If an event is already enabled, someone is using
1448 		 * it and the self test should not be on.
1449 		 */
1450 		if (call->flags & TRACE_EVENT_FL_ENABLED) {
1451 			pr_warning("Enabled event during self test!\n");
1452 			WARN_ON_ONCE(1);
1453 			continue;
1454 		}
1455 
1456 		ftrace_event_enable_disable(call, 1);
1457 		event_test_stuff();
1458 		ftrace_event_enable_disable(call, 0);
1459 
1460 		pr_cont("OK\n");
1461 	}
1462 
1463 	/* Now test at the sub system level */
1464 
1465 	pr_info("Running tests on trace event systems:\n");
1466 
1467 	list_for_each_entry(system, &event_subsystems, list) {
1468 
1469 		/* the ftrace system is special, skip it */
1470 		if (strcmp(system->name, "ftrace") == 0)
1471 			continue;
1472 
1473 		pr_info("Testing event system %s: ", system->name);
1474 
1475 		ret = __ftrace_set_clr_event(NULL, system->name, NULL, 1);
1476 		if (WARN_ON_ONCE(ret)) {
1477 			pr_warning("error enabling system %s\n",
1478 				   system->name);
1479 			continue;
1480 		}
1481 
1482 		event_test_stuff();
1483 
1484 		ret = __ftrace_set_clr_event(NULL, system->name, NULL, 0);
1485 		if (WARN_ON_ONCE(ret))
1486 			pr_warning("error disabling system %s\n",
1487 				   system->name);
1488 
1489 		pr_cont("OK\n");
1490 	}
1491 
1492 	/* Test with all events enabled */
1493 
1494 	pr_info("Running tests on all trace events:\n");
1495 	pr_info("Testing all events: ");
1496 
1497 	ret = __ftrace_set_clr_event(NULL, NULL, NULL, 1);
1498 	if (WARN_ON_ONCE(ret)) {
1499 		pr_warning("error enabling all events\n");
1500 		return;
1501 	}
1502 
1503 	event_test_stuff();
1504 
1505 	/* now disable all events again */
1506 	ret = __ftrace_set_clr_event(NULL, NULL, NULL, 0);
1507 	if (WARN_ON_ONCE(ret)) {
1508 		pr_warning("error disabling all events\n");
1509 		return;
1510 	}
1511 
1512 	pr_cont("OK\n");
1513 }
1514 
1515 #ifdef CONFIG_FUNCTION_TRACER
1516 
1517 static DEFINE_PER_CPU(atomic_t, ftrace_test_event_disable);
1518 
1519 static void
1520 function_test_events_call(unsigned long ip, unsigned long parent_ip)
1521 {
1522 	struct ring_buffer_event *event;
1523 	struct ring_buffer *buffer;
1524 	struct ftrace_entry *entry;
1525 	unsigned long flags;
1526 	long disabled;
1527 	int resched;
1528 	int cpu;
1529 	int pc;
1530 
1531 	pc = preempt_count();
1532 	resched = ftrace_preempt_disable();
1533 	cpu = raw_smp_processor_id();
1534 	disabled = atomic_inc_return(&per_cpu(ftrace_test_event_disable, cpu));
1535 
1536 	if (disabled != 1)
1537 		goto out;
1538 
1539 	local_save_flags(flags);
1540 
1541 	event = trace_current_buffer_lock_reserve(&buffer,
1542 						  TRACE_FN, sizeof(*entry),
1543 						  flags, pc);
1544 	if (!event)
1545 		goto out;
1546 	entry	= ring_buffer_event_data(event);
1547 	entry->ip			= ip;
1548 	entry->parent_ip		= parent_ip;
1549 
1550 	trace_nowake_buffer_unlock_commit(buffer, event, flags, pc);
1551 
1552  out:
1553 	atomic_dec(&per_cpu(ftrace_test_event_disable, cpu));
1554 	ftrace_preempt_enable(resched);
1555 }
1556 
1557 static struct ftrace_ops trace_ops __initdata  =
1558 {
1559 	.func = function_test_events_call,
1560 };
1561 
1562 static __init void event_trace_self_test_with_function(void)
1563 {
1564 	register_ftrace_function(&trace_ops);
1565 	pr_info("Running tests again, along with the function tracer\n");
1566 	event_trace_self_tests();
1567 	unregister_ftrace_function(&trace_ops);
1568 }
1569 #else
1570 static __init void event_trace_self_test_with_function(void)
1571 {
1572 }
1573 #endif
1574 
1575 static __init int event_trace_self_tests_init(void)
1576 {
1577 	if (!tracing_selftest_disabled) {
1578 		event_trace_self_tests();
1579 		event_trace_self_test_with_function();
1580 	}
1581 
1582 	return 0;
1583 }
1584 
1585 late_initcall(event_trace_self_tests_init);
1586 
1587 #endif
1588