1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * event tracer
4 *
5 * Copyright (C) 2008 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
6 *
7 * - Added format output of fields of the trace point.
8 * This was based off of work by Tom Zanussi <tzanussi@gmail.com>.
9 *
10 */
11
12 #define pr_fmt(fmt) fmt
13
14 #include <linux/workqueue.h>
15 #include <linux/security.h>
16 #include <linux/spinlock.h>
17 #include <linux/kthread.h>
18 #include <linux/tracefs.h>
19 #include <linux/uaccess.h>
20 #include <linux/module.h>
21 #include <linux/ctype.h>
22 #include <linux/sort.h>
23 #include <linux/slab.h>
24 #include <linux/delay.h>
25
26 #include <trace/events/sched.h>
27 #include <trace/syscall.h>
28
29 #include <asm/setup.h>
30
31 #include "trace_output.h"
32
33 #undef TRACE_SYSTEM
34 #define TRACE_SYSTEM "TRACE_SYSTEM"
35
36 DEFINE_MUTEX(event_mutex);
37
38 LIST_HEAD(ftrace_events);
39 static LIST_HEAD(ftrace_generic_fields);
40 static LIST_HEAD(ftrace_common_fields);
41 static bool eventdir_initialized;
42
43 static LIST_HEAD(module_strings);
44
45 struct module_string {
46 struct list_head next;
47 struct module *module;
48 char *str;
49 };
50
51 #define GFP_TRACE (GFP_KERNEL | __GFP_ZERO)
52
53 static struct kmem_cache *field_cachep;
54 static struct kmem_cache *file_cachep;
55
static inline int system_refcount(struct event_subsystem *system)
57 {
58 return system->ref_count;
59 }
60
static int system_refcount_inc(struct event_subsystem *system)
62 {
63 return system->ref_count++;
64 }
65
static int system_refcount_dec(struct event_subsystem *system)
67 {
68 return --system->ref_count;
69 }
70
71 /* Double loops, do not use break, only goto's work */
72 #define do_for_each_event_file(tr, file) \
73 list_for_each_entry(tr, &ftrace_trace_arrays, list) { \
74 list_for_each_entry(file, &tr->events, list)
75
76 #define do_for_each_event_file_safe(tr, file) \
77 list_for_each_entry(tr, &ftrace_trace_arrays, list) { \
78 struct trace_event_file *___n; \
79 list_for_each_entry_safe(file, ___n, &tr->events, list)
80
81 #define while_for_each_event_file() \
82 }
83
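/*
 * Illustrative usage (the closing macro supplies the brace that ends the
 * outer loop opened by do_for_each_event_file()):
 *
 *	do_for_each_event_file(tr, file) {
 *		if (!(file->flags & EVENT_FILE_FL_ENABLED))
 *			continue;
 *		...
 *	} while_for_each_event_file();
 */
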
84 static struct ftrace_event_field *
__find_event_field(struct list_head *head, const char *name)
86 {
87 struct ftrace_event_field *field;
88
89 list_for_each_entry(field, head, link) {
90 if (!strcmp(field->name, name))
91 return field;
92 }
93
94 return NULL;
95 }
96
97 struct ftrace_event_field *
trace_find_event_field(struct trace_event_call *call, char *name)
99 {
100 struct ftrace_event_field *field;
101 struct list_head *head;
102
103 head = trace_get_fields(call);
104 field = __find_event_field(head, name);
105 if (field)
106 return field;
107
108 field = __find_event_field(&ftrace_generic_fields, name);
109 if (field)
110 return field;
111
112 return __find_event_field(&ftrace_common_fields, name);
113 }
114
static int __trace_define_field(struct list_head *head, const char *type,
				const char *name, int offset, int size,
				int is_signed, int filter_type, int len,
				int need_test)
119 {
120 struct ftrace_event_field *field;
121
122 field = kmem_cache_alloc(field_cachep, GFP_TRACE);
123 if (!field)
124 return -ENOMEM;
125
126 field->name = name;
127 field->type = type;
128
129 if (filter_type == FILTER_OTHER)
130 field->filter_type = filter_assign_type(type);
131 else
132 field->filter_type = filter_type;
133
134 field->offset = offset;
135 field->size = size;
136 field->is_signed = is_signed;
137 field->needs_test = need_test;
138 field->len = len;
139
140 list_add(&field->link, head);
141
142 return 0;
143 }
144
int trace_define_field(struct trace_event_call *call, const char *type,
		       const char *name, int offset, int size, int is_signed,
		       int filter_type)
148 {
149 struct list_head *head;
150
151 if (WARN_ON(!call->class))
152 return 0;
153
154 head = trace_get_fields(call);
155 return __trace_define_field(head, type, name, offset, size,
156 is_signed, filter_type, 0, 0);
157 }
158 EXPORT_SYMBOL_GPL(trace_define_field);
159
static int trace_define_field_ext(struct trace_event_call *call, const char *type,
		       const char *name, int offset, int size, int is_signed,
		       int filter_type, int len, int need_test)
163 {
164 struct list_head *head;
165
166 if (WARN_ON(!call->class))
167 return 0;
168
169 head = trace_get_fields(call);
170 return __trace_define_field(head, type, name, offset, size,
171 is_signed, filter_type, len, need_test);
172 }
173
174 #define __generic_field(type, item, filter_type) \
175 ret = __trace_define_field(&ftrace_generic_fields, #type, \
176 #item, 0, 0, is_signed_type(type), \
177 filter_type, 0, 0); \
178 if (ret) \
179 return ret;
180
181 #define __common_field(type, item) \
182 ret = __trace_define_field(&ftrace_common_fields, #type, \
183 "common_" #item, \
184 offsetof(typeof(ent), item), \
185 sizeof(ent.item), \
186 is_signed_type(type), FILTER_OTHER, \
187 0, 0); \
188 if (ret) \
189 return ret;
190
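/*
 * For example, __common_field(int, pid) below registers a field named
 * "common_pid" whose offset and size are taken from the "pid" member of
 * struct trace_entry, with FILTER_OTHER as its filter type.
 */
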
static int trace_define_generic_fields(void)
192 {
193 int ret;
194
195 __generic_field(int, CPU, FILTER_CPU);
196 __generic_field(int, cpu, FILTER_CPU);
197 __generic_field(int, common_cpu, FILTER_CPU);
198 __generic_field(char *, COMM, FILTER_COMM);
199 __generic_field(char *, comm, FILTER_COMM);
200 __generic_field(char *, stacktrace, FILTER_STACKTRACE);
201 __generic_field(char *, STACKTRACE, FILTER_STACKTRACE);
202
203 return ret;
204 }
205
static int trace_define_common_fields(void)
207 {
208 int ret;
209 struct trace_entry ent;
210
211 __common_field(unsigned short, type);
212 __common_field(unsigned char, flags);
213 /* Holds both preempt_count and migrate_disable */
214 __common_field(unsigned char, preempt_count);
215 __common_field(int, pid);
216
217 return ret;
218 }
219
static void trace_destroy_fields(struct trace_event_call *call)
221 {
222 struct ftrace_event_field *field, *next;
223 struct list_head *head;
224
225 head = trace_get_fields(call);
226 list_for_each_entry_safe(field, next, head, link) {
227 list_del(&field->link);
228 kmem_cache_free(field_cachep, field);
229 }
230 }
231
232 /*
233 * run-time version of trace_event_get_offsets_<call>() that returns the last
234 * accessible offset of trace fields excluding __dynamic_array bytes
235 */
int trace_event_get_offsets(struct trace_event_call *call)
237 {
238 struct ftrace_event_field *tail;
239 struct list_head *head;
240
241 head = trace_get_fields(call);
242 /*
243 * head->next points to the last field with the largest offset,
244 * since it was added last by trace_define_field()
245 */
246 tail = list_first_entry(head, struct ftrace_event_field, link);
247 return tail->offset + tail->size;
248 }
249
250
static struct trace_event_fields *find_event_field(const char *fmt,
						   struct trace_event_call *call)
253 {
254 struct trace_event_fields *field = call->class->fields_array;
255 const char *p = fmt;
256 int len;
257
258 if (!(len = str_has_prefix(fmt, "REC->")))
259 return NULL;
260 fmt += len;
261 for (p = fmt; *p; p++) {
262 if (!isalnum(*p) && *p != '_')
263 break;
264 }
265 len = p - fmt;
266
267 for (; field->type; field++) {
268 if (strncmp(field->name, fmt, len) || field->name[len])
269 continue;
270
271 return field;
272 }
273 return NULL;
274 }
275
276 /*
277 * Check if the referenced field is an array and return true,
278 * as arrays are OK to dereference.
279 */
static bool test_field(const char *fmt, struct trace_event_call *call)
281 {
282 struct trace_event_fields *field;
283
284 field = find_event_field(fmt, call);
285 if (!field)
286 return false;
287
288 /* This is an array and is OK to dereference. */
289 return strchr(field->type, '[') != NULL;
290 }
291
292 /* Look for a string within an argument */
static bool find_print_string(const char *arg, const char *str, const char *end)
294 {
295 const char *r;
296
297 r = strstr(arg, str);
298 return r && r < end;
299 }
300
301 /* Return true if the argument pointer is safe */
static bool process_pointer(const char *fmt, int len, struct trace_event_call *call)
303 {
304 const char *r, *e, *a;
305
306 e = fmt + len;
307
308 /* Find the REC-> in the argument */
309 r = strstr(fmt, "REC->");
310 if (r && r < e) {
/*
 * Addresses of events on the buffer, or an array on the buffer, are
 * OK to dereference. There are ways to fool this, but
 * this is to catch common mistakes, not malicious code.
 */
316 a = strchr(fmt, '&');
317 if ((a && (a < r)) || test_field(r, call))
318 return true;
319 } else if (find_print_string(fmt, "__get_dynamic_array(", e)) {
320 return true;
321 } else if (find_print_string(fmt, "__get_rel_dynamic_array(", e)) {
322 return true;
323 } else if (find_print_string(fmt, "__get_dynamic_array_len(", e)) {
324 return true;
325 } else if (find_print_string(fmt, "__get_rel_dynamic_array_len(", e)) {
326 return true;
327 } else if (find_print_string(fmt, "__get_sockaddr(", e)) {
328 return true;
329 } else if (find_print_string(fmt, "__get_rel_sockaddr(", e)) {
330 return true;
331 }
332 return false;
333 }
334
335 /* Return true if the string is safe */
static bool process_string(const char *fmt, int len, struct trace_event_call *call)
337 {
338 struct trace_event_fields *field;
339 const char *r, *e, *s;
340
341 e = fmt + len;
342
343 /*
344 * There are several helper functions that return strings.
345 * If the argument contains a function, then assume its field is valid.
346 * It is considered that the argument has a function if it has:
347 * alphanumeric or '_' before a parenthesis.
348 */
349 s = fmt;
350 do {
351 r = strstr(s, "(");
352 if (!r || r >= e)
353 break;
354 for (int i = 1; r - i >= s; i++) {
355 char ch = *(r - i);
356 if (isspace(ch))
357 continue;
358 if (isalnum(ch) || ch == '_')
359 return true;
360 /* Anything else, this isn't a function */
361 break;
362 }
/* A function could be wrapped in parentheses, try the next one */
364 s = r + 1;
365 } while (s < e);
366
367 /*
368 * If there's any strings in the argument consider this arg OK as it
369 * could be: REC->field ? "foo" : "bar" and we don't want to get into
370 * verifying that logic here.
371 */
372 if (find_print_string(fmt, "\"", e))
373 return true;
374
375 /* Dereferenced strings are also valid like any other pointer */
376 if (process_pointer(fmt, len, call))
377 return true;
378
379 /* Make sure the field is found */
380 field = find_event_field(fmt, call);
381 if (!field)
382 return false;
383
384 /* Test this field's string before printing the event */
385 call->flags |= TRACE_EVENT_FL_TEST_STR;
386 field->needs_test = 1;
387
388 return true;
389 }
390
391 /*
392 * Examine the print fmt of the event looking for unsafe dereference
393 * pointers using %p* that could be recorded in the trace event and
394 * much later referenced after the pointer was freed. Dereferencing
395 * pointers are OK, if it is dereferenced into the event itself.
396 */
static void test_event_printk(struct trace_event_call *call)
398 {
399 u64 dereference_flags = 0;
400 u64 string_flags = 0;
401 bool first = true;
402 const char *fmt;
403 int parens = 0;
404 char in_quote = 0;
405 int start_arg = 0;
406 int arg = 0;
407 int i, e;
408
409 fmt = call->print_fmt;
410
411 if (!fmt)
412 return;
413
414 for (i = 0; fmt[i]; i++) {
415 switch (fmt[i]) {
416 case '\\':
417 i++;
418 if (!fmt[i])
419 return;
420 continue;
421 case '"':
422 case '\'':
423 /*
424 * The print fmt starts with a string that
425 * is processed first to find %p* usage,
426 * then after the first string, the print fmt
427 * contains arguments that are used to check
428 * if the dereferenced %p* usage is safe.
429 */
430 if (first) {
431 if (fmt[i] == '\'')
432 continue;
433 if (in_quote) {
434 arg = 0;
435 first = false;
/*
 * If there were no %p* uses,
 * the fmt is OK.
 */
440 if (!dereference_flags)
441 return;
442 }
443 }
444 if (in_quote) {
445 if (in_quote == fmt[i])
446 in_quote = 0;
447 } else {
448 in_quote = fmt[i];
449 }
450 continue;
451 case '%':
452 if (!first || !in_quote)
453 continue;
454 i++;
455 if (!fmt[i])
456 return;
457 switch (fmt[i]) {
458 case '%':
459 continue;
460 case 'p':
461 /* Find dereferencing fields */
462 switch (fmt[i + 1]) {
463 case 'B': case 'R': case 'r':
464 case 'b': case 'M': case 'm':
465 case 'I': case 'i': case 'E':
466 case 'U': case 'V': case 'N':
467 case 'a': case 'd': case 'D':
468 case 'g': case 't': case 'C':
469 case 'O': case 'f':
470 if (WARN_ONCE(arg == 63,
471 "Too many args for event: %s",
472 trace_event_name(call)))
473 return;
474 dereference_flags |= 1ULL << arg;
475 }
476 break;
477 default:
478 {
479 bool star = false;
480 int j;
481
482 /* Increment arg if %*s exists. */
483 for (j = 0; fmt[i + j]; j++) {
484 if (isdigit(fmt[i + j]) ||
485 fmt[i + j] == '.')
486 continue;
487 if (fmt[i + j] == '*') {
488 star = true;
489 continue;
490 }
491 if ((fmt[i + j] == 's')) {
492 if (star)
493 arg++;
494 if (WARN_ONCE(arg == 63,
495 "Too many args for event: %s",
496 trace_event_name(call)))
497 return;
498 dereference_flags |= 1ULL << arg;
499 string_flags |= 1ULL << arg;
500 }
501 break;
502 }
503 break;
504 } /* default */
505
506 } /* switch */
507 arg++;
508 continue;
509 case '(':
510 if (in_quote)
511 continue;
512 parens++;
513 continue;
514 case ')':
515 if (in_quote)
516 continue;
517 parens--;
518 if (WARN_ONCE(parens < 0,
519 "Paren mismatch for event: %s\narg='%s'\n%*s",
520 trace_event_name(call),
521 fmt + start_arg,
522 (i - start_arg) + 5, "^"))
523 return;
524 continue;
525 case ',':
526 if (in_quote || parens)
527 continue;
528 e = i;
529 i++;
530 while (isspace(fmt[i]))
531 i++;
532
/*
 * If start_arg is zero, then this is the start of the
 * first argument. The processing of the argument happens
 * when the end of the argument is found, as it needs to
 * handle parentheses and such.
 */
539 if (!start_arg) {
540 start_arg = i;
541 /* Balance out the i++ in the for loop */
542 i--;
543 continue;
544 }
545
546 if (dereference_flags & (1ULL << arg)) {
547 if (string_flags & (1ULL << arg)) {
548 if (process_string(fmt + start_arg, e - start_arg, call))
549 dereference_flags &= ~(1ULL << arg);
550 } else if (process_pointer(fmt + start_arg, e - start_arg, call))
551 dereference_flags &= ~(1ULL << arg);
552 }
553
554 start_arg = i;
555 arg++;
556 /* Balance out the i++ in the for loop */
557 i--;
558 }
559 }
560
561 if (dereference_flags & (1ULL << arg)) {
562 if (string_flags & (1ULL << arg)) {
563 if (process_string(fmt + start_arg, i - start_arg, call))
564 dereference_flags &= ~(1ULL << arg);
565 } else if (process_pointer(fmt + start_arg, i - start_arg, call))
566 dereference_flags &= ~(1ULL << arg);
567 }
568
569 /*
570 * If you triggered the below warning, the trace event reported
571 * uses an unsafe dereference pointer %p*. As the data stored
572 * at the trace event time may no longer exist when the trace
573 * event is printed, dereferencing to the original source is
574 * unsafe. The source of the dereference must be copied into the
575 * event itself, and the dereference must access the copy instead.
576 */
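/*
 * Illustrative example (not taken from a real event): a print_fmt of
 * "%s", REC->name is unsafe when "name" is a plain pointer saved in the
 * event, while "%s", __get_str(name) or a REC-> array field is safe,
 * since the referenced data was copied into the ring buffer entry itself.
 */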
577 if (WARN_ON_ONCE(dereference_flags)) {
578 arg = 1;
579 while (!(dereference_flags & 1)) {
580 dereference_flags >>= 1;
581 arg++;
582 }
583 pr_warn("event %s has unsafe dereference of argument %d\n",
584 trace_event_name(call), arg);
585 pr_warn("print_fmt: %s\n", fmt);
586 }
587 }
588
int trace_event_raw_init(struct trace_event_call *call)
590 {
591 int id;
592
593 id = register_trace_event(&call->event);
594 if (!id)
595 return -ENODEV;
596
597 test_event_printk(call);
598
599 return 0;
600 }
601 EXPORT_SYMBOL_GPL(trace_event_raw_init);
602
bool trace_event_ignore_this_pid(struct trace_event_file *trace_file)
604 {
605 struct trace_array *tr = trace_file->tr;
606 struct trace_array_cpu *data;
607 struct trace_pid_list *no_pid_list;
608 struct trace_pid_list *pid_list;
609
610 pid_list = rcu_dereference_raw(tr->filtered_pids);
611 no_pid_list = rcu_dereference_raw(tr->filtered_no_pids);
612
613 if (!pid_list && !no_pid_list)
614 return false;
615
616 data = this_cpu_ptr(tr->array_buffer.data);
617
618 return data->ignore_pid;
619 }
620 EXPORT_SYMBOL_GPL(trace_event_ignore_this_pid);
621
void *trace_event_buffer_reserve(struct trace_event_buffer *fbuffer,
				 struct trace_event_file *trace_file,
				 unsigned long len)
625 {
626 struct trace_event_call *event_call = trace_file->event_call;
627
628 if ((trace_file->flags & EVENT_FILE_FL_PID_FILTER) &&
629 trace_event_ignore_this_pid(trace_file))
630 return NULL;
631
632 /*
633 * If CONFIG_PREEMPTION is enabled, then the tracepoint itself disables
634 * preemption (adding one to the preempt_count). Since we are
635 * interested in the preempt_count at the time the tracepoint was
636 * hit, we need to subtract one to offset the increment.
637 */
638 fbuffer->trace_ctx = tracing_gen_ctx_dec();
639 fbuffer->trace_file = trace_file;
640
641 fbuffer->event =
642 trace_event_buffer_lock_reserve(&fbuffer->buffer, trace_file,
643 event_call->event.type, len,
644 fbuffer->trace_ctx);
645 if (!fbuffer->event)
646 return NULL;
647
648 fbuffer->regs = NULL;
649 fbuffer->entry = ring_buffer_event_data(fbuffer->event);
650 return fbuffer->entry;
651 }
652 EXPORT_SYMBOL_GPL(trace_event_buffer_reserve);
653
int trace_event_reg(struct trace_event_call *call,
		    enum trace_reg type, void *data)
656 {
657 struct trace_event_file *file = data;
658
659 WARN_ON(!(call->flags & TRACE_EVENT_FL_TRACEPOINT));
660 switch (type) {
661 case TRACE_REG_REGISTER:
662 return tracepoint_probe_register(call->tp,
663 call->class->probe,
664 file);
665 case TRACE_REG_UNREGISTER:
666 tracepoint_probe_unregister(call->tp,
667 call->class->probe,
668 file);
669 return 0;
670
671 #ifdef CONFIG_PERF_EVENTS
672 case TRACE_REG_PERF_REGISTER:
673 return tracepoint_probe_register(call->tp,
674 call->class->perf_probe,
675 call);
676 case TRACE_REG_PERF_UNREGISTER:
677 tracepoint_probe_unregister(call->tp,
678 call->class->perf_probe,
679 call);
680 return 0;
681 case TRACE_REG_PERF_OPEN:
682 case TRACE_REG_PERF_CLOSE:
683 case TRACE_REG_PERF_ADD:
684 case TRACE_REG_PERF_DEL:
685 return 0;
686 #endif
687 }
688 return 0;
689 }
690 EXPORT_SYMBOL_GPL(trace_event_reg);
691
void trace_event_enable_cmd_record(bool enable)
693 {
694 struct trace_event_file *file;
695 struct trace_array *tr;
696
697 lockdep_assert_held(&event_mutex);
698
699 do_for_each_event_file(tr, file) {
700
701 if (!(file->flags & EVENT_FILE_FL_ENABLED))
702 continue;
703
704 if (enable) {
705 tracing_start_cmdline_record();
706 set_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
707 } else {
708 tracing_stop_cmdline_record();
709 clear_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
710 }
711 } while_for_each_event_file();
712 }
713
void trace_event_enable_tgid_record(bool enable)
715 {
716 struct trace_event_file *file;
717 struct trace_array *tr;
718
719 lockdep_assert_held(&event_mutex);
720
721 do_for_each_event_file(tr, file) {
722 if (!(file->flags & EVENT_FILE_FL_ENABLED))
723 continue;
724
725 if (enable) {
726 tracing_start_tgid_record();
727 set_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags);
728 } else {
729 tracing_stop_tgid_record();
730 clear_bit(EVENT_FILE_FL_RECORDED_TGID_BIT,
731 &file->flags);
732 }
733 } while_for_each_event_file();
734 }
735
static int __ftrace_event_enable_disable(struct trace_event_file *file,
					 int enable, int soft_disable)
738 {
739 struct trace_event_call *call = file->event_call;
740 struct trace_array *tr = file->tr;
741 int ret = 0;
742 int disable;
743
744 switch (enable) {
745 case 0:
746 /*
747 * When soft_disable is set and enable is cleared, the sm_ref
748 * reference counter is decremented. If it reaches 0, we want
749 * to clear the SOFT_DISABLED flag but leave the event in the
750 * state that it was. That is, if the event was enabled and
751 * SOFT_DISABLED isn't set, then do nothing. But if SOFT_DISABLED
752 * is set we do not want the event to be enabled before we
753 * clear the bit.
754 *
755 * When soft_disable is not set but the SOFT_MODE flag is,
756 * we do nothing. Do not disable the tracepoint, otherwise
* "soft enable"s (clearing the SOFT_DISABLED bit) won't work.
758 */
759 if (soft_disable) {
760 if (atomic_dec_return(&file->sm_ref) > 0)
761 break;
762 disable = file->flags & EVENT_FILE_FL_SOFT_DISABLED;
763 clear_bit(EVENT_FILE_FL_SOFT_MODE_BIT, &file->flags);
764 /* Disable use of trace_buffered_event */
765 trace_buffered_event_disable();
766 } else
767 disable = !(file->flags & EVENT_FILE_FL_SOFT_MODE);
768
769 if (disable && (file->flags & EVENT_FILE_FL_ENABLED)) {
770 clear_bit(EVENT_FILE_FL_ENABLED_BIT, &file->flags);
771 if (file->flags & EVENT_FILE_FL_RECORDED_CMD) {
772 tracing_stop_cmdline_record();
773 clear_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
774 }
775
776 if (file->flags & EVENT_FILE_FL_RECORDED_TGID) {
777 tracing_stop_tgid_record();
778 clear_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags);
779 }
780
781 call->class->reg(call, TRACE_REG_UNREGISTER, file);
782 }
783 /* If in SOFT_MODE, just set the SOFT_DISABLE_BIT, else clear it */
784 if (file->flags & EVENT_FILE_FL_SOFT_MODE)
785 set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
786 else
787 clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
788 break;
789 case 1:
790 /*
791 * When soft_disable is set and enable is set, we want to
792 * register the tracepoint for the event, but leave the event
793 * as is. That means, if the event was already enabled, we do
794 * nothing (but set SOFT_MODE). If the event is disabled, we
795 * set SOFT_DISABLED before enabling the event tracepoint, so
796 * it still seems to be disabled.
797 */
798 if (!soft_disable)
799 clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
800 else {
801 if (atomic_inc_return(&file->sm_ref) > 1)
802 break;
803 set_bit(EVENT_FILE_FL_SOFT_MODE_BIT, &file->flags);
804 /* Enable use of trace_buffered_event */
805 trace_buffered_event_enable();
806 }
807
808 if (!(file->flags & EVENT_FILE_FL_ENABLED)) {
809 bool cmd = false, tgid = false;
810
811 /* Keep the event disabled, when going to SOFT_MODE. */
812 if (soft_disable)
813 set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
814
815 if (tr->trace_flags & TRACE_ITER_RECORD_CMD) {
816 cmd = true;
817 tracing_start_cmdline_record();
818 set_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
819 }
820
821 if (tr->trace_flags & TRACE_ITER_RECORD_TGID) {
822 tgid = true;
823 tracing_start_tgid_record();
824 set_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags);
825 }
826
827 ret = call->class->reg(call, TRACE_REG_REGISTER, file);
828 if (ret) {
829 if (cmd)
830 tracing_stop_cmdline_record();
831 if (tgid)
832 tracing_stop_tgid_record();
833 pr_info("event trace: Could not enable event "
834 "%s\n", trace_event_name(call));
835 break;
836 }
837 set_bit(EVENT_FILE_FL_ENABLED_BIT, &file->flags);
838
839 /* WAS_ENABLED gets set but never cleared. */
840 set_bit(EVENT_FILE_FL_WAS_ENABLED_BIT, &file->flags);
841 }
842 break;
843 }
844
845 return ret;
846 }
847
int trace_event_enable_disable(struct trace_event_file *file,
			       int enable, int soft_disable)
850 {
851 return __ftrace_event_enable_disable(file, enable, soft_disable);
852 }
853
static int ftrace_event_enable_disable(struct trace_event_file *file,
				       int enable)
856 {
857 return __ftrace_event_enable_disable(file, enable, 0);
858 }
859
static void ftrace_clear_events(struct trace_array *tr)
861 {
862 struct trace_event_file *file;
863
864 mutex_lock(&event_mutex);
865 list_for_each_entry(file, &tr->events, list) {
866 ftrace_event_enable_disable(file, 0);
867 }
868 mutex_unlock(&event_mutex);
869 }
870
871 static void
event_filter_pid_sched_process_exit(void *data, struct task_struct *task)
873 {
874 struct trace_pid_list *pid_list;
875 struct trace_array *tr = data;
876
877 pid_list = rcu_dereference_raw(tr->filtered_pids);
878 trace_filter_add_remove_task(pid_list, NULL, task);
879
880 pid_list = rcu_dereference_raw(tr->filtered_no_pids);
881 trace_filter_add_remove_task(pid_list, NULL, task);
882 }
883
884 static void
event_filter_pid_sched_process_fork(void *data,
				    struct task_struct *self,
				    struct task_struct *task)
888 {
889 struct trace_pid_list *pid_list;
890 struct trace_array *tr = data;
891
892 pid_list = rcu_dereference_sched(tr->filtered_pids);
893 trace_filter_add_remove_task(pid_list, self, task);
894
895 pid_list = rcu_dereference_sched(tr->filtered_no_pids);
896 trace_filter_add_remove_task(pid_list, self, task);
897 }
898
void trace_event_follow_fork(struct trace_array *tr, bool enable)
900 {
901 if (enable) {
902 register_trace_prio_sched_process_fork(event_filter_pid_sched_process_fork,
903 tr, INT_MIN);
904 register_trace_prio_sched_process_free(event_filter_pid_sched_process_exit,
905 tr, INT_MAX);
906 } else {
907 unregister_trace_sched_process_fork(event_filter_pid_sched_process_fork,
908 tr);
909 unregister_trace_sched_process_free(event_filter_pid_sched_process_exit,
910 tr);
911 }
912 }
913
914 static void
event_filter_pid_sched_switch_probe_pre(void *data, bool preempt,
					 struct task_struct *prev,
					 struct task_struct *next,
					 unsigned int prev_state)
919 {
920 struct trace_array *tr = data;
921 struct trace_pid_list *no_pid_list;
922 struct trace_pid_list *pid_list;
923 bool ret;
924
925 pid_list = rcu_dereference_sched(tr->filtered_pids);
926 no_pid_list = rcu_dereference_sched(tr->filtered_no_pids);
927
928 /*
929 * Sched switch is funny, as we only want to ignore it
930 * in the notrace case if both prev and next should be ignored.
931 */
932 ret = trace_ignore_this_task(NULL, no_pid_list, prev) &&
933 trace_ignore_this_task(NULL, no_pid_list, next);
934
935 this_cpu_write(tr->array_buffer.data->ignore_pid, ret ||
936 (trace_ignore_this_task(pid_list, NULL, prev) &&
937 trace_ignore_this_task(pid_list, NULL, next)));
938 }
939
940 static void
event_filter_pid_sched_switch_probe_post(void *data, bool preempt,
					  struct task_struct *prev,
					  struct task_struct *next,
					  unsigned int prev_state)
945 {
946 struct trace_array *tr = data;
947 struct trace_pid_list *no_pid_list;
948 struct trace_pid_list *pid_list;
949
950 pid_list = rcu_dereference_sched(tr->filtered_pids);
951 no_pid_list = rcu_dereference_sched(tr->filtered_no_pids);
952
953 this_cpu_write(tr->array_buffer.data->ignore_pid,
954 trace_ignore_this_task(pid_list, no_pid_list, next));
955 }
956
957 static void
event_filter_pid_sched_wakeup_probe_pre(void *data, struct task_struct *task)
959 {
960 struct trace_array *tr = data;
961 struct trace_pid_list *no_pid_list;
962 struct trace_pid_list *pid_list;
963
964 /* Nothing to do if we are already tracing */
965 if (!this_cpu_read(tr->array_buffer.data->ignore_pid))
966 return;
967
968 pid_list = rcu_dereference_sched(tr->filtered_pids);
969 no_pid_list = rcu_dereference_sched(tr->filtered_no_pids);
970
971 this_cpu_write(tr->array_buffer.data->ignore_pid,
972 trace_ignore_this_task(pid_list, no_pid_list, task));
973 }
974
975 static void
event_filter_pid_sched_wakeup_probe_post(void *data, struct task_struct *task)
977 {
978 struct trace_array *tr = data;
979 struct trace_pid_list *no_pid_list;
980 struct trace_pid_list *pid_list;
981
982 /* Nothing to do if we are not tracing */
983 if (this_cpu_read(tr->array_buffer.data->ignore_pid))
984 return;
985
986 pid_list = rcu_dereference_sched(tr->filtered_pids);
987 no_pid_list = rcu_dereference_sched(tr->filtered_no_pids);
988
989 /* Set tracing if current is enabled */
990 this_cpu_write(tr->array_buffer.data->ignore_pid,
991 trace_ignore_this_task(pid_list, no_pid_list, current));
992 }
993
static void unregister_pid_events(struct trace_array *tr)
995 {
996 unregister_trace_sched_switch(event_filter_pid_sched_switch_probe_pre, tr);
997 unregister_trace_sched_switch(event_filter_pid_sched_switch_probe_post, tr);
998
999 unregister_trace_sched_wakeup(event_filter_pid_sched_wakeup_probe_pre, tr);
1000 unregister_trace_sched_wakeup(event_filter_pid_sched_wakeup_probe_post, tr);
1001
1002 unregister_trace_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_pre, tr);
1003 unregister_trace_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_post, tr);
1004
1005 unregister_trace_sched_waking(event_filter_pid_sched_wakeup_probe_pre, tr);
1006 unregister_trace_sched_waking(event_filter_pid_sched_wakeup_probe_post, tr);
1007 }
1008
static void __ftrace_clear_event_pids(struct trace_array *tr, int type)
1010 {
1011 struct trace_pid_list *pid_list;
1012 struct trace_pid_list *no_pid_list;
1013 struct trace_event_file *file;
1014 int cpu;
1015
1016 pid_list = rcu_dereference_protected(tr->filtered_pids,
1017 lockdep_is_held(&event_mutex));
1018 no_pid_list = rcu_dereference_protected(tr->filtered_no_pids,
1019 lockdep_is_held(&event_mutex));
1020
1021 /* Make sure there's something to do */
1022 if (!pid_type_enabled(type, pid_list, no_pid_list))
1023 return;
1024
1025 if (!still_need_pid_events(type, pid_list, no_pid_list)) {
1026 unregister_pid_events(tr);
1027
1028 list_for_each_entry(file, &tr->events, list) {
1029 clear_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags);
1030 }
1031
1032 for_each_possible_cpu(cpu)
1033 per_cpu_ptr(tr->array_buffer.data, cpu)->ignore_pid = false;
1034 }
1035
1036 if (type & TRACE_PIDS)
1037 rcu_assign_pointer(tr->filtered_pids, NULL);
1038
1039 if (type & TRACE_NO_PIDS)
1040 rcu_assign_pointer(tr->filtered_no_pids, NULL);
1041
1042 /* Wait till all users are no longer using pid filtering */
1043 tracepoint_synchronize_unregister();
1044
1045 if ((type & TRACE_PIDS) && pid_list)
1046 trace_pid_list_free(pid_list);
1047
1048 if ((type & TRACE_NO_PIDS) && no_pid_list)
1049 trace_pid_list_free(no_pid_list);
1050 }
1051
static void ftrace_clear_event_pids(struct trace_array *tr, int type)
1053 {
1054 mutex_lock(&event_mutex);
1055 __ftrace_clear_event_pids(tr, type);
1056 mutex_unlock(&event_mutex);
1057 }
1058
static void __put_system(struct event_subsystem *system)
1060 {
1061 struct event_filter *filter = system->filter;
1062
1063 WARN_ON_ONCE(system_refcount(system) == 0);
1064 if (system_refcount_dec(system))
1065 return;
1066
1067 list_del(&system->list);
1068
1069 if (filter) {
1070 kfree(filter->filter_string);
1071 kfree(filter);
1072 }
1073 kfree_const(system->name);
1074 kfree(system);
1075 }
1076
static void __get_system(struct event_subsystem *system)
1078 {
1079 WARN_ON_ONCE(system_refcount(system) == 0);
1080 system_refcount_inc(system);
1081 }
1082
static void __get_system_dir(struct trace_subsystem_dir *dir)
1084 {
1085 WARN_ON_ONCE(dir->ref_count == 0);
1086 dir->ref_count++;
1087 __get_system(dir->subsystem);
1088 }
1089
static void __put_system_dir(struct trace_subsystem_dir *dir)
1091 {
1092 WARN_ON_ONCE(dir->ref_count == 0);
1093 /* If the subsystem is about to be freed, the dir must be too */
1094 WARN_ON_ONCE(system_refcount(dir->subsystem) == 1 && dir->ref_count != 1);
1095
1096 __put_system(dir->subsystem);
1097 if (!--dir->ref_count)
1098 kfree(dir);
1099 }
1100
static void put_system(struct trace_subsystem_dir *dir)
1102 {
1103 mutex_lock(&event_mutex);
1104 __put_system_dir(dir);
1105 mutex_unlock(&event_mutex);
1106 }
1107
static void remove_subsystem(struct trace_subsystem_dir *dir)
1109 {
1110 if (!dir)
1111 return;
1112
1113 if (!--dir->nr_events) {
1114 eventfs_remove_dir(dir->ei);
1115 list_del(&dir->list);
1116 __put_system_dir(dir);
1117 }
1118 }
1119
void event_file_get(struct trace_event_file *file)
1121 {
1122 refcount_inc(&file->ref);
1123 }
1124
void event_file_put(struct trace_event_file *file)
1126 {
1127 if (WARN_ON_ONCE(!refcount_read(&file->ref))) {
1128 if (file->flags & EVENT_FILE_FL_FREED)
1129 kmem_cache_free(file_cachep, file);
1130 return;
1131 }
1132
1133 if (refcount_dec_and_test(&file->ref)) {
1134 /* Count should only go to zero when it is freed */
1135 if (WARN_ON_ONCE(!(file->flags & EVENT_FILE_FL_FREED)))
1136 return;
1137 kmem_cache_free(file_cachep, file);
1138 }
1139 }
1140
static void remove_event_file_dir(struct trace_event_file *file)
1142 {
1143 eventfs_remove_dir(file->ei);
1144 list_del(&file->list);
1145 remove_subsystem(file->system);
1146 free_event_filter(file->filter);
1147 file->flags |= EVENT_FILE_FL_FREED;
1148 event_file_put(file);
1149 }
1150
1151 /*
1152 * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events.
1153 */
1154 static int
__ftrace_set_clr_event_nolock(struct trace_array *tr, const char *match,
			      const char *sub, const char *event, int set)
1157 {
1158 struct trace_event_file *file;
1159 struct trace_event_call *call;
1160 const char *name;
1161 int ret = -EINVAL;
1162 int eret = 0;
1163
1164 list_for_each_entry(file, &tr->events, list) {
1165
1166 call = file->event_call;
1167 name = trace_event_name(call);
1168
1169 if (!name || !call->class || !call->class->reg)
1170 continue;
1171
1172 if (call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)
1173 continue;
1174
1175 if (match &&
1176 strcmp(match, name) != 0 &&
1177 strcmp(match, call->class->system) != 0)
1178 continue;
1179
1180 if (sub && strcmp(sub, call->class->system) != 0)
1181 continue;
1182
1183 if (event && strcmp(event, name) != 0)
1184 continue;
1185
1186 ret = ftrace_event_enable_disable(file, set);
1187
1188 /*
1189 * Save the first error and return that. Some events
1190 * may still have been enabled, but let the user
1191 * know that something went wrong.
1192 */
1193 if (ret && !eret)
1194 eret = ret;
1195
1196 ret = eret;
1197 }
1198
1199 return ret;
1200 }
1201
static int __ftrace_set_clr_event(struct trace_array *tr, const char *match,
				  const char *sub, const char *event, int set)
1204 {
1205 int ret;
1206
1207 mutex_lock(&event_mutex);
1208 ret = __ftrace_set_clr_event_nolock(tr, match, sub, event, set);
1209 mutex_unlock(&event_mutex);
1210
1211 return ret;
1212 }
1213
int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set)
1215 {
1216 char *event = NULL, *sub = NULL, *match;
1217 int ret;
1218
1219 if (!tr)
1220 return -ENOENT;
1221 /*
1222 * The buf format can be <subsystem>:<event-name>
1223 * *:<event-name> means any event by that name.
1224 * :<event-name> is the same.
1225 *
1226 * <subsystem>:* means all events in that subsystem
1227 * <subsystem>: means the same.
1228 *
1229 * <name> (no ':') means all events in a subsystem with
1230 * the name <name> or any event that matches <name>
1231 */
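/*
 * For example (illustrative): "sched:sched_switch" names a single event,
 * "sched:" or "sched:*" selects the whole sched subsystem, and a bare
 * "sched_switch" matches by event or subsystem name.
 */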
1232
1233 match = strsep(&buf, ":");
1234 if (buf) {
1235 sub = match;
1236 event = buf;
1237 match = NULL;
1238
1239 if (!strlen(sub) || strcmp(sub, "*") == 0)
1240 sub = NULL;
1241 if (!strlen(event) || strcmp(event, "*") == 0)
1242 event = NULL;
1243 }
1244
1245 ret = __ftrace_set_clr_event(tr, match, sub, event, set);
1246
1247 /* Put back the colon to allow this to be called again */
1248 if (buf)
1249 *(buf - 1) = ':';
1250
1251 return ret;
1252 }
1253
1254 /**
1255 * trace_set_clr_event - enable or disable an event
1256 * @system: system name to match (NULL for any system)
1257 * @event: event name to match (NULL for all events, within system)
1258 * @set: 1 to enable, 0 to disable
1259 *
1260 * This is a way for other parts of the kernel to enable or disable
1261 * event recording.
1262 *
1263 * Returns 0 on success, -EINVAL if the parameters do not match any
1264 * registered events.
1265 */
int trace_set_clr_event(const char *system, const char *event, int set)
1267 {
1268 struct trace_array *tr = top_trace_array();
1269
1270 if (!tr)
1271 return -ENODEV;
1272
1273 return __ftrace_set_clr_event(tr, NULL, system, event, set);
1274 }
1275 EXPORT_SYMBOL_GPL(trace_set_clr_event);
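
/*
 * Example (illustrative): trace_set_clr_event("sched", "sched_switch", 1)
 * enables the sched_switch event in the top level trace array; passing 0
 * as the last argument disables it again.
 */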
1276
1277 /**
1278 * trace_array_set_clr_event - enable or disable an event for a trace array.
1279 * @tr: concerned trace array.
1280 * @system: system name to match (NULL for any system)
1281 * @event: event name to match (NULL for all events, within system)
1282 * @enable: true to enable, false to disable
1283 *
1284 * This is a way for other parts of the kernel to enable or disable
1285 * event recording.
1286 *
1287 * Returns 0 on success, -EINVAL if the parameters do not match any
1288 * registered events.
1289 */
int trace_array_set_clr_event(struct trace_array *tr, const char *system,
			      const char *event, bool enable)
1292 {
1293 int set;
1294
1295 if (!tr)
1296 return -ENOENT;
1297
1298 set = (enable == true) ? 1 : 0;
1299 return __ftrace_set_clr_event(tr, NULL, system, event, set);
1300 }
1301 EXPORT_SYMBOL_GPL(trace_array_set_clr_event);
1302
1303 /* 128 should be much more than enough */
1304 #define EVENT_BUF_SIZE 127
1305
1306 static ssize_t
ftrace_event_write(struct file *file, const char __user *ubuf,
		   size_t cnt, loff_t *ppos)
1309 {
1310 struct trace_parser parser;
1311 struct seq_file *m = file->private_data;
1312 struct trace_array *tr = m->private;
1313 ssize_t read, ret;
1314
1315 if (!cnt)
1316 return 0;
1317
1318 ret = tracing_update_buffers(tr);
1319 if (ret < 0)
1320 return ret;
1321
1322 if (trace_parser_get_init(&parser, EVENT_BUF_SIZE + 1))
1323 return -ENOMEM;
1324
1325 read = trace_get_user(&parser, ubuf, cnt, ppos);
1326
1327 if (read >= 0 && trace_parser_loaded((&parser))) {
1328 int set = 1;
1329
1330 if (*parser.buffer == '!')
1331 set = 0;
1332
1333 ret = ftrace_set_clr_event(tr, parser.buffer + !set, set);
1334 if (ret)
1335 goto out_put;
1336 }
1337
1338 ret = read;
1339
1340 out_put:
1341 trace_parser_put(&parser);
1342
1343 return ret;
1344 }
1345
1346 static void *
t_next(struct seq_file *m, void *v, loff_t *pos)
1348 {
1349 struct trace_event_file *file = v;
1350 struct trace_event_call *call;
1351 struct trace_array *tr = m->private;
1352
1353 (*pos)++;
1354
1355 list_for_each_entry_continue(file, &tr->events, list) {
1356 call = file->event_call;
1357 /*
1358 * The ftrace subsystem is for showing formats only.
* They cannot be enabled or disabled via the event files.
1360 */
1361 if (call->class && call->class->reg &&
1362 !(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE))
1363 return file;
1364 }
1365
1366 return NULL;
1367 }
1368
static void *t_start(struct seq_file *m, loff_t *pos)
1370 {
1371 struct trace_event_file *file;
1372 struct trace_array *tr = m->private;
1373 loff_t l;
1374
1375 mutex_lock(&event_mutex);
1376
1377 file = list_entry(&tr->events, struct trace_event_file, list);
1378 for (l = 0; l <= *pos; ) {
1379 file = t_next(m, file, &l);
1380 if (!file)
1381 break;
1382 }
1383 return file;
1384 }
1385
1386 static void *
s_next(struct seq_file *m, void *v, loff_t *pos)
1388 {
1389 struct trace_event_file *file = v;
1390 struct trace_array *tr = m->private;
1391
1392 (*pos)++;
1393
1394 list_for_each_entry_continue(file, &tr->events, list) {
1395 if (file->flags & EVENT_FILE_FL_ENABLED)
1396 return file;
1397 }
1398
1399 return NULL;
1400 }
1401
static void *s_start(struct seq_file *m, loff_t *pos)
1403 {
1404 struct trace_event_file *file;
1405 struct trace_array *tr = m->private;
1406 loff_t l;
1407
1408 mutex_lock(&event_mutex);
1409
1410 file = list_entry(&tr->events, struct trace_event_file, list);
1411 for (l = 0; l <= *pos; ) {
1412 file = s_next(m, file, &l);
1413 if (!file)
1414 break;
1415 }
1416 return file;
1417 }
1418
static int t_show(struct seq_file *m, void *v)
1420 {
1421 struct trace_event_file *file = v;
1422 struct trace_event_call *call = file->event_call;
1423
1424 if (strcmp(call->class->system, TRACE_SYSTEM) != 0)
1425 seq_printf(m, "%s:", call->class->system);
1426 seq_printf(m, "%s\n", trace_event_name(call));
1427
1428 return 0;
1429 }
1430
static void t_stop(struct seq_file *m, void *p)
1432 {
1433 mutex_unlock(&event_mutex);
1434 }
1435
1436 static void *
__next(struct seq_file *m, void *v, loff_t *pos, int type)
1438 {
1439 struct trace_array *tr = m->private;
1440 struct trace_pid_list *pid_list;
1441
1442 if (type == TRACE_PIDS)
1443 pid_list = rcu_dereference_sched(tr->filtered_pids);
1444 else
1445 pid_list = rcu_dereference_sched(tr->filtered_no_pids);
1446
1447 return trace_pid_next(pid_list, v, pos);
1448 }
1449
1450 static void *
p_next(struct seq_file *m, void *v, loff_t *pos)
1452 {
1453 return __next(m, v, pos, TRACE_PIDS);
1454 }
1455
1456 static void *
np_next(struct seq_file *m, void *v, loff_t *pos)
1458 {
1459 return __next(m, v, pos, TRACE_NO_PIDS);
1460 }
1461
static void *__start(struct seq_file *m, loff_t *pos, int type)
1463 __acquires(RCU)
1464 {
1465 struct trace_pid_list *pid_list;
1466 struct trace_array *tr = m->private;
1467
1468 /*
1469 * Grab the mutex, to keep calls to p_next() having the same
1470 * tr->filtered_pids as p_start() has.
1471 * If we just passed the tr->filtered_pids around, then RCU would
1472 * have been enough, but doing that makes things more complex.
1473 */
1474 mutex_lock(&event_mutex);
1475 rcu_read_lock_sched();
1476
1477 if (type == TRACE_PIDS)
1478 pid_list = rcu_dereference_sched(tr->filtered_pids);
1479 else
1480 pid_list = rcu_dereference_sched(tr->filtered_no_pids);
1481
1482 if (!pid_list)
1483 return NULL;
1484
1485 return trace_pid_start(pid_list, pos);
1486 }
1487
static void *p_start(struct seq_file *m, loff_t *pos)
1489 __acquires(RCU)
1490 {
1491 return __start(m, pos, TRACE_PIDS);
1492 }
1493
static void *np_start(struct seq_file *m, loff_t *pos)
1495 __acquires(RCU)
1496 {
1497 return __start(m, pos, TRACE_NO_PIDS);
1498 }
1499
static void p_stop(struct seq_file *m, void *p)
1501 __releases(RCU)
1502 {
1503 rcu_read_unlock_sched();
1504 mutex_unlock(&event_mutex);
1505 }
1506
1507 static ssize_t
event_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
		  loff_t *ppos)
1510 {
1511 struct trace_event_file *file;
1512 unsigned long flags;
1513 char buf[4] = "0";
1514
1515 mutex_lock(&event_mutex);
1516 file = event_file_file(filp);
1517 if (likely(file))
1518 flags = file->flags;
1519 mutex_unlock(&event_mutex);
1520
1521 if (!file)
1522 return -ENODEV;
1523
1524 if (flags & EVENT_FILE_FL_ENABLED &&
1525 !(flags & EVENT_FILE_FL_SOFT_DISABLED))
1526 strcpy(buf, "1");
1527
1528 if (flags & EVENT_FILE_FL_SOFT_DISABLED ||
1529 flags & EVENT_FILE_FL_SOFT_MODE)
1530 strcat(buf, "*");
1531
1532 strcat(buf, "\n");
1533
1534 return simple_read_from_buffer(ubuf, cnt, ppos, buf, strlen(buf));
1535 }
1536
1537 static ssize_t
event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
		   loff_t *ppos)
1540 {
1541 struct trace_event_file *file;
1542 unsigned long val;
1543 int ret;
1544
1545 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
1546 if (ret)
1547 return ret;
1548
1549 switch (val) {
1550 case 0:
1551 case 1:
1552 ret = -ENODEV;
1553 mutex_lock(&event_mutex);
1554 file = event_file_file(filp);
1555 if (likely(file)) {
1556 ret = tracing_update_buffers(file->tr);
1557 if (ret < 0) {
1558 mutex_unlock(&event_mutex);
1559 return ret;
1560 }
1561 ret = ftrace_event_enable_disable(file, val);
1562 }
1563 mutex_unlock(&event_mutex);
1564 break;
1565
1566 default:
1567 return -EINVAL;
1568 }
1569
1570 *ppos += cnt;
1571
1572 return ret ? ret : cnt;
1573 }
1574
1575 static ssize_t
system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
		   loff_t *ppos)
1578 {
1579 const char set_to_char[4] = { '?', '0', '1', 'X' };
1580 struct trace_subsystem_dir *dir = filp->private_data;
1581 struct event_subsystem *system = dir->subsystem;
1582 struct trace_event_call *call;
1583 struct trace_event_file *file;
1584 struct trace_array *tr = dir->tr;
1585 char buf[2];
1586 int set = 0;
1587 int ret;
1588
1589 mutex_lock(&event_mutex);
1590 list_for_each_entry(file, &tr->events, list) {
1591 call = file->event_call;
1592 if ((call->flags & TRACE_EVENT_FL_IGNORE_ENABLE) ||
1593 !trace_event_name(call) || !call->class || !call->class->reg)
1594 continue;
1595
1596 if (system && strcmp(call->class->system, system->name) != 0)
1597 continue;
1598
/*
 * We need to find out if all the events are set
 * or if all events are cleared, or if we have
 * a mixture.
 */
1604 set |= (1 << !!(file->flags & EVENT_FILE_FL_ENABLED));
1605
1606 /*
1607 * If we have a mixture, no need to look further.
1608 */
1609 if (set == 3)
1610 break;
1611 }
1612 mutex_unlock(&event_mutex);
1613
1614 buf[0] = set_to_char[set];
1615 buf[1] = '\n';
1616
1617 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
1618
1619 return ret;
1620 }
1621
1622 static ssize_t
system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
		    loff_t *ppos)
1625 {
1626 struct trace_subsystem_dir *dir = filp->private_data;
1627 struct event_subsystem *system = dir->subsystem;
1628 const char *name = NULL;
1629 unsigned long val;
1630 ssize_t ret;
1631
1632 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
1633 if (ret)
1634 return ret;
1635
1636 ret = tracing_update_buffers(dir->tr);
1637 if (ret < 0)
1638 return ret;
1639
1640 if (val != 0 && val != 1)
1641 return -EINVAL;
1642
1643 /*
1644 * Opening of "enable" adds a ref count to system,
1645 * so the name is safe to use.
1646 */
1647 if (system)
1648 name = system->name;
1649
1650 ret = __ftrace_set_clr_event(dir->tr, NULL, name, NULL, val);
1651 if (ret)
1652 goto out;
1653
1654 ret = cnt;
1655
1656 out:
1657 *ppos += cnt;
1658
1659 return ret;
1660 }
1661
1662 enum {
1663 FORMAT_HEADER = 1,
1664 FORMAT_FIELD_SEPERATOR = 2,
1665 FORMAT_PRINTFMT = 3,
1666 };
1667
static void *f_next(struct seq_file *m, void *v, loff_t *pos)
1669 {
1670 struct trace_event_file *file = event_file_data(m->private);
1671 struct trace_event_call *call = file->event_call;
1672 struct list_head *common_head = &ftrace_common_fields;
1673 struct list_head *head = trace_get_fields(call);
1674 struct list_head *node = v;
1675
1676 (*pos)++;
1677
1678 switch ((unsigned long)v) {
1679 case FORMAT_HEADER:
1680 node = common_head;
1681 break;
1682
1683 case FORMAT_FIELD_SEPERATOR:
1684 node = head;
1685 break;
1686
1687 case FORMAT_PRINTFMT:
1688 /* all done */
1689 return NULL;
1690 }
1691
1692 node = node->prev;
1693 if (node == common_head)
1694 return (void *)FORMAT_FIELD_SEPERATOR;
1695 else if (node == head)
1696 return (void *)FORMAT_PRINTFMT;
1697 else
1698 return node;
1699 }
1700
static int f_show(struct seq_file *m, void *v)
1702 {
1703 struct trace_event_file *file = event_file_data(m->private);
1704 struct trace_event_call *call = file->event_call;
1705 struct ftrace_event_field *field;
1706 const char *array_descriptor;
1707
1708 switch ((unsigned long)v) {
1709 case FORMAT_HEADER:
1710 seq_printf(m, "name: %s\n", trace_event_name(call));
1711 seq_printf(m, "ID: %d\n", call->event.type);
1712 seq_puts(m, "format:\n");
1713 return 0;
1714
1715 case FORMAT_FIELD_SEPERATOR:
1716 seq_putc(m, '\n');
1717 return 0;
1718
1719 case FORMAT_PRINTFMT:
1720 seq_printf(m, "\nprint fmt: %s\n",
1721 call->print_fmt);
1722 return 0;
1723 }
1724
1725 field = list_entry(v, struct ftrace_event_field, link);
1726 /*
* Smartly shows the array type (except dynamic array).
1728 * Normal:
1729 * field:TYPE VAR
1730 * If TYPE := TYPE[LEN], it is shown:
1731 * field:TYPE VAR[LEN]
1732 */
1733 array_descriptor = strchr(field->type, '[');
1734
1735 if (str_has_prefix(field->type, "__data_loc"))
1736 array_descriptor = NULL;
1737
1738 if (!array_descriptor)
1739 seq_printf(m, "\tfield:%s %s;\toffset:%u;\tsize:%u;\tsigned:%d;\n",
1740 field->type, field->name, field->offset,
1741 field->size, !!field->is_signed);
1742 else if (field->len)
1743 seq_printf(m, "\tfield:%.*s %s[%d];\toffset:%u;\tsize:%u;\tsigned:%d;\n",
1744 (int)(array_descriptor - field->type),
1745 field->type, field->name,
1746 field->len, field->offset,
1747 field->size, !!field->is_signed);
1748 else
1749 seq_printf(m, "\tfield:%.*s %s[];\toffset:%u;\tsize:%u;\tsigned:%d;\n",
1750 (int)(array_descriptor - field->type),
1751 field->type, field->name,
1752 field->offset, field->size, !!field->is_signed);
1753
1754 return 0;
1755 }
1756
static void *f_start(struct seq_file *m, loff_t *pos)
1758 {
1759 struct trace_event_file *file;
1760 void *p = (void *)FORMAT_HEADER;
1761 loff_t l = 0;
1762
1763 /* ->stop() is called even if ->start() fails */
1764 mutex_lock(&event_mutex);
1765 file = event_file_file(m->private);
1766 if (!file)
1767 return ERR_PTR(-ENODEV);
1768
1769 while (l < *pos && p)
1770 p = f_next(m, p, &l);
1771
1772 return p;
1773 }
1774
static void f_stop(struct seq_file *m, void *p)
1776 {
1777 mutex_unlock(&event_mutex);
1778 }
1779
1780 static const struct seq_operations trace_format_seq_ops = {
1781 .start = f_start,
1782 .next = f_next,
1783 .stop = f_stop,
1784 .show = f_show,
1785 };
1786
static int trace_format_open(struct inode *inode, struct file *file)
1788 {
1789 struct seq_file *m;
1790 int ret;
1791
1792 /* Do we want to hide event format files on tracefs lockdown? */
1793
1794 ret = seq_open(file, &trace_format_seq_ops);
1795 if (ret < 0)
1796 return ret;
1797
1798 m = file->private_data;
1799 m->private = file;
1800
1801 return 0;
1802 }
1803
1804 #ifdef CONFIG_PERF_EVENTS
1805 static ssize_t
event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
1807 {
1808 int id = (long)event_file_data(filp);
1809 char buf[32];
1810 int len;
1811
1812 if (unlikely(!id))
1813 return -ENODEV;
1814
1815 len = sprintf(buf, "%d\n", id);
1816
1817 return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
1818 }
1819 #endif
1820
1821 static ssize_t
event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
		  loff_t *ppos)
1824 {
1825 struct trace_event_file *file;
1826 struct trace_seq *s;
1827 int r = -ENODEV;
1828
1829 if (*ppos)
1830 return 0;
1831
1832 s = kmalloc(sizeof(*s), GFP_KERNEL);
1833
1834 if (!s)
1835 return -ENOMEM;
1836
1837 trace_seq_init(s);
1838
1839 mutex_lock(&event_mutex);
1840 file = event_file_file(filp);
1841 if (file)
1842 print_event_filter(file, s);
1843 mutex_unlock(&event_mutex);
1844
1845 if (file)
1846 r = simple_read_from_buffer(ubuf, cnt, ppos,
1847 s->buffer, trace_seq_used(s));
1848
1849 kfree(s);
1850
1851 return r;
1852 }
1853
1854 static ssize_t
event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
		   loff_t *ppos)
1857 {
1858 struct trace_event_file *file;
1859 char *buf;
1860 int err = -ENODEV;
1861
1862 if (cnt >= PAGE_SIZE)
1863 return -EINVAL;
1864
1865 buf = memdup_user_nul(ubuf, cnt);
1866 if (IS_ERR(buf))
1867 return PTR_ERR(buf);
1868
1869 mutex_lock(&event_mutex);
1870 file = event_file_file(filp);
1871 if (file) {
1872 if (file->flags & EVENT_FILE_FL_FREED)
1873 err = -ENODEV;
1874 else
1875 err = apply_event_filter(file, buf);
1876 }
1877 mutex_unlock(&event_mutex);
1878
1879 kfree(buf);
1880 if (err < 0)
1881 return err;
1882
1883 *ppos += cnt;
1884
1885 return cnt;
1886 }
1887
1888 static LIST_HEAD(event_subsystems);
1889
static int subsystem_open(struct inode *inode, struct file *filp)
1891 {
1892 struct trace_subsystem_dir *dir = NULL, *iter_dir;
1893 struct trace_array *tr = NULL, *iter_tr;
1894 struct event_subsystem *system = NULL;
1895 int ret;
1896
1897 if (tracing_is_disabled())
1898 return -ENODEV;
1899
1900 /* Make sure the system still exists */
1901 mutex_lock(&event_mutex);
1902 mutex_lock(&trace_types_lock);
1903 list_for_each_entry(iter_tr, &ftrace_trace_arrays, list) {
1904 list_for_each_entry(iter_dir, &iter_tr->systems, list) {
1905 if (iter_dir == inode->i_private) {
1906 /* Don't open systems with no events */
1907 tr = iter_tr;
1908 dir = iter_dir;
1909 if (dir->nr_events) {
1910 __get_system_dir(dir);
1911 system = dir->subsystem;
1912 }
1913 goto exit_loop;
1914 }
1915 }
1916 }
1917 exit_loop:
1918 mutex_unlock(&trace_types_lock);
1919 mutex_unlock(&event_mutex);
1920
1921 if (!system)
1922 return -ENODEV;
1923
1924 /* Still need to increment the ref count of the system */
1925 if (trace_array_get(tr) < 0) {
1926 put_system(dir);
1927 return -ENODEV;
1928 }
1929
1930 ret = tracing_open_generic(inode, filp);
1931 if (ret < 0) {
1932 trace_array_put(tr);
1933 put_system(dir);
1934 }
1935
1936 return ret;
1937 }
1938
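/*
 * Open the top level "enable" file of an events directory. A temporary
 * trace_subsystem_dir with no subsystem is allocated that only points to
 * the trace array, so the shared system_enable_read/write handlers can be
 * reused for the "all events" case.
 */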
1939 static int system_tr_open(struct inode *inode, struct file *filp)
1940 {
1941 struct trace_subsystem_dir *dir;
1942 struct trace_array *tr = inode->i_private;
1943 int ret;
1944
1945 /* Make a temporary dir that has no system but points to tr */
1946 dir = kzalloc(sizeof(*dir), GFP_KERNEL);
1947 if (!dir)
1948 return -ENOMEM;
1949
1950 ret = tracing_open_generic_tr(inode, filp);
1951 if (ret < 0) {
1952 kfree(dir);
1953 return ret;
1954 }
1955 dir->tr = tr;
1956 filp->private_data = dir;
1957
1958 return 0;
1959 }
1960
1961 static int subsystem_release(struct inode *inode, struct file *file)
1962 {
1963 struct trace_subsystem_dir *dir = file->private_data;
1964
1965 trace_array_put(dir->tr);
1966
1967 /*
1968 * If dir->subsystem is NULL, then this is a temporary
1969 * descriptor that was made for a trace_array to enable
1970 * all subsystems.
1971 */
1972 if (dir->subsystem)
1973 put_system(dir);
1974 else
1975 kfree(dir);
1976
1977 return 0;
1978 }
1979
1980 static ssize_t
1981 subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
1982 loff_t *ppos)
1983 {
1984 struct trace_subsystem_dir *dir = filp->private_data;
1985 struct event_subsystem *system = dir->subsystem;
1986 struct trace_seq *s;
1987 int r;
1988
1989 if (*ppos)
1990 return 0;
1991
1992 s = kmalloc(sizeof(*s), GFP_KERNEL);
1993 if (!s)
1994 return -ENOMEM;
1995
1996 trace_seq_init(s);
1997
1998 print_subsystem_event_filter(system, s);
1999 r = simple_read_from_buffer(ubuf, cnt, ppos,
2000 s->buffer, trace_seq_used(s));
2001
2002 kfree(s);
2003
2004 return r;
2005 }
2006
2007 static ssize_t
2008 subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
2009 loff_t *ppos)
2010 {
2011 struct trace_subsystem_dir *dir = filp->private_data;
2012 char *buf;
2013 int err;
2014
2015 if (cnt >= PAGE_SIZE)
2016 return -EINVAL;
2017
2018 buf = memdup_user_nul(ubuf, cnt);
2019 if (IS_ERR(buf))
2020 return PTR_ERR(buf);
2021
2022 err = apply_subsystem_event_filter(dir, buf);
2023 kfree(buf);
2024 if (err < 0)
2025 return err;
2026
2027 *ppos += cnt;
2028
2029 return cnt;
2030 }
2031
2032 static ssize_t
2033 show_header_page_file(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
2034 {
2035 struct trace_array *tr = filp->private_data;
2036 struct trace_seq *s;
2037 int r;
2038
2039 if (*ppos)
2040 return 0;
2041
2042 s = kmalloc(sizeof(*s), GFP_KERNEL);
2043 if (!s)
2044 return -ENOMEM;
2045
2046 trace_seq_init(s);
2047
2048 ring_buffer_print_page_header(tr->array_buffer.buffer, s);
2049 r = simple_read_from_buffer(ubuf, cnt, ppos,
2050 s->buffer, trace_seq_used(s));
2051
2052 kfree(s);
2053
2054 return r;
2055 }
2056
2057 static ssize_t
2058 show_header_event_file(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
2059 {
2060 struct trace_seq *s;
2061 int r;
2062
2063 if (*ppos)
2064 return 0;
2065
2066 s = kmalloc(sizeof(*s), GFP_KERNEL);
2067 if (!s)
2068 return -ENOMEM;
2069
2070 trace_seq_init(s);
2071
2072 ring_buffer_print_entry_header(s);
2073 r = simple_read_from_buffer(ubuf, cnt, ppos,
2074 s->buffer, trace_seq_used(s));
2075
2076 kfree(s);
2077
2078 return r;
2079 }
2080
2081 static void ignore_task_cpu(void *data)
2082 {
2083 struct trace_array *tr = data;
2084 struct trace_pid_list *pid_list;
2085 struct trace_pid_list *no_pid_list;
2086
2087 /*
2088 * This function is called by on_each_cpu() while the
2089 * event_mutex is held.
2090 */
2091 pid_list = rcu_dereference_protected(tr->filtered_pids,
2092 mutex_is_locked(&event_mutex));
2093 no_pid_list = rcu_dereference_protected(tr->filtered_no_pids,
2094 mutex_is_locked(&event_mutex));
2095
2096 this_cpu_write(tr->array_buffer.data->ignore_pid,
2097 trace_ignore_this_task(pid_list, no_pid_list, current));
2098 }
2099
2100 static void register_pid_events(struct trace_array *tr)
2101 {
2102 /*
2103 * Register a probe that is called before all other probes
2104 * to set ignore_pid if next or prev do not match.
2105 * Register a probe that is called after all other probes
2106 * to only keep ignore_pid set if next pid matches.
2107 */
2108 register_trace_prio_sched_switch(event_filter_pid_sched_switch_probe_pre,
2109 tr, INT_MAX);
2110 register_trace_prio_sched_switch(event_filter_pid_sched_switch_probe_post,
2111 tr, 0);
2112
2113 register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_pre,
2114 tr, INT_MAX);
2115 register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_post,
2116 tr, 0);
2117
2118 register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_pre,
2119 tr, INT_MAX);
2120 register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_post,
2121 tr, 0);
2122
2123 register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_pre,
2124 tr, INT_MAX);
2125 register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_post,
2126 tr, 0);
2127 }
2128
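/*
 * Common handler for writes to "set_event_pid" and "set_event_notrace_pid".
 * The new pid list is parsed and swapped in under event_mutex, every event
 * file gets its PID_FILTER bit set, and the sched_switch/wakeup probes are
 * registered the first time a pid filter is installed.
 * For example, "echo 123 456 > set_event_pid" limits events to those pids.
 */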
2129 static ssize_t
2130 event_pid_write(struct file *filp, const char __user *ubuf,
2131 size_t cnt, loff_t *ppos, int type)
2132 {
2133 struct seq_file *m = filp->private_data;
2134 struct trace_array *tr = m->private;
2135 struct trace_pid_list *filtered_pids = NULL;
2136 struct trace_pid_list *other_pids = NULL;
2137 struct trace_pid_list *pid_list;
2138 struct trace_event_file *file;
2139 ssize_t ret;
2140
2141 if (!cnt)
2142 return 0;
2143
2144 ret = tracing_update_buffers(tr);
2145 if (ret < 0)
2146 return ret;
2147
2148 mutex_lock(&event_mutex);
2149
2150 if (type == TRACE_PIDS) {
2151 filtered_pids = rcu_dereference_protected(tr->filtered_pids,
2152 lockdep_is_held(&event_mutex));
2153 other_pids = rcu_dereference_protected(tr->filtered_no_pids,
2154 lockdep_is_held(&event_mutex));
2155 } else {
2156 filtered_pids = rcu_dereference_protected(tr->filtered_no_pids,
2157 lockdep_is_held(&event_mutex));
2158 other_pids = rcu_dereference_protected(tr->filtered_pids,
2159 lockdep_is_held(&event_mutex));
2160 }
2161
2162 ret = trace_pid_write(filtered_pids, &pid_list, ubuf, cnt);
2163 if (ret < 0)
2164 goto out;
2165
2166 if (type == TRACE_PIDS)
2167 rcu_assign_pointer(tr->filtered_pids, pid_list);
2168 else
2169 rcu_assign_pointer(tr->filtered_no_pids, pid_list);
2170
2171 list_for_each_entry(file, &tr->events, list) {
2172 set_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags);
2173 }
2174
2175 if (filtered_pids) {
2176 tracepoint_synchronize_unregister();
2177 trace_pid_list_free(filtered_pids);
2178 } else if (pid_list && !other_pids) {
2179 register_pid_events(tr);
2180 }
2181
2182 /*
2183 * Ignoring of pids is done at task switch. But we have to
2184 * check for those tasks that are currently running.
2185 * Always do this in case a pid was appended or removed.
2186 */
2187 on_each_cpu(ignore_task_cpu, tr, 1);
2188
2189 out:
2190 mutex_unlock(&event_mutex);
2191
2192 if (ret > 0)
2193 *ppos += ret;
2194
2195 return ret;
2196 }
2197
2198 static ssize_t
2199 ftrace_event_pid_write(struct file *filp, const char __user *ubuf,
2200 size_t cnt, loff_t *ppos)
2201 {
2202 return event_pid_write(filp, ubuf, cnt, ppos, TRACE_PIDS);
2203 }
2204
2205 static ssize_t
2206 ftrace_event_npid_write(struct file *filp, const char __user *ubuf,
2207 size_t cnt, loff_t *ppos)
2208 {
2209 return event_pid_write(filp, ubuf, cnt, ppos, TRACE_NO_PIDS);
2210 }
2211
2212 static int ftrace_event_avail_open(struct inode *inode, struct file *file);
2213 static int ftrace_event_set_open(struct inode *inode, struct file *file);
2214 static int ftrace_event_set_pid_open(struct inode *inode, struct file *file);
2215 static int ftrace_event_set_npid_open(struct inode *inode, struct file *file);
2216 static int ftrace_event_release(struct inode *inode, struct file *file);
2217
2218 static const struct seq_operations show_event_seq_ops = {
2219 .start = t_start,
2220 .next = t_next,
2221 .show = t_show,
2222 .stop = t_stop,
2223 };
2224
2225 static const struct seq_operations show_set_event_seq_ops = {
2226 .start = s_start,
2227 .next = s_next,
2228 .show = t_show,
2229 .stop = t_stop,
2230 };
2231
2232 static const struct seq_operations show_set_pid_seq_ops = {
2233 .start = p_start,
2234 .next = p_next,
2235 .show = trace_pid_show,
2236 .stop = p_stop,
2237 };
2238
2239 static const struct seq_operations show_set_no_pid_seq_ops = {
2240 .start = np_start,
2241 .next = np_next,
2242 .show = trace_pid_show,
2243 .stop = p_stop,
2244 };
2245
2246 static const struct file_operations ftrace_avail_fops = {
2247 .open = ftrace_event_avail_open,
2248 .read = seq_read,
2249 .llseek = seq_lseek,
2250 .release = seq_release,
2251 };
2252
2253 static const struct file_operations ftrace_set_event_fops = {
2254 .open = ftrace_event_set_open,
2255 .read = seq_read,
2256 .write = ftrace_event_write,
2257 .llseek = seq_lseek,
2258 .release = ftrace_event_release,
2259 };
2260
2261 static const struct file_operations ftrace_set_event_pid_fops = {
2262 .open = ftrace_event_set_pid_open,
2263 .read = seq_read,
2264 .write = ftrace_event_pid_write,
2265 .llseek = seq_lseek,
2266 .release = ftrace_event_release,
2267 };
2268
2269 static const struct file_operations ftrace_set_event_notrace_pid_fops = {
2270 .open = ftrace_event_set_npid_open,
2271 .read = seq_read,
2272 .write = ftrace_event_npid_write,
2273 .llseek = seq_lseek,
2274 .release = ftrace_event_release,
2275 };
2276
2277 static const struct file_operations ftrace_enable_fops = {
2278 .open = tracing_open_file_tr,
2279 .read = event_enable_read,
2280 .write = event_enable_write,
2281 .release = tracing_release_file_tr,
2282 .llseek = default_llseek,
2283 };
2284
2285 static const struct file_operations ftrace_event_format_fops = {
2286 .open = trace_format_open,
2287 .read = seq_read,
2288 .llseek = seq_lseek,
2289 .release = seq_release,
2290 };
2291
2292 #ifdef CONFIG_PERF_EVENTS
2293 static const struct file_operations ftrace_event_id_fops = {
2294 .read = event_id_read,
2295 .llseek = default_llseek,
2296 };
2297 #endif
2298
2299 static const struct file_operations ftrace_event_filter_fops = {
2300 .open = tracing_open_file_tr,
2301 .read = event_filter_read,
2302 .write = event_filter_write,
2303 .release = tracing_release_file_tr,
2304 .llseek = default_llseek,
2305 };
2306
2307 static const struct file_operations ftrace_subsystem_filter_fops = {
2308 .open = subsystem_open,
2309 .read = subsystem_filter_read,
2310 .write = subsystem_filter_write,
2311 .llseek = default_llseek,
2312 .release = subsystem_release,
2313 };
2314
2315 static const struct file_operations ftrace_system_enable_fops = {
2316 .open = subsystem_open,
2317 .read = system_enable_read,
2318 .write = system_enable_write,
2319 .llseek = default_llseek,
2320 .release = subsystem_release,
2321 };
2322
2323 static const struct file_operations ftrace_tr_enable_fops = {
2324 .open = system_tr_open,
2325 .read = system_enable_read,
2326 .write = system_enable_write,
2327 .llseek = default_llseek,
2328 .release = subsystem_release,
2329 };
2330
2331 static const struct file_operations ftrace_show_header_page_fops = {
2332 .open = tracing_open_generic_tr,
2333 .read = show_header_page_file,
2334 .llseek = default_llseek,
2335 .release = tracing_release_generic_tr,
2336 };
2337
2338 static const struct file_operations ftrace_show_header_event_fops = {
2339 .open = tracing_open_generic_tr,
2340 .read = show_header_event_file,
2341 .llseek = default_llseek,
2342 .release = tracing_release_generic_tr,
2343 };
2344
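/*
 * Common open helper for the "available_events", "set_event" and
 * "set_event_pid" style files: check tracefs lockdown, start the seq_file,
 * and stash the trace_array (inode->i_private) in the seq_file private data.
 */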
2345 static int
2346 ftrace_event_open(struct inode *inode, struct file *file,
2347 const struct seq_operations *seq_ops)
2348 {
2349 struct seq_file *m;
2350 int ret;
2351
2352 ret = security_locked_down(LOCKDOWN_TRACEFS);
2353 if (ret)
2354 return ret;
2355
2356 ret = seq_open(file, seq_ops);
2357 if (ret < 0)
2358 return ret;
2359 m = file->private_data;
2360 /* copy tr over to seq ops */
2361 m->private = inode->i_private;
2362
2363 return ret;
2364 }
2365
2366 static int ftrace_event_release(struct inode *inode, struct file *file)
2367 {
2368 struct trace_array *tr = inode->i_private;
2369
2370 trace_array_put(tr);
2371
2372 return seq_release(inode, file);
2373 }
2374
2375 static int
2376 ftrace_event_avail_open(struct inode *inode, struct file *file)
2377 {
2378 const struct seq_operations *seq_ops = &show_event_seq_ops;
2379
2380 /* Checks for tracefs lockdown */
2381 return ftrace_event_open(inode, file, seq_ops);
2382 }
2383
2384 static int
2385 ftrace_event_set_open(struct inode *inode, struct file *file)
2386 {
2387 const struct seq_operations *seq_ops = &show_set_event_seq_ops;
2388 struct trace_array *tr = inode->i_private;
2389 int ret;
2390
2391 ret = tracing_check_open_get_tr(tr);
2392 if (ret)
2393 return ret;
2394
2395 if ((file->f_mode & FMODE_WRITE) &&
2396 (file->f_flags & O_TRUNC))
2397 ftrace_clear_events(tr);
2398
2399 ret = ftrace_event_open(inode, file, seq_ops);
2400 if (ret < 0)
2401 trace_array_put(tr);
2402 return ret;
2403 }
2404
2405 static int
2406 ftrace_event_set_pid_open(struct inode *inode, struct file *file)
2407 {
2408 const struct seq_operations *seq_ops = &show_set_pid_seq_ops;
2409 struct trace_array *tr = inode->i_private;
2410 int ret;
2411
2412 ret = tracing_check_open_get_tr(tr);
2413 if (ret)
2414 return ret;
2415
2416 if ((file->f_mode & FMODE_WRITE) &&
2417 (file->f_flags & O_TRUNC))
2418 ftrace_clear_event_pids(tr, TRACE_PIDS);
2419
2420 ret = ftrace_event_open(inode, file, seq_ops);
2421 if (ret < 0)
2422 trace_array_put(tr);
2423 return ret;
2424 }
2425
2426 static int
2427 ftrace_event_set_npid_open(struct inode *inode, struct file *file)
2428 {
2429 const struct seq_operations *seq_ops = &show_set_no_pid_seq_ops;
2430 struct trace_array *tr = inode->i_private;
2431 int ret;
2432
2433 ret = tracing_check_open_get_tr(tr);
2434 if (ret)
2435 return ret;
2436
2437 if ((file->f_mode & FMODE_WRITE) &&
2438 (file->f_flags & O_TRUNC))
2439 ftrace_clear_event_pids(tr, TRACE_NO_PIDS);
2440
2441 ret = ftrace_event_open(inode, file, seq_ops);
2442 if (ret < 0)
2443 trace_array_put(tr);
2444 return ret;
2445 }
2446
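/*
 * Allocate and initialize a new event_subsystem and add it to the global
 * event_subsystems list. Returns NULL on allocation failure.
 */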
2447 static struct event_subsystem *
2448 create_new_subsystem(const char *name)
2449 {
2450 struct event_subsystem *system;
2451
2452 /* need to create new entry */
2453 system = kmalloc(sizeof(*system), GFP_KERNEL);
2454 if (!system)
2455 return NULL;
2456
2457 system->ref_count = 1;
2458
2459 /* Only allocate if dynamic (kprobes and modules) */
2460 system->name = kstrdup_const(name, GFP_KERNEL);
2461 if (!system->name)
2462 goto out_free;
2463
2464 system->filter = kzalloc(sizeof(struct event_filter), GFP_KERNEL);
2465 if (!system->filter)
2466 goto out_free;
2467
2468 list_add(&system->list, &event_subsystems);
2469
2470 return system;
2471
2472 out_free:
2473 kfree_const(system->name);
2474 kfree(system);
2475 return NULL;
2476 }
2477
2478 static int system_callback(const char *name, umode_t *mode, void **data,
2479 const struct file_operations **fops)
2480 {
2481 if (strcmp(name, "filter") == 0)
2482 *fops = &ftrace_subsystem_filter_fops;
2483
2484 else if (strcmp(name, "enable") == 0)
2485 *fops = &ftrace_system_enable_fops;
2486
2487 else
2488 return 0;
2489
2490 *mode = TRACE_MODE_WRITE;
2491 return 1;
2492 }
2493
2494 static struct eventfs_inode *
2495 event_subsystem_dir(struct trace_array *tr, const char *name,
2496 struct trace_event_file *file, struct eventfs_inode *parent)
2497 {
2498 struct event_subsystem *system, *iter;
2499 struct trace_subsystem_dir *dir;
2500 struct eventfs_inode *ei;
2501 int nr_entries;
2502 static struct eventfs_entry system_entries[] = {
2503 {
2504 .name = "filter",
2505 .callback = system_callback,
2506 },
2507 {
2508 .name = "enable",
2509 .callback = system_callback,
2510 }
2511 };
2512
2513 /* First see if we did not already create this dir */
2514 list_for_each_entry(dir, &tr->systems, list) {
2515 system = dir->subsystem;
2516 if (strcmp(system->name, name) == 0) {
2517 dir->nr_events++;
2518 file->system = dir;
2519 return dir->ei;
2520 }
2521 }
2522
2523 /* Now see if the system itself exists. */
2524 system = NULL;
2525 list_for_each_entry(iter, &event_subsystems, list) {
2526 if (strcmp(iter->name, name) == 0) {
2527 system = iter;
2528 break;
2529 }
2530 }
2531
2532 dir = kmalloc(sizeof(*dir), GFP_KERNEL);
2533 if (!dir)
2534 goto out_fail;
2535
2536 if (!system) {
2537 system = create_new_subsystem(name);
2538 if (!system)
2539 goto out_free;
2540 } else
2541 __get_system(system);
2542
2543 /* The ftrace system only has directories, no files */
2544 if (strcmp(name, "ftrace") == 0)
2545 nr_entries = 0;
2546 else
2547 nr_entries = ARRAY_SIZE(system_entries);
2548
2549 ei = eventfs_create_dir(name, parent, system_entries, nr_entries, dir);
2550 if (IS_ERR(ei)) {
2551 pr_warn("Failed to create system directory %s\n", name);
2552 __put_system(system);
2553 goto out_free;
2554 }
2555
2556 dir->ei = ei;
2557 dir->tr = tr;
2558 dir->ref_count = 1;
2559 dir->nr_events = 1;
2560 dir->subsystem = system;
2561 file->system = dir;
2562
2563 list_add(&dir->list, &tr->systems);
2564
2565 return dir->ei;
2566
2567 out_free:
2568 kfree(dir);
2569 out_fail:
2570 /* Only print this message if the failure was due to memory allocation */
2571 if (!dir || !system)
2572 pr_warn("No memory to create event subsystem %s\n", name);
2573 return NULL;
2574 }
2575
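/*
 * Build the field list for an event's class from its fields_array, laying
 * the fields out after the common trace_entry header. Classes that use a
 * TRACE_FUNCTION_TYPE entry define their fields via a callback instead.
 */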
2576 static int
2577 event_define_fields(struct trace_event_call *call)
2578 {
2579 struct list_head *head;
2580 int ret = 0;
2581
2582 /*
2583 * Other events may have the same class. Only update
2584 * the fields if they are not already defined.
2585 */
2586 head = trace_get_fields(call);
2587 if (list_empty(head)) {
2588 struct trace_event_fields *field = call->class->fields_array;
2589 unsigned int offset = sizeof(struct trace_entry);
2590
2591 for (; field->type; field++) {
2592 if (field->type == TRACE_FUNCTION_TYPE) {
2593 field->define_fields(call);
2594 break;
2595 }
2596
2597 offset = ALIGN(offset, field->align);
2598 ret = trace_define_field_ext(call, field->type, field->name,
2599 offset, field->size,
2600 field->is_signed, field->filter_type,
2601 field->len, field->needs_test);
2602 if (WARN_ON_ONCE(ret)) {
2603 pr_err("error code is %d\n", ret);
2604 break;
2605 }
2606
2607 offset += field->size;
2608 }
2609 }
2610
2611 return ret;
2612 }
2613
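/*
 * eventfs callback for files inside an individual event directory. Given a
 * file name ("enable", "filter", "format", ...), fill in the mode and fops
 * to use for it; returning 0 means the file should not be created.
 */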
2614 static int event_callback(const char *name, umode_t *mode, void **data,
2615 const struct file_operations **fops)
2616 {
2617 struct trace_event_file *file = *data;
2618 struct trace_event_call *call = file->event_call;
2619
2620 if (strcmp(name, "format") == 0) {
2621 *mode = TRACE_MODE_READ;
2622 *fops = &ftrace_event_format_fops;
2623 return 1;
2624 }
2625
2626 /*
2627 * Only event directories that can be enabled should have
2628 * triggers or filters, with the exception of the "print"
2629 * event that can have a "trigger" file.
2630 */
2631 if (!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)) {
2632 if (call->class->reg && strcmp(name, "enable") == 0) {
2633 *mode = TRACE_MODE_WRITE;
2634 *fops = &ftrace_enable_fops;
2635 return 1;
2636 }
2637
2638 if (strcmp(name, "filter") == 0) {
2639 *mode = TRACE_MODE_WRITE;
2640 *fops = &ftrace_event_filter_fops;
2641 return 1;
2642 }
2643 }
2644
2645 if (!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE) ||
2646 strcmp(trace_event_name(call), "print") == 0) {
2647 if (strcmp(name, "trigger") == 0) {
2648 *mode = TRACE_MODE_WRITE;
2649 *fops = &event_trigger_fops;
2650 return 1;
2651 }
2652 }
2653
2654 #ifdef CONFIG_PERF_EVENTS
2655 if (call->event.type && call->class->reg &&
2656 strcmp(name, "id") == 0) {
2657 *mode = TRACE_MODE_READ;
2658 *data = (void *)(long)call->event.type;
2659 *fops = &ftrace_event_id_fops;
2660 return 1;
2661 }
2662 #endif
2663
2664 #ifdef CONFIG_HIST_TRIGGERS
2665 if (strcmp(name, "hist") == 0) {
2666 *mode = TRACE_MODE_READ;
2667 *fops = &event_hist_fops;
2668 return 1;
2669 }
2670 #endif
2671 #ifdef CONFIG_HIST_TRIGGERS_DEBUG
2672 if (strcmp(name, "hist_debug") == 0) {
2673 *mode = TRACE_MODE_READ;
2674 *fops = &event_hist_debug_fops;
2675 return 1;
2676 }
2677 #endif
2678 #ifdef CONFIG_TRACE_EVENT_INJECT
2679 if (call->event.type && call->class->reg &&
2680 strcmp(name, "inject") == 0) {
2681 *mode = 0200;
2682 *fops = &event_inject_fops;
2683 return 1;
2684 }
2685 #endif
2686 return 0;
2687 }
2688
2689 /* The file's ref count is incremented on creation and dropped when the "enable" file is freed */
2690 static void event_release(const char *name, void *data)
2691 {
2692 struct trace_event_file *file = data;
2693
2694 event_file_put(file);
2695 }
2696
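/*
 * Create the eventfs directory for an event under its subsystem directory,
 * populate it with the static event_entries below, and define the event's
 * fields. The reference taken here is dropped by event_release() when the
 * "enable" entry is freed.
 */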
2697 static int
2698 event_create_dir(struct eventfs_inode *parent, struct trace_event_file *file)
2699 {
2700 struct trace_event_call *call = file->event_call;
2701 struct trace_array *tr = file->tr;
2702 struct eventfs_inode *e_events;
2703 struct eventfs_inode *ei;
2704 const char *name;
2705 int nr_entries;
2706 int ret;
2707 static struct eventfs_entry event_entries[] = {
2708 {
2709 .name = "enable",
2710 .callback = event_callback,
2711 .release = event_release,
2712 },
2713 {
2714 .name = "filter",
2715 .callback = event_callback,
2716 },
2717 {
2718 .name = "trigger",
2719 .callback = event_callback,
2720 },
2721 {
2722 .name = "format",
2723 .callback = event_callback,
2724 },
2725 #ifdef CONFIG_PERF_EVENTS
2726 {
2727 .name = "id",
2728 .callback = event_callback,
2729 },
2730 #endif
2731 #ifdef CONFIG_HIST_TRIGGERS
2732 {
2733 .name = "hist",
2734 .callback = event_callback,
2735 },
2736 #endif
2737 #ifdef CONFIG_HIST_TRIGGERS_DEBUG
2738 {
2739 .name = "hist_debug",
2740 .callback = event_callback,
2741 },
2742 #endif
2743 #ifdef CONFIG_TRACE_EVENT_INJECT
2744 {
2745 .name = "inject",
2746 .callback = event_callback,
2747 },
2748 #endif
2749 };
2750
2751 /*
2752 * If the trace point header did not define TRACE_SYSTEM
2753 * then the system would be called "TRACE_SYSTEM". This should
2754 * never happen.
2755 */
2756 if (WARN_ON_ONCE(strcmp(call->class->system, TRACE_SYSTEM) == 0))
2757 return -ENODEV;
2758
2759 e_events = event_subsystem_dir(tr, call->class->system, file, parent);
2760 if (!e_events)
2761 return -ENOMEM;
2762
2763 nr_entries = ARRAY_SIZE(event_entries);
2764
2765 name = trace_event_name(call);
2766 ei = eventfs_create_dir(name, e_events, event_entries, nr_entries, file);
2767 if (IS_ERR(ei)) {
2768 pr_warn("Could not create tracefs '%s' directory\n", name);
2769 return -1;
2770 }
2771
2772 file->ei = ei;
2773
2774 ret = event_define_fields(call);
2775 if (ret < 0) {
2776 pr_warn("Could not initialize trace point events/%s\n", name);
2777 return ret;
2778 }
2779
2780 /* Gets decremented on freeing of the "enable" file */
2781 event_file_get(file);
2782
2783 return 0;
2784 }
2785
2786 static void remove_event_from_tracers(struct trace_event_call *call)
2787 {
2788 struct trace_event_file *file;
2789 struct trace_array *tr;
2790
2791 do_for_each_event_file_safe(tr, file) {
2792 if (file->event_call != call)
2793 continue;
2794
2795 remove_event_file_dir(file);
2796 /*
2797 * The do_for_each_event_file_safe() is
2798 * a double loop. After finding the call for this
2799 * trace_array, we use break to jump to the next
2800 * trace_array.
2801 */
2802 break;
2803 } while_for_each_event_file();
2804 }
2805
2806 static void event_remove(struct trace_event_call *call)
2807 {
2808 struct trace_array *tr;
2809 struct trace_event_file *file;
2810
2811 do_for_each_event_file(tr, file) {
2812 if (file->event_call != call)
2813 continue;
2814
2815 if (file->flags & EVENT_FILE_FL_WAS_ENABLED)
2816 tr->clear_trace = true;
2817
2818 ftrace_event_enable_disable(file, 0);
2819 /*
2820 * The do_for_each_event_file() is
2821 * a double loop. After finding the call for this
2822 * trace_array, we use break to jump to the next
2823 * trace_array.
2824 */
2825 break;
2826 } while_for_each_event_file();
2827
2828 if (call->event.funcs)
2829 __unregister_trace_event(&call->event);
2830 remove_event_from_tracers(call);
2831 list_del(&call->list);
2832 }
2833
2834 static int event_init(struct trace_event_call *call)
2835 {
2836 int ret = 0;
2837 const char *name;
2838
2839 name = trace_event_name(call);
2840 if (WARN_ON(!name))
2841 return -EINVAL;
2842
2843 if (call->class->raw_init) {
2844 ret = call->class->raw_init(call);
2845 if (ret < 0 && ret != -ENOSYS)
2846 pr_warn("Could not initialize trace events/%s\n", name);
2847 }
2848
2849 return ret;
2850 }
2851
2852 static int
2853 __register_event(struct trace_event_call *call, struct module *mod)
2854 {
2855 int ret;
2856
2857 ret = event_init(call);
2858 if (ret < 0)
2859 return ret;
2860
2861 list_add(&call->list, &ftrace_events);
2862 if (call->flags & TRACE_EVENT_FL_DYNAMIC)
2863 atomic_set(&call->refcnt, 0);
2864 else
2865 call->module = mod;
2866
2867 return 0;
2868 }
2869
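/*
 * Replace the eval (enum/sizeof) name at @ptr with its numeric value, in
 * place, shifting the rest of the string down. Returns a pointer just past
 * the inserted number, or NULL if the name is shorter than the value.
 * For example, an eval map entry of "HI_SOFTIRQ" -> 0 would turn the
 * print_fmt text "REC->vec == HI_SOFTIRQ" into "REC->vec == 0".
 */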
2870 static char *eval_replace(char *ptr, struct trace_eval_map *map, int len)
2871 {
2872 int rlen;
2873 int elen;
2874
2875 /* Find the length of the eval value as a string */
2876 elen = snprintf(ptr, 0, "%ld", map->eval_value);
2877 /* Make sure there's enough room to replace the string with the value */
2878 if (len < elen)
2879 return NULL;
2880
2881 snprintf(ptr, elen + 1, "%ld", map->eval_value);
2882
2883 /* Get the length of the rest of the string after the eval name */
2884 rlen = strlen(ptr + len);
2885 memmove(ptr + elen, ptr + len, rlen);
2886 /* Make sure we end the new string */
2887 ptr[elen + rlen] = 0;
2888
2889 return ptr + elen;
2890 }
2891
2892 static void update_event_printk(struct trace_event_call *call,
2893 struct trace_eval_map *map)
2894 {
2895 char *ptr;
2896 int quote = 0;
2897 int len = strlen(map->eval_string);
2898
2899 for (ptr = call->print_fmt; *ptr; ptr++) {
2900 if (*ptr == '\\') {
2901 ptr++;
2902 /* paranoid */
2903 if (!*ptr)
2904 break;
2905 continue;
2906 }
2907 if (*ptr == '"') {
2908 quote ^= 1;
2909 continue;
2910 }
2911 if (quote)
2912 continue;
2913 if (isdigit(*ptr)) {
2914 /* skip numbers */
2915 do {
2916 ptr++;
2917 /* Check for alpha chars like ULL */
2918 } while (isalnum(*ptr));
2919 if (!*ptr)
2920 break;
2921 /*
2922 * A number must have some kind of delimiter after
2923 * it, and we can ignore that too.
2924 */
2925 continue;
2926 }
2927 if (isalpha(*ptr) || *ptr == '_') {
2928 if (strncmp(map->eval_string, ptr, len) == 0 &&
2929 !isalnum(ptr[len]) && ptr[len] != '_') {
2930 ptr = eval_replace(ptr, map, len);
2931 /* enum/sizeof string smaller than value */
2932 if (WARN_ON_ONCE(!ptr))
2933 return;
2934 /*
2935 * No need to decrement here, as eval_replace()
2936 * returns the pointer to the character past
2937 * the eval, and two evals can not be placed
2938 * back to back without something in between.
2939 * We can skip that something in between.
2940 */
2941 continue;
2942 }
2943 skip_more:
2944 do {
2945 ptr++;
2946 } while (isalnum(*ptr) || *ptr == '_');
2947 if (!*ptr)
2948 break;
2949 /*
2950 * If what comes after this variable is a '.' or
2951 * '->' then we can continue to ignore that string.
2952 */
2953 if (*ptr == '.' || (ptr[0] == '-' && ptr[1] == '>')) {
2954 ptr += *ptr == '.' ? 1 : 2;
2955 if (!*ptr)
2956 break;
2957 goto skip_more;
2958 }
2959 /*
2960 * Once again, we can skip the delimiter that came
2961 * after the string.
2962 */
2963 continue;
2964 }
2965 }
2966 }
2967
2968 static void add_str_to_module(struct module *module, char *str)
2969 {
2970 struct module_string *modstr;
2971
2972 modstr = kmalloc(sizeof(*modstr), GFP_KERNEL);
2973
2974 /*
2975 * If we failed to allocate memory here, then we'll just
2976 * let the str memory leak when the module is removed.
2977 * If this fails to allocate, there's worse problems than
2978 * a leaked string on module removal.
2979 */
2980 if (WARN_ON_ONCE(!modstr))
2981 return;
2982
2983 modstr->module = module;
2984 modstr->str = str;
2985
2986 list_add(&modstr->next, &module_strings);
2987 }
2988
2989 static void update_event_fields(struct trace_event_call *call,
2990 struct trace_eval_map *map)
2991 {
2992 struct ftrace_event_field *field;
2993 struct list_head *head;
2994 char *ptr;
2995 char *str;
2996 int len = strlen(map->eval_string);
2997
2998 /* Dynamic events should never have field maps */
2999 if (WARN_ON_ONCE(call->flags & TRACE_EVENT_FL_DYNAMIC))
3000 return;
3001
3002 head = trace_get_fields(call);
3003 list_for_each_entry(field, head, link) {
3004 ptr = strchr(field->type, '[');
3005 if (!ptr)
3006 continue;
3007 ptr++;
3008
3009 if (!isalpha(*ptr) && *ptr != '_')
3010 continue;
3011
3012 if (strncmp(map->eval_string, ptr, len) != 0)
3013 continue;
3014
3015 str = kstrdup(field->type, GFP_KERNEL);
3016 if (WARN_ON_ONCE(!str))
3017 return;
3018 ptr = str + (ptr - field->type);
3019 ptr = eval_replace(ptr, map, len);
3020 /* enum/sizeof string smaller than value */
3021 if (WARN_ON_ONCE(!ptr)) {
3022 kfree(str);
3023 continue;
3024 }
3025
3026 /*
3027 * If the event is part of a module, then we need to free the string
3028 * when the module is removed. Otherwise, it will stay allocated
3029 * until a reboot.
3030 */
3031 if (call->module)
3032 add_str_to_module(call->module, str);
3033
3034 field->type = str;
3035 }
3036 }
3037
3038 void trace_event_eval_update(struct trace_eval_map **map, int len)
3039 {
3040 struct trace_event_call *call, *p;
3041 const char *last_system = NULL;
3042 bool first = false;
3043 int last_i;
3044 int i;
3045
3046 down_write(&trace_event_sem);
3047 list_for_each_entry_safe(call, p, &ftrace_events, list) {
3048 /* events are usually grouped together with systems */
3049 if (!last_system || call->class->system != last_system) {
3050 first = true;
3051 last_i = 0;
3052 last_system = call->class->system;
3053 }
3054
3055 /*
3056 * Since calls are grouped by systems, the likelihood that the
3057 * next call in the iteration belongs to the same system as the
3058 * previous call is high. As an optimization, we skip searching
3059 * for a map[] that matches the call's system if the last call
3060 * was from the same system. That's what last_i is for. If the
3061 * call has the same system as the previous call, then last_i
3062 * will be the index of the first map[] that has a matching
3063 * system.
3064 */
3065 for (i = last_i; i < len; i++) {
3066 if (call->class->system == map[i]->system) {
3067 /* Save the first system if need be */
3068 if (first) {
3069 last_i = i;
3070 first = false;
3071 }
3072 update_event_printk(call, map[i]);
3073 update_event_fields(call, map[i]);
3074 }
3075 }
3076 cond_resched();
3077 }
3078 up_write(&trace_event_sem);
3079 }
3080
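/*
 * Return true if the event's system is listed in @systems, a string of
 * system names separated by spaces or commas (a NULL @systems means all
 * systems are allowed).
 */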
3081 static bool event_in_systems(struct trace_event_call *call,
3082 const char *systems)
3083 {
3084 const char *system;
3085 const char *p;
3086
3087 if (!systems)
3088 return true;
3089
3090 system = call->class->system;
3091 p = strstr(systems, system);
3092 if (!p)
3093 return false;
3094
3095 if (p != systems && !isspace(*(p - 1)) && *(p - 1) != ',')
3096 return false;
3097
3098 p += strlen(system);
3099 return !*p || isspace(*p) || *p == ',';
3100 }
3101
3102 static struct trace_event_file *
3103 trace_create_new_event(struct trace_event_call *call,
3104 struct trace_array *tr)
3105 {
3106 struct trace_pid_list *no_pid_list;
3107 struct trace_pid_list *pid_list;
3108 struct trace_event_file *file;
3109 unsigned int first;
3110
3111 if (!event_in_systems(call, tr->system_names))
3112 return NULL;
3113
3114 file = kmem_cache_alloc(file_cachep, GFP_TRACE);
3115 if (!file)
3116 return ERR_PTR(-ENOMEM);
3117
3118 pid_list = rcu_dereference_protected(tr->filtered_pids,
3119 lockdep_is_held(&event_mutex));
3120 no_pid_list = rcu_dereference_protected(tr->filtered_no_pids,
3121 lockdep_is_held(&event_mutex));
3122
3123 if (!trace_pid_list_first(pid_list, &first) ||
3124 !trace_pid_list_first(no_pid_list, &first))
3125 file->flags |= EVENT_FILE_FL_PID_FILTER;
3126
3127 file->event_call = call;
3128 file->tr = tr;
3129 atomic_set(&file->sm_ref, 0);
3130 atomic_set(&file->tm_ref, 0);
3131 INIT_LIST_HEAD(&file->triggers);
3132 list_add(&file->list, &tr->events);
3133 refcount_set(&file->ref, 1);
3134
3135 return file;
3136 }
3137
3138 #define MAX_BOOT_TRIGGERS 32
3139
3140 static struct boot_triggers {
3141 const char *event;
3142 char *trigger;
3143 } bootup_triggers[MAX_BOOT_TRIGGERS];
3144
3145 static char bootup_trigger_buf[COMMAND_LINE_SIZE];
3146 static int nr_boot_triggers;
3147
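/*
 * Parse the "trace_trigger=" boot parameter. Each comma-separated entry has
 * the form <event>.<trigger>, e.g. trace_trigger="sched_switch.stacktrace:3"
 * arms a stacktrace trigger on sched_switch before tracefs is available.
 */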
3148 static __init int setup_trace_triggers(char *str)
3149 {
3150 char *trigger;
3151 char *buf;
3152 int i;
3153
3154 strscpy(bootup_trigger_buf, str, COMMAND_LINE_SIZE);
3155 trace_set_ring_buffer_expanded(NULL);
3156 disable_tracing_selftest("running event triggers");
3157
3158 buf = bootup_trigger_buf;
3159 for (i = 0; i < MAX_BOOT_TRIGGERS; i++) {
3160 trigger = strsep(&buf, ",");
3161 if (!trigger)
3162 break;
3163 bootup_triggers[i].event = strsep(&trigger, ".");
3164 bootup_triggers[i].trigger = trigger;
3165 if (!bootup_triggers[i].trigger)
3166 break;
3167 }
3168
3169 nr_boot_triggers = i;
3170 return 1;
3171 }
3172 __setup("trace_trigger=", setup_trace_triggers);
3173
3174 /* Add an event to a trace directory */
3175 static int
3176 __trace_add_new_event(struct trace_event_call *call, struct trace_array *tr)
3177 {
3178 struct trace_event_file *file;
3179
3180 file = trace_create_new_event(call, tr);
3181 /*
3182 * trace_create_new_event() returns ERR_PTR(-ENOMEM) if the allocation
3183 * failed, or NULL if the event is not part of the tr->system_names.
3184 * When the event is not part of the tr->system_names, return zero, not
3185 * an error.
3186 */
3187 if (!file)
3188 return 0;
3189
3190 if (IS_ERR(file))
3191 return PTR_ERR(file);
3192
3193 if (eventdir_initialized)
3194 return event_create_dir(tr->event_dir, file);
3195 else
3196 return event_define_fields(call);
3197 }
3198
3199 static void trace_early_triggers(struct trace_event_file *file, const char *name)
3200 {
3201 int ret;
3202 int i;
3203
3204 for (i = 0; i < nr_boot_triggers; i++) {
3205 if (strcmp(name, bootup_triggers[i].event))
3206 continue;
3207 mutex_lock(&event_mutex);
3208 ret = trigger_process_regex(file, bootup_triggers[i].trigger);
3209 mutex_unlock(&event_mutex);
3210 if (ret)
3211 pr_err("Failed to register trigger '%s' on event %s\n",
3212 bootup_triggers[i].trigger,
3213 bootup_triggers[i].event);
3214 }
3215 }
3216
3217 /*
3218 * Just create a descriptor for early init. A descriptor is required
3219 * for enabling events at boot. We want to enable events before
3220 * the filesystem is initialized.
3221 */
3222 static int
3223 __trace_early_add_new_event(struct trace_event_call *call,
3224 struct trace_array *tr)
3225 {
3226 struct trace_event_file *file;
3227 int ret;
3228
3229 file = trace_create_new_event(call, tr);
3230 /*
3231 * trace_create_new_event() returns ERR_PTR(-ENOMEM) if the allocation
3232 * failed, or NULL if the event is not part of the tr->system_names.
3233 * When the event is not part of the tr->system_names, return zero, not
3234 * an error.
3235 */
3236 if (!file)
3237 return 0;
3238
3239 if (IS_ERR(file))
3240 return PTR_ERR(file);
3241
3242 ret = event_define_fields(call);
3243 if (ret)
3244 return ret;
3245
3246 trace_early_triggers(file, trace_event_name(call));
3247
3248 return 0;
3249 }
3250
3251 struct ftrace_module_file_ops;
3252 static void __add_event_to_tracers(struct trace_event_call *call);
3253
3254 /* Add an additional event_call dynamically */
3255 int trace_add_event_call(struct trace_event_call *call)
3256 {
3257 int ret;
3258 lockdep_assert_held(&event_mutex);
3259
3260 mutex_lock(&trace_types_lock);
3261
3262 ret = __register_event(call, NULL);
3263 if (ret >= 0)
3264 __add_event_to_tracers(call);
3265
3266 mutex_unlock(&trace_types_lock);
3267 return ret;
3268 }
3269 EXPORT_SYMBOL_GPL(trace_add_event_call);
3270
3271 /*
3272 * Must be called under locking of trace_types_lock, event_mutex and
3273 * trace_event_sem.
3274 */
3275 static void __trace_remove_event_call(struct trace_event_call *call)
3276 {
3277 event_remove(call);
3278 trace_destroy_fields(call);
3279 }
3280
3281 static int probe_remove_event_call(struct trace_event_call *call)
3282 {
3283 struct trace_array *tr;
3284 struct trace_event_file *file;
3285
3286 #ifdef CONFIG_PERF_EVENTS
3287 if (call->perf_refcount)
3288 return -EBUSY;
3289 #endif
3290 do_for_each_event_file(tr, file) {
3291 if (file->event_call != call)
3292 continue;
3293 /*
3294 * We can't rely on the ftrace_event_enable_disable(enable => 0)
3295 * that we are about to do, since EVENT_FILE_FL_SOFT_MODE can suppress
3296 * TRACE_REG_UNREGISTER.
3297 */
3298 if (file->flags & EVENT_FILE_FL_ENABLED)
3299 goto busy;
3300
3301 if (file->flags & EVENT_FILE_FL_WAS_ENABLED)
3302 tr->clear_trace = true;
3303 /*
3304 * The do_for_each_event_file() is
3305 * a double loop. After finding the call for this
3306 * trace_array, we use break to jump to the next
3307 * trace_array.
3308 */
3309 break;
3310 } while_for_each_event_file();
3311
3312 __trace_remove_event_call(call);
3313
3314 return 0;
3315 busy:
3316 /* No need to clear the trace now */
3317 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
3318 tr->clear_trace = false;
3319 }
3320 return -EBUSY;
3321 }
3322
3323 /* Remove an event_call */
3324 int trace_remove_event_call(struct trace_event_call *call)
3325 {
3326 int ret;
3327
3328 lockdep_assert_held(&event_mutex);
3329
3330 mutex_lock(&trace_types_lock);
3331 down_write(&trace_event_sem);
3332 ret = probe_remove_event_call(call);
3333 up_write(&trace_event_sem);
3334 mutex_unlock(&trace_types_lock);
3335
3336 return ret;
3337 }
3338 EXPORT_SYMBOL_GPL(trace_remove_event_call);
3339
3340 #define for_each_event(event, start, end) \
3341 for (event = start; \
3342 (unsigned long)event < (unsigned long)end; \
3343 event++)
3344
3345 #ifdef CONFIG_MODULES
3346
3347 static void trace_module_add_events(struct module *mod)
3348 {
3349 struct trace_event_call **call, **start, **end;
3350
3351 if (!mod->num_trace_events)
3352 return;
3353
3354 /* Don't add infrastructure for mods without tracepoints */
3355 if (trace_module_has_bad_taint(mod)) {
3356 pr_err("%s: module has bad taint, not creating trace events\n",
3357 mod->name);
3358 return;
3359 }
3360
3361 start = mod->trace_events;
3362 end = mod->trace_events + mod->num_trace_events;
3363
3364 for_each_event(call, start, end) {
3365 __register_event(*call, mod);
3366 __add_event_to_tracers(*call);
3367 }
3368 }
3369
3370 static void trace_module_remove_events(struct module *mod)
3371 {
3372 struct trace_event_call *call, *p;
3373 struct module_string *modstr, *m;
3374
3375 down_write(&trace_event_sem);
3376 list_for_each_entry_safe(call, p, &ftrace_events, list) {
3377 if ((call->flags & TRACE_EVENT_FL_DYNAMIC) || !call->module)
3378 continue;
3379 if (call->module == mod)
3380 __trace_remove_event_call(call);
3381 }
3382 /* Check for any strings allocated for this module */
3383 list_for_each_entry_safe(modstr, m, &module_strings, next) {
3384 if (modstr->module != mod)
3385 continue;
3386 list_del(&modstr->next);
3387 kfree(modstr->str);
3388 kfree(modstr);
3389 }
3390 up_write(&trace_event_sem);
3391
3392 /*
3393 * It is safest to reset the ring buffer if the module being unloaded
3394 * registered any events that were used. The only worry is if
3395 * a new module gets loaded, and takes on the same id as the events
3396 * of this module. When printing out the buffer, traced events left
3397 * over from this module may be passed to the new module events and
3398 * unexpected results may occur.
3399 */
3400 tracing_reset_all_online_cpus_unlocked();
3401 }
3402
3403 static int trace_module_notify(struct notifier_block *self,
3404 unsigned long val, void *data)
3405 {
3406 struct module *mod = data;
3407
3408 mutex_lock(&event_mutex);
3409 mutex_lock(&trace_types_lock);
3410 switch (val) {
3411 case MODULE_STATE_COMING:
3412 trace_module_add_events(mod);
3413 break;
3414 case MODULE_STATE_GOING:
3415 trace_module_remove_events(mod);
3416 break;
3417 }
3418 mutex_unlock(&trace_types_lock);
3419 mutex_unlock(&event_mutex);
3420
3421 return NOTIFY_OK;
3422 }
3423
3424 static struct notifier_block trace_module_nb = {
3425 .notifier_call = trace_module_notify,
3426 .priority = 1, /* higher than trace.c module notify */
3427 };
3428 #endif /* CONFIG_MODULES */
3429
3430 /* Create a new event directory structure for a trace directory. */
3431 static void
3432 __trace_add_event_dirs(struct trace_array *tr)
3433 {
3434 struct trace_event_call *call;
3435 int ret;
3436
3437 list_for_each_entry(call, &ftrace_events, list) {
3438 ret = __trace_add_new_event(call, tr);
3439 if (ret < 0)
3440 pr_warn("Could not create directory for event %s\n",
3441 trace_event_name(call));
3442 }
3443 }
3444
3445 /* Returns any file that matches the system and event */
3446 struct trace_event_file *
3447 __find_event_file(struct trace_array *tr, const char *system, const char *event)
3448 {
3449 struct trace_event_file *file;
3450 struct trace_event_call *call;
3451 const char *name;
3452
3453 list_for_each_entry(file, &tr->events, list) {
3454
3455 call = file->event_call;
3456 name = trace_event_name(call);
3457
3458 if (!name || !call->class)
3459 continue;
3460
3461 if (strcmp(event, name) == 0 &&
3462 strcmp(system, call->class->system) == 0)
3463 return file;
3464 }
3465 return NULL;
3466 }
3467
3468 /* Returns valid trace event files that match system and event */
3469 struct trace_event_file *
3470 find_event_file(struct trace_array *tr, const char *system, const char *event)
3471 {
3472 struct trace_event_file *file;
3473
3474 file = __find_event_file(tr, system, event);
3475 if (!file || !file->event_call->class->reg ||
3476 file->event_call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)
3477 return NULL;
3478
3479 return file;
3480 }
3481
3482 /**
3483 * trace_get_event_file - Find and return a trace event file
3484 * @instance: The name of the trace instance containing the event
3485 * @system: The name of the system containing the event
3486 * @event: The name of the event
3487 *
3488 * Return a trace event file given the trace instance name, trace
3489 * system, and trace event name. If the instance name is NULL, it
3490 * refers to the top-level trace array.
3491 *
3492 * This function will look it up and return it if found, after calling
3493 * trace_array_get() to prevent the instance from going away, and
3494 * increment the event's module refcount to prevent it from being
3495 * removed.
3496 *
3497 * To release the file, call trace_put_event_file(), which will call
3498 * trace_array_put() and decrement the event's module refcount.
3499 *
3500 * Return: The trace event on success, ERR_PTR otherwise.
3501 */
3502 struct trace_event_file *trace_get_event_file(const char *instance,
3503 const char *system,
3504 const char *event)
3505 {
3506 struct trace_array *tr = top_trace_array();
3507 struct trace_event_file *file = NULL;
3508 int ret = -EINVAL;
3509
3510 if (instance) {
3511 tr = trace_array_find_get(instance);
3512 if (!tr)
3513 return ERR_PTR(-ENOENT);
3514 } else {
3515 ret = trace_array_get(tr);
3516 if (ret)
3517 return ERR_PTR(ret);
3518 }
3519
3520 mutex_lock(&event_mutex);
3521
3522 file = find_event_file(tr, system, event);
3523 if (!file) {
3524 trace_array_put(tr);
3525 ret = -EINVAL;
3526 goto out;
3527 }
3528
3529 /* Don't let event modules unload while in use */
3530 ret = trace_event_try_get_ref(file->event_call);
3531 if (!ret) {
3532 trace_array_put(tr);
3533 ret = -EBUSY;
3534 goto out;
3535 }
3536
3537 ret = 0;
3538 out:
3539 mutex_unlock(&event_mutex);
3540
3541 if (ret)
3542 file = ERR_PTR(ret);
3543
3544 return file;
3545 }
3546 EXPORT_SYMBOL_GPL(trace_get_event_file);
3547
3548 /**
3549 * trace_put_event_file - Release a file from trace_get_event_file()
3550 * @file: The trace event file
3551 *
3552 * If a file was retrieved using trace_get_event_file(), this should
3553 * be called when it's no longer needed. It will cancel the previous
3554 * trace_array_get() called by that function, and decrement the
3555 * event's module refcount.
3556 */
3557 void trace_put_event_file(struct trace_event_file *file)
3558 {
3559 mutex_lock(&event_mutex);
3560 trace_event_put_ref(file->event_call);
3561 mutex_unlock(&event_mutex);
3562
3563 trace_array_put(file->tr);
3564 }
3565 EXPORT_SYMBOL_GPL(trace_put_event_file);
3566
3567 #ifdef CONFIG_DYNAMIC_FTRACE
3568
3569 /* Avoid typos */
3570 #define ENABLE_EVENT_STR "enable_event"
3571 #define DISABLE_EVENT_STR "disable_event"
3572
3573 struct event_probe_data {
3574 struct trace_event_file *file;
3575 unsigned long count;
3576 int ref;
3577 bool enable;
3578 };
3579
3580 static void update_event_probe(struct event_probe_data *data)
3581 {
3582 if (data->enable)
3583 clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &data->file->flags);
3584 else
3585 set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &data->file->flags);
3586 }
3587
3588 static void
3589 event_enable_probe(unsigned long ip, unsigned long parent_ip,
3590 struct trace_array *tr, struct ftrace_probe_ops *ops,
3591 void *data)
3592 {
3593 struct ftrace_func_mapper *mapper = data;
3594 struct event_probe_data *edata;
3595 void **pdata;
3596
3597 pdata = ftrace_func_mapper_find_ip(mapper, ip);
3598 if (!pdata || !*pdata)
3599 return;
3600
3601 edata = *pdata;
3602 update_event_probe(edata);
3603 }
3604
3605 static void
3606 event_enable_count_probe(unsigned long ip, unsigned long parent_ip,
3607 struct trace_array *tr, struct ftrace_probe_ops *ops,
3608 void *data)
3609 {
3610 struct ftrace_func_mapper *mapper = data;
3611 struct event_probe_data *edata;
3612 void **pdata;
3613
3614 pdata = ftrace_func_mapper_find_ip(mapper, ip);
3615 if (!pdata || !*pdata)
3616 return;
3617
3618 edata = *pdata;
3619
3620 if (!edata->count)
3621 return;
3622
3623 /* Skip if the event is in a state we want to switch to */
3624 if (edata->enable == !(edata->file->flags & EVENT_FILE_FL_SOFT_DISABLED))
3625 return;
3626
3627 if (edata->count != -1)
3628 (edata->count)--;
3629
3630 update_event_probe(edata);
3631 }
3632
3633 static int
3634 event_enable_print(struct seq_file *m, unsigned long ip,
3635 struct ftrace_probe_ops *ops, void *data)
3636 {
3637 struct ftrace_func_mapper *mapper = data;
3638 struct event_probe_data *edata;
3639 void **pdata;
3640
3641 pdata = ftrace_func_mapper_find_ip(mapper, ip);
3642
3643 if (WARN_ON_ONCE(!pdata || !*pdata))
3644 return 0;
3645
3646 edata = *pdata;
3647
3648 seq_printf(m, "%ps:", (void *)ip);
3649
3650 seq_printf(m, "%s:%s:%s",
3651 edata->enable ? ENABLE_EVENT_STR : DISABLE_EVENT_STR,
3652 edata->file->event_call->class->system,
3653 trace_event_name(edata->file->event_call));
3654
3655 if (edata->count == -1)
3656 seq_puts(m, ":unlimited\n");
3657 else
3658 seq_printf(m, ":count=%ld\n", edata->count);
3659
3660 return 0;
3661 }
3662
3663 static int
3664 event_enable_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
3665 unsigned long ip, void *init_data, void **data)
3666 {
3667 struct ftrace_func_mapper *mapper = *data;
3668 struct event_probe_data *edata = init_data;
3669 int ret;
3670
3671 if (!mapper) {
3672 mapper = allocate_ftrace_func_mapper();
3673 if (!mapper)
3674 return -ENODEV;
3675 *data = mapper;
3676 }
3677
3678 ret = ftrace_func_mapper_add_ip(mapper, ip, edata);
3679 if (ret < 0)
3680 return ret;
3681
3682 edata->ref++;
3683
3684 return 0;
3685 }
3686
3687 static int free_probe_data(void *data)
3688 {
3689 struct event_probe_data *edata = data;
3690
3691 edata->ref--;
3692 if (!edata->ref) {
3693 /* Remove the SOFT_MODE flag */
3694 __ftrace_event_enable_disable(edata->file, 0, 1);
3695 trace_event_put_ref(edata->file->event_call);
3696 kfree(edata);
3697 }
3698 return 0;
3699 }
3700
3701 static void
3702 event_enable_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
3703 unsigned long ip, void *data)
3704 {
3705 struct ftrace_func_mapper *mapper = data;
3706 struct event_probe_data *edata;
3707
3708 if (!ip) {
3709 if (!mapper)
3710 return;
3711 free_ftrace_func_mapper(mapper, free_probe_data);
3712 return;
3713 }
3714
3715 edata = ftrace_func_mapper_remove_ip(mapper, ip);
3716
3717 if (WARN_ON_ONCE(!edata))
3718 return;
3719
3720 if (WARN_ON_ONCE(edata->ref <= 0))
3721 return;
3722
3723 free_probe_data(edata);
3724 }
3725
3726 static struct ftrace_probe_ops event_enable_probe_ops = {
3727 .func = event_enable_probe,
3728 .print = event_enable_print,
3729 .init = event_enable_init,
3730 .free = event_enable_free,
3731 };
3732
3733 static struct ftrace_probe_ops event_enable_count_probe_ops = {
3734 .func = event_enable_count_probe,
3735 .print = event_enable_print,
3736 .init = event_enable_init,
3737 .free = event_enable_free,
3738 };
3739
3740 static struct ftrace_probe_ops event_disable_probe_ops = {
3741 .func = event_enable_probe,
3742 .print = event_enable_print,
3743 .init = event_enable_init,
3744 .free = event_enable_free,
3745 };
3746
3747 static struct ftrace_probe_ops event_disable_count_probe_ops = {
3748 .func = event_enable_count_probe,
3749 .print = event_enable_print,
3750 .init = event_enable_init,
3751 .free = event_enable_free,
3752 };
3753
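/*
 * Implement the "enable_event" and "disable_event" commands of
 * set_ftrace_filter, which take the form:
 *
 *   <function>:enable_event:<system>:<event>[:count]
 *
 * e.g. "echo 'schedule:enable_event:sched:sched_switch:2' > set_ftrace_filter"
 * soft-enables sched_switch for two hits of schedule(). The probe data holds
 * the event file, the remaining count and the enable/disable flag.
 */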
3754 static int
3755 event_enable_func(struct trace_array *tr, struct ftrace_hash *hash,
3756 char *glob, char *cmd, char *param, int enabled)
3757 {
3758 struct trace_event_file *file;
3759 struct ftrace_probe_ops *ops;
3760 struct event_probe_data *data;
3761 const char *system;
3762 const char *event;
3763 char *number;
3764 bool enable;
3765 int ret;
3766
3767 if (!tr)
3768 return -ENODEV;
3769
3770 /* hash funcs only work with set_ftrace_filter */
3771 if (!enabled || !param)
3772 return -EINVAL;
3773
3774 system = strsep(&param, ":");
3775 if (!param)
3776 return -EINVAL;
3777
3778 event = strsep(&param, ":");
3779
3780 mutex_lock(&event_mutex);
3781
3782 ret = -EINVAL;
3783 file = find_event_file(tr, system, event);
3784 if (!file)
3785 goto out;
3786
3787 enable = strcmp(cmd, ENABLE_EVENT_STR) == 0;
3788
3789 if (enable)
3790 ops = param ? &event_enable_count_probe_ops : &event_enable_probe_ops;
3791 else
3792 ops = param ? &event_disable_count_probe_ops : &event_disable_probe_ops;
3793
3794 if (glob[0] == '!') {
3795 ret = unregister_ftrace_function_probe_func(glob+1, tr, ops);
3796 goto out;
3797 }
3798
3799 ret = -ENOMEM;
3800
3801 data = kzalloc(sizeof(*data), GFP_KERNEL);
3802 if (!data)
3803 goto out;
3804
3805 data->enable = enable;
3806 data->count = -1;
3807 data->file = file;
3808
3809 if (!param)
3810 goto out_reg;
3811
3812 number = strsep(&param, ":");
3813
3814 ret = -EINVAL;
3815 if (!strlen(number))
3816 goto out_free;
3817
3818 /*
3819 * We use the callback data field (which is a pointer)
3820 * as our counter.
3821 */
3822 ret = kstrtoul(number, 0, &data->count);
3823 if (ret)
3824 goto out_free;
3825
3826 out_reg:
3827 /* Don't let event modules unload while probe registered */
3828 ret = trace_event_try_get_ref(file->event_call);
3829 if (!ret) {
3830 ret = -EBUSY;
3831 goto out_free;
3832 }
3833
3834 ret = __ftrace_event_enable_disable(file, 1, 1);
3835 if (ret < 0)
3836 goto out_put;
3837
3838 ret = register_ftrace_function_probe(glob, tr, ops, data);
3839 /*
3840 * On success, the above returns the number of functions enabled,
3841 * but if it didn't find any functions it returns zero.
3842 * Consider no functions a failure too.
3843 */
3844 if (!ret) {
3845 ret = -ENOENT;
3846 goto out_disable;
3847 } else if (ret < 0)
3848 goto out_disable;
3849 /* Just return zero, not the number of enabled functions */
3850 ret = 0;
3851 out:
3852 mutex_unlock(&event_mutex);
3853 return ret;
3854
3855 out_disable:
3856 __ftrace_event_enable_disable(file, 0, 1);
3857 out_put:
3858 trace_event_put_ref(file->event_call);
3859 out_free:
3860 kfree(data);
3861 goto out;
3862 }
3863
3864 static struct ftrace_func_command event_enable_cmd = {
3865 .name = ENABLE_EVENT_STR,
3866 .func = event_enable_func,
3867 };
3868
3869 static struct ftrace_func_command event_disable_cmd = {
3870 .name = DISABLE_EVENT_STR,
3871 .func = event_enable_func,
3872 };
3873
3874 static __init int register_event_cmds(void)
3875 {
3876 int ret;
3877
3878 ret = register_ftrace_command(&event_enable_cmd);
3879 if (WARN_ON(ret < 0))
3880 return ret;
3881 ret = register_ftrace_command(&event_disable_cmd);
3882 if (WARN_ON(ret < 0))
3883 unregister_ftrace_command(&event_enable_cmd);
3884 return ret;
3885 }
3886 #else
3887 static inline int register_event_cmds(void) { return 0; }
3888 #endif /* CONFIG_DYNAMIC_FTRACE */
3889
3890 /*
3891 * The top level array and trace arrays created by boot-time tracing
3892 * have already had their trace_event_file descriptors created in order
3893 * to allow early events to be recorded.
3894 * This function is called after tracefs has been initialized, and we
3895 * now have to create the files associated with those events.
3896 */
3897 static void __trace_early_add_event_dirs(struct trace_array *tr)
3898 {
3899 struct trace_event_file *file;
3900 int ret;
3901
3902
3903 list_for_each_entry(file, &tr->events, list) {
3904 ret = event_create_dir(tr->event_dir, file);
3905 if (ret < 0)
3906 pr_warn("Could not create directory for event %s\n",
3907 trace_event_name(file->event_call));
3908 }
3909 }
3910
3911 /*
3912 * For early boot up, the top trace array and the trace arrays created
3913 * by boot-time tracing need to have a list of events that can be
3914 * enabled. This must be done before the filesystem is set up in order
3915 * to allow events to be traced early.
3916 */
3917 void __trace_early_add_events(struct trace_array *tr)
3918 {
3919 struct trace_event_call *call;
3920 int ret;
3921
3922 list_for_each_entry(call, &ftrace_events, list) {
3923 /* Early boot up should not have any modules loaded */
3924 if (!(call->flags & TRACE_EVENT_FL_DYNAMIC) &&
3925 WARN_ON_ONCE(call->module))
3926 continue;
3927
3928 ret = __trace_early_add_new_event(call, tr);
3929 if (ret < 0)
3930 pr_warn("Could not create early event %s\n",
3931 trace_event_name(call));
3932 }
3933 }
3934
3935 /* Remove the event directory structure for a trace directory. */
3936 static void
3937 __trace_remove_event_dirs(struct trace_array *tr)
3938 {
3939 struct trace_event_file *file, *next;
3940
3941 list_for_each_entry_safe(file, next, &tr->events, list)
3942 remove_event_file_dir(file);
3943 }
3944
3945 static void __add_event_to_tracers(struct trace_event_call *call)
3946 {
3947 struct trace_array *tr;
3948
3949 list_for_each_entry(tr, &ftrace_trace_arrays, list)
3950 __trace_add_new_event(call, tr);
3951 }
3952
3953 extern struct trace_event_call *__start_ftrace_events[];
3954 extern struct trace_event_call *__stop_ftrace_events[];
3955
3956 static char bootup_event_buf[COMMAND_LINE_SIZE] __initdata;
3957
3958 static __init int setup_trace_event(char *str)
3959 {
3960 strscpy(bootup_event_buf, str, COMMAND_LINE_SIZE);
3961 trace_set_ring_buffer_expanded(NULL);
3962 disable_tracing_selftest("running event tracing");
3963
3964 return 1;
3965 }
3966 __setup("trace_event=", setup_trace_event);
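/*
 * Illustrative usage (hedged, not part of this file): booting with
 *
 *   trace_event=sched:sched_switch,sched:sched_wakeup
 *
 * copies the comma-separated list into bootup_event_buf so that
 * early_enable_events() can enable those events before tracefs exists;
 * the same buffer is parsed again by event_trace_enable_again() for
 * events that cannot start that early.
 */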
3967
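/*
 * eventfs callback for the top-level entries of the "events" directory.
 * As a hedged reading of the eventfs convention used here: returning 1
 * tells eventfs to create the named file with the mode and fops filled
 * in, while returning 0 means the entry is not created.
 */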
3968 static int events_callback(const char *name, umode_t *mode, void **data,
3969 const struct file_operations **fops)
3970 {
3971 if (strcmp(name, "enable") == 0) {
3972 *mode = TRACE_MODE_WRITE;
3973 *fops = &ftrace_tr_enable_fops;
3974 return 1;
3975 }
3976
3977 if (strcmp(name, "header_page") == 0) {
3978 *mode = TRACE_MODE_READ;
3979 *fops = &ftrace_show_header_page_fops;
3980
3981 } else if (strcmp(name, "header_event") == 0) {
3982 *mode = TRACE_MODE_READ;
3983 *fops = &ftrace_show_header_event_fops;
3984 } else
3985 return 0;
3986
3987 return 1;
3988 }
3989
3990 /* Expects to have event_mutex held when called */
3991 static int
3992 create_event_toplevel_files(struct dentry *parent, struct trace_array *tr)
3993 {
3994 struct eventfs_inode *e_events;
3995 struct dentry *entry;
3996 int nr_entries;
3997 static struct eventfs_entry events_entries[] = {
3998 {
3999 .name = "enable",
4000 .callback = events_callback,
4001 },
4002 {
4003 .name = "header_page",
4004 .callback = events_callback,
4005 },
4006 {
4007 .name = "header_event",
4008 .callback = events_callback,
4009 },
4010 };
4011
4012 entry = trace_create_file("set_event", TRACE_MODE_WRITE, parent,
4013 tr, &ftrace_set_event_fops);
4014 if (!entry)
4015 return -ENOMEM;
4016
4017 nr_entries = ARRAY_SIZE(events_entries);
4018
4019 e_events = eventfs_create_events_dir("events", parent, events_entries,
4020 nr_entries, tr);
4021 if (IS_ERR(e_events)) {
4022 pr_warn("Could not create tracefs 'events' directory\n");
4023 return -ENOMEM;
4024 }
4025
4026 /* These are not as crucial; just warn if they are not created */
4027
4028 trace_create_file("set_event_pid", TRACE_MODE_WRITE, parent,
4029 tr, &ftrace_set_event_pid_fops);
4030
4031 trace_create_file("set_event_notrace_pid",
4032 TRACE_MODE_WRITE, parent, tr,
4033 &ftrace_set_event_notrace_pid_fops);
4034
4035 tr->event_dir = e_events;
4036
4037 return 0;
4038 }
4039
4040 /**
4041 * event_trace_add_tracer - add an instance of a trace_array to events
4042 * @parent: The parent dentry to place the files/directories for events in
4043 * @tr: The trace array associated with these events
4044 *
4045 * When a new instance is created, it needs to set up its events
4046 * directory, as well as other files associated with events. It also
4047 * creates the event hierarchy in the @parent/events directory.
4048 *
4049 * Returns 0 on success.
4050 *
4051 * Must be called with event_mutex held.
4052 */
4053 int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr)
4054 {
4055 int ret;
4056
4057 lockdep_assert_held(&event_mutex);
4058
4059 ret = create_event_toplevel_files(parent, tr);
4060 if (ret)
4061 goto out;
4062
4063 down_write(&trace_event_sem);
4064 /* If tr already has the event list, it is initialized in early boot. */
4065 if (unlikely(!list_empty(&tr->events)))
4066 __trace_early_add_event_dirs(tr);
4067 else
4068 __trace_add_event_dirs(tr);
4069 up_write(&trace_event_sem);
4070
4071 out:
4072 return ret;
4073 }
4074
4075 /*
4076 * The top trace array has already had its event file descriptors created.
4077 * Now the files themselves need to be created.
4078 */
4079 static __init int
4080 early_event_add_tracer(struct dentry *parent, struct trace_array *tr)
4081 {
4082 int ret;
4083
4084 mutex_lock(&event_mutex);
4085
4086 ret = create_event_toplevel_files(parent, tr);
4087 if (ret)
4088 goto out_unlock;
4089
4090 down_write(&trace_event_sem);
4091 __trace_early_add_event_dirs(tr);
4092 up_write(&trace_event_sem);
4093
4094 out_unlock:
4095 mutex_unlock(&event_mutex);
4096
4097 return ret;
4098 }
4099
4100 /* Must be called with event_mutex held */
4101 int event_trace_del_tracer(struct trace_array *tr)
4102 {
4103 lockdep_assert_held(&event_mutex);
4104
4105 /* Disable any event triggers and associated soft-disabled events */
4106 clear_event_triggers(tr);
4107
4108 /* Clear the pid list */
4109 __ftrace_clear_event_pids(tr, TRACE_PIDS | TRACE_NO_PIDS);
4110
4111 /* Disable any running events */
4112 __ftrace_set_clr_event_nolock(tr, NULL, NULL, NULL, 0);
4113
4114 /* Make sure no more events are being executed */
4115 tracepoint_synchronize_unregister();
4116
4117 down_write(&trace_event_sem);
4118 __trace_remove_event_dirs(tr);
4119 eventfs_remove_events_dir(tr->event_dir);
4120 up_write(&trace_event_sem);
4121
4122 tr->event_dir = NULL;
4123
4124 return 0;
4125 }
4126
4127 static __init int event_trace_memsetup(void)
4128 {
4129 field_cachep = KMEM_CACHE(ftrace_event_field, SLAB_PANIC);
4130 file_cachep = KMEM_CACHE(trace_event_file, SLAB_PANIC);
4131 return 0;
4132 }
4133
4134 __init void
4135 early_enable_events(struct trace_array *tr, char *buf, bool disable_first)
4136 {
4137 char *token;
4138 int ret;
4139
4140 while (true) {
4141 token = strsep(&buf, ",");
4142
4143 if (!token)
4144 break;
4145
4146 if (*token) {
4147 /* Restarting syscalls requires that we stop them first */
4148 if (disable_first)
4149 ftrace_set_clr_event(tr, token, 0);
4150
4151 ret = ftrace_set_clr_event(tr, token, 1);
4152 if (ret)
4153 pr_warn("Failed to enable trace event: %s\n", token);
4154 }
4155
4156 /* Put back the comma so this buffer can be parsed again on a later call */
4157 if (buf)
4158 *(buf - 1) = ',';
4159 }
4160 }
4161
4162 static __init int event_trace_enable(void)
4163 {
4164 struct trace_array *tr = top_trace_array();
4165 struct trace_event_call **iter, *call;
4166 int ret;
4167
4168 if (!tr)
4169 return -ENODEV;
4170
4171 for_each_event(iter, __start_ftrace_events, __stop_ftrace_events) {
4172
4173 call = *iter;
4174 ret = event_init(call);
4175 if (!ret)
4176 list_add(&call->list, &ftrace_events);
4177 }
4178
4179 register_trigger_cmds();
4180
4181 /*
4182 * We need the top trace array to have a working set of trace
4183 * points at early init, before the debug files and directories
4184 * are created. Create the file entries now, and attach them
4185 * to the actual file dentries later.
4186 */
4187 __trace_early_add_events(tr);
4188
4189 early_enable_events(tr, bootup_event_buf, false);
4190
4191 trace_printk_start_comm();
4192
4193 register_event_cmds();
4194
4195
4196 return 0;
4197 }
4198
4199 /*
4200 * event_trace_enable() is called from trace_event_init() first to
4201 * initialize events and perhaps start any events that are on the
4202 * command line. Unfortunately, there are some events that will not
4203 * start this early, like the system call tracepoints that need
4204 * to set the %SYSCALL_WORK_SYSCALL_TRACEPOINT flag of pid 1. But
4205 * event_trace_enable() is called before pid 1 starts, and this flag
4206 * is never set, so the syscall tracepoints are never reached, even
4207 * though the events are enabled (and do nothing).
4208 */
4209 static __init int event_trace_enable_again(void)
4210 {
4211 struct trace_array *tr;
4212
4213 tr = top_trace_array();
4214 if (!tr)
4215 return -ENODEV;
4216
4217 early_enable_events(tr, bootup_event_buf, true);
4218
4219 return 0;
4220 }
4221
4222 early_initcall(event_trace_enable_again);
4223
4224 /* Initialize fields that are not related to tracefs */
4225 static __init int event_trace_init_fields(void)
4226 {
4227 if (trace_define_generic_fields())
4228 pr_warn("tracing: Failed to allocated generic fields");
4229
4230 if (trace_define_common_fields())
4231 pr_warn("tracing: Failed to allocate common fields");
4232
4233 return 0;
4234 }
4235
4236 __init int event_trace_init(void)
4237 {
4238 struct trace_array *tr;
4239 int ret;
4240
4241 tr = top_trace_array();
4242 if (!tr)
4243 return -ENODEV;
4244
4245 trace_create_file("available_events", TRACE_MODE_READ,
4246 NULL, tr, &ftrace_avail_fops);
4247
4248 ret = early_event_add_tracer(NULL, tr);
4249 if (ret)
4250 return ret;
4251
4252 #ifdef CONFIG_MODULES
4253 ret = register_module_notifier(&trace_module_nb);
4254 if (ret)
4255 pr_warn("Failed to register trace events module notifier\n");
4256 #endif
4257
4258 eventdir_initialized = true;
4259
4260 return 0;
4261 }
4262
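/*
 * A brief note on ordering (hedged, as read from the calls below): the slab
 * caches created by event_trace_memsetup() must exist before
 * event_trace_enable() registers events, and init_ftrace_syscalls() must
 * have built its syscall metadata before the syscall events are initialized
 * there.
 */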
4263 void __init trace_event_init(void)
4264 {
4265 event_trace_memsetup();
4266 init_ftrace_syscalls();
4267 event_trace_enable();
4268 event_trace_init_fields();
4269 }
4270
4271 #ifdef CONFIG_EVENT_TRACE_STARTUP_TEST
4272
4273 static DEFINE_SPINLOCK(test_spinlock);
4274 static DEFINE_SPINLOCK(test_spinlock_irq);
4275 static DEFINE_MUTEX(test_mutex);
4276
4277 static __init void test_work(struct work_struct *dummy)
4278 {
4279 spin_lock(&test_spinlock);
4280 spin_lock_irq(&test_spinlock_irq);
4281 udelay(1);
4282 spin_unlock_irq(&test_spinlock_irq);
4283 spin_unlock(&test_spinlock);
4284
4285 mutex_lock(&test_mutex);
4286 msleep(1);
4287 mutex_unlock(&test_mutex);
4288 }
4289
4290 static __init int event_test_thread(void *unused)
4291 {
4292 void *test_malloc;
4293
4294 test_malloc = kmalloc(1234, GFP_KERNEL);
4295 if (!test_malloc)
4296 pr_info("failed to kmalloc\n");
4297
4298 schedule_on_each_cpu(test_work);
4299
4300 kfree(test_malloc);
4301
4302 set_current_state(TASK_INTERRUPTIBLE);
4303 while (!kthread_should_stop()) {
4304 schedule();
4305 set_current_state(TASK_INTERRUPTIBLE);
4306 }
4307 __set_current_state(TASK_RUNNING);
4308
4309 return 0;
4310 }
4311
4312 /*
4313 * Do various things that may trigger events.
4314 */
4315 static __init void event_test_stuff(void)
4316 {
4317 struct task_struct *test_thread;
4318
4319 test_thread = kthread_run(event_test_thread, NULL, "test-events");
4320 msleep(1);
4321 kthread_stop(test_thread);
4322 }
4323
4324 /*
4325 * For every trace event defined, we will test each trace point separately,
4326 * and then by groups, and finally all trace points.
4327 */
4328 static __init void event_trace_self_tests(void)
4329 {
4330 struct trace_subsystem_dir *dir;
4331 struct trace_event_file *file;
4332 struct trace_event_call *call;
4333 struct event_subsystem *system;
4334 struct trace_array *tr;
4335 int ret;
4336
4337 tr = top_trace_array();
4338 if (!tr)
4339 return;
4340
4341 pr_info("Running tests on trace events:\n");
4342
4343 list_for_each_entry(file, &tr->events, list) {
4344
4345 call = file->event_call;
4346
4347 /* Only test those that have a probe */
4348 if (!call->class || !call->class->probe)
4349 continue;
4350
4351 /*
4352 * Testing syscall events here is pretty useless, but
4353 * we still do it if configured, even though it is time
4354 * consuming. What we really need is a user thread to
4355 * perform the syscalls as we test.
4356 */
4357 #ifndef CONFIG_EVENT_TRACE_TEST_SYSCALLS
4358 if (call->class->system &&
4359 strcmp(call->class->system, "syscalls") == 0)
4360 continue;
4361 #endif
4362
4363 pr_info("Testing event %s: ", trace_event_name(call));
4364
4365 /*
4366 * If an event is already enabled, someone is using
4367 * it and the self test should not be on.
4368 */
4369 if (file->flags & EVENT_FILE_FL_ENABLED) {
4370 pr_warn("Enabled event during self test!\n");
4371 WARN_ON_ONCE(1);
4372 continue;
4373 }
4374
4375 ftrace_event_enable_disable(file, 1);
4376 event_test_stuff();
4377 ftrace_event_enable_disable(file, 0);
4378
4379 pr_cont("OK\n");
4380 }
4381
4382 /* Now test at the sub system level */
4383
4384 pr_info("Running tests on trace event systems:\n");
4385
4386 list_for_each_entry(dir, &tr->systems, list) {
4387
4388 system = dir->subsystem;
4389
4390 /* the ftrace system is special, skip it */
4391 if (strcmp(system->name, "ftrace") == 0)
4392 continue;
4393
4394 pr_info("Testing event system %s: ", system->name);
4395
4396 ret = __ftrace_set_clr_event(tr, NULL, system->name, NULL, 1);
4397 if (WARN_ON_ONCE(ret)) {
4398 pr_warn("error enabling system %s\n",
4399 system->name);
4400 continue;
4401 }
4402
4403 event_test_stuff();
4404
4405 ret = __ftrace_set_clr_event(tr, NULL, system->name, NULL, 0);
4406 if (WARN_ON_ONCE(ret)) {
4407 pr_warn("error disabling system %s\n",
4408 system->name);
4409 continue;
4410 }
4411
4412 pr_cont("OK\n");
4413 }
4414
4415 /* Test with all events enabled */
4416
4417 pr_info("Running tests on all trace events:\n");
4418 pr_info("Testing all events: ");
4419
4420 ret = __ftrace_set_clr_event(tr, NULL, NULL, NULL, 1);
4421 if (WARN_ON_ONCE(ret)) {
4422 pr_warn("error enabling all events\n");
4423 return;
4424 }
4425
4426 event_test_stuff();
4427
4428 /* Disable all events again */
4429 ret = __ftrace_set_clr_event(tr, NULL, NULL, NULL, 0);
4430 if (WARN_ON_ONCE(ret)) {
4431 pr_warn("error disabling all events\n");
4432 return;
4433 }
4434
4435 pr_cont("OK\n");
4436 }
4437
4438 #ifdef CONFIG_FUNCTION_TRACER
4439
4440 static DEFINE_PER_CPU(atomic_t, ftrace_test_event_disable);
4441
4442 static struct trace_event_file event_trace_file __initdata;
4443
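/*
 * ftrace callback used while re-running the event self tests with the
 * function tracer enabled: it records a TRACE_FN entry for every function
 * hit, using a per-cpu counter to avoid recursing into itself.
 */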
4444 static void __init
4445 function_test_events_call(unsigned long ip, unsigned long parent_ip,
4446 struct ftrace_ops *op, struct ftrace_regs *regs)
4447 {
4448 struct trace_buffer *buffer;
4449 struct ring_buffer_event *event;
4450 struct ftrace_entry *entry;
4451 unsigned int trace_ctx;
4452 long disabled;
4453 int cpu;
4454
4455 trace_ctx = tracing_gen_ctx();
4456 preempt_disable_notrace();
4457 cpu = raw_smp_processor_id();
4458 disabled = atomic_inc_return(&per_cpu(ftrace_test_event_disable, cpu));
4459
4460 if (disabled != 1)
4461 goto out;
4462
4463 event = trace_event_buffer_lock_reserve(&buffer, &event_trace_file,
4464 TRACE_FN, sizeof(*entry),
4465 trace_ctx);
4466 if (!event)
4467 goto out;
4468 entry = ring_buffer_event_data(event);
4469 entry->ip = ip;
4470 entry->parent_ip = parent_ip;
4471
4472 event_trigger_unlock_commit(&event_trace_file, buffer, event,
4473 entry, trace_ctx);
4474 out:
4475 atomic_dec(&per_cpu(ftrace_test_event_disable, cpu));
4476 preempt_enable_notrace();
4477 }
4478
4479 static struct ftrace_ops trace_ops __initdata =
4480 {
4481 .func = function_test_events_call,
4482 };
4483
4484 static __init void event_trace_self_test_with_function(void)
4485 {
4486 int ret;
4487
4488 event_trace_file.tr = top_trace_array();
4489 if (WARN_ON(!event_trace_file.tr))
4490 return;
4491
4492 ret = register_ftrace_function(&trace_ops);
4493 if (WARN_ON(ret < 0)) {
4494 pr_info("Failed to enable function tracer for event tests\n");
4495 return;
4496 }
4497 pr_info("Running tests again, along with the function tracer\n");
4498 event_trace_self_tests();
4499 unregister_ftrace_function(&trace_ops);
4500 }
4501 #else
4502 static __init void event_trace_self_test_with_function(void)
4503 {
4504 }
4505 #endif
4506
4507 static __init int event_trace_self_tests_init(void)
4508 {
4509 if (!tracing_selftest_disabled) {
4510 event_trace_self_tests();
4511 event_trace_self_test_with_function();
4512 }
4513
4514 return 0;
4515 }
4516
4517 late_initcall(event_trace_self_tests_init);
4518
4519 #endif
4520