1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * event tracer 4 * 5 * Copyright (C) 2008 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> 6 * 7 * - Added format output of fields of the trace point. 8 * This was based off of work by Tom Zanussi <tzanussi@gmail.com>. 9 * 10 */ 11 12 #define pr_fmt(fmt) fmt 13 14 #include <linux/workqueue.h> 15 #include <linux/security.h> 16 #include <linux/spinlock.h> 17 #include <linux/kthread.h> 18 #include <linux/tracefs.h> 19 #include <linux/uaccess.h> 20 #include <linux/module.h> 21 #include <linux/ctype.h> 22 #include <linux/sort.h> 23 #include <linux/slab.h> 24 #include <linux/delay.h> 25 26 #include <trace/events/sched.h> 27 #include <trace/syscall.h> 28 29 #include <asm/setup.h> 30 31 #include "trace_output.h" 32 33 #undef TRACE_SYSTEM 34 #define TRACE_SYSTEM "TRACE_SYSTEM" 35 36 DEFINE_MUTEX(event_mutex); 37 38 LIST_HEAD(ftrace_events); 39 static LIST_HEAD(ftrace_generic_fields); 40 static LIST_HEAD(ftrace_common_fields); 41 static bool eventdir_initialized; 42 43 static LIST_HEAD(module_strings); 44 45 struct module_string { 46 struct list_head next; 47 struct module *module; 48 char *str; 49 }; 50 51 #define GFP_TRACE (GFP_KERNEL | __GFP_ZERO) 52 53 static struct kmem_cache *field_cachep; 54 static struct kmem_cache *file_cachep; 55 56 static inline int system_refcount(struct event_subsystem *system) 57 { 58 return system->ref_count; 59 } 60 61 static int system_refcount_inc(struct event_subsystem *system) 62 { 63 return system->ref_count++; 64 } 65 66 static int system_refcount_dec(struct event_subsystem *system) 67 { 68 return --system->ref_count; 69 } 70 71 /* Double loops, do not use break, only goto's work */ 72 #define do_for_each_event_file(tr, file) \ 73 list_for_each_entry(tr, &ftrace_trace_arrays, list) { \ 74 list_for_each_entry(file, &tr->events, list) 75 76 #define do_for_each_event_file_safe(tr, file) \ 77 list_for_each_entry(tr, &ftrace_trace_arrays, list) { \ 78 struct trace_event_file *___n; \ 79 list_for_each_entry_safe(file, ___n, &tr->events, list) 80 81 #define while_for_each_event_file() \ 82 } 83 84 static struct ftrace_event_field * 85 __find_event_field(struct list_head *head, const char *name) 86 { 87 struct ftrace_event_field *field; 88 89 list_for_each_entry(field, head, link) { 90 if (!strcmp(field->name, name)) 91 return field; 92 } 93 94 return NULL; 95 } 96 97 struct ftrace_event_field * 98 trace_find_event_field(struct trace_event_call *call, char *name) 99 { 100 struct ftrace_event_field *field; 101 struct list_head *head; 102 103 head = trace_get_fields(call); 104 field = __find_event_field(head, name); 105 if (field) 106 return field; 107 108 field = __find_event_field(&ftrace_generic_fields, name); 109 if (field) 110 return field; 111 112 return __find_event_field(&ftrace_common_fields, name); 113 } 114 115 static int __trace_define_field(struct list_head *head, const char *type, 116 const char *name, int offset, int size, 117 int is_signed, int filter_type, int len, 118 int need_test) 119 { 120 struct ftrace_event_field *field; 121 122 field = kmem_cache_alloc(field_cachep, GFP_TRACE); 123 if (!field) 124 return -ENOMEM; 125 126 field->name = name; 127 field->type = type; 128 129 if (filter_type == FILTER_OTHER) 130 field->filter_type = filter_assign_type(type); 131 else 132 field->filter_type = filter_type; 133 134 field->offset = offset; 135 field->size = size; 136 field->is_signed = is_signed; 137 field->needs_test = need_test; 138 field->len = len; 139 140 list_add(&field->link, head); 141 142 return 0; 143 } 144 145 int trace_define_field(struct trace_event_call *call, const char *type, 146 const char *name, int offset, int size, int is_signed, 147 int filter_type) 148 { 149 struct list_head *head; 150 151 if (WARN_ON(!call->class)) 152 return 0; 153 154 head = trace_get_fields(call); 155 return __trace_define_field(head, type, name, offset, size, 156 is_signed, filter_type, 0, 0); 157 } 158 EXPORT_SYMBOL_GPL(trace_define_field); 159 160 static int trace_define_field_ext(struct trace_event_call *call, const char *type, 161 const char *name, int offset, int size, int is_signed, 162 int filter_type, int len, int need_test) 163 { 164 struct list_head *head; 165 166 if (WARN_ON(!call->class)) 167 return 0; 168 169 head = trace_get_fields(call); 170 return __trace_define_field(head, type, name, offset, size, 171 is_signed, filter_type, len, need_test); 172 } 173 174 #define __generic_field(type, item, filter_type) \ 175 ret = __trace_define_field(&ftrace_generic_fields, #type, \ 176 #item, 0, 0, is_signed_type(type), \ 177 filter_type, 0, 0); \ 178 if (ret) \ 179 return ret; 180 181 #define __common_field(type, item) \ 182 ret = __trace_define_field(&ftrace_common_fields, #type, \ 183 "common_" #item, \ 184 offsetof(typeof(ent), item), \ 185 sizeof(ent.item), \ 186 is_signed_type(type), FILTER_OTHER, \ 187 0, 0); \ 188 if (ret) \ 189 return ret; 190 191 static int trace_define_generic_fields(void) 192 { 193 int ret; 194 195 __generic_field(int, CPU, FILTER_CPU); 196 __generic_field(int, cpu, FILTER_CPU); 197 __generic_field(int, common_cpu, FILTER_CPU); 198 __generic_field(char *, COMM, FILTER_COMM); 199 __generic_field(char *, comm, FILTER_COMM); 200 __generic_field(char *, stacktrace, FILTER_STACKTRACE); 201 __generic_field(char *, STACKTRACE, FILTER_STACKTRACE); 202 203 return ret; 204 } 205 206 static int trace_define_common_fields(void) 207 { 208 int ret; 209 struct trace_entry ent; 210 211 __common_field(unsigned short, type); 212 __common_field(unsigned char, flags); 213 /* Holds both preempt_count and migrate_disable */ 214 __common_field(unsigned char, preempt_count); 215 __common_field(int, pid); 216 217 return ret; 218 } 219 220 static void trace_destroy_fields(struct trace_event_call *call) 221 { 222 struct ftrace_event_field *field, *next; 223 struct list_head *head; 224 225 head = trace_get_fields(call); 226 list_for_each_entry_safe(field, next, head, link) { 227 list_del(&field->link); 228 kmem_cache_free(field_cachep, field); 229 } 230 } 231 232 /* 233 * run-time version of trace_event_get_offsets_<call>() that returns the last 234 * accessible offset of trace fields excluding __dynamic_array bytes 235 */ 236 int trace_event_get_offsets(struct trace_event_call *call) 237 { 238 struct ftrace_event_field *tail; 239 struct list_head *head; 240 241 head = trace_get_fields(call); 242 /* 243 * head->next points to the last field with the largest offset, 244 * since it was added last by trace_define_field() 245 */ 246 tail = list_first_entry(head, struct ftrace_event_field, link); 247 return tail->offset + tail->size; 248 } 249 250 251 static struct trace_event_fields *find_event_field(const char *fmt, 252 struct trace_event_call *call) 253 { 254 struct trace_event_fields *field = call->class->fields_array; 255 const char *p = fmt; 256 int len; 257 258 if (!(len = str_has_prefix(fmt, "REC->"))) 259 return NULL; 260 fmt += len; 261 for (p = fmt; *p; p++) { 262 if (!isalnum(*p) && *p != '_') 263 break; 264 } 265 len = p - fmt; 266 267 for (; field->type; field++) { 268 if (strncmp(field->name, fmt, len) || field->name[len]) 269 continue; 270 271 return field; 272 } 273 return NULL; 274 } 275 276 /* 277 * Check if the referenced field is an array and return true, 278 * as arrays are OK to dereference. 279 */ 280 static bool test_field(const char *fmt, struct trace_event_call *call) 281 { 282 struct trace_event_fields *field; 283 284 field = find_event_field(fmt, call); 285 if (!field) 286 return false; 287 288 /* This is an array and is OK to dereference. */ 289 return strchr(field->type, '[') != NULL; 290 } 291 292 /* Look for a string within an argument */ 293 static bool find_print_string(const char *arg, const char *str, const char *end) 294 { 295 const char *r; 296 297 r = strstr(arg, str); 298 return r && r < end; 299 } 300 301 /* Return true if the argument pointer is safe */ 302 static bool process_pointer(const char *fmt, int len, struct trace_event_call *call) 303 { 304 const char *r, *e, *a; 305 306 e = fmt + len; 307 308 /* Find the REC-> in the argument */ 309 r = strstr(fmt, "REC->"); 310 if (r && r < e) { 311 /* 312 * Addresses of events on the buffer, or an array on the buffer is 313 * OK to dereference. There's ways to fool this, but 314 * this is to catch common mistakes, not malicious code. 315 */ 316 a = strchr(fmt, '&'); 317 if ((a && (a < r)) || test_field(r, call)) 318 return true; 319 } else if (find_print_string(fmt, "__get_dynamic_array(", e)) { 320 return true; 321 } else if (find_print_string(fmt, "__get_rel_dynamic_array(", e)) { 322 return true; 323 } else if (find_print_string(fmt, "__get_dynamic_array_len(", e)) { 324 return true; 325 } else if (find_print_string(fmt, "__get_rel_dynamic_array_len(", e)) { 326 return true; 327 } else if (find_print_string(fmt, "__get_sockaddr(", e)) { 328 return true; 329 } else if (find_print_string(fmt, "__get_rel_sockaddr(", e)) { 330 return true; 331 } 332 return false; 333 } 334 335 /* Return true if the string is safe */ 336 static bool process_string(const char *fmt, int len, struct trace_event_call *call) 337 { 338 struct trace_event_fields *field; 339 const char *r, *e, *s; 340 341 e = fmt + len; 342 343 /* 344 * There are several helper functions that return strings. 345 * If the argument contains a function, then assume its field is valid. 346 * It is considered that the argument has a function if it has: 347 * alphanumeric or '_' before a parenthesis. 348 */ 349 s = fmt; 350 do { 351 r = strstr(s, "("); 352 if (!r || r >= e) 353 break; 354 for (int i = 1; r - i >= s; i++) { 355 char ch = *(r - i); 356 if (isspace(ch)) 357 continue; 358 if (isalnum(ch) || ch == '_') 359 return true; 360 /* Anything else, this isn't a function */ 361 break; 362 } 363 /* A function could be wrapped in parenthesis, try the next one */ 364 s = r + 1; 365 } while (s < e); 366 367 /* 368 * Check for arrays. If the argument has: foo[REC->val] 369 * then it is very likely that foo is an array of strings 370 * that are safe to use. 371 */ 372 r = strstr(s, "["); 373 if (r && r < e) { 374 r = strstr(r, "REC->"); 375 if (r && r < e) 376 return true; 377 } 378 379 /* 380 * If there's any strings in the argument consider this arg OK as it 381 * could be: REC->field ? "foo" : "bar" and we don't want to get into 382 * verifying that logic here. 383 */ 384 if (find_print_string(fmt, "\"", e)) 385 return true; 386 387 /* Dereferenced strings are also valid like any other pointer */ 388 if (process_pointer(fmt, len, call)) 389 return true; 390 391 /* Make sure the field is found */ 392 field = find_event_field(fmt, call); 393 if (!field) 394 return false; 395 396 /* Test this field's string before printing the event */ 397 call->flags |= TRACE_EVENT_FL_TEST_STR; 398 field->needs_test = 1; 399 400 return true; 401 } 402 403 static void handle_dereference_arg(const char *arg_str, u64 string_flags, int len, 404 u64 *dereference_flags, int arg, 405 struct trace_event_call *call) 406 { 407 if (string_flags & (1ULL << arg)) { 408 if (process_string(arg_str, len, call)) 409 *dereference_flags &= ~(1ULL << arg); 410 } else if (process_pointer(arg_str, len, call)) 411 *dereference_flags &= ~(1ULL << arg); 412 else 413 pr_warn("TRACE EVENT ERROR: Bad dereference argument: '%.*s'\n", 414 len, arg_str); 415 } 416 417 /* 418 * Examine the print fmt of the event looking for unsafe dereference 419 * pointers using %p* that could be recorded in the trace event and 420 * much later referenced after the pointer was freed. Dereferencing 421 * pointers are OK, if it is dereferenced into the event itself. 422 */ 423 static void test_event_printk(struct trace_event_call *call) 424 { 425 u64 dereference_flags = 0; 426 u64 string_flags = 0; 427 bool first = true; 428 const char *fmt; 429 int parens = 0; 430 char in_quote = 0; 431 int start_arg = 0; 432 int arg = 0; 433 int i, e; 434 435 fmt = call->print_fmt; 436 437 if (!fmt) 438 return; 439 440 for (i = 0; fmt[i]; i++) { 441 switch (fmt[i]) { 442 case '\\': 443 i++; 444 if (!fmt[i]) 445 return; 446 continue; 447 case '"': 448 case '\'': 449 /* 450 * The print fmt starts with a string that 451 * is processed first to find %p* usage, 452 * then after the first string, the print fmt 453 * contains arguments that are used to check 454 * if the dereferenced %p* usage is safe. 455 */ 456 if (first) { 457 if (fmt[i] == '\'') 458 continue; 459 if (in_quote) { 460 arg = 0; 461 first = false; 462 /* 463 * If there was no %p* uses 464 * the fmt is OK. 465 */ 466 if (!dereference_flags) 467 return; 468 } 469 } 470 if (in_quote) { 471 if (in_quote == fmt[i]) 472 in_quote = 0; 473 } else { 474 in_quote = fmt[i]; 475 } 476 continue; 477 case '%': 478 if (!first || !in_quote) 479 continue; 480 i++; 481 if (!fmt[i]) 482 return; 483 switch (fmt[i]) { 484 case '%': 485 continue; 486 case 'p': 487 do_pointer: 488 /* Find dereferencing fields */ 489 switch (fmt[i + 1]) { 490 case 'B': case 'R': case 'r': 491 case 'b': case 'M': case 'm': 492 case 'I': case 'i': case 'E': 493 case 'U': case 'V': case 'N': 494 case 'a': case 'd': case 'D': 495 case 'g': case 't': case 'C': 496 case 'O': case 'f': 497 if (WARN_ONCE(arg == 63, 498 "Too many args for event: %s", 499 trace_event_name(call))) 500 return; 501 dereference_flags |= 1ULL << arg; 502 } 503 break; 504 default: 505 { 506 bool star = false; 507 int j; 508 509 /* Increment arg if %*s exists. */ 510 for (j = 0; fmt[i + j]; j++) { 511 if (isdigit(fmt[i + j]) || 512 fmt[i + j] == '.') 513 continue; 514 if (fmt[i + j] == '*') { 515 star = true; 516 /* Handle %*pbl case */ 517 if (!j && fmt[i + 1] == 'p') { 518 arg++; 519 i++; 520 goto do_pointer; 521 } 522 continue; 523 } 524 if ((fmt[i + j] == 's')) { 525 if (star) 526 arg++; 527 if (WARN_ONCE(arg == 63, 528 "Too many args for event: %s", 529 trace_event_name(call))) 530 return; 531 dereference_flags |= 1ULL << arg; 532 string_flags |= 1ULL << arg; 533 } 534 break; 535 } 536 break; 537 } /* default */ 538 539 } /* switch */ 540 arg++; 541 continue; 542 case '(': 543 if (in_quote) 544 continue; 545 parens++; 546 continue; 547 case ')': 548 if (in_quote) 549 continue; 550 parens--; 551 if (WARN_ONCE(parens < 0, 552 "Paren mismatch for event: %s\narg='%s'\n%*s", 553 trace_event_name(call), 554 fmt + start_arg, 555 (i - start_arg) + 5, "^")) 556 return; 557 continue; 558 case ',': 559 if (in_quote || parens) 560 continue; 561 e = i; 562 i++; 563 while (isspace(fmt[i])) 564 i++; 565 566 /* 567 * If start_arg is zero, then this is the start of the 568 * first argument. The processing of the argument happens 569 * when the end of the argument is found, as it needs to 570 * handle parenthesis and such. 571 */ 572 if (!start_arg) { 573 start_arg = i; 574 /* Balance out the i++ in the for loop */ 575 i--; 576 continue; 577 } 578 579 if (dereference_flags & (1ULL << arg)) { 580 handle_dereference_arg(fmt + start_arg, string_flags, 581 e - start_arg, 582 &dereference_flags, arg, call); 583 } 584 585 start_arg = i; 586 arg++; 587 /* Balance out the i++ in the for loop */ 588 i--; 589 } 590 } 591 592 if (dereference_flags & (1ULL << arg)) { 593 handle_dereference_arg(fmt + start_arg, string_flags, 594 i - start_arg, 595 &dereference_flags, arg, call); 596 } 597 598 /* 599 * If you triggered the below warning, the trace event reported 600 * uses an unsafe dereference pointer %p*. As the data stored 601 * at the trace event time may no longer exist when the trace 602 * event is printed, dereferencing to the original source is 603 * unsafe. The source of the dereference must be copied into the 604 * event itself, and the dereference must access the copy instead. 605 */ 606 if (WARN_ON_ONCE(dereference_flags)) { 607 arg = 1; 608 while (!(dereference_flags & 1)) { 609 dereference_flags >>= 1; 610 arg++; 611 } 612 pr_warn("event %s has unsafe dereference of argument %d\n", 613 trace_event_name(call), arg); 614 pr_warn("print_fmt: %s\n", fmt); 615 } 616 } 617 618 int trace_event_raw_init(struct trace_event_call *call) 619 { 620 int id; 621 622 id = register_trace_event(&call->event); 623 if (!id) 624 return -ENODEV; 625 626 test_event_printk(call); 627 628 return 0; 629 } 630 EXPORT_SYMBOL_GPL(trace_event_raw_init); 631 632 bool trace_event_ignore_this_pid(struct trace_event_file *trace_file) 633 { 634 struct trace_array *tr = trace_file->tr; 635 struct trace_pid_list *no_pid_list; 636 struct trace_pid_list *pid_list; 637 638 pid_list = rcu_dereference_raw(tr->filtered_pids); 639 no_pid_list = rcu_dereference_raw(tr->filtered_no_pids); 640 641 if (!pid_list && !no_pid_list) 642 return false; 643 644 /* 645 * This is recorded at every sched_switch for this task. 646 * Thus, even if the task migrates the ignore value will be the same. 647 */ 648 return this_cpu_read(tr->array_buffer.data->ignore_pid) != 0; 649 } 650 EXPORT_SYMBOL_GPL(trace_event_ignore_this_pid); 651 652 /** 653 * trace_event_buffer_reserve - reserve space on the ring buffer for an event 654 * @fbuffer: information about how to save the event 655 * @trace_file: the instance file descriptor for the event 656 * @len: The length of the event 657 * 658 * The @fbuffer has information about the ring buffer and data will 659 * be added to it to be used by the call to trace_event_buffer_commit(). 660 * The @trace_file is the desrciptor with information about the status 661 * of the given event for a specific trace_array instance. 662 * The @len is the length of data to save for the event. 663 * 664 * Returns a pointer to the data on the ring buffer or NULL if the 665 * event was not reserved (event was filtered, too big, or the buffer 666 * simply was disabled for write). 667 */ 668 void *trace_event_buffer_reserve(struct trace_event_buffer *fbuffer, 669 struct trace_event_file *trace_file, 670 unsigned long len) 671 { 672 struct trace_event_call *event_call = trace_file->event_call; 673 674 if ((trace_file->flags & EVENT_FILE_FL_PID_FILTER) && 675 trace_event_ignore_this_pid(trace_file)) 676 return NULL; 677 678 /* 679 * If CONFIG_PREEMPTION is enabled, then the tracepoint itself disables 680 * preemption (adding one to the preempt_count). Since we are 681 * interested in the preempt_count at the time the tracepoint was 682 * hit, we need to subtract one to offset the increment. 683 */ 684 fbuffer->trace_ctx = tracing_gen_ctx_dec(); 685 fbuffer->trace_file = trace_file; 686 687 fbuffer->event = 688 trace_event_buffer_lock_reserve(&fbuffer->buffer, trace_file, 689 event_call->event.type, len, 690 fbuffer->trace_ctx); 691 if (!fbuffer->event) 692 return NULL; 693 694 fbuffer->regs = NULL; 695 fbuffer->entry = ring_buffer_event_data(fbuffer->event); 696 return fbuffer->entry; 697 } 698 EXPORT_SYMBOL_GPL(trace_event_buffer_reserve); 699 700 int trace_event_reg(struct trace_event_call *call, 701 enum trace_reg type, void *data) 702 { 703 struct trace_event_file *file = data; 704 705 WARN_ON(!(call->flags & TRACE_EVENT_FL_TRACEPOINT)); 706 switch (type) { 707 case TRACE_REG_REGISTER: 708 return tracepoint_probe_register(call->tp, 709 call->class->probe, 710 file); 711 case TRACE_REG_UNREGISTER: 712 tracepoint_probe_unregister(call->tp, 713 call->class->probe, 714 file); 715 return 0; 716 717 #ifdef CONFIG_PERF_EVENTS 718 case TRACE_REG_PERF_REGISTER: 719 if (!call->class->perf_probe) 720 return -ENODEV; 721 return tracepoint_probe_register(call->tp, 722 call->class->perf_probe, 723 call); 724 case TRACE_REG_PERF_UNREGISTER: 725 tracepoint_probe_unregister(call->tp, 726 call->class->perf_probe, 727 call); 728 return 0; 729 case TRACE_REG_PERF_OPEN: 730 case TRACE_REG_PERF_CLOSE: 731 case TRACE_REG_PERF_ADD: 732 case TRACE_REG_PERF_DEL: 733 return 0; 734 #endif 735 } 736 return 0; 737 } 738 EXPORT_SYMBOL_GPL(trace_event_reg); 739 740 void trace_event_enable_cmd_record(bool enable) 741 { 742 struct trace_event_file *file; 743 struct trace_array *tr; 744 745 lockdep_assert_held(&event_mutex); 746 747 do_for_each_event_file(tr, file) { 748 749 if (!(file->flags & EVENT_FILE_FL_ENABLED)) 750 continue; 751 752 if (enable) { 753 tracing_start_cmdline_record(); 754 set_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags); 755 } else { 756 tracing_stop_cmdline_record(); 757 clear_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags); 758 } 759 } while_for_each_event_file(); 760 } 761 762 void trace_event_enable_tgid_record(bool enable) 763 { 764 struct trace_event_file *file; 765 struct trace_array *tr; 766 767 lockdep_assert_held(&event_mutex); 768 769 do_for_each_event_file(tr, file) { 770 if (!(file->flags & EVENT_FILE_FL_ENABLED)) 771 continue; 772 773 if (enable) { 774 tracing_start_tgid_record(); 775 set_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags); 776 } else { 777 tracing_stop_tgid_record(); 778 clear_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, 779 &file->flags); 780 } 781 } while_for_each_event_file(); 782 } 783 784 static int __ftrace_event_enable_disable(struct trace_event_file *file, 785 int enable, int soft_disable) 786 { 787 struct trace_event_call *call = file->event_call; 788 struct trace_array *tr = file->tr; 789 bool soft_mode = atomic_read(&file->sm_ref) != 0; 790 int ret = 0; 791 int disable; 792 793 switch (enable) { 794 case 0: 795 /* 796 * When soft_disable is set and enable is cleared, the sm_ref 797 * reference counter is decremented. If it reaches 0, we want 798 * to clear the SOFT_DISABLED flag but leave the event in the 799 * state that it was. That is, if the event was enabled and 800 * SOFT_DISABLED isn't set, then do nothing. But if SOFT_DISABLED 801 * is set we do not want the event to be enabled before we 802 * clear the bit. 803 * 804 * When soft_disable is not set but the soft_mode is, 805 * we do nothing. Do not disable the tracepoint, otherwise 806 * "soft enable"s (clearing the SOFT_DISABLED bit) won't work. 807 */ 808 if (soft_disable) { 809 if (atomic_dec_return(&file->sm_ref) > 0) 810 break; 811 disable = file->flags & EVENT_FILE_FL_SOFT_DISABLED; 812 soft_mode = false; 813 /* Disable use of trace_buffered_event */ 814 trace_buffered_event_disable(); 815 } else 816 disable = !soft_mode; 817 818 if (disable && (file->flags & EVENT_FILE_FL_ENABLED)) { 819 clear_bit(EVENT_FILE_FL_ENABLED_BIT, &file->flags); 820 if (file->flags & EVENT_FILE_FL_RECORDED_CMD) { 821 tracing_stop_cmdline_record(); 822 clear_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags); 823 } 824 825 if (file->flags & EVENT_FILE_FL_RECORDED_TGID) { 826 tracing_stop_tgid_record(); 827 clear_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags); 828 } 829 830 ret = call->class->reg(call, TRACE_REG_UNREGISTER, file); 831 832 WARN_ON_ONCE(ret); 833 } 834 /* If in soft mode, just set the SOFT_DISABLE_BIT, else clear it */ 835 if (soft_mode) 836 set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags); 837 else 838 clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags); 839 break; 840 case 1: 841 /* 842 * When soft_disable is set and enable is set, we want to 843 * register the tracepoint for the event, but leave the event 844 * as is. That means, if the event was already enabled, we do 845 * nothing. If the event is disabled, we set SOFT_DISABLED 846 * before enabling the event tracepoint, so it still seems 847 * to be disabled. 848 */ 849 if (!soft_disable) 850 clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags); 851 else { 852 if (atomic_inc_return(&file->sm_ref) > 1) 853 break; 854 /* Enable use of trace_buffered_event */ 855 trace_buffered_event_enable(); 856 } 857 858 if (!(file->flags & EVENT_FILE_FL_ENABLED)) { 859 bool cmd = false, tgid = false; 860 861 /* Keep the event disabled, when going to soft mode. */ 862 if (soft_disable) 863 set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags); 864 865 if (tr->trace_flags & TRACE_ITER(RECORD_CMD)) { 866 cmd = true; 867 tracing_start_cmdline_record(); 868 set_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags); 869 } 870 871 if (tr->trace_flags & TRACE_ITER(RECORD_TGID)) { 872 tgid = true; 873 tracing_start_tgid_record(); 874 set_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags); 875 } 876 877 ret = call->class->reg(call, TRACE_REG_REGISTER, file); 878 if (ret) { 879 if (cmd) 880 tracing_stop_cmdline_record(); 881 if (tgid) 882 tracing_stop_tgid_record(); 883 pr_info("event trace: Could not enable event " 884 "%s\n", trace_event_name(call)); 885 break; 886 } 887 set_bit(EVENT_FILE_FL_ENABLED_BIT, &file->flags); 888 889 /* WAS_ENABLED gets set but never cleared. */ 890 set_bit(EVENT_FILE_FL_WAS_ENABLED_BIT, &file->flags); 891 } 892 break; 893 } 894 895 return ret; 896 } 897 898 int trace_event_enable_disable(struct trace_event_file *file, 899 int enable, int soft_disable) 900 { 901 return __ftrace_event_enable_disable(file, enable, soft_disable); 902 } 903 904 static int ftrace_event_enable_disable(struct trace_event_file *file, 905 int enable) 906 { 907 return __ftrace_event_enable_disable(file, enable, 0); 908 } 909 910 #ifdef CONFIG_MODULES 911 struct event_mod_load { 912 struct list_head list; 913 char *module; 914 char *match; 915 char *system; 916 char *event; 917 }; 918 919 static void free_event_mod(struct event_mod_load *event_mod) 920 { 921 list_del(&event_mod->list); 922 kfree(event_mod->module); 923 kfree(event_mod->match); 924 kfree(event_mod->system); 925 kfree(event_mod->event); 926 kfree(event_mod); 927 } 928 929 static void clear_mod_events(struct trace_array *tr) 930 { 931 struct event_mod_load *event_mod, *n; 932 933 list_for_each_entry_safe(event_mod, n, &tr->mod_events, list) { 934 free_event_mod(event_mod); 935 } 936 } 937 938 static int remove_cache_mod(struct trace_array *tr, const char *mod, 939 const char *match, const char *system, const char *event) 940 { 941 struct event_mod_load *event_mod, *n; 942 int ret = -EINVAL; 943 944 list_for_each_entry_safe(event_mod, n, &tr->mod_events, list) { 945 if (strcmp(event_mod->module, mod) != 0) 946 continue; 947 948 if (match && strcmp(event_mod->match, match) != 0) 949 continue; 950 951 if (system && 952 (!event_mod->system || strcmp(event_mod->system, system) != 0)) 953 continue; 954 955 if (event && 956 (!event_mod->event || strcmp(event_mod->event, event) != 0)) 957 continue; 958 959 free_event_mod(event_mod); 960 ret = 0; 961 } 962 963 return ret; 964 } 965 966 static int cache_mod(struct trace_array *tr, const char *mod, int set, 967 const char *match, const char *system, const char *event) 968 { 969 struct event_mod_load *event_mod; 970 971 /* If the module exists, then this just failed to find an event */ 972 if (module_exists(mod)) 973 return -EINVAL; 974 975 /* See if this is to remove a cached filter */ 976 if (!set) 977 return remove_cache_mod(tr, mod, match, system, event); 978 979 event_mod = kzalloc_obj(*event_mod, GFP_KERNEL); 980 if (!event_mod) 981 return -ENOMEM; 982 983 INIT_LIST_HEAD(&event_mod->list); 984 event_mod->module = kstrdup(mod, GFP_KERNEL); 985 if (!event_mod->module) 986 goto out_free; 987 988 if (match) { 989 event_mod->match = kstrdup(match, GFP_KERNEL); 990 if (!event_mod->match) 991 goto out_free; 992 } 993 994 if (system) { 995 event_mod->system = kstrdup(system, GFP_KERNEL); 996 if (!event_mod->system) 997 goto out_free; 998 } 999 1000 if (event) { 1001 event_mod->event = kstrdup(event, GFP_KERNEL); 1002 if (!event_mod->event) 1003 goto out_free; 1004 } 1005 1006 list_add(&event_mod->list, &tr->mod_events); 1007 1008 return 0; 1009 1010 out_free: 1011 free_event_mod(event_mod); 1012 1013 return -ENOMEM; 1014 } 1015 #else /* CONFIG_MODULES */ 1016 static inline void clear_mod_events(struct trace_array *tr) { } 1017 static int cache_mod(struct trace_array *tr, const char *mod, int set, 1018 const char *match, const char *system, const char *event) 1019 { 1020 return -EINVAL; 1021 } 1022 #endif 1023 1024 static void ftrace_clear_events(struct trace_array *tr) 1025 { 1026 struct trace_event_file *file; 1027 1028 mutex_lock(&event_mutex); 1029 list_for_each_entry(file, &tr->events, list) { 1030 ftrace_event_enable_disable(file, 0); 1031 } 1032 clear_mod_events(tr); 1033 mutex_unlock(&event_mutex); 1034 } 1035 1036 static void 1037 event_filter_pid_sched_process_exit(void *data, struct task_struct *task) 1038 { 1039 struct trace_pid_list *pid_list; 1040 struct trace_array *tr = data; 1041 1042 pid_list = rcu_dereference_raw(tr->filtered_pids); 1043 trace_filter_add_remove_task(pid_list, NULL, task); 1044 1045 pid_list = rcu_dereference_raw(tr->filtered_no_pids); 1046 trace_filter_add_remove_task(pid_list, NULL, task); 1047 } 1048 1049 static void 1050 event_filter_pid_sched_process_fork(void *data, 1051 struct task_struct *self, 1052 struct task_struct *task) 1053 { 1054 struct trace_pid_list *pid_list; 1055 struct trace_array *tr = data; 1056 1057 pid_list = rcu_dereference_sched(tr->filtered_pids); 1058 trace_filter_add_remove_task(pid_list, self, task); 1059 1060 pid_list = rcu_dereference_sched(tr->filtered_no_pids); 1061 trace_filter_add_remove_task(pid_list, self, task); 1062 } 1063 1064 void trace_event_follow_fork(struct trace_array *tr, bool enable) 1065 { 1066 if (enable) { 1067 register_trace_prio_sched_process_fork(event_filter_pid_sched_process_fork, 1068 tr, INT_MIN); 1069 register_trace_prio_sched_process_free(event_filter_pid_sched_process_exit, 1070 tr, INT_MAX); 1071 } else { 1072 unregister_trace_sched_process_fork(event_filter_pid_sched_process_fork, 1073 tr); 1074 unregister_trace_sched_process_free(event_filter_pid_sched_process_exit, 1075 tr); 1076 } 1077 } 1078 1079 static void 1080 event_filter_pid_sched_switch_probe_pre(void *data, bool preempt, 1081 struct task_struct *prev, 1082 struct task_struct *next, 1083 unsigned int prev_state) 1084 { 1085 struct trace_array *tr = data; 1086 struct trace_pid_list *no_pid_list; 1087 struct trace_pid_list *pid_list; 1088 bool ret; 1089 1090 pid_list = rcu_dereference_sched(tr->filtered_pids); 1091 no_pid_list = rcu_dereference_sched(tr->filtered_no_pids); 1092 1093 /* 1094 * Sched switch is funny, as we only want to ignore it 1095 * in the notrace case if both prev and next should be ignored. 1096 */ 1097 ret = trace_ignore_this_task(NULL, no_pid_list, prev) && 1098 trace_ignore_this_task(NULL, no_pid_list, next); 1099 1100 this_cpu_write(tr->array_buffer.data->ignore_pid, ret || 1101 (trace_ignore_this_task(pid_list, NULL, prev) && 1102 trace_ignore_this_task(pid_list, NULL, next))); 1103 } 1104 1105 static void 1106 event_filter_pid_sched_switch_probe_post(void *data, bool preempt, 1107 struct task_struct *prev, 1108 struct task_struct *next, 1109 unsigned int prev_state) 1110 { 1111 struct trace_array *tr = data; 1112 struct trace_pid_list *no_pid_list; 1113 struct trace_pid_list *pid_list; 1114 1115 pid_list = rcu_dereference_sched(tr->filtered_pids); 1116 no_pid_list = rcu_dereference_sched(tr->filtered_no_pids); 1117 1118 this_cpu_write(tr->array_buffer.data->ignore_pid, 1119 trace_ignore_this_task(pid_list, no_pid_list, next)); 1120 } 1121 1122 static void 1123 event_filter_pid_sched_wakeup_probe_pre(void *data, struct task_struct *task) 1124 { 1125 struct trace_array *tr = data; 1126 struct trace_pid_list *no_pid_list; 1127 struct trace_pid_list *pid_list; 1128 1129 /* Nothing to do if we are already tracing */ 1130 if (!this_cpu_read(tr->array_buffer.data->ignore_pid)) 1131 return; 1132 1133 pid_list = rcu_dereference_sched(tr->filtered_pids); 1134 no_pid_list = rcu_dereference_sched(tr->filtered_no_pids); 1135 1136 this_cpu_write(tr->array_buffer.data->ignore_pid, 1137 trace_ignore_this_task(pid_list, no_pid_list, task)); 1138 } 1139 1140 static void 1141 event_filter_pid_sched_wakeup_probe_post(void *data, struct task_struct *task) 1142 { 1143 struct trace_array *tr = data; 1144 struct trace_pid_list *no_pid_list; 1145 struct trace_pid_list *pid_list; 1146 1147 /* Nothing to do if we are not tracing */ 1148 if (this_cpu_read(tr->array_buffer.data->ignore_pid)) 1149 return; 1150 1151 pid_list = rcu_dereference_sched(tr->filtered_pids); 1152 no_pid_list = rcu_dereference_sched(tr->filtered_no_pids); 1153 1154 /* Set tracing if current is enabled */ 1155 this_cpu_write(tr->array_buffer.data->ignore_pid, 1156 trace_ignore_this_task(pid_list, no_pid_list, current)); 1157 } 1158 1159 static void unregister_pid_events(struct trace_array *tr) 1160 { 1161 unregister_trace_sched_switch(event_filter_pid_sched_switch_probe_pre, tr); 1162 unregister_trace_sched_switch(event_filter_pid_sched_switch_probe_post, tr); 1163 1164 unregister_trace_sched_wakeup(event_filter_pid_sched_wakeup_probe_pre, tr); 1165 unregister_trace_sched_wakeup(event_filter_pid_sched_wakeup_probe_post, tr); 1166 1167 unregister_trace_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_pre, tr); 1168 unregister_trace_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_post, tr); 1169 1170 unregister_trace_sched_waking(event_filter_pid_sched_wakeup_probe_pre, tr); 1171 unregister_trace_sched_waking(event_filter_pid_sched_wakeup_probe_post, tr); 1172 } 1173 1174 static void __ftrace_clear_event_pids(struct trace_array *tr, int type) 1175 { 1176 struct trace_pid_list *pid_list; 1177 struct trace_pid_list *no_pid_list; 1178 struct trace_event_file *file; 1179 int cpu; 1180 1181 pid_list = rcu_dereference_protected(tr->filtered_pids, 1182 lockdep_is_held(&event_mutex)); 1183 no_pid_list = rcu_dereference_protected(tr->filtered_no_pids, 1184 lockdep_is_held(&event_mutex)); 1185 1186 /* Make sure there's something to do */ 1187 if (!pid_type_enabled(type, pid_list, no_pid_list)) 1188 return; 1189 1190 if (!still_need_pid_events(type, pid_list, no_pid_list)) { 1191 unregister_pid_events(tr); 1192 1193 list_for_each_entry(file, &tr->events, list) { 1194 clear_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags); 1195 } 1196 1197 for_each_possible_cpu(cpu) 1198 per_cpu_ptr(tr->array_buffer.data, cpu)->ignore_pid = false; 1199 } 1200 1201 if (type & TRACE_PIDS) 1202 rcu_assign_pointer(tr->filtered_pids, NULL); 1203 1204 if (type & TRACE_NO_PIDS) 1205 rcu_assign_pointer(tr->filtered_no_pids, NULL); 1206 1207 /* Wait till all users are no longer using pid filtering */ 1208 tracepoint_synchronize_unregister(); 1209 1210 if ((type & TRACE_PIDS) && pid_list) 1211 trace_pid_list_free(pid_list); 1212 1213 if ((type & TRACE_NO_PIDS) && no_pid_list) 1214 trace_pid_list_free(no_pid_list); 1215 } 1216 1217 static void ftrace_clear_event_pids(struct trace_array *tr, int type) 1218 { 1219 mutex_lock(&event_mutex); 1220 __ftrace_clear_event_pids(tr, type); 1221 mutex_unlock(&event_mutex); 1222 } 1223 1224 static void __put_system(struct event_subsystem *system) 1225 { 1226 struct event_filter *filter = system->filter; 1227 1228 WARN_ON_ONCE(system_refcount(system) == 0); 1229 if (system_refcount_dec(system)) 1230 return; 1231 1232 list_del(&system->list); 1233 1234 if (filter) { 1235 kfree(filter->filter_string); 1236 kfree(filter); 1237 } 1238 kfree_const(system->name); 1239 kfree(system); 1240 } 1241 1242 static void __get_system(struct event_subsystem *system) 1243 { 1244 WARN_ON_ONCE(system_refcount(system) == 0); 1245 system_refcount_inc(system); 1246 } 1247 1248 static void __get_system_dir(struct trace_subsystem_dir *dir) 1249 { 1250 WARN_ON_ONCE(dir->ref_count == 0); 1251 dir->ref_count++; 1252 __get_system(dir->subsystem); 1253 } 1254 1255 static void __put_system_dir(struct trace_subsystem_dir *dir) 1256 { 1257 WARN_ON_ONCE(dir->ref_count == 0); 1258 /* If the subsystem is about to be freed, the dir must be too */ 1259 WARN_ON_ONCE(system_refcount(dir->subsystem) == 1 && dir->ref_count != 1); 1260 1261 __put_system(dir->subsystem); 1262 if (!--dir->ref_count) 1263 kfree(dir); 1264 } 1265 1266 static void put_system(struct trace_subsystem_dir *dir) 1267 { 1268 mutex_lock(&event_mutex); 1269 __put_system_dir(dir); 1270 mutex_unlock(&event_mutex); 1271 } 1272 1273 static void remove_subsystem(struct trace_subsystem_dir *dir) 1274 { 1275 if (!dir) 1276 return; 1277 1278 if (!--dir->nr_events) { 1279 eventfs_remove_dir(dir->ei); 1280 list_del(&dir->list); 1281 __put_system_dir(dir); 1282 } 1283 } 1284 1285 void event_file_get(struct trace_event_file *file) 1286 { 1287 refcount_inc(&file->ref); 1288 } 1289 1290 void event_file_put(struct trace_event_file *file) 1291 { 1292 if (WARN_ON_ONCE(!refcount_read(&file->ref))) { 1293 if (file->flags & EVENT_FILE_FL_FREED) 1294 kmem_cache_free(file_cachep, file); 1295 return; 1296 } 1297 1298 if (refcount_dec_and_test(&file->ref)) { 1299 /* Count should only go to zero when it is freed */ 1300 if (WARN_ON_ONCE(!(file->flags & EVENT_FILE_FL_FREED))) 1301 return; 1302 kmem_cache_free(file_cachep, file); 1303 } 1304 } 1305 1306 static void remove_event_file_dir(struct trace_event_file *file) 1307 { 1308 eventfs_remove_dir(file->ei); 1309 list_del(&file->list); 1310 remove_subsystem(file->system); 1311 free_event_filter(file->filter); 1312 file->flags |= EVENT_FILE_FL_FREED; 1313 event_file_put(file); 1314 1315 /* Wake up hist poll waiters to notice the EVENT_FILE_FL_FREED flag. */ 1316 hist_poll_wakeup(); 1317 } 1318 1319 /* 1320 * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events. 1321 */ 1322 static int 1323 __ftrace_set_clr_event_nolock(struct trace_array *tr, const char *match, 1324 const char *sub, const char *event, int set, 1325 const char *mod) 1326 { 1327 struct trace_event_file *file; 1328 struct trace_event_call *call; 1329 char *module __free(kfree) = NULL; 1330 const char *name; 1331 int ret = -EINVAL; 1332 int eret = 0; 1333 1334 if (mod) { 1335 char *p; 1336 1337 module = kstrdup(mod, GFP_KERNEL); 1338 if (!module) 1339 return -ENOMEM; 1340 1341 /* Replace all '-' with '_' as that's what modules do */ 1342 for (p = strchr(module, '-'); p; p = strchr(p + 1, '-')) 1343 *p = '_'; 1344 } 1345 1346 list_for_each_entry(file, &tr->events, list) { 1347 1348 call = file->event_call; 1349 1350 /* If a module is specified, skip events that are not that module */ 1351 if (module && (!call->module || strcmp(module_name(call->module), module))) 1352 continue; 1353 1354 name = trace_event_name(call); 1355 1356 if (!name || !call->class || !call->class->reg) 1357 continue; 1358 1359 if (call->flags & TRACE_EVENT_FL_IGNORE_ENABLE) 1360 continue; 1361 1362 if (match && 1363 strcmp(match, name) != 0 && 1364 strcmp(match, call->class->system) != 0) 1365 continue; 1366 1367 if (sub && strcmp(sub, call->class->system) != 0) 1368 continue; 1369 1370 if (event && strcmp(event, name) != 0) 1371 continue; 1372 1373 ret = ftrace_event_enable_disable(file, set); 1374 1375 /* 1376 * Save the first error and return that. Some events 1377 * may still have been enabled, but let the user 1378 * know that something went wrong. 1379 */ 1380 if (ret && !eret) 1381 eret = ret; 1382 1383 ret = eret; 1384 } 1385 1386 /* 1387 * If this is a module setting and nothing was found, 1388 * check if the module was loaded. If it wasn't cache it. 1389 */ 1390 if (module && ret == -EINVAL && !eret) 1391 ret = cache_mod(tr, module, set, match, sub, event); 1392 1393 return ret; 1394 } 1395 1396 static int __ftrace_set_clr_event(struct trace_array *tr, const char *match, 1397 const char *sub, const char *event, int set, 1398 const char *mod) 1399 { 1400 int ret; 1401 1402 mutex_lock(&event_mutex); 1403 ret = __ftrace_set_clr_event_nolock(tr, match, sub, event, set, mod); 1404 mutex_unlock(&event_mutex); 1405 1406 return ret; 1407 } 1408 1409 int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set) 1410 { 1411 char *event = NULL, *sub = NULL, *match, *mod; 1412 int ret; 1413 1414 if (!tr) 1415 return -ENOENT; 1416 1417 /* Modules events can be appended with :mod:<module> */ 1418 mod = strstr(buf, ":mod:"); 1419 if (mod) { 1420 *mod = '\0'; 1421 /* move to the module name */ 1422 mod += 5; 1423 } 1424 1425 /* 1426 * The buf format can be <subsystem>:<event-name> 1427 * *:<event-name> means any event by that name. 1428 * :<event-name> is the same. 1429 * 1430 * <subsystem>:* means all events in that subsystem 1431 * <subsystem>: means the same. 1432 * 1433 * <name> (no ':') means all events in a subsystem with 1434 * the name <name> or any event that matches <name> 1435 */ 1436 1437 match = strsep(&buf, ":"); 1438 if (buf) { 1439 sub = match; 1440 event = buf; 1441 match = NULL; 1442 1443 if (!strlen(sub) || strcmp(sub, "*") == 0) 1444 sub = NULL; 1445 if (!strlen(event) || strcmp(event, "*") == 0) 1446 event = NULL; 1447 } else if (mod) { 1448 /* Allow wildcard for no length or star */ 1449 if (!strlen(match) || strcmp(match, "*") == 0) 1450 match = NULL; 1451 } 1452 1453 ret = __ftrace_set_clr_event(tr, match, sub, event, set, mod); 1454 1455 /* Put back the colon to allow this to be called again */ 1456 if (buf) 1457 *(buf - 1) = ':'; 1458 1459 return ret; 1460 } 1461 1462 /** 1463 * trace_set_clr_event - enable or disable an event 1464 * @system: system name to match (NULL for any system) 1465 * @event: event name to match (NULL for all events, within system) 1466 * @set: 1 to enable, 0 to disable 1467 * 1468 * This is a way for other parts of the kernel to enable or disable 1469 * event recording. 1470 * 1471 * Returns 0 on success, -EINVAL if the parameters do not match any 1472 * registered events. 1473 */ 1474 int trace_set_clr_event(const char *system, const char *event, int set) 1475 { 1476 struct trace_array *tr = top_trace_array(); 1477 1478 if (!tr) 1479 return -ENODEV; 1480 1481 return __ftrace_set_clr_event(tr, NULL, system, event, set, NULL); 1482 } 1483 EXPORT_SYMBOL_GPL(trace_set_clr_event); 1484 1485 /** 1486 * trace_array_set_clr_event - enable or disable an event for a trace array. 1487 * @tr: concerned trace array. 1488 * @system: system name to match (NULL for any system) 1489 * @event: event name to match (NULL for all events, within system) 1490 * @enable: true to enable, false to disable 1491 * 1492 * This is a way for other parts of the kernel to enable or disable 1493 * event recording. 1494 * 1495 * Returns 0 on success, -EINVAL if the parameters do not match any 1496 * registered events. 1497 */ 1498 int trace_array_set_clr_event(struct trace_array *tr, const char *system, 1499 const char *event, bool enable) 1500 { 1501 int set; 1502 1503 if (!tr) 1504 return -ENOENT; 1505 1506 set = (enable == true) ? 1 : 0; 1507 return __ftrace_set_clr_event(tr, NULL, system, event, set, NULL); 1508 } 1509 EXPORT_SYMBOL_GPL(trace_array_set_clr_event); 1510 1511 /* 128 should be much more than enough */ 1512 #define EVENT_BUF_SIZE 127 1513 1514 static ssize_t 1515 ftrace_event_write(struct file *file, const char __user *ubuf, 1516 size_t cnt, loff_t *ppos) 1517 { 1518 struct trace_parser parser; 1519 struct seq_file *m = file->private_data; 1520 struct trace_array *tr = m->private; 1521 ssize_t read, ret; 1522 1523 if (!cnt) 1524 return 0; 1525 1526 ret = tracing_update_buffers(tr); 1527 if (ret < 0) 1528 return ret; 1529 1530 if (trace_parser_get_init(&parser, EVENT_BUF_SIZE + 1)) 1531 return -ENOMEM; 1532 1533 read = trace_get_user(&parser, ubuf, cnt, ppos); 1534 1535 if (read >= 0 && trace_parser_loaded((&parser))) { 1536 int set = 1; 1537 1538 if (*parser.buffer == '!') 1539 set = 0; 1540 1541 ret = ftrace_set_clr_event(tr, parser.buffer + !set, set); 1542 if (ret) 1543 goto out_put; 1544 } 1545 1546 ret = read; 1547 1548 out_put: 1549 trace_parser_put(&parser); 1550 1551 return ret; 1552 } 1553 1554 static void * 1555 t_next(struct seq_file *m, void *v, loff_t *pos) 1556 { 1557 struct trace_event_file *file = v; 1558 struct trace_event_call *call; 1559 struct trace_array *tr = m->private; 1560 1561 (*pos)++; 1562 1563 list_for_each_entry_continue(file, &tr->events, list) { 1564 call = file->event_call; 1565 /* 1566 * The ftrace subsystem is for showing formats only. 1567 * They can not be enabled or disabled via the event files. 1568 */ 1569 if (call->class && call->class->reg && 1570 !(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)) 1571 return file; 1572 } 1573 1574 return NULL; 1575 } 1576 1577 static void *t_start(struct seq_file *m, loff_t *pos) 1578 { 1579 struct trace_event_file *file; 1580 struct trace_array *tr = m->private; 1581 loff_t l; 1582 1583 mutex_lock(&event_mutex); 1584 1585 file = list_entry(&tr->events, struct trace_event_file, list); 1586 for (l = 0; l <= *pos; ) { 1587 file = t_next(m, file, &l); 1588 if (!file) 1589 break; 1590 } 1591 return file; 1592 } 1593 1594 enum set_event_iter_type { 1595 SET_EVENT_FILE, 1596 SET_EVENT_MOD, 1597 }; 1598 1599 struct set_event_iter { 1600 enum set_event_iter_type type; 1601 union { 1602 struct trace_event_file *file; 1603 struct event_mod_load *event_mod; 1604 }; 1605 }; 1606 1607 static void * 1608 s_next(struct seq_file *m, void *v, loff_t *pos) 1609 { 1610 struct set_event_iter *iter = v; 1611 struct trace_event_file *file; 1612 struct trace_array *tr = m->private; 1613 1614 (*pos)++; 1615 1616 if (iter->type == SET_EVENT_FILE) { 1617 file = iter->file; 1618 list_for_each_entry_continue(file, &tr->events, list) { 1619 if (file->flags & EVENT_FILE_FL_ENABLED) { 1620 iter->file = file; 1621 return iter; 1622 } 1623 } 1624 #ifdef CONFIG_MODULES 1625 iter->type = SET_EVENT_MOD; 1626 iter->event_mod = list_entry(&tr->mod_events, struct event_mod_load, list); 1627 #endif 1628 } 1629 1630 #ifdef CONFIG_MODULES 1631 list_for_each_entry_continue(iter->event_mod, &tr->mod_events, list) 1632 return iter; 1633 #endif 1634 1635 /* 1636 * The iter is allocated in s_start() and passed via the 'v' 1637 * parameter. To stop the iterator, NULL must be returned. But 1638 * the return value is what the 'v' parameter in s_stop() receives 1639 * and frees. Free iter here as it will no longer be used. 1640 */ 1641 kfree(iter); 1642 return NULL; 1643 } 1644 1645 static void *s_start(struct seq_file *m, loff_t *pos) 1646 { 1647 struct trace_array *tr = m->private; 1648 struct set_event_iter *iter; 1649 loff_t l; 1650 1651 iter = kzalloc_obj(*iter, GFP_KERNEL); 1652 mutex_lock(&event_mutex); 1653 if (!iter) 1654 return NULL; 1655 1656 iter->type = SET_EVENT_FILE; 1657 iter->file = list_entry(&tr->events, struct trace_event_file, list); 1658 1659 for (l = 0; l <= *pos; ) { 1660 iter = s_next(m, iter, &l); 1661 if (!iter) 1662 break; 1663 } 1664 return iter; 1665 } 1666 1667 static int t_show(struct seq_file *m, void *v) 1668 { 1669 struct trace_event_file *file = v; 1670 struct trace_event_call *call = file->event_call; 1671 1672 if (strcmp(call->class->system, TRACE_SYSTEM) != 0) 1673 seq_printf(m, "%s:", call->class->system); 1674 seq_printf(m, "%s\n", trace_event_name(call)); 1675 1676 return 0; 1677 } 1678 1679 static void t_stop(struct seq_file *m, void *p) 1680 { 1681 mutex_unlock(&event_mutex); 1682 } 1683 1684 static int get_call_len(struct trace_event_call *call) 1685 { 1686 int len; 1687 1688 /* Get the length of "<system>:<event>" */ 1689 len = strlen(call->class->system) + 1; 1690 len += strlen(trace_event_name(call)); 1691 1692 /* Set the index to 32 bytes to separate event from data */ 1693 return len >= 32 ? 1 : 32 - len; 1694 } 1695 1696 /** 1697 * t_show_filters - seq_file callback to display active event filters 1698 * @m: The seq_file interface for formatted output 1699 * @v: The current trace_event_file being iterated 1700 * 1701 * Identifies and prints active filters for the current event file in the 1702 * iteration. If a filter is applied to the current event and, if so, 1703 * prints the system name, event name, and the filter string. 1704 */ 1705 static int t_show_filters(struct seq_file *m, void *v) 1706 { 1707 struct trace_event_file *file = v; 1708 struct trace_event_call *call = file->event_call; 1709 struct event_filter *filter; 1710 int len; 1711 1712 guard(rcu)(); 1713 filter = rcu_dereference(file->filter); 1714 if (!filter || !filter->filter_string) 1715 return 0; 1716 1717 len = get_call_len(call); 1718 1719 seq_printf(m, "%s:%s%*.s%s\n", call->class->system, 1720 trace_event_name(call), len, "", filter->filter_string); 1721 1722 return 0; 1723 } 1724 1725 /** 1726 * t_show_triggers - seq_file callback to display active event triggers 1727 * @m: The seq_file interface for formatted output 1728 * @v: The current trace_event_file being iterated 1729 * 1730 * Iterates through the trigger list of the current event file and prints 1731 * each active trigger's configuration using its associated print 1732 * operation. 1733 */ 1734 static int t_show_triggers(struct seq_file *m, void *v) 1735 { 1736 struct trace_event_file *file = v; 1737 struct trace_event_call *call = file->event_call; 1738 struct event_trigger_data *data; 1739 int len; 1740 1741 /* 1742 * The event_mutex is held by t_start(), protecting the 1743 * file->triggers list traversal. 1744 */ 1745 if (list_empty(&file->triggers)) 1746 return 0; 1747 1748 len = get_call_len(call); 1749 1750 list_for_each_entry_rcu(data, &file->triggers, list) { 1751 seq_printf(m, "%s:%s%*.s", call->class->system, 1752 trace_event_name(call), len, ""); 1753 1754 data->cmd_ops->print(m, data); 1755 } 1756 1757 return 0; 1758 } 1759 1760 #ifdef CONFIG_MODULES 1761 static int s_show(struct seq_file *m, void *v) 1762 { 1763 struct set_event_iter *iter = v; 1764 const char *system; 1765 const char *event; 1766 1767 if (iter->type == SET_EVENT_FILE) 1768 return t_show(m, iter->file); 1769 1770 /* When match is set, system and event are not */ 1771 if (iter->event_mod->match) { 1772 seq_printf(m, "%s:mod:%s\n", iter->event_mod->match, 1773 iter->event_mod->module); 1774 return 0; 1775 } 1776 1777 system = iter->event_mod->system ? : "*"; 1778 event = iter->event_mod->event ? : "*"; 1779 1780 seq_printf(m, "%s:%s:mod:%s\n", system, event, iter->event_mod->module); 1781 1782 return 0; 1783 } 1784 #else /* CONFIG_MODULES */ 1785 static int s_show(struct seq_file *m, void *v) 1786 { 1787 struct set_event_iter *iter = v; 1788 1789 return t_show(m, iter->file); 1790 } 1791 #endif 1792 1793 static void s_stop(struct seq_file *m, void *v) 1794 { 1795 kfree(v); 1796 t_stop(m, NULL); 1797 } 1798 1799 static void * 1800 __next(struct seq_file *m, void *v, loff_t *pos, int type) 1801 { 1802 struct trace_array *tr = m->private; 1803 struct trace_pid_list *pid_list; 1804 1805 if (type == TRACE_PIDS) 1806 pid_list = rcu_dereference_sched(tr->filtered_pids); 1807 else 1808 pid_list = rcu_dereference_sched(tr->filtered_no_pids); 1809 1810 return trace_pid_next(pid_list, v, pos); 1811 } 1812 1813 static void * 1814 p_next(struct seq_file *m, void *v, loff_t *pos) 1815 { 1816 return __next(m, v, pos, TRACE_PIDS); 1817 } 1818 1819 static void * 1820 np_next(struct seq_file *m, void *v, loff_t *pos) 1821 { 1822 return __next(m, v, pos, TRACE_NO_PIDS); 1823 } 1824 1825 static void *__start(struct seq_file *m, loff_t *pos, int type) 1826 __acquires(RCU) 1827 { 1828 struct trace_pid_list *pid_list; 1829 struct trace_array *tr = m->private; 1830 1831 /* 1832 * Grab the mutex, to keep calls to p_next() having the same 1833 * tr->filtered_pids as p_start() has. 1834 * If we just passed the tr->filtered_pids around, then RCU would 1835 * have been enough, but doing that makes things more complex. 1836 */ 1837 mutex_lock(&event_mutex); 1838 rcu_read_lock_sched(); 1839 1840 if (type == TRACE_PIDS) 1841 pid_list = rcu_dereference_sched(tr->filtered_pids); 1842 else 1843 pid_list = rcu_dereference_sched(tr->filtered_no_pids); 1844 1845 if (!pid_list) 1846 return NULL; 1847 1848 return trace_pid_start(pid_list, pos); 1849 } 1850 1851 static void *p_start(struct seq_file *m, loff_t *pos) 1852 __acquires(RCU) 1853 { 1854 return __start(m, pos, TRACE_PIDS); 1855 } 1856 1857 static void *np_start(struct seq_file *m, loff_t *pos) 1858 __acquires(RCU) 1859 { 1860 return __start(m, pos, TRACE_NO_PIDS); 1861 } 1862 1863 static void p_stop(struct seq_file *m, void *p) 1864 __releases(RCU) 1865 { 1866 rcu_read_unlock_sched(); 1867 mutex_unlock(&event_mutex); 1868 } 1869 1870 static ssize_t 1871 event_enable_read(struct file *filp, char __user *ubuf, size_t cnt, 1872 loff_t *ppos) 1873 { 1874 struct trace_event_file *file; 1875 unsigned long flags; 1876 char buf[4] = "0"; 1877 1878 mutex_lock(&event_mutex); 1879 file = event_file_file(filp); 1880 if (likely(file)) 1881 flags = file->flags; 1882 mutex_unlock(&event_mutex); 1883 1884 if (!file) 1885 return -ENODEV; 1886 1887 if (flags & EVENT_FILE_FL_ENABLED && 1888 !(flags & EVENT_FILE_FL_SOFT_DISABLED)) 1889 strcpy(buf, "1"); 1890 1891 if (atomic_read(&file->sm_ref) != 0) 1892 strcat(buf, "*"); 1893 1894 strcat(buf, "\n"); 1895 1896 return simple_read_from_buffer(ubuf, cnt, ppos, buf, strlen(buf)); 1897 } 1898 1899 static ssize_t 1900 event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, 1901 loff_t *ppos) 1902 { 1903 struct trace_event_file *file; 1904 unsigned long val; 1905 int ret; 1906 1907 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 1908 if (ret) 1909 return ret; 1910 1911 guard(mutex)(&event_mutex); 1912 1913 switch (val) { 1914 case 0: 1915 case 1: 1916 file = event_file_file(filp); 1917 if (!file) 1918 return -ENODEV; 1919 ret = tracing_update_buffers(file->tr); 1920 if (ret < 0) 1921 return ret; 1922 ret = ftrace_event_enable_disable(file, val); 1923 if (ret < 0) 1924 return ret; 1925 break; 1926 1927 default: 1928 return -EINVAL; 1929 } 1930 1931 *ppos += cnt; 1932 1933 return cnt; 1934 } 1935 1936 /* 1937 * Returns: 1938 * 0 : no events exist? 1939 * 1 : all events are disabled 1940 * 2 : all events are enabled 1941 * 3 : some events are enabled and some are enabled 1942 */ 1943 int trace_events_enabled(struct trace_array *tr, const char *system) 1944 { 1945 struct trace_event_call *call; 1946 struct trace_event_file *file; 1947 int set = 0; 1948 1949 guard(mutex)(&event_mutex); 1950 1951 list_for_each_entry(file, &tr->events, list) { 1952 call = file->event_call; 1953 if ((call->flags & TRACE_EVENT_FL_IGNORE_ENABLE) || 1954 !trace_event_name(call) || !call->class || !call->class->reg) 1955 continue; 1956 1957 if (system && strcmp(call->class->system, system) != 0) 1958 continue; 1959 1960 /* 1961 * We need to find out if all the events are set 1962 * or if all events or cleared, or if we have 1963 * a mixture. 1964 */ 1965 set |= (1 << !!(file->flags & EVENT_FILE_FL_ENABLED)); 1966 1967 /* 1968 * If we have a mixture, no need to look further. 1969 */ 1970 if (set == 3) 1971 break; 1972 } 1973 1974 return set; 1975 } 1976 1977 static ssize_t 1978 system_enable_read(struct file *filp, char __user *ubuf, size_t cnt, 1979 loff_t *ppos) 1980 { 1981 const char set_to_char[4] = { '?', '0', '1', 'X' }; 1982 struct trace_subsystem_dir *dir = filp->private_data; 1983 struct event_subsystem *system = dir->subsystem; 1984 struct trace_array *tr = dir->tr; 1985 char buf[2]; 1986 int set; 1987 int ret; 1988 1989 set = trace_events_enabled(tr, system ? system->name : NULL); 1990 1991 buf[0] = set_to_char[set]; 1992 buf[1] = '\n'; 1993 1994 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, 2); 1995 1996 return ret; 1997 } 1998 1999 static ssize_t 2000 system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, 2001 loff_t *ppos) 2002 { 2003 struct trace_subsystem_dir *dir = filp->private_data; 2004 struct event_subsystem *system = dir->subsystem; 2005 const char *name = NULL; 2006 unsigned long val; 2007 ssize_t ret; 2008 2009 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 2010 if (ret) 2011 return ret; 2012 2013 ret = tracing_update_buffers(dir->tr); 2014 if (ret < 0) 2015 return ret; 2016 2017 if (val != 0 && val != 1) 2018 return -EINVAL; 2019 2020 /* 2021 * Opening of "enable" adds a ref count to system, 2022 * so the name is safe to use. 2023 */ 2024 if (system) 2025 name = system->name; 2026 2027 ret = __ftrace_set_clr_event(dir->tr, NULL, name, NULL, val, NULL); 2028 if (ret) 2029 goto out; 2030 2031 ret = cnt; 2032 2033 out: 2034 *ppos += cnt; 2035 2036 return ret; 2037 } 2038 2039 enum { 2040 FORMAT_HEADER = 1, 2041 FORMAT_FIELD_SEPERATOR = 2, 2042 FORMAT_PRINTFMT = 3, 2043 }; 2044 2045 static void *f_next(struct seq_file *m, void *v, loff_t *pos) 2046 { 2047 struct trace_event_file *file = event_file_data(m->private); 2048 struct trace_event_call *call = file->event_call; 2049 struct list_head *common_head = &ftrace_common_fields; 2050 struct list_head *head = trace_get_fields(call); 2051 struct list_head *node = v; 2052 2053 (*pos)++; 2054 2055 switch ((unsigned long)v) { 2056 case FORMAT_HEADER: 2057 node = common_head; 2058 break; 2059 2060 case FORMAT_FIELD_SEPERATOR: 2061 node = head; 2062 break; 2063 2064 case FORMAT_PRINTFMT: 2065 /* all done */ 2066 return NULL; 2067 } 2068 2069 node = node->prev; 2070 if (node == common_head) 2071 return (void *)FORMAT_FIELD_SEPERATOR; 2072 else if (node == head) 2073 return (void *)FORMAT_PRINTFMT; 2074 else 2075 return node; 2076 } 2077 2078 static int f_show(struct seq_file *m, void *v) 2079 { 2080 struct trace_event_file *file = event_file_data(m->private); 2081 struct trace_event_call *call = file->event_call; 2082 struct ftrace_event_field *field; 2083 const char *array_descriptor; 2084 2085 switch ((unsigned long)v) { 2086 case FORMAT_HEADER: 2087 seq_printf(m, "name: %s\n", trace_event_name(call)); 2088 seq_printf(m, "ID: %d\n", call->event.type); 2089 seq_puts(m, "format:\n"); 2090 return 0; 2091 2092 case FORMAT_FIELD_SEPERATOR: 2093 seq_putc(m, '\n'); 2094 return 0; 2095 2096 case FORMAT_PRINTFMT: 2097 seq_printf(m, "\nprint fmt: %s\n", 2098 call->print_fmt); 2099 return 0; 2100 } 2101 2102 field = list_entry(v, struct ftrace_event_field, link); 2103 /* 2104 * Smartly shows the array type(except dynamic array). 2105 * Normal: 2106 * field:TYPE VAR 2107 * If TYPE := TYPE[LEN], it is shown: 2108 * field:TYPE VAR[LEN] 2109 */ 2110 array_descriptor = strchr(field->type, '['); 2111 2112 if (str_has_prefix(field->type, "__data_loc")) 2113 array_descriptor = NULL; 2114 2115 if (!array_descriptor) 2116 seq_printf(m, "\tfield:%s %s;\toffset:%u;\tsize:%u;\tsigned:%d;\n", 2117 field->type, field->name, field->offset, 2118 field->size, !!field->is_signed); 2119 else if (field->len) 2120 seq_printf(m, "\tfield:%.*s %s[%d];\toffset:%u;\tsize:%u;\tsigned:%d;\n", 2121 (int)(array_descriptor - field->type), 2122 field->type, field->name, 2123 field->len, field->offset, 2124 field->size, !!field->is_signed); 2125 else 2126 seq_printf(m, "\tfield:%.*s %s[];\toffset:%u;\tsize:%u;\tsigned:%d;\n", 2127 (int)(array_descriptor - field->type), 2128 field->type, field->name, 2129 field->offset, field->size, !!field->is_signed); 2130 2131 return 0; 2132 } 2133 2134 static void *f_start(struct seq_file *m, loff_t *pos) 2135 { 2136 struct trace_event_file *file; 2137 void *p = (void *)FORMAT_HEADER; 2138 loff_t l = 0; 2139 2140 /* ->stop() is called even if ->start() fails */ 2141 mutex_lock(&event_mutex); 2142 file = event_file_file(m->private); 2143 if (!file) 2144 return ERR_PTR(-ENODEV); 2145 2146 while (l < *pos && p) 2147 p = f_next(m, p, &l); 2148 2149 return p; 2150 } 2151 2152 static void f_stop(struct seq_file *m, void *p) 2153 { 2154 mutex_unlock(&event_mutex); 2155 } 2156 2157 static const struct seq_operations trace_format_seq_ops = { 2158 .start = f_start, 2159 .next = f_next, 2160 .stop = f_stop, 2161 .show = f_show, 2162 }; 2163 2164 static int trace_format_open(struct inode *inode, struct file *file) 2165 { 2166 struct seq_file *m; 2167 int ret; 2168 2169 /* Do we want to hide event format files on tracefs lockdown? */ 2170 2171 ret = seq_open(file, &trace_format_seq_ops); 2172 if (ret < 0) 2173 return ret; 2174 2175 m = file->private_data; 2176 m->private = file; 2177 2178 return 0; 2179 } 2180 2181 #ifdef CONFIG_PERF_EVENTS 2182 static ssize_t 2183 event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) 2184 { 2185 int id = (long)event_file_data(filp); 2186 char buf[32]; 2187 int len; 2188 2189 if (unlikely(!id)) 2190 return -ENODEV; 2191 2192 len = sprintf(buf, "%d\n", id); 2193 2194 return simple_read_from_buffer(ubuf, cnt, ppos, buf, len); 2195 } 2196 #endif 2197 2198 static ssize_t 2199 event_filter_read(struct file *filp, char __user *ubuf, size_t cnt, 2200 loff_t *ppos) 2201 { 2202 struct trace_event_file *file; 2203 struct trace_seq *s; 2204 int r = -ENODEV; 2205 2206 if (*ppos) 2207 return 0; 2208 2209 s = kmalloc_obj(*s, GFP_KERNEL); 2210 2211 if (!s) 2212 return -ENOMEM; 2213 2214 trace_seq_init(s); 2215 2216 mutex_lock(&event_mutex); 2217 file = event_file_file(filp); 2218 if (file) 2219 print_event_filter(file, s); 2220 mutex_unlock(&event_mutex); 2221 2222 if (file) 2223 r = simple_read_from_buffer(ubuf, cnt, ppos, 2224 s->buffer, trace_seq_used(s)); 2225 2226 kfree(s); 2227 2228 return r; 2229 } 2230 2231 static ssize_t 2232 event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, 2233 loff_t *ppos) 2234 { 2235 struct trace_event_file *file; 2236 char *buf; 2237 int err = -ENODEV; 2238 2239 if (cnt >= PAGE_SIZE) 2240 return -EINVAL; 2241 2242 buf = memdup_user_nul(ubuf, cnt); 2243 if (IS_ERR(buf)) 2244 return PTR_ERR(buf); 2245 2246 mutex_lock(&event_mutex); 2247 file = event_file_file(filp); 2248 if (file) { 2249 if (file->flags & EVENT_FILE_FL_FREED) 2250 err = -ENODEV; 2251 else 2252 err = apply_event_filter(file, buf); 2253 } 2254 mutex_unlock(&event_mutex); 2255 2256 kfree(buf); 2257 if (err < 0) 2258 return err; 2259 2260 *ppos += cnt; 2261 2262 return cnt; 2263 } 2264 2265 static LIST_HEAD(event_subsystems); 2266 2267 static int subsystem_open(struct inode *inode, struct file *filp) 2268 { 2269 struct trace_subsystem_dir *dir = NULL, *iter_dir; 2270 struct trace_array *tr = NULL, *iter_tr; 2271 struct event_subsystem *system = NULL; 2272 int ret; 2273 2274 if (unlikely(tracing_disabled)) 2275 return -ENODEV; 2276 2277 /* Make sure the system still exists */ 2278 mutex_lock(&event_mutex); 2279 mutex_lock(&trace_types_lock); 2280 list_for_each_entry(iter_tr, &ftrace_trace_arrays, list) { 2281 list_for_each_entry(iter_dir, &iter_tr->systems, list) { 2282 if (iter_dir == inode->i_private) { 2283 /* Don't open systems with no events */ 2284 tr = iter_tr; 2285 dir = iter_dir; 2286 if (dir->nr_events) { 2287 __get_system_dir(dir); 2288 system = dir->subsystem; 2289 } 2290 goto exit_loop; 2291 } 2292 } 2293 } 2294 exit_loop: 2295 mutex_unlock(&trace_types_lock); 2296 mutex_unlock(&event_mutex); 2297 2298 if (!system) 2299 return -ENODEV; 2300 2301 /* Still need to increment the ref count of the system */ 2302 if (trace_array_get(tr) < 0) { 2303 put_system(dir); 2304 return -ENODEV; 2305 } 2306 2307 ret = tracing_open_generic(inode, filp); 2308 if (ret < 0) { 2309 trace_array_put(tr); 2310 put_system(dir); 2311 } 2312 2313 return ret; 2314 } 2315 2316 static int system_tr_open(struct inode *inode, struct file *filp) 2317 { 2318 struct trace_subsystem_dir *dir; 2319 struct trace_array *tr = inode->i_private; 2320 int ret; 2321 2322 /* Make a temporary dir that has no system but points to tr */ 2323 dir = kzalloc_obj(*dir, GFP_KERNEL); 2324 if (!dir) 2325 return -ENOMEM; 2326 2327 ret = tracing_open_generic_tr(inode, filp); 2328 if (ret < 0) { 2329 kfree(dir); 2330 return ret; 2331 } 2332 dir->tr = tr; 2333 filp->private_data = dir; 2334 2335 return 0; 2336 } 2337 2338 static int subsystem_release(struct inode *inode, struct file *file) 2339 { 2340 struct trace_subsystem_dir *dir = file->private_data; 2341 2342 trace_array_put(dir->tr); 2343 2344 /* 2345 * If dir->subsystem is NULL, then this is a temporary 2346 * descriptor that was made for a trace_array to enable 2347 * all subsystems. 2348 */ 2349 if (dir->subsystem) 2350 put_system(dir); 2351 else 2352 kfree(dir); 2353 2354 return 0; 2355 } 2356 2357 static ssize_t 2358 subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt, 2359 loff_t *ppos) 2360 { 2361 struct trace_subsystem_dir *dir = filp->private_data; 2362 struct event_subsystem *system = dir->subsystem; 2363 struct trace_seq *s; 2364 int r; 2365 2366 if (*ppos) 2367 return 0; 2368 2369 s = kmalloc_obj(*s, GFP_KERNEL); 2370 if (!s) 2371 return -ENOMEM; 2372 2373 trace_seq_init(s); 2374 2375 print_subsystem_event_filter(system, s); 2376 r = simple_read_from_buffer(ubuf, cnt, ppos, 2377 s->buffer, trace_seq_used(s)); 2378 2379 kfree(s); 2380 2381 return r; 2382 } 2383 2384 static ssize_t 2385 subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, 2386 loff_t *ppos) 2387 { 2388 struct trace_subsystem_dir *dir = filp->private_data; 2389 char *buf; 2390 int err; 2391 2392 if (cnt >= PAGE_SIZE) 2393 return -EINVAL; 2394 2395 buf = memdup_user_nul(ubuf, cnt); 2396 if (IS_ERR(buf)) 2397 return PTR_ERR(buf); 2398 2399 err = apply_subsystem_event_filter(dir, buf); 2400 kfree(buf); 2401 if (err < 0) 2402 return err; 2403 2404 *ppos += cnt; 2405 2406 return cnt; 2407 } 2408 2409 static ssize_t 2410 show_header_page_file(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) 2411 { 2412 struct trace_array *tr = filp->private_data; 2413 struct trace_seq *s; 2414 int r; 2415 2416 if (*ppos) 2417 return 0; 2418 2419 s = kmalloc_obj(*s, GFP_KERNEL); 2420 if (!s) 2421 return -ENOMEM; 2422 2423 trace_seq_init(s); 2424 2425 ring_buffer_print_page_header(tr->array_buffer.buffer, s); 2426 r = simple_read_from_buffer(ubuf, cnt, ppos, 2427 s->buffer, trace_seq_used(s)); 2428 2429 kfree(s); 2430 2431 return r; 2432 } 2433 2434 static ssize_t 2435 show_header_event_file(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) 2436 { 2437 struct trace_seq *s; 2438 int r; 2439 2440 if (*ppos) 2441 return 0; 2442 2443 s = kmalloc_obj(*s, GFP_KERNEL); 2444 if (!s) 2445 return -ENOMEM; 2446 2447 trace_seq_init(s); 2448 2449 ring_buffer_print_entry_header(s); 2450 r = simple_read_from_buffer(ubuf, cnt, ppos, 2451 s->buffer, trace_seq_used(s)); 2452 2453 kfree(s); 2454 2455 return r; 2456 } 2457 2458 static void ignore_task_cpu(void *data) 2459 { 2460 struct trace_array *tr = data; 2461 struct trace_pid_list *pid_list; 2462 struct trace_pid_list *no_pid_list; 2463 2464 /* 2465 * This function is called by on_each_cpu() while the 2466 * event_mutex is held. 2467 */ 2468 pid_list = rcu_dereference_protected(tr->filtered_pids, 2469 mutex_is_locked(&event_mutex)); 2470 no_pid_list = rcu_dereference_protected(tr->filtered_no_pids, 2471 mutex_is_locked(&event_mutex)); 2472 2473 this_cpu_write(tr->array_buffer.data->ignore_pid, 2474 trace_ignore_this_task(pid_list, no_pid_list, current)); 2475 } 2476 2477 static void register_pid_events(struct trace_array *tr) 2478 { 2479 /* 2480 * Register a probe that is called before all other probes 2481 * to set ignore_pid if next or prev do not match. 2482 * Register a probe this is called after all other probes 2483 * to only keep ignore_pid set if next pid matches. 2484 */ 2485 register_trace_prio_sched_switch(event_filter_pid_sched_switch_probe_pre, 2486 tr, INT_MAX); 2487 register_trace_prio_sched_switch(event_filter_pid_sched_switch_probe_post, 2488 tr, 0); 2489 2490 register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_pre, 2491 tr, INT_MAX); 2492 register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_post, 2493 tr, 0); 2494 2495 register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_pre, 2496 tr, INT_MAX); 2497 register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_post, 2498 tr, 0); 2499 2500 register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_pre, 2501 tr, INT_MAX); 2502 register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_post, 2503 tr, 0); 2504 } 2505 2506 static ssize_t 2507 event_pid_write(struct file *filp, const char __user *ubuf, 2508 size_t cnt, loff_t *ppos, int type) 2509 { 2510 struct seq_file *m = filp->private_data; 2511 struct trace_array *tr = m->private; 2512 struct trace_pid_list *filtered_pids = NULL; 2513 struct trace_pid_list *other_pids = NULL; 2514 struct trace_pid_list *pid_list; 2515 struct trace_event_file *file; 2516 ssize_t ret; 2517 2518 if (!cnt) 2519 return 0; 2520 2521 ret = tracing_update_buffers(tr); 2522 if (ret < 0) 2523 return ret; 2524 2525 guard(mutex)(&event_mutex); 2526 2527 if (type == TRACE_PIDS) { 2528 filtered_pids = rcu_dereference_protected(tr->filtered_pids, 2529 lockdep_is_held(&event_mutex)); 2530 other_pids = rcu_dereference_protected(tr->filtered_no_pids, 2531 lockdep_is_held(&event_mutex)); 2532 } else { 2533 filtered_pids = rcu_dereference_protected(tr->filtered_no_pids, 2534 lockdep_is_held(&event_mutex)); 2535 other_pids = rcu_dereference_protected(tr->filtered_pids, 2536 lockdep_is_held(&event_mutex)); 2537 } 2538 2539 ret = trace_pid_write(filtered_pids, &pid_list, ubuf, cnt); 2540 if (ret < 0) 2541 return ret; 2542 2543 if (type == TRACE_PIDS) 2544 rcu_assign_pointer(tr->filtered_pids, pid_list); 2545 else 2546 rcu_assign_pointer(tr->filtered_no_pids, pid_list); 2547 2548 list_for_each_entry(file, &tr->events, list) { 2549 set_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags); 2550 } 2551 2552 if (filtered_pids) { 2553 tracepoint_synchronize_unregister(); 2554 trace_pid_list_free(filtered_pids); 2555 } else if (pid_list && !other_pids) { 2556 register_pid_events(tr); 2557 } 2558 2559 /* 2560 * Ignoring of pids is done at task switch. But we have to 2561 * check for those tasks that are currently running. 2562 * Always do this in case a pid was appended or removed. 2563 */ 2564 on_each_cpu(ignore_task_cpu, tr, 1); 2565 2566 *ppos += ret; 2567 2568 return ret; 2569 } 2570 2571 static ssize_t 2572 ftrace_event_pid_write(struct file *filp, const char __user *ubuf, 2573 size_t cnt, loff_t *ppos) 2574 { 2575 return event_pid_write(filp, ubuf, cnt, ppos, TRACE_PIDS); 2576 } 2577 2578 static ssize_t 2579 ftrace_event_npid_write(struct file *filp, const char __user *ubuf, 2580 size_t cnt, loff_t *ppos) 2581 { 2582 return event_pid_write(filp, ubuf, cnt, ppos, TRACE_NO_PIDS); 2583 } 2584 2585 static int ftrace_event_avail_open(struct inode *inode, struct file *file); 2586 static int ftrace_event_set_open(struct inode *inode, struct file *file); 2587 static int ftrace_event_show_filters_open(struct inode *inode, struct file *file); 2588 static int ftrace_event_show_triggers_open(struct inode *inode, struct file *file); 2589 static int ftrace_event_set_pid_open(struct inode *inode, struct file *file); 2590 static int ftrace_event_set_npid_open(struct inode *inode, struct file *file); 2591 static int ftrace_event_release(struct inode *inode, struct file *file); 2592 2593 static const struct seq_operations show_event_seq_ops = { 2594 .start = t_start, 2595 .next = t_next, 2596 .show = t_show, 2597 .stop = t_stop, 2598 }; 2599 2600 static const struct seq_operations show_set_event_seq_ops = { 2601 .start = s_start, 2602 .next = s_next, 2603 .show = s_show, 2604 .stop = s_stop, 2605 }; 2606 2607 static const struct seq_operations show_show_event_filters_seq_ops = { 2608 .start = t_start, 2609 .next = t_next, 2610 .show = t_show_filters, 2611 .stop = t_stop, 2612 }; 2613 2614 static const struct seq_operations show_show_event_triggers_seq_ops = { 2615 .start = t_start, 2616 .next = t_next, 2617 .show = t_show_triggers, 2618 .stop = t_stop, 2619 }; 2620 2621 static const struct seq_operations show_set_pid_seq_ops = { 2622 .start = p_start, 2623 .next = p_next, 2624 .show = trace_pid_show, 2625 .stop = p_stop, 2626 }; 2627 2628 static const struct seq_operations show_set_no_pid_seq_ops = { 2629 .start = np_start, 2630 .next = np_next, 2631 .show = trace_pid_show, 2632 .stop = p_stop, 2633 }; 2634 2635 static const struct file_operations ftrace_avail_fops = { 2636 .open = ftrace_event_avail_open, 2637 .read = seq_read, 2638 .llseek = seq_lseek, 2639 .release = seq_release, 2640 }; 2641 2642 static const struct file_operations ftrace_set_event_fops = { 2643 .open = ftrace_event_set_open, 2644 .read = seq_read, 2645 .write = ftrace_event_write, 2646 .llseek = seq_lseek, 2647 .release = ftrace_event_release, 2648 }; 2649 2650 static const struct file_operations ftrace_show_event_filters_fops = { 2651 .open = ftrace_event_show_filters_open, 2652 .read = seq_read, 2653 .llseek = seq_lseek, 2654 .release = seq_release, 2655 }; 2656 2657 static const struct file_operations ftrace_show_event_triggers_fops = { 2658 .open = ftrace_event_show_triggers_open, 2659 .read = seq_read, 2660 .llseek = seq_lseek, 2661 .release = seq_release, 2662 }; 2663 2664 static const struct file_operations ftrace_set_event_pid_fops = { 2665 .open = ftrace_event_set_pid_open, 2666 .read = seq_read, 2667 .write = ftrace_event_pid_write, 2668 .llseek = seq_lseek, 2669 .release = ftrace_event_release, 2670 }; 2671 2672 static const struct file_operations ftrace_set_event_notrace_pid_fops = { 2673 .open = ftrace_event_set_npid_open, 2674 .read = seq_read, 2675 .write = ftrace_event_npid_write, 2676 .llseek = seq_lseek, 2677 .release = ftrace_event_release, 2678 }; 2679 2680 static const struct file_operations ftrace_enable_fops = { 2681 .open = tracing_open_file_tr, 2682 .read = event_enable_read, 2683 .write = event_enable_write, 2684 .release = tracing_release_file_tr, 2685 .llseek = default_llseek, 2686 }; 2687 2688 static const struct file_operations ftrace_event_format_fops = { 2689 .open = trace_format_open, 2690 .read = seq_read, 2691 .llseek = seq_lseek, 2692 .release = seq_release, 2693 }; 2694 2695 #ifdef CONFIG_PERF_EVENTS 2696 static const struct file_operations ftrace_event_id_fops = { 2697 .read = event_id_read, 2698 .llseek = default_llseek, 2699 }; 2700 #endif 2701 2702 static const struct file_operations ftrace_event_filter_fops = { 2703 .open = tracing_open_file_tr, 2704 .read = event_filter_read, 2705 .write = event_filter_write, 2706 .release = tracing_release_file_tr, 2707 .llseek = default_llseek, 2708 }; 2709 2710 static const struct file_operations ftrace_subsystem_filter_fops = { 2711 .open = subsystem_open, 2712 .read = subsystem_filter_read, 2713 .write = subsystem_filter_write, 2714 .llseek = default_llseek, 2715 .release = subsystem_release, 2716 }; 2717 2718 static const struct file_operations ftrace_system_enable_fops = { 2719 .open = subsystem_open, 2720 .read = system_enable_read, 2721 .write = system_enable_write, 2722 .llseek = default_llseek, 2723 .release = subsystem_release, 2724 }; 2725 2726 static const struct file_operations ftrace_tr_enable_fops = { 2727 .open = system_tr_open, 2728 .read = system_enable_read, 2729 .write = system_enable_write, 2730 .llseek = default_llseek, 2731 .release = subsystem_release, 2732 }; 2733 2734 static const struct file_operations ftrace_show_header_page_fops = { 2735 .open = tracing_open_generic_tr, 2736 .read = show_header_page_file, 2737 .llseek = default_llseek, 2738 .release = tracing_release_generic_tr, 2739 }; 2740 2741 static const struct file_operations ftrace_show_header_event_fops = { 2742 .open = tracing_open_generic_tr, 2743 .read = show_header_event_file, 2744 .llseek = default_llseek, 2745 .release = tracing_release_generic_tr, 2746 }; 2747 2748 static int 2749 ftrace_event_open(struct inode *inode, struct file *file, 2750 const struct seq_operations *seq_ops) 2751 { 2752 struct seq_file *m; 2753 int ret; 2754 2755 ret = security_locked_down(LOCKDOWN_TRACEFS); 2756 if (ret) 2757 return ret; 2758 2759 ret = seq_open(file, seq_ops); 2760 if (ret < 0) 2761 return ret; 2762 m = file->private_data; 2763 /* copy tr over to seq ops */ 2764 m->private = inode->i_private; 2765 2766 return ret; 2767 } 2768 2769 static int ftrace_event_release(struct inode *inode, struct file *file) 2770 { 2771 struct trace_array *tr = inode->i_private; 2772 2773 trace_array_put(tr); 2774 2775 return seq_release(inode, file); 2776 } 2777 2778 static int 2779 ftrace_event_avail_open(struct inode *inode, struct file *file) 2780 { 2781 const struct seq_operations *seq_ops = &show_event_seq_ops; 2782 2783 /* Checks for tracefs lockdown */ 2784 return ftrace_event_open(inode, file, seq_ops); 2785 } 2786 2787 static int 2788 ftrace_event_set_open(struct inode *inode, struct file *file) 2789 { 2790 const struct seq_operations *seq_ops = &show_set_event_seq_ops; 2791 struct trace_array *tr = inode->i_private; 2792 int ret; 2793 2794 ret = tracing_check_open_get_tr(tr); 2795 if (ret) 2796 return ret; 2797 2798 if ((file->f_mode & FMODE_WRITE) && 2799 (file->f_flags & O_TRUNC)) 2800 ftrace_clear_events(tr); 2801 2802 ret = ftrace_event_open(inode, file, seq_ops); 2803 if (ret < 0) 2804 trace_array_put(tr); 2805 return ret; 2806 } 2807 2808 /** 2809 * ftrace_event_show_filters_open - open interface for set_event_filters 2810 * @inode: The inode of the file 2811 * @file: The file being opened 2812 * 2813 * Connects the set_event_filters file to the sequence operations 2814 * required to iterate over and display active event filters. 2815 */ 2816 static int 2817 ftrace_event_show_filters_open(struct inode *inode, struct file *file) 2818 { 2819 return ftrace_event_open(inode, file, &show_show_event_filters_seq_ops); 2820 } 2821 2822 /** 2823 * ftrace_event_show_triggers_open - open interface for show_event_triggers 2824 * @inode: The inode of the file 2825 * @file: The file being opened 2826 * 2827 * Connects the show_event_triggers file to the sequence operations 2828 * required to iterate over and display active event triggers. 2829 */ 2830 static int 2831 ftrace_event_show_triggers_open(struct inode *inode, struct file *file) 2832 { 2833 return ftrace_event_open(inode, file, &show_show_event_triggers_seq_ops); 2834 } 2835 2836 static int 2837 ftrace_event_set_pid_open(struct inode *inode, struct file *file) 2838 { 2839 const struct seq_operations *seq_ops = &show_set_pid_seq_ops; 2840 struct trace_array *tr = inode->i_private; 2841 int ret; 2842 2843 ret = tracing_check_open_get_tr(tr); 2844 if (ret) 2845 return ret; 2846 2847 if ((file->f_mode & FMODE_WRITE) && 2848 (file->f_flags & O_TRUNC)) 2849 ftrace_clear_event_pids(tr, TRACE_PIDS); 2850 2851 ret = ftrace_event_open(inode, file, seq_ops); 2852 if (ret < 0) 2853 trace_array_put(tr); 2854 return ret; 2855 } 2856 2857 static int 2858 ftrace_event_set_npid_open(struct inode *inode, struct file *file) 2859 { 2860 const struct seq_operations *seq_ops = &show_set_no_pid_seq_ops; 2861 struct trace_array *tr = inode->i_private; 2862 int ret; 2863 2864 ret = tracing_check_open_get_tr(tr); 2865 if (ret) 2866 return ret; 2867 2868 if ((file->f_mode & FMODE_WRITE) && 2869 (file->f_flags & O_TRUNC)) 2870 ftrace_clear_event_pids(tr, TRACE_NO_PIDS); 2871 2872 ret = ftrace_event_open(inode, file, seq_ops); 2873 if (ret < 0) 2874 trace_array_put(tr); 2875 return ret; 2876 } 2877 2878 static struct event_subsystem * 2879 create_new_subsystem(const char *name) 2880 { 2881 struct event_subsystem *system; 2882 2883 /* need to create new entry */ 2884 system = kmalloc_obj(*system, GFP_KERNEL); 2885 if (!system) 2886 return NULL; 2887 2888 system->ref_count = 1; 2889 2890 /* Only allocate if dynamic (kprobes and modules) */ 2891 system->name = kstrdup_const(name, GFP_KERNEL); 2892 if (!system->name) 2893 goto out_free; 2894 2895 system->filter = kzalloc_obj(struct event_filter, GFP_KERNEL); 2896 if (!system->filter) 2897 goto out_free; 2898 2899 list_add(&system->list, &event_subsystems); 2900 2901 return system; 2902 2903 out_free: 2904 kfree_const(system->name); 2905 kfree(system); 2906 return NULL; 2907 } 2908 2909 static int system_callback(const char *name, umode_t *mode, void **data, 2910 const struct file_operations **fops) 2911 { 2912 if (strcmp(name, "filter") == 0) 2913 *fops = &ftrace_subsystem_filter_fops; 2914 2915 else if (strcmp(name, "enable") == 0) 2916 *fops = &ftrace_system_enable_fops; 2917 2918 else 2919 return 0; 2920 2921 *mode = TRACE_MODE_WRITE; 2922 return 1; 2923 } 2924 2925 static struct eventfs_inode * 2926 event_subsystem_dir(struct trace_array *tr, const char *name, 2927 struct trace_event_file *file, struct eventfs_inode *parent) 2928 { 2929 struct event_subsystem *system, *iter; 2930 struct trace_subsystem_dir *dir; 2931 struct eventfs_inode *ei; 2932 int nr_entries; 2933 static struct eventfs_entry system_entries[] = { 2934 { 2935 .name = "filter", 2936 .callback = system_callback, 2937 }, 2938 { 2939 .name = "enable", 2940 .callback = system_callback, 2941 } 2942 }; 2943 2944 /* First see if we did not already create this dir */ 2945 list_for_each_entry(dir, &tr->systems, list) { 2946 system = dir->subsystem; 2947 if (strcmp(system->name, name) == 0) { 2948 dir->nr_events++; 2949 file->system = dir; 2950 return dir->ei; 2951 } 2952 } 2953 2954 /* Now see if the system itself exists. */ 2955 system = NULL; 2956 list_for_each_entry(iter, &event_subsystems, list) { 2957 if (strcmp(iter->name, name) == 0) { 2958 system = iter; 2959 break; 2960 } 2961 } 2962 2963 dir = kmalloc_obj(*dir, GFP_KERNEL); 2964 if (!dir) 2965 goto out_fail; 2966 2967 if (!system) { 2968 system = create_new_subsystem(name); 2969 if (!system) 2970 goto out_free; 2971 } else 2972 __get_system(system); 2973 2974 /* ftrace only has directories no files */ 2975 if (strcmp(name, "ftrace") == 0) 2976 nr_entries = 0; 2977 else 2978 nr_entries = ARRAY_SIZE(system_entries); 2979 2980 ei = eventfs_create_dir(name, parent, system_entries, nr_entries, dir); 2981 if (IS_ERR(ei)) { 2982 pr_warn("Failed to create system directory %s\n", name); 2983 __put_system(system); 2984 goto out_free; 2985 } 2986 2987 dir->ei = ei; 2988 dir->tr = tr; 2989 dir->ref_count = 1; 2990 dir->nr_events = 1; 2991 dir->subsystem = system; 2992 file->system = dir; 2993 2994 list_add(&dir->list, &tr->systems); 2995 2996 return dir->ei; 2997 2998 out_free: 2999 kfree(dir); 3000 out_fail: 3001 /* Only print this message if failed on memory allocation */ 3002 if (!dir || !system) 3003 pr_warn("No memory to create event subsystem %s\n", name); 3004 return NULL; 3005 } 3006 3007 static int 3008 event_define_fields(struct trace_event_call *call) 3009 { 3010 struct list_head *head; 3011 int ret = 0; 3012 3013 /* 3014 * Other events may have the same class. Only update 3015 * the fields if they are not already defined. 3016 */ 3017 head = trace_get_fields(call); 3018 if (list_empty(head)) { 3019 struct trace_event_fields *field = call->class->fields_array; 3020 unsigned int offset = sizeof(struct trace_entry); 3021 3022 for (; field->type; field++) { 3023 if (field->type == TRACE_FUNCTION_TYPE) { 3024 field->define_fields(call); 3025 break; 3026 } 3027 3028 offset = ALIGN(offset, field->align); 3029 ret = trace_define_field_ext(call, field->type, field->name, 3030 offset, field->size, 3031 field->is_signed, field->filter_type, 3032 field->len, field->needs_test); 3033 if (WARN_ON_ONCE(ret)) { 3034 pr_err("error code is %d\n", ret); 3035 break; 3036 } 3037 3038 offset += field->size; 3039 } 3040 } 3041 3042 return ret; 3043 } 3044 3045 static int event_callback(const char *name, umode_t *mode, void **data, 3046 const struct file_operations **fops) 3047 { 3048 struct trace_event_file *file = *data; 3049 struct trace_event_call *call = file->event_call; 3050 3051 if (strcmp(name, "format") == 0) { 3052 *mode = TRACE_MODE_READ; 3053 *fops = &ftrace_event_format_fops; 3054 return 1; 3055 } 3056 3057 /* 3058 * Only event directories that can be enabled should have 3059 * triggers or filters, with the exception of the "print" 3060 * event that can have a "trigger" file. 3061 */ 3062 if (!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)) { 3063 if (call->class->reg && strcmp(name, "enable") == 0) { 3064 *mode = TRACE_MODE_WRITE; 3065 *fops = &ftrace_enable_fops; 3066 return 1; 3067 } 3068 3069 if (strcmp(name, "filter") == 0) { 3070 *mode = TRACE_MODE_WRITE; 3071 *fops = &ftrace_event_filter_fops; 3072 return 1; 3073 } 3074 } 3075 3076 if (!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE) || 3077 strcmp(trace_event_name(call), "print") == 0) { 3078 if (strcmp(name, "trigger") == 0) { 3079 *mode = TRACE_MODE_WRITE; 3080 *fops = &event_trigger_fops; 3081 return 1; 3082 } 3083 } 3084 3085 #ifdef CONFIG_PERF_EVENTS 3086 if (call->event.type && call->class->reg && 3087 strcmp(name, "id") == 0) { 3088 *mode = TRACE_MODE_READ; 3089 *data = (void *)(long)call->event.type; 3090 *fops = &ftrace_event_id_fops; 3091 return 1; 3092 } 3093 #endif 3094 3095 #ifdef CONFIG_HIST_TRIGGERS 3096 if (strcmp(name, "hist") == 0) { 3097 *mode = TRACE_MODE_READ; 3098 *fops = &event_hist_fops; 3099 return 1; 3100 } 3101 #endif 3102 #ifdef CONFIG_HIST_TRIGGERS_DEBUG 3103 if (strcmp(name, "hist_debug") == 0) { 3104 *mode = TRACE_MODE_READ; 3105 *fops = &event_hist_debug_fops; 3106 return 1; 3107 } 3108 #endif 3109 #ifdef CONFIG_TRACE_EVENT_INJECT 3110 if (call->event.type && call->class->reg && 3111 strcmp(name, "inject") == 0) { 3112 *mode = 0200; 3113 *fops = &event_inject_fops; 3114 return 1; 3115 } 3116 #endif 3117 return 0; 3118 } 3119 3120 /* The file is incremented on creation and freeing the enable file decrements it */ 3121 static void event_release(const char *name, void *data) 3122 { 3123 struct trace_event_file *file = data; 3124 3125 event_file_put(file); 3126 } 3127 3128 static int 3129 event_create_dir(struct eventfs_inode *parent, struct trace_event_file *file) 3130 { 3131 struct trace_event_call *call = file->event_call; 3132 struct trace_array *tr = file->tr; 3133 struct eventfs_inode *e_events; 3134 struct eventfs_inode *ei; 3135 const char *name; 3136 int nr_entries; 3137 int ret; 3138 static struct eventfs_entry event_entries[] = { 3139 { 3140 .name = "enable", 3141 .callback = event_callback, 3142 .release = event_release, 3143 }, 3144 { 3145 .name = "filter", 3146 .callback = event_callback, 3147 }, 3148 { 3149 .name = "trigger", 3150 .callback = event_callback, 3151 }, 3152 { 3153 .name = "format", 3154 .callback = event_callback, 3155 }, 3156 #ifdef CONFIG_PERF_EVENTS 3157 { 3158 .name = "id", 3159 .callback = event_callback, 3160 }, 3161 #endif 3162 #ifdef CONFIG_HIST_TRIGGERS 3163 { 3164 .name = "hist", 3165 .callback = event_callback, 3166 }, 3167 #endif 3168 #ifdef CONFIG_HIST_TRIGGERS_DEBUG 3169 { 3170 .name = "hist_debug", 3171 .callback = event_callback, 3172 }, 3173 #endif 3174 #ifdef CONFIG_TRACE_EVENT_INJECT 3175 { 3176 .name = "inject", 3177 .callback = event_callback, 3178 }, 3179 #endif 3180 }; 3181 3182 /* 3183 * If the trace point header did not define TRACE_SYSTEM 3184 * then the system would be called "TRACE_SYSTEM". This should 3185 * never happen. 3186 */ 3187 if (WARN_ON_ONCE(strcmp(call->class->system, TRACE_SYSTEM) == 0)) 3188 return -ENODEV; 3189 3190 e_events = event_subsystem_dir(tr, call->class->system, file, parent); 3191 if (!e_events) 3192 return -ENOMEM; 3193 3194 nr_entries = ARRAY_SIZE(event_entries); 3195 3196 name = trace_event_name(call); 3197 ei = eventfs_create_dir(name, e_events, event_entries, nr_entries, file); 3198 if (IS_ERR(ei)) { 3199 pr_warn("Could not create tracefs '%s' directory\n", name); 3200 return -1; 3201 } 3202 3203 file->ei = ei; 3204 3205 ret = event_define_fields(call); 3206 if (ret < 0) { 3207 pr_warn("Could not initialize trace point events/%s\n", name); 3208 return ret; 3209 } 3210 3211 /* Gets decremented on freeing of the "enable" file */ 3212 event_file_get(file); 3213 3214 return 0; 3215 } 3216 3217 static void remove_event_from_tracers(struct trace_event_call *call) 3218 { 3219 struct trace_event_file *file; 3220 struct trace_array *tr; 3221 3222 do_for_each_event_file_safe(tr, file) { 3223 if (file->event_call != call) 3224 continue; 3225 3226 remove_event_file_dir(file); 3227 /* 3228 * The do_for_each_event_file_safe() is 3229 * a double loop. After finding the call for this 3230 * trace_array, we use break to jump to the next 3231 * trace_array. 3232 */ 3233 break; 3234 } while_for_each_event_file(); 3235 } 3236 3237 static void event_remove(struct trace_event_call *call) 3238 { 3239 struct trace_array *tr; 3240 struct trace_event_file *file; 3241 3242 do_for_each_event_file(tr, file) { 3243 if (file->event_call != call) 3244 continue; 3245 3246 if (file->flags & EVENT_FILE_FL_WAS_ENABLED) 3247 tr->clear_trace = true; 3248 3249 ftrace_event_enable_disable(file, 0); 3250 /* 3251 * The do_for_each_event_file() is 3252 * a double loop. After finding the call for this 3253 * trace_array, we use break to jump to the next 3254 * trace_array. 3255 */ 3256 break; 3257 } while_for_each_event_file(); 3258 3259 if (call->event.funcs) 3260 __unregister_trace_event(&call->event); 3261 remove_event_from_tracers(call); 3262 list_del(&call->list); 3263 } 3264 3265 static int event_init(struct trace_event_call *call) 3266 { 3267 int ret = 0; 3268 const char *name; 3269 3270 name = trace_event_name(call); 3271 if (WARN_ON(!name)) 3272 return -EINVAL; 3273 3274 if (call->class->raw_init) { 3275 ret = call->class->raw_init(call); 3276 if (ret < 0 && ret != -ENOSYS) 3277 pr_warn("Could not initialize trace events/%s\n", name); 3278 } 3279 3280 return ret; 3281 } 3282 3283 static int 3284 __register_event(struct trace_event_call *call, struct module *mod) 3285 { 3286 int ret; 3287 3288 ret = event_init(call); 3289 if (ret < 0) 3290 return ret; 3291 3292 down_write(&trace_event_sem); 3293 list_add(&call->list, &ftrace_events); 3294 up_write(&trace_event_sem); 3295 3296 if (call->flags & TRACE_EVENT_FL_DYNAMIC) 3297 atomic_set(&call->refcnt, 0); 3298 else 3299 call->module = mod; 3300 3301 return 0; 3302 } 3303 3304 static char *eval_replace(char *ptr, struct trace_eval_map *map, int len) 3305 { 3306 int rlen; 3307 int elen; 3308 3309 /* Find the length of the eval value as a string */ 3310 elen = snprintf(ptr, 0, "%ld", map->eval_value); 3311 /* Make sure there's enough room to replace the string with the value */ 3312 if (len < elen) 3313 return NULL; 3314 3315 snprintf(ptr, elen + 1, "%ld", map->eval_value); 3316 3317 /* Get the rest of the string of ptr */ 3318 rlen = strlen(ptr + len); 3319 memmove(ptr + elen, ptr + len, rlen); 3320 /* Make sure we end the new string */ 3321 ptr[elen + rlen] = 0; 3322 3323 return ptr + elen; 3324 } 3325 3326 static void update_event_printk(struct trace_event_call *call, 3327 struct trace_eval_map *map) 3328 { 3329 char *ptr; 3330 int quote = 0; 3331 int len = strlen(map->eval_string); 3332 3333 for (ptr = call->print_fmt; *ptr; ptr++) { 3334 if (*ptr == '\\') { 3335 ptr++; 3336 /* paranoid */ 3337 if (!*ptr) 3338 break; 3339 continue; 3340 } 3341 if (*ptr == '"') { 3342 quote ^= 1; 3343 continue; 3344 } 3345 if (quote) 3346 continue; 3347 if (isdigit(*ptr)) { 3348 /* skip numbers */ 3349 do { 3350 ptr++; 3351 /* Check for alpha chars like ULL */ 3352 } while (isalnum(*ptr)); 3353 if (!*ptr) 3354 break; 3355 /* 3356 * A number must have some kind of delimiter after 3357 * it, and we can ignore that too. 3358 */ 3359 continue; 3360 } 3361 if (isalpha(*ptr) || *ptr == '_') { 3362 if (strncmp(map->eval_string, ptr, len) == 0 && 3363 !isalnum(ptr[len]) && ptr[len] != '_') { 3364 ptr = eval_replace(ptr, map, len); 3365 /* enum/sizeof string smaller than value */ 3366 if (WARN_ON_ONCE(!ptr)) 3367 return; 3368 /* 3369 * No need to decrement here, as eval_replace() 3370 * returns the pointer to the character passed 3371 * the eval, and two evals can not be placed 3372 * back to back without something in between. 3373 * We can skip that something in between. 3374 */ 3375 continue; 3376 } 3377 skip_more: 3378 do { 3379 ptr++; 3380 } while (isalnum(*ptr) || *ptr == '_'); 3381 if (!*ptr) 3382 break; 3383 /* 3384 * If what comes after this variable is a '.' or 3385 * '->' then we can continue to ignore that string. 3386 */ 3387 if (*ptr == '.' || (ptr[0] == '-' && ptr[1] == '>')) { 3388 ptr += *ptr == '.' ? 1 : 2; 3389 if (!*ptr) 3390 break; 3391 goto skip_more; 3392 } 3393 /* 3394 * Once again, we can skip the delimiter that came 3395 * after the string. 3396 */ 3397 continue; 3398 } 3399 } 3400 } 3401 3402 static void add_str_to_module(struct module *module, char *str) 3403 { 3404 struct module_string *modstr; 3405 3406 modstr = kmalloc_obj(*modstr, GFP_KERNEL); 3407 3408 /* 3409 * If we failed to allocate memory here, then we'll just 3410 * let the str memory leak when the module is removed. 3411 * If this fails to allocate, there's worse problems than 3412 * a leaked string on module removal. 3413 */ 3414 if (WARN_ON_ONCE(!modstr)) 3415 return; 3416 3417 modstr->module = module; 3418 modstr->str = str; 3419 3420 list_add(&modstr->next, &module_strings); 3421 } 3422 3423 #define ATTRIBUTE_STR "__attribute__(" 3424 #define ATTRIBUTE_STR_LEN (sizeof(ATTRIBUTE_STR) - 1) 3425 3426 /* Remove all __attribute__() from @type. Return allocated string or @type. */ 3427 static char *sanitize_field_type(const char *type) 3428 { 3429 char *attr, *tmp, *next, *ret = (char *)type; 3430 int depth; 3431 3432 next = (char *)type; 3433 while ((attr = strstr(next, ATTRIBUTE_STR))) { 3434 /* Retry if "__attribute__(" is a part of another word. */ 3435 if (attr != next && !isspace(attr[-1])) { 3436 next = attr + ATTRIBUTE_STR_LEN; 3437 continue; 3438 } 3439 3440 if (ret == type) { 3441 ret = kstrdup(type, GFP_KERNEL); 3442 if (WARN_ON_ONCE(!ret)) 3443 return NULL; 3444 attr = ret + (attr - type); 3445 } 3446 3447 /* the ATTRIBUTE_STR already has the first '(' */ 3448 depth = 1; 3449 next = attr + ATTRIBUTE_STR_LEN; 3450 do { 3451 tmp = strpbrk(next, "()"); 3452 /* There is unbalanced parentheses */ 3453 if (WARN_ON_ONCE(!tmp)) { 3454 kfree(ret); 3455 return (char *)type; 3456 } 3457 3458 if (*tmp == '(') 3459 depth++; 3460 else 3461 depth--; 3462 next = tmp + 1; 3463 } while (depth > 0); 3464 next = skip_spaces(next); 3465 strcpy(attr, next); 3466 next = attr; 3467 } 3468 return ret; 3469 } 3470 3471 static char *find_replacable_eval(const char *type, const char *eval_string, 3472 int len) 3473 { 3474 char *ptr; 3475 3476 if (!eval_string) 3477 return NULL; 3478 3479 ptr = strchr(type, '['); 3480 if (!ptr) 3481 return NULL; 3482 ptr++; 3483 3484 if (!isalpha(*ptr) && *ptr != '_') 3485 return NULL; 3486 3487 if (strncmp(eval_string, ptr, len) != 0) 3488 return NULL; 3489 3490 return ptr; 3491 } 3492 3493 static void update_event_fields(struct trace_event_call *call, 3494 struct trace_eval_map *map) 3495 { 3496 struct ftrace_event_field *field; 3497 const char *eval_string = NULL; 3498 struct list_head *head; 3499 int len = 0; 3500 char *ptr; 3501 char *str; 3502 3503 /* Dynamic events should never have field maps */ 3504 if (call->flags & TRACE_EVENT_FL_DYNAMIC) 3505 return; 3506 3507 if (map) { 3508 eval_string = map->eval_string; 3509 len = strlen(map->eval_string); 3510 } 3511 3512 head = trace_get_fields(call); 3513 list_for_each_entry(field, head, link) { 3514 str = sanitize_field_type(field->type); 3515 if (!str) 3516 return; 3517 3518 ptr = find_replacable_eval(str, eval_string, len); 3519 if (ptr) { 3520 if (str == field->type) { 3521 str = kstrdup(field->type, GFP_KERNEL); 3522 if (WARN_ON_ONCE(!str)) 3523 return; 3524 ptr = str + (ptr - field->type); 3525 } 3526 3527 ptr = eval_replace(ptr, map, len); 3528 /* enum/sizeof string smaller than value */ 3529 if (WARN_ON_ONCE(!ptr)) { 3530 kfree(str); 3531 continue; 3532 } 3533 } 3534 3535 if (str == field->type) 3536 continue; 3537 /* 3538 * If the event is part of a module, then we need to free the string 3539 * when the module is removed. Otherwise, it will stay allocated 3540 * until a reboot. 3541 */ 3542 if (call->module) 3543 add_str_to_module(call->module, str); 3544 3545 field->type = str; 3546 if (field->filter_type == FILTER_OTHER) 3547 field->filter_type = filter_assign_type(field->type); 3548 } 3549 } 3550 3551 /* Update all events for replacing eval and sanitizing */ 3552 void trace_event_update_all(struct trace_eval_map **map, int len) 3553 { 3554 struct trace_event_call *call, *p; 3555 const char *last_system = NULL; 3556 bool first = false; 3557 bool updated; 3558 int last_i; 3559 int i; 3560 3561 down_write(&trace_event_sem); 3562 list_for_each_entry_safe(call, p, &ftrace_events, list) { 3563 /* events are usually grouped together with systems */ 3564 if (!last_system || call->class->system != last_system) { 3565 first = true; 3566 last_i = 0; 3567 last_system = call->class->system; 3568 } 3569 3570 updated = false; 3571 /* 3572 * Since calls are grouped by systems, the likelihood that the 3573 * next call in the iteration belongs to the same system as the 3574 * previous call is high. As an optimization, we skip searching 3575 * for a map[] that matches the call's system if the last call 3576 * was from the same system. That's what last_i is for. If the 3577 * call has the same system as the previous call, then last_i 3578 * will be the index of the first map[] that has a matching 3579 * system. 3580 */ 3581 for (i = last_i; i < len; i++) { 3582 if (call->class->system == map[i]->system) { 3583 /* Save the first system if need be */ 3584 if (first) { 3585 last_i = i; 3586 first = false; 3587 } 3588 update_event_printk(call, map[i]); 3589 update_event_fields(call, map[i]); 3590 updated = true; 3591 } 3592 } 3593 /* If not updated yet, update field for sanitizing. */ 3594 if (!updated) 3595 update_event_fields(call, NULL); 3596 cond_resched(); 3597 } 3598 up_write(&trace_event_sem); 3599 } 3600 3601 static bool event_in_systems(struct trace_event_call *call, 3602 const char *systems) 3603 { 3604 const char *system; 3605 const char *p; 3606 3607 if (!systems) 3608 return true; 3609 3610 system = call->class->system; 3611 p = strstr(systems, system); 3612 if (!p) 3613 return false; 3614 3615 if (p != systems && !isspace(*(p - 1)) && *(p - 1) != ',') 3616 return false; 3617 3618 p += strlen(system); 3619 return !*p || isspace(*p) || *p == ','; 3620 } 3621 3622 #ifdef CONFIG_HIST_TRIGGERS 3623 /* 3624 * Wake up waiter on the hist_poll_wq from irq_work because the hist trigger 3625 * may happen in any context. 3626 */ 3627 static void hist_poll_event_irq_work(struct irq_work *work) 3628 { 3629 wake_up_all(&hist_poll_wq); 3630 } 3631 3632 DEFINE_IRQ_WORK(hist_poll_work, hist_poll_event_irq_work); 3633 DECLARE_WAIT_QUEUE_HEAD(hist_poll_wq); 3634 #endif 3635 3636 static struct trace_event_file * 3637 trace_create_new_event(struct trace_event_call *call, 3638 struct trace_array *tr) 3639 { 3640 struct trace_pid_list *no_pid_list; 3641 struct trace_pid_list *pid_list; 3642 struct trace_event_file *file; 3643 unsigned int first; 3644 3645 if (!event_in_systems(call, tr->system_names)) 3646 return NULL; 3647 3648 file = kmem_cache_alloc(file_cachep, GFP_TRACE); 3649 if (!file) 3650 return ERR_PTR(-ENOMEM); 3651 3652 pid_list = rcu_dereference_protected(tr->filtered_pids, 3653 lockdep_is_held(&event_mutex)); 3654 no_pid_list = rcu_dereference_protected(tr->filtered_no_pids, 3655 lockdep_is_held(&event_mutex)); 3656 3657 if (!trace_pid_list_first(pid_list, &first) || 3658 !trace_pid_list_first(no_pid_list, &first)) 3659 file->flags |= EVENT_FILE_FL_PID_FILTER; 3660 3661 file->event_call = call; 3662 file->tr = tr; 3663 atomic_set(&file->sm_ref, 0); 3664 atomic_set(&file->tm_ref, 0); 3665 INIT_LIST_HEAD(&file->triggers); 3666 list_add(&file->list, &tr->events); 3667 refcount_set(&file->ref, 1); 3668 3669 return file; 3670 } 3671 3672 #define MAX_BOOT_TRIGGERS 32 3673 3674 static struct boot_triggers { 3675 const char *event; 3676 char *trigger; 3677 } bootup_triggers[MAX_BOOT_TRIGGERS]; 3678 3679 static char bootup_trigger_buf[COMMAND_LINE_SIZE]; 3680 static int nr_boot_triggers; 3681 3682 static __init int setup_trace_triggers(char *str) 3683 { 3684 char *trigger; 3685 char *buf; 3686 int i; 3687 3688 strscpy(bootup_trigger_buf, str, COMMAND_LINE_SIZE); 3689 trace_set_ring_buffer_expanded(NULL); 3690 disable_tracing_selftest("running event triggers"); 3691 3692 buf = bootup_trigger_buf; 3693 for (i = 0; i < MAX_BOOT_TRIGGERS; i++) { 3694 trigger = strsep(&buf, ","); 3695 if (!trigger) 3696 break; 3697 bootup_triggers[i].event = strsep(&trigger, "."); 3698 bootup_triggers[i].trigger = trigger; 3699 if (!bootup_triggers[i].trigger) 3700 break; 3701 } 3702 3703 nr_boot_triggers = i; 3704 return 1; 3705 } 3706 __setup("trace_trigger=", setup_trace_triggers); 3707 3708 /* Add an event to a trace directory */ 3709 static int 3710 __trace_add_new_event(struct trace_event_call *call, struct trace_array *tr) 3711 { 3712 struct trace_event_file *file; 3713 3714 file = trace_create_new_event(call, tr); 3715 /* 3716 * trace_create_new_event() returns ERR_PTR(-ENOMEM) if failed 3717 * allocation, or NULL if the event is not part of the tr->system_names. 3718 * When the event is not part of the tr->system_names, return zero, not 3719 * an error. 3720 */ 3721 if (!file) 3722 return 0; 3723 3724 if (IS_ERR(file)) 3725 return PTR_ERR(file); 3726 3727 if (eventdir_initialized) 3728 return event_create_dir(tr->event_dir, file); 3729 else 3730 return event_define_fields(call); 3731 } 3732 3733 static void trace_early_triggers(struct trace_event_file *file, const char *name) 3734 { 3735 int ret; 3736 int i; 3737 3738 for (i = 0; i < nr_boot_triggers; i++) { 3739 if (strcmp(name, bootup_triggers[i].event)) 3740 continue; 3741 mutex_lock(&event_mutex); 3742 ret = trigger_process_regex(file, bootup_triggers[i].trigger); 3743 mutex_unlock(&event_mutex); 3744 if (ret) 3745 pr_err("Failed to register trigger '%s' on event %s\n", 3746 bootup_triggers[i].trigger, 3747 bootup_triggers[i].event); 3748 } 3749 } 3750 3751 /* 3752 * Just create a descriptor for early init. A descriptor is required 3753 * for enabling events at boot. We want to enable events before 3754 * the filesystem is initialized. 3755 */ 3756 static int 3757 __trace_early_add_new_event(struct trace_event_call *call, 3758 struct trace_array *tr) 3759 { 3760 struct trace_event_file *file; 3761 int ret; 3762 3763 file = trace_create_new_event(call, tr); 3764 /* 3765 * trace_create_new_event() returns ERR_PTR(-ENOMEM) if failed 3766 * allocation, or NULL if the event is not part of the tr->system_names. 3767 * When the event is not part of the tr->system_names, return zero, not 3768 * an error. 3769 */ 3770 if (!file) 3771 return 0; 3772 3773 if (IS_ERR(file)) 3774 return PTR_ERR(file); 3775 3776 ret = event_define_fields(call); 3777 if (ret) 3778 return ret; 3779 3780 trace_early_triggers(file, trace_event_name(call)); 3781 3782 return 0; 3783 } 3784 3785 struct ftrace_module_file_ops; 3786 static void __add_event_to_tracers(struct trace_event_call *call); 3787 3788 /* Add an additional event_call dynamically */ 3789 int trace_add_event_call(struct trace_event_call *call) 3790 { 3791 int ret; 3792 lockdep_assert_held(&event_mutex); 3793 3794 guard(mutex)(&trace_types_lock); 3795 3796 ret = __register_event(call, NULL); 3797 if (ret < 0) 3798 return ret; 3799 3800 __add_event_to_tracers(call); 3801 return ret; 3802 } 3803 EXPORT_SYMBOL_GPL(trace_add_event_call); 3804 3805 /* 3806 * Must be called under locking of trace_types_lock, event_mutex and 3807 * trace_event_sem. 3808 */ 3809 static void __trace_remove_event_call(struct trace_event_call *call) 3810 { 3811 event_remove(call); 3812 trace_destroy_fields(call); 3813 } 3814 3815 static int probe_remove_event_call(struct trace_event_call *call) 3816 { 3817 struct trace_array *tr; 3818 struct trace_event_file *file; 3819 3820 #ifdef CONFIG_PERF_EVENTS 3821 if (call->perf_refcount) 3822 return -EBUSY; 3823 #endif 3824 do_for_each_event_file(tr, file) { 3825 if (file->event_call != call) 3826 continue; 3827 /* 3828 * We can't rely on ftrace_event_enable_disable(enable => 0) 3829 * we are going to do, soft mode can suppress 3830 * TRACE_REG_UNREGISTER. 3831 */ 3832 if (file->flags & EVENT_FILE_FL_ENABLED) 3833 goto busy; 3834 3835 if (file->flags & EVENT_FILE_FL_WAS_ENABLED) 3836 tr->clear_trace = true; 3837 /* 3838 * The do_for_each_event_file_safe() is 3839 * a double loop. After finding the call for this 3840 * trace_array, we use break to jump to the next 3841 * trace_array. 3842 */ 3843 break; 3844 } while_for_each_event_file(); 3845 3846 __trace_remove_event_call(call); 3847 3848 return 0; 3849 busy: 3850 /* No need to clear the trace now */ 3851 list_for_each_entry(tr, &ftrace_trace_arrays, list) { 3852 tr->clear_trace = false; 3853 } 3854 return -EBUSY; 3855 } 3856 3857 /* Remove an event_call */ 3858 int trace_remove_event_call(struct trace_event_call *call) 3859 { 3860 int ret; 3861 3862 lockdep_assert_held(&event_mutex); 3863 3864 mutex_lock(&trace_types_lock); 3865 down_write(&trace_event_sem); 3866 ret = probe_remove_event_call(call); 3867 up_write(&trace_event_sem); 3868 mutex_unlock(&trace_types_lock); 3869 3870 return ret; 3871 } 3872 EXPORT_SYMBOL_GPL(trace_remove_event_call); 3873 3874 #define for_each_event(event, start, end) \ 3875 for (event = start; \ 3876 (unsigned long)event < (unsigned long)end; \ 3877 event++) 3878 3879 #ifdef CONFIG_MODULES 3880 static void update_mod_cache(struct trace_array *tr, struct module *mod) 3881 { 3882 struct event_mod_load *event_mod, *n; 3883 3884 list_for_each_entry_safe(event_mod, n, &tr->mod_events, list) { 3885 if (strcmp(event_mod->module, mod->name) != 0) 3886 continue; 3887 3888 __ftrace_set_clr_event_nolock(tr, event_mod->match, 3889 event_mod->system, 3890 event_mod->event, 1, mod->name); 3891 free_event_mod(event_mod); 3892 } 3893 } 3894 3895 static void update_cache_events(struct module *mod) 3896 { 3897 struct trace_array *tr; 3898 3899 list_for_each_entry(tr, &ftrace_trace_arrays, list) 3900 update_mod_cache(tr, mod); 3901 } 3902 3903 static void trace_module_add_events(struct module *mod) 3904 { 3905 struct trace_event_call **call, **start, **end; 3906 3907 if (!mod->num_trace_events) 3908 return; 3909 3910 /* Don't add infrastructure for mods without tracepoints */ 3911 if (trace_module_has_bad_taint(mod)) { 3912 pr_err("%s: module has bad taint, not creating trace events\n", 3913 mod->name); 3914 return; 3915 } 3916 3917 start = mod->trace_events; 3918 end = mod->trace_events + mod->num_trace_events; 3919 3920 for_each_event(call, start, end) { 3921 __register_event(*call, mod); 3922 __add_event_to_tracers(*call); 3923 } 3924 3925 update_cache_events(mod); 3926 } 3927 3928 static void trace_module_remove_events(struct module *mod) 3929 { 3930 struct trace_event_call *call, *p; 3931 struct module_string *modstr, *m; 3932 3933 down_write(&trace_event_sem); 3934 list_for_each_entry_safe(call, p, &ftrace_events, list) { 3935 if ((call->flags & TRACE_EVENT_FL_DYNAMIC) || !call->module) 3936 continue; 3937 if (call->module == mod) 3938 __trace_remove_event_call(call); 3939 } 3940 /* Check for any strings allocated for this module */ 3941 list_for_each_entry_safe(modstr, m, &module_strings, next) { 3942 if (modstr->module != mod) 3943 continue; 3944 list_del(&modstr->next); 3945 kfree(modstr->str); 3946 kfree(modstr); 3947 } 3948 up_write(&trace_event_sem); 3949 3950 /* 3951 * It is safest to reset the ring buffer if the module being unloaded 3952 * registered any events that were used. The only worry is if 3953 * a new module gets loaded, and takes on the same id as the events 3954 * of this module. When printing out the buffer, traced events left 3955 * over from this module may be passed to the new module events and 3956 * unexpected results may occur. 3957 */ 3958 tracing_reset_all_online_cpus_unlocked(); 3959 } 3960 3961 static int trace_module_notify(struct notifier_block *self, 3962 unsigned long val, void *data) 3963 { 3964 struct module *mod = data; 3965 3966 mutex_lock(&event_mutex); 3967 mutex_lock(&trace_types_lock); 3968 switch (val) { 3969 case MODULE_STATE_COMING: 3970 trace_module_add_events(mod); 3971 break; 3972 case MODULE_STATE_GOING: 3973 trace_module_remove_events(mod); 3974 break; 3975 } 3976 mutex_unlock(&trace_types_lock); 3977 mutex_unlock(&event_mutex); 3978 3979 return NOTIFY_OK; 3980 } 3981 3982 static struct notifier_block trace_module_nb = { 3983 .notifier_call = trace_module_notify, 3984 .priority = 1, /* higher than trace.c module notify */ 3985 }; 3986 #endif /* CONFIG_MODULES */ 3987 3988 /* Create a new event directory structure for a trace directory. */ 3989 static void 3990 __trace_add_event_dirs(struct trace_array *tr) 3991 { 3992 struct trace_event_call *call; 3993 int ret; 3994 3995 lockdep_assert_held(&trace_event_sem); 3996 3997 list_for_each_entry(call, &ftrace_events, list) { 3998 ret = __trace_add_new_event(call, tr); 3999 if (ret < 0) 4000 pr_warn("Could not create directory for event %s\n", 4001 trace_event_name(call)); 4002 } 4003 } 4004 4005 /* Returns any file that matches the system and event */ 4006 struct trace_event_file * 4007 __find_event_file(struct trace_array *tr, const char *system, const char *event) 4008 { 4009 struct trace_event_file *file; 4010 struct trace_event_call *call; 4011 const char *name; 4012 4013 list_for_each_entry(file, &tr->events, list) { 4014 4015 call = file->event_call; 4016 name = trace_event_name(call); 4017 4018 if (!name || !call->class) 4019 continue; 4020 4021 if (strcmp(event, name) == 0 && 4022 strcmp(system, call->class->system) == 0) 4023 return file; 4024 } 4025 return NULL; 4026 } 4027 4028 /* Returns valid trace event files that match system and event */ 4029 struct trace_event_file * 4030 find_event_file(struct trace_array *tr, const char *system, const char *event) 4031 { 4032 struct trace_event_file *file; 4033 4034 file = __find_event_file(tr, system, event); 4035 if (!file || !file->event_call->class->reg || 4036 file->event_call->flags & TRACE_EVENT_FL_IGNORE_ENABLE) 4037 return NULL; 4038 4039 return file; 4040 } 4041 4042 /** 4043 * trace_get_event_file - Find and return a trace event file 4044 * @instance: The name of the trace instance containing the event 4045 * @system: The name of the system containing the event 4046 * @event: The name of the event 4047 * 4048 * Return a trace event file given the trace instance name, trace 4049 * system, and trace event name. If the instance name is NULL, it 4050 * refers to the top-level trace array. 4051 * 4052 * This function will look it up and return it if found, after calling 4053 * trace_array_get() to prevent the instance from going away, and 4054 * increment the event's module refcount to prevent it from being 4055 * removed. 4056 * 4057 * To release the file, call trace_put_event_file(), which will call 4058 * trace_array_put() and decrement the event's module refcount. 4059 * 4060 * Return: The trace event on success, ERR_PTR otherwise. 4061 */ 4062 struct trace_event_file *trace_get_event_file(const char *instance, 4063 const char *system, 4064 const char *event) 4065 { 4066 struct trace_array *tr = top_trace_array(); 4067 struct trace_event_file *file = NULL; 4068 int ret = -EINVAL; 4069 4070 if (instance) { 4071 tr = trace_array_find_get(instance); 4072 if (!tr) 4073 return ERR_PTR(-ENOENT); 4074 } else { 4075 ret = trace_array_get(tr); 4076 if (ret) 4077 return ERR_PTR(ret); 4078 } 4079 4080 guard(mutex)(&event_mutex); 4081 4082 file = find_event_file(tr, system, event); 4083 if (!file) { 4084 trace_array_put(tr); 4085 return ERR_PTR(-EINVAL); 4086 } 4087 4088 /* Don't let event modules unload while in use */ 4089 ret = trace_event_try_get_ref(file->event_call); 4090 if (!ret) { 4091 trace_array_put(tr); 4092 return ERR_PTR(-EBUSY); 4093 } 4094 4095 return file; 4096 } 4097 EXPORT_SYMBOL_GPL(trace_get_event_file); 4098 4099 /** 4100 * trace_put_event_file - Release a file from trace_get_event_file() 4101 * @file: The trace event file 4102 * 4103 * If a file was retrieved using trace_get_event_file(), this should 4104 * be called when it's no longer needed. It will cancel the previous 4105 * trace_array_get() called by that function, and decrement the 4106 * event's module refcount. 4107 */ 4108 void trace_put_event_file(struct trace_event_file *file) 4109 { 4110 mutex_lock(&event_mutex); 4111 trace_event_put_ref(file->event_call); 4112 mutex_unlock(&event_mutex); 4113 4114 trace_array_put(file->tr); 4115 } 4116 EXPORT_SYMBOL_GPL(trace_put_event_file); 4117 4118 #ifdef CONFIG_DYNAMIC_FTRACE 4119 struct event_probe_data { 4120 struct trace_event_file *file; 4121 unsigned long count; 4122 int ref; 4123 bool enable; 4124 }; 4125 4126 static void update_event_probe(struct event_probe_data *data) 4127 { 4128 if (data->enable) 4129 clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &data->file->flags); 4130 else 4131 set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &data->file->flags); 4132 } 4133 4134 static void 4135 event_enable_probe(unsigned long ip, unsigned long parent_ip, 4136 struct trace_array *tr, struct ftrace_probe_ops *ops, 4137 void *data) 4138 { 4139 struct ftrace_func_mapper *mapper = data; 4140 struct event_probe_data *edata; 4141 void **pdata; 4142 4143 pdata = ftrace_func_mapper_find_ip(mapper, ip); 4144 if (!pdata || !*pdata) 4145 return; 4146 4147 edata = *pdata; 4148 update_event_probe(edata); 4149 } 4150 4151 static void 4152 event_enable_count_probe(unsigned long ip, unsigned long parent_ip, 4153 struct trace_array *tr, struct ftrace_probe_ops *ops, 4154 void *data) 4155 { 4156 struct ftrace_func_mapper *mapper = data; 4157 struct event_probe_data *edata; 4158 void **pdata; 4159 4160 pdata = ftrace_func_mapper_find_ip(mapper, ip); 4161 if (!pdata || !*pdata) 4162 return; 4163 4164 edata = *pdata; 4165 4166 if (!edata->count) 4167 return; 4168 4169 /* Skip if the event is in a state we want to switch to */ 4170 if (edata->enable == !(edata->file->flags & EVENT_FILE_FL_SOFT_DISABLED)) 4171 return; 4172 4173 if (edata->count != -1) 4174 (edata->count)--; 4175 4176 update_event_probe(edata); 4177 } 4178 4179 static int 4180 event_enable_print(struct seq_file *m, unsigned long ip, 4181 struct ftrace_probe_ops *ops, void *data) 4182 { 4183 struct ftrace_func_mapper *mapper = data; 4184 struct event_probe_data *edata; 4185 void **pdata; 4186 4187 pdata = ftrace_func_mapper_find_ip(mapper, ip); 4188 4189 if (WARN_ON_ONCE(!pdata || !*pdata)) 4190 return 0; 4191 4192 edata = *pdata; 4193 4194 seq_printf(m, "%ps:", (void *)ip); 4195 4196 seq_printf(m, "%s:%s:%s", 4197 edata->enable ? ENABLE_EVENT_STR : DISABLE_EVENT_STR, 4198 edata->file->event_call->class->system, 4199 trace_event_name(edata->file->event_call)); 4200 4201 if (edata->count == -1) 4202 seq_puts(m, ":unlimited\n"); 4203 else 4204 seq_printf(m, ":count=%ld\n", edata->count); 4205 4206 return 0; 4207 } 4208 4209 static int 4210 event_enable_init(struct ftrace_probe_ops *ops, struct trace_array *tr, 4211 unsigned long ip, void *init_data, void **data) 4212 { 4213 struct ftrace_func_mapper *mapper = *data; 4214 struct event_probe_data *edata = init_data; 4215 int ret; 4216 4217 if (!mapper) { 4218 mapper = allocate_ftrace_func_mapper(); 4219 if (!mapper) 4220 return -ENODEV; 4221 *data = mapper; 4222 } 4223 4224 ret = ftrace_func_mapper_add_ip(mapper, ip, edata); 4225 if (ret < 0) 4226 return ret; 4227 4228 edata->ref++; 4229 4230 return 0; 4231 } 4232 4233 static int free_probe_data(void *data) 4234 { 4235 struct event_probe_data *edata = data; 4236 4237 edata->ref--; 4238 if (!edata->ref) { 4239 /* Remove soft mode */ 4240 __ftrace_event_enable_disable(edata->file, 0, 1); 4241 trace_event_put_ref(edata->file->event_call); 4242 kfree(edata); 4243 } 4244 return 0; 4245 } 4246 4247 static void 4248 event_enable_free(struct ftrace_probe_ops *ops, struct trace_array *tr, 4249 unsigned long ip, void *data) 4250 { 4251 struct ftrace_func_mapper *mapper = data; 4252 struct event_probe_data *edata; 4253 4254 if (!ip) { 4255 if (!mapper) 4256 return; 4257 free_ftrace_func_mapper(mapper, free_probe_data); 4258 return; 4259 } 4260 4261 edata = ftrace_func_mapper_remove_ip(mapper, ip); 4262 4263 if (WARN_ON_ONCE(!edata)) 4264 return; 4265 4266 if (WARN_ON_ONCE(edata->ref <= 0)) 4267 return; 4268 4269 free_probe_data(edata); 4270 } 4271 4272 static struct ftrace_probe_ops event_enable_probe_ops = { 4273 .func = event_enable_probe, 4274 .print = event_enable_print, 4275 .init = event_enable_init, 4276 .free = event_enable_free, 4277 }; 4278 4279 static struct ftrace_probe_ops event_enable_count_probe_ops = { 4280 .func = event_enable_count_probe, 4281 .print = event_enable_print, 4282 .init = event_enable_init, 4283 .free = event_enable_free, 4284 }; 4285 4286 static struct ftrace_probe_ops event_disable_probe_ops = { 4287 .func = event_enable_probe, 4288 .print = event_enable_print, 4289 .init = event_enable_init, 4290 .free = event_enable_free, 4291 }; 4292 4293 static struct ftrace_probe_ops event_disable_count_probe_ops = { 4294 .func = event_enable_count_probe, 4295 .print = event_enable_print, 4296 .init = event_enable_init, 4297 .free = event_enable_free, 4298 }; 4299 4300 static int 4301 event_enable_func(struct trace_array *tr, struct ftrace_hash *hash, 4302 char *glob, char *cmd, char *param, int enabled) 4303 { 4304 struct trace_event_file *file; 4305 struct ftrace_probe_ops *ops; 4306 struct event_probe_data *data; 4307 unsigned long count = -1; 4308 const char *system; 4309 const char *event; 4310 char *number; 4311 bool enable; 4312 int ret; 4313 4314 if (!tr) 4315 return -ENODEV; 4316 4317 /* hash funcs only work with set_ftrace_filter */ 4318 if (!enabled || !param) 4319 return -EINVAL; 4320 4321 system = strsep(¶m, ":"); 4322 if (!param) 4323 return -EINVAL; 4324 4325 event = strsep(¶m, ":"); 4326 4327 guard(mutex)(&event_mutex); 4328 4329 file = find_event_file(tr, system, event); 4330 if (!file) 4331 return -EINVAL; 4332 4333 enable = strcmp(cmd, ENABLE_EVENT_STR) == 0; 4334 4335 if (enable) 4336 ops = param ? &event_enable_count_probe_ops : &event_enable_probe_ops; 4337 else 4338 ops = param ? &event_disable_count_probe_ops : &event_disable_probe_ops; 4339 4340 if (glob[0] == '!') 4341 return unregister_ftrace_function_probe_func(glob+1, tr, ops); 4342 4343 if (param) { 4344 number = strsep(¶m, ":"); 4345 4346 if (!strlen(number)) 4347 return -EINVAL; 4348 4349 /* 4350 * We use the callback data field (which is a pointer) 4351 * as our counter. 4352 */ 4353 ret = kstrtoul(number, 0, &count); 4354 if (ret) 4355 return ret; 4356 } 4357 4358 /* Don't let event modules unload while probe registered */ 4359 ret = trace_event_try_get_ref(file->event_call); 4360 if (!ret) 4361 return -EBUSY; 4362 4363 ret = __ftrace_event_enable_disable(file, 1, 1); 4364 if (ret < 0) 4365 goto out_put; 4366 4367 ret = -ENOMEM; 4368 data = kzalloc_obj(*data, GFP_KERNEL); 4369 if (!data) 4370 goto out_put; 4371 4372 data->enable = enable; 4373 data->count = count; 4374 data->file = file; 4375 4376 ret = register_ftrace_function_probe(glob, tr, ops, data); 4377 /* 4378 * The above returns on success the # of functions enabled, 4379 * but if it didn't find any functions it returns zero. 4380 * Consider no functions a failure too. 4381 */ 4382 4383 /* Just return zero, not the number of enabled functions */ 4384 if (ret > 0) 4385 return 0; 4386 4387 kfree(data); 4388 4389 if (!ret) 4390 ret = -ENOENT; 4391 4392 __ftrace_event_enable_disable(file, 0, 1); 4393 out_put: 4394 trace_event_put_ref(file->event_call); 4395 return ret; 4396 } 4397 4398 static struct ftrace_func_command event_enable_cmd = { 4399 .name = ENABLE_EVENT_STR, 4400 .func = event_enable_func, 4401 }; 4402 4403 static struct ftrace_func_command event_disable_cmd = { 4404 .name = DISABLE_EVENT_STR, 4405 .func = event_enable_func, 4406 }; 4407 4408 static __init int register_event_cmds(void) 4409 { 4410 int ret; 4411 4412 ret = register_ftrace_command(&event_enable_cmd); 4413 if (WARN_ON(ret < 0)) 4414 return ret; 4415 ret = register_ftrace_command(&event_disable_cmd); 4416 if (WARN_ON(ret < 0)) 4417 unregister_ftrace_command(&event_enable_cmd); 4418 return ret; 4419 } 4420 #else 4421 static inline int register_event_cmds(void) { return 0; } 4422 #endif /* CONFIG_DYNAMIC_FTRACE */ 4423 4424 /* 4425 * The top level array and trace arrays created by boot-time tracing 4426 * have already had its trace_event_file descriptors created in order 4427 * to allow for early events to be recorded. 4428 * This function is called after the tracefs has been initialized, 4429 * and we now have to create the files associated to the events. 4430 */ 4431 static void __trace_early_add_event_dirs(struct trace_array *tr) 4432 { 4433 struct trace_event_file *file; 4434 int ret; 4435 4436 4437 list_for_each_entry(file, &tr->events, list) { 4438 ret = event_create_dir(tr->event_dir, file); 4439 if (ret < 0) 4440 pr_warn("Could not create directory for event %s\n", 4441 trace_event_name(file->event_call)); 4442 } 4443 } 4444 4445 /* 4446 * For early boot up, the top trace array and the trace arrays created 4447 * by boot-time tracing require to have a list of events that can be 4448 * enabled. This must be done before the filesystem is set up in order 4449 * to allow events to be traced early. 4450 */ 4451 void __trace_early_add_events(struct trace_array *tr) 4452 { 4453 struct trace_event_call *call; 4454 int ret; 4455 4456 list_for_each_entry(call, &ftrace_events, list) { 4457 /* Early boot up should not have any modules loaded */ 4458 if (!(call->flags & TRACE_EVENT_FL_DYNAMIC) && 4459 WARN_ON_ONCE(call->module)) 4460 continue; 4461 4462 ret = __trace_early_add_new_event(call, tr); 4463 if (ret < 0) 4464 pr_warn("Could not create early event %s\n", 4465 trace_event_name(call)); 4466 } 4467 } 4468 4469 /* Remove the event directory structure for a trace directory. */ 4470 static void 4471 __trace_remove_event_dirs(struct trace_array *tr) 4472 { 4473 struct trace_event_file *file, *next; 4474 4475 list_for_each_entry_safe(file, next, &tr->events, list) 4476 remove_event_file_dir(file); 4477 } 4478 4479 static void __add_event_to_tracers(struct trace_event_call *call) 4480 { 4481 struct trace_array *tr; 4482 4483 list_for_each_entry(tr, &ftrace_trace_arrays, list) 4484 __trace_add_new_event(call, tr); 4485 } 4486 4487 extern struct trace_event_call *__start_ftrace_events[]; 4488 extern struct trace_event_call *__stop_ftrace_events[]; 4489 4490 static char bootup_event_buf[COMMAND_LINE_SIZE] __initdata; 4491 4492 static __init int setup_trace_event(char *str) 4493 { 4494 strscpy(bootup_event_buf, str, COMMAND_LINE_SIZE); 4495 trace_set_ring_buffer_expanded(NULL); 4496 disable_tracing_selftest("running event tracing"); 4497 4498 return 1; 4499 } 4500 __setup("trace_event=", setup_trace_event); 4501 4502 static int events_callback(const char *name, umode_t *mode, void **data, 4503 const struct file_operations **fops) 4504 { 4505 if (strcmp(name, "enable") == 0) { 4506 *mode = TRACE_MODE_WRITE; 4507 *fops = &ftrace_tr_enable_fops; 4508 return 1; 4509 } 4510 4511 if (strcmp(name, "header_page") == 0) { 4512 *mode = TRACE_MODE_READ; 4513 *fops = &ftrace_show_header_page_fops; 4514 4515 } else if (strcmp(name, "header_event") == 0) { 4516 *mode = TRACE_MODE_READ; 4517 *fops = &ftrace_show_header_event_fops; 4518 } else 4519 return 0; 4520 4521 return 1; 4522 } 4523 4524 /* Expects to have event_mutex held when called */ 4525 static int 4526 create_event_toplevel_files(struct dentry *parent, struct trace_array *tr) 4527 { 4528 struct eventfs_inode *e_events; 4529 struct dentry *entry; 4530 int nr_entries; 4531 static struct eventfs_entry events_entries[] = { 4532 { 4533 .name = "enable", 4534 .callback = events_callback, 4535 }, 4536 { 4537 .name = "header_page", 4538 .callback = events_callback, 4539 }, 4540 { 4541 .name = "header_event", 4542 .callback = events_callback, 4543 }, 4544 }; 4545 4546 entry = trace_create_file("set_event", TRACE_MODE_WRITE, parent, 4547 tr, &ftrace_set_event_fops); 4548 if (!entry) 4549 return -ENOMEM; 4550 4551 trace_create_file("show_event_filters", TRACE_MODE_READ, parent, tr, 4552 &ftrace_show_event_filters_fops); 4553 4554 trace_create_file("show_event_triggers", TRACE_MODE_READ, parent, tr, 4555 &ftrace_show_event_triggers_fops); 4556 4557 nr_entries = ARRAY_SIZE(events_entries); 4558 4559 e_events = eventfs_create_events_dir("events", parent, events_entries, 4560 nr_entries, tr); 4561 if (IS_ERR(e_events)) { 4562 pr_warn("Could not create tracefs 'events' directory\n"); 4563 return -ENOMEM; 4564 } 4565 4566 /* There are not as crucial, just warn if they are not created */ 4567 4568 trace_create_file("set_event_pid", TRACE_MODE_WRITE, parent, 4569 tr, &ftrace_set_event_pid_fops); 4570 4571 trace_create_file("set_event_notrace_pid", 4572 TRACE_MODE_WRITE, parent, tr, 4573 &ftrace_set_event_notrace_pid_fops); 4574 4575 tr->event_dir = e_events; 4576 4577 return 0; 4578 } 4579 4580 /** 4581 * event_trace_add_tracer - add a instance of a trace_array to events 4582 * @parent: The parent dentry to place the files/directories for events in 4583 * @tr: The trace array associated with these events 4584 * 4585 * When a new instance is created, it needs to set up its events 4586 * directory, as well as other files associated with events. It also 4587 * creates the event hierarchy in the @parent/events directory. 4588 * 4589 * Returns 0 on success. 4590 * 4591 * Must be called with event_mutex held. 4592 */ 4593 int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr) 4594 { 4595 int ret; 4596 4597 lockdep_assert_held(&event_mutex); 4598 4599 ret = create_event_toplevel_files(parent, tr); 4600 if (ret) 4601 goto out; 4602 4603 down_write(&trace_event_sem); 4604 /* If tr already has the event list, it is initialized in early boot. */ 4605 if (unlikely(!list_empty(&tr->events))) 4606 __trace_early_add_event_dirs(tr); 4607 else 4608 __trace_add_event_dirs(tr); 4609 up_write(&trace_event_sem); 4610 4611 out: 4612 return ret; 4613 } 4614 4615 /* 4616 * The top trace array already had its file descriptors created. 4617 * Now the files themselves need to be created. 4618 */ 4619 static __init int 4620 early_event_add_tracer(struct dentry *parent, struct trace_array *tr) 4621 { 4622 int ret; 4623 4624 guard(mutex)(&event_mutex); 4625 4626 ret = create_event_toplevel_files(parent, tr); 4627 if (ret) 4628 return ret; 4629 4630 down_write(&trace_event_sem); 4631 __trace_early_add_event_dirs(tr); 4632 up_write(&trace_event_sem); 4633 4634 return 0; 4635 } 4636 4637 /* Must be called with event_mutex held */ 4638 int event_trace_del_tracer(struct trace_array *tr) 4639 { 4640 lockdep_assert_held(&event_mutex); 4641 4642 /* Disable any event triggers and associated soft-disabled events */ 4643 clear_event_triggers(tr); 4644 4645 /* Clear the pid list */ 4646 __ftrace_clear_event_pids(tr, TRACE_PIDS | TRACE_NO_PIDS); 4647 4648 /* Disable any running events */ 4649 __ftrace_set_clr_event_nolock(tr, NULL, NULL, NULL, 0, NULL); 4650 4651 /* Make sure no more events are being executed */ 4652 tracepoint_synchronize_unregister(); 4653 4654 down_write(&trace_event_sem); 4655 __trace_remove_event_dirs(tr); 4656 eventfs_remove_events_dir(tr->event_dir); 4657 up_write(&trace_event_sem); 4658 4659 tr->event_dir = NULL; 4660 4661 return 0; 4662 } 4663 4664 static __init int event_trace_memsetup(void) 4665 { 4666 field_cachep = KMEM_CACHE(ftrace_event_field, SLAB_PANIC); 4667 file_cachep = KMEM_CACHE(trace_event_file, SLAB_PANIC); 4668 return 0; 4669 } 4670 4671 __init void 4672 early_enable_events(struct trace_array *tr, char *buf, bool disable_first) 4673 { 4674 char *token; 4675 int ret; 4676 4677 while (true) { 4678 token = strsep(&buf, ","); 4679 4680 if (!token) 4681 break; 4682 4683 if (*token) { 4684 /* Restarting syscalls requires that we stop them first */ 4685 if (disable_first) 4686 ftrace_set_clr_event(tr, token, 0); 4687 4688 ret = ftrace_set_clr_event(tr, token, 1); 4689 if (ret) 4690 pr_warn("Failed to enable trace event: %s\n", token); 4691 } 4692 4693 /* Put back the comma to allow this to be called again */ 4694 if (buf) 4695 *(buf - 1) = ','; 4696 } 4697 } 4698 4699 static __init int event_trace_enable(void) 4700 { 4701 struct trace_array *tr = top_trace_array(); 4702 struct trace_event_call **iter, *call; 4703 int ret; 4704 4705 if (!tr) 4706 return -ENODEV; 4707 4708 for_each_event(iter, __start_ftrace_events, __stop_ftrace_events) { 4709 4710 call = *iter; 4711 ret = event_init(call); 4712 if (!ret) 4713 list_add(&call->list, &ftrace_events); 4714 } 4715 4716 register_trigger_cmds(); 4717 4718 /* 4719 * We need the top trace array to have a working set of trace 4720 * points at early init, before the debug files and directories 4721 * are created. Create the file entries now, and attach them 4722 * to the actual file dentries later. 4723 */ 4724 __trace_early_add_events(tr); 4725 4726 early_enable_events(tr, bootup_event_buf, false); 4727 4728 trace_printk_start_comm(); 4729 4730 register_event_cmds(); 4731 4732 4733 return 0; 4734 } 4735 4736 /* 4737 * event_trace_enable() is called from trace_event_init() first to 4738 * initialize events and perhaps start any events that are on the 4739 * command line. Unfortunately, there are some events that will not 4740 * start this early, like the system call tracepoints that need 4741 * to set the %SYSCALL_WORK_SYSCALL_TRACEPOINT flag of pid 1. But 4742 * event_trace_enable() is called before pid 1 starts, and this flag 4743 * is never set, making the syscall tracepoint never get reached, but 4744 * the event is enabled regardless (and not doing anything). 4745 */ 4746 static __init int event_trace_enable_again(void) 4747 { 4748 struct trace_array *tr; 4749 4750 tr = top_trace_array(); 4751 if (!tr) 4752 return -ENODEV; 4753 4754 early_enable_events(tr, bootup_event_buf, true); 4755 4756 return 0; 4757 } 4758 4759 early_initcall(event_trace_enable_again); 4760 4761 /* Init fields which doesn't related to the tracefs */ 4762 static __init int event_trace_init_fields(void) 4763 { 4764 if (trace_define_generic_fields()) 4765 pr_warn("tracing: Failed to allocated generic fields"); 4766 4767 if (trace_define_common_fields()) 4768 pr_warn("tracing: Failed to allocate common fields"); 4769 4770 return 0; 4771 } 4772 4773 __init int event_trace_init(void) 4774 { 4775 struct trace_array *tr; 4776 int ret; 4777 4778 tr = top_trace_array(); 4779 if (!tr) 4780 return -ENODEV; 4781 4782 trace_create_file("available_events", TRACE_MODE_READ, 4783 NULL, tr, &ftrace_avail_fops); 4784 4785 ret = early_event_add_tracer(NULL, tr); 4786 if (ret) 4787 return ret; 4788 4789 #ifdef CONFIG_MODULES 4790 ret = register_module_notifier(&trace_module_nb); 4791 if (ret) 4792 pr_warn("Failed to register trace events module notifier\n"); 4793 #endif 4794 4795 eventdir_initialized = true; 4796 4797 return 0; 4798 } 4799 4800 void __init trace_event_init(void) 4801 { 4802 event_trace_memsetup(); 4803 init_ftrace_syscalls(); 4804 event_trace_enable(); 4805 event_trace_init_fields(); 4806 } 4807 4808 #ifdef CONFIG_EVENT_TRACE_STARTUP_TEST 4809 4810 static DEFINE_SPINLOCK(test_spinlock); 4811 static DEFINE_SPINLOCK(test_spinlock_irq); 4812 static DEFINE_MUTEX(test_mutex); 4813 4814 static __init void test_work(struct work_struct *dummy) 4815 { 4816 spin_lock(&test_spinlock); 4817 spin_lock_irq(&test_spinlock_irq); 4818 udelay(1); 4819 spin_unlock_irq(&test_spinlock_irq); 4820 spin_unlock(&test_spinlock); 4821 4822 mutex_lock(&test_mutex); 4823 msleep(1); 4824 mutex_unlock(&test_mutex); 4825 } 4826 4827 static __init int event_test_thread(void *unused) 4828 { 4829 void *test_malloc; 4830 4831 test_malloc = kmalloc(1234, GFP_KERNEL); 4832 if (!test_malloc) 4833 pr_info("failed to kmalloc\n"); 4834 4835 schedule_on_each_cpu(test_work); 4836 4837 kfree(test_malloc); 4838 4839 set_current_state(TASK_INTERRUPTIBLE); 4840 while (!kthread_should_stop()) { 4841 schedule(); 4842 set_current_state(TASK_INTERRUPTIBLE); 4843 } 4844 __set_current_state(TASK_RUNNING); 4845 4846 return 0; 4847 } 4848 4849 /* 4850 * Do various things that may trigger events. 4851 */ 4852 static __init void event_test_stuff(void) 4853 { 4854 struct task_struct *test_thread; 4855 4856 test_thread = kthread_run(event_test_thread, NULL, "test-events"); 4857 msleep(1); 4858 kthread_stop(test_thread); 4859 } 4860 4861 /* 4862 * For every trace event defined, we will test each trace point separately, 4863 * and then by groups, and finally all trace points. 4864 */ 4865 static __init void event_trace_self_tests(void) 4866 { 4867 struct trace_subsystem_dir *dir; 4868 struct trace_event_file *file; 4869 struct trace_event_call *call; 4870 struct event_subsystem *system; 4871 struct trace_array *tr; 4872 int ret; 4873 4874 tr = top_trace_array(); 4875 if (!tr) 4876 return; 4877 4878 pr_info("Running tests on trace events:\n"); 4879 4880 list_for_each_entry(file, &tr->events, list) { 4881 4882 call = file->event_call; 4883 4884 /* Only test those that have a probe */ 4885 if (!call->class || !call->class->probe) 4886 continue; 4887 4888 /* 4889 * Testing syscall events here is pretty useless, but 4890 * we still do it if configured. But this is time consuming. 4891 * What we really need is a user thread to perform the 4892 * syscalls as we test. 4893 */ 4894 #ifndef CONFIG_EVENT_TRACE_TEST_SYSCALLS 4895 if (call->class->system && 4896 strcmp(call->class->system, "syscalls") == 0) 4897 continue; 4898 #endif 4899 4900 pr_info("Testing event %s: ", trace_event_name(call)); 4901 4902 /* 4903 * If an event is already enabled, someone is using 4904 * it and the self test should not be on. 4905 */ 4906 if (file->flags & EVENT_FILE_FL_ENABLED) { 4907 pr_warn("Enabled event during self test!\n"); 4908 WARN_ON_ONCE(1); 4909 continue; 4910 } 4911 4912 ftrace_event_enable_disable(file, 1); 4913 event_test_stuff(); 4914 ftrace_event_enable_disable(file, 0); 4915 4916 pr_cont("OK\n"); 4917 } 4918 4919 /* Now test at the sub system level */ 4920 4921 pr_info("Running tests on trace event systems:\n"); 4922 4923 list_for_each_entry(dir, &tr->systems, list) { 4924 4925 system = dir->subsystem; 4926 4927 /* the ftrace system is special, skip it */ 4928 if (strcmp(system->name, "ftrace") == 0) 4929 continue; 4930 4931 pr_info("Testing event system %s: ", system->name); 4932 4933 ret = __ftrace_set_clr_event(tr, NULL, system->name, NULL, 1, NULL); 4934 if (WARN_ON_ONCE(ret)) { 4935 pr_warn("error enabling system %s\n", 4936 system->name); 4937 continue; 4938 } 4939 4940 event_test_stuff(); 4941 4942 ret = __ftrace_set_clr_event(tr, NULL, system->name, NULL, 0, NULL); 4943 if (WARN_ON_ONCE(ret)) { 4944 pr_warn("error disabling system %s\n", 4945 system->name); 4946 continue; 4947 } 4948 4949 pr_cont("OK\n"); 4950 } 4951 4952 /* Test with all events enabled */ 4953 4954 pr_info("Running tests on all trace events:\n"); 4955 pr_info("Testing all events: "); 4956 4957 ret = __ftrace_set_clr_event(tr, NULL, NULL, NULL, 1, NULL); 4958 if (WARN_ON_ONCE(ret)) { 4959 pr_warn("error enabling all events\n"); 4960 return; 4961 } 4962 4963 event_test_stuff(); 4964 4965 /* reset sysname */ 4966 ret = __ftrace_set_clr_event(tr, NULL, NULL, NULL, 0, NULL); 4967 if (WARN_ON_ONCE(ret)) { 4968 pr_warn("error disabling all events\n"); 4969 return; 4970 } 4971 4972 pr_cont("OK\n"); 4973 } 4974 4975 #ifdef CONFIG_FUNCTION_TRACER 4976 4977 static DEFINE_PER_CPU(atomic_t, ftrace_test_event_disable); 4978 4979 static struct trace_event_file event_trace_file __initdata; 4980 4981 static void __init 4982 function_test_events_call(unsigned long ip, unsigned long parent_ip, 4983 struct ftrace_ops *op, struct ftrace_regs *regs) 4984 { 4985 struct trace_buffer *buffer; 4986 struct ring_buffer_event *event; 4987 struct ftrace_entry *entry; 4988 unsigned int trace_ctx; 4989 long disabled; 4990 int cpu; 4991 4992 trace_ctx = tracing_gen_ctx(); 4993 preempt_disable_notrace(); 4994 cpu = raw_smp_processor_id(); 4995 disabled = atomic_inc_return(&per_cpu(ftrace_test_event_disable, cpu)); 4996 4997 if (disabled != 1) 4998 goto out; 4999 5000 event = trace_event_buffer_lock_reserve(&buffer, &event_trace_file, 5001 TRACE_FN, sizeof(*entry), 5002 trace_ctx); 5003 if (!event) 5004 goto out; 5005 entry = ring_buffer_event_data(event); 5006 entry->ip = ip; 5007 entry->parent_ip = parent_ip; 5008 5009 event_trigger_unlock_commit(&event_trace_file, buffer, event, 5010 entry, trace_ctx); 5011 out: 5012 atomic_dec(&per_cpu(ftrace_test_event_disable, cpu)); 5013 preempt_enable_notrace(); 5014 } 5015 5016 static struct ftrace_ops trace_ops __initdata = 5017 { 5018 .func = function_test_events_call, 5019 }; 5020 5021 static __init void event_trace_self_test_with_function(void) 5022 { 5023 int ret; 5024 5025 event_trace_file.tr = top_trace_array(); 5026 if (WARN_ON(!event_trace_file.tr)) 5027 return; 5028 5029 ret = register_ftrace_function(&trace_ops); 5030 if (WARN_ON(ret < 0)) { 5031 pr_info("Failed to enable function tracer for event tests\n"); 5032 return; 5033 } 5034 pr_info("Running tests again, along with the function tracer\n"); 5035 event_trace_self_tests(); 5036 unregister_ftrace_function(&trace_ops); 5037 } 5038 #else 5039 static __init void event_trace_self_test_with_function(void) 5040 { 5041 } 5042 #endif 5043 5044 static __init int event_trace_self_tests_init(void) 5045 { 5046 if (!tracing_selftest_disabled) { 5047 event_trace_self_tests(); 5048 event_trace_self_test_with_function(); 5049 } 5050 5051 return 0; 5052 } 5053 5054 late_initcall(event_trace_self_tests_init); 5055 5056 #endif 5057