// SPDX-License-Identifier: GPL-2.0
/*
 * event tracer
 *
 * Copyright (C) 2008 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
 *
 *  - Added format output of fields of the trace point.
 *    This was based off of work by Tom Zanussi <tzanussi@gmail.com>.
 *
 */

#define pr_fmt(fmt) fmt

#include <linux/workqueue.h>
#include <linux/security.h>
#include <linux/spinlock.h>
#include <linux/kthread.h>
#include <linux/tracefs.h>
#include <linux/uaccess.h>
#include <linux/module.h>
#include <linux/ctype.h>
#include <linux/sort.h>
#include <linux/slab.h>
#include <linux/delay.h>

#include <trace/events/sched.h>
#include <trace/syscall.h>

#include <asm/setup.h>

#include "trace_output.h"

#undef TRACE_SYSTEM
#define TRACE_SYSTEM "TRACE_SYSTEM"

DEFINE_MUTEX(event_mutex);

LIST_HEAD(ftrace_events);
static LIST_HEAD(ftrace_generic_fields);
static LIST_HEAD(ftrace_common_fields);
static bool eventdir_initialized;

static LIST_HEAD(module_strings);

struct module_string {
	struct list_head	next;
	struct module		*module;
	char			*str;
};

#define GFP_TRACE (GFP_KERNEL | __GFP_ZERO)

static struct kmem_cache *field_cachep;
static struct kmem_cache *file_cachep;

static inline int system_refcount(struct event_subsystem *system)
{
	return system->ref_count;
}

static int system_refcount_inc(struct event_subsystem *system)
{
	return system->ref_count++;
}

static int system_refcount_dec(struct event_subsystem *system)
{
	return --system->ref_count;
}

/* Double loops, do not use break, only goto's work */
#define do_for_each_event_file(tr, file)			\
	list_for_each_entry(tr, &ftrace_trace_arrays, list) {	\
		list_for_each_entry(file, &tr->events, list)

#define do_for_each_event_file_safe(tr, file)			\
	list_for_each_entry(tr, &ftrace_trace_arrays, list) {	\
		struct trace_event_file *___n;			\
		list_for_each_entry_safe(file, ___n, &tr->events, list)

#define while_for_each_event_file()		\
	}

static struct ftrace_event_field *
__find_event_field(struct list_head *head, char *name)
{
	struct ftrace_event_field *field;

	list_for_each_entry(field, head, link) {
		if (!strcmp(field->name, name))
			return field;
	}

	return NULL;
}

struct ftrace_event_field *
trace_find_event_field(struct trace_event_call *call, char *name)
{
	struct ftrace_event_field *field;
	struct list_head *head;

	head = trace_get_fields(call);
	field = __find_event_field(head, name);
	if (field)
		return field;

	field = __find_event_field(&ftrace_generic_fields, name);
	if (field)
		return field;

	return __find_event_field(&ftrace_common_fields, name);
}

static int __trace_define_field(struct list_head *head, const char *type,
				const char *name, int offset, int size,
				int is_signed, int filter_type)
{
	struct ftrace_event_field *field;

	field = kmem_cache_alloc(field_cachep, GFP_TRACE);
	if (!field)
		return -ENOMEM;

	field->name = name;
	field->type = type;

	if (filter_type == FILTER_OTHER)
		field->filter_type = filter_assign_type(type);
	else
		field->filter_type = filter_type;

	field->offset = offset;
	field->size = size;
	field->is_signed = is_signed;

	list_add(&field->link, head);

	return 0;
}

int trace_define_field(struct trace_event_call *call, const char *type,
		       const char *name, int offset, int size, int is_signed,
		       int filter_type)
{
	struct list_head *head;

	if (WARN_ON(!call->class))
		return 0;

	head = trace_get_fields(call);
	return __trace_define_field(head, type, name, offset, size,
				    is_signed, filter_type);
}
EXPORT_SYMBOL_GPL(trace_define_field);

#define __generic_field(type, item, filter_type)			\
	ret = __trace_define_field(&ftrace_generic_fields, #type,	\
				   #item, 0, 0, is_signed_type(type),	\
				   filter_type);			\
	if (ret)							\
		return ret;

#define __common_field(type, item)					\
	ret = __trace_define_field(&ftrace_common_fields, #type,	\
				   "common_" #item,			\
				   offsetof(typeof(ent), item),		\
				   sizeof(ent.item),			\
				   is_signed_type(type), FILTER_OTHER);	\
	if (ret)							\
		return ret;

static int trace_define_generic_fields(void)
{
	int ret;

	__generic_field(int, CPU, FILTER_CPU);
	__generic_field(int, cpu, FILTER_CPU);
	__generic_field(int, common_cpu, FILTER_CPU);
	__generic_field(char *, COMM, FILTER_COMM);
	__generic_field(char *, comm, FILTER_COMM);

	return ret;
}

static int trace_define_common_fields(void)
{
	int ret;
	struct trace_entry ent;

	__common_field(unsigned short, type);
	__common_field(unsigned char, flags);
	/* Holds both preempt_count and migrate_disable */
	__common_field(unsigned char, preempt_count);
	__common_field(int, pid);

	return ret;
}

static void trace_destroy_fields(struct trace_event_call *call)
{
	struct ftrace_event_field *field, *next;
	struct list_head *head;

	head = trace_get_fields(call);
	list_for_each_entry_safe(field, next, head, link) {
		list_del(&field->link);
		kmem_cache_free(field_cachep, field);
	}
}

/*
 * run-time version of trace_event_get_offsets_<call>() that returns the last
 * accessible offset of trace fields excluding __dynamic_array bytes
 */
int trace_event_get_offsets(struct trace_event_call *call)
{
	struct ftrace_event_field *tail;
	struct list_head *head;

	head = trace_get_fields(call);
	/*
	 * head->next points to the last field with the largest offset,
	 * since it was added last by trace_define_field()
	 */
	tail = list_first_entry(head, struct ftrace_event_field, link);
	return tail->offset + tail->size;
}

/*
 * Check if the referenced field is an array and return true,
 * as arrays are OK to dereference.
 */
static bool test_field(const char *fmt, struct trace_event_call *call)
{
	struct trace_event_fields *field = call->class->fields_array;
	const char *array_descriptor;
	const char *p = fmt;
	int len;

	if (!(len = str_has_prefix(fmt, "REC->")))
		return false;
	fmt += len;
	for (p = fmt; *p; p++) {
		if (!isalnum(*p) && *p != '_')
			break;
	}
	len = p - fmt;

	for (; field->type; field++) {
		if (strncmp(field->name, fmt, len) ||
		    field->name[len])
			continue;
		array_descriptor = strchr(field->type, '[');
		/* This is an array and is OK to dereference. */
		return array_descriptor != NULL;
	}
	return false;
}

/*
 * Examine the print fmt of the event looking for unsafe dereference
 * pointers using %p* that could be recorded in the trace event and
 * much later referenced after the pointer was freed. Dereferencing
 * a pointer is OK if it is dereferenced into the event itself.
 */
static void test_event_printk(struct trace_event_call *call)
{
	u64 dereference_flags = 0;
	bool first = true;
	const char *fmt, *c, *r, *a;
	int parens = 0;
	char in_quote = 0;
	int start_arg = 0;
	int arg = 0;
	int i;

	fmt = call->print_fmt;

	if (!fmt)
		return;

	for (i = 0; fmt[i]; i++) {
		switch (fmt[i]) {
		case '\\':
			i++;
			if (!fmt[i])
				return;
			continue;
		case '"':
		case '\'':
			/*
			 * The print fmt starts with a string that
			 * is processed first to find %p* usage,
			 * then after the first string, the print fmt
			 * contains arguments that are used to check
			 * if the dereferenced %p* usage is safe.
			 */
			if (first) {
				if (fmt[i] == '\'')
					continue;
				if (in_quote) {
					arg = 0;
					first = false;
					/*
					 * If there were no %p* uses,
					 * the fmt is OK.
					 */
					if (!dereference_flags)
						return;
				}
			}
			if (in_quote) {
				if (in_quote == fmt[i])
					in_quote = 0;
			} else {
				in_quote = fmt[i];
			}
			continue;
		case '%':
			if (!first || !in_quote)
				continue;
			i++;
			if (!fmt[i])
				return;
			switch (fmt[i]) {
			case '%':
				continue;
			case 'p':
				/* Find dereferencing fields */
				switch (fmt[i + 1]) {
				case 'B': case 'R': case 'r':
				case 'b': case 'M': case 'm':
				case 'I': case 'i': case 'E':
				case 'U': case 'V': case 'N':
				case 'a': case 'd': case 'D':
				case 'g': case 't': case 'C':
				case 'O': case 'f':
					if (WARN_ONCE(arg == 63,
						      "Too many args for event: %s",
						      trace_event_name(call)))
						return;
					dereference_flags |= 1ULL << arg;
				}
				break;
			default:
			{
				bool star = false;
				int j;

				/* Increment arg if %*s exists. */
				for (j = 0; fmt[i + j]; j++) {
					if (isdigit(fmt[i + j]) ||
					    fmt[i + j] == '.')
						continue;
					if (fmt[i + j] == '*') {
						star = true;
						continue;
					}
					if ((fmt[i + j] == 's') && star)
						arg++;
					break;
				}
				break;
			} /* default */

			} /* switch */
			arg++;
			continue;
		case '(':
			if (in_quote)
				continue;
			parens++;
			continue;
		case ')':
			if (in_quote)
				continue;
			parens--;
			if (WARN_ONCE(parens < 0,
				      "Paren mismatch for event: %s\narg='%s'\n%*s",
				      trace_event_name(call),
				      fmt + start_arg,
				      (i - start_arg) + 5, "^"))
				return;
			continue;
		case ',':
			if (in_quote || parens)
				continue;
			i++;
			while (isspace(fmt[i]))
				i++;
			start_arg = i;
			if (!(dereference_flags & (1ULL << arg)))
				goto next_arg;

			/* Find the REC-> in the argument */
			c = strchr(fmt + i, ',');
			r = strstr(fmt + i, "REC->");
			if (r && (!c || r < c)) {
				/*
				 * Addresses of events on the buffer,
				 * or an array on the buffer is
				 * OK to dereference.
				 * There are ways to fool this, but
				 * this is to catch common mistakes,
				 * not malicious code.
				 */
				a = strchr(fmt + i, '&');
				if ((a && (a < r)) || test_field(r, call))
					dereference_flags &= ~(1ULL << arg);
			} else if ((r = strstr(fmt + i, "__get_dynamic_array(")) &&
				   (!c || r < c)) {
				dereference_flags &= ~(1ULL << arg);
			} else if ((r = strstr(fmt + i, "__get_sockaddr(")) &&
				   (!c || r < c)) {
				dereference_flags &= ~(1ULL << arg);
			}

		next_arg:
			i--;
			arg++;
		}
	}

	/*
	 * If you triggered the below warning, the trace event reported
	 * uses an unsafe dereference pointer %p*. As the data stored
	 * at the trace event time may no longer exist when the trace
	 * event is printed, dereferencing to the original source is
	 * unsafe. The source of the dereference must be copied into the
	 * event itself, and the dereference must access the copy instead.
	 */
	if (WARN_ON_ONCE(dereference_flags)) {
		arg = 1;
		while (!(dereference_flags & 1)) {
			dereference_flags >>= 1;
			arg++;
		}
		pr_warn("event %s has unsafe dereference of argument %d\n",
			trace_event_name(call), arg);
		pr_warn("print_fmt: %s\n", fmt);
	}
}

int trace_event_raw_init(struct trace_event_call *call)
{
	int id;

	id = register_trace_event(&call->event);
	if (!id)
		return -ENODEV;

	test_event_printk(call);

	return 0;
}
EXPORT_SYMBOL_GPL(trace_event_raw_init);

bool trace_event_ignore_this_pid(struct trace_event_file *trace_file)
{
	struct trace_array *tr = trace_file->tr;
	struct trace_array_cpu *data;
	struct trace_pid_list *no_pid_list;
	struct trace_pid_list *pid_list;

	pid_list = rcu_dereference_raw(tr->filtered_pids);
	no_pid_list = rcu_dereference_raw(tr->filtered_no_pids);

	if (!pid_list && !no_pid_list)
		return false;

	data = this_cpu_ptr(tr->array_buffer.data);

	return data->ignore_pid;
}
EXPORT_SYMBOL_GPL(trace_event_ignore_this_pid);

void *trace_event_buffer_reserve(struct trace_event_buffer *fbuffer,
				 struct trace_event_file *trace_file,
				 unsigned long len)
{
	struct trace_event_call *event_call = trace_file->event_call;

	if ((trace_file->flags & EVENT_FILE_FL_PID_FILTER) &&
	    trace_event_ignore_this_pid(trace_file))
		return NULL;

	/*
	 * If CONFIG_PREEMPTION is enabled, then the tracepoint itself disables
	 * preemption (adding one to the preempt_count). Since we are
	 * interested in the preempt_count at the time the tracepoint was
	 * hit, we need to subtract one to offset the increment.
	 */
	fbuffer->trace_ctx = tracing_gen_ctx_dec();
	fbuffer->trace_file = trace_file;

	fbuffer->event =
		trace_event_buffer_lock_reserve(&fbuffer->buffer, trace_file,
						event_call->event.type, len,
						fbuffer->trace_ctx);
	if (!fbuffer->event)
		return NULL;

	fbuffer->regs = NULL;
	fbuffer->entry = ring_buffer_event_data(fbuffer->event);
	return fbuffer->entry;
}
EXPORT_SYMBOL_GPL(trace_event_buffer_reserve);

int trace_event_reg(struct trace_event_call *call,
		    enum trace_reg type, void *data)
{
	struct trace_event_file *file = data;

	WARN_ON(!(call->flags & TRACE_EVENT_FL_TRACEPOINT));
	switch (type) {
	case TRACE_REG_REGISTER:
		return tracepoint_probe_register(call->tp,
						 call->class->probe,
						 file);
	case TRACE_REG_UNREGISTER:
		tracepoint_probe_unregister(call->tp,
					    call->class->probe,
					    file);
		return 0;

#ifdef CONFIG_PERF_EVENTS
	case TRACE_REG_PERF_REGISTER:
		return tracepoint_probe_register(call->tp,
						 call->class->perf_probe,
						 call);
	case TRACE_REG_PERF_UNREGISTER:
		tracepoint_probe_unregister(call->tp,
					    call->class->perf_probe,
					    call);
		return 0;
	case TRACE_REG_PERF_OPEN:
	case TRACE_REG_PERF_CLOSE:
	case TRACE_REG_PERF_ADD:
	case TRACE_REG_PERF_DEL:
		return 0;
#endif
	}
	return 0;
}
EXPORT_SYMBOL_GPL(trace_event_reg);

void trace_event_enable_cmd_record(bool enable)
{
	struct trace_event_file *file;
	struct trace_array *tr;

	lockdep_assert_held(&event_mutex);

	do_for_each_event_file(tr, file) {

		if (!(file->flags & EVENT_FILE_FL_ENABLED))
			continue;

		if (enable) {
			tracing_start_cmdline_record();
			set_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
		} else {
			tracing_stop_cmdline_record();
			clear_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
		}
	} while_for_each_event_file();
}

void trace_event_enable_tgid_record(bool enable)
{
	struct trace_event_file *file;
	struct trace_array *tr;

	lockdep_assert_held(&event_mutex);

	do_for_each_event_file(tr, file) {
		if (!(file->flags & EVENT_FILE_FL_ENABLED))
			continue;

		if (enable) {
			tracing_start_tgid_record();
			set_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags);
		} else {
			tracing_stop_tgid_record();
			clear_bit(EVENT_FILE_FL_RECORDED_TGID_BIT,
				  &file->flags);
		}
	} while_for_each_event_file();
}

static int __ftrace_event_enable_disable(struct trace_event_file *file,
					 int enable, int soft_disable)
{
	struct trace_event_call *call = file->event_call;
	struct trace_array *tr = file->tr;
	unsigned long file_flags = file->flags;
	int ret = 0;
	int disable;

	switch (enable) {
	case 0:
		/*
		 * When soft_disable is set and enable is cleared, the sm_ref
		 * reference counter is decremented. If it reaches 0, we want
		 * to clear the SOFT_DISABLED flag but leave the event in the
		 * state that it was. That is, if the event was enabled and
		 * SOFT_DISABLED isn't set, then do nothing. But if SOFT_DISABLED
		 * is set we do not want the event to be enabled before we
		 * clear the bit.
		 *
		 * When soft_disable is not set but the SOFT_MODE flag is,
		 * we do nothing. Do not disable the tracepoint, otherwise
		 * "soft enable"s (clearing the SOFT_DISABLED bit) won't work.
		 */
		if (soft_disable) {
			if (atomic_dec_return(&file->sm_ref) > 0)
				break;
			disable = file->flags & EVENT_FILE_FL_SOFT_DISABLED;
			clear_bit(EVENT_FILE_FL_SOFT_MODE_BIT, &file->flags);
		} else
			disable = !(file->flags & EVENT_FILE_FL_SOFT_MODE);

		if (disable && (file->flags & EVENT_FILE_FL_ENABLED)) {
			clear_bit(EVENT_FILE_FL_ENABLED_BIT, &file->flags);
			if (file->flags & EVENT_FILE_FL_RECORDED_CMD) {
				tracing_stop_cmdline_record();
				clear_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
			}

			if (file->flags & EVENT_FILE_FL_RECORDED_TGID) {
				tracing_stop_tgid_record();
				clear_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags);
			}

			call->class->reg(call, TRACE_REG_UNREGISTER, file);
		}
		/* If in SOFT_MODE, just set the SOFT_DISABLE_BIT, else clear it */
		if (file->flags & EVENT_FILE_FL_SOFT_MODE)
			set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
		else
			clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
		break;
	case 1:
		/*
		 * When soft_disable is set and enable is set, we want to
		 * register the tracepoint for the event, but leave the event
		 * as is. That means, if the event was already enabled, we do
		 * nothing (but set SOFT_MODE). If the event is disabled, we
		 * set SOFT_DISABLED before enabling the event tracepoint, so
		 * it still seems to be disabled.
		 */
		if (!soft_disable)
			clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
		else {
			if (atomic_inc_return(&file->sm_ref) > 1)
				break;
			set_bit(EVENT_FILE_FL_SOFT_MODE_BIT, &file->flags);
		}

		if (!(file->flags & EVENT_FILE_FL_ENABLED)) {
			bool cmd = false, tgid = false;

			/* Keep the event disabled, when going to SOFT_MODE. */
			if (soft_disable)
				set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);

			if (tr->trace_flags & TRACE_ITER_RECORD_CMD) {
				cmd = true;
				tracing_start_cmdline_record();
				set_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
			}

			if (tr->trace_flags & TRACE_ITER_RECORD_TGID) {
				tgid = true;
				tracing_start_tgid_record();
				set_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags);
			}

			ret = call->class->reg(call, TRACE_REG_REGISTER, file);
			if (ret) {
				if (cmd)
					tracing_stop_cmdline_record();
				if (tgid)
					tracing_stop_tgid_record();
				pr_info("event trace: Could not enable event "
					"%s\n", trace_event_name(call));
				break;
			}
			set_bit(EVENT_FILE_FL_ENABLED_BIT, &file->flags);

			/* WAS_ENABLED gets set but never cleared. */
			set_bit(EVENT_FILE_FL_WAS_ENABLED_BIT, &file->flags);
		}
		break;
	}

	/* Enable or disable use of trace_buffered_event */
	if ((file_flags & EVENT_FILE_FL_SOFT_DISABLED) !=
	    (file->flags & EVENT_FILE_FL_SOFT_DISABLED)) {
		if (file->flags & EVENT_FILE_FL_SOFT_DISABLED)
			trace_buffered_event_enable();
		else
			trace_buffered_event_disable();
	}

	return ret;
}

int trace_event_enable_disable(struct trace_event_file *file,
			       int enable, int soft_disable)
{
	return __ftrace_event_enable_disable(file, enable, soft_disable);
}

static int ftrace_event_enable_disable(struct trace_event_file *file,
				       int enable)
{
	return __ftrace_event_enable_disable(file, enable, 0);
}

static void ftrace_clear_events(struct trace_array *tr)
{
	struct trace_event_file *file;

	mutex_lock(&event_mutex);
	list_for_each_entry(file, &tr->events, list) {
		ftrace_event_enable_disable(file, 0);
	}
	mutex_unlock(&event_mutex);
}

static void
event_filter_pid_sched_process_exit(void *data, struct task_struct *task)
{
	struct trace_pid_list *pid_list;
	struct trace_array *tr = data;

	pid_list = rcu_dereference_raw(tr->filtered_pids);
	trace_filter_add_remove_task(pid_list, NULL, task);

	pid_list = rcu_dereference_raw(tr->filtered_no_pids);
	trace_filter_add_remove_task(pid_list, NULL, task);
}

static void
event_filter_pid_sched_process_fork(void *data,
				    struct task_struct *self,
				    struct task_struct *task)
{
	struct trace_pid_list *pid_list;
	struct trace_array *tr = data;

	pid_list = rcu_dereference_sched(tr->filtered_pids);
	trace_filter_add_remove_task(pid_list, self, task);

	pid_list = rcu_dereference_sched(tr->filtered_no_pids);
	trace_filter_add_remove_task(pid_list, self, task);
}

void trace_event_follow_fork(struct trace_array *tr, bool enable)
{
	if (enable) {
		register_trace_prio_sched_process_fork(event_filter_pid_sched_process_fork,
						       tr, INT_MIN);
		register_trace_prio_sched_process_free(event_filter_pid_sched_process_exit,
						       tr, INT_MAX);
	} else {
		unregister_trace_sched_process_fork(event_filter_pid_sched_process_fork,
						    tr);
		unregister_trace_sched_process_free(event_filter_pid_sched_process_exit,
						    tr);
	}
}

static void
event_filter_pid_sched_switch_probe_pre(void *data, bool preempt,
					struct task_struct *prev,
					struct task_struct *next,
					unsigned int prev_state)
{
	struct trace_array *tr = data;
	struct trace_pid_list *no_pid_list;
	struct trace_pid_list *pid_list;
	bool ret;

	pid_list = rcu_dereference_sched(tr->filtered_pids);
	no_pid_list = rcu_dereference_sched(tr->filtered_no_pids);

	/*
	 * Sched switch is funny, as we only want to ignore it
	 * in the notrace case if both prev and next should be ignored.
	 */
	ret = trace_ignore_this_task(NULL, no_pid_list, prev) &&
	      trace_ignore_this_task(NULL, no_pid_list, next);

	this_cpu_write(tr->array_buffer.data->ignore_pid, ret ||
		       (trace_ignore_this_task(pid_list, NULL, prev) &&
			trace_ignore_this_task(pid_list, NULL, next)));
}

static void
event_filter_pid_sched_switch_probe_post(void *data, bool preempt,
					 struct task_struct *prev,
					 struct task_struct *next,
					 unsigned int prev_state)
{
	struct trace_array *tr = data;
	struct trace_pid_list *no_pid_list;
	struct trace_pid_list *pid_list;

	pid_list = rcu_dereference_sched(tr->filtered_pids);
	no_pid_list = rcu_dereference_sched(tr->filtered_no_pids);

	this_cpu_write(tr->array_buffer.data->ignore_pid,
		       trace_ignore_this_task(pid_list, no_pid_list, next));
}

static void
event_filter_pid_sched_wakeup_probe_pre(void *data, struct task_struct *task)
{
	struct trace_array *tr = data;
	struct trace_pid_list *no_pid_list;
	struct trace_pid_list *pid_list;

	/* Nothing to do if we are already tracing */
	if (!this_cpu_read(tr->array_buffer.data->ignore_pid))
		return;

	pid_list = rcu_dereference_sched(tr->filtered_pids);
	no_pid_list = rcu_dereference_sched(tr->filtered_no_pids);

	this_cpu_write(tr->array_buffer.data->ignore_pid,
		       trace_ignore_this_task(pid_list, no_pid_list, task));
}

static void
event_filter_pid_sched_wakeup_probe_post(void *data, struct task_struct *task)
{
	struct trace_array *tr = data;
	struct trace_pid_list *no_pid_list;
	struct trace_pid_list *pid_list;

	/* Nothing to do if we are not tracing */
	if (this_cpu_read(tr->array_buffer.data->ignore_pid))
		return;

	pid_list = rcu_dereference_sched(tr->filtered_pids);
	no_pid_list = rcu_dereference_sched(tr->filtered_no_pids);

	/* Set tracing if current is enabled */
	this_cpu_write(tr->array_buffer.data->ignore_pid,
		       trace_ignore_this_task(pid_list, no_pid_list, current));
}

static void unregister_pid_events(struct trace_array *tr)
{
	unregister_trace_sched_switch(event_filter_pid_sched_switch_probe_pre, tr);
	unregister_trace_sched_switch(event_filter_pid_sched_switch_probe_post, tr);

	unregister_trace_sched_wakeup(event_filter_pid_sched_wakeup_probe_pre, tr);
	unregister_trace_sched_wakeup(event_filter_pid_sched_wakeup_probe_post, tr);

	unregister_trace_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_pre, tr);
	unregister_trace_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_post, tr);

	unregister_trace_sched_waking(event_filter_pid_sched_wakeup_probe_pre, tr);
	unregister_trace_sched_waking(event_filter_pid_sched_wakeup_probe_post, tr);
}

static void __ftrace_clear_event_pids(struct trace_array *tr, int type)
{
	struct trace_pid_list *pid_list;
	struct trace_pid_list *no_pid_list;
	struct trace_event_file *file;
	int cpu;

	pid_list = rcu_dereference_protected(tr->filtered_pids,
					     lockdep_is_held(&event_mutex));
	no_pid_list = rcu_dereference_protected(tr->filtered_no_pids,
					     lockdep_is_held(&event_mutex));

	/* Make sure there's something to do */
	if (!pid_type_enabled(type, pid_list, no_pid_list))
		return;

	if (!still_need_pid_events(type, pid_list, no_pid_list)) {
		unregister_pid_events(tr);

		list_for_each_entry(file, &tr->events, list) {
			clear_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags);
		}
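
		/* No PID filtering is left, so reset the per-CPU ignore flags. */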
		for_each_possible_cpu(cpu)
			per_cpu_ptr(tr->array_buffer.data, cpu)->ignore_pid = false;
	}

	if (type & TRACE_PIDS)
		rcu_assign_pointer(tr->filtered_pids, NULL);

	if (type & TRACE_NO_PIDS)
		rcu_assign_pointer(tr->filtered_no_pids, NULL);

	/* Wait till all users are no longer using pid filtering */
	tracepoint_synchronize_unregister();

	if ((type & TRACE_PIDS) && pid_list)
		trace_pid_list_free(pid_list);

	if ((type & TRACE_NO_PIDS) && no_pid_list)
		trace_pid_list_free(no_pid_list);
}

static void ftrace_clear_event_pids(struct trace_array *tr, int type)
{
	mutex_lock(&event_mutex);
	__ftrace_clear_event_pids(tr, type);
	mutex_unlock(&event_mutex);
}

static void __put_system(struct event_subsystem *system)
{
	struct event_filter *filter = system->filter;

	WARN_ON_ONCE(system_refcount(system) == 0);
	if (system_refcount_dec(system))
		return;

	list_del(&system->list);

	if (filter) {
		kfree(filter->filter_string);
		kfree(filter);
	}
	kfree_const(system->name);
	kfree(system);
}

static void __get_system(struct event_subsystem *system)
{
	WARN_ON_ONCE(system_refcount(system) == 0);
	system_refcount_inc(system);
}

static void __get_system_dir(struct trace_subsystem_dir *dir)
{
	WARN_ON_ONCE(dir->ref_count == 0);
	dir->ref_count++;
	__get_system(dir->subsystem);
}

static void __put_system_dir(struct trace_subsystem_dir *dir)
{
	WARN_ON_ONCE(dir->ref_count == 0);
	/* If the subsystem is about to be freed, the dir must be too */
	WARN_ON_ONCE(system_refcount(dir->subsystem) == 1 && dir->ref_count != 1);

	__put_system(dir->subsystem);
	if (!--dir->ref_count)
		kfree(dir);
}

static void put_system(struct trace_subsystem_dir *dir)
{
	mutex_lock(&event_mutex);
	__put_system_dir(dir);
	mutex_unlock(&event_mutex);
}

static void remove_subsystem(struct trace_subsystem_dir *dir)
{
	if (!dir)
		return;

	if (!--dir->nr_events) {
		tracefs_remove(dir->entry);
		list_del(&dir->list);
		__put_system_dir(dir);
	}
}

static void remove_event_file_dir(struct trace_event_file *file)
{
	struct dentry *dir = file->dir;
	struct dentry *child;

	if (dir) {
		spin_lock(&dir->d_lock);	/* probably unneeded */
		list_for_each_entry(child, &dir->d_subdirs, d_child) {
			if (d_really_is_positive(child))	/* probably unneeded */
				d_inode(child)->i_private = NULL;
		}
		spin_unlock(&dir->d_lock);

		tracefs_remove(dir);
	}

	list_del(&file->list);
	remove_subsystem(file->system);
	free_event_filter(file->filter);
	kmem_cache_free(file_cachep, file);
}

/*
 * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events.
 */
static int
__ftrace_set_clr_event_nolock(struct trace_array *tr, const char *match,
			      const char *sub, const char *event, int set)
{
	struct trace_event_file *file;
	struct trace_event_call *call;
	const char *name;
	int ret = -EINVAL;
	int eret = 0;

	list_for_each_entry(file, &tr->events, list) {

		call = file->event_call;
		name = trace_event_name(call);

		if (!name || !call->class || !call->class->reg)
			continue;

		if (call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)
			continue;

		if (match &&
		    strcmp(match, name) != 0 &&
		    strcmp(match, call->class->system) != 0)
			continue;

		if (sub && strcmp(sub, call->class->system) != 0)
			continue;

		if (event && strcmp(event, name) != 0)
			continue;

		ret = ftrace_event_enable_disable(file, set);

		/*
		 * Save the first error and return that. Some events
		 * may still have been enabled, but let the user
		 * know that something went wrong.
		 */
		if (ret && !eret)
			eret = ret;

		ret = eret;
	}

	return ret;
}

static int __ftrace_set_clr_event(struct trace_array *tr, const char *match,
				  const char *sub, const char *event, int set)
{
	int ret;

	mutex_lock(&event_mutex);
	ret = __ftrace_set_clr_event_nolock(tr, match, sub, event, set);
	mutex_unlock(&event_mutex);

	return ret;
}

int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set)
{
	char *event = NULL, *sub = NULL, *match;
	int ret;

	if (!tr)
		return -ENOENT;
	/*
	 * The buf format can be <subsystem>:<event-name>
	 *  *:<event-name> means any event by that name.
	 *  :<event-name> is the same.
	 *
	 *  <subsystem>:* means all events in that subsystem
	 *  <subsystem>: means the same.
	 *
	 *  <name> (no ':') means all events in a subsystem with
	 *  the name <name> or any event that matches <name>
	 */

	match = strsep(&buf, ":");
	if (buf) {
		sub = match;
		event = buf;
		match = NULL;

		if (!strlen(sub) || strcmp(sub, "*") == 0)
			sub = NULL;
		if (!strlen(event) || strcmp(event, "*") == 0)
			event = NULL;
	}

	ret = __ftrace_set_clr_event(tr, match, sub, event, set);

	/* Put back the colon to allow this to be called again */
	if (buf)
		*(buf - 1) = ':';

	return ret;
}

/**
 * trace_set_clr_event - enable or disable an event
 * @system: system name to match (NULL for any system)
 * @event: event name to match (NULL for all events, within system)
 * @set: 1 to enable, 0 to disable
 *
 * This is a way for other parts of the kernel to enable or disable
 * event recording.
 *
 * Returns 0 on success, -EINVAL if the parameters do not match any
 * registered events.
 */
int trace_set_clr_event(const char *system, const char *event, int set)
{
	struct trace_array *tr = top_trace_array();

	if (!tr)
		return -ENODEV;

	return __ftrace_set_clr_event(tr, NULL, system, event, set);
}
EXPORT_SYMBOL_GPL(trace_set_clr_event);

/**
 * trace_array_set_clr_event - enable or disable an event for a trace array.
 * @tr: concerned trace array.
 * @system: system name to match (NULL for any system)
 * @event: event name to match (NULL for all events, within system)
 * @enable: true to enable, false to disable
 *
 * This is a way for other parts of the kernel to enable or disable
 * event recording.
 *
 * Returns 0 on success, -EINVAL if the parameters do not match any
 * registered events.
 */
int trace_array_set_clr_event(struct trace_array *tr, const char *system,
			      const char *event, bool enable)
{
	int set;

	if (!tr)
		return -ENOENT;

	set = (enable == true) ? 1 : 0;
	return __ftrace_set_clr_event(tr, NULL, system, event, set);
}
EXPORT_SYMBOL_GPL(trace_array_set_clr_event);

/* 128 should be much more than enough */
#define EVENT_BUF_SIZE		127

static ssize_t
ftrace_event_write(struct file *file, const char __user *ubuf,
		   size_t cnt, loff_t *ppos)
{
	struct trace_parser parser;
	struct seq_file *m = file->private_data;
	struct trace_array *tr = m->private;
	ssize_t read, ret;

	if (!cnt)
		return 0;

	ret = tracing_update_buffers();
	if (ret < 0)
		return ret;

	if (trace_parser_get_init(&parser, EVENT_BUF_SIZE + 1))
		return -ENOMEM;

	read = trace_get_user(&parser, ubuf, cnt, ppos);

	if (read >= 0 && trace_parser_loaded((&parser))) {
		int set = 1;

		if (*parser.buffer == '!')
			set = 0;

		ret = ftrace_set_clr_event(tr, parser.buffer + !set, set);
		if (ret)
			goto out_put;
	}

	ret = read;

 out_put:
	trace_parser_put(&parser);

	return ret;
}

static void *
t_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct trace_event_file *file = v;
	struct trace_event_call *call;
	struct trace_array *tr = m->private;

	(*pos)++;

	list_for_each_entry_continue(file, &tr->events, list) {
		call = file->event_call;
		/*
		 * The ftrace subsystem is for showing formats only.
		 * They can not be enabled or disabled via the event files.
		 */
		if (call->class && call->class->reg &&
		    !(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE))
			return file;
	}

	return NULL;
}

static void *t_start(struct seq_file *m, loff_t *pos)
{
	struct trace_event_file *file;
	struct trace_array *tr = m->private;
	loff_t l;

	mutex_lock(&event_mutex);

	file = list_entry(&tr->events, struct trace_event_file, list);
	for (l = 0; l <= *pos; ) {
		file = t_next(m, file, &l);
		if (!file)
			break;
	}
	return file;
}

static void *
s_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct trace_event_file *file = v;
	struct trace_array *tr = m->private;

	(*pos)++;

	list_for_each_entry_continue(file, &tr->events, list) {
		if (file->flags & EVENT_FILE_FL_ENABLED)
			return file;
	}

	return NULL;
}

static void *s_start(struct seq_file *m, loff_t *pos)
{
	struct trace_event_file *file;
	struct trace_array *tr = m->private;
	loff_t l;

	mutex_lock(&event_mutex);

	file = list_entry(&tr->events, struct trace_event_file, list);
	for (l = 0; l <= *pos; ) {
		file = s_next(m, file, &l);
		if (!file)
			break;
	}
	return file;
}

static int t_show(struct seq_file *m, void *v)
{
	struct trace_event_file *file = v;
	struct trace_event_call *call = file->event_call;

	if (strcmp(call->class->system, TRACE_SYSTEM) != 0)
		seq_printf(m, "%s:", call->class->system);
	seq_printf(m, "%s\n", trace_event_name(call));

	return 0;
}

static void t_stop(struct seq_file *m, void *p)
{
	mutex_unlock(&event_mutex);
}

static void *
__next(struct seq_file *m, void *v, loff_t *pos, int type)
{
	struct trace_array *tr = m->private;
	struct trace_pid_list *pid_list;

	if (type == TRACE_PIDS)
		pid_list = rcu_dereference_sched(tr->filtered_pids);
	else
		pid_list = rcu_dereference_sched(tr->filtered_no_pids);

	return trace_pid_next(pid_list, v, pos);
}

static void *
p_next(struct seq_file *m, void *v, loff_t *pos)
{
	return __next(m, v, pos, TRACE_PIDS);
}

static void *
np_next(struct seq_file *m, void *v, loff_t *pos)
{
	return __next(m, v, pos, TRACE_NO_PIDS);
}

static void *__start(struct seq_file *m, loff_t *pos, int type)
	__acquires(RCU)
{
	struct trace_pid_list *pid_list;
	struct trace_array *tr = m->private;

	/*
	 * Grab the mutex, to keep calls to p_next() having the same
	 * tr->filtered_pids as p_start() has.
	 * If we just passed the tr->filtered_pids around, then RCU would
	 * have been enough, but doing that makes things more complex.
	 */
	mutex_lock(&event_mutex);
	rcu_read_lock_sched();

	if (type == TRACE_PIDS)
		pid_list = rcu_dereference_sched(tr->filtered_pids);
	else
		pid_list = rcu_dereference_sched(tr->filtered_no_pids);

	if (!pid_list)
		return NULL;

	return trace_pid_start(pid_list, pos);
}

static void *p_start(struct seq_file *m, loff_t *pos)
	__acquires(RCU)
{
	return __start(m, pos, TRACE_PIDS);
}

static void *np_start(struct seq_file *m, loff_t *pos)
	__acquires(RCU)
{
	return __start(m, pos, TRACE_NO_PIDS);
}

static void p_stop(struct seq_file *m, void *p)
	__releases(RCU)
{
	rcu_read_unlock_sched();
	mutex_unlock(&event_mutex);
}

static ssize_t
event_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
		  loff_t *ppos)
{
	struct trace_event_file *file;
	unsigned long flags;
	char buf[4] = "0";

	mutex_lock(&event_mutex);
	file = event_file_data(filp);
	if (likely(file))
		flags = file->flags;
	mutex_unlock(&event_mutex);

	if (!file)
		return -ENODEV;

	if (flags & EVENT_FILE_FL_ENABLED &&
	    !(flags & EVENT_FILE_FL_SOFT_DISABLED))
		strcpy(buf, "1");

	if (flags & EVENT_FILE_FL_SOFT_DISABLED ||
	    flags & EVENT_FILE_FL_SOFT_MODE)
		strcat(buf, "*");

	strcat(buf, "\n");

	return simple_read_from_buffer(ubuf, cnt, ppos, buf, strlen(buf));
}

static ssize_t
event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
		   loff_t *ppos)
{
	struct trace_event_file *file;
	unsigned long val;
	int ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	ret = tracing_update_buffers();
	if (ret < 0)
		return ret;

	switch (val) {
	case 0:
	case 1:
		ret = -ENODEV;
		mutex_lock(&event_mutex);
		file = event_file_data(filp);
		if (likely(file))
			ret = ftrace_event_enable_disable(file, val);
		mutex_unlock(&event_mutex);
		break;

	default:
		return -EINVAL;
	}

	*ppos += cnt;

	return ret ? ret : cnt;
}

static ssize_t
system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
		   loff_t *ppos)
{
	const char set_to_char[4] = { '?', '0', '1', 'X' };
	struct trace_subsystem_dir *dir = filp->private_data;
	struct event_subsystem *system = dir->subsystem;
	struct trace_event_call *call;
	struct trace_event_file *file;
	struct trace_array *tr = dir->tr;
	char buf[2];
	int set = 0;
	int ret;

	mutex_lock(&event_mutex);
	list_for_each_entry(file, &tr->events, list) {
		call = file->event_call;
		if ((call->flags & TRACE_EVENT_FL_IGNORE_ENABLE) ||
		    !trace_event_name(call) || !call->class || !call->class->reg)
			continue;

		if (system && strcmp(call->class->system, system->name) != 0)
			continue;

		/*
		 * We need to find out if all the events are set,
		 * or if all the events are cleared, or if we have
		 * a mixture.
		 */
		set |= (1 << !!(file->flags & EVENT_FILE_FL_ENABLED));

		/*
		 * If we have a mixture, no need to look further.
		 */
		if (set == 3)
			break;
	}
	mutex_unlock(&event_mutex);

	buf[0] = set_to_char[set];
	buf[1] = '\n';

	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);

	return ret;
}

static ssize_t
system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
		    loff_t *ppos)
{
	struct trace_subsystem_dir *dir = filp->private_data;
	struct event_subsystem *system = dir->subsystem;
	const char *name = NULL;
	unsigned long val;
	ssize_t ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	ret = tracing_update_buffers();
	if (ret < 0)
		return ret;

	if (val != 0 && val != 1)
		return -EINVAL;

	/*
	 * Opening of "enable" adds a ref count to system,
	 * so the name is safe to use.
	 */
	if (system)
		name = system->name;

	ret = __ftrace_set_clr_event(dir->tr, NULL, name, NULL, val);
	if (ret)
		goto out;

	ret = cnt;

 out:
	*ppos += cnt;

	return ret;
}

enum {
	FORMAT_HEADER		= 1,
	FORMAT_FIELD_SEPERATOR	= 2,
	FORMAT_PRINTFMT		= 3,
};

static void *f_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct trace_event_call *call = event_file_data(m->private);
	struct list_head *common_head = &ftrace_common_fields;
	struct list_head *head = trace_get_fields(call);
	struct list_head *node = v;

	(*pos)++;

	switch ((unsigned long)v) {
	case FORMAT_HEADER:
		node = common_head;
		break;

	case FORMAT_FIELD_SEPERATOR:
		node = head;
		break;

	case FORMAT_PRINTFMT:
		/* all done */
		return NULL;
	}

	node = node->prev;
	if (node == common_head)
		return (void *)FORMAT_FIELD_SEPERATOR;
	else if (node == head)
		return (void *)FORMAT_PRINTFMT;
	else
		return node;
}

static int f_show(struct seq_file *m, void *v)
{
	struct trace_event_call *call = event_file_data(m->private);
	struct ftrace_event_field *field;
	const char *array_descriptor;

	switch ((unsigned long)v) {
	case FORMAT_HEADER:
		seq_printf(m, "name: %s\n", trace_event_name(call));
		seq_printf(m, "ID: %d\n", call->event.type);
		seq_puts(m, "format:\n");
		return 0;

	case FORMAT_FIELD_SEPERATOR:
		seq_putc(m, '\n');
		return 0;

	case FORMAT_PRINTFMT:
		seq_printf(m, "\nprint fmt: %s\n",
			   call->print_fmt);
		return 0;
	}

	field = list_entry(v, struct ftrace_event_field, link);
	/*
	 * Smartly shows the array type (except dynamic array).
	 * Normal:
	 *	field:TYPE VAR
	 * If TYPE := TYPE[LEN], it is shown:
	 *	field:TYPE VAR[LEN]
	 */
	array_descriptor = strchr(field->type, '[');

	if (str_has_prefix(field->type, "__data_loc"))
		array_descriptor = NULL;

	if (!array_descriptor)
		seq_printf(m, "\tfield:%s %s;\toffset:%u;\tsize:%u;\tsigned:%d;\n",
			   field->type, field->name, field->offset,
			   field->size, !!field->is_signed);
	else
		seq_printf(m, "\tfield:%.*s %s%s;\toffset:%u;\tsize:%u;\tsigned:%d;\n",
			   (int)(array_descriptor - field->type),
			   field->type, field->name,
			   array_descriptor, field->offset,
			   field->size, !!field->is_signed);

	return 0;
}

static void *f_start(struct seq_file *m, loff_t *pos)
{
	void *p = (void *)FORMAT_HEADER;
	loff_t l = 0;

	/* ->stop() is called even if ->start() fails */
	mutex_lock(&event_mutex);
	if (!event_file_data(m->private))
		return ERR_PTR(-ENODEV);

	while (l < *pos && p)
		p = f_next(m, p, &l);

	return p;
}

static void f_stop(struct seq_file *m, void *p)
{
	mutex_unlock(&event_mutex);
}

static const struct seq_operations trace_format_seq_ops = {
	.start		= f_start,
	.next		= f_next,
	.stop		= f_stop,
	.show		= f_show,
};

static int trace_format_open(struct inode *inode, struct file *file)
{
	struct seq_file *m;
	int ret;

	/* Do we want to hide event format files on tracefs lockdown? */

	ret = seq_open(file, &trace_format_seq_ops);
	if (ret < 0)
		return ret;

	m = file->private_data;
	m->private = file;

	return 0;
}

static ssize_t
event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
{
	int id = (long)event_file_data(filp);
	char buf[32];
	int len;

	if (unlikely(!id))
		return -ENODEV;

	len = sprintf(buf, "%d\n", id);

	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
}

static ssize_t
event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
		  loff_t *ppos)
{
	struct trace_event_file *file;
	struct trace_seq *s;
	int r = -ENODEV;

	if (*ppos)
		return 0;

	s = kmalloc(sizeof(*s), GFP_KERNEL);

	if (!s)
		return -ENOMEM;

	trace_seq_init(s);

	mutex_lock(&event_mutex);
	file = event_file_data(filp);
	if (file)
		print_event_filter(file, s);
	mutex_unlock(&event_mutex);

	if (file)
		r = simple_read_from_buffer(ubuf, cnt, ppos,
					    s->buffer, trace_seq_used(s));

	kfree(s);

	return r;
}

static ssize_t
event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
		   loff_t *ppos)
{
	struct trace_event_file *file;
	char *buf;
	int err = -ENODEV;

	if (cnt >= PAGE_SIZE)
		return -EINVAL;

	buf = memdup_user_nul(ubuf, cnt);
	if (IS_ERR(buf))
		return PTR_ERR(buf);

	mutex_lock(&event_mutex);
	file = event_file_data(filp);
	if (file)
		err = apply_event_filter(file, buf);
	mutex_unlock(&event_mutex);

	kfree(buf);
	if (err < 0)
		return err;

	*ppos += cnt;

	return cnt;
}

static LIST_HEAD(event_subsystems);

static int subsystem_open(struct inode *inode, struct file *filp)
{
	struct trace_subsystem_dir *dir = NULL, *iter_dir;
	struct trace_array *tr = NULL, *iter_tr;
	struct event_subsystem *system = NULL;
	int ret;

	if (tracing_is_disabled())
		return -ENODEV;

	/* Make sure the system still exists */
	mutex_lock(&event_mutex);
	mutex_lock(&trace_types_lock);
	list_for_each_entry(iter_tr, &ftrace_trace_arrays, list) {
		list_for_each_entry(iter_dir, &iter_tr->systems, list) {
			if (iter_dir == inode->i_private) {
				/* Don't open systems with no events */
				tr = iter_tr;
				dir = iter_dir;
				if (dir->nr_events) {
					__get_system_dir(dir);
					system = dir->subsystem;
				}
				goto exit_loop;
			}
		}
	}
 exit_loop:
	mutex_unlock(&trace_types_lock);
	mutex_unlock(&event_mutex);

	if (!system)
		return -ENODEV;

	/* Still need to increment the ref count of the system */
	if (trace_array_get(tr) < 0) {
		put_system(dir);
		return -ENODEV;
	}

	ret = tracing_open_generic(inode, filp);
	if (ret < 0) {
		trace_array_put(tr);
		put_system(dir);
	}

	return ret;
}

static int system_tr_open(struct inode *inode, struct file *filp)
{
	struct trace_subsystem_dir *dir;
	struct trace_array *tr = inode->i_private;
	int ret;

	/* Make a temporary dir that has no system but points to tr */
	dir = kzalloc(sizeof(*dir), GFP_KERNEL);
	if (!dir)
		return -ENOMEM;

	ret = tracing_open_generic_tr(inode, filp);
	if (ret < 0) {
		kfree(dir);
		return ret;
	}
	dir->tr = tr;
	filp->private_data = dir;

	return 0;
}

static int subsystem_release(struct inode *inode, struct file *file)
{
	struct trace_subsystem_dir *dir = file->private_data;

	trace_array_put(dir->tr);

	/*
	 * If dir->subsystem is NULL, then this is a temporary
	 * descriptor that was made for a trace_array to enable
	 * all subsystems.
	 */
	if (dir->subsystem)
		put_system(dir);
	else
		kfree(dir);

	return 0;
}

static ssize_t
subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
		      loff_t *ppos)
{
	struct trace_subsystem_dir *dir = filp->private_data;
	struct event_subsystem *system = dir->subsystem;
	struct trace_seq *s;
	int r;

	if (*ppos)
		return 0;

	s = kmalloc(sizeof(*s), GFP_KERNEL);
	if (!s)
		return -ENOMEM;

	trace_seq_init(s);

	print_subsystem_event_filter(system, s);
	r = simple_read_from_buffer(ubuf, cnt, ppos,
				    s->buffer, trace_seq_used(s));

	kfree(s);

	return r;
}

static ssize_t
subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
		       loff_t *ppos)
{
	struct trace_subsystem_dir *dir = filp->private_data;
	char *buf;
	int err;

	if (cnt >= PAGE_SIZE)
		return -EINVAL;

	buf = memdup_user_nul(ubuf, cnt);
	if (IS_ERR(buf))
		return PTR_ERR(buf);

	err = apply_subsystem_event_filter(dir, buf);
	kfree(buf);
	if (err < 0)
		return err;

	*ppos += cnt;

	return cnt;
}

static ssize_t
show_header(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
{
	int (*func)(struct trace_seq *s) = filp->private_data;
	struct trace_seq *s;
	int r;

	if (*ppos)
		return 0;

	s = kmalloc(sizeof(*s), GFP_KERNEL);
	if (!s)
		return -ENOMEM;

	trace_seq_init(s);

	func(s);
	r = simple_read_from_buffer(ubuf, cnt, ppos,
				    s->buffer, trace_seq_used(s));

	kfree(s);

	return r;
}

static void ignore_task_cpu(void *data)
{
	struct trace_array *tr = data;
	struct trace_pid_list *pid_list;
	struct trace_pid_list *no_pid_list;

	/*
	 * This function is called by on_each_cpu() while the
	 * event_mutex is held.
	 */
	pid_list = rcu_dereference_protected(tr->filtered_pids,
					     mutex_is_locked(&event_mutex));
	no_pid_list = rcu_dereference_protected(tr->filtered_no_pids,
					     mutex_is_locked(&event_mutex));

	this_cpu_write(tr->array_buffer.data->ignore_pid,
		       trace_ignore_this_task(pid_list, no_pid_list, current));
}

static void register_pid_events(struct trace_array *tr)
{
	/*
	 * Register a probe that is called before all other probes
	 * to set ignore_pid if next or prev do not match.
	 * Register a probe that is called after all other probes
	 * to only keep ignore_pid set if next pid matches.
	 */
	register_trace_prio_sched_switch(event_filter_pid_sched_switch_probe_pre,
					 tr, INT_MAX);
	register_trace_prio_sched_switch(event_filter_pid_sched_switch_probe_post,
					 tr, 0);

	register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_pre,
					 tr, INT_MAX);
	register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_post,
					 tr, 0);

	register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_pre,
					     tr, INT_MAX);
	register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_post,
					     tr, 0);

	register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_pre,
					 tr, INT_MAX);
	register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_post,
					 tr, 0);
}

static ssize_t
event_pid_write(struct file *filp, const char __user *ubuf,
		size_t cnt, loff_t *ppos, int type)
{
	struct seq_file *m = filp->private_data;
	struct trace_array *tr = m->private;
	struct trace_pid_list *filtered_pids = NULL;
	struct trace_pid_list *other_pids = NULL;
	struct trace_pid_list *pid_list;
	struct trace_event_file *file;
	ssize_t ret;

	if (!cnt)
		return 0;

	ret = tracing_update_buffers();
	if (ret < 0)
		return ret;

	mutex_lock(&event_mutex);

	if (type == TRACE_PIDS) {
		filtered_pids = rcu_dereference_protected(tr->filtered_pids,
							  lockdep_is_held(&event_mutex));
		other_pids = rcu_dereference_protected(tr->filtered_no_pids,
							  lockdep_is_held(&event_mutex));
	} else {
		filtered_pids = rcu_dereference_protected(tr->filtered_no_pids,
							  lockdep_is_held(&event_mutex));
		other_pids = rcu_dereference_protected(tr->filtered_pids,
							  lockdep_is_held(&event_mutex));
	}

	ret = trace_pid_write(filtered_pids, &pid_list, ubuf, cnt);
	if (ret < 0)
		goto out;

	if (type == TRACE_PIDS)
		rcu_assign_pointer(tr->filtered_pids, pid_list);
	else
		rcu_assign_pointer(tr->filtered_no_pids, pid_list);

	list_for_each_entry(file, &tr->events, list) {
		set_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags);
	}

	if (filtered_pids) {
		tracepoint_synchronize_unregister();
		trace_pid_list_free(filtered_pids);
	} else if (pid_list && !other_pids) {
		register_pid_events(tr);
	}

	/*
	 * Ignoring of pids is done at task switch. But we have to
	 * check for those tasks that are currently running.
	 * Always do this in case a pid was appended or removed.
	 */
	on_each_cpu(ignore_task_cpu, tr, 1);

 out:
	mutex_unlock(&event_mutex);

	if (ret > 0)
		*ppos += ret;

	return ret;
}

static ssize_t
ftrace_event_pid_write(struct file *filp, const char __user *ubuf,
		       size_t cnt, loff_t *ppos)
{
	return event_pid_write(filp, ubuf, cnt, ppos, TRACE_PIDS);
}

static ssize_t
ftrace_event_npid_write(struct file *filp, const char __user *ubuf,
			size_t cnt, loff_t *ppos)
{
	return event_pid_write(filp, ubuf, cnt, ppos, TRACE_NO_PIDS);
}

static int ftrace_event_avail_open(struct inode *inode, struct file *file);
static int ftrace_event_set_open(struct inode *inode, struct file *file);
static int ftrace_event_set_pid_open(struct inode *inode, struct file *file);
static int ftrace_event_set_npid_open(struct inode *inode, struct file *file);
static int ftrace_event_release(struct inode *inode, struct file *file);

static const struct seq_operations show_event_seq_ops = {
	.start = t_start,
	.next = t_next,
	.show = t_show,
	.stop = t_stop,
};

static const struct seq_operations show_set_event_seq_ops = {
	.start = s_start,
	.next = s_next,
	.show = t_show,
	.stop = t_stop,
};

static const struct seq_operations show_set_pid_seq_ops = {
	.start = p_start,
	.next = p_next,
	.show = trace_pid_show,
	.stop = p_stop,
};

static const struct seq_operations show_set_no_pid_seq_ops = {
	.start = np_start,
	.next = np_next,
	.show = trace_pid_show,
	.stop = p_stop,
};

static const struct file_operations ftrace_avail_fops = {
	.open = ftrace_event_avail_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release,
};

static const struct file_operations ftrace_set_event_fops = {
	.open = ftrace_event_set_open,
	.read = seq_read,
	.write = ftrace_event_write,
	.llseek = seq_lseek,
	.release = ftrace_event_release,
};

static const struct file_operations ftrace_set_event_pid_fops = {
	.open = ftrace_event_set_pid_open,
	.read = seq_read,
	.write = ftrace_event_pid_write,
	.llseek = seq_lseek,
	.release = ftrace_event_release,
};

static const struct file_operations ftrace_set_event_notrace_pid_fops = {
	.open = ftrace_event_set_npid_open,
	.read = seq_read,
	.write = ftrace_event_npid_write,
	.llseek = seq_lseek,
	.release = ftrace_event_release,
};

static const struct file_operations ftrace_enable_fops = {
	.open = tracing_open_generic,
	.read = event_enable_read,
	.write = event_enable_write,
	.llseek = default_llseek,
};

static const struct file_operations ftrace_event_format_fops = {
	.open = trace_format_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release,
};

static const struct file_operations ftrace_event_id_fops = {
	.read = event_id_read,
	.llseek = default_llseek,
};

static const struct file_operations ftrace_event_filter_fops = {
	.open = tracing_open_generic,
	.read = event_filter_read,
	.write = event_filter_write,
	.llseek = default_llseek,
};

static const struct file_operations ftrace_subsystem_filter_fops = {
	.open = subsystem_open,
	.read = subsystem_filter_read,
	.write = subsystem_filter_write,
.llseek = default_llseek, 2120 .release = subsystem_release, 2121 }; 2122 2123 static const struct file_operations ftrace_system_enable_fops = { 2124 .open = subsystem_open, 2125 .read = system_enable_read, 2126 .write = system_enable_write, 2127 .llseek = default_llseek, 2128 .release = subsystem_release, 2129 }; 2130 2131 static const struct file_operations ftrace_tr_enable_fops = { 2132 .open = system_tr_open, 2133 .read = system_enable_read, 2134 .write = system_enable_write, 2135 .llseek = default_llseek, 2136 .release = subsystem_release, 2137 }; 2138 2139 static const struct file_operations ftrace_show_header_fops = { 2140 .open = tracing_open_generic, 2141 .read = show_header, 2142 .llseek = default_llseek, 2143 }; 2144 2145 static int 2146 ftrace_event_open(struct inode *inode, struct file *file, 2147 const struct seq_operations *seq_ops) 2148 { 2149 struct seq_file *m; 2150 int ret; 2151 2152 ret = security_locked_down(LOCKDOWN_TRACEFS); 2153 if (ret) 2154 return ret; 2155 2156 ret = seq_open(file, seq_ops); 2157 if (ret < 0) 2158 return ret; 2159 m = file->private_data; 2160 /* copy tr over to seq ops */ 2161 m->private = inode->i_private; 2162 2163 return ret; 2164 } 2165 2166 static int ftrace_event_release(struct inode *inode, struct file *file) 2167 { 2168 struct trace_array *tr = inode->i_private; 2169 2170 trace_array_put(tr); 2171 2172 return seq_release(inode, file); 2173 } 2174 2175 static int 2176 ftrace_event_avail_open(struct inode *inode, struct file *file) 2177 { 2178 const struct seq_operations *seq_ops = &show_event_seq_ops; 2179 2180 /* Checks for tracefs lockdown */ 2181 return ftrace_event_open(inode, file, seq_ops); 2182 } 2183 2184 static int 2185 ftrace_event_set_open(struct inode *inode, struct file *file) 2186 { 2187 const struct seq_operations *seq_ops = &show_set_event_seq_ops; 2188 struct trace_array *tr = inode->i_private; 2189 int ret; 2190 2191 ret = tracing_check_open_get_tr(tr); 2192 if (ret) 2193 return ret; 2194 2195 if ((file->f_mode & FMODE_WRITE) && 2196 (file->f_flags & O_TRUNC)) 2197 ftrace_clear_events(tr); 2198 2199 ret = ftrace_event_open(inode, file, seq_ops); 2200 if (ret < 0) 2201 trace_array_put(tr); 2202 return ret; 2203 } 2204 2205 static int 2206 ftrace_event_set_pid_open(struct inode *inode, struct file *file) 2207 { 2208 const struct seq_operations *seq_ops = &show_set_pid_seq_ops; 2209 struct trace_array *tr = inode->i_private; 2210 int ret; 2211 2212 ret = tracing_check_open_get_tr(tr); 2213 if (ret) 2214 return ret; 2215 2216 if ((file->f_mode & FMODE_WRITE) && 2217 (file->f_flags & O_TRUNC)) 2218 ftrace_clear_event_pids(tr, TRACE_PIDS); 2219 2220 ret = ftrace_event_open(inode, file, seq_ops); 2221 if (ret < 0) 2222 trace_array_put(tr); 2223 return ret; 2224 } 2225 2226 static int 2227 ftrace_event_set_npid_open(struct inode *inode, struct file *file) 2228 { 2229 const struct seq_operations *seq_ops = &show_set_no_pid_seq_ops; 2230 struct trace_array *tr = inode->i_private; 2231 int ret; 2232 2233 ret = tracing_check_open_get_tr(tr); 2234 if (ret) 2235 return ret; 2236 2237 if ((file->f_mode & FMODE_WRITE) && 2238 (file->f_flags & O_TRUNC)) 2239 ftrace_clear_event_pids(tr, TRACE_NO_PIDS); 2240 2241 ret = ftrace_event_open(inode, file, seq_ops); 2242 if (ret < 0) 2243 trace_array_put(tr); 2244 return ret; 2245 } 2246 2247 static struct event_subsystem * 2248 create_new_subsystem(const char *name) 2249 { 2250 struct event_subsystem *system; 2251 2252 /* need to create new entry */ 2253 system = kmalloc(sizeof(*system), 
GFP_KERNEL); 2254 if (!system) 2255 return NULL; 2256 2257 system->ref_count = 1; 2258 2259 /* Only allocate if dynamic (kprobes and modules) */ 2260 system->name = kstrdup_const(name, GFP_KERNEL); 2261 if (!system->name) 2262 goto out_free; 2263 2264 system->filter = NULL; 2265 2266 system->filter = kzalloc(sizeof(struct event_filter), GFP_KERNEL); 2267 if (!system->filter) 2268 goto out_free; 2269 2270 list_add(&system->list, &event_subsystems); 2271 2272 return system; 2273 2274 out_free: 2275 kfree_const(system->name); 2276 kfree(system); 2277 return NULL; 2278 } 2279 2280 static struct dentry * 2281 event_subsystem_dir(struct trace_array *tr, const char *name, 2282 struct trace_event_file *file, struct dentry *parent) 2283 { 2284 struct event_subsystem *system, *iter; 2285 struct trace_subsystem_dir *dir; 2286 struct dentry *entry; 2287 2288 /* First see if we did not already create this dir */ 2289 list_for_each_entry(dir, &tr->systems, list) { 2290 system = dir->subsystem; 2291 if (strcmp(system->name, name) == 0) { 2292 dir->nr_events++; 2293 file->system = dir; 2294 return dir->entry; 2295 } 2296 } 2297 2298 /* Now see if the system itself exists. */ 2299 system = NULL; 2300 list_for_each_entry(iter, &event_subsystems, list) { 2301 if (strcmp(iter->name, name) == 0) { 2302 system = iter; 2303 break; 2304 } 2305 } 2306 2307 dir = kmalloc(sizeof(*dir), GFP_KERNEL); 2308 if (!dir) 2309 goto out_fail; 2310 2311 if (!system) { 2312 system = create_new_subsystem(name); 2313 if (!system) 2314 goto out_free; 2315 } else 2316 __get_system(system); 2317 2318 dir->entry = tracefs_create_dir(name, parent); 2319 if (!dir->entry) { 2320 pr_warn("Failed to create system directory %s\n", name); 2321 __put_system(system); 2322 goto out_free; 2323 } 2324 2325 dir->tr = tr; 2326 dir->ref_count = 1; 2327 dir->nr_events = 1; 2328 dir->subsystem = system; 2329 file->system = dir; 2330 2331 /* the ftrace system is special, do not create enable or filter files */ 2332 if (strcmp(name, "ftrace") != 0) { 2333 2334 entry = tracefs_create_file("filter", TRACE_MODE_WRITE, 2335 dir->entry, dir, 2336 &ftrace_subsystem_filter_fops); 2337 if (!entry) { 2338 kfree(system->filter); 2339 system->filter = NULL; 2340 pr_warn("Could not create tracefs '%s/filter' entry\n", name); 2341 } 2342 2343 trace_create_file("enable", TRACE_MODE_WRITE, dir->entry, dir, 2344 &ftrace_system_enable_fops); 2345 } 2346 2347 list_add(&dir->list, &tr->systems); 2348 2349 return dir->entry; 2350 2351 out_free: 2352 kfree(dir); 2353 out_fail: 2354 /* Only print this message if failed on memory allocation */ 2355 if (!dir || !system) 2356 pr_warn("No memory to create event subsystem %s\n", name); 2357 return NULL; 2358 } 2359 2360 static int 2361 event_define_fields(struct trace_event_call *call) 2362 { 2363 struct list_head *head; 2364 int ret = 0; 2365 2366 /* 2367 * Other events may have the same class. Only update 2368 * the fields if they are not already defined. 
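 * (For example, every DEFINE_EVENT() built on one DECLARE_EVENT_CLASS()
 * shares that class and hence one field list, so only the first such
 * event to get here populates it.)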
2369 */ 2370 head = trace_get_fields(call); 2371 if (list_empty(head)) { 2372 struct trace_event_fields *field = call->class->fields_array; 2373 unsigned int offset = sizeof(struct trace_entry); 2374 2375 for (; field->type; field++) { 2376 if (field->type == TRACE_FUNCTION_TYPE) { 2377 field->define_fields(call); 2378 break; 2379 } 2380 2381 offset = ALIGN(offset, field->align); 2382 ret = trace_define_field(call, field->type, field->name, 2383 offset, field->size, 2384 field->is_signed, field->filter_type); 2385 if (WARN_ON_ONCE(ret)) { 2386 pr_err("error code is %d\n", ret); 2387 break; 2388 } 2389 2390 offset += field->size; 2391 } 2392 } 2393 2394 return ret; 2395 } 2396 2397 static int 2398 event_create_dir(struct dentry *parent, struct trace_event_file *file) 2399 { 2400 struct trace_event_call *call = file->event_call; 2401 struct trace_array *tr = file->tr; 2402 struct dentry *d_events; 2403 const char *name; 2404 int ret; 2405 2406 /* 2407 * If the trace point header did not define TRACE_SYSTEM 2408 * then the system would be called "TRACE_SYSTEM". 2409 */ 2410 if (strcmp(call->class->system, TRACE_SYSTEM) != 0) { 2411 d_events = event_subsystem_dir(tr, call->class->system, file, parent); 2412 if (!d_events) 2413 return -ENOMEM; 2414 } else 2415 d_events = parent; 2416 2417 name = trace_event_name(call); 2418 file->dir = tracefs_create_dir(name, d_events); 2419 if (!file->dir) { 2420 pr_warn("Could not create tracefs '%s' directory\n", name); 2421 return -1; 2422 } 2423 2424 if (call->class->reg && !(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)) 2425 trace_create_file("enable", TRACE_MODE_WRITE, file->dir, file, 2426 &ftrace_enable_fops); 2427 2428 #ifdef CONFIG_PERF_EVENTS 2429 if (call->event.type && call->class->reg) 2430 trace_create_file("id", TRACE_MODE_READ, file->dir, 2431 (void *)(long)call->event.type, 2432 &ftrace_event_id_fops); 2433 #endif 2434 2435 ret = event_define_fields(call); 2436 if (ret < 0) { 2437 pr_warn("Could not initialize trace point events/%s\n", name); 2438 return ret; 2439 } 2440 2441 /* 2442 * Only event directories that can be enabled should have 2443 * triggers or filters. 2444 */ 2445 if (!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)) { 2446 trace_create_file("filter", TRACE_MODE_WRITE, file->dir, 2447 file, &ftrace_event_filter_fops); 2448 2449 trace_create_file("trigger", TRACE_MODE_WRITE, file->dir, 2450 file, &event_trigger_fops); 2451 } 2452 2453 #ifdef CONFIG_HIST_TRIGGERS 2454 trace_create_file("hist", TRACE_MODE_READ, file->dir, file, 2455 &event_hist_fops); 2456 #endif 2457 #ifdef CONFIG_HIST_TRIGGERS_DEBUG 2458 trace_create_file("hist_debug", TRACE_MODE_READ, file->dir, file, 2459 &event_hist_debug_fops); 2460 #endif 2461 trace_create_file("format", TRACE_MODE_READ, file->dir, call, 2462 &ftrace_event_format_fops); 2463 2464 #ifdef CONFIG_TRACE_EVENT_INJECT 2465 if (call->event.type && call->class->reg) 2466 trace_create_file("inject", 0200, file->dir, file, 2467 &event_inject_fops); 2468 #endif 2469 2470 return 0; 2471 } 2472 2473 static void remove_event_from_tracers(struct trace_event_call *call) 2474 { 2475 struct trace_event_file *file; 2476 struct trace_array *tr; 2477 2478 do_for_each_event_file_safe(tr, file) { 2479 if (file->event_call != call) 2480 continue; 2481 2482 remove_event_file_dir(file); 2483 /* 2484 * The do_for_each_event_file_safe() is 2485 * a double loop. After finding the call for this 2486 * trace_array, we use break to jump to the next 2487 * trace_array. 
2488 */ 2489 break; 2490 } while_for_each_event_file(); 2491 } 2492 2493 static void event_remove(struct trace_event_call *call) 2494 { 2495 struct trace_array *tr; 2496 struct trace_event_file *file; 2497 2498 do_for_each_event_file(tr, file) { 2499 if (file->event_call != call) 2500 continue; 2501 2502 if (file->flags & EVENT_FILE_FL_WAS_ENABLED) 2503 tr->clear_trace = true; 2504 2505 ftrace_event_enable_disable(file, 0); 2506 /* 2507 * The do_for_each_event_file() is 2508 * a double loop. After finding the call for this 2509 * trace_array, we use break to jump to the next 2510 * trace_array. 2511 */ 2512 break; 2513 } while_for_each_event_file(); 2514 2515 if (call->event.funcs) 2516 __unregister_trace_event(&call->event); 2517 remove_event_from_tracers(call); 2518 list_del(&call->list); 2519 } 2520 2521 static int event_init(struct trace_event_call *call) 2522 { 2523 int ret = 0; 2524 const char *name; 2525 2526 name = trace_event_name(call); 2527 if (WARN_ON(!name)) 2528 return -EINVAL; 2529 2530 if (call->class->raw_init) { 2531 ret = call->class->raw_init(call); 2532 if (ret < 0 && ret != -ENOSYS) 2533 pr_warn("Could not initialize trace events/%s\n", name); 2534 } 2535 2536 return ret; 2537 } 2538 2539 static int 2540 __register_event(struct trace_event_call *call, struct module *mod) 2541 { 2542 int ret; 2543 2544 ret = event_init(call); 2545 if (ret < 0) 2546 return ret; 2547 2548 list_add(&call->list, &ftrace_events); 2549 if (call->flags & TRACE_EVENT_FL_DYNAMIC) 2550 atomic_set(&call->refcnt, 0); 2551 else 2552 call->module = mod; 2553 2554 return 0; 2555 } 2556 2557 static char *eval_replace(char *ptr, struct trace_eval_map *map, int len) 2558 { 2559 int rlen; 2560 int elen; 2561 2562 /* Find the length of the eval value as a string */ 2563 elen = snprintf(ptr, 0, "%ld", map->eval_value); 2564 /* Make sure there's enough room to replace the string with the value */ 2565 if (len < elen) 2566 return NULL; 2567 2568 snprintf(ptr, elen + 1, "%ld", map->eval_value); 2569 2570 /* Get the rest of the string of ptr */ 2571 rlen = strlen(ptr + len); 2572 memmove(ptr + elen, ptr + len, rlen); 2573 /* Make sure we end the new string */ 2574 ptr[elen + rlen] = 0; 2575 2576 return ptr + elen; 2577 } 2578 2579 static void update_event_printk(struct trace_event_call *call, 2580 struct trace_eval_map *map) 2581 { 2582 char *ptr; 2583 int quote = 0; 2584 int len = strlen(map->eval_string); 2585 2586 for (ptr = call->print_fmt; *ptr; ptr++) { 2587 if (*ptr == '\\') { 2588 ptr++; 2589 /* paranoid */ 2590 if (!*ptr) 2591 break; 2592 continue; 2593 } 2594 if (*ptr == '"') { 2595 quote ^= 1; 2596 continue; 2597 } 2598 if (quote) 2599 continue; 2600 if (isdigit(*ptr)) { 2601 /* skip numbers */ 2602 do { 2603 ptr++; 2604 /* Check for alpha chars like ULL */ 2605 } while (isalnum(*ptr)); 2606 if (!*ptr) 2607 break; 2608 /* 2609 * A number must have some kind of delimiter after 2610 * it, and we can ignore that too. 2611 */ 2612 continue; 2613 } 2614 if (isalpha(*ptr) || *ptr == '_') { 2615 if (strncmp(map->eval_string, ptr, len) == 0 && 2616 !isalnum(ptr[len]) && ptr[len] != '_') { 2617 ptr = eval_replace(ptr, map, len); 2618 /* enum/sizeof string smaller than value */ 2619 if (WARN_ON_ONCE(!ptr)) 2620 return; 2621 /* 2622 * No need to decrement here, as eval_replace() 2623 * returns the pointer to the character passed 2624 * the eval, and two evals can not be placed 2625 * back to back without something in between. 2626 * We can skip that something in between. 
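 * For example, with an eval string such as "XDP_PASS" mapping to the
 * value 2, the text "XDP_PASS" in the print fmt is rewritten to "2" and
 * ptr ends up on the delimiter right after the '2'.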
2627 */ 2628 continue; 2629 } 2630 skip_more: 2631 do { 2632 ptr++; 2633 } while (isalnum(*ptr) || *ptr == '_'); 2634 if (!*ptr) 2635 break; 2636 /* 2637 * If what comes after this variable is a '.' or 2638 * '->' then we can continue to ignore that string. 2639 */ 2640 if (*ptr == '.' || (ptr[0] == '-' && ptr[1] == '>')) { 2641 ptr += *ptr == '.' ? 1 : 2; 2642 if (!*ptr) 2643 break; 2644 goto skip_more; 2645 } 2646 /* 2647 * Once again, we can skip the delimiter that came 2648 * after the string. 2649 */ 2650 continue; 2651 } 2652 } 2653 } 2654 2655 static void add_str_to_module(struct module *module, char *str) 2656 { 2657 struct module_string *modstr; 2658 2659 modstr = kmalloc(sizeof(*modstr), GFP_KERNEL); 2660 2661 /* 2662 * If we failed to allocate memory here, then we'll just 2663 * let the str memory leak when the module is removed. 2664 * If this fails to allocate, there's worse problems than 2665 * a leaked string on module removal. 2666 */ 2667 if (WARN_ON_ONCE(!modstr)) 2668 return; 2669 2670 modstr->module = module; 2671 modstr->str = str; 2672 2673 list_add(&modstr->next, &module_strings); 2674 } 2675 2676 static void update_event_fields(struct trace_event_call *call, 2677 struct trace_eval_map *map) 2678 { 2679 struct ftrace_event_field *field; 2680 struct list_head *head; 2681 char *ptr; 2682 char *str; 2683 int len = strlen(map->eval_string); 2684 2685 /* Dynamic events should never have field maps */ 2686 if (WARN_ON_ONCE(call->flags & TRACE_EVENT_FL_DYNAMIC)) 2687 return; 2688 2689 head = trace_get_fields(call); 2690 list_for_each_entry(field, head, link) { 2691 ptr = strchr(field->type, '['); 2692 if (!ptr) 2693 continue; 2694 ptr++; 2695 2696 if (!isalpha(*ptr) && *ptr != '_') 2697 continue; 2698 2699 if (strncmp(map->eval_string, ptr, len) != 0) 2700 continue; 2701 2702 str = kstrdup(field->type, GFP_KERNEL); 2703 if (WARN_ON_ONCE(!str)) 2704 return; 2705 ptr = str + (ptr - field->type); 2706 ptr = eval_replace(ptr, map, len); 2707 /* enum/sizeof string smaller than value */ 2708 if (WARN_ON_ONCE(!ptr)) { 2709 kfree(str); 2710 continue; 2711 } 2712 2713 /* 2714 * If the event is part of a module, then we need to free the string 2715 * when the module is removed. Otherwise, it will stay allocated 2716 * until a reboot. 2717 */ 2718 if (call->module) 2719 add_str_to_module(call->module, str); 2720 2721 field->type = str; 2722 } 2723 } 2724 2725 void trace_event_eval_update(struct trace_eval_map **map, int len) 2726 { 2727 struct trace_event_call *call, *p; 2728 const char *last_system = NULL; 2729 bool first = false; 2730 int last_i; 2731 int i; 2732 2733 down_write(&trace_event_sem); 2734 list_for_each_entry_safe(call, p, &ftrace_events, list) { 2735 /* events are usually grouped together with systems */ 2736 if (!last_system || call->class->system != last_system) { 2737 first = true; 2738 last_i = 0; 2739 last_system = call->class->system; 2740 } 2741 2742 /* 2743 * Since calls are grouped by systems, the likelihood that the 2744 * next call in the iteration belongs to the same system as the 2745 * previous call is high. As an optimization, we skip searching 2746 * for a map[] that matches the call's system if the last call 2747 * was from the same system. That's what last_i is for. If the 2748 * call has the same system as the previous call, then last_i 2749 * will be the index of the first map[] that has a matching 2750 * system. 
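 * E.g. if map[3]..map[7] all belong to "sched", the first sched event
 * sets last_i to 3 and every later sched event starts its scan there
 * instead of at index 0.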
2751 */ 2752 for (i = last_i; i < len; i++) { 2753 if (call->class->system == map[i]->system) { 2754 /* Save the first system if need be */ 2755 if (first) { 2756 last_i = i; 2757 first = false; 2758 } 2759 update_event_printk(call, map[i]); 2760 update_event_fields(call, map[i]); 2761 } 2762 } 2763 } 2764 up_write(&trace_event_sem); 2765 } 2766 2767 static struct trace_event_file * 2768 trace_create_new_event(struct trace_event_call *call, 2769 struct trace_array *tr) 2770 { 2771 struct trace_pid_list *no_pid_list; 2772 struct trace_pid_list *pid_list; 2773 struct trace_event_file *file; 2774 unsigned int first; 2775 2776 file = kmem_cache_alloc(file_cachep, GFP_TRACE); 2777 if (!file) 2778 return NULL; 2779 2780 pid_list = rcu_dereference_protected(tr->filtered_pids, 2781 lockdep_is_held(&event_mutex)); 2782 no_pid_list = rcu_dereference_protected(tr->filtered_no_pids, 2783 lockdep_is_held(&event_mutex)); 2784 2785 if (!trace_pid_list_first(pid_list, &first) || 2786 !trace_pid_list_first(no_pid_list, &first)) 2787 file->flags |= EVENT_FILE_FL_PID_FILTER; 2788 2789 file->event_call = call; 2790 file->tr = tr; 2791 atomic_set(&file->sm_ref, 0); 2792 atomic_set(&file->tm_ref, 0); 2793 INIT_LIST_HEAD(&file->triggers); 2794 list_add(&file->list, &tr->events); 2795 2796 return file; 2797 } 2798 2799 #define MAX_BOOT_TRIGGERS 32 2800 2801 static struct boot_triggers { 2802 const char *event; 2803 char *trigger; 2804 } bootup_triggers[MAX_BOOT_TRIGGERS]; 2805 2806 static char bootup_trigger_buf[COMMAND_LINE_SIZE]; 2807 static int nr_boot_triggers; 2808 2809 static __init int setup_trace_triggers(char *str) 2810 { 2811 char *trigger; 2812 char *buf; 2813 int i; 2814 2815 strlcpy(bootup_trigger_buf, str, COMMAND_LINE_SIZE); 2816 ring_buffer_expanded = true; 2817 disable_tracing_selftest("running event triggers"); 2818 2819 buf = bootup_trigger_buf; 2820 for (i = 0; i < MAX_BOOT_TRIGGERS; i++) { 2821 trigger = strsep(&buf, ","); 2822 if (!trigger) 2823 break; 2824 bootup_triggers[i].event = strsep(&trigger, "."); 2825 bootup_triggers[i].trigger = strsep(&trigger, "."); 2826 if (!bootup_triggers[i].trigger) 2827 break; 2828 } 2829 2830 nr_boot_triggers = i; 2831 return 1; 2832 } 2833 __setup("trace_trigger=", setup_trace_triggers); 2834 2835 /* Add an event to a trace directory */ 2836 static int 2837 __trace_add_new_event(struct trace_event_call *call, struct trace_array *tr) 2838 { 2839 struct trace_event_file *file; 2840 2841 file = trace_create_new_event(call, tr); 2842 if (!file) 2843 return -ENOMEM; 2844 2845 if (eventdir_initialized) 2846 return event_create_dir(tr->event_dir, file); 2847 else 2848 return event_define_fields(call); 2849 } 2850 2851 static void trace_early_triggers(struct trace_event_file *file, const char *name) 2852 { 2853 int ret; 2854 int i; 2855 2856 for (i = 0; i < nr_boot_triggers; i++) { 2857 if (strcmp(name, bootup_triggers[i].event)) 2858 continue; 2859 mutex_lock(&event_mutex); 2860 ret = trigger_process_regex(file, bootup_triggers[i].trigger); 2861 mutex_unlock(&event_mutex); 2862 if (ret) 2863 pr_err("Failed to register trigger '%s' on event %s\n", 2864 bootup_triggers[i].trigger, 2865 bootup_triggers[i].event); 2866 } 2867 } 2868 2869 /* 2870 * Just create a descriptor for early init. A descriptor is required 2871 * for enabling events at boot. We want to enable events before 2872 * the filesystem is initialized. 
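 * The corresponding tracefs files are created later, once tracefs is up,
 * by __trace_early_add_event_dirs().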
2873 */ 2874 static int 2875 __trace_early_add_new_event(struct trace_event_call *call, 2876 struct trace_array *tr) 2877 { 2878 struct trace_event_file *file; 2879 int ret; 2880 2881 file = trace_create_new_event(call, tr); 2882 if (!file) 2883 return -ENOMEM; 2884 2885 ret = event_define_fields(call); 2886 if (ret) 2887 return ret; 2888 2889 trace_early_triggers(file, trace_event_name(call)); 2890 2891 return 0; 2892 } 2893 2894 struct ftrace_module_file_ops; 2895 static void __add_event_to_tracers(struct trace_event_call *call); 2896 2897 /* Add an additional event_call dynamically */ 2898 int trace_add_event_call(struct trace_event_call *call) 2899 { 2900 int ret; 2901 lockdep_assert_held(&event_mutex); 2902 2903 mutex_lock(&trace_types_lock); 2904 2905 ret = __register_event(call, NULL); 2906 if (ret >= 0) 2907 __add_event_to_tracers(call); 2908 2909 mutex_unlock(&trace_types_lock); 2910 return ret; 2911 } 2912 EXPORT_SYMBOL_GPL(trace_add_event_call); 2913 2914 /* 2915 * Must be called with trace_types_lock, event_mutex and 2916 * trace_event_sem held. 2917 */ 2918 static void __trace_remove_event_call(struct trace_event_call *call) 2919 { 2920 event_remove(call); 2921 trace_destroy_fields(call); 2922 free_event_filter(call->filter); 2923 call->filter = NULL; 2924 } 2925 2926 static int probe_remove_event_call(struct trace_event_call *call) 2927 { 2928 struct trace_array *tr; 2929 struct trace_event_file *file; 2930 2931 #ifdef CONFIG_PERF_EVENTS 2932 if (call->perf_refcount) 2933 return -EBUSY; 2934 #endif 2935 do_for_each_event_file(tr, file) { 2936 if (file->event_call != call) 2937 continue; 2938 /* 2939 * We can't rely on the ftrace_event_enable_disable(enable => 0) 2940 * that we are about to do, as EVENT_FILE_FL_SOFT_MODE can suppress 2941 * TRACE_REG_UNREGISTER. 2942 */ 2943 if (file->flags & EVENT_FILE_FL_ENABLED) 2944 goto busy; 2945 2946 if (file->flags & EVENT_FILE_FL_WAS_ENABLED) 2947 tr->clear_trace = true; 2948 /* 2949 * The do_for_each_event_file() is 2950 * a double loop. After finding the call for this 2951 * trace_array, we use break to jump to the next 2952 * trace_array.
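 * (The non-_safe variant is sufficient here, as nothing is removed
 * from the list inside this loop.)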
2953 */ 2954 break; 2955 } while_for_each_event_file(); 2956 2957 __trace_remove_event_call(call); 2958 2959 return 0; 2960 busy: 2961 /* No need to clear the trace now */ 2962 list_for_each_entry(tr, &ftrace_trace_arrays, list) { 2963 tr->clear_trace = false; 2964 } 2965 return -EBUSY; 2966 } 2967 2968 /* Remove an event_call */ 2969 int trace_remove_event_call(struct trace_event_call *call) 2970 { 2971 int ret; 2972 2973 lockdep_assert_held(&event_mutex); 2974 2975 mutex_lock(&trace_types_lock); 2976 down_write(&trace_event_sem); 2977 ret = probe_remove_event_call(call); 2978 up_write(&trace_event_sem); 2979 mutex_unlock(&trace_types_lock); 2980 2981 return ret; 2982 } 2983 EXPORT_SYMBOL_GPL(trace_remove_event_call); 2984 2985 #define for_each_event(event, start, end) \ 2986 for (event = start; \ 2987 (unsigned long)event < (unsigned long)end; \ 2988 event++) 2989 2990 #ifdef CONFIG_MODULES 2991 2992 static void trace_module_add_events(struct module *mod) 2993 { 2994 struct trace_event_call **call, **start, **end; 2995 2996 if (!mod->num_trace_events) 2997 return; 2998 2999 /* Don't add infrastructure for mods without tracepoints */ 3000 if (trace_module_has_bad_taint(mod)) { 3001 pr_err("%s: module has bad taint, not creating trace events\n", 3002 mod->name); 3003 return; 3004 } 3005 3006 start = mod->trace_events; 3007 end = mod->trace_events + mod->num_trace_events; 3008 3009 for_each_event(call, start, end) { 3010 __register_event(*call, mod); 3011 __add_event_to_tracers(*call); 3012 } 3013 } 3014 3015 static void trace_module_remove_events(struct module *mod) 3016 { 3017 struct trace_event_call *call, *p; 3018 struct module_string *modstr, *m; 3019 3020 down_write(&trace_event_sem); 3021 list_for_each_entry_safe(call, p, &ftrace_events, list) { 3022 if ((call->flags & TRACE_EVENT_FL_DYNAMIC) || !call->module) 3023 continue; 3024 if (call->module == mod) 3025 __trace_remove_event_call(call); 3026 } 3027 /* Check for any strings allocade for this module */ 3028 list_for_each_entry_safe(modstr, m, &module_strings, next) { 3029 if (modstr->module != mod) 3030 continue; 3031 list_del(&modstr->next); 3032 kfree(modstr->str); 3033 kfree(modstr); 3034 } 3035 up_write(&trace_event_sem); 3036 3037 /* 3038 * It is safest to reset the ring buffer if the module being unloaded 3039 * registered any events that were used. The only worry is if 3040 * a new module gets loaded, and takes on the same id as the events 3041 * of this module. When printing out the buffer, traced events left 3042 * over from this module may be passed to the new module events and 3043 * unexpected results may occur. 3044 */ 3045 tracing_reset_all_online_cpus_unlocked(); 3046 } 3047 3048 static int trace_module_notify(struct notifier_block *self, 3049 unsigned long val, void *data) 3050 { 3051 struct module *mod = data; 3052 3053 mutex_lock(&event_mutex); 3054 mutex_lock(&trace_types_lock); 3055 switch (val) { 3056 case MODULE_STATE_COMING: 3057 trace_module_add_events(mod); 3058 break; 3059 case MODULE_STATE_GOING: 3060 trace_module_remove_events(mod); 3061 break; 3062 } 3063 mutex_unlock(&trace_types_lock); 3064 mutex_unlock(&event_mutex); 3065 3066 return NOTIFY_OK; 3067 } 3068 3069 static struct notifier_block trace_module_nb = { 3070 .notifier_call = trace_module_notify, 3071 .priority = 1, /* higher than trace.c module notify */ 3072 }; 3073 #endif /* CONFIG_MODULES */ 3074 3075 /* Create a new event directory structure for a trace directory. 
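 * Every event currently registered on ftrace_events gets a corresponding
 * trace_event_file (and directory) in the given trace_array.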
*/ 3076 static void 3077 __trace_add_event_dirs(struct trace_array *tr) 3078 { 3079 struct trace_event_call *call; 3080 int ret; 3081 3082 list_for_each_entry(call, &ftrace_events, list) { 3083 ret = __trace_add_new_event(call, tr); 3084 if (ret < 0) 3085 pr_warn("Could not create directory for event %s\n", 3086 trace_event_name(call)); 3087 } 3088 } 3089 3090 /* Returns any file that matches the system and event */ 3091 struct trace_event_file * 3092 __find_event_file(struct trace_array *tr, const char *system, const char *event) 3093 { 3094 struct trace_event_file *file; 3095 struct trace_event_call *call; 3096 const char *name; 3097 3098 list_for_each_entry(file, &tr->events, list) { 3099 3100 call = file->event_call; 3101 name = trace_event_name(call); 3102 3103 if (!name || !call->class) 3104 continue; 3105 3106 if (strcmp(event, name) == 0 && 3107 strcmp(system, call->class->system) == 0) 3108 return file; 3109 } 3110 return NULL; 3111 } 3112 3113 /* Returns valid trace event files that match system and event */ 3114 struct trace_event_file * 3115 find_event_file(struct trace_array *tr, const char *system, const char *event) 3116 { 3117 struct trace_event_file *file; 3118 3119 file = __find_event_file(tr, system, event); 3120 if (!file || !file->event_call->class->reg || 3121 file->event_call->flags & TRACE_EVENT_FL_IGNORE_ENABLE) 3122 return NULL; 3123 3124 return file; 3125 } 3126 3127 /** 3128 * trace_get_event_file - Find and return a trace event file 3129 * @instance: The name of the trace instance containing the event 3130 * @system: The name of the system containing the event 3131 * @event: The name of the event 3132 * 3133 * Return a trace event file given the trace instance name, trace 3134 * system, and trace event name. If the instance name is NULL, it 3135 * refers to the top-level trace array. 3136 * 3137 * This function will look it up and return it if found, after calling 3138 * trace_array_get() to prevent the instance from going away, and 3139 * increment the event's module refcount to prevent it from being 3140 * removed. 3141 * 3142 * To release the file, call trace_put_event_file(), which will call 3143 * trace_array_put() and decrement the event's module refcount. 3144 * 3145 * Return: The trace event on success, ERR_PTR otherwise. 
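 *
 * An illustrative use from a module (hypothetical; error handling kept
 * minimal), paired with trace_put_event_file():
 *
 *	file = trace_get_event_file(NULL, "sched", "sched_switch");
 *	if (IS_ERR(file))
 *		return PTR_ERR(file);
 *	...
 *	trace_put_event_file(file);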
3146 */ 3147 struct trace_event_file *trace_get_event_file(const char *instance, 3148 const char *system, 3149 const char *event) 3150 { 3151 struct trace_array *tr = top_trace_array(); 3152 struct trace_event_file *file = NULL; 3153 int ret = -EINVAL; 3154 3155 if (instance) { 3156 tr = trace_array_find_get(instance); 3157 if (!tr) 3158 return ERR_PTR(-ENOENT); 3159 } else { 3160 ret = trace_array_get(tr); 3161 if (ret) 3162 return ERR_PTR(ret); 3163 } 3164 3165 mutex_lock(&event_mutex); 3166 3167 file = find_event_file(tr, system, event); 3168 if (!file) { 3169 trace_array_put(tr); 3170 ret = -EINVAL; 3171 goto out; 3172 } 3173 3174 /* Don't let event modules unload while in use */ 3175 ret = trace_event_try_get_ref(file->event_call); 3176 if (!ret) { 3177 trace_array_put(tr); 3178 ret = -EBUSY; 3179 goto out; 3180 } 3181 3182 ret = 0; 3183 out: 3184 mutex_unlock(&event_mutex); 3185 3186 if (ret) 3187 file = ERR_PTR(ret); 3188 3189 return file; 3190 } 3191 EXPORT_SYMBOL_GPL(trace_get_event_file); 3192 3193 /** 3194 * trace_put_event_file - Release a file from trace_get_event_file() 3195 * @file: The trace event file 3196 * 3197 * If a file was retrieved using trace_get_event_file(), this should 3198 * be called when it's no longer needed. It will cancel the previous 3199 * trace_array_get() called by that function, and decrement the 3200 * event's module refcount. 3201 */ 3202 void trace_put_event_file(struct trace_event_file *file) 3203 { 3204 mutex_lock(&event_mutex); 3205 trace_event_put_ref(file->event_call); 3206 mutex_unlock(&event_mutex); 3207 3208 trace_array_put(file->tr); 3209 } 3210 EXPORT_SYMBOL_GPL(trace_put_event_file); 3211 3212 #ifdef CONFIG_DYNAMIC_FTRACE 3213 3214 /* Avoid typos */ 3215 #define ENABLE_EVENT_STR "enable_event" 3216 #define DISABLE_EVENT_STR "disable_event" 3217 3218 struct event_probe_data { 3219 struct trace_event_file *file; 3220 unsigned long count; 3221 int ref; 3222 bool enable; 3223 }; 3224 3225 static void update_event_probe(struct event_probe_data *data) 3226 { 3227 if (data->enable) 3228 clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &data->file->flags); 3229 else 3230 set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &data->file->flags); 3231 } 3232 3233 static void 3234 event_enable_probe(unsigned long ip, unsigned long parent_ip, 3235 struct trace_array *tr, struct ftrace_probe_ops *ops, 3236 void *data) 3237 { 3238 struct ftrace_func_mapper *mapper = data; 3239 struct event_probe_data *edata; 3240 void **pdata; 3241 3242 pdata = ftrace_func_mapper_find_ip(mapper, ip); 3243 if (!pdata || !*pdata) 3244 return; 3245 3246 edata = *pdata; 3247 update_event_probe(edata); 3248 } 3249 3250 static void 3251 event_enable_count_probe(unsigned long ip, unsigned long parent_ip, 3252 struct trace_array *tr, struct ftrace_probe_ops *ops, 3253 void *data) 3254 { 3255 struct ftrace_func_mapper *mapper = data; 3256 struct event_probe_data *edata; 3257 void **pdata; 3258 3259 pdata = ftrace_func_mapper_find_ip(mapper, ip); 3260 if (!pdata || !*pdata) 3261 return; 3262 3263 edata = *pdata; 3264 3265 if (!edata->count) 3266 return; 3267 3268 /* Skip if the event is in a state we want to switch to */ 3269 if (edata->enable == !(edata->file->flags & EVENT_FILE_FL_SOFT_DISABLED)) 3270 return; 3271 3272 if (edata->count != -1) 3273 (edata->count)--; 3274 3275 update_event_probe(edata); 3276 } 3277 3278 static int 3279 event_enable_print(struct seq_file *m, unsigned long ip, 3280 struct ftrace_probe_ops *ops, void *data) 3281 { 3282 struct ftrace_func_mapper *mapper = data; 3283 
struct event_probe_data *edata; 3284 void **pdata; 3285 3286 pdata = ftrace_func_mapper_find_ip(mapper, ip); 3287 3288 if (WARN_ON_ONCE(!pdata || !*pdata)) 3289 return 0; 3290 3291 edata = *pdata; 3292 3293 seq_printf(m, "%ps:", (void *)ip); 3294 3295 seq_printf(m, "%s:%s:%s", 3296 edata->enable ? ENABLE_EVENT_STR : DISABLE_EVENT_STR, 3297 edata->file->event_call->class->system, 3298 trace_event_name(edata->file->event_call)); 3299 3300 if (edata->count == -1) 3301 seq_puts(m, ":unlimited\n"); 3302 else 3303 seq_printf(m, ":count=%ld\n", edata->count); 3304 3305 return 0; 3306 } 3307 3308 static int 3309 event_enable_init(struct ftrace_probe_ops *ops, struct trace_array *tr, 3310 unsigned long ip, void *init_data, void **data) 3311 { 3312 struct ftrace_func_mapper *mapper = *data; 3313 struct event_probe_data *edata = init_data; 3314 int ret; 3315 3316 if (!mapper) { 3317 mapper = allocate_ftrace_func_mapper(); 3318 if (!mapper) 3319 return -ENODEV; 3320 *data = mapper; 3321 } 3322 3323 ret = ftrace_func_mapper_add_ip(mapper, ip, edata); 3324 if (ret < 0) 3325 return ret; 3326 3327 edata->ref++; 3328 3329 return 0; 3330 } 3331 3332 static int free_probe_data(void *data) 3333 { 3334 struct event_probe_data *edata = data; 3335 3336 edata->ref--; 3337 if (!edata->ref) { 3338 /* Remove the SOFT_MODE flag */ 3339 __ftrace_event_enable_disable(edata->file, 0, 1); 3340 trace_event_put_ref(edata->file->event_call); 3341 kfree(edata); 3342 } 3343 return 0; 3344 } 3345 3346 static void 3347 event_enable_free(struct ftrace_probe_ops *ops, struct trace_array *tr, 3348 unsigned long ip, void *data) 3349 { 3350 struct ftrace_func_mapper *mapper = data; 3351 struct event_probe_data *edata; 3352 3353 if (!ip) { 3354 if (!mapper) 3355 return; 3356 free_ftrace_func_mapper(mapper, free_probe_data); 3357 return; 3358 } 3359 3360 edata = ftrace_func_mapper_remove_ip(mapper, ip); 3361 3362 if (WARN_ON_ONCE(!edata)) 3363 return; 3364 3365 if (WARN_ON_ONCE(edata->ref <= 0)) 3366 return; 3367 3368 free_probe_data(edata); 3369 } 3370 3371 static struct ftrace_probe_ops event_enable_probe_ops = { 3372 .func = event_enable_probe, 3373 .print = event_enable_print, 3374 .init = event_enable_init, 3375 .free = event_enable_free, 3376 }; 3377 3378 static struct ftrace_probe_ops event_enable_count_probe_ops = { 3379 .func = event_enable_count_probe, 3380 .print = event_enable_print, 3381 .init = event_enable_init, 3382 .free = event_enable_free, 3383 }; 3384 3385 static struct ftrace_probe_ops event_disable_probe_ops = { 3386 .func = event_enable_probe, 3387 .print = event_enable_print, 3388 .init = event_enable_init, 3389 .free = event_enable_free, 3390 }; 3391 3392 static struct ftrace_probe_ops event_disable_count_probe_ops = { 3393 .func = event_enable_count_probe, 3394 .print = event_enable_print, 3395 .init = event_enable_init, 3396 .free = event_enable_free, 3397 }; 3398 3399 static int 3400 event_enable_func(struct trace_array *tr, struct ftrace_hash *hash, 3401 char *glob, char *cmd, char *param, int enabled) 3402 { 3403 struct trace_event_file *file; 3404 struct ftrace_probe_ops *ops; 3405 struct event_probe_data *data; 3406 const char *system; 3407 const char *event; 3408 char *number; 3409 bool enable; 3410 int ret; 3411 3412 if (!tr) 3413 return -ENODEV; 3414 3415 /* hash funcs only work with set_ftrace_filter */ 3416 if (!enabled || !param) 3417 return -EINVAL; 3418 3419 system = strsep(¶m, ":"); 3420 if (!param) 3421 return -EINVAL; 3422 3423 event = strsep(¶m, ":"); 3424 3425 mutex_lock(&event_mutex); 3426 
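	/*
	 * At this point cmd is ENABLE_EVENT_STR or DISABLE_EVENT_STR and
	 * system/event have been parsed from a set_ftrace_filter line such
	 * as "try_to_wake_up:enable_event:sched:sched_switch:2"; an optional
	 * trailing ":count" is still left in param and handled below.
	 */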
3427 ret = -EINVAL; 3428 file = find_event_file(tr, system, event); 3429 if (!file) 3430 goto out; 3431 3432 enable = strcmp(cmd, ENABLE_EVENT_STR) == 0; 3433 3434 if (enable) 3435 ops = param ? &event_enable_count_probe_ops : &event_enable_probe_ops; 3436 else 3437 ops = param ? &event_disable_count_probe_ops : &event_disable_probe_ops; 3438 3439 if (glob[0] == '!') { 3440 ret = unregister_ftrace_function_probe_func(glob+1, tr, ops); 3441 goto out; 3442 } 3443 3444 ret = -ENOMEM; 3445 3446 data = kzalloc(sizeof(*data), GFP_KERNEL); 3447 if (!data) 3448 goto out; 3449 3450 data->enable = enable; 3451 data->count = -1; 3452 data->file = file; 3453 3454 if (!param) 3455 goto out_reg; 3456 3457 number = strsep(¶m, ":"); 3458 3459 ret = -EINVAL; 3460 if (!strlen(number)) 3461 goto out_free; 3462 3463 /* 3464 * We use the callback data field (which is a pointer) 3465 * as our counter. 3466 */ 3467 ret = kstrtoul(number, 0, &data->count); 3468 if (ret) 3469 goto out_free; 3470 3471 out_reg: 3472 /* Don't let event modules unload while probe registered */ 3473 ret = trace_event_try_get_ref(file->event_call); 3474 if (!ret) { 3475 ret = -EBUSY; 3476 goto out_free; 3477 } 3478 3479 ret = __ftrace_event_enable_disable(file, 1, 1); 3480 if (ret < 0) 3481 goto out_put; 3482 3483 ret = register_ftrace_function_probe(glob, tr, ops, data); 3484 /* 3485 * The above returns on success the # of functions enabled, 3486 * but if it didn't find any functions it returns zero. 3487 * Consider no functions a failure too. 3488 */ 3489 if (!ret) { 3490 ret = -ENOENT; 3491 goto out_disable; 3492 } else if (ret < 0) 3493 goto out_disable; 3494 /* Just return zero, not the number of enabled functions */ 3495 ret = 0; 3496 out: 3497 mutex_unlock(&event_mutex); 3498 return ret; 3499 3500 out_disable: 3501 __ftrace_event_enable_disable(file, 0, 1); 3502 out_put: 3503 trace_event_put_ref(file->event_call); 3504 out_free: 3505 kfree(data); 3506 goto out; 3507 } 3508 3509 static struct ftrace_func_command event_enable_cmd = { 3510 .name = ENABLE_EVENT_STR, 3511 .func = event_enable_func, 3512 }; 3513 3514 static struct ftrace_func_command event_disable_cmd = { 3515 .name = DISABLE_EVENT_STR, 3516 .func = event_enable_func, 3517 }; 3518 3519 static __init int register_event_cmds(void) 3520 { 3521 int ret; 3522 3523 ret = register_ftrace_command(&event_enable_cmd); 3524 if (WARN_ON(ret < 0)) 3525 return ret; 3526 ret = register_ftrace_command(&event_disable_cmd); 3527 if (WARN_ON(ret < 0)) 3528 unregister_ftrace_command(&event_enable_cmd); 3529 return ret; 3530 } 3531 #else 3532 static inline int register_event_cmds(void) { return 0; } 3533 #endif /* CONFIG_DYNAMIC_FTRACE */ 3534 3535 /* 3536 * The top level array and trace arrays created by boot-time tracing 3537 * have already had its trace_event_file descriptors created in order 3538 * to allow for early events to be recorded. 3539 * This function is called after the tracefs has been initialized, 3540 * and we now have to create the files associated to the events. 
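 * (The descriptors themselves were created earlier by
 * __trace_early_add_events(); only the tracefs directories and files
 * are added here.)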
3541 */ 3542 static void __trace_early_add_event_dirs(struct trace_array *tr) 3543 { 3544 struct trace_event_file *file; 3545 int ret; 3546 3547 3548 list_for_each_entry(file, &tr->events, list) { 3549 ret = event_create_dir(tr->event_dir, file); 3550 if (ret < 0) 3551 pr_warn("Could not create directory for event %s\n", 3552 trace_event_name(file->event_call)); 3553 } 3554 } 3555 3556 /* 3557 * For early boot up, the top trace array and the trace arrays created 3558 * by boot-time tracing require to have a list of events that can be 3559 * enabled. This must be done before the filesystem is set up in order 3560 * to allow events to be traced early. 3561 */ 3562 void __trace_early_add_events(struct trace_array *tr) 3563 { 3564 struct trace_event_call *call; 3565 int ret; 3566 3567 list_for_each_entry(call, &ftrace_events, list) { 3568 /* Early boot up should not have any modules loaded */ 3569 if (!(call->flags & TRACE_EVENT_FL_DYNAMIC) && 3570 WARN_ON_ONCE(call->module)) 3571 continue; 3572 3573 ret = __trace_early_add_new_event(call, tr); 3574 if (ret < 0) 3575 pr_warn("Could not create early event %s\n", 3576 trace_event_name(call)); 3577 } 3578 } 3579 3580 /* Remove the event directory structure for a trace directory. */ 3581 static void 3582 __trace_remove_event_dirs(struct trace_array *tr) 3583 { 3584 struct trace_event_file *file, *next; 3585 3586 list_for_each_entry_safe(file, next, &tr->events, list) 3587 remove_event_file_dir(file); 3588 } 3589 3590 static void __add_event_to_tracers(struct trace_event_call *call) 3591 { 3592 struct trace_array *tr; 3593 3594 list_for_each_entry(tr, &ftrace_trace_arrays, list) 3595 __trace_add_new_event(call, tr); 3596 } 3597 3598 extern struct trace_event_call *__start_ftrace_events[]; 3599 extern struct trace_event_call *__stop_ftrace_events[]; 3600 3601 static char bootup_event_buf[COMMAND_LINE_SIZE] __initdata; 3602 3603 static __init int setup_trace_event(char *str) 3604 { 3605 strlcpy(bootup_event_buf, str, COMMAND_LINE_SIZE); 3606 ring_buffer_expanded = true; 3607 disable_tracing_selftest("running event tracing"); 3608 3609 return 1; 3610 } 3611 __setup("trace_event=", setup_trace_event); 3612 3613 /* Expects to have event_mutex held when called */ 3614 static int 3615 create_event_toplevel_files(struct dentry *parent, struct trace_array *tr) 3616 { 3617 struct dentry *d_events; 3618 struct dentry *entry; 3619 3620 entry = trace_create_file("set_event", TRACE_MODE_WRITE, parent, 3621 tr, &ftrace_set_event_fops); 3622 if (!entry) 3623 return -ENOMEM; 3624 3625 d_events = tracefs_create_dir("events", parent); 3626 if (!d_events) { 3627 pr_warn("Could not create tracefs 'events' directory\n"); 3628 return -ENOMEM; 3629 } 3630 3631 entry = trace_create_file("enable", TRACE_MODE_WRITE, d_events, 3632 tr, &ftrace_tr_enable_fops); 3633 if (!entry) 3634 return -ENOMEM; 3635 3636 /* There are not as crucial, just warn if they are not created */ 3637 3638 trace_create_file("set_event_pid", TRACE_MODE_WRITE, parent, 3639 tr, &ftrace_set_event_pid_fops); 3640 3641 trace_create_file("set_event_notrace_pid", 3642 TRACE_MODE_WRITE, parent, tr, 3643 &ftrace_set_event_notrace_pid_fops); 3644 3645 /* ring buffer internal formats */ 3646 trace_create_file("header_page", TRACE_MODE_READ, d_events, 3647 ring_buffer_print_page_header, 3648 &ftrace_show_header_fops); 3649 3650 trace_create_file("header_event", TRACE_MODE_READ, d_events, 3651 ring_buffer_print_entry_header, 3652 &ftrace_show_header_fops); 3653 3654 tr->event_dir = d_events; 3655 3656 return 0; 
3657 } 3658 3659 /** 3660 * event_trace_add_tracer - add a instance of a trace_array to events 3661 * @parent: The parent dentry to place the files/directories for events in 3662 * @tr: The trace array associated with these events 3663 * 3664 * When a new instance is created, it needs to set up its events 3665 * directory, as well as other files associated with events. It also 3666 * creates the event hierarchy in the @parent/events directory. 3667 * 3668 * Returns 0 on success. 3669 * 3670 * Must be called with event_mutex held. 3671 */ 3672 int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr) 3673 { 3674 int ret; 3675 3676 lockdep_assert_held(&event_mutex); 3677 3678 ret = create_event_toplevel_files(parent, tr); 3679 if (ret) 3680 goto out; 3681 3682 down_write(&trace_event_sem); 3683 /* If tr already has the event list, it is initialized in early boot. */ 3684 if (unlikely(!list_empty(&tr->events))) 3685 __trace_early_add_event_dirs(tr); 3686 else 3687 __trace_add_event_dirs(tr); 3688 up_write(&trace_event_sem); 3689 3690 out: 3691 return ret; 3692 } 3693 3694 /* 3695 * The top trace array already had its file descriptors created. 3696 * Now the files themselves need to be created. 3697 */ 3698 static __init int 3699 early_event_add_tracer(struct dentry *parent, struct trace_array *tr) 3700 { 3701 int ret; 3702 3703 mutex_lock(&event_mutex); 3704 3705 ret = create_event_toplevel_files(parent, tr); 3706 if (ret) 3707 goto out_unlock; 3708 3709 down_write(&trace_event_sem); 3710 __trace_early_add_event_dirs(tr); 3711 up_write(&trace_event_sem); 3712 3713 out_unlock: 3714 mutex_unlock(&event_mutex); 3715 3716 return ret; 3717 } 3718 3719 /* Must be called with event_mutex held */ 3720 int event_trace_del_tracer(struct trace_array *tr) 3721 { 3722 lockdep_assert_held(&event_mutex); 3723 3724 /* Disable any event triggers and associated soft-disabled events */ 3725 clear_event_triggers(tr); 3726 3727 /* Clear the pid list */ 3728 __ftrace_clear_event_pids(tr, TRACE_PIDS | TRACE_NO_PIDS); 3729 3730 /* Disable any running events */ 3731 __ftrace_set_clr_event_nolock(tr, NULL, NULL, NULL, 0); 3732 3733 /* Make sure no more events are being executed */ 3734 tracepoint_synchronize_unregister(); 3735 3736 down_write(&trace_event_sem); 3737 __trace_remove_event_dirs(tr); 3738 tracefs_remove(tr->event_dir); 3739 up_write(&trace_event_sem); 3740 3741 tr->event_dir = NULL; 3742 3743 return 0; 3744 } 3745 3746 static __init int event_trace_memsetup(void) 3747 { 3748 field_cachep = KMEM_CACHE(ftrace_event_field, SLAB_PANIC); 3749 file_cachep = KMEM_CACHE(trace_event_file, SLAB_PANIC); 3750 return 0; 3751 } 3752 3753 static __init void 3754 early_enable_events(struct trace_array *tr, bool disable_first) 3755 { 3756 char *buf = bootup_event_buf; 3757 char *token; 3758 int ret; 3759 3760 while (true) { 3761 token = strsep(&buf, ","); 3762 3763 if (!token) 3764 break; 3765 3766 if (*token) { 3767 /* Restarting syscalls requires that we stop them first */ 3768 if (disable_first) 3769 ftrace_set_clr_event(tr, token, 0); 3770 3771 ret = ftrace_set_clr_event(tr, token, 1); 3772 if (ret) 3773 pr_warn("Failed to enable trace event: %s\n", token); 3774 } 3775 3776 /* Put back the comma to allow this to be called again */ 3777 if (buf) 3778 *(buf - 1) = ','; 3779 } 3780 } 3781 3782 static __init int event_trace_enable(void) 3783 { 3784 struct trace_array *tr = top_trace_array(); 3785 struct trace_event_call **iter, *call; 3786 int ret; 3787 3788 if (!tr) 3789 return -ENODEV; 3790 3791 
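	/*
	 * Walk every event built into the kernel image (the section between
	 * __start_ftrace_events and __stop_ftrace_events) and register those
	 * that initialize successfully.
	 */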
for_each_event(iter, __start_ftrace_events, __stop_ftrace_events) { 3792 3793 call = *iter; 3794 ret = event_init(call); 3795 if (!ret) 3796 list_add(&call->list, &ftrace_events); 3797 } 3798 3799 register_trigger_cmds(); 3800 3801 /* 3802 * We need the top trace array to have a working set of trace 3803 * points at early init, before the debug files and directories 3804 * are created. Create the file entries now, and attach them 3805 * to the actual file dentries later. 3806 */ 3807 __trace_early_add_events(tr); 3808 3809 early_enable_events(tr, false); 3810 3811 trace_printk_start_comm(); 3812 3813 register_event_cmds(); 3814 3815 3816 return 0; 3817 } 3818 3819 /* 3820 * event_trace_enable() is called from trace_event_init() first to 3821 * initialize events and perhaps start any events that are on the 3822 * command line. Unfortunately, there are some events that will not 3823 * start this early, like the system call tracepoints that need 3824 * to set the %SYSCALL_WORK_SYSCALL_TRACEPOINT flag of pid 1. But 3825 * event_trace_enable() is called before pid 1 starts, and this flag 3826 * is never set, making the syscall tracepoint never get reached, but 3827 * the event is enabled regardless (and not doing anything). 3828 */ 3829 static __init int event_trace_enable_again(void) 3830 { 3831 struct trace_array *tr; 3832 3833 tr = top_trace_array(); 3834 if (!tr) 3835 return -ENODEV; 3836 3837 early_enable_events(tr, true); 3838 3839 return 0; 3840 } 3841 3842 early_initcall(event_trace_enable_again); 3843 3844 /* Init fields which doesn't related to the tracefs */ 3845 static __init int event_trace_init_fields(void) 3846 { 3847 if (trace_define_generic_fields()) 3848 pr_warn("tracing: Failed to allocated generic fields"); 3849 3850 if (trace_define_common_fields()) 3851 pr_warn("tracing: Failed to allocate common fields"); 3852 3853 return 0; 3854 } 3855 3856 __init int event_trace_init(void) 3857 { 3858 struct trace_array *tr; 3859 int ret; 3860 3861 tr = top_trace_array(); 3862 if (!tr) 3863 return -ENODEV; 3864 3865 trace_create_file("available_events", TRACE_MODE_READ, 3866 NULL, tr, &ftrace_avail_fops); 3867 3868 ret = early_event_add_tracer(NULL, tr); 3869 if (ret) 3870 return ret; 3871 3872 #ifdef CONFIG_MODULES 3873 ret = register_module_notifier(&trace_module_nb); 3874 if (ret) 3875 pr_warn("Failed to register trace events module notifier\n"); 3876 #endif 3877 3878 eventdir_initialized = true; 3879 3880 return 0; 3881 } 3882 3883 void __init trace_event_init(void) 3884 { 3885 event_trace_memsetup(); 3886 init_ftrace_syscalls(); 3887 event_trace_enable(); 3888 event_trace_init_fields(); 3889 } 3890 3891 #ifdef CONFIG_EVENT_TRACE_STARTUP_TEST 3892 3893 static DEFINE_SPINLOCK(test_spinlock); 3894 static DEFINE_SPINLOCK(test_spinlock_irq); 3895 static DEFINE_MUTEX(test_mutex); 3896 3897 static __init void test_work(struct work_struct *dummy) 3898 { 3899 spin_lock(&test_spinlock); 3900 spin_lock_irq(&test_spinlock_irq); 3901 udelay(1); 3902 spin_unlock_irq(&test_spinlock_irq); 3903 spin_unlock(&test_spinlock); 3904 3905 mutex_lock(&test_mutex); 3906 msleep(1); 3907 mutex_unlock(&test_mutex); 3908 } 3909 3910 static __init int event_test_thread(void *unused) 3911 { 3912 void *test_malloc; 3913 3914 test_malloc = kmalloc(1234, GFP_KERNEL); 3915 if (!test_malloc) 3916 pr_info("failed to kmalloc\n"); 3917 3918 schedule_on_each_cpu(test_work); 3919 3920 kfree(test_malloc); 3921 3922 set_current_state(TASK_INTERRUPTIBLE); 3923 while (!kthread_should_stop()) { 3924 schedule(); 3925 
set_current_state(TASK_INTERRUPTIBLE); 3926 } 3927 __set_current_state(TASK_RUNNING); 3928 3929 return 0; 3930 } 3931 3932 /* 3933 * Do various things that may trigger events. 3934 */ 3935 static __init void event_test_stuff(void) 3936 { 3937 struct task_struct *test_thread; 3938 3939 test_thread = kthread_run(event_test_thread, NULL, "test-events"); 3940 msleep(1); 3941 kthread_stop(test_thread); 3942 } 3943 3944 /* 3945 * For every trace event defined, we will test each trace point separately, 3946 * and then by groups, and finally all trace points. 3947 */ 3948 static __init void event_trace_self_tests(void) 3949 { 3950 struct trace_subsystem_dir *dir; 3951 struct trace_event_file *file; 3952 struct trace_event_call *call; 3953 struct event_subsystem *system; 3954 struct trace_array *tr; 3955 int ret; 3956 3957 tr = top_trace_array(); 3958 if (!tr) 3959 return; 3960 3961 pr_info("Running tests on trace events:\n"); 3962 3963 list_for_each_entry(file, &tr->events, list) { 3964 3965 call = file->event_call; 3966 3967 /* Only test those that have a probe */ 3968 if (!call->class || !call->class->probe) 3969 continue; 3970 3971 /* 3972 * Testing syscall events here is pretty useless, but 3973 * we still do it if configured. But this is time consuming. 3974 * What we really need is a user thread to perform the 3975 * syscalls as we test. 3976 */ 3977 #ifndef CONFIG_EVENT_TRACE_TEST_SYSCALLS 3978 if (call->class->system && 3979 strcmp(call->class->system, "syscalls") == 0) 3980 continue; 3981 #endif 3982 3983 pr_info("Testing event %s: ", trace_event_name(call)); 3984 3985 /* 3986 * If an event is already enabled, someone is using 3987 * it and the self test should not be on. 3988 */ 3989 if (file->flags & EVENT_FILE_FL_ENABLED) { 3990 pr_warn("Enabled event during self test!\n"); 3991 WARN_ON_ONCE(1); 3992 continue; 3993 } 3994 3995 ftrace_event_enable_disable(file, 1); 3996 event_test_stuff(); 3997 ftrace_event_enable_disable(file, 0); 3998 3999 pr_cont("OK\n"); 4000 } 4001 4002 /* Now test at the sub system level */ 4003 4004 pr_info("Running tests on trace event systems:\n"); 4005 4006 list_for_each_entry(dir, &tr->systems, list) { 4007 4008 system = dir->subsystem; 4009 4010 /* the ftrace system is special, skip it */ 4011 if (strcmp(system->name, "ftrace") == 0) 4012 continue; 4013 4014 pr_info("Testing event system %s: ", system->name); 4015 4016 ret = __ftrace_set_clr_event(tr, NULL, system->name, NULL, 1); 4017 if (WARN_ON_ONCE(ret)) { 4018 pr_warn("error enabling system %s\n", 4019 system->name); 4020 continue; 4021 } 4022 4023 event_test_stuff(); 4024 4025 ret = __ftrace_set_clr_event(tr, NULL, system->name, NULL, 0); 4026 if (WARN_ON_ONCE(ret)) { 4027 pr_warn("error disabling system %s\n", 4028 system->name); 4029 continue; 4030 } 4031 4032 pr_cont("OK\n"); 4033 } 4034 4035 /* Test with all events enabled */ 4036 4037 pr_info("Running tests on all trace events:\n"); 4038 pr_info("Testing all events: "); 4039 4040 ret = __ftrace_set_clr_event(tr, NULL, NULL, NULL, 1); 4041 if (WARN_ON_ONCE(ret)) { 4042 pr_warn("error enabling all events\n"); 4043 return; 4044 } 4045 4046 event_test_stuff(); 4047 4048 /* reset sysname */ 4049 ret = __ftrace_set_clr_event(tr, NULL, NULL, NULL, 0); 4050 if (WARN_ON_ONCE(ret)) { 4051 pr_warn("error disabling all events\n"); 4052 return; 4053 } 4054 4055 pr_cont("OK\n"); 4056 } 4057 4058 #ifdef CONFIG_FUNCTION_TRACER 4059 4060 static DEFINE_PER_CPU(atomic_t, ftrace_test_event_disable); 4061 4062 static struct trace_event_file event_trace_file 
__initdata; 4063 4064 static void __init 4065 function_test_events_call(unsigned long ip, unsigned long parent_ip, 4066 struct ftrace_ops *op, struct ftrace_regs *regs) 4067 { 4068 struct trace_buffer *buffer; 4069 struct ring_buffer_event *event; 4070 struct ftrace_entry *entry; 4071 unsigned int trace_ctx; 4072 long disabled; 4073 int cpu; 4074 4075 trace_ctx = tracing_gen_ctx(); 4076 preempt_disable_notrace(); 4077 cpu = raw_smp_processor_id(); 4078 disabled = atomic_inc_return(&per_cpu(ftrace_test_event_disable, cpu)); 4079 4080 if (disabled != 1) 4081 goto out; 4082 4083 event = trace_event_buffer_lock_reserve(&buffer, &event_trace_file, 4084 TRACE_FN, sizeof(*entry), 4085 trace_ctx); 4086 if (!event) 4087 goto out; 4088 entry = ring_buffer_event_data(event); 4089 entry->ip = ip; 4090 entry->parent_ip = parent_ip; 4091 4092 event_trigger_unlock_commit(&event_trace_file, buffer, event, 4093 entry, trace_ctx); 4094 out: 4095 atomic_dec(&per_cpu(ftrace_test_event_disable, cpu)); 4096 preempt_enable_notrace(); 4097 } 4098 4099 static struct ftrace_ops trace_ops __initdata = 4100 { 4101 .func = function_test_events_call, 4102 }; 4103 4104 static __init void event_trace_self_test_with_function(void) 4105 { 4106 int ret; 4107 4108 event_trace_file.tr = top_trace_array(); 4109 if (WARN_ON(!event_trace_file.tr)) 4110 return; 4111 4112 ret = register_ftrace_function(&trace_ops); 4113 if (WARN_ON(ret < 0)) { 4114 pr_info("Failed to enable function tracer for event tests\n"); 4115 return; 4116 } 4117 pr_info("Running tests again, along with the function tracer\n"); 4118 event_trace_self_tests(); 4119 unregister_ftrace_function(&trace_ops); 4120 } 4121 #else 4122 static __init void event_trace_self_test_with_function(void) 4123 { 4124 } 4125 #endif 4126 4127 static __init int event_trace_self_tests_init(void) 4128 { 4129 if (!tracing_selftest_disabled) { 4130 event_trace_self_tests(); 4131 event_trace_self_test_with_function(); 4132 } 4133 4134 return 0; 4135 } 4136 4137 late_initcall(event_trace_self_tests_init); 4138 4139 #endif 4140