// SPDX-License-Identifier: GPL-2.0
/*
 * ring buffer based function tracer
 *
 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
 *
 * Originally taken from the RT patch by:
 *   Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Based on code from the latency_tracer, that is:
 *   Copyright (C) 2004-2006 Ingo Molnar
 *   Copyright (C) 2004 Nadia Yvette Chambers
 */
#include <linux/ring_buffer.h>
#include <linux/utsname.h>
#include <linux/stacktrace.h>
#include <linux/writeback.h>
#include <linux/kallsyms.h>
#include <linux/security.h>
#include <linux/seq_file.h>
#include <linux/irqflags.h>
#include <linux/debugfs.h>
#include <linux/tracefs.h>
#include <linux/pagemap.h>
#include <linux/hardirq.h>
#include <linux/linkage.h>
#include <linux/uaccess.h>
#include <linux/vmalloc.h>
#include <linux/ftrace.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/splice.h>
#include <linux/kdebug.h>
#include <linux/string.h>
#include <linux/mount.h>
#include <linux/rwsem.h>
#include <linux/slab.h>
#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/panic_notifier.h>
#include <linux/poll.h>
#include <linux/nmi.h>
#include <linux/fs.h>
#include <linux/trace.h>
#include <linux/sched/clock.h>
#include <linux/sched/rt.h>
#include <linux/fsnotify.h>
#include <linux/irq_work.h>
#include <linux/workqueue.h>

#include <asm/setup.h> /* COMMAND_LINE_SIZE */

#include "trace.h"
#include "trace_output.h"

#ifdef CONFIG_FTRACE_STARTUP_TEST
/*
 * We need to change this state when a selftest is running.
 * A selftest will look into the ring buffer to count the
 * entries inserted during the selftest, although some concurrent
 * insertions into the ring buffer, such as trace_printk(), could occur
 * at the same time, giving false positive or negative results.
 */
static bool __read_mostly tracing_selftest_running;

/*
 * If boot-time tracing including tracers/events via kernel cmdline
 * is running, we do not want to run SELFTEST.
 */
bool __read_mostly tracing_selftest_disabled;

void __init disable_tracing_selftest(const char *reason)
{
	if (!tracing_selftest_disabled) {
		tracing_selftest_disabled = true;
		pr_info("Ftrace startup test is disabled due to %s\n", reason);
	}
}
#else
#define tracing_selftest_running	0
#define tracing_selftest_disabled	0
#endif

/* Pipe tracepoints to printk */
static struct trace_iterator *tracepoint_print_iter;
int tracepoint_printk;
static bool tracepoint_printk_stop_on_boot __initdata;
static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);

/* For tracers that don't implement custom flags */
static struct tracer_opt dummy_tracer_opt[] = {
	{ }
};

static int
dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
{
	return 0;
}

/*
 * To prevent the comm cache from being overwritten when no
 * tracing is active, only save the comm when a trace event
 * occurred.
 */
DEFINE_PER_CPU(bool, trace_taskinfo_save);

/*
 * Kill all tracing for good (never come back).
 * It is initialized to 1 but will turn to zero if the initialization
 * of the tracer is successful. But that is the only place that sets
 * this back to zero.
 */
static int tracing_disabled = 1;

cpumask_var_t __read_mostly tracing_buffer_mask;

/*
 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
 *
 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
 * is set, then ftrace_dump is called. This will output the contents
 * of the ftrace buffers to the console. This is very useful for
 * capturing traces that lead to crashes and outputting them to a
 * serial console.
 *
 * It is off by default, but you can enable it either by specifying
 * "ftrace_dump_on_oops" on the kernel command line, or by setting
 * /proc/sys/kernel/ftrace_dump_on_oops
 *   Set to 1 if you want to dump the buffers of all CPUs
 *   Set to 2 if you want to dump the buffer of the CPU that triggered the oops
 */

enum ftrace_dump_mode ftrace_dump_on_oops;

/* When set, tracing will stop when a WARN*() is hit */
int __disable_trace_on_warning;

#ifdef CONFIG_TRACE_EVAL_MAP_FILE
/* Map of enums to their values, for "eval_map" file */
struct trace_eval_map_head {
	struct module		*mod;
	unsigned long		length;
};

union trace_eval_map_item;

struct trace_eval_map_tail {
	/*
	 * "end" is first and points to NULL as it must be different
	 * from "mod" or "eval_string"
	 */
	union trace_eval_map_item	*next;
	const char			*end;	/* points to NULL */
};

static DEFINE_MUTEX(trace_eval_mutex);

/*
 * The trace_eval_maps are saved in an array with two extra elements,
 * one at the beginning, and one at the end. The beginning item contains
 * the count of the saved maps (head.length), and the module they
 * belong to if not built in (head.mod). The ending item contains a
 * pointer to the next array of saved eval_map items.
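 *
 * For example (illustrative layout only), one saved block looks like:
 *
 *   [ head: length, mod ][ map 0 ][ map 1 ] ... [ map N-1 ][ tail: next ]
 *
 * where N == head.length and tail.next points to the next such block,
 * or NULL when there is no further block.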
166 */ 167 union trace_eval_map_item { 168 struct trace_eval_map map; 169 struct trace_eval_map_head head; 170 struct trace_eval_map_tail tail; 171 }; 172 173 static union trace_eval_map_item *trace_eval_maps; 174 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */ 175 176 int tracing_set_tracer(struct trace_array *tr, const char *buf); 177 static void ftrace_trace_userstack(struct trace_array *tr, 178 struct trace_buffer *buffer, 179 unsigned int trace_ctx); 180 181 #define MAX_TRACER_SIZE 100 182 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata; 183 static char *default_bootup_tracer; 184 185 static bool allocate_snapshot; 186 static bool snapshot_at_boot; 187 188 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata; 189 static int boot_instance_index; 190 191 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata; 192 static int boot_snapshot_index; 193 194 static int __init set_cmdline_ftrace(char *str) 195 { 196 strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE); 197 default_bootup_tracer = bootup_tracer_buf; 198 /* We are using ftrace early, expand it */ 199 trace_set_ring_buffer_expanded(NULL); 200 return 1; 201 } 202 __setup("ftrace=", set_cmdline_ftrace); 203 204 static int __init set_ftrace_dump_on_oops(char *str) 205 { 206 if (*str++ != '=' || !*str || !strcmp("1", str)) { 207 ftrace_dump_on_oops = DUMP_ALL; 208 return 1; 209 } 210 211 if (!strcmp("orig_cpu", str) || !strcmp("2", str)) { 212 ftrace_dump_on_oops = DUMP_ORIG; 213 return 1; 214 } 215 216 return 0; 217 } 218 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops); 219 220 static int __init stop_trace_on_warning(char *str) 221 { 222 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0)) 223 __disable_trace_on_warning = 1; 224 return 1; 225 } 226 __setup("traceoff_on_warning", stop_trace_on_warning); 227 228 static int __init boot_alloc_snapshot(char *str) 229 { 230 char *slot = boot_snapshot_info + boot_snapshot_index; 231 int left = sizeof(boot_snapshot_info) - boot_snapshot_index; 232 int ret; 233 234 if (str[0] == '=') { 235 str++; 236 if (strlen(str) >= left) 237 return -1; 238 239 ret = snprintf(slot, left, "%s\t", str); 240 boot_snapshot_index += ret; 241 } else { 242 allocate_snapshot = true; 243 /* We also need the main ring buffer expanded */ 244 trace_set_ring_buffer_expanded(NULL); 245 } 246 return 1; 247 } 248 __setup("alloc_snapshot", boot_alloc_snapshot); 249 250 251 static int __init boot_snapshot(char *str) 252 { 253 snapshot_at_boot = true; 254 boot_alloc_snapshot(str); 255 return 1; 256 } 257 __setup("ftrace_boot_snapshot", boot_snapshot); 258 259 260 static int __init boot_instance(char *str) 261 { 262 char *slot = boot_instance_info + boot_instance_index; 263 int left = sizeof(boot_instance_info) - boot_instance_index; 264 int ret; 265 266 if (strlen(str) >= left) 267 return -1; 268 269 ret = snprintf(slot, left, "%s\t", str); 270 boot_instance_index += ret; 271 272 return 1; 273 } 274 __setup("trace_instance=", boot_instance); 275 276 277 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata; 278 279 static int __init set_trace_boot_options(char *str) 280 { 281 strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE); 282 return 1; 283 } 284 __setup("trace_options=", set_trace_boot_options); 285 286 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata; 287 static char *trace_boot_clock __initdata; 288 289 static int __init set_trace_boot_clock(char *str) 290 { 291 strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE); 292 trace_boot_clock = trace_boot_clock_buf; 293 return 
1; 294 } 295 __setup("trace_clock=", set_trace_boot_clock); 296 297 static int __init set_tracepoint_printk(char *str) 298 { 299 /* Ignore the "tp_printk_stop_on_boot" param */ 300 if (*str == '_') 301 return 0; 302 303 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0)) 304 tracepoint_printk = 1; 305 return 1; 306 } 307 __setup("tp_printk", set_tracepoint_printk); 308 309 static int __init set_tracepoint_printk_stop(char *str) 310 { 311 tracepoint_printk_stop_on_boot = true; 312 return 1; 313 } 314 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop); 315 316 unsigned long long ns2usecs(u64 nsec) 317 { 318 nsec += 500; 319 do_div(nsec, 1000); 320 return nsec; 321 } 322 323 static void 324 trace_process_export(struct trace_export *export, 325 struct ring_buffer_event *event, int flag) 326 { 327 struct trace_entry *entry; 328 unsigned int size = 0; 329 330 if (export->flags & flag) { 331 entry = ring_buffer_event_data(event); 332 size = ring_buffer_event_length(event); 333 export->write(export, entry, size); 334 } 335 } 336 337 static DEFINE_MUTEX(ftrace_export_lock); 338 339 static struct trace_export __rcu *ftrace_exports_list __read_mostly; 340 341 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled); 342 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled); 343 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled); 344 345 static inline void ftrace_exports_enable(struct trace_export *export) 346 { 347 if (export->flags & TRACE_EXPORT_FUNCTION) 348 static_branch_inc(&trace_function_exports_enabled); 349 350 if (export->flags & TRACE_EXPORT_EVENT) 351 static_branch_inc(&trace_event_exports_enabled); 352 353 if (export->flags & TRACE_EXPORT_MARKER) 354 static_branch_inc(&trace_marker_exports_enabled); 355 } 356 357 static inline void ftrace_exports_disable(struct trace_export *export) 358 { 359 if (export->flags & TRACE_EXPORT_FUNCTION) 360 static_branch_dec(&trace_function_exports_enabled); 361 362 if (export->flags & TRACE_EXPORT_EVENT) 363 static_branch_dec(&trace_event_exports_enabled); 364 365 if (export->flags & TRACE_EXPORT_MARKER) 366 static_branch_dec(&trace_marker_exports_enabled); 367 } 368 369 static void ftrace_exports(struct ring_buffer_event *event, int flag) 370 { 371 struct trace_export *export; 372 373 preempt_disable_notrace(); 374 375 export = rcu_dereference_raw_check(ftrace_exports_list); 376 while (export) { 377 trace_process_export(export, event, flag); 378 export = rcu_dereference_raw_check(export->next); 379 } 380 381 preempt_enable_notrace(); 382 } 383 384 static inline void 385 add_trace_export(struct trace_export **list, struct trace_export *export) 386 { 387 rcu_assign_pointer(export->next, *list); 388 /* 389 * We are entering export into the list but another 390 * CPU might be walking that list. We need to make sure 391 * the export->next pointer is valid before another CPU sees 392 * the export pointer included into the list. 
393 */ 394 rcu_assign_pointer(*list, export); 395 } 396 397 static inline int 398 rm_trace_export(struct trace_export **list, struct trace_export *export) 399 { 400 struct trace_export **p; 401 402 for (p = list; *p != NULL; p = &(*p)->next) 403 if (*p == export) 404 break; 405 406 if (*p != export) 407 return -1; 408 409 rcu_assign_pointer(*p, (*p)->next); 410 411 return 0; 412 } 413 414 static inline void 415 add_ftrace_export(struct trace_export **list, struct trace_export *export) 416 { 417 ftrace_exports_enable(export); 418 419 add_trace_export(list, export); 420 } 421 422 static inline int 423 rm_ftrace_export(struct trace_export **list, struct trace_export *export) 424 { 425 int ret; 426 427 ret = rm_trace_export(list, export); 428 ftrace_exports_disable(export); 429 430 return ret; 431 } 432 433 int register_ftrace_export(struct trace_export *export) 434 { 435 if (WARN_ON_ONCE(!export->write)) 436 return -1; 437 438 mutex_lock(&ftrace_export_lock); 439 440 add_ftrace_export(&ftrace_exports_list, export); 441 442 mutex_unlock(&ftrace_export_lock); 443 444 return 0; 445 } 446 EXPORT_SYMBOL_GPL(register_ftrace_export); 447 448 int unregister_ftrace_export(struct trace_export *export) 449 { 450 int ret; 451 452 mutex_lock(&ftrace_export_lock); 453 454 ret = rm_ftrace_export(&ftrace_exports_list, export); 455 456 mutex_unlock(&ftrace_export_lock); 457 458 return ret; 459 } 460 EXPORT_SYMBOL_GPL(unregister_ftrace_export); 461 462 /* trace_flags holds trace_options default values */ 463 #define TRACE_DEFAULT_FLAGS \ 464 (FUNCTION_DEFAULT_FLAGS | \ 465 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | \ 466 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | \ 467 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | \ 468 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | \ 469 TRACE_ITER_HASH_PTR) 470 471 /* trace_options that are only supported by global_trace */ 472 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK | \ 473 TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD) 474 475 /* trace_flags that are default zero for instances */ 476 #define ZEROED_TRACE_FLAGS \ 477 (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK) 478 479 /* 480 * The global_trace is the descriptor that holds the top-level tracing 481 * buffers for the live tracing. 482 */ 483 static struct trace_array global_trace = { 484 .trace_flags = TRACE_DEFAULT_FLAGS, 485 }; 486 487 void trace_set_ring_buffer_expanded(struct trace_array *tr) 488 { 489 if (!tr) 490 tr = &global_trace; 491 tr->ring_buffer_expanded = true; 492 } 493 494 LIST_HEAD(ftrace_trace_arrays); 495 496 int trace_array_get(struct trace_array *this_tr) 497 { 498 struct trace_array *tr; 499 int ret = -ENODEV; 500 501 mutex_lock(&trace_types_lock); 502 list_for_each_entry(tr, &ftrace_trace_arrays, list) { 503 if (tr == this_tr) { 504 tr->ref++; 505 ret = 0; 506 break; 507 } 508 } 509 mutex_unlock(&trace_types_lock); 510 511 return ret; 512 } 513 514 static void __trace_array_put(struct trace_array *this_tr) 515 { 516 WARN_ON(!this_tr->ref); 517 this_tr->ref--; 518 } 519 520 /** 521 * trace_array_put - Decrement the reference counter for this trace array. 522 * @this_tr : pointer to the trace array 523 * 524 * NOTE: Use this when we no longer need the trace array returned by 525 * trace_array_get_by_name(). This ensures the trace array can be later 526 * destroyed. 
527 * 528 */ 529 void trace_array_put(struct trace_array *this_tr) 530 { 531 if (!this_tr) 532 return; 533 534 mutex_lock(&trace_types_lock); 535 __trace_array_put(this_tr); 536 mutex_unlock(&trace_types_lock); 537 } 538 EXPORT_SYMBOL_GPL(trace_array_put); 539 540 int tracing_check_open_get_tr(struct trace_array *tr) 541 { 542 int ret; 543 544 ret = security_locked_down(LOCKDOWN_TRACEFS); 545 if (ret) 546 return ret; 547 548 if (tracing_disabled) 549 return -ENODEV; 550 551 if (tr && trace_array_get(tr) < 0) 552 return -ENODEV; 553 554 return 0; 555 } 556 557 int call_filter_check_discard(struct trace_event_call *call, void *rec, 558 struct trace_buffer *buffer, 559 struct ring_buffer_event *event) 560 { 561 if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) && 562 !filter_match_preds(call->filter, rec)) { 563 __trace_event_discard_commit(buffer, event); 564 return 1; 565 } 566 567 return 0; 568 } 569 570 /** 571 * trace_find_filtered_pid - check if a pid exists in a filtered_pid list 572 * @filtered_pids: The list of pids to check 573 * @search_pid: The PID to find in @filtered_pids 574 * 575 * Returns true if @search_pid is found in @filtered_pids, and false otherwise. 576 */ 577 bool 578 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid) 579 { 580 return trace_pid_list_is_set(filtered_pids, search_pid); 581 } 582 583 /** 584 * trace_ignore_this_task - should a task be ignored for tracing 585 * @filtered_pids: The list of pids to check 586 * @filtered_no_pids: The list of pids not to be traced 587 * @task: The task that should be ignored if not filtered 588 * 589 * Checks if @task should be traced or not from @filtered_pids. 590 * Returns true if @task should *NOT* be traced. 591 * Returns false if @task should be traced. 592 */ 593 bool 594 trace_ignore_this_task(struct trace_pid_list *filtered_pids, 595 struct trace_pid_list *filtered_no_pids, 596 struct task_struct *task) 597 { 598 /* 599 * If filtered_no_pids is not empty, and the task's pid is listed 600 * in filtered_no_pids, then return true. 601 * Otherwise, if filtered_pids is empty, that means we can 602 * trace all tasks. If it has content, then only trace pids 603 * within filtered_pids. 604 */ 605 606 return (filtered_pids && 607 !trace_find_filtered_pid(filtered_pids, task->pid)) || 608 (filtered_no_pids && 609 trace_find_filtered_pid(filtered_no_pids, task->pid)); 610 } 611 612 /** 613 * trace_filter_add_remove_task - Add or remove a task from a pid_list 614 * @pid_list: The list to modify 615 * @self: The current task for fork or NULL for exit 616 * @task: The task to add or remove 617 * 618 * If adding a task, if @self is defined, the task is only added if @self 619 * is also included in @pid_list. This happens on fork and tasks should 620 * only be added when the parent is listed. If @self is NULL, then the 621 * @task pid will be removed from the list, which would happen on exit 622 * of a task. 
623 */ 624 void trace_filter_add_remove_task(struct trace_pid_list *pid_list, 625 struct task_struct *self, 626 struct task_struct *task) 627 { 628 if (!pid_list) 629 return; 630 631 /* For forks, we only add if the forking task is listed */ 632 if (self) { 633 if (!trace_find_filtered_pid(pid_list, self->pid)) 634 return; 635 } 636 637 /* "self" is set for forks, and NULL for exits */ 638 if (self) 639 trace_pid_list_set(pid_list, task->pid); 640 else 641 trace_pid_list_clear(pid_list, task->pid); 642 } 643 644 /** 645 * trace_pid_next - Used for seq_file to get to the next pid of a pid_list 646 * @pid_list: The pid list to show 647 * @v: The last pid that was shown (+1 the actual pid to let zero be displayed) 648 * @pos: The position of the file 649 * 650 * This is used by the seq_file "next" operation to iterate the pids 651 * listed in a trace_pid_list structure. 652 * 653 * Returns the pid+1 as we want to display pid of zero, but NULL would 654 * stop the iteration. 655 */ 656 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos) 657 { 658 long pid = (unsigned long)v; 659 unsigned int next; 660 661 (*pos)++; 662 663 /* pid already is +1 of the actual previous bit */ 664 if (trace_pid_list_next(pid_list, pid, &next) < 0) 665 return NULL; 666 667 pid = next; 668 669 /* Return pid + 1 to allow zero to be represented */ 670 return (void *)(pid + 1); 671 } 672 673 /** 674 * trace_pid_start - Used for seq_file to start reading pid lists 675 * @pid_list: The pid list to show 676 * @pos: The position of the file 677 * 678 * This is used by seq_file "start" operation to start the iteration 679 * of listing pids. 680 * 681 * Returns the pid+1 as we want to display pid of zero, but NULL would 682 * stop the iteration. 683 */ 684 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos) 685 { 686 unsigned long pid; 687 unsigned int first; 688 loff_t l = 0; 689 690 if (trace_pid_list_first(pid_list, &first) < 0) 691 return NULL; 692 693 pid = first; 694 695 /* Return pid + 1 so that zero can be the exit value */ 696 for (pid++; pid && l < *pos; 697 pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l)) 698 ; 699 return (void *)pid; 700 } 701 702 /** 703 * trace_pid_show - show the current pid in seq_file processing 704 * @m: The seq_file structure to write into 705 * @v: A void pointer of the pid (+1) value to display 706 * 707 * Can be directly used by seq_file operations to display the current 708 * pid value. 709 */ 710 int trace_pid_show(struct seq_file *m, void *v) 711 { 712 unsigned long pid = (unsigned long)v - 1; 713 714 seq_printf(m, "%lu\n", pid); 715 return 0; 716 } 717 718 /* 128 should be much more than enough */ 719 #define PID_BUF_SIZE 127 720 721 int trace_pid_write(struct trace_pid_list *filtered_pids, 722 struct trace_pid_list **new_pid_list, 723 const char __user *ubuf, size_t cnt) 724 { 725 struct trace_pid_list *pid_list; 726 struct trace_parser parser; 727 unsigned long val; 728 int nr_pids = 0; 729 ssize_t read = 0; 730 ssize_t ret; 731 loff_t pos; 732 pid_t pid; 733 734 if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1)) 735 return -ENOMEM; 736 737 /* 738 * Always recreate a new array. The write is an all or nothing 739 * operation. Always create a new array when adding new pids by 740 * the user. If the operation fails, then the current list is 741 * not modified. 
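	 *
	 * Note: the user buffer is parsed as whitespace-separated pid
	 * numbers by trace_get_user() below; an invalid token stops the
	 * parse and the whole write fails without touching the old list.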
742 */ 743 pid_list = trace_pid_list_alloc(); 744 if (!pid_list) { 745 trace_parser_put(&parser); 746 return -ENOMEM; 747 } 748 749 if (filtered_pids) { 750 /* copy the current bits to the new max */ 751 ret = trace_pid_list_first(filtered_pids, &pid); 752 while (!ret) { 753 trace_pid_list_set(pid_list, pid); 754 ret = trace_pid_list_next(filtered_pids, pid + 1, &pid); 755 nr_pids++; 756 } 757 } 758 759 ret = 0; 760 while (cnt > 0) { 761 762 pos = 0; 763 764 ret = trace_get_user(&parser, ubuf, cnt, &pos); 765 if (ret < 0) 766 break; 767 768 read += ret; 769 ubuf += ret; 770 cnt -= ret; 771 772 if (!trace_parser_loaded(&parser)) 773 break; 774 775 ret = -EINVAL; 776 if (kstrtoul(parser.buffer, 0, &val)) 777 break; 778 779 pid = (pid_t)val; 780 781 if (trace_pid_list_set(pid_list, pid) < 0) { 782 ret = -1; 783 break; 784 } 785 nr_pids++; 786 787 trace_parser_clear(&parser); 788 ret = 0; 789 } 790 trace_parser_put(&parser); 791 792 if (ret < 0) { 793 trace_pid_list_free(pid_list); 794 return ret; 795 } 796 797 if (!nr_pids) { 798 /* Cleared the list of pids */ 799 trace_pid_list_free(pid_list); 800 pid_list = NULL; 801 } 802 803 *new_pid_list = pid_list; 804 805 return read; 806 } 807 808 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu) 809 { 810 u64 ts; 811 812 /* Early boot up does not have a buffer yet */ 813 if (!buf->buffer) 814 return trace_clock_local(); 815 816 ts = ring_buffer_time_stamp(buf->buffer); 817 ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts); 818 819 return ts; 820 } 821 822 u64 ftrace_now(int cpu) 823 { 824 return buffer_ftrace_now(&global_trace.array_buffer, cpu); 825 } 826 827 /** 828 * tracing_is_enabled - Show if global_trace has been enabled 829 * 830 * Shows if the global trace has been enabled or not. It uses the 831 * mirror flag "buffer_disabled" to be used in fast paths such as for 832 * the irqsoff tracer. But it may be inaccurate due to races. If you 833 * need to know the accurate state, use tracing_is_on() which is a little 834 * slower, but accurate. 835 */ 836 int tracing_is_enabled(void) 837 { 838 /* 839 * For quick access (irqsoff uses this in fast path), just 840 * return the mirror variable of the state of the ring buffer. 841 * It's a little racy, but we don't really care. 842 */ 843 smp_rmb(); 844 return !global_trace.buffer_disabled; 845 } 846 847 /* 848 * trace_buf_size is the size in bytes that is allocated 849 * for a buffer. Note, the number of bytes is always rounded 850 * to page size. 851 * 852 * This number is purposely set to a low number of 16384. 853 * If the dump on oops happens, it will be much appreciated 854 * to not have to wait for all that output. Anyway this can be 855 * boot time and run time configurable. 856 */ 857 #define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */ 858 859 static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT; 860 861 /* trace_types holds a link list of available tracers. */ 862 static struct tracer *trace_types __read_mostly; 863 864 /* 865 * trace_types_lock is used to protect the trace_types list. 866 */ 867 DEFINE_MUTEX(trace_types_lock); 868 869 /* 870 * serialize the access of the ring buffer 871 * 872 * ring buffer serializes readers, but it is low level protection. 873 * The validity of the events (which returns by ring_buffer_peek() ..etc) 874 * are not protected by ring buffer. 
875 * 876 * The content of events may become garbage if we allow other process consumes 877 * these events concurrently: 878 * A) the page of the consumed events may become a normal page 879 * (not reader page) in ring buffer, and this page will be rewritten 880 * by events producer. 881 * B) The page of the consumed events may become a page for splice_read, 882 * and this page will be returned to system. 883 * 884 * These primitives allow multi process access to different cpu ring buffer 885 * concurrently. 886 * 887 * These primitives don't distinguish read-only and read-consume access. 888 * Multi read-only access are also serialized. 889 */ 890 891 #ifdef CONFIG_SMP 892 static DECLARE_RWSEM(all_cpu_access_lock); 893 static DEFINE_PER_CPU(struct mutex, cpu_access_lock); 894 895 static inline void trace_access_lock(int cpu) 896 { 897 if (cpu == RING_BUFFER_ALL_CPUS) { 898 /* gain it for accessing the whole ring buffer. */ 899 down_write(&all_cpu_access_lock); 900 } else { 901 /* gain it for accessing a cpu ring buffer. */ 902 903 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */ 904 down_read(&all_cpu_access_lock); 905 906 /* Secondly block other access to this @cpu ring buffer. */ 907 mutex_lock(&per_cpu(cpu_access_lock, cpu)); 908 } 909 } 910 911 static inline void trace_access_unlock(int cpu) 912 { 913 if (cpu == RING_BUFFER_ALL_CPUS) { 914 up_write(&all_cpu_access_lock); 915 } else { 916 mutex_unlock(&per_cpu(cpu_access_lock, cpu)); 917 up_read(&all_cpu_access_lock); 918 } 919 } 920 921 static inline void trace_access_lock_init(void) 922 { 923 int cpu; 924 925 for_each_possible_cpu(cpu) 926 mutex_init(&per_cpu(cpu_access_lock, cpu)); 927 } 928 929 #else 930 931 static DEFINE_MUTEX(access_lock); 932 933 static inline void trace_access_lock(int cpu) 934 { 935 (void)cpu; 936 mutex_lock(&access_lock); 937 } 938 939 static inline void trace_access_unlock(int cpu) 940 { 941 (void)cpu; 942 mutex_unlock(&access_lock); 943 } 944 945 static inline void trace_access_lock_init(void) 946 { 947 } 948 949 #endif 950 951 #ifdef CONFIG_STACKTRACE 952 static void __ftrace_trace_stack(struct trace_buffer *buffer, 953 unsigned int trace_ctx, 954 int skip, struct pt_regs *regs); 955 static inline void ftrace_trace_stack(struct trace_array *tr, 956 struct trace_buffer *buffer, 957 unsigned int trace_ctx, 958 int skip, struct pt_regs *regs); 959 960 #else 961 static inline void __ftrace_trace_stack(struct trace_buffer *buffer, 962 unsigned int trace_ctx, 963 int skip, struct pt_regs *regs) 964 { 965 } 966 static inline void ftrace_trace_stack(struct trace_array *tr, 967 struct trace_buffer *buffer, 968 unsigned long trace_ctx, 969 int skip, struct pt_regs *regs) 970 { 971 } 972 973 #endif 974 975 static __always_inline void 976 trace_event_setup(struct ring_buffer_event *event, 977 int type, unsigned int trace_ctx) 978 { 979 struct trace_entry *ent = ring_buffer_event_data(event); 980 981 tracing_generic_entry_update(ent, type, trace_ctx); 982 } 983 984 static __always_inline struct ring_buffer_event * 985 __trace_buffer_lock_reserve(struct trace_buffer *buffer, 986 int type, 987 unsigned long len, 988 unsigned int trace_ctx) 989 { 990 struct ring_buffer_event *event; 991 992 event = ring_buffer_lock_reserve(buffer, len); 993 if (event != NULL) 994 trace_event_setup(event, type, trace_ctx); 995 996 return event; 997 } 998 999 void tracer_tracing_on(struct trace_array *tr) 1000 { 1001 if (tr->array_buffer.buffer) 1002 ring_buffer_record_on(tr->array_buffer.buffer); 1003 /* 1004 * This flag 
is looked at when buffers haven't been allocated 1005 * yet, or by some tracers (like irqsoff), that just want to 1006 * know if the ring buffer has been disabled, but it can handle 1007 * races of where it gets disabled but we still do a record. 1008 * As the check is in the fast path of the tracers, it is more 1009 * important to be fast than accurate. 1010 */ 1011 tr->buffer_disabled = 0; 1012 /* Make the flag seen by readers */ 1013 smp_wmb(); 1014 } 1015 1016 /** 1017 * tracing_on - enable tracing buffers 1018 * 1019 * This function enables tracing buffers that may have been 1020 * disabled with tracing_off. 1021 */ 1022 void tracing_on(void) 1023 { 1024 tracer_tracing_on(&global_trace); 1025 } 1026 EXPORT_SYMBOL_GPL(tracing_on); 1027 1028 1029 static __always_inline void 1030 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event) 1031 { 1032 __this_cpu_write(trace_taskinfo_save, true); 1033 1034 /* If this is the temp buffer, we need to commit fully */ 1035 if (this_cpu_read(trace_buffered_event) == event) { 1036 /* Length is in event->array[0] */ 1037 ring_buffer_write(buffer, event->array[0], &event->array[1]); 1038 /* Release the temp buffer */ 1039 this_cpu_dec(trace_buffered_event_cnt); 1040 /* ring_buffer_unlock_commit() enables preemption */ 1041 preempt_enable_notrace(); 1042 } else 1043 ring_buffer_unlock_commit(buffer); 1044 } 1045 1046 int __trace_array_puts(struct trace_array *tr, unsigned long ip, 1047 const char *str, int size) 1048 { 1049 struct ring_buffer_event *event; 1050 struct trace_buffer *buffer; 1051 struct print_entry *entry; 1052 unsigned int trace_ctx; 1053 int alloc; 1054 1055 if (!(tr->trace_flags & TRACE_ITER_PRINTK)) 1056 return 0; 1057 1058 if (unlikely(tracing_selftest_running && tr == &global_trace)) 1059 return 0; 1060 1061 if (unlikely(tracing_disabled)) 1062 return 0; 1063 1064 alloc = sizeof(*entry) + size + 2; /* possible \n added */ 1065 1066 trace_ctx = tracing_gen_ctx(); 1067 buffer = tr->array_buffer.buffer; 1068 ring_buffer_nest_start(buffer); 1069 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 1070 trace_ctx); 1071 if (!event) { 1072 size = 0; 1073 goto out; 1074 } 1075 1076 entry = ring_buffer_event_data(event); 1077 entry->ip = ip; 1078 1079 memcpy(&entry->buf, str, size); 1080 1081 /* Add a newline if necessary */ 1082 if (entry->buf[size - 1] != '\n') { 1083 entry->buf[size] = '\n'; 1084 entry->buf[size + 1] = '\0'; 1085 } else 1086 entry->buf[size] = '\0'; 1087 1088 __buffer_unlock_commit(buffer, event); 1089 ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL); 1090 out: 1091 ring_buffer_nest_end(buffer); 1092 return size; 1093 } 1094 EXPORT_SYMBOL_GPL(__trace_array_puts); 1095 1096 /** 1097 * __trace_puts - write a constant string into the trace buffer. 1098 * @ip: The address of the caller 1099 * @str: The constant string to write 1100 * @size: The size of the string. 
1101 */ 1102 int __trace_puts(unsigned long ip, const char *str, int size) 1103 { 1104 return __trace_array_puts(&global_trace, ip, str, size); 1105 } 1106 EXPORT_SYMBOL_GPL(__trace_puts); 1107 1108 /** 1109 * __trace_bputs - write the pointer to a constant string into trace buffer 1110 * @ip: The address of the caller 1111 * @str: The constant string to write to the buffer to 1112 */ 1113 int __trace_bputs(unsigned long ip, const char *str) 1114 { 1115 struct ring_buffer_event *event; 1116 struct trace_buffer *buffer; 1117 struct bputs_entry *entry; 1118 unsigned int trace_ctx; 1119 int size = sizeof(struct bputs_entry); 1120 int ret = 0; 1121 1122 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK)) 1123 return 0; 1124 1125 if (unlikely(tracing_selftest_running || tracing_disabled)) 1126 return 0; 1127 1128 trace_ctx = tracing_gen_ctx(); 1129 buffer = global_trace.array_buffer.buffer; 1130 1131 ring_buffer_nest_start(buffer); 1132 event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size, 1133 trace_ctx); 1134 if (!event) 1135 goto out; 1136 1137 entry = ring_buffer_event_data(event); 1138 entry->ip = ip; 1139 entry->str = str; 1140 1141 __buffer_unlock_commit(buffer, event); 1142 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL); 1143 1144 ret = 1; 1145 out: 1146 ring_buffer_nest_end(buffer); 1147 return ret; 1148 } 1149 EXPORT_SYMBOL_GPL(__trace_bputs); 1150 1151 #ifdef CONFIG_TRACER_SNAPSHOT 1152 static void tracing_snapshot_instance_cond(struct trace_array *tr, 1153 void *cond_data) 1154 { 1155 struct tracer *tracer = tr->current_trace; 1156 unsigned long flags; 1157 1158 if (in_nmi()) { 1159 trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n"); 1160 trace_array_puts(tr, "*** snapshot is being ignored ***\n"); 1161 return; 1162 } 1163 1164 if (!tr->allocated_snapshot) { 1165 trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n"); 1166 trace_array_puts(tr, "*** stopping trace here! ***\n"); 1167 tracer_tracing_off(tr); 1168 return; 1169 } 1170 1171 /* Note, snapshot can not be used when the tracer uses it */ 1172 if (tracer->use_max_tr) { 1173 trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n"); 1174 trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n"); 1175 return; 1176 } 1177 1178 local_irq_save(flags); 1179 update_max_tr(tr, current, smp_processor_id(), cond_data); 1180 local_irq_restore(flags); 1181 } 1182 1183 void tracing_snapshot_instance(struct trace_array *tr) 1184 { 1185 tracing_snapshot_instance_cond(tr, NULL); 1186 } 1187 1188 /** 1189 * tracing_snapshot - take a snapshot of the current buffer. 1190 * 1191 * This causes a swap between the snapshot buffer and the current live 1192 * tracing buffer. You can use this to take snapshots of the live 1193 * trace when some condition is triggered, but continue to trace. 1194 * 1195 * Note, make sure to allocate the snapshot with either 1196 * a tracing_snapshot_alloc(), or by doing it manually 1197 * with: echo 1 > /sys/kernel/tracing/snapshot 1198 * 1199 * If the snapshot buffer is not allocated, it will stop tracing. 1200 * Basically making a permanent snapshot. 1201 */ 1202 void tracing_snapshot(void) 1203 { 1204 struct trace_array *tr = &global_trace; 1205 1206 tracing_snapshot_instance(tr); 1207 } 1208 EXPORT_SYMBOL_GPL(tracing_snapshot); 1209 1210 /** 1211 * tracing_snapshot_cond - conditionally take a snapshot of the current buffer. 
 * @tr: The tracing instance to snapshot
 * @cond_data: The data to be tested conditionally, and possibly saved
 *
 * This is the same as tracing_snapshot() except that the snapshot is
 * conditional - the snapshot will only happen if the
 * cond_snapshot.update() implementation receiving the cond_data
 * returns true, which means that the trace array's cond_snapshot
 * update() operation used the cond_data to determine whether the
 * snapshot should be taken, and if it was, presumably saved it along
 * with the snapshot.
 */
void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
{
	tracing_snapshot_instance_cond(tr, cond_data);
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond);

/**
 * tracing_cond_snapshot_data - get the user data associated with a snapshot
 * @tr: The tracing instance
 *
 * When the user enables a conditional snapshot using
 * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
 * with the snapshot. This accessor is used to retrieve it.
 *
 * Should not be called from cond_snapshot.update(), since it takes
 * the tr->max_lock lock, which the code calling
 * cond_snapshot.update() has already taken.
 *
 * Returns the cond_data associated with the trace array's snapshot.
 */
void *tracing_cond_snapshot_data(struct trace_array *tr)
{
	void *cond_data = NULL;

	local_irq_disable();
	arch_spin_lock(&tr->max_lock);

	if (tr->cond_snapshot)
		cond_data = tr->cond_snapshot->cond_data;

	arch_spin_unlock(&tr->max_lock);
	local_irq_enable();

	return cond_data;
}
EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);

static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
					struct array_buffer *size_buf, int cpu_id);
static void set_buffer_entries(struct array_buffer *buf, unsigned long val);

int tracing_alloc_snapshot_instance(struct trace_array *tr)
{
	int order;
	int ret;

	if (!tr->allocated_snapshot) {

		/* Make the snapshot buffer have the same order as main buffer */
		order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
		ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
		if (ret < 0)
			return ret;

		/* allocate spare buffer */
		ret = resize_buffer_duplicate_size(&tr->max_buffer,
				&tr->array_buffer, RING_BUFFER_ALL_CPUS);
		if (ret < 0)
			return ret;

		tr->allocated_snapshot = true;
	}

	return 0;
}

static void free_snapshot(struct trace_array *tr)
{
	/*
	 * We don't free the ring buffer; instead, we resize it, because
	 * the max_tr ring buffer has some state (e.g. ring->clock) that
	 * we want to preserve.
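	 * (Below, max_buffer is only shrunk back to its minimum size and
	 * reset, so that per-buffer state survives for the next snapshot.)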
1295 */ 1296 ring_buffer_subbuf_order_set(tr->max_buffer.buffer, 0); 1297 ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS); 1298 set_buffer_entries(&tr->max_buffer, 1); 1299 tracing_reset_online_cpus(&tr->max_buffer); 1300 tr->allocated_snapshot = false; 1301 } 1302 1303 static int tracing_arm_snapshot_locked(struct trace_array *tr) 1304 { 1305 int ret; 1306 1307 lockdep_assert_held(&trace_types_lock); 1308 1309 spin_lock(&tr->snapshot_trigger_lock); 1310 if (tr->snapshot == UINT_MAX) { 1311 spin_unlock(&tr->snapshot_trigger_lock); 1312 return -EBUSY; 1313 } 1314 1315 tr->snapshot++; 1316 spin_unlock(&tr->snapshot_trigger_lock); 1317 1318 ret = tracing_alloc_snapshot_instance(tr); 1319 if (ret) { 1320 spin_lock(&tr->snapshot_trigger_lock); 1321 tr->snapshot--; 1322 spin_unlock(&tr->snapshot_trigger_lock); 1323 } 1324 1325 return ret; 1326 } 1327 1328 int tracing_arm_snapshot(struct trace_array *tr) 1329 { 1330 int ret; 1331 1332 mutex_lock(&trace_types_lock); 1333 ret = tracing_arm_snapshot_locked(tr); 1334 mutex_unlock(&trace_types_lock); 1335 1336 return ret; 1337 } 1338 1339 void tracing_disarm_snapshot(struct trace_array *tr) 1340 { 1341 spin_lock(&tr->snapshot_trigger_lock); 1342 if (!WARN_ON(!tr->snapshot)) 1343 tr->snapshot--; 1344 spin_unlock(&tr->snapshot_trigger_lock); 1345 } 1346 1347 /** 1348 * tracing_alloc_snapshot - allocate snapshot buffer. 1349 * 1350 * This only allocates the snapshot buffer if it isn't already 1351 * allocated - it doesn't also take a snapshot. 1352 * 1353 * This is meant to be used in cases where the snapshot buffer needs 1354 * to be set up for events that can't sleep but need to be able to 1355 * trigger a snapshot. 1356 */ 1357 int tracing_alloc_snapshot(void) 1358 { 1359 struct trace_array *tr = &global_trace; 1360 int ret; 1361 1362 ret = tracing_alloc_snapshot_instance(tr); 1363 WARN_ON(ret < 0); 1364 1365 return ret; 1366 } 1367 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot); 1368 1369 /** 1370 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer. 1371 * 1372 * This is similar to tracing_snapshot(), but it will allocate the 1373 * snapshot buffer if it isn't already allocated. Use this only 1374 * where it is safe to sleep, as the allocation may sleep. 1375 * 1376 * This causes a swap between the snapshot buffer and the current live 1377 * tracing buffer. You can use this to take snapshots of the live 1378 * trace when some condition is triggered, but continue to trace. 1379 */ 1380 void tracing_snapshot_alloc(void) 1381 { 1382 int ret; 1383 1384 ret = tracing_alloc_snapshot(); 1385 if (ret < 0) 1386 return; 1387 1388 tracing_snapshot(); 1389 } 1390 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc); 1391 1392 /** 1393 * tracing_snapshot_cond_enable - enable conditional snapshot for an instance 1394 * @tr: The tracing instance 1395 * @cond_data: User data to associate with the snapshot 1396 * @update: Implementation of the cond_snapshot update function 1397 * 1398 * Check whether the conditional snapshot for the given instance has 1399 * already been enabled, or if the current tracer is already using a 1400 * snapshot; if so, return -EBUSY, else create a cond_snapshot and 1401 * save the cond_data and update function inside. 1402 * 1403 * Returns 0 if successful, error otherwise. 
1404 */ 1405 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, 1406 cond_update_fn_t update) 1407 { 1408 struct cond_snapshot *cond_snapshot; 1409 int ret = 0; 1410 1411 cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL); 1412 if (!cond_snapshot) 1413 return -ENOMEM; 1414 1415 cond_snapshot->cond_data = cond_data; 1416 cond_snapshot->update = update; 1417 1418 mutex_lock(&trace_types_lock); 1419 1420 if (tr->current_trace->use_max_tr) { 1421 ret = -EBUSY; 1422 goto fail_unlock; 1423 } 1424 1425 /* 1426 * The cond_snapshot can only change to NULL without the 1427 * trace_types_lock. We don't care if we race with it going 1428 * to NULL, but we want to make sure that it's not set to 1429 * something other than NULL when we get here, which we can 1430 * do safely with only holding the trace_types_lock and not 1431 * having to take the max_lock. 1432 */ 1433 if (tr->cond_snapshot) { 1434 ret = -EBUSY; 1435 goto fail_unlock; 1436 } 1437 1438 ret = tracing_arm_snapshot_locked(tr); 1439 if (ret) 1440 goto fail_unlock; 1441 1442 local_irq_disable(); 1443 arch_spin_lock(&tr->max_lock); 1444 tr->cond_snapshot = cond_snapshot; 1445 arch_spin_unlock(&tr->max_lock); 1446 local_irq_enable(); 1447 1448 mutex_unlock(&trace_types_lock); 1449 1450 return ret; 1451 1452 fail_unlock: 1453 mutex_unlock(&trace_types_lock); 1454 kfree(cond_snapshot); 1455 return ret; 1456 } 1457 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable); 1458 1459 /** 1460 * tracing_snapshot_cond_disable - disable conditional snapshot for an instance 1461 * @tr: The tracing instance 1462 * 1463 * Check whether the conditional snapshot for the given instance is 1464 * enabled; if so, free the cond_snapshot associated with it, 1465 * otherwise return -EINVAL. 1466 * 1467 * Returns 0 if successful, error otherwise. 
1468 */ 1469 int tracing_snapshot_cond_disable(struct trace_array *tr) 1470 { 1471 int ret = 0; 1472 1473 local_irq_disable(); 1474 arch_spin_lock(&tr->max_lock); 1475 1476 if (!tr->cond_snapshot) 1477 ret = -EINVAL; 1478 else { 1479 kfree(tr->cond_snapshot); 1480 tr->cond_snapshot = NULL; 1481 } 1482 1483 arch_spin_unlock(&tr->max_lock); 1484 local_irq_enable(); 1485 1486 tracing_disarm_snapshot(tr); 1487 1488 return ret; 1489 } 1490 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable); 1491 #else 1492 void tracing_snapshot(void) 1493 { 1494 WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used"); 1495 } 1496 EXPORT_SYMBOL_GPL(tracing_snapshot); 1497 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data) 1498 { 1499 WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used"); 1500 } 1501 EXPORT_SYMBOL_GPL(tracing_snapshot_cond); 1502 int tracing_alloc_snapshot(void) 1503 { 1504 WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used"); 1505 return -ENODEV; 1506 } 1507 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot); 1508 void tracing_snapshot_alloc(void) 1509 { 1510 /* Give warning */ 1511 tracing_snapshot(); 1512 } 1513 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc); 1514 void *tracing_cond_snapshot_data(struct trace_array *tr) 1515 { 1516 return NULL; 1517 } 1518 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data); 1519 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update) 1520 { 1521 return -ENODEV; 1522 } 1523 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable); 1524 int tracing_snapshot_cond_disable(struct trace_array *tr) 1525 { 1526 return false; 1527 } 1528 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable); 1529 #define free_snapshot(tr) do { } while (0) 1530 #define tracing_arm_snapshot_locked(tr) ({ -EBUSY; }) 1531 #endif /* CONFIG_TRACER_SNAPSHOT */ 1532 1533 void tracer_tracing_off(struct trace_array *tr) 1534 { 1535 if (tr->array_buffer.buffer) 1536 ring_buffer_record_off(tr->array_buffer.buffer); 1537 /* 1538 * This flag is looked at when buffers haven't been allocated 1539 * yet, or by some tracers (like irqsoff), that just want to 1540 * know if the ring buffer has been disabled, but it can handle 1541 * races of where it gets disabled but we still do a record. 1542 * As the check is in the fast path of the tracers, it is more 1543 * important to be fast than accurate. 1544 */ 1545 tr->buffer_disabled = 1; 1546 /* Make the flag seen by readers */ 1547 smp_wmb(); 1548 } 1549 1550 /** 1551 * tracing_off - turn off tracing buffers 1552 * 1553 * This function stops the tracing buffers from recording data. 1554 * It does not disable any overhead the tracers themselves may 1555 * be causing. This function simply causes all recording to 1556 * the ring buffers to fail. 1557 */ 1558 void tracing_off(void) 1559 { 1560 tracer_tracing_off(&global_trace); 1561 } 1562 EXPORT_SYMBOL_GPL(tracing_off); 1563 1564 void disable_trace_on_warning(void) 1565 { 1566 if (__disable_trace_on_warning) { 1567 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_, 1568 "Disabling tracing due to warning\n"); 1569 tracing_off(); 1570 } 1571 } 1572 1573 /** 1574 * tracer_tracing_is_on - show real state of ring buffer enabled 1575 * @tr : the trace array to know if ring buffer is enabled 1576 * 1577 * Shows real state of the ring buffer if it is enabled or not. 
1578 */ 1579 bool tracer_tracing_is_on(struct trace_array *tr) 1580 { 1581 if (tr->array_buffer.buffer) 1582 return ring_buffer_record_is_set_on(tr->array_buffer.buffer); 1583 return !tr->buffer_disabled; 1584 } 1585 1586 /** 1587 * tracing_is_on - show state of ring buffers enabled 1588 */ 1589 int tracing_is_on(void) 1590 { 1591 return tracer_tracing_is_on(&global_trace); 1592 } 1593 EXPORT_SYMBOL_GPL(tracing_is_on); 1594 1595 static int __init set_buf_size(char *str) 1596 { 1597 unsigned long buf_size; 1598 1599 if (!str) 1600 return 0; 1601 buf_size = memparse(str, &str); 1602 /* 1603 * nr_entries can not be zero and the startup 1604 * tests require some buffer space. Therefore 1605 * ensure we have at least 4096 bytes of buffer. 1606 */ 1607 trace_buf_size = max(4096UL, buf_size); 1608 return 1; 1609 } 1610 __setup("trace_buf_size=", set_buf_size); 1611 1612 static int __init set_tracing_thresh(char *str) 1613 { 1614 unsigned long threshold; 1615 int ret; 1616 1617 if (!str) 1618 return 0; 1619 ret = kstrtoul(str, 0, &threshold); 1620 if (ret < 0) 1621 return 0; 1622 tracing_thresh = threshold * 1000; 1623 return 1; 1624 } 1625 __setup("tracing_thresh=", set_tracing_thresh); 1626 1627 unsigned long nsecs_to_usecs(unsigned long nsecs) 1628 { 1629 return nsecs / 1000; 1630 } 1631 1632 /* 1633 * TRACE_FLAGS is defined as a tuple matching bit masks with strings. 1634 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that 1635 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list 1636 * of strings in the order that the evals (enum) were defined. 1637 */ 1638 #undef C 1639 #define C(a, b) b 1640 1641 /* These must match the bit positions in trace_iterator_flags */ 1642 static const char *trace_options[] = { 1643 TRACE_FLAGS 1644 NULL 1645 }; 1646 1647 static struct { 1648 u64 (*func)(void); 1649 const char *name; 1650 int in_ns; /* is this clock in nanoseconds? */ 1651 } trace_clocks[] = { 1652 { trace_clock_local, "local", 1 }, 1653 { trace_clock_global, "global", 1 }, 1654 { trace_clock_counter, "counter", 0 }, 1655 { trace_clock_jiffies, "uptime", 0 }, 1656 { trace_clock, "perf", 1 }, 1657 { ktime_get_mono_fast_ns, "mono", 1 }, 1658 { ktime_get_raw_fast_ns, "mono_raw", 1 }, 1659 { ktime_get_boot_fast_ns, "boot", 1 }, 1660 { ktime_get_tai_fast_ns, "tai", 1 }, 1661 ARCH_TRACE_CLOCKS 1662 }; 1663 1664 bool trace_clock_in_ns(struct trace_array *tr) 1665 { 1666 if (trace_clocks[tr->clock_id].in_ns) 1667 return true; 1668 1669 return false; 1670 } 1671 1672 /* 1673 * trace_parser_get_init - gets the buffer for trace parser 1674 */ 1675 int trace_parser_get_init(struct trace_parser *parser, int size) 1676 { 1677 memset(parser, 0, sizeof(*parser)); 1678 1679 parser->buffer = kmalloc(size, GFP_KERNEL); 1680 if (!parser->buffer) 1681 return 1; 1682 1683 parser->size = size; 1684 return 0; 1685 } 1686 1687 /* 1688 * trace_parser_put - frees the buffer for trace parser 1689 */ 1690 void trace_parser_put(struct trace_parser *parser) 1691 { 1692 kfree(parser->buffer); 1693 parser->buffer = NULL; 1694 } 1695 1696 /* 1697 * trace_get_user - reads the user input string separated by space 1698 * (matched by isspace(ch)) 1699 * 1700 * For each string found the 'struct trace_parser' is updated, 1701 * and the function returns. 1702 * 1703 * Returns number of bytes read. 1704 * 1705 * See kernel/trace/trace.h for 'struct trace_parser' details. 
1706 */ 1707 int trace_get_user(struct trace_parser *parser, const char __user *ubuf, 1708 size_t cnt, loff_t *ppos) 1709 { 1710 char ch; 1711 size_t read = 0; 1712 ssize_t ret; 1713 1714 if (!*ppos) 1715 trace_parser_clear(parser); 1716 1717 ret = get_user(ch, ubuf++); 1718 if (ret) 1719 goto out; 1720 1721 read++; 1722 cnt--; 1723 1724 /* 1725 * The parser is not finished with the last write, 1726 * continue reading the user input without skipping spaces. 1727 */ 1728 if (!parser->cont) { 1729 /* skip white space */ 1730 while (cnt && isspace(ch)) { 1731 ret = get_user(ch, ubuf++); 1732 if (ret) 1733 goto out; 1734 read++; 1735 cnt--; 1736 } 1737 1738 parser->idx = 0; 1739 1740 /* only spaces were written */ 1741 if (isspace(ch) || !ch) { 1742 *ppos += read; 1743 ret = read; 1744 goto out; 1745 } 1746 } 1747 1748 /* read the non-space input */ 1749 while (cnt && !isspace(ch) && ch) { 1750 if (parser->idx < parser->size - 1) 1751 parser->buffer[parser->idx++] = ch; 1752 else { 1753 ret = -EINVAL; 1754 goto out; 1755 } 1756 ret = get_user(ch, ubuf++); 1757 if (ret) 1758 goto out; 1759 read++; 1760 cnt--; 1761 } 1762 1763 /* We either got finished input or we have to wait for another call. */ 1764 if (isspace(ch) || !ch) { 1765 parser->buffer[parser->idx] = 0; 1766 parser->cont = false; 1767 } else if (parser->idx < parser->size - 1) { 1768 parser->cont = true; 1769 parser->buffer[parser->idx++] = ch; 1770 /* Make sure the parsed string always terminates with '\0'. */ 1771 parser->buffer[parser->idx] = 0; 1772 } else { 1773 ret = -EINVAL; 1774 goto out; 1775 } 1776 1777 *ppos += read; 1778 ret = read; 1779 1780 out: 1781 return ret; 1782 } 1783 1784 /* TODO add a seq_buf_to_buffer() */ 1785 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt) 1786 { 1787 int len; 1788 1789 if (trace_seq_used(s) <= s->readpos) 1790 return -EBUSY; 1791 1792 len = trace_seq_used(s) - s->readpos; 1793 if (cnt > len) 1794 cnt = len; 1795 memcpy(buf, s->buffer + s->readpos, cnt); 1796 1797 s->readpos += cnt; 1798 return cnt; 1799 } 1800 1801 unsigned long __read_mostly tracing_thresh; 1802 1803 #ifdef CONFIG_TRACER_MAX_TRACE 1804 static const struct file_operations tracing_max_lat_fops; 1805 1806 #ifdef LATENCY_FS_NOTIFY 1807 1808 static struct workqueue_struct *fsnotify_wq; 1809 1810 static void latency_fsnotify_workfn(struct work_struct *work) 1811 { 1812 struct trace_array *tr = container_of(work, struct trace_array, 1813 fsnotify_work); 1814 fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY); 1815 } 1816 1817 static void latency_fsnotify_workfn_irq(struct irq_work *iwork) 1818 { 1819 struct trace_array *tr = container_of(iwork, struct trace_array, 1820 fsnotify_irqwork); 1821 queue_work(fsnotify_wq, &tr->fsnotify_work); 1822 } 1823 1824 static void trace_create_maxlat_file(struct trace_array *tr, 1825 struct dentry *d_tracer) 1826 { 1827 INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn); 1828 init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq); 1829 tr->d_max_latency = trace_create_file("tracing_max_latency", 1830 TRACE_MODE_WRITE, 1831 d_tracer, tr, 1832 &tracing_max_lat_fops); 1833 } 1834 1835 __init static int latency_fsnotify_init(void) 1836 { 1837 fsnotify_wq = alloc_workqueue("tr_max_lat_wq", 1838 WQ_UNBOUND | WQ_HIGHPRI, 0); 1839 if (!fsnotify_wq) { 1840 pr_err("Unable to allocate tr_max_lat_wq\n"); 1841 return -ENOMEM; 1842 } 1843 return 0; 1844 } 1845 1846 late_initcall_sync(latency_fsnotify_init); 1847 1848 void latency_fsnotify(struct trace_array *tr) 
1849 { 1850 if (!fsnotify_wq) 1851 return; 1852 /* 1853 * We cannot call queue_work(&tr->fsnotify_work) from here because it's 1854 * possible that we are called from __schedule() or do_idle(), which 1855 * could cause a deadlock. 1856 */ 1857 irq_work_queue(&tr->fsnotify_irqwork); 1858 } 1859 1860 #else /* !LATENCY_FS_NOTIFY */ 1861 1862 #define trace_create_maxlat_file(tr, d_tracer) \ 1863 trace_create_file("tracing_max_latency", TRACE_MODE_WRITE, \ 1864 d_tracer, tr, &tracing_max_lat_fops) 1865 1866 #endif 1867 1868 /* 1869 * Copy the new maximum trace into the separate maximum-trace 1870 * structure. (this way the maximum trace is permanently saved, 1871 * for later retrieval via /sys/kernel/tracing/tracing_max_latency) 1872 */ 1873 static void 1874 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) 1875 { 1876 struct array_buffer *trace_buf = &tr->array_buffer; 1877 struct array_buffer *max_buf = &tr->max_buffer; 1878 struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu); 1879 struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu); 1880 1881 max_buf->cpu = cpu; 1882 max_buf->time_start = data->preempt_timestamp; 1883 1884 max_data->saved_latency = tr->max_latency; 1885 max_data->critical_start = data->critical_start; 1886 max_data->critical_end = data->critical_end; 1887 1888 strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN); 1889 max_data->pid = tsk->pid; 1890 /* 1891 * If tsk == current, then use current_uid(), as that does not use 1892 * RCU. The irq tracer can be called out of RCU scope. 1893 */ 1894 if (tsk == current) 1895 max_data->uid = current_uid(); 1896 else 1897 max_data->uid = task_uid(tsk); 1898 1899 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO; 1900 max_data->policy = tsk->policy; 1901 max_data->rt_priority = tsk->rt_priority; 1902 1903 /* record this tasks comm */ 1904 tracing_record_cmdline(tsk); 1905 latency_fsnotify(tr); 1906 } 1907 1908 /** 1909 * update_max_tr - snapshot all trace buffers from global_trace to max_tr 1910 * @tr: tracer 1911 * @tsk: the task with the latency 1912 * @cpu: The cpu that initiated the trace. 1913 * @cond_data: User data associated with a conditional snapshot 1914 * 1915 * Flip the buffers between the @tr and the max_tr and record information 1916 * about which task was the cause of this latency. 
1917 */ 1918 void 1919 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu, 1920 void *cond_data) 1921 { 1922 if (tr->stop_count) 1923 return; 1924 1925 WARN_ON_ONCE(!irqs_disabled()); 1926 1927 if (!tr->allocated_snapshot) { 1928 /* Only the nop tracer should hit this when disabling */ 1929 WARN_ON_ONCE(tr->current_trace != &nop_trace); 1930 return; 1931 } 1932 1933 arch_spin_lock(&tr->max_lock); 1934 1935 /* Inherit the recordable setting from array_buffer */ 1936 if (ring_buffer_record_is_set_on(tr->array_buffer.buffer)) 1937 ring_buffer_record_on(tr->max_buffer.buffer); 1938 else 1939 ring_buffer_record_off(tr->max_buffer.buffer); 1940 1941 #ifdef CONFIG_TRACER_SNAPSHOT 1942 if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) { 1943 arch_spin_unlock(&tr->max_lock); 1944 return; 1945 } 1946 #endif 1947 swap(tr->array_buffer.buffer, tr->max_buffer.buffer); 1948 1949 __update_max_tr(tr, tsk, cpu); 1950 1951 arch_spin_unlock(&tr->max_lock); 1952 1953 /* Any waiters on the old snapshot buffer need to wake up */ 1954 ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS); 1955 } 1956 1957 /** 1958 * update_max_tr_single - only copy one trace over, and reset the rest 1959 * @tr: tracer 1960 * @tsk: task with the latency 1961 * @cpu: the cpu of the buffer to copy. 1962 * 1963 * Flip the trace of a single CPU buffer between the @tr and the max_tr. 1964 */ 1965 void 1966 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) 1967 { 1968 int ret; 1969 1970 if (tr->stop_count) 1971 return; 1972 1973 WARN_ON_ONCE(!irqs_disabled()); 1974 if (!tr->allocated_snapshot) { 1975 /* Only the nop tracer should hit this when disabling */ 1976 WARN_ON_ONCE(tr->current_trace != &nop_trace); 1977 return; 1978 } 1979 1980 arch_spin_lock(&tr->max_lock); 1981 1982 ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu); 1983 1984 if (ret == -EBUSY) { 1985 /* 1986 * We failed to swap the buffer due to a commit taking 1987 * place on this CPU. We fail to record, but we reset 1988 * the max trace buffer (no one writes directly to it) 1989 * and flag that it failed. 1990 * Another reason is resize is in progress. 
1991 */ 1992 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_, 1993 "Failed to swap buffers due to commit or resize in progress\n"); 1994 } 1995 1996 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY); 1997 1998 __update_max_tr(tr, tsk, cpu); 1999 arch_spin_unlock(&tr->max_lock); 2000 } 2001 2002 #endif /* CONFIG_TRACER_MAX_TRACE */ 2003 2004 struct pipe_wait { 2005 struct trace_iterator *iter; 2006 int wait_index; 2007 }; 2008 2009 static bool wait_pipe_cond(void *data) 2010 { 2011 struct pipe_wait *pwait = data; 2012 struct trace_iterator *iter = pwait->iter; 2013 2014 if (atomic_read_acquire(&iter->wait_index) != pwait->wait_index) 2015 return true; 2016 2017 return iter->closed; 2018 } 2019 2020 static int wait_on_pipe(struct trace_iterator *iter, int full) 2021 { 2022 struct pipe_wait pwait; 2023 int ret; 2024 2025 /* Iterators are static, they should be filled or empty */ 2026 if (trace_buffer_iter(iter, iter->cpu_file)) 2027 return 0; 2028 2029 pwait.wait_index = atomic_read_acquire(&iter->wait_index); 2030 pwait.iter = iter; 2031 2032 ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full, 2033 wait_pipe_cond, &pwait); 2034 2035 #ifdef CONFIG_TRACER_MAX_TRACE 2036 /* 2037 * Make sure this is still the snapshot buffer, as if a snapshot were 2038 * to happen, this would now be the main buffer. 2039 */ 2040 if (iter->snapshot) 2041 iter->array_buffer = &iter->tr->max_buffer; 2042 #endif 2043 return ret; 2044 } 2045 2046 #ifdef CONFIG_FTRACE_STARTUP_TEST 2047 static bool selftests_can_run; 2048 2049 struct trace_selftests { 2050 struct list_head list; 2051 struct tracer *type; 2052 }; 2053 2054 static LIST_HEAD(postponed_selftests); 2055 2056 static int save_selftest(struct tracer *type) 2057 { 2058 struct trace_selftests *selftest; 2059 2060 selftest = kmalloc(sizeof(*selftest), GFP_KERNEL); 2061 if (!selftest) 2062 return -ENOMEM; 2063 2064 selftest->type = type; 2065 list_add(&selftest->list, &postponed_selftests); 2066 return 0; 2067 } 2068 2069 static int run_tracer_selftest(struct tracer *type) 2070 { 2071 struct trace_array *tr = &global_trace; 2072 struct tracer *saved_tracer = tr->current_trace; 2073 int ret; 2074 2075 if (!type->selftest || tracing_selftest_disabled) 2076 return 0; 2077 2078 /* 2079 * If a tracer registers early in boot up (before scheduling is 2080 * initialized and such), then do not run its selftests yet. 2081 * Instead, run it a little later in the boot process. 2082 */ 2083 if (!selftests_can_run) 2084 return save_selftest(type); 2085 2086 if (!tracing_is_on()) { 2087 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n", 2088 type->name); 2089 return 0; 2090 } 2091 2092 /* 2093 * Run a selftest on this tracer. 2094 * Here we reset the trace buffer, and set the current 2095 * tracer to be this tracer. The tracer can then run some 2096 * internal tracing to verify that everything is in order. 2097 * If we fail, we do not register this tracer. 
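 * A failed selftest makes this function return -1, which in turn makes
 * register_tracer() skip adding the tracer to the trace_types list.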
2098 */ 2099 tracing_reset_online_cpus(&tr->array_buffer); 2100 2101 tr->current_trace = type; 2102 2103 #ifdef CONFIG_TRACER_MAX_TRACE 2104 if (type->use_max_tr) { 2105 /* If we expanded the buffers, make sure the max is expanded too */ 2106 if (tr->ring_buffer_expanded) 2107 ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size, 2108 RING_BUFFER_ALL_CPUS); 2109 tr->allocated_snapshot = true; 2110 } 2111 #endif 2112 2113 /* the test is responsible for initializing and enabling */ 2114 pr_info("Testing tracer %s: ", type->name); 2115 ret = type->selftest(type, tr); 2116 /* the test is responsible for resetting too */ 2117 tr->current_trace = saved_tracer; 2118 if (ret) { 2119 printk(KERN_CONT "FAILED!\n"); 2120 /* Add the warning after printing 'FAILED' */ 2121 WARN_ON(1); 2122 return -1; 2123 } 2124 /* Only reset on passing, to avoid touching corrupted buffers */ 2125 tracing_reset_online_cpus(&tr->array_buffer); 2126 2127 #ifdef CONFIG_TRACER_MAX_TRACE 2128 if (type->use_max_tr) { 2129 tr->allocated_snapshot = false; 2130 2131 /* Shrink the max buffer again */ 2132 if (tr->ring_buffer_expanded) 2133 ring_buffer_resize(tr->max_buffer.buffer, 1, 2134 RING_BUFFER_ALL_CPUS); 2135 } 2136 #endif 2137 2138 printk(KERN_CONT "PASSED\n"); 2139 return 0; 2140 } 2141 2142 static int do_run_tracer_selftest(struct tracer *type) 2143 { 2144 int ret; 2145 2146 /* 2147 * Tests can take a long time, especially if they are run one after the 2148 * other, as does happen during bootup when all the tracers are 2149 * registered. This could cause the soft lockup watchdog to trigger. 2150 */ 2151 cond_resched(); 2152 2153 tracing_selftest_running = true; 2154 ret = run_tracer_selftest(type); 2155 tracing_selftest_running = false; 2156 2157 return ret; 2158 } 2159 2160 static __init int init_trace_selftests(void) 2161 { 2162 struct trace_selftests *p, *n; 2163 struct tracer *t, **last; 2164 int ret; 2165 2166 selftests_can_run = true; 2167 2168 mutex_lock(&trace_types_lock); 2169 2170 if (list_empty(&postponed_selftests)) 2171 goto out; 2172 2173 pr_info("Running postponed tracer tests:\n"); 2174 2175 tracing_selftest_running = true; 2176 list_for_each_entry_safe(p, n, &postponed_selftests, list) { 2177 /* This loop can take minutes when sanitizers are enabled, so 2178 * lets make sure we allow RCU processing. 2179 */ 2180 cond_resched(); 2181 ret = run_tracer_selftest(p->type); 2182 /* If the test fails, then warn and remove from available_tracers */ 2183 if (ret < 0) { 2184 WARN(1, "tracer: %s failed selftest, disabling\n", 2185 p->type->name); 2186 last = &trace_types; 2187 for (t = trace_types; t; t = t->next) { 2188 if (t == p->type) { 2189 *last = t->next; 2190 break; 2191 } 2192 last = &t->next; 2193 } 2194 } 2195 list_del(&p->list); 2196 kfree(p); 2197 } 2198 tracing_selftest_running = false; 2199 2200 out: 2201 mutex_unlock(&trace_types_lock); 2202 2203 return 0; 2204 } 2205 core_initcall(init_trace_selftests); 2206 #else 2207 static inline int run_tracer_selftest(struct tracer *type) 2208 { 2209 return 0; 2210 } 2211 static inline int do_run_tracer_selftest(struct tracer *type) 2212 { 2213 return 0; 2214 } 2215 #endif /* CONFIG_FTRACE_STARTUP_TEST */ 2216 2217 static void add_tracer_options(struct trace_array *tr, struct tracer *t); 2218 2219 static void __init apply_trace_boot_options(void); 2220 2221 /** 2222 * register_tracer - register a tracer with the ftrace system. 2223 * @type: the plugin for the tracer 2224 * 2225 * Register a new plugin tracer. 
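 *
 * A minimal sketch of a caller (the names below are hypothetical):
 *
 *	static struct tracer my_tracer __read_mostly = {
 *		.name	= "my_tracer",
 *		.init	= my_tracer_init,
 *		.reset	= my_tracer_reset,
 *	};
 *
 *	static __init int my_tracer_register(void)
 *	{
 *		return register_tracer(&my_tracer);
 *	}
 *
 * Returns 0 on success, -1 or a negative errno on failure.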
2226 */ 2227 int __init register_tracer(struct tracer *type) 2228 { 2229 struct tracer *t; 2230 int ret = 0; 2231 2232 if (!type->name) { 2233 pr_info("Tracer must have a name\n"); 2234 return -1; 2235 } 2236 2237 if (strlen(type->name) >= MAX_TRACER_SIZE) { 2238 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE); 2239 return -1; 2240 } 2241 2242 if (security_locked_down(LOCKDOWN_TRACEFS)) { 2243 pr_warn("Can not register tracer %s due to lockdown\n", 2244 type->name); 2245 return -EPERM; 2246 } 2247 2248 mutex_lock(&trace_types_lock); 2249 2250 for (t = trace_types; t; t = t->next) { 2251 if (strcmp(type->name, t->name) == 0) { 2252 /* already found */ 2253 pr_info("Tracer %s already registered\n", 2254 type->name); 2255 ret = -1; 2256 goto out; 2257 } 2258 } 2259 2260 if (!type->set_flag) 2261 type->set_flag = &dummy_set_flag; 2262 if (!type->flags) { 2263 /*allocate a dummy tracer_flags*/ 2264 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL); 2265 if (!type->flags) { 2266 ret = -ENOMEM; 2267 goto out; 2268 } 2269 type->flags->val = 0; 2270 type->flags->opts = dummy_tracer_opt; 2271 } else 2272 if (!type->flags->opts) 2273 type->flags->opts = dummy_tracer_opt; 2274 2275 /* store the tracer for __set_tracer_option */ 2276 type->flags->trace = type; 2277 2278 ret = do_run_tracer_selftest(type); 2279 if (ret < 0) 2280 goto out; 2281 2282 type->next = trace_types; 2283 trace_types = type; 2284 add_tracer_options(&global_trace, type); 2285 2286 out: 2287 mutex_unlock(&trace_types_lock); 2288 2289 if (ret || !default_bootup_tracer) 2290 goto out_unlock; 2291 2292 if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE)) 2293 goto out_unlock; 2294 2295 printk(KERN_INFO "Starting tracer '%s'\n", type->name); 2296 /* Do we want this tracer to start on bootup? */ 2297 tracing_set_tracer(&global_trace, type->name); 2298 default_bootup_tracer = NULL; 2299 2300 apply_trace_boot_options(); 2301 2302 /* disable other selftests, since this will break it. 
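 * (The boot-selected tracer is now active; running selftests on top of it
 * would give unreliable results, so they are disabled here.)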
*/ 2303 disable_tracing_selftest("running a tracer"); 2304 2305 out_unlock: 2306 return ret; 2307 } 2308 2309 static void tracing_reset_cpu(struct array_buffer *buf, int cpu) 2310 { 2311 struct trace_buffer *buffer = buf->buffer; 2312 2313 if (!buffer) 2314 return; 2315 2316 ring_buffer_record_disable(buffer); 2317 2318 /* Make sure all commits have finished */ 2319 synchronize_rcu(); 2320 ring_buffer_reset_cpu(buffer, cpu); 2321 2322 ring_buffer_record_enable(buffer); 2323 } 2324 2325 void tracing_reset_online_cpus(struct array_buffer *buf) 2326 { 2327 struct trace_buffer *buffer = buf->buffer; 2328 2329 if (!buffer) 2330 return; 2331 2332 ring_buffer_record_disable(buffer); 2333 2334 /* Make sure all commits have finished */ 2335 synchronize_rcu(); 2336 2337 buf->time_start = buffer_ftrace_now(buf, buf->cpu); 2338 2339 ring_buffer_reset_online_cpus(buffer); 2340 2341 ring_buffer_record_enable(buffer); 2342 } 2343 2344 /* Must have trace_types_lock held */ 2345 void tracing_reset_all_online_cpus_unlocked(void) 2346 { 2347 struct trace_array *tr; 2348 2349 lockdep_assert_held(&trace_types_lock); 2350 2351 list_for_each_entry(tr, &ftrace_trace_arrays, list) { 2352 if (!tr->clear_trace) 2353 continue; 2354 tr->clear_trace = false; 2355 tracing_reset_online_cpus(&tr->array_buffer); 2356 #ifdef CONFIG_TRACER_MAX_TRACE 2357 tracing_reset_online_cpus(&tr->max_buffer); 2358 #endif 2359 } 2360 } 2361 2362 void tracing_reset_all_online_cpus(void) 2363 { 2364 mutex_lock(&trace_types_lock); 2365 tracing_reset_all_online_cpus_unlocked(); 2366 mutex_unlock(&trace_types_lock); 2367 } 2368 2369 int is_tracing_stopped(void) 2370 { 2371 return global_trace.stop_count; 2372 } 2373 2374 static void tracing_start_tr(struct trace_array *tr) 2375 { 2376 struct trace_buffer *buffer; 2377 unsigned long flags; 2378 2379 if (tracing_disabled) 2380 return; 2381 2382 raw_spin_lock_irqsave(&tr->start_lock, flags); 2383 if (--tr->stop_count) { 2384 if (WARN_ON_ONCE(tr->stop_count < 0)) { 2385 /* Someone screwed up their debugging */ 2386 tr->stop_count = 0; 2387 } 2388 goto out; 2389 } 2390 2391 /* Prevent the buffers from switching */ 2392 arch_spin_lock(&tr->max_lock); 2393 2394 buffer = tr->array_buffer.buffer; 2395 if (buffer) 2396 ring_buffer_record_enable(buffer); 2397 2398 #ifdef CONFIG_TRACER_MAX_TRACE 2399 buffer = tr->max_buffer.buffer; 2400 if (buffer) 2401 ring_buffer_record_enable(buffer); 2402 #endif 2403 2404 arch_spin_unlock(&tr->max_lock); 2405 2406 out: 2407 raw_spin_unlock_irqrestore(&tr->start_lock, flags); 2408 } 2409 2410 /** 2411 * tracing_start - quick start of the tracer 2412 * 2413 * If tracing is enabled but was stopped by tracing_stop, 2414 * this will start the tracer back up. 
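 *
 * Stop/start calls nest: the ring buffers are only re-enabled once every
 * tracing_stop() has been balanced by a tracing_start().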
2415 */ 2416 void tracing_start(void) 2417 2418 { 2419 return tracing_start_tr(&global_trace); 2420 } 2421 2422 static void tracing_stop_tr(struct trace_array *tr) 2423 { 2424 struct trace_buffer *buffer; 2425 unsigned long flags; 2426 2427 raw_spin_lock_irqsave(&tr->start_lock, flags); 2428 if (tr->stop_count++) 2429 goto out; 2430 2431 /* Prevent the buffers from switching */ 2432 arch_spin_lock(&tr->max_lock); 2433 2434 buffer = tr->array_buffer.buffer; 2435 if (buffer) 2436 ring_buffer_record_disable(buffer); 2437 2438 #ifdef CONFIG_TRACER_MAX_TRACE 2439 buffer = tr->max_buffer.buffer; 2440 if (buffer) 2441 ring_buffer_record_disable(buffer); 2442 #endif 2443 2444 arch_spin_unlock(&tr->max_lock); 2445 2446 out: 2447 raw_spin_unlock_irqrestore(&tr->start_lock, flags); 2448 } 2449 2450 /** 2451 * tracing_stop - quick stop of the tracer 2452 * 2453 * Light weight way to stop tracing. Use in conjunction with 2454 * tracing_start. 2455 */ 2456 void tracing_stop(void) 2457 { 2458 return tracing_stop_tr(&global_trace); 2459 } 2460 2461 /* 2462 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq 2463 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function 2464 * simplifies those functions and keeps them in sync. 2465 */ 2466 enum print_line_t trace_handle_return(struct trace_seq *s) 2467 { 2468 return trace_seq_has_overflowed(s) ? 2469 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED; 2470 } 2471 EXPORT_SYMBOL_GPL(trace_handle_return); 2472 2473 static unsigned short migration_disable_value(void) 2474 { 2475 #if defined(CONFIG_SMP) 2476 return current->migration_disabled; 2477 #else 2478 return 0; 2479 #endif 2480 } 2481 2482 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status) 2483 { 2484 unsigned int trace_flags = irqs_status; 2485 unsigned int pc; 2486 2487 pc = preempt_count(); 2488 2489 if (pc & NMI_MASK) 2490 trace_flags |= TRACE_FLAG_NMI; 2491 if (pc & HARDIRQ_MASK) 2492 trace_flags |= TRACE_FLAG_HARDIRQ; 2493 if (in_serving_softirq()) 2494 trace_flags |= TRACE_FLAG_SOFTIRQ; 2495 if (softirq_count() >> (SOFTIRQ_SHIFT + 1)) 2496 trace_flags |= TRACE_FLAG_BH_OFF; 2497 2498 if (tif_need_resched()) 2499 trace_flags |= TRACE_FLAG_NEED_RESCHED; 2500 if (test_preempt_need_resched()) 2501 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED; 2502 return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) | 2503 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4; 2504 } 2505 2506 struct ring_buffer_event * 2507 trace_buffer_lock_reserve(struct trace_buffer *buffer, 2508 int type, 2509 unsigned long len, 2510 unsigned int trace_ctx) 2511 { 2512 return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx); 2513 } 2514 2515 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event); 2516 DEFINE_PER_CPU(int, trace_buffered_event_cnt); 2517 static int trace_buffered_event_ref; 2518 2519 /** 2520 * trace_buffered_event_enable - enable buffering events 2521 * 2522 * When events are being filtered, it is quicker to use a temporary 2523 * buffer to write the event data into if there's a likely chance 2524 * that it will not be committed. The discard of the ring buffer 2525 * is not as fast as committing, and is much slower than copying 2526 * a commit. 2527 * 2528 * When an event is to be filtered, allocate per cpu buffers to 2529 * write the event data into, and if the event is filtered and discarded 2530 * it is simply dropped, otherwise, the entire data is to be committed 2531 * in one shot. 
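 *
 * Calls are reference counted and must be balanced by
 * trace_buffered_event_disable(); both are expected to be made with
 * event_mutex held, which the code below asserts.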
2532 */ 2533 void trace_buffered_event_enable(void) 2534 { 2535 struct ring_buffer_event *event; 2536 struct page *page; 2537 int cpu; 2538 2539 WARN_ON_ONCE(!mutex_is_locked(&event_mutex)); 2540 2541 if (trace_buffered_event_ref++) 2542 return; 2543 2544 for_each_tracing_cpu(cpu) { 2545 page = alloc_pages_node(cpu_to_node(cpu), 2546 GFP_KERNEL | __GFP_NORETRY, 0); 2547 /* This is just an optimization and can handle failures */ 2548 if (!page) { 2549 pr_err("Failed to allocate event buffer\n"); 2550 break; 2551 } 2552 2553 event = page_address(page); 2554 memset(event, 0, sizeof(*event)); 2555 2556 per_cpu(trace_buffered_event, cpu) = event; 2557 2558 preempt_disable(); 2559 if (cpu == smp_processor_id() && 2560 __this_cpu_read(trace_buffered_event) != 2561 per_cpu(trace_buffered_event, cpu)) 2562 WARN_ON_ONCE(1); 2563 preempt_enable(); 2564 } 2565 } 2566 2567 static void enable_trace_buffered_event(void *data) 2568 { 2569 /* Probably not needed, but do it anyway */ 2570 smp_rmb(); 2571 this_cpu_dec(trace_buffered_event_cnt); 2572 } 2573 2574 static void disable_trace_buffered_event(void *data) 2575 { 2576 this_cpu_inc(trace_buffered_event_cnt); 2577 } 2578 2579 /** 2580 * trace_buffered_event_disable - disable buffering events 2581 * 2582 * When a filter is removed, it is faster to not use the buffered 2583 * events, and to commit directly into the ring buffer. Free up 2584 * the temp buffers when there are no more users. This requires 2585 * special synchronization with current events. 2586 */ 2587 void trace_buffered_event_disable(void) 2588 { 2589 int cpu; 2590 2591 WARN_ON_ONCE(!mutex_is_locked(&event_mutex)); 2592 2593 if (WARN_ON_ONCE(!trace_buffered_event_ref)) 2594 return; 2595 2596 if (--trace_buffered_event_ref) 2597 return; 2598 2599 /* For each CPU, set the buffer as used. */ 2600 on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event, 2601 NULL, true); 2602 2603 /* Wait for all current users to finish */ 2604 synchronize_rcu(); 2605 2606 for_each_tracing_cpu(cpu) { 2607 free_page((unsigned long)per_cpu(trace_buffered_event, cpu)); 2608 per_cpu(trace_buffered_event, cpu) = NULL; 2609 } 2610 2611 /* 2612 * Wait for all CPUs that potentially started checking if they can use 2613 * their event buffer only after the previous synchronize_rcu() call and 2614 * they still read a valid pointer from trace_buffered_event. It must be 2615 * ensured they don't see cleared trace_buffered_event_cnt else they 2616 * could wrongly decide to use the pointed-to buffer which is now freed. 2617 */ 2618 synchronize_rcu(); 2619 2620 /* For each CPU, relinquish the buffer */ 2621 on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL, 2622 true); 2623 } 2624 2625 static struct trace_buffer *temp_buffer; 2626 2627 struct ring_buffer_event * 2628 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb, 2629 struct trace_event_file *trace_file, 2630 int type, unsigned long len, 2631 unsigned int trace_ctx) 2632 { 2633 struct ring_buffer_event *entry; 2634 struct trace_array *tr = trace_file->tr; 2635 int val; 2636 2637 *current_rb = tr->array_buffer.buffer; 2638 2639 if (!tr->no_filter_buffering_ref && 2640 (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) { 2641 preempt_disable_notrace(); 2642 /* 2643 * Filtering is on, so try to use the per cpu buffer first. 2644 * This buffer will simulate a ring_buffer_event, 2645 * where the type_len is zero and the array[0] will 2646 * hold the full length. 
2647 * (see include/linux/ring-buffer.h for details on 2648 * how the ring_buffer_event is structured). 2649 * 2650 * Using a temp buffer during filtering and copying it 2651 * on a matched filter is quicker than writing directly 2652 * into the ring buffer and then discarding it when 2653 * it doesn't match. That is because the discard 2654 * requires several atomic operations to get right. 2655 * Copying on match and doing nothing on a failed match 2656 * is still quicker than no copy on match, but having 2657 * to discard out of the ring buffer on a failed match. 2658 */ 2659 if ((entry = __this_cpu_read(trace_buffered_event))) { 2660 int max_len = PAGE_SIZE - struct_size(entry, array, 1); 2661 2662 val = this_cpu_inc_return(trace_buffered_event_cnt); 2663 2664 /* 2665 * Preemption is disabled, but interrupts and NMIs 2666 * can still come in now. If that happens after 2667 * the above increment, then it will have to go 2668 * back to the old method of allocating the event 2669 * on the ring buffer, and if the filter fails, it 2670 * will have to call ring_buffer_discard_commit() 2671 * to remove it. 2672 * 2673 * Need to also check the unlikely case that the 2674 * length is bigger than the temp buffer size. 2675 * If that happens, then the reserve is pretty much 2676 * guaranteed to fail, as the ring buffer currently 2677 * only allows events less than a page. But that may 2678 * change in the future, so let the ring buffer reserve 2679 * handle the failure in that case. 2680 */ 2681 if (val == 1 && likely(len <= max_len)) { 2682 trace_event_setup(entry, type, trace_ctx); 2683 entry->array[0] = len; 2684 /* Return with preemption disabled */ 2685 return entry; 2686 } 2687 this_cpu_dec(trace_buffered_event_cnt); 2688 } 2689 /* __trace_buffer_lock_reserve() disables preemption */ 2690 preempt_enable_notrace(); 2691 } 2692 2693 entry = __trace_buffer_lock_reserve(*current_rb, type, len, 2694 trace_ctx); 2695 /* 2696 * If tracing is off, but we have triggers enabled 2697 * we still need to look at the event data. Use the temp_buffer 2698 * to store the trace event for the trigger to use. It's recursive 2699 * safe and will not be recorded anywhere. 
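 * (temp_buffer is a separate scratch ring buffer reserved for this
 * trigger-inspection path, so the event data is visible to the trigger
 * but never ends up in a real trace buffer.)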
2700 */ 2701 if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) { 2702 *current_rb = temp_buffer; 2703 entry = __trace_buffer_lock_reserve(*current_rb, type, len, 2704 trace_ctx); 2705 } 2706 return entry; 2707 } 2708 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve); 2709 2710 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock); 2711 static DEFINE_MUTEX(tracepoint_printk_mutex); 2712 2713 static void output_printk(struct trace_event_buffer *fbuffer) 2714 { 2715 struct trace_event_call *event_call; 2716 struct trace_event_file *file; 2717 struct trace_event *event; 2718 unsigned long flags; 2719 struct trace_iterator *iter = tracepoint_print_iter; 2720 2721 /* We should never get here if iter is NULL */ 2722 if (WARN_ON_ONCE(!iter)) 2723 return; 2724 2725 event_call = fbuffer->trace_file->event_call; 2726 if (!event_call || !event_call->event.funcs || 2727 !event_call->event.funcs->trace) 2728 return; 2729 2730 file = fbuffer->trace_file; 2731 if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) || 2732 (unlikely(file->flags & EVENT_FILE_FL_FILTERED) && 2733 !filter_match_preds(file->filter, fbuffer->entry))) 2734 return; 2735 2736 event = &fbuffer->trace_file->event_call->event; 2737 2738 raw_spin_lock_irqsave(&tracepoint_iter_lock, flags); 2739 trace_seq_init(&iter->seq); 2740 iter->ent = fbuffer->entry; 2741 event_call->event.funcs->trace(iter, 0, event); 2742 trace_seq_putc(&iter->seq, 0); 2743 printk("%s", iter->seq.buffer); 2744 2745 raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags); 2746 } 2747 2748 int tracepoint_printk_sysctl(struct ctl_table *table, int write, 2749 void *buffer, size_t *lenp, 2750 loff_t *ppos) 2751 { 2752 int save_tracepoint_printk; 2753 int ret; 2754 2755 mutex_lock(&tracepoint_printk_mutex); 2756 save_tracepoint_printk = tracepoint_printk; 2757 2758 ret = proc_dointvec(table, write, buffer, lenp, ppos); 2759 2760 /* 2761 * This will force exiting early, as tracepoint_printk 2762 * is always zero when tracepoint_printk_iter is not allocated 2763 */ 2764 if (!tracepoint_print_iter) 2765 tracepoint_printk = 0; 2766 2767 if (save_tracepoint_printk == tracepoint_printk) 2768 goto out; 2769 2770 if (tracepoint_printk) 2771 static_key_enable(&tracepoint_printk_key.key); 2772 else 2773 static_key_disable(&tracepoint_printk_key.key); 2774 2775 out: 2776 mutex_unlock(&tracepoint_printk_mutex); 2777 2778 return ret; 2779 } 2780 2781 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer) 2782 { 2783 enum event_trigger_type tt = ETT_NONE; 2784 struct trace_event_file *file = fbuffer->trace_file; 2785 2786 if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event, 2787 fbuffer->entry, &tt)) 2788 goto discard; 2789 2790 if (static_key_false(&tracepoint_printk_key.key)) 2791 output_printk(fbuffer); 2792 2793 if (static_branch_unlikely(&trace_event_exports_enabled)) 2794 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT); 2795 2796 trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer, 2797 fbuffer->event, fbuffer->trace_ctx, fbuffer->regs); 2798 2799 discard: 2800 if (tt) 2801 event_triggers_post_call(file, tt); 2802 2803 } 2804 EXPORT_SYMBOL_GPL(trace_event_buffer_commit); 2805 2806 /* 2807 * Skip 3: 2808 * 2809 * trace_buffer_unlock_commit_regs() 2810 * trace_event_buffer_commit() 2811 * trace_event_raw_event_xxx() 2812 */ 2813 # define STACK_SKIP 3 2814 2815 void trace_buffer_unlock_commit_regs(struct trace_array *tr, 2816 struct trace_buffer *buffer, 2817 struct ring_buffer_event *event, 2818 unsigned int trace_ctx, 2819 
struct pt_regs *regs) 2820 { 2821 __buffer_unlock_commit(buffer, event); 2822 2823 /* 2824 * If regs is not set, then skip the necessary functions. 2825 * Note, we can still get here via blktrace, wakeup tracer 2826 * and mmiotrace, but that's ok if they lose a function or 2827 * two. They are not that meaningful. 2828 */ 2829 ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs); 2830 ftrace_trace_userstack(tr, buffer, trace_ctx); 2831 } 2832 2833 /* 2834 * Similar to trace_buffer_unlock_commit_regs() but do not dump stack. 2835 */ 2836 void 2837 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer, 2838 struct ring_buffer_event *event) 2839 { 2840 __buffer_unlock_commit(buffer, event); 2841 } 2842 2843 void 2844 trace_function(struct trace_array *tr, unsigned long ip, unsigned long 2845 parent_ip, unsigned int trace_ctx) 2846 { 2847 struct trace_event_call *call = &event_function; 2848 struct trace_buffer *buffer = tr->array_buffer.buffer; 2849 struct ring_buffer_event *event; 2850 struct ftrace_entry *entry; 2851 2852 event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry), 2853 trace_ctx); 2854 if (!event) 2855 return; 2856 entry = ring_buffer_event_data(event); 2857 entry->ip = ip; 2858 entry->parent_ip = parent_ip; 2859 2860 if (!call_filter_check_discard(call, entry, buffer, event)) { 2861 if (static_branch_unlikely(&trace_function_exports_enabled)) 2862 ftrace_exports(event, TRACE_EXPORT_FUNCTION); 2863 __buffer_unlock_commit(buffer, event); 2864 } 2865 } 2866 2867 #ifdef CONFIG_STACKTRACE 2868 2869 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */ 2870 #define FTRACE_KSTACK_NESTING 4 2871 2872 #define FTRACE_KSTACK_ENTRIES (PAGE_SIZE / FTRACE_KSTACK_NESTING) 2873 2874 struct ftrace_stack { 2875 unsigned long calls[FTRACE_KSTACK_ENTRIES]; 2876 }; 2877 2878 2879 struct ftrace_stacks { 2880 struct ftrace_stack stacks[FTRACE_KSTACK_NESTING]; 2881 }; 2882 2883 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks); 2884 static DEFINE_PER_CPU(int, ftrace_stack_reserve); 2885 2886 static void __ftrace_trace_stack(struct trace_buffer *buffer, 2887 unsigned int trace_ctx, 2888 int skip, struct pt_regs *regs) 2889 { 2890 struct trace_event_call *call = &event_kernel_stack; 2891 struct ring_buffer_event *event; 2892 unsigned int size, nr_entries; 2893 struct ftrace_stack *fstack; 2894 struct stack_entry *entry; 2895 int stackidx; 2896 2897 /* 2898 * Add one, for this function and the call to save_stack_trace() 2899 * If regs is set, then these functions will not be in the way. 2900 */ 2901 #ifndef CONFIG_UNWINDER_ORC 2902 if (!regs) 2903 skip++; 2904 #endif 2905 2906 preempt_disable_notrace(); 2907 2908 stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1; 2909 2910 /* This should never happen. If it does, yell once and skip */ 2911 if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING)) 2912 goto out; 2913 2914 /* 2915 * The above __this_cpu_inc_return() is 'atomic' cpu local. An 2916 * interrupt will either see the value pre increment or post 2917 * increment. If the interrupt happens pre increment it will have 2918 * restored the counter when it returns. We just need a barrier to 2919 * keep gcc from moving things around. 
2920 */ 2921 barrier(); 2922 2923 fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx; 2924 size = ARRAY_SIZE(fstack->calls); 2925 2926 if (regs) { 2927 nr_entries = stack_trace_save_regs(regs, fstack->calls, 2928 size, skip); 2929 } else { 2930 nr_entries = stack_trace_save(fstack->calls, size, skip); 2931 } 2932 2933 event = __trace_buffer_lock_reserve(buffer, TRACE_STACK, 2934 struct_size(entry, caller, nr_entries), 2935 trace_ctx); 2936 if (!event) 2937 goto out; 2938 entry = ring_buffer_event_data(event); 2939 2940 entry->size = nr_entries; 2941 memcpy(&entry->caller, fstack->calls, 2942 flex_array_size(entry, caller, nr_entries)); 2943 2944 if (!call_filter_check_discard(call, entry, buffer, event)) 2945 __buffer_unlock_commit(buffer, event); 2946 2947 out: 2948 /* Again, don't let gcc optimize things here */ 2949 barrier(); 2950 __this_cpu_dec(ftrace_stack_reserve); 2951 preempt_enable_notrace(); 2952 2953 } 2954 2955 static inline void ftrace_trace_stack(struct trace_array *tr, 2956 struct trace_buffer *buffer, 2957 unsigned int trace_ctx, 2958 int skip, struct pt_regs *regs) 2959 { 2960 if (!(tr->trace_flags & TRACE_ITER_STACKTRACE)) 2961 return; 2962 2963 __ftrace_trace_stack(buffer, trace_ctx, skip, regs); 2964 } 2965 2966 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx, 2967 int skip) 2968 { 2969 struct trace_buffer *buffer = tr->array_buffer.buffer; 2970 2971 if (rcu_is_watching()) { 2972 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL); 2973 return; 2974 } 2975 2976 if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY))) 2977 return; 2978 2979 /* 2980 * When an NMI triggers, RCU is enabled via ct_nmi_enter(), 2981 * but if the above rcu_is_watching() failed, then the NMI 2982 * triggered someplace critical, and ct_irq_enter() should 2983 * not be called from NMI. 2984 */ 2985 if (unlikely(in_nmi())) 2986 return; 2987 2988 ct_irq_enter_irqson(); 2989 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL); 2990 ct_irq_exit_irqson(); 2991 } 2992 2993 /** 2994 * trace_dump_stack - record a stack back trace in the trace buffer 2995 * @skip: Number of functions to skip (helper handlers) 2996 */ 2997 void trace_dump_stack(int skip) 2998 { 2999 if (tracing_disabled || tracing_selftest_running) 3000 return; 3001 3002 #ifndef CONFIG_UNWINDER_ORC 3003 /* Skip 1 to skip this function. */ 3004 skip++; 3005 #endif 3006 __ftrace_trace_stack(global_trace.array_buffer.buffer, 3007 tracing_gen_ctx(), skip, NULL); 3008 } 3009 EXPORT_SYMBOL_GPL(trace_dump_stack); 3010 3011 #ifdef CONFIG_USER_STACKTRACE_SUPPORT 3012 static DEFINE_PER_CPU(int, user_stack_count); 3013 3014 static void 3015 ftrace_trace_userstack(struct trace_array *tr, 3016 struct trace_buffer *buffer, unsigned int trace_ctx) 3017 { 3018 struct trace_event_call *call = &event_user_stack; 3019 struct ring_buffer_event *event; 3020 struct userstack_entry *entry; 3021 3022 if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE)) 3023 return; 3024 3025 /* 3026 * NMIs can not handle page faults, even with fix ups. 3027 * The save user stack can (and often does) fault. 3028 */ 3029 if (unlikely(in_nmi())) 3030 return; 3031 3032 /* 3033 * prevent recursion, since the user stack tracing may 3034 * trigger other kernel events. 
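 * (Saving the user stack can fault, and handling that fault could itself
 * generate trace events that would recurse back in here.)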
3035 */ 3036 preempt_disable(); 3037 if (__this_cpu_read(user_stack_count)) 3038 goto out; 3039 3040 __this_cpu_inc(user_stack_count); 3041 3042 event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK, 3043 sizeof(*entry), trace_ctx); 3044 if (!event) 3045 goto out_drop_count; 3046 entry = ring_buffer_event_data(event); 3047 3048 entry->tgid = current->tgid; 3049 memset(&entry->caller, 0, sizeof(entry->caller)); 3050 3051 stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES); 3052 if (!call_filter_check_discard(call, entry, buffer, event)) 3053 __buffer_unlock_commit(buffer, event); 3054 3055 out_drop_count: 3056 __this_cpu_dec(user_stack_count); 3057 out: 3058 preempt_enable(); 3059 } 3060 #else /* CONFIG_USER_STACKTRACE_SUPPORT */ 3061 static void ftrace_trace_userstack(struct trace_array *tr, 3062 struct trace_buffer *buffer, 3063 unsigned int trace_ctx) 3064 { 3065 } 3066 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */ 3067 3068 #endif /* CONFIG_STACKTRACE */ 3069 3070 static inline void 3071 func_repeats_set_delta_ts(struct func_repeats_entry *entry, 3072 unsigned long long delta) 3073 { 3074 entry->bottom_delta_ts = delta & U32_MAX; 3075 entry->top_delta_ts = (delta >> 32); 3076 } 3077 3078 void trace_last_func_repeats(struct trace_array *tr, 3079 struct trace_func_repeats *last_info, 3080 unsigned int trace_ctx) 3081 { 3082 struct trace_buffer *buffer = tr->array_buffer.buffer; 3083 struct func_repeats_entry *entry; 3084 struct ring_buffer_event *event; 3085 u64 delta; 3086 3087 event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS, 3088 sizeof(*entry), trace_ctx); 3089 if (!event) 3090 return; 3091 3092 delta = ring_buffer_event_time_stamp(buffer, event) - 3093 last_info->ts_last_call; 3094 3095 entry = ring_buffer_event_data(event); 3096 entry->ip = last_info->ip; 3097 entry->parent_ip = last_info->parent_ip; 3098 entry->count = last_info->count; 3099 func_repeats_set_delta_ts(entry, delta); 3100 3101 __buffer_unlock_commit(buffer, event); 3102 } 3103 3104 /* created for use with alloc_percpu */ 3105 struct trace_buffer_struct { 3106 int nesting; 3107 char buffer[4][TRACE_BUF_SIZE]; 3108 }; 3109 3110 static struct trace_buffer_struct __percpu *trace_percpu_buffer; 3111 3112 /* 3113 * This allows for lockless recording. If we're nested too deeply, then 3114 * this returns NULL. 3115 */ 3116 static char *get_trace_buf(void) 3117 { 3118 struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer); 3119 3120 if (!trace_percpu_buffer || buffer->nesting >= 4) 3121 return NULL; 3122 3123 buffer->nesting++; 3124 3125 /* Interrupts must see nesting incremented before we use the buffer */ 3126 barrier(); 3127 return &buffer->buffer[buffer->nesting - 1][0]; 3128 } 3129 3130 static void put_trace_buf(void) 3131 { 3132 /* Don't let the decrement of nesting leak before this */ 3133 barrier(); 3134 this_cpu_dec(trace_percpu_buffer->nesting); 3135 } 3136 3137 static int alloc_percpu_trace_buffer(void) 3138 { 3139 struct trace_buffer_struct __percpu *buffers; 3140 3141 if (trace_percpu_buffer) 3142 return 0; 3143 3144 buffers = alloc_percpu(struct trace_buffer_struct); 3145 if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer")) 3146 return -ENOMEM; 3147 3148 trace_percpu_buffer = buffers; 3149 return 0; 3150 } 3151 3152 static int buffers_allocated; 3153 3154 void trace_printk_init_buffers(void) 3155 { 3156 if (buffers_allocated) 3157 return; 3158 3159 if (alloc_percpu_trace_buffer()) 3160 return; 3161 3162 /* trace_printk() is for debug use only. 
Don't use it in production. */ 3163 3164 pr_warn("\n"); 3165 pr_warn("**********************************************************\n"); 3166 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); 3167 pr_warn("** **\n"); 3168 pr_warn("** trace_printk() being used. Allocating extra memory. **\n"); 3169 pr_warn("** **\n"); 3170 pr_warn("** This means that this is a DEBUG kernel and it is **\n"); 3171 pr_warn("** unsafe for production use. **\n"); 3172 pr_warn("** **\n"); 3173 pr_warn("** If you see this message and you are not debugging **\n"); 3174 pr_warn("** the kernel, report this immediately to your vendor! **\n"); 3175 pr_warn("** **\n"); 3176 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); 3177 pr_warn("**********************************************************\n"); 3178 3179 /* Expand the buffers to set size */ 3180 tracing_update_buffers(&global_trace); 3181 3182 buffers_allocated = 1; 3183 3184 /* 3185 * trace_printk_init_buffers() can be called by modules. 3186 * If that happens, then we need to start cmdline recording 3187 * directly here. If the global_trace.buffer is already 3188 * allocated here, then this was called by module code. 3189 */ 3190 if (global_trace.array_buffer.buffer) 3191 tracing_start_cmdline_record(); 3192 } 3193 EXPORT_SYMBOL_GPL(trace_printk_init_buffers); 3194 3195 void trace_printk_start_comm(void) 3196 { 3197 /* Start tracing comms if trace printk is set */ 3198 if (!buffers_allocated) 3199 return; 3200 tracing_start_cmdline_record(); 3201 } 3202 3203 static void trace_printk_start_stop_comm(int enabled) 3204 { 3205 if (!buffers_allocated) 3206 return; 3207 3208 if (enabled) 3209 tracing_start_cmdline_record(); 3210 else 3211 tracing_stop_cmdline_record(); 3212 } 3213 3214 /** 3215 * trace_vbprintk - write binary msg to tracing buffer 3216 * @ip: The address of the caller 3217 * @fmt: The string format to write to the buffer 3218 * @args: Arguments for @fmt 3219 */ 3220 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) 3221 { 3222 struct trace_event_call *call = &event_bprint; 3223 struct ring_buffer_event *event; 3224 struct trace_buffer *buffer; 3225 struct trace_array *tr = &global_trace; 3226 struct bprint_entry *entry; 3227 unsigned int trace_ctx; 3228 char *tbuffer; 3229 int len = 0, size; 3230 3231 if (unlikely(tracing_selftest_running || tracing_disabled)) 3232 return 0; 3233 3234 /* Don't pollute graph traces with trace_vprintk internals */ 3235 pause_graph_tracing(); 3236 3237 trace_ctx = tracing_gen_ctx(); 3238 preempt_disable_notrace(); 3239 3240 tbuffer = get_trace_buf(); 3241 if (!tbuffer) { 3242 len = 0; 3243 goto out_nobuffer; 3244 } 3245 3246 len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args); 3247 3248 if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0) 3249 goto out_put; 3250 3251 size = sizeof(*entry) + sizeof(u32) * len; 3252 buffer = tr->array_buffer.buffer; 3253 ring_buffer_nest_start(buffer); 3254 event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size, 3255 trace_ctx); 3256 if (!event) 3257 goto out; 3258 entry = ring_buffer_event_data(event); 3259 entry->ip = ip; 3260 entry->fmt = fmt; 3261 3262 memcpy(entry->buf, tbuffer, sizeof(u32) * len); 3263 if (!call_filter_check_discard(call, entry, buffer, event)) { 3264 __buffer_unlock_commit(buffer, event); 3265 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL); 3266 } 3267 3268 out: 3269 ring_buffer_nest_end(buffer); 3270 out_put: 3271 put_trace_buf(); 3272 3273 out_nobuffer: 3274 preempt_enable_notrace(); 
3275 unpause_graph_tracing(); 3276 3277 return len; 3278 } 3279 EXPORT_SYMBOL_GPL(trace_vbprintk); 3280 3281 __printf(3, 0) 3282 static int 3283 __trace_array_vprintk(struct trace_buffer *buffer, 3284 unsigned long ip, const char *fmt, va_list args) 3285 { 3286 struct trace_event_call *call = &event_print; 3287 struct ring_buffer_event *event; 3288 int len = 0, size; 3289 struct print_entry *entry; 3290 unsigned int trace_ctx; 3291 char *tbuffer; 3292 3293 if (tracing_disabled) 3294 return 0; 3295 3296 /* Don't pollute graph traces with trace_vprintk internals */ 3297 pause_graph_tracing(); 3298 3299 trace_ctx = tracing_gen_ctx(); 3300 preempt_disable_notrace(); 3301 3302 3303 tbuffer = get_trace_buf(); 3304 if (!tbuffer) { 3305 len = 0; 3306 goto out_nobuffer; 3307 } 3308 3309 len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args); 3310 3311 size = sizeof(*entry) + len + 1; 3312 ring_buffer_nest_start(buffer); 3313 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, 3314 trace_ctx); 3315 if (!event) 3316 goto out; 3317 entry = ring_buffer_event_data(event); 3318 entry->ip = ip; 3319 3320 memcpy(&entry->buf, tbuffer, len + 1); 3321 if (!call_filter_check_discard(call, entry, buffer, event)) { 3322 __buffer_unlock_commit(buffer, event); 3323 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL); 3324 } 3325 3326 out: 3327 ring_buffer_nest_end(buffer); 3328 put_trace_buf(); 3329 3330 out_nobuffer: 3331 preempt_enable_notrace(); 3332 unpause_graph_tracing(); 3333 3334 return len; 3335 } 3336 3337 __printf(3, 0) 3338 int trace_array_vprintk(struct trace_array *tr, 3339 unsigned long ip, const char *fmt, va_list args) 3340 { 3341 if (tracing_selftest_running && tr == &global_trace) 3342 return 0; 3343 3344 return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args); 3345 } 3346 3347 /** 3348 * trace_array_printk - Print a message to a specific instance 3349 * @tr: The instance trace_array descriptor 3350 * @ip: The instruction pointer that this is called from. 3351 * @fmt: The format to print (printf format) 3352 * 3353 * If a subsystem sets up its own instance, they have the right to 3354 * printk strings into their tracing instance buffer using this 3355 * function. Note, this function will not write into the top level 3356 * buffer (use trace_printk() for that), as writing into the top level 3357 * buffer should only have events that can be individually disabled. 3358 * trace_printk() is only used for debugging a kernel, and should not 3359 * be ever incorporated in normal use. 3360 * 3361 * trace_array_printk() can be used, as it will not add noise to the 3362 * top level tracing buffer. 3363 * 3364 * Note, trace_array_init_printk() must be called on @tr before this 3365 * can be used. 3366 */ 3367 __printf(3, 0) 3368 int trace_array_printk(struct trace_array *tr, 3369 unsigned long ip, const char *fmt, ...) 
3370 { 3371 int ret; 3372 va_list ap; 3373 3374 if (!tr) 3375 return -ENOENT; 3376 3377 /* This is only allowed for created instances */ 3378 if (tr == &global_trace) 3379 return 0; 3380 3381 if (!(tr->trace_flags & TRACE_ITER_PRINTK)) 3382 return 0; 3383 3384 va_start(ap, fmt); 3385 ret = trace_array_vprintk(tr, ip, fmt, ap); 3386 va_end(ap); 3387 return ret; 3388 } 3389 EXPORT_SYMBOL_GPL(trace_array_printk); 3390 3391 /** 3392 * trace_array_init_printk - Initialize buffers for trace_array_printk() 3393 * @tr: The trace array to initialize the buffers for 3394 * 3395 * As trace_array_printk() only writes into instances, they are OK to 3396 * have in the kernel (unlike trace_printk()). This needs to be called 3397 * before trace_array_printk() can be used on a trace_array. 3398 */ 3399 int trace_array_init_printk(struct trace_array *tr) 3400 { 3401 if (!tr) 3402 return -ENOENT; 3403 3404 /* This is only allowed for created instances */ 3405 if (tr == &global_trace) 3406 return -EINVAL; 3407 3408 return alloc_percpu_trace_buffer(); 3409 } 3410 EXPORT_SYMBOL_GPL(trace_array_init_printk); 3411 3412 __printf(3, 4) 3413 int trace_array_printk_buf(struct trace_buffer *buffer, 3414 unsigned long ip, const char *fmt, ...) 3415 { 3416 int ret; 3417 va_list ap; 3418 3419 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK)) 3420 return 0; 3421 3422 va_start(ap, fmt); 3423 ret = __trace_array_vprintk(buffer, ip, fmt, ap); 3424 va_end(ap); 3425 return ret; 3426 } 3427 3428 __printf(2, 0) 3429 int trace_vprintk(unsigned long ip, const char *fmt, va_list args) 3430 { 3431 return trace_array_vprintk(&global_trace, ip, fmt, args); 3432 } 3433 EXPORT_SYMBOL_GPL(trace_vprintk); 3434 3435 static void trace_iterator_increment(struct trace_iterator *iter) 3436 { 3437 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu); 3438 3439 iter->idx++; 3440 if (buf_iter) 3441 ring_buffer_iter_advance(buf_iter); 3442 } 3443 3444 static struct trace_entry * 3445 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts, 3446 unsigned long *lost_events) 3447 { 3448 struct ring_buffer_event *event; 3449 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu); 3450 3451 if (buf_iter) { 3452 event = ring_buffer_iter_peek(buf_iter, ts); 3453 if (lost_events) 3454 *lost_events = ring_buffer_iter_dropped(buf_iter) ? 3455 (unsigned long)-1 : 0; 3456 } else { 3457 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts, 3458 lost_events); 3459 } 3460 3461 if (event) { 3462 iter->ent_size = ring_buffer_event_length(event); 3463 return ring_buffer_event_data(event); 3464 } 3465 iter->ent_size = 0; 3466 return NULL; 3467 } 3468 3469 static struct trace_entry * 3470 __find_next_entry(struct trace_iterator *iter, int *ent_cpu, 3471 unsigned long *missing_events, u64 *ent_ts) 3472 { 3473 struct trace_buffer *buffer = iter->array_buffer->buffer; 3474 struct trace_entry *ent, *next = NULL; 3475 unsigned long lost_events = 0, next_lost = 0; 3476 int cpu_file = iter->cpu_file; 3477 u64 next_ts = 0, ts; 3478 int next_cpu = -1; 3479 int next_size = 0; 3480 int cpu; 3481 3482 /* 3483 * If we are in a per_cpu trace file, don't bother by iterating over 3484 * all cpu and peek directly. 
3485 */ 3486 if (cpu_file > RING_BUFFER_ALL_CPUS) { 3487 if (ring_buffer_empty_cpu(buffer, cpu_file)) 3488 return NULL; 3489 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events); 3490 if (ent_cpu) 3491 *ent_cpu = cpu_file; 3492 3493 return ent; 3494 } 3495 3496 for_each_tracing_cpu(cpu) { 3497 3498 if (ring_buffer_empty_cpu(buffer, cpu)) 3499 continue; 3500 3501 ent = peek_next_entry(iter, cpu, &ts, &lost_events); 3502 3503 /* 3504 * Pick the entry with the smallest timestamp: 3505 */ 3506 if (ent && (!next || ts < next_ts)) { 3507 next = ent; 3508 next_cpu = cpu; 3509 next_ts = ts; 3510 next_lost = lost_events; 3511 next_size = iter->ent_size; 3512 } 3513 } 3514 3515 iter->ent_size = next_size; 3516 3517 if (ent_cpu) 3518 *ent_cpu = next_cpu; 3519 3520 if (ent_ts) 3521 *ent_ts = next_ts; 3522 3523 if (missing_events) 3524 *missing_events = next_lost; 3525 3526 return next; 3527 } 3528 3529 #define STATIC_FMT_BUF_SIZE 128 3530 static char static_fmt_buf[STATIC_FMT_BUF_SIZE]; 3531 3532 char *trace_iter_expand_format(struct trace_iterator *iter) 3533 { 3534 char *tmp; 3535 3536 /* 3537 * iter->tr is NULL when used with tp_printk, which makes 3538 * this get called where it is not safe to call krealloc(). 3539 */ 3540 if (!iter->tr || iter->fmt == static_fmt_buf) 3541 return NULL; 3542 3543 tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE, 3544 GFP_KERNEL); 3545 if (tmp) { 3546 iter->fmt_size += STATIC_FMT_BUF_SIZE; 3547 iter->fmt = tmp; 3548 } 3549 3550 return tmp; 3551 } 3552 3553 /* Returns true if the string is safe to dereference from an event */ 3554 static bool trace_safe_str(struct trace_iterator *iter, const char *str, 3555 bool star, int len) 3556 { 3557 unsigned long addr = (unsigned long)str; 3558 struct trace_event *trace_event; 3559 struct trace_event_call *event; 3560 3561 /* Ignore strings with no length */ 3562 if (star && !len) 3563 return true; 3564 3565 /* OK if part of the event data */ 3566 if ((addr >= (unsigned long)iter->ent) && 3567 (addr < (unsigned long)iter->ent + iter->ent_size)) 3568 return true; 3569 3570 /* OK if part of the temp seq buffer */ 3571 if ((addr >= (unsigned long)iter->tmp_seq.buffer) && 3572 (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE)) 3573 return true; 3574 3575 /* Core rodata can not be freed */ 3576 if (is_kernel_rodata(addr)) 3577 return true; 3578 3579 if (trace_is_tracepoint_string(str)) 3580 return true; 3581 3582 /* 3583 * Now this could be a module event, referencing core module 3584 * data, which is OK. 3585 */ 3586 if (!iter->ent) 3587 return false; 3588 3589 trace_event = ftrace_find_event(iter->ent->type); 3590 if (!trace_event) 3591 return false; 3592 3593 event = container_of(trace_event, struct trace_event_call, event); 3594 if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module) 3595 return false; 3596 3597 /* Would rather have rodata, but this will suffice */ 3598 if (within_module_core(addr, event->module)) 3599 return true; 3600 3601 return false; 3602 } 3603 3604 static DEFINE_STATIC_KEY_FALSE(trace_no_verify); 3605 3606 static int test_can_verify_check(const char *fmt, ...) 3607 { 3608 char buf[16]; 3609 va_list ap; 3610 int ret; 3611 3612 /* 3613 * The verifier is dependent on vsnprintf() modifies the va_list 3614 * passed to it, where it is sent as a reference. 
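 * The probe below formats the first variadic argument with vsnprintf()
 * and then reads the next one with va_arg(): if the va_list was advanced,
 * the value returned is the caller's second argument, which
 * test_can_verify() passes as non-zero, and the verifier stays enabled.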
Some architectures 3615 * (like x86_32) passes it by value, which means that vsnprintf() 3616 * does not modify the va_list passed to it, and the verifier 3617 * would then need to be able to understand all the values that 3618 * vsnprintf can use. If it is passed by value, then the verifier 3619 * is disabled. 3620 */ 3621 va_start(ap, fmt); 3622 vsnprintf(buf, 16, "%d", ap); 3623 ret = va_arg(ap, int); 3624 va_end(ap); 3625 3626 return ret; 3627 } 3628 3629 static void test_can_verify(void) 3630 { 3631 if (!test_can_verify_check("%d %d", 0, 1)) { 3632 pr_info("trace event string verifier disabled\n"); 3633 static_branch_inc(&trace_no_verify); 3634 } 3635 } 3636 3637 /** 3638 * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer 3639 * @iter: The iterator that holds the seq buffer and the event being printed 3640 * @fmt: The format used to print the event 3641 * @ap: The va_list holding the data to print from @fmt. 3642 * 3643 * This writes the data into the @iter->seq buffer using the data from 3644 * @fmt and @ap. If the format has a %s, then the source of the string 3645 * is examined to make sure it is safe to print, otherwise it will 3646 * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string 3647 * pointer. 3648 */ 3649 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt, 3650 va_list ap) 3651 { 3652 const char *p = fmt; 3653 const char *str; 3654 int i, j; 3655 3656 if (WARN_ON_ONCE(!fmt)) 3657 return; 3658 3659 if (static_branch_unlikely(&trace_no_verify)) 3660 goto print; 3661 3662 /* Don't bother checking when doing a ftrace_dump() */ 3663 if (iter->fmt == static_fmt_buf) 3664 goto print; 3665 3666 while (*p) { 3667 bool star = false; 3668 int len = 0; 3669 3670 j = 0; 3671 3672 /* We only care about %s and variants */ 3673 for (i = 0; p[i]; i++) { 3674 if (i + 1 >= iter->fmt_size) { 3675 /* 3676 * If we can't expand the copy buffer, 3677 * just print it. 3678 */ 3679 if (!trace_iter_expand_format(iter)) 3680 goto print; 3681 } 3682 3683 if (p[i] == '\\' && p[i+1]) { 3684 i++; 3685 continue; 3686 } 3687 if (p[i] == '%') { 3688 /* Need to test cases like %08.*s */ 3689 for (j = 1; p[i+j]; j++) { 3690 if (isdigit(p[i+j]) || 3691 p[i+j] == '.') 3692 continue; 3693 if (p[i+j] == '*') { 3694 star = true; 3695 continue; 3696 } 3697 break; 3698 } 3699 if (p[i+j] == 's') 3700 break; 3701 star = false; 3702 } 3703 j = 0; 3704 } 3705 /* If no %s found then just print normally */ 3706 if (!p[i]) 3707 break; 3708 3709 /* Copy up to the %s, and print that */ 3710 strncpy(iter->fmt, p, i); 3711 iter->fmt[i] = '\0'; 3712 trace_seq_vprintf(&iter->seq, iter->fmt, ap); 3713 3714 /* 3715 * If iter->seq is full, the above call no longer guarantees 3716 * that ap is in sync with fmt processing, and further calls 3717 * to va_arg() can return wrong positional arguments. 3718 * 3719 * Ensure that ap is no longer used in this case. 3720 */ 3721 if (iter->seq.full) { 3722 p = ""; 3723 break; 3724 } 3725 3726 if (star) 3727 len = va_arg(ap, int); 3728 3729 /* The ap now points to the string data of the %s */ 3730 str = va_arg(ap, const char *); 3731 3732 /* 3733 * If you hit this warning, it is likely that the 3734 * trace event in question used %s on a string that 3735 * was saved at the time of the event, but may not be 3736 * around when the trace is read. Use __string(), 3737 * __assign_str() and __get_str() helpers in the TRACE_EVENT() 3738 * instead. See samples/trace_events/trace-events-sample.h 3739 * for reference. 
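 * Roughly: declare the field with __string(name, src) in
 * TP_STRUCT__entry(), copy it with __assign_str() in TP_fast_assign(),
 * and print it with __get_str(name) in TP_printk(); the sample file above
 * shows the complete form.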
3740 */ 3741 if (WARN_ONCE(!trace_safe_str(iter, str, star, len), 3742 "fmt: '%s' current_buffer: '%s'", 3743 fmt, seq_buf_str(&iter->seq.seq))) { 3744 int ret; 3745 3746 /* Try to safely read the string */ 3747 if (star) { 3748 if (len + 1 > iter->fmt_size) 3749 len = iter->fmt_size - 1; 3750 if (len < 0) 3751 len = 0; 3752 ret = copy_from_kernel_nofault(iter->fmt, str, len); 3753 iter->fmt[len] = 0; 3754 star = false; 3755 } else { 3756 ret = strncpy_from_kernel_nofault(iter->fmt, str, 3757 iter->fmt_size); 3758 } 3759 if (ret < 0) 3760 trace_seq_printf(&iter->seq, "(0x%px)", str); 3761 else 3762 trace_seq_printf(&iter->seq, "(0x%px:%s)", 3763 str, iter->fmt); 3764 str = "[UNSAFE-MEMORY]"; 3765 strcpy(iter->fmt, "%s"); 3766 } else { 3767 strncpy(iter->fmt, p + i, j + 1); 3768 iter->fmt[j+1] = '\0'; 3769 } 3770 if (star) 3771 trace_seq_printf(&iter->seq, iter->fmt, len, str); 3772 else 3773 trace_seq_printf(&iter->seq, iter->fmt, str); 3774 3775 p += i + j + 1; 3776 } 3777 print: 3778 if (*p) 3779 trace_seq_vprintf(&iter->seq, p, ap); 3780 } 3781 3782 const char *trace_event_format(struct trace_iterator *iter, const char *fmt) 3783 { 3784 const char *p, *new_fmt; 3785 char *q; 3786 3787 if (WARN_ON_ONCE(!fmt)) 3788 return fmt; 3789 3790 if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR) 3791 return fmt; 3792 3793 p = fmt; 3794 new_fmt = q = iter->fmt; 3795 while (*p) { 3796 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) { 3797 if (!trace_iter_expand_format(iter)) 3798 return fmt; 3799 3800 q += iter->fmt - new_fmt; 3801 new_fmt = iter->fmt; 3802 } 3803 3804 *q++ = *p++; 3805 3806 /* Replace %p with %px */ 3807 if (p[-1] == '%') { 3808 if (p[0] == '%') { 3809 *q++ = *p++; 3810 } else if (p[0] == 'p' && !isalnum(p[1])) { 3811 *q++ = *p++; 3812 *q++ = 'x'; 3813 } 3814 } 3815 } 3816 *q = '\0'; 3817 3818 return new_fmt; 3819 } 3820 3821 #define STATIC_TEMP_BUF_SIZE 128 3822 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4); 3823 3824 /* Find the next real entry, without updating the iterator itself */ 3825 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter, 3826 int *ent_cpu, u64 *ent_ts) 3827 { 3828 /* __find_next_entry will reset ent_size */ 3829 int ent_size = iter->ent_size; 3830 struct trace_entry *entry; 3831 3832 /* 3833 * If called from ftrace_dump(), then the iter->temp buffer 3834 * will be the static_temp_buf and not created from kmalloc. 3835 * If the entry size is greater than the buffer, we can 3836 * not save it. Just return NULL in that case. This is only 3837 * used to add markers when two consecutive events' time 3838 * stamps have a large delta. See trace_print_lat_context() 3839 */ 3840 if (iter->temp == static_temp_buf && 3841 STATIC_TEMP_BUF_SIZE < ent_size) 3842 return NULL; 3843 3844 /* 3845 * The __find_next_entry() may call peek_next_entry(), which may 3846 * call ring_buffer_peek() that may make the contents of iter->ent 3847 * undefined. Need to copy iter->ent now. 
3848 */ 3849 if (iter->ent && iter->ent != iter->temp) { 3850 if ((!iter->temp || iter->temp_size < iter->ent_size) && 3851 !WARN_ON_ONCE(iter->temp == static_temp_buf)) { 3852 void *temp; 3853 temp = kmalloc(iter->ent_size, GFP_KERNEL); 3854 if (!temp) 3855 return NULL; 3856 kfree(iter->temp); 3857 iter->temp = temp; 3858 iter->temp_size = iter->ent_size; 3859 } 3860 memcpy(iter->temp, iter->ent, iter->ent_size); 3861 iter->ent = iter->temp; 3862 } 3863 entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts); 3864 /* Put back the original ent_size */ 3865 iter->ent_size = ent_size; 3866 3867 return entry; 3868 } 3869 3870 /* Find the next real entry, and increment the iterator to the next entry */ 3871 void *trace_find_next_entry_inc(struct trace_iterator *iter) 3872 { 3873 iter->ent = __find_next_entry(iter, &iter->cpu, 3874 &iter->lost_events, &iter->ts); 3875 3876 if (iter->ent) 3877 trace_iterator_increment(iter); 3878 3879 return iter->ent ? iter : NULL; 3880 } 3881 3882 static void trace_consume(struct trace_iterator *iter) 3883 { 3884 ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts, 3885 &iter->lost_events); 3886 } 3887 3888 static void *s_next(struct seq_file *m, void *v, loff_t *pos) 3889 { 3890 struct trace_iterator *iter = m->private; 3891 int i = (int)*pos; 3892 void *ent; 3893 3894 WARN_ON_ONCE(iter->leftover); 3895 3896 (*pos)++; 3897 3898 /* can't go backwards */ 3899 if (iter->idx > i) 3900 return NULL; 3901 3902 if (iter->idx < 0) 3903 ent = trace_find_next_entry_inc(iter); 3904 else 3905 ent = iter; 3906 3907 while (ent && iter->idx < i) 3908 ent = trace_find_next_entry_inc(iter); 3909 3910 iter->pos = *pos; 3911 3912 return ent; 3913 } 3914 3915 void tracing_iter_reset(struct trace_iterator *iter, int cpu) 3916 { 3917 struct ring_buffer_iter *buf_iter; 3918 unsigned long entries = 0; 3919 u64 ts; 3920 3921 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0; 3922 3923 buf_iter = trace_buffer_iter(iter, cpu); 3924 if (!buf_iter) 3925 return; 3926 3927 ring_buffer_iter_reset(buf_iter); 3928 3929 /* 3930 * We could have the case with the max latency tracers 3931 * that a reset never took place on a cpu. This is evident 3932 * by the timestamp being before the start of the buffer. 3933 */ 3934 while (ring_buffer_iter_peek(buf_iter, &ts)) { 3935 if (ts >= iter->array_buffer->time_start) 3936 break; 3937 entries++; 3938 ring_buffer_iter_advance(buf_iter); 3939 } 3940 3941 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries; 3942 } 3943 3944 /* 3945 * The current tracer is copied to avoid a global locking 3946 * all around. 
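 * (s_start() below re-checks tr->current_trace under trace_types_lock and
 * closes/re-opens the iterator's tracer if it changed since the trace file
 * was opened.)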
3947 */ 3948 static void *s_start(struct seq_file *m, loff_t *pos) 3949 { 3950 struct trace_iterator *iter = m->private; 3951 struct trace_array *tr = iter->tr; 3952 int cpu_file = iter->cpu_file; 3953 void *p = NULL; 3954 loff_t l = 0; 3955 int cpu; 3956 3957 mutex_lock(&trace_types_lock); 3958 if (unlikely(tr->current_trace != iter->trace)) { 3959 /* Close iter->trace before switching to the new current tracer */ 3960 if (iter->trace->close) 3961 iter->trace->close(iter); 3962 iter->trace = tr->current_trace; 3963 /* Reopen the new current tracer */ 3964 if (iter->trace->open) 3965 iter->trace->open(iter); 3966 } 3967 mutex_unlock(&trace_types_lock); 3968 3969 #ifdef CONFIG_TRACER_MAX_TRACE 3970 if (iter->snapshot && iter->trace->use_max_tr) 3971 return ERR_PTR(-EBUSY); 3972 #endif 3973 3974 if (*pos != iter->pos) { 3975 iter->ent = NULL; 3976 iter->cpu = 0; 3977 iter->idx = -1; 3978 3979 if (cpu_file == RING_BUFFER_ALL_CPUS) { 3980 for_each_tracing_cpu(cpu) 3981 tracing_iter_reset(iter, cpu); 3982 } else 3983 tracing_iter_reset(iter, cpu_file); 3984 3985 iter->leftover = 0; 3986 for (p = iter; p && l < *pos; p = s_next(m, p, &l)) 3987 ; 3988 3989 } else { 3990 /* 3991 * If we overflowed the seq_file before, then we want 3992 * to just reuse the trace_seq buffer again. 3993 */ 3994 if (iter->leftover) 3995 p = iter; 3996 else { 3997 l = *pos - 1; 3998 p = s_next(m, p, &l); 3999 } 4000 } 4001 4002 trace_event_read_lock(); 4003 trace_access_lock(cpu_file); 4004 return p; 4005 } 4006 4007 static void s_stop(struct seq_file *m, void *p) 4008 { 4009 struct trace_iterator *iter = m->private; 4010 4011 #ifdef CONFIG_TRACER_MAX_TRACE 4012 if (iter->snapshot && iter->trace->use_max_tr) 4013 return; 4014 #endif 4015 4016 trace_access_unlock(iter->cpu_file); 4017 trace_event_read_unlock(); 4018 } 4019 4020 static void 4021 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total, 4022 unsigned long *entries, int cpu) 4023 { 4024 unsigned long count; 4025 4026 count = ring_buffer_entries_cpu(buf->buffer, cpu); 4027 /* 4028 * If this buffer has skipped entries, then we hold all 4029 * entries for the trace and we need to ignore the 4030 * ones before the time stamp. 
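 * (skipped_entries is set by tracing_iter_reset() when it walks past
 * events whose timestamps predate the buffer's time_start.)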
4031 */ 4032 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) { 4033 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries; 4034 /* total is the same as the entries */ 4035 *total = count; 4036 } else 4037 *total = count + 4038 ring_buffer_overrun_cpu(buf->buffer, cpu); 4039 *entries = count; 4040 } 4041 4042 static void 4043 get_total_entries(struct array_buffer *buf, 4044 unsigned long *total, unsigned long *entries) 4045 { 4046 unsigned long t, e; 4047 int cpu; 4048 4049 *total = 0; 4050 *entries = 0; 4051 4052 for_each_tracing_cpu(cpu) { 4053 get_total_entries_cpu(buf, &t, &e, cpu); 4054 *total += t; 4055 *entries += e; 4056 } 4057 } 4058 4059 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu) 4060 { 4061 unsigned long total, entries; 4062 4063 if (!tr) 4064 tr = &global_trace; 4065 4066 get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu); 4067 4068 return entries; 4069 } 4070 4071 unsigned long trace_total_entries(struct trace_array *tr) 4072 { 4073 unsigned long total, entries; 4074 4075 if (!tr) 4076 tr = &global_trace; 4077 4078 get_total_entries(&tr->array_buffer, &total, &entries); 4079 4080 return entries; 4081 } 4082 4083 static void print_lat_help_header(struct seq_file *m) 4084 { 4085 seq_puts(m, "# _------=> CPU# \n" 4086 "# / _-----=> irqs-off/BH-disabled\n" 4087 "# | / _----=> need-resched \n" 4088 "# || / _---=> hardirq/softirq \n" 4089 "# ||| / _--=> preempt-depth \n" 4090 "# |||| / _-=> migrate-disable \n" 4091 "# ||||| / delay \n" 4092 "# cmd pid |||||| time | caller \n" 4093 "# \\ / |||||| \\ | / \n"); 4094 } 4095 4096 static void print_event_info(struct array_buffer *buf, struct seq_file *m) 4097 { 4098 unsigned long total; 4099 unsigned long entries; 4100 4101 get_total_entries(buf, &total, &entries); 4102 seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n", 4103 entries, total, num_online_cpus()); 4104 seq_puts(m, "#\n"); 4105 } 4106 4107 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m, 4108 unsigned int flags) 4109 { 4110 bool tgid = flags & TRACE_ITER_RECORD_TGID; 4111 4112 print_event_info(buf, m); 4113 4114 seq_printf(m, "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? " TGID " : ""); 4115 seq_printf(m, "# | | %s | | |\n", tgid ? " | " : ""); 4116 } 4117 4118 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m, 4119 unsigned int flags) 4120 { 4121 bool tgid = flags & TRACE_ITER_RECORD_TGID; 4122 static const char space[] = " "; 4123 int prec = tgid ? 
12 : 2; 4124 4125 print_event_info(buf, m); 4126 4127 seq_printf(m, "# %.*s _-----=> irqs-off/BH-disabled\n", prec, space); 4128 seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space); 4129 seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space); 4130 seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space); 4131 seq_printf(m, "# %.*s||| / _-=> migrate-disable\n", prec, space); 4132 seq_printf(m, "# %.*s|||| / delay\n", prec, space); 4133 seq_printf(m, "# TASK-PID %.*s CPU# ||||| TIMESTAMP FUNCTION\n", prec, " TGID "); 4134 seq_printf(m, "# | | %.*s | ||||| | |\n", prec, " | "); 4135 } 4136 4137 void 4138 print_trace_header(struct seq_file *m, struct trace_iterator *iter) 4139 { 4140 unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK); 4141 struct array_buffer *buf = iter->array_buffer; 4142 struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu); 4143 struct tracer *type = iter->trace; 4144 unsigned long entries; 4145 unsigned long total; 4146 const char *name = type->name; 4147 4148 get_total_entries(buf, &total, &entries); 4149 4150 seq_printf(m, "# %s latency trace v1.1.5 on %s\n", 4151 name, init_utsname()->release); 4152 seq_puts(m, "# -----------------------------------" 4153 "---------------------------------\n"); 4154 seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |" 4155 " (M:%s VP:%d, KP:%d, SP:%d HP:%d", 4156 nsecs_to_usecs(data->saved_latency), 4157 entries, 4158 total, 4159 buf->cpu, 4160 preempt_model_none() ? "server" : 4161 preempt_model_voluntary() ? "desktop" : 4162 preempt_model_full() ? "preempt" : 4163 preempt_model_rt() ? "preempt_rt" : 4164 "unknown", 4165 /* These are reserved for later use */ 4166 0, 0, 0, 0); 4167 #ifdef CONFIG_SMP 4168 seq_printf(m, " #P:%d)\n", num_online_cpus()); 4169 #else 4170 seq_puts(m, ")\n"); 4171 #endif 4172 seq_puts(m, "# -----------------\n"); 4173 seq_printf(m, "# | task: %.16s-%d " 4174 "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n", 4175 data->comm, data->pid, 4176 from_kuid_munged(seq_user_ns(m), data->uid), data->nice, 4177 data->policy, data->rt_priority); 4178 seq_puts(m, "# -----------------\n"); 4179 4180 if (data->critical_start) { 4181 seq_puts(m, "# => started at: "); 4182 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags); 4183 trace_print_seq(m, &iter->seq); 4184 seq_puts(m, "\n# => ended at: "); 4185 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags); 4186 trace_print_seq(m, &iter->seq); 4187 seq_puts(m, "\n#\n"); 4188 } 4189 4190 seq_puts(m, "#\n"); 4191 } 4192 4193 static void test_cpu_buff_start(struct trace_iterator *iter) 4194 { 4195 struct trace_seq *s = &iter->seq; 4196 struct trace_array *tr = iter->tr; 4197 4198 if (!(tr->trace_flags & TRACE_ITER_ANNOTATE)) 4199 return; 4200 4201 if (!(iter->iter_flags & TRACE_FILE_ANNOTATE)) 4202 return; 4203 4204 if (cpumask_available(iter->started) && 4205 cpumask_test_cpu(iter->cpu, iter->started)) 4206 return; 4207 4208 if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries) 4209 return; 4210 4211 if (cpumask_available(iter->started)) 4212 cpumask_set_cpu(iter->cpu, iter->started); 4213 4214 /* Don't print started cpu buffer for the first entry of the trace */ 4215 if (iter->idx > 1) 4216 trace_seq_printf(s, "##### CPU %u buffer started ####\n", 4217 iter->cpu); 4218 } 4219 4220 static enum print_line_t print_trace_fmt(struct trace_iterator *iter) 4221 { 4222 struct trace_array *tr = iter->tr; 4223 struct trace_seq *s = &iter->seq; 4224 unsigned long sym_flags = (tr->trace_flags & 
TRACE_ITER_SYM_MASK); 4225 struct trace_entry *entry; 4226 struct trace_event *event; 4227 4228 entry = iter->ent; 4229 4230 test_cpu_buff_start(iter); 4231 4232 event = ftrace_find_event(entry->type); 4233 4234 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) { 4235 if (iter->iter_flags & TRACE_FILE_LAT_FMT) 4236 trace_print_lat_context(iter); 4237 else 4238 trace_print_context(iter); 4239 } 4240 4241 if (trace_seq_has_overflowed(s)) 4242 return TRACE_TYPE_PARTIAL_LINE; 4243 4244 if (event) { 4245 if (tr->trace_flags & TRACE_ITER_FIELDS) 4246 return print_event_fields(iter, event); 4247 return event->funcs->trace(iter, sym_flags, event); 4248 } 4249 4250 trace_seq_printf(s, "Unknown type %d\n", entry->type); 4251 4252 return trace_handle_return(s); 4253 } 4254 4255 static enum print_line_t print_raw_fmt(struct trace_iterator *iter) 4256 { 4257 struct trace_array *tr = iter->tr; 4258 struct trace_seq *s = &iter->seq; 4259 struct trace_entry *entry; 4260 struct trace_event *event; 4261 4262 entry = iter->ent; 4263 4264 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) 4265 trace_seq_printf(s, "%d %d %llu ", 4266 entry->pid, iter->cpu, iter->ts); 4267 4268 if (trace_seq_has_overflowed(s)) 4269 return TRACE_TYPE_PARTIAL_LINE; 4270 4271 event = ftrace_find_event(entry->type); 4272 if (event) 4273 return event->funcs->raw(iter, 0, event); 4274 4275 trace_seq_printf(s, "%d ?\n", entry->type); 4276 4277 return trace_handle_return(s); 4278 } 4279 4280 static enum print_line_t print_hex_fmt(struct trace_iterator *iter) 4281 { 4282 struct trace_array *tr = iter->tr; 4283 struct trace_seq *s = &iter->seq; 4284 unsigned char newline = '\n'; 4285 struct trace_entry *entry; 4286 struct trace_event *event; 4287 4288 entry = iter->ent; 4289 4290 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) { 4291 SEQ_PUT_HEX_FIELD(s, entry->pid); 4292 SEQ_PUT_HEX_FIELD(s, iter->cpu); 4293 SEQ_PUT_HEX_FIELD(s, iter->ts); 4294 if (trace_seq_has_overflowed(s)) 4295 return TRACE_TYPE_PARTIAL_LINE; 4296 } 4297 4298 event = ftrace_find_event(entry->type); 4299 if (event) { 4300 enum print_line_t ret = event->funcs->hex(iter, 0, event); 4301 if (ret != TRACE_TYPE_HANDLED) 4302 return ret; 4303 } 4304 4305 SEQ_PUT_FIELD(s, newline); 4306 4307 return trace_handle_return(s); 4308 } 4309 4310 static enum print_line_t print_bin_fmt(struct trace_iterator *iter) 4311 { 4312 struct trace_array *tr = iter->tr; 4313 struct trace_seq *s = &iter->seq; 4314 struct trace_entry *entry; 4315 struct trace_event *event; 4316 4317 entry = iter->ent; 4318 4319 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) { 4320 SEQ_PUT_FIELD(s, entry->pid); 4321 SEQ_PUT_FIELD(s, iter->cpu); 4322 SEQ_PUT_FIELD(s, iter->ts); 4323 if (trace_seq_has_overflowed(s)) 4324 return TRACE_TYPE_PARTIAL_LINE; 4325 } 4326 4327 event = ftrace_find_event(entry->type); 4328 return event ? 
event->funcs->binary(iter, 0, event) : 4329 TRACE_TYPE_HANDLED; 4330 } 4331 4332 int trace_empty(struct trace_iterator *iter) 4333 { 4334 struct ring_buffer_iter *buf_iter; 4335 int cpu; 4336 4337 /* If we are looking at one CPU buffer, only check that one */ 4338 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) { 4339 cpu = iter->cpu_file; 4340 buf_iter = trace_buffer_iter(iter, cpu); 4341 if (buf_iter) { 4342 if (!ring_buffer_iter_empty(buf_iter)) 4343 return 0; 4344 } else { 4345 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu)) 4346 return 0; 4347 } 4348 return 1; 4349 } 4350 4351 for_each_tracing_cpu(cpu) { 4352 buf_iter = trace_buffer_iter(iter, cpu); 4353 if (buf_iter) { 4354 if (!ring_buffer_iter_empty(buf_iter)) 4355 return 0; 4356 } else { 4357 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu)) 4358 return 0; 4359 } 4360 } 4361 4362 return 1; 4363 } 4364 4365 /* Called with trace_event_read_lock() held. */ 4366 enum print_line_t print_trace_line(struct trace_iterator *iter) 4367 { 4368 struct trace_array *tr = iter->tr; 4369 unsigned long trace_flags = tr->trace_flags; 4370 enum print_line_t ret; 4371 4372 if (iter->lost_events) { 4373 if (iter->lost_events == (unsigned long)-1) 4374 trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n", 4375 iter->cpu); 4376 else 4377 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n", 4378 iter->cpu, iter->lost_events); 4379 if (trace_seq_has_overflowed(&iter->seq)) 4380 return TRACE_TYPE_PARTIAL_LINE; 4381 } 4382 4383 if (iter->trace && iter->trace->print_line) { 4384 ret = iter->trace->print_line(iter); 4385 if (ret != TRACE_TYPE_UNHANDLED) 4386 return ret; 4387 } 4388 4389 if (iter->ent->type == TRACE_BPUTS && 4390 trace_flags & TRACE_ITER_PRINTK && 4391 trace_flags & TRACE_ITER_PRINTK_MSGONLY) 4392 return trace_print_bputs_msg_only(iter); 4393 4394 if (iter->ent->type == TRACE_BPRINT && 4395 trace_flags & TRACE_ITER_PRINTK && 4396 trace_flags & TRACE_ITER_PRINTK_MSGONLY) 4397 return trace_print_bprintk_msg_only(iter); 4398 4399 if (iter->ent->type == TRACE_PRINT && 4400 trace_flags & TRACE_ITER_PRINTK && 4401 trace_flags & TRACE_ITER_PRINTK_MSGONLY) 4402 return trace_print_printk_msg_only(iter); 4403 4404 if (trace_flags & TRACE_ITER_BIN) 4405 return print_bin_fmt(iter); 4406 4407 if (trace_flags & TRACE_ITER_HEX) 4408 return print_hex_fmt(iter); 4409 4410 if (trace_flags & TRACE_ITER_RAW) 4411 return print_raw_fmt(iter); 4412 4413 return print_trace_fmt(iter); 4414 } 4415 4416 void trace_latency_header(struct seq_file *m) 4417 { 4418 struct trace_iterator *iter = m->private; 4419 struct trace_array *tr = iter->tr; 4420 4421 /* print nothing if the buffers are empty */ 4422 if (trace_empty(iter)) 4423 return; 4424 4425 if (iter->iter_flags & TRACE_FILE_LAT_FMT) 4426 print_trace_header(m, iter); 4427 4428 if (!(tr->trace_flags & TRACE_ITER_VERBOSE)) 4429 print_lat_help_header(m); 4430 } 4431 4432 void trace_default_header(struct seq_file *m) 4433 { 4434 struct trace_iterator *iter = m->private; 4435 struct trace_array *tr = iter->tr; 4436 unsigned long trace_flags = tr->trace_flags; 4437 4438 if (!(trace_flags & TRACE_ITER_CONTEXT_INFO)) 4439 return; 4440 4441 if (iter->iter_flags & TRACE_FILE_LAT_FMT) { 4442 /* print nothing if the buffers are empty */ 4443 if (trace_empty(iter)) 4444 return; 4445 print_trace_header(m, iter); 4446 if (!(trace_flags & TRACE_ITER_VERBOSE)) 4447 print_lat_help_header(m); 4448 } else { 4449 if (!(trace_flags & TRACE_ITER_VERBOSE)) { 4450 if (trace_flags & TRACE_ITER_IRQ_INFO) 4451 
print_func_help_header_irq(iter->array_buffer, 4452 m, trace_flags); 4453 else 4454 print_func_help_header(iter->array_buffer, m, 4455 trace_flags); 4456 } 4457 } 4458 } 4459 4460 static void test_ftrace_alive(struct seq_file *m) 4461 { 4462 if (!ftrace_is_dead()) 4463 return; 4464 seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n" 4465 "# MAY BE MISSING FUNCTION EVENTS\n"); 4466 } 4467 4468 #ifdef CONFIG_TRACER_MAX_TRACE 4469 static void show_snapshot_main_help(struct seq_file *m) 4470 { 4471 seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n" 4472 "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n" 4473 "# Takes a snapshot of the main buffer.\n" 4474 "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n" 4475 "# (Doesn't have to be '2' works with any number that\n" 4476 "# is not a '0' or '1')\n"); 4477 } 4478 4479 static void show_snapshot_percpu_help(struct seq_file *m) 4480 { 4481 seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n"); 4482 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP 4483 seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n" 4484 "# Takes a snapshot of the main buffer for this cpu.\n"); 4485 #else 4486 seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n" 4487 "# Must use main snapshot file to allocate.\n"); 4488 #endif 4489 seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n" 4490 "# (Doesn't have to be '2' works with any number that\n" 4491 "# is not a '0' or '1')\n"); 4492 } 4493 4494 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) 4495 { 4496 if (iter->tr->allocated_snapshot) 4497 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n"); 4498 else 4499 seq_puts(m, "#\n# * Snapshot is freed *\n#\n"); 4500 4501 seq_puts(m, "# Snapshot commands:\n"); 4502 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) 4503 show_snapshot_main_help(m); 4504 else 4505 show_snapshot_percpu_help(m); 4506 } 4507 #else 4508 /* Should never be called */ 4509 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { } 4510 #endif 4511 4512 static int s_show(struct seq_file *m, void *v) 4513 { 4514 struct trace_iterator *iter = v; 4515 int ret; 4516 4517 if (iter->ent == NULL) { 4518 if (iter->tr) { 4519 seq_printf(m, "# tracer: %s\n", iter->trace->name); 4520 seq_puts(m, "#\n"); 4521 test_ftrace_alive(m); 4522 } 4523 if (iter->snapshot && trace_empty(iter)) 4524 print_snapshot_help(m, iter); 4525 else if (iter->trace && iter->trace->print_header) 4526 iter->trace->print_header(m); 4527 else 4528 trace_default_header(m); 4529 4530 } else if (iter->leftover) { 4531 /* 4532 * If we filled the seq_file buffer earlier, we 4533 * want to just show it now. 4534 */ 4535 ret = trace_print_seq(m, &iter->seq); 4536 4537 /* ret should this time be zero, but you never know */ 4538 iter->leftover = ret; 4539 4540 } else { 4541 ret = print_trace_line(iter); 4542 if (ret == TRACE_TYPE_PARTIAL_LINE) { 4543 iter->seq.full = 0; 4544 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n"); 4545 } 4546 ret = trace_print_seq(m, &iter->seq); 4547 /* 4548 * If we overflow the seq_file buffer, then it will 4549 * ask us for this data again at start up. 4550 * Use that instead. 4551 * ret is 0 if seq_file write succeeded. 4552 * -1 otherwise. 
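* A non-zero leftover makes the next s_start()/s_show() pass hand back the iterator itself so the already-formatted trace_seq can be flushed before any new entry is consumed.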
4553 */ 4554 iter->leftover = ret; 4555 } 4556 4557 return 0; 4558 } 4559 4560 /* 4561 * Should be used after trace_array_get(), trace_types_lock 4562 * ensures that i_cdev was already initialized. 4563 */ 4564 static inline int tracing_get_cpu(struct inode *inode) 4565 { 4566 if (inode->i_cdev) /* See trace_create_cpu_file() */ 4567 return (long)inode->i_cdev - 1; 4568 return RING_BUFFER_ALL_CPUS; 4569 } 4570 4571 static const struct seq_operations tracer_seq_ops = { 4572 .start = s_start, 4573 .next = s_next, 4574 .stop = s_stop, 4575 .show = s_show, 4576 }; 4577 4578 /* 4579 * Note, as iter itself can be allocated and freed in different 4580 * ways, this function is only used to free its content, and not 4581 * the iterator itself. The only requirement for all the allocations 4582 * is that they must zero all fields (kzalloc), as freeing works with 4583 * either allocated content or NULL. 4584 */ 4585 static void free_trace_iter_content(struct trace_iterator *iter) 4586 { 4587 /* The fmt is either NULL, allocated or points to static_fmt_buf */ 4588 if (iter->fmt != static_fmt_buf) 4589 kfree(iter->fmt); 4590 4591 kfree(iter->temp); 4592 kfree(iter->buffer_iter); 4593 mutex_destroy(&iter->mutex); 4594 free_cpumask_var(iter->started); 4595 } 4596 4597 static struct trace_iterator * 4598 __tracing_open(struct inode *inode, struct file *file, bool snapshot) 4599 { 4600 struct trace_array *tr = inode->i_private; 4601 struct trace_iterator *iter; 4602 int cpu; 4603 4604 if (tracing_disabled) 4605 return ERR_PTR(-ENODEV); 4606 4607 iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter)); 4608 if (!iter) 4609 return ERR_PTR(-ENOMEM); 4610 4611 iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter), 4612 GFP_KERNEL); 4613 if (!iter->buffer_iter) 4614 goto release; 4615 4616 /* 4617 * trace_find_next_entry() may need to save off iter->ent. 4618 * It will place it into the iter->temp buffer. As most 4619 * events are less than 128, allocate a buffer of that size. 4620 * If one is greater, then trace_find_next_entry() will 4621 * allocate a new buffer to adjust for the bigger iter->ent. 4622 * It's not critical if it fails to get allocated here. 4623 */ 4624 iter->temp = kmalloc(128, GFP_KERNEL); 4625 if (iter->temp) 4626 iter->temp_size = 128; 4627 4628 /* 4629 * trace_event_printf() may need to modify the given format 4630 * string to replace %p with %px so that it shows the real address 4631 * instead of the hash value. However, that is only needed for event 4632 * tracing; other tracers may not need it. Defer the allocation 4633 * until it is needed. 4634 */ 4635 iter->fmt = NULL; 4636 iter->fmt_size = 0; 4637 4638 mutex_lock(&trace_types_lock); 4639 iter->trace = tr->current_trace; 4640 4641 if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL)) 4642 goto fail; 4643 4644 iter->tr = tr; 4645 4646 #ifdef CONFIG_TRACER_MAX_TRACE 4647 /* Currently only the top directory has a snapshot */ 4648 if (tr->current_trace->print_max || snapshot) 4649 iter->array_buffer = &tr->max_buffer; 4650 else 4651 #endif 4652 iter->array_buffer = &tr->array_buffer; 4653 iter->snapshot = snapshot; 4654 iter->pos = -1; 4655 iter->cpu_file = tracing_get_cpu(inode); 4656 mutex_init(&iter->mutex); 4657 4658 /* Notify the tracer early; before we stop tracing.
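* Some tracers use ->open() to set up per-iterator state (the function_graph tracer allocates its per-CPU data here, for instance), so give them the iterator before tracing is paused further down.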
*/ 4659 if (iter->trace->open) 4660 iter->trace->open(iter); 4661 4662 /* Annotate start of buffers if we had overruns */ 4663 if (ring_buffer_overruns(iter->array_buffer->buffer)) 4664 iter->iter_flags |= TRACE_FILE_ANNOTATE; 4665 4666 /* Output in nanoseconds only if we are using a clock in nanoseconds. */ 4667 if (trace_clocks[tr->clock_id].in_ns) 4668 iter->iter_flags |= TRACE_FILE_TIME_IN_NS; 4669 4670 /* 4671 * If pause-on-trace is enabled, then stop the trace while 4672 * dumping, unless this is the "snapshot" file 4673 */ 4674 if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE)) 4675 tracing_stop_tr(tr); 4676 4677 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) { 4678 for_each_tracing_cpu(cpu) { 4679 iter->buffer_iter[cpu] = 4680 ring_buffer_read_prepare(iter->array_buffer->buffer, 4681 cpu, GFP_KERNEL); 4682 } 4683 ring_buffer_read_prepare_sync(); 4684 for_each_tracing_cpu(cpu) { 4685 ring_buffer_read_start(iter->buffer_iter[cpu]); 4686 tracing_iter_reset(iter, cpu); 4687 } 4688 } else { 4689 cpu = iter->cpu_file; 4690 iter->buffer_iter[cpu] = 4691 ring_buffer_read_prepare(iter->array_buffer->buffer, 4692 cpu, GFP_KERNEL); 4693 ring_buffer_read_prepare_sync(); 4694 ring_buffer_read_start(iter->buffer_iter[cpu]); 4695 tracing_iter_reset(iter, cpu); 4696 } 4697 4698 mutex_unlock(&trace_types_lock); 4699 4700 return iter; 4701 4702 fail: 4703 mutex_unlock(&trace_types_lock); 4704 free_trace_iter_content(iter); 4705 release: 4706 seq_release_private(inode, file); 4707 return ERR_PTR(-ENOMEM); 4708 } 4709 4710 int tracing_open_generic(struct inode *inode, struct file *filp) 4711 { 4712 int ret; 4713 4714 ret = tracing_check_open_get_tr(NULL); 4715 if (ret) 4716 return ret; 4717 4718 filp->private_data = inode->i_private; 4719 return 0; 4720 } 4721 4722 bool tracing_is_disabled(void) 4723 { 4724 return (tracing_disabled) ? true: false; 4725 } 4726 4727 /* 4728 * Open and update trace_array ref count. 4729 * Must have the current trace_array passed to it. 4730 */ 4731 int tracing_open_generic_tr(struct inode *inode, struct file *filp) 4732 { 4733 struct trace_array *tr = inode->i_private; 4734 int ret; 4735 4736 ret = tracing_check_open_get_tr(tr); 4737 if (ret) 4738 return ret; 4739 4740 filp->private_data = inode->i_private; 4741 4742 return 0; 4743 } 4744 4745 /* 4746 * The private pointer of the inode is the trace_event_file. 4747 * Update the tr ref count associated to it. 
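* The open fails with -ENODEV if the event file was already marked EVENT_FILE_FL_FREED, i.e. the event is being removed while its file is still visible in tracefs.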
4748 */ 4749 int tracing_open_file_tr(struct inode *inode, struct file *filp) 4750 { 4751 struct trace_event_file *file = inode->i_private; 4752 int ret; 4753 4754 ret = tracing_check_open_get_tr(file->tr); 4755 if (ret) 4756 return ret; 4757 4758 mutex_lock(&event_mutex); 4759 4760 /* Fail if the file is marked for removal */ 4761 if (file->flags & EVENT_FILE_FL_FREED) { 4762 trace_array_put(file->tr); 4763 ret = -ENODEV; 4764 } else { 4765 event_file_get(file); 4766 } 4767 4768 mutex_unlock(&event_mutex); 4769 if (ret) 4770 return ret; 4771 4772 filp->private_data = inode->i_private; 4773 4774 return 0; 4775 } 4776 4777 int tracing_release_file_tr(struct inode *inode, struct file *filp) 4778 { 4779 struct trace_event_file *file = inode->i_private; 4780 4781 trace_array_put(file->tr); 4782 event_file_put(file); 4783 4784 return 0; 4785 } 4786 4787 int tracing_single_release_file_tr(struct inode *inode, struct file *filp) 4788 { 4789 tracing_release_file_tr(inode, filp); 4790 return single_release(inode, filp); 4791 } 4792 4793 static int tracing_mark_open(struct inode *inode, struct file *filp) 4794 { 4795 stream_open(inode, filp); 4796 return tracing_open_generic_tr(inode, filp); 4797 } 4798 4799 static int tracing_release(struct inode *inode, struct file *file) 4800 { 4801 struct trace_array *tr = inode->i_private; 4802 struct seq_file *m = file->private_data; 4803 struct trace_iterator *iter; 4804 int cpu; 4805 4806 if (!(file->f_mode & FMODE_READ)) { 4807 trace_array_put(tr); 4808 return 0; 4809 } 4810 4811 /* Writes do not use seq_file */ 4812 iter = m->private; 4813 mutex_lock(&trace_types_lock); 4814 4815 for_each_tracing_cpu(cpu) { 4816 if (iter->buffer_iter[cpu]) 4817 ring_buffer_read_finish(iter->buffer_iter[cpu]); 4818 } 4819 4820 if (iter->trace && iter->trace->close) 4821 iter->trace->close(iter); 4822 4823 if (!iter->snapshot && tr->stop_count) 4824 /* reenable tracing if it was previously enabled */ 4825 tracing_start_tr(tr); 4826 4827 __trace_array_put(tr); 4828 4829 mutex_unlock(&trace_types_lock); 4830 4831 free_trace_iter_content(iter); 4832 seq_release_private(inode, file); 4833 4834 return 0; 4835 } 4836 4837 int tracing_release_generic_tr(struct inode *inode, struct file *file) 4838 { 4839 struct trace_array *tr = inode->i_private; 4840 4841 trace_array_put(tr); 4842 return 0; 4843 } 4844 4845 static int tracing_single_release_tr(struct inode *inode, struct file *file) 4846 { 4847 struct trace_array *tr = inode->i_private; 4848 4849 trace_array_put(tr); 4850 4851 return single_release(inode, file); 4852 } 4853 4854 static int tracing_open(struct inode *inode, struct file *file) 4855 { 4856 struct trace_array *tr = inode->i_private; 4857 struct trace_iterator *iter; 4858 int ret; 4859 4860 ret = tracing_check_open_get_tr(tr); 4861 if (ret) 4862 return ret; 4863 4864 /* If this file was open for write, then erase contents */ 4865 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) { 4866 int cpu = tracing_get_cpu(inode); 4867 struct array_buffer *trace_buf = &tr->array_buffer; 4868 4869 #ifdef CONFIG_TRACER_MAX_TRACE 4870 if (tr->current_trace->print_max) 4871 trace_buf = &tr->max_buffer; 4872 #endif 4873 4874 if (cpu == RING_BUFFER_ALL_CPUS) 4875 tracing_reset_online_cpus(trace_buf); 4876 else 4877 tracing_reset_cpu(trace_buf, cpu); 4878 } 4879 4880 if (file->f_mode & FMODE_READ) { 4881 iter = __tracing_open(inode, file, false); 4882 if (IS_ERR(iter)) 4883 ret = PTR_ERR(iter); 4884 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT) 4885 iter->iter_flags |= 
TRACE_FILE_LAT_FMT; 4886 } 4887 4888 if (ret < 0) 4889 trace_array_put(tr); 4890 4891 return ret; 4892 } 4893 4894 /* 4895 * Some tracers are not suitable for instance buffers. 4896 * A tracer is always available for the global array (toplevel) 4897 * or if it explicitly states that it is. 4898 */ 4899 static bool 4900 trace_ok_for_array(struct tracer *t, struct trace_array *tr) 4901 { 4902 return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances; 4903 } 4904 4905 /* Find the next tracer that this trace array may use */ 4906 static struct tracer * 4907 get_tracer_for_array(struct trace_array *tr, struct tracer *t) 4908 { 4909 while (t && !trace_ok_for_array(t, tr)) 4910 t = t->next; 4911 4912 return t; 4913 } 4914 4915 static void * 4916 t_next(struct seq_file *m, void *v, loff_t *pos) 4917 { 4918 struct trace_array *tr = m->private; 4919 struct tracer *t = v; 4920 4921 (*pos)++; 4922 4923 if (t) 4924 t = get_tracer_for_array(tr, t->next); 4925 4926 return t; 4927 } 4928 4929 static void *t_start(struct seq_file *m, loff_t *pos) 4930 { 4931 struct trace_array *tr = m->private; 4932 struct tracer *t; 4933 loff_t l = 0; 4934 4935 mutex_lock(&trace_types_lock); 4936 4937 t = get_tracer_for_array(tr, trace_types); 4938 for (; t && l < *pos; t = t_next(m, t, &l)) 4939 ; 4940 4941 return t; 4942 } 4943 4944 static void t_stop(struct seq_file *m, void *p) 4945 { 4946 mutex_unlock(&trace_types_lock); 4947 } 4948 4949 static int t_show(struct seq_file *m, void *v) 4950 { 4951 struct tracer *t = v; 4952 4953 if (!t) 4954 return 0; 4955 4956 seq_puts(m, t->name); 4957 if (t->next) 4958 seq_putc(m, ' '); 4959 else 4960 seq_putc(m, '\n'); 4961 4962 return 0; 4963 } 4964 4965 static const struct seq_operations show_traces_seq_ops = { 4966 .start = t_start, 4967 .next = t_next, 4968 .stop = t_stop, 4969 .show = t_show, 4970 }; 4971 4972 static int show_traces_open(struct inode *inode, struct file *file) 4973 { 4974 struct trace_array *tr = inode->i_private; 4975 struct seq_file *m; 4976 int ret; 4977 4978 ret = tracing_check_open_get_tr(tr); 4979 if (ret) 4980 return ret; 4981 4982 ret = seq_open(file, &show_traces_seq_ops); 4983 if (ret) { 4984 trace_array_put(tr); 4985 return ret; 4986 } 4987 4988 m = file->private_data; 4989 m->private = tr; 4990 4991 return 0; 4992 } 4993 4994 static int show_traces_release(struct inode *inode, struct file *file) 4995 { 4996 struct trace_array *tr = inode->i_private; 4997 4998 trace_array_put(tr); 4999 return seq_release(inode, file); 5000 } 5001 5002 static ssize_t 5003 tracing_write_stub(struct file *filp, const char __user *ubuf, 5004 size_t count, loff_t *ppos) 5005 { 5006 return count; 5007 } 5008 5009 loff_t tracing_lseek(struct file *file, loff_t offset, int whence) 5010 { 5011 int ret; 5012 5013 if (file->f_mode & FMODE_READ) 5014 ret = seq_lseek(file, offset, whence); 5015 else 5016 file->f_pos = ret = 0; 5017 5018 return ret; 5019 } 5020 5021 static const struct file_operations tracing_fops = { 5022 .open = tracing_open, 5023 .read = seq_read, 5024 .read_iter = seq_read_iter, 5025 .splice_read = copy_splice_read, 5026 .write = tracing_write_stub, 5027 .llseek = tracing_lseek, 5028 .release = tracing_release, 5029 }; 5030 5031 static const struct file_operations show_traces_fops = { 5032 .open = show_traces_open, 5033 .read = seq_read, 5034 .llseek = seq_lseek, 5035 .release = show_traces_release, 5036 }; 5037 5038 static ssize_t 5039 tracing_cpumask_read(struct file *filp, char __user *ubuf, 5040 size_t count, loff_t *ppos) 5041 { 5042 struct 
trace_array *tr = file_inode(filp)->i_private; 5043 char *mask_str; 5044 int len; 5045 5046 len = snprintf(NULL, 0, "%*pb\n", 5047 cpumask_pr_args(tr->tracing_cpumask)) + 1; 5048 mask_str = kmalloc(len, GFP_KERNEL); 5049 if (!mask_str) 5050 return -ENOMEM; 5051 5052 len = snprintf(mask_str, len, "%*pb\n", 5053 cpumask_pr_args(tr->tracing_cpumask)); 5054 if (len >= count) { 5055 count = -EINVAL; 5056 goto out_err; 5057 } 5058 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len); 5059 5060 out_err: 5061 kfree(mask_str); 5062 5063 return count; 5064 } 5065 5066 int tracing_set_cpumask(struct trace_array *tr, 5067 cpumask_var_t tracing_cpumask_new) 5068 { 5069 int cpu; 5070 5071 if (!tr) 5072 return -EINVAL; 5073 5074 local_irq_disable(); 5075 arch_spin_lock(&tr->max_lock); 5076 for_each_tracing_cpu(cpu) { 5077 /* 5078 * Increase/decrease the disabled counter if we are 5079 * about to flip a bit in the cpumask: 5080 */ 5081 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) && 5082 !cpumask_test_cpu(cpu, tracing_cpumask_new)) { 5083 atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled); 5084 ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu); 5085 #ifdef CONFIG_TRACER_MAX_TRACE 5086 ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu); 5087 #endif 5088 } 5089 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) && 5090 cpumask_test_cpu(cpu, tracing_cpumask_new)) { 5091 atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled); 5092 ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu); 5093 #ifdef CONFIG_TRACER_MAX_TRACE 5094 ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu); 5095 #endif 5096 } 5097 } 5098 arch_spin_unlock(&tr->max_lock); 5099 local_irq_enable(); 5100 5101 cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new); 5102 5103 return 0; 5104 } 5105 5106 static ssize_t 5107 tracing_cpumask_write(struct file *filp, const char __user *ubuf, 5108 size_t count, loff_t *ppos) 5109 { 5110 struct trace_array *tr = file_inode(filp)->i_private; 5111 cpumask_var_t tracing_cpumask_new; 5112 int err; 5113 5114 if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL)) 5115 return -ENOMEM; 5116 5117 err = cpumask_parse_user(ubuf, count, tracing_cpumask_new); 5118 if (err) 5119 goto err_free; 5120 5121 err = tracing_set_cpumask(tr, tracing_cpumask_new); 5122 if (err) 5123 goto err_free; 5124 5125 free_cpumask_var(tracing_cpumask_new); 5126 5127 return count; 5128 5129 err_free: 5130 free_cpumask_var(tracing_cpumask_new); 5131 5132 return err; 5133 } 5134 5135 static const struct file_operations tracing_cpumask_fops = { 5136 .open = tracing_open_generic_tr, 5137 .read = tracing_cpumask_read, 5138 .write = tracing_cpumask_write, 5139 .release = tracing_release_generic_tr, 5140 .llseek = generic_file_llseek, 5141 }; 5142 5143 static int tracing_trace_options_show(struct seq_file *m, void *v) 5144 { 5145 struct tracer_opt *trace_opts; 5146 struct trace_array *tr = m->private; 5147 u32 tracer_flags; 5148 int i; 5149 5150 mutex_lock(&trace_types_lock); 5151 tracer_flags = tr->current_trace->flags->val; 5152 trace_opts = tr->current_trace->flags->opts; 5153 5154 for (i = 0; trace_options[i]; i++) { 5155 if (tr->trace_flags & (1 << i)) 5156 seq_printf(m, "%s\n", trace_options[i]); 5157 else 5158 seq_printf(m, "no%s\n", trace_options[i]); 5159 } 5160 5161 for (i = 0; trace_opts[i].name; i++) { 5162 if (tracer_flags & trace_opts[i].bit) 5163 seq_printf(m, "%s\n", trace_opts[i].name); 5164 else 5165 seq_printf(m, "no%s\n", trace_opts[i].name); 5166 } 5167 
mutex_unlock(&trace_types_lock); 5168 5169 return 0; 5170 } 5171 5172 static int __set_tracer_option(struct trace_array *tr, 5173 struct tracer_flags *tracer_flags, 5174 struct tracer_opt *opts, int neg) 5175 { 5176 struct tracer *trace = tracer_flags->trace; 5177 int ret; 5178 5179 ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg); 5180 if (ret) 5181 return ret; 5182 5183 if (neg) 5184 tracer_flags->val &= ~opts->bit; 5185 else 5186 tracer_flags->val |= opts->bit; 5187 return 0; 5188 } 5189 5190 /* Try to assign a tracer specific option */ 5191 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg) 5192 { 5193 struct tracer *trace = tr->current_trace; 5194 struct tracer_flags *tracer_flags = trace->flags; 5195 struct tracer_opt *opts = NULL; 5196 int i; 5197 5198 for (i = 0; tracer_flags->opts[i].name; i++) { 5199 opts = &tracer_flags->opts[i]; 5200 5201 if (strcmp(cmp, opts->name) == 0) 5202 return __set_tracer_option(tr, trace->flags, opts, neg); 5203 } 5204 5205 return -EINVAL; 5206 } 5207 5208 /* Some tracers require overwrite to stay enabled */ 5209 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set) 5210 { 5211 if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set) 5212 return -1; 5213 5214 return 0; 5215 } 5216 5217 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled) 5218 { 5219 if ((mask == TRACE_ITER_RECORD_TGID) || 5220 (mask == TRACE_ITER_RECORD_CMD)) 5221 lockdep_assert_held(&event_mutex); 5222 5223 /* do nothing if flag is already set */ 5224 if (!!(tr->trace_flags & mask) == !!enabled) 5225 return 0; 5226 5227 /* Give the tracer a chance to approve the change */ 5228 if (tr->current_trace->flag_changed) 5229 if (tr->current_trace->flag_changed(tr, mask, !!enabled)) 5230 return -EINVAL; 5231 5232 if (enabled) 5233 tr->trace_flags |= mask; 5234 else 5235 tr->trace_flags &= ~mask; 5236 5237 if (mask == TRACE_ITER_RECORD_CMD) 5238 trace_event_enable_cmd_record(enabled); 5239 5240 if (mask == TRACE_ITER_RECORD_TGID) { 5241 5242 if (trace_alloc_tgid_map() < 0) { 5243 tr->trace_flags &= ~TRACE_ITER_RECORD_TGID; 5244 return -ENOMEM; 5245 } 5246 5247 trace_event_enable_tgid_record(enabled); 5248 } 5249 5250 if (mask == TRACE_ITER_EVENT_FORK) 5251 trace_event_follow_fork(tr, enabled); 5252 5253 if (mask == TRACE_ITER_FUNC_FORK) 5254 ftrace_pid_follow_fork(tr, enabled); 5255 5256 if (mask == TRACE_ITER_OVERWRITE) { 5257 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled); 5258 #ifdef CONFIG_TRACER_MAX_TRACE 5259 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled); 5260 #endif 5261 } 5262 5263 if (mask == TRACE_ITER_PRINTK) { 5264 trace_printk_start_stop_comm(enabled); 5265 trace_printk_control(enabled); 5266 } 5267 5268 return 0; 5269 } 5270 5271 int trace_set_options(struct trace_array *tr, char *option) 5272 { 5273 char *cmp; 5274 int neg = 0; 5275 int ret; 5276 size_t orig_len = strlen(option); 5277 int len; 5278 5279 cmp = strstrip(option); 5280 5281 len = str_has_prefix(cmp, "no"); 5282 if (len) 5283 neg = 1; 5284 5285 cmp += len; 5286 5287 mutex_lock(&event_mutex); 5288 mutex_lock(&trace_types_lock); 5289 5290 ret = match_string(trace_options, -1, cmp); 5291 /* If no option could be set, test the specific tracer options */ 5292 if (ret < 0) 5293 ret = set_tracer_option(tr, cmp, neg); 5294 else 5295 ret = set_tracer_flag(tr, 1 << ret, !neg); 5296 5297 mutex_unlock(&trace_types_lock); 5298 mutex_unlock(&event_mutex); 5299 5300 /* 5301 * If the first trailing whitespace is replaced with 
'\0' by strstrip, 5302 * turn it back into a space. 5303 */ 5304 if (orig_len > strlen(option)) 5305 option[strlen(option)] = ' '; 5306 5307 return ret; 5308 } 5309 5310 static void __init apply_trace_boot_options(void) 5311 { 5312 char *buf = trace_boot_options_buf; 5313 char *option; 5314 5315 while (true) { 5316 option = strsep(&buf, ","); 5317 5318 if (!option) 5319 break; 5320 5321 if (*option) 5322 trace_set_options(&global_trace, option); 5323 5324 /* Put back the comma to allow this to be called again */ 5325 if (buf) 5326 *(buf - 1) = ','; 5327 } 5328 } 5329 5330 static ssize_t 5331 tracing_trace_options_write(struct file *filp, const char __user *ubuf, 5332 size_t cnt, loff_t *ppos) 5333 { 5334 struct seq_file *m = filp->private_data; 5335 struct trace_array *tr = m->private; 5336 char buf[64]; 5337 int ret; 5338 5339 if (cnt >= sizeof(buf)) 5340 return -EINVAL; 5341 5342 if (copy_from_user(buf, ubuf, cnt)) 5343 return -EFAULT; 5344 5345 buf[cnt] = 0; 5346 5347 ret = trace_set_options(tr, buf); 5348 if (ret < 0) 5349 return ret; 5350 5351 *ppos += cnt; 5352 5353 return cnt; 5354 } 5355 5356 static int tracing_trace_options_open(struct inode *inode, struct file *file) 5357 { 5358 struct trace_array *tr = inode->i_private; 5359 int ret; 5360 5361 ret = tracing_check_open_get_tr(tr); 5362 if (ret) 5363 return ret; 5364 5365 ret = single_open(file, tracing_trace_options_show, inode->i_private); 5366 if (ret < 0) 5367 trace_array_put(tr); 5368 5369 return ret; 5370 } 5371 5372 static const struct file_operations tracing_iter_fops = { 5373 .open = tracing_trace_options_open, 5374 .read = seq_read, 5375 .llseek = seq_lseek, 5376 .release = tracing_single_release_tr, 5377 .write = tracing_trace_options_write, 5378 }; 5379 5380 static const char readme_msg[] = 5381 "tracing mini-HOWTO:\n\n" 5382 "# echo 0 > tracing_on : quick way to disable tracing\n" 5383 "# echo 1 > tracing_on : quick way to re-enable tracing\n\n" 5384 " Important files:\n" 5385 " trace\t\t\t- The static contents of the buffer\n" 5386 "\t\t\t To clear the buffer write into this file: echo > trace\n" 5387 " trace_pipe\t\t- A consuming read to see the contents of the buffer\n" 5388 " current_tracer\t- function and latency tracers\n" 5389 " available_tracers\t- list of configured tracers for current_tracer\n" 5390 " error_log\t- error log for failed commands (that support it)\n" 5391 " buffer_size_kb\t- view and modify size of per cpu buffer\n" 5392 " buffer_total_size_kb - view total size of all cpu buffers\n\n" 5393 " trace_clock\t\t- change the clock used to order events\n" 5394 " local: Per cpu clock but may not be synced across CPUs\n" 5395 " global: Synced across CPUs but slows tracing down.\n" 5396 " counter: Not a clock, but just an increment\n" 5397 " uptime: Jiffy counter from time of boot\n" 5398 " perf: Same clock that perf events use\n" 5399 #ifdef CONFIG_X86_64 5400 " x86-tsc: TSC cycle counter\n" 5401 #endif 5402 "\n timestamp_mode\t- view the mode used to timestamp events\n" 5403 " delta: Delta difference against a buffer-wide timestamp\n" 5404 " absolute: Absolute (standalone) timestamp\n" 5405 "\n trace_marker\t\t- Writes into this file writes into the kernel buffer\n" 5406 "\n trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n" 5407 " tracing_cpumask\t- Limit which CPUs to trace\n" 5408 " instances\t\t- Make sub-buffers with: mkdir instances/foo\n" 5409 "\t\t\t Remove sub-buffer with rmdir\n" 5410 " trace_options\t\t- Set format or modify how tracing happens\n" 5411 
"\t\t\t Disable an option by prefixing 'no' to the\n" 5412 "\t\t\t option name\n" 5413 " saved_cmdlines_size\t- echo command number in here to store comm-pid list\n" 5414 #ifdef CONFIG_DYNAMIC_FTRACE 5415 "\n available_filter_functions - list of functions that can be filtered on\n" 5416 " set_ftrace_filter\t- echo function name in here to only trace these\n" 5417 "\t\t\t functions\n" 5418 "\t accepts: func_full_name or glob-matching-pattern\n" 5419 "\t modules: Can select a group via module\n" 5420 "\t Format: :mod:<module-name>\n" 5421 "\t example: echo :mod:ext3 > set_ftrace_filter\n" 5422 "\t triggers: a command to perform when function is hit\n" 5423 "\t Format: <function>:<trigger>[:count]\n" 5424 "\t trigger: traceon, traceoff\n" 5425 "\t\t enable_event:<system>:<event>\n" 5426 "\t\t disable_event:<system>:<event>\n" 5427 #ifdef CONFIG_STACKTRACE 5428 "\t\t stacktrace\n" 5429 #endif 5430 #ifdef CONFIG_TRACER_SNAPSHOT 5431 "\t\t snapshot\n" 5432 #endif 5433 "\t\t dump\n" 5434 "\t\t cpudump\n" 5435 "\t example: echo do_fault:traceoff > set_ftrace_filter\n" 5436 "\t echo do_trap:traceoff:3 > set_ftrace_filter\n" 5437 "\t The first one will disable tracing every time do_fault is hit\n" 5438 "\t The second will disable tracing at most 3 times when do_trap is hit\n" 5439 "\t The first time do trap is hit and it disables tracing, the\n" 5440 "\t counter will decrement to 2. If tracing is already disabled,\n" 5441 "\t the counter will not decrement. It only decrements when the\n" 5442 "\t trigger did work\n" 5443 "\t To remove trigger without count:\n" 5444 "\t echo '!<function>:<trigger> > set_ftrace_filter\n" 5445 "\t To remove trigger with a count:\n" 5446 "\t echo '!<function>:<trigger>:0 > set_ftrace_filter\n" 5447 " set_ftrace_notrace\t- echo function name in here to never trace.\n" 5448 "\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n" 5449 "\t modules: Can select a group via module command :mod:\n" 5450 "\t Does not accept triggers\n" 5451 #endif /* CONFIG_DYNAMIC_FTRACE */ 5452 #ifdef CONFIG_FUNCTION_TRACER 5453 " set_ftrace_pid\t- Write pid(s) to only function trace those pids\n" 5454 "\t\t (function)\n" 5455 " set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n" 5456 "\t\t (function)\n" 5457 #endif 5458 #ifdef CONFIG_FUNCTION_GRAPH_TRACER 5459 " set_graph_function\t- Trace the nested calls of a function (function_graph)\n" 5460 " set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n" 5461 " max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n" 5462 #endif 5463 #ifdef CONFIG_TRACER_SNAPSHOT 5464 "\n snapshot\t\t- Like 'trace' but shows the content of the static\n" 5465 "\t\t\t snapshot buffer. 
Read the contents for more\n" 5466 "\t\t\t information\n" 5467 #endif 5468 #ifdef CONFIG_STACK_TRACER 5469 " stack_trace\t\t- Shows the max stack trace when active\n" 5470 " stack_max_size\t- Shows current max stack size that was traced\n" 5471 "\t\t\t Write into this file to reset the max size (trigger a\n" 5472 "\t\t\t new trace)\n" 5473 #ifdef CONFIG_DYNAMIC_FTRACE 5474 " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n" 5475 "\t\t\t traces\n" 5476 #endif 5477 #endif /* CONFIG_STACK_TRACER */ 5478 #ifdef CONFIG_DYNAMIC_EVENTS 5479 " dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n" 5480 "\t\t\t Write into this file to define/undefine new trace events.\n" 5481 #endif 5482 #ifdef CONFIG_KPROBE_EVENTS 5483 " kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n" 5484 "\t\t\t Write into this file to define/undefine new trace events.\n" 5485 #endif 5486 #ifdef CONFIG_UPROBE_EVENTS 5487 " uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n" 5488 "\t\t\t Write into this file to define/undefine new trace events.\n" 5489 #endif 5490 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \ 5491 defined(CONFIG_FPROBE_EVENTS) 5492 "\t accepts: event-definitions (one definition per line)\n" 5493 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) 5494 "\t Format: p[:[<group>/][<event>]] <place> [<args>]\n" 5495 "\t r[maxactive][:[<group>/][<event>]] <place> [<args>]\n" 5496 #endif 5497 #ifdef CONFIG_FPROBE_EVENTS 5498 "\t f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n" 5499 "\t t[:[<group>/][<event>]] <tracepoint> [<args>]\n" 5500 #endif 5501 #ifdef CONFIG_HIST_TRIGGERS 5502 "\t s:[synthetic/]<event> <field> [<field>]\n" 5503 #endif 5504 "\t e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n" 5505 "\t -:[<group>/][<event>]\n" 5506 #ifdef CONFIG_KPROBE_EVENTS 5507 "\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n" 5508 "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n" 5509 #endif 5510 #ifdef CONFIG_UPROBE_EVENTS 5511 " place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n" 5512 #endif 5513 "\t args: <name>=fetcharg[:type]\n" 5514 "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n" 5515 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API 5516 "\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n" 5517 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS 5518 "\t <argname>[->field[->field|.field...]],\n" 5519 #endif 5520 #else 5521 "\t $stack<index>, $stack, $retval, $comm,\n" 5522 #endif 5523 "\t +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n" 5524 "\t kernel return probes support: $retval, $arg<N>, $comm\n" 5525 "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n" 5526 "\t b<bit-width>@<bit-offset>/<container-size>, ustring,\n" 5527 "\t symstr, <type>\\[<array-size>\\]\n" 5528 #ifdef CONFIG_HIST_TRIGGERS 5529 "\t field: <stype> <name>;\n" 5530 "\t stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n" 5531 "\t [unsigned] char/int/long\n" 5532 #endif 5533 "\t efield: For event probes ('e' types), the field is one of the fields\n" 5534 "\t of the <attached-group>/<attached-event>.\n" 5535 #endif 5536 " events/\t\t- Directory containing all trace event subsystems:\n" 5537 " enable\t\t- Write 0/1 to enable/disable tracing of all events\n" 5538 " events/<system>/\t- Directory containing all trace events for <system>:\n" 5539 " enable\t\t- Write 0/1 to enable/disable tracing of all
<system>\n" 5540 "\t\t\t events\n" 5541 " filter\t\t- If set, only events passing filter are traced\n" 5542 " events/<system>/<event>/\t- Directory containing control files for\n" 5543 "\t\t\t <event>:\n" 5544 " enable\t\t- Write 0/1 to enable/disable tracing of <event>\n" 5545 " filter\t\t- If set, only events passing filter are traced\n" 5546 " trigger\t\t- If set, a command to perform when event is hit\n" 5547 "\t Format: <trigger>[:count][if <filter>]\n" 5548 "\t trigger: traceon, traceoff\n" 5549 "\t enable_event:<system>:<event>\n" 5550 "\t disable_event:<system>:<event>\n" 5551 #ifdef CONFIG_HIST_TRIGGERS 5552 "\t enable_hist:<system>:<event>\n" 5553 "\t disable_hist:<system>:<event>\n" 5554 #endif 5555 #ifdef CONFIG_STACKTRACE 5556 "\t\t stacktrace\n" 5557 #endif 5558 #ifdef CONFIG_TRACER_SNAPSHOT 5559 "\t\t snapshot\n" 5560 #endif 5561 #ifdef CONFIG_HIST_TRIGGERS 5562 "\t\t hist (see below)\n" 5563 #endif 5564 "\t example: echo traceoff > events/block/block_unplug/trigger\n" 5565 "\t echo traceoff:3 > events/block/block_unplug/trigger\n" 5566 "\t echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n" 5567 "\t events/block/block_unplug/trigger\n" 5568 "\t The first disables tracing every time block_unplug is hit.\n" 5569 "\t The second disables tracing the first 3 times block_unplug is hit.\n" 5570 "\t The third enables the kmalloc event the first 3 times block_unplug\n" 5571 "\t is hit and has value of greater than 1 for the 'nr_rq' event field.\n" 5572 "\t Like function triggers, the counter is only decremented if it\n" 5573 "\t enabled or disabled tracing.\n" 5574 "\t To remove a trigger without a count:\n" 5575 "\t echo '!<trigger> > <system>/<event>/trigger\n" 5576 "\t To remove a trigger with a count:\n" 5577 "\t echo '!<trigger>:0 > <system>/<event>/trigger\n" 5578 "\t Filters can be ignored when removing a trigger.\n" 5579 #ifdef CONFIG_HIST_TRIGGERS 5580 " hist trigger\t- If set, event hits are aggregated into a hash table\n" 5581 "\t Format: hist:keys=<field1[,field2,...]>\n" 5582 "\t [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n" 5583 "\t [:values=<field1[,field2,...]>]\n" 5584 "\t [:sort=<field1[,field2,...]>]\n" 5585 "\t [:size=#entries]\n" 5586 "\t [:pause][:continue][:clear]\n" 5587 "\t [:name=histname1]\n" 5588 "\t [:nohitcount]\n" 5589 "\t [:<handler>.<action>]\n" 5590 "\t [if <filter>]\n\n" 5591 "\t Note, special fields can be used as well:\n" 5592 "\t common_timestamp - to record current timestamp\n" 5593 "\t common_cpu - to record the CPU the event happened on\n" 5594 "\n" 5595 "\t A hist trigger variable can be:\n" 5596 "\t - a reference to a field e.g. x=current_timestamp,\n" 5597 "\t - a reference to another variable e.g. y=$x,\n" 5598 "\t - a numeric literal: e.g. ms_per_sec=1000,\n" 5599 "\t - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n" 5600 "\n" 5601 "\t hist trigger arithmetic expressions support addition(+), subtraction(-),\n" 5602 "\t multiplication(*) and division(/) operators. An operand can be either a\n" 5603 "\t variable reference, field or numeric literal.\n" 5604 "\n" 5605 "\t When a matching event is hit, an entry is added to a hash\n" 5606 "\t table using the key(s) and value(s) named, and the value of a\n" 5607 "\t sum called 'hitcount' is incremented. Keys and values\n" 5608 "\t correspond to fields in the event's format description. 
Keys\n" 5609 "\t can be any field, or the special string 'common_stacktrace'.\n" 5610 "\t Compound keys consisting of up to two fields can be specified\n" 5611 "\t by the 'keys' keyword. Values must correspond to numeric\n" 5612 "\t fields. Sort keys consisting of up to two fields can be\n" 5613 "\t specified using the 'sort' keyword. The sort direction can\n" 5614 "\t be modified by appending '.descending' or '.ascending' to a\n" 5615 "\t sort field. The 'size' parameter can be used to specify more\n" 5616 "\t or fewer than the default 2048 entries for the hashtable size.\n" 5617 "\t If a hist trigger is given a name using the 'name' parameter,\n" 5618 "\t its histogram data will be shared with other triggers of the\n" 5619 "\t same name, and trigger hits will update this common data.\n\n" 5620 "\t Reading the 'hist' file for the event will dump the hash\n" 5621 "\t table in its entirety to stdout. If there are multiple hist\n" 5622 "\t triggers attached to an event, there will be a table for each\n" 5623 "\t trigger in the output. The table displayed for a named\n" 5624 "\t trigger will be the same as any other instance having the\n" 5625 "\t same name. The default format used to display a given field\n" 5626 "\t can be modified by appending any of the following modifiers\n" 5627 "\t to the field name, as applicable:\n\n" 5628 "\t .hex display a number as a hex value\n" 5629 "\t .sym display an address as a symbol\n" 5630 "\t .sym-offset display an address as a symbol and offset\n" 5631 "\t .execname display a common_pid as a program name\n" 5632 "\t .syscall display a syscall id as a syscall name\n" 5633 "\t .log2 display log2 value rather than raw number\n" 5634 "\t .buckets=size display values in groups of size rather than raw number\n" 5635 "\t .usecs display a common_timestamp in microseconds\n" 5636 "\t .percent display a number as a percentage value\n" 5637 "\t .graph display a bar-graph of a value\n\n" 5638 "\t The 'pause' parameter can be used to pause an existing hist\n" 5639 "\t trigger or to start a hist trigger but not log any events\n" 5640 "\t until told to do so. 'continue' can be used to start or\n" 5641 "\t restart a paused hist trigger.\n\n" 5642 "\t The 'clear' parameter will clear the contents of a running\n" 5643 "\t hist trigger and leave its current paused/active state\n" 5644 "\t unchanged.\n\n" 5645 "\t The 'nohitcount' (or NOHC) parameter will suppress display of\n" 5646 "\t the raw hitcount in the histogram.\n\n" 5647 "\t The enable_hist and disable_hist triggers can be used to\n" 5648 "\t have one event conditionally start and stop another event's\n" 5649 "\t already-attached hist trigger. The syntax is analogous to\n" 5650 "\t the enable_event and disable_event triggers.\n\n" 5651 "\t Hist trigger handlers and actions are executed whenever\n" 5652 "\t a histogram entry is added or updated. They take the form:\n\n" 5653 "\t <handler>.<action>\n\n" 5654 "\t The available handlers are:\n\n" 5655 "\t onmatch(matching.event) - invoke on addition or update\n" 5656 "\t onmax(var) - invoke if var exceeds current max\n" 5657 "\t onchange(var) - invoke action if var changes\n\n" 5658 "\t The available actions are:\n\n" 5659 "\t trace(<synthetic_event>,param list) - generate synthetic event\n" 5660 "\t save(field,...)
- save current event fields\n" 5661 #ifdef CONFIG_TRACER_SNAPSHOT 5662 "\t snapshot() - snapshot the trace buffer\n\n" 5663 #endif 5664 #ifdef CONFIG_SYNTH_EVENTS 5665 " events/synthetic_events\t- Create/append/remove/show synthetic events\n" 5666 "\t Write into this file to define/undefine new synthetic events.\n" 5667 "\t example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n" 5668 #endif 5669 #endif 5670 ; 5671 5672 static ssize_t 5673 tracing_readme_read(struct file *filp, char __user *ubuf, 5674 size_t cnt, loff_t *ppos) 5675 { 5676 return simple_read_from_buffer(ubuf, cnt, ppos, 5677 readme_msg, strlen(readme_msg)); 5678 } 5679 5680 static const struct file_operations tracing_readme_fops = { 5681 .open = tracing_open_generic, 5682 .read = tracing_readme_read, 5683 .llseek = generic_file_llseek, 5684 }; 5685 5686 #ifdef CONFIG_TRACE_EVAL_MAP_FILE 5687 static union trace_eval_map_item * 5688 update_eval_map(union trace_eval_map_item *ptr) 5689 { 5690 if (!ptr->map.eval_string) { 5691 if (ptr->tail.next) { 5692 ptr = ptr->tail.next; 5693 /* Set ptr to the next real item (skip head) */ 5694 ptr++; 5695 } else 5696 return NULL; 5697 } 5698 return ptr; 5699 } 5700 5701 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos) 5702 { 5703 union trace_eval_map_item *ptr = v; 5704 5705 /* 5706 * Paranoid! If ptr points to end, we don't want to increment past it. 5707 * This really should never happen. 5708 */ 5709 (*pos)++; 5710 ptr = update_eval_map(ptr); 5711 if (WARN_ON_ONCE(!ptr)) 5712 return NULL; 5713 5714 ptr++; 5715 ptr = update_eval_map(ptr); 5716 5717 return ptr; 5718 } 5719 5720 static void *eval_map_start(struct seq_file *m, loff_t *pos) 5721 { 5722 union trace_eval_map_item *v; 5723 loff_t l = 0; 5724 5725 mutex_lock(&trace_eval_mutex); 5726 5727 v = trace_eval_maps; 5728 if (v) 5729 v++; 5730 5731 while (v && l < *pos) { 5732 v = eval_map_next(m, v, &l); 5733 } 5734 5735 return v; 5736 } 5737 5738 static void eval_map_stop(struct seq_file *m, void *v) 5739 { 5740 mutex_unlock(&trace_eval_mutex); 5741 } 5742 5743 static int eval_map_show(struct seq_file *m, void *v) 5744 { 5745 union trace_eval_map_item *ptr = v; 5746 5747 seq_printf(m, "%s %ld (%s)\n", 5748 ptr->map.eval_string, ptr->map.eval_value, 5749 ptr->map.system); 5750 5751 return 0; 5752 } 5753 5754 static const struct seq_operations tracing_eval_map_seq_ops = { 5755 .start = eval_map_start, 5756 .next = eval_map_next, 5757 .stop = eval_map_stop, 5758 .show = eval_map_show, 5759 }; 5760 5761 static int tracing_eval_map_open(struct inode *inode, struct file *filp) 5762 { 5763 int ret; 5764 5765 ret = tracing_check_open_get_tr(NULL); 5766 if (ret) 5767 return ret; 5768 5769 return seq_open(filp, &tracing_eval_map_seq_ops); 5770 } 5771 5772 static const struct file_operations tracing_eval_map_fops = { 5773 .open = tracing_eval_map_open, 5774 .read = seq_read, 5775 .llseek = seq_lseek, 5776 .release = seq_release, 5777 }; 5778 5779 static inline union trace_eval_map_item * 5780 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr) 5781 { 5782 /* Return tail of array given the head */ 5783 return ptr + ptr->head.length + 1; 5784 } 5785 5786 static void 5787 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start, 5788 int len) 5789 { 5790 struct trace_eval_map **stop; 5791 struct trace_eval_map **map; 5792 union trace_eval_map_item *map_array; 5793 union trace_eval_map_item *ptr; 5794 5795 stop = start + len; 5796 5797 /* 5798 * The trace_eval_maps contains the map 
plus a head and tail item, 5799 * where the head holds the module and length of array, and the 5800 * tail holds a pointer to the next list. 5801 */ 5802 map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL); 5803 if (!map_array) { 5804 pr_warn("Unable to allocate trace eval mapping\n"); 5805 return; 5806 } 5807 5808 mutex_lock(&trace_eval_mutex); 5809 5810 if (!trace_eval_maps) 5811 trace_eval_maps = map_array; 5812 else { 5813 ptr = trace_eval_maps; 5814 for (;;) { 5815 ptr = trace_eval_jmp_to_tail(ptr); 5816 if (!ptr->tail.next) 5817 break; 5818 ptr = ptr->tail.next; 5819 5820 } 5821 ptr->tail.next = map_array; 5822 } 5823 map_array->head.mod = mod; 5824 map_array->head.length = len; 5825 map_array++; 5826 5827 for (map = start; (unsigned long)map < (unsigned long)stop; map++) { 5828 map_array->map = **map; 5829 map_array++; 5830 } 5831 memset(map_array, 0, sizeof(*map_array)); 5832 5833 mutex_unlock(&trace_eval_mutex); 5834 } 5835 5836 static void trace_create_eval_file(struct dentry *d_tracer) 5837 { 5838 trace_create_file("eval_map", TRACE_MODE_READ, d_tracer, 5839 NULL, &tracing_eval_map_fops); 5840 } 5841 5842 #else /* CONFIG_TRACE_EVAL_MAP_FILE */ 5843 static inline void trace_create_eval_file(struct dentry *d_tracer) { } 5844 static inline void trace_insert_eval_map_file(struct module *mod, 5845 struct trace_eval_map **start, int len) { } 5846 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */ 5847 5848 static void trace_insert_eval_map(struct module *mod, 5849 struct trace_eval_map **start, int len) 5850 { 5851 struct trace_eval_map **map; 5852 5853 if (len <= 0) 5854 return; 5855 5856 map = start; 5857 5858 trace_event_eval_update(map, len); 5859 5860 trace_insert_eval_map_file(mod, start, len); 5861 } 5862 5863 static ssize_t 5864 tracing_set_trace_read(struct file *filp, char __user *ubuf, 5865 size_t cnt, loff_t *ppos) 5866 { 5867 struct trace_array *tr = filp->private_data; 5868 char buf[MAX_TRACER_SIZE+2]; 5869 int r; 5870 5871 mutex_lock(&trace_types_lock); 5872 r = sprintf(buf, "%s\n", tr->current_trace->name); 5873 mutex_unlock(&trace_types_lock); 5874 5875 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 5876 } 5877 5878 int tracer_init(struct tracer *t, struct trace_array *tr) 5879 { 5880 tracing_reset_online_cpus(&tr->array_buffer); 5881 return t->init(tr); 5882 } 5883 5884 static void set_buffer_entries(struct array_buffer *buf, unsigned long val) 5885 { 5886 int cpu; 5887 5888 for_each_tracing_cpu(cpu) 5889 per_cpu_ptr(buf->data, cpu)->entries = val; 5890 } 5891 5892 static void update_buffer_entries(struct array_buffer *buf, int cpu) 5893 { 5894 if (cpu == RING_BUFFER_ALL_CPUS) { 5895 set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0)); 5896 } else { 5897 per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu); 5898 } 5899 } 5900 5901 #ifdef CONFIG_TRACER_MAX_TRACE 5902 /* resize @tr's buffer to the size of @size_tr's entries */ 5903 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf, 5904 struct array_buffer *size_buf, int cpu_id) 5905 { 5906 int cpu, ret = 0; 5907 5908 if (cpu_id == RING_BUFFER_ALL_CPUS) { 5909 for_each_tracing_cpu(cpu) { 5910 ret = ring_buffer_resize(trace_buf->buffer, 5911 per_cpu_ptr(size_buf->data, cpu)->entries, cpu); 5912 if (ret < 0) 5913 break; 5914 per_cpu_ptr(trace_buf->data, cpu)->entries = 5915 per_cpu_ptr(size_buf->data, cpu)->entries; 5916 } 5917 } else { 5918 ret = ring_buffer_resize(trace_buf->buffer, 5919 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id); 5920 if (ret == 
0) 5921 per_cpu_ptr(trace_buf->data, cpu_id)->entries = 5922 per_cpu_ptr(size_buf->data, cpu_id)->entries; 5923 } 5924 5925 return ret; 5926 } 5927 #endif /* CONFIG_TRACER_MAX_TRACE */ 5928 5929 static int __tracing_resize_ring_buffer(struct trace_array *tr, 5930 unsigned long size, int cpu) 5931 { 5932 int ret; 5933 5934 /* 5935 * If kernel or user changes the size of the ring buffer 5936 * we use the size that was given, and we can forget about 5937 * expanding it later. 5938 */ 5939 trace_set_ring_buffer_expanded(tr); 5940 5941 /* May be called before buffers are initialized */ 5942 if (!tr->array_buffer.buffer) 5943 return 0; 5944 5945 /* Do not allow tracing while resizing ring buffer */ 5946 tracing_stop_tr(tr); 5947 5948 ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu); 5949 if (ret < 0) 5950 goto out_start; 5951 5952 #ifdef CONFIG_TRACER_MAX_TRACE 5953 if (!tr->allocated_snapshot) 5954 goto out; 5955 5956 ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu); 5957 if (ret < 0) { 5958 int r = resize_buffer_duplicate_size(&tr->array_buffer, 5959 &tr->array_buffer, cpu); 5960 if (r < 0) { 5961 /* 5962 * AARGH! We are left with different 5963 * size max buffer!!!! 5964 * The max buffer is our "snapshot" buffer. 5965 * When a tracer needs a snapshot (one of the 5966 * latency tracers), it swaps the max buffer 5967 * with the saved snap shot. We succeeded to 5968 * update the size of the main buffer, but failed to 5969 * update the size of the max buffer. But when we tried 5970 * to reset the main buffer to the original size, we 5971 * failed there too. This is very unlikely to 5972 * happen, but if it does, warn and kill all 5973 * tracing. 5974 */ 5975 WARN_ON(1); 5976 tracing_disabled = 1; 5977 } 5978 goto out_start; 5979 } 5980 5981 update_buffer_entries(&tr->max_buffer, cpu); 5982 5983 out: 5984 #endif /* CONFIG_TRACER_MAX_TRACE */ 5985 5986 update_buffer_entries(&tr->array_buffer, cpu); 5987 out_start: 5988 tracing_start_tr(tr); 5989 return ret; 5990 } 5991 5992 ssize_t tracing_resize_ring_buffer(struct trace_array *tr, 5993 unsigned long size, int cpu_id) 5994 { 5995 int ret; 5996 5997 mutex_lock(&trace_types_lock); 5998 5999 if (cpu_id != RING_BUFFER_ALL_CPUS) { 6000 /* make sure, this cpu is enabled in the mask */ 6001 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) { 6002 ret = -EINVAL; 6003 goto out; 6004 } 6005 } 6006 6007 ret = __tracing_resize_ring_buffer(tr, size, cpu_id); 6008 if (ret < 0) 6009 ret = -ENOMEM; 6010 6011 out: 6012 mutex_unlock(&trace_types_lock); 6013 6014 return ret; 6015 } 6016 6017 6018 /** 6019 * tracing_update_buffers - used by tracing facility to expand ring buffers 6020 * @tr: The tracing instance 6021 * 6022 * To save on memory when the tracing is never used on a system with it 6023 * configured in. The ring buffers are set to a minimum size. But once 6024 * a user starts to use the tracing facility, then they need to grow 6025 * to their default size. 6026 * 6027 * This function is to be called when a tracer is about to be used. 
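 *
 * For example (not an exhaustive list of callers): writing to the
 * "snapshot" file reaches this function through tracing_snapshot_write()
 * below, and setting a tracer expands the buffers through the same
 * __tracing_resize_ring_buffer() path from tracing_set_tracer().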
6028 */ 6029 int tracing_update_buffers(struct trace_array *tr) 6030 { 6031 int ret = 0; 6032 6033 mutex_lock(&trace_types_lock); 6034 if (!tr->ring_buffer_expanded) 6035 ret = __tracing_resize_ring_buffer(tr, trace_buf_size, 6036 RING_BUFFER_ALL_CPUS); 6037 mutex_unlock(&trace_types_lock); 6038 6039 return ret; 6040 } 6041 6042 struct trace_option_dentry; 6043 6044 static void 6045 create_trace_option_files(struct trace_array *tr, struct tracer *tracer); 6046 6047 /* 6048 * Used to clear out the tracer before deletion of an instance. 6049 * Must have trace_types_lock held. 6050 */ 6051 static void tracing_set_nop(struct trace_array *tr) 6052 { 6053 if (tr->current_trace == &nop_trace) 6054 return; 6055 6056 tr->current_trace->enabled--; 6057 6058 if (tr->current_trace->reset) 6059 tr->current_trace->reset(tr); 6060 6061 tr->current_trace = &nop_trace; 6062 } 6063 6064 static bool tracer_options_updated; 6065 6066 static void add_tracer_options(struct trace_array *tr, struct tracer *t) 6067 { 6068 /* Only enable if the directory has been created already. */ 6069 if (!tr->dir) 6070 return; 6071 6072 /* Only create trace option files after update_tracer_options finish */ 6073 if (!tracer_options_updated) 6074 return; 6075 6076 create_trace_option_files(tr, t); 6077 } 6078 6079 int tracing_set_tracer(struct trace_array *tr, const char *buf) 6080 { 6081 struct tracer *t; 6082 #ifdef CONFIG_TRACER_MAX_TRACE 6083 bool had_max_tr; 6084 #endif 6085 int ret = 0; 6086 6087 mutex_lock(&trace_types_lock); 6088 6089 if (!tr->ring_buffer_expanded) { 6090 ret = __tracing_resize_ring_buffer(tr, trace_buf_size, 6091 RING_BUFFER_ALL_CPUS); 6092 if (ret < 0) 6093 goto out; 6094 ret = 0; 6095 } 6096 6097 for (t = trace_types; t; t = t->next) { 6098 if (strcmp(t->name, buf) == 0) 6099 break; 6100 } 6101 if (!t) { 6102 ret = -EINVAL; 6103 goto out; 6104 } 6105 if (t == tr->current_trace) 6106 goto out; 6107 6108 #ifdef CONFIG_TRACER_SNAPSHOT 6109 if (t->use_max_tr) { 6110 local_irq_disable(); 6111 arch_spin_lock(&tr->max_lock); 6112 if (tr->cond_snapshot) 6113 ret = -EBUSY; 6114 arch_spin_unlock(&tr->max_lock); 6115 local_irq_enable(); 6116 if (ret) 6117 goto out; 6118 } 6119 #endif 6120 /* Some tracers won't work on kernel command line */ 6121 if (system_state < SYSTEM_RUNNING && t->noboot) { 6122 pr_warn("Tracer '%s' is not allowed on command line, ignored\n", 6123 t->name); 6124 goto out; 6125 } 6126 6127 /* Some tracers are only allowed for the top level buffer */ 6128 if (!trace_ok_for_array(t, tr)) { 6129 ret = -EINVAL; 6130 goto out; 6131 } 6132 6133 /* If trace pipe files are being read, we can't change the tracer */ 6134 if (tr->trace_ref) { 6135 ret = -EBUSY; 6136 goto out; 6137 } 6138 6139 trace_branch_disable(); 6140 6141 tr->current_trace->enabled--; 6142 6143 if (tr->current_trace->reset) 6144 tr->current_trace->reset(tr); 6145 6146 #ifdef CONFIG_TRACER_MAX_TRACE 6147 had_max_tr = tr->current_trace->use_max_tr; 6148 6149 /* Current trace needs to be nop_trace before synchronize_rcu */ 6150 tr->current_trace = &nop_trace; 6151 6152 if (had_max_tr && !t->use_max_tr) { 6153 /* 6154 * We need to make sure that the update_max_tr sees that 6155 * current_trace changed to nop_trace to keep it from 6156 * swapping the buffers after we resize it. 6157 * The update_max_tr is called from interrupts disabled 6158 * so a synchronized_sched() is sufficient. 
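 * (The call below is synchronize_rcu(), which on current kernels also
 * waits for such interrupts-disabled regions.)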
6159 */ 6160 synchronize_rcu(); 6161 free_snapshot(tr); 6162 tracing_disarm_snapshot(tr); 6163 } 6164 6165 if (!had_max_tr && t->use_max_tr) { 6166 ret = tracing_arm_snapshot_locked(tr); 6167 if (ret) 6168 goto out; 6169 } 6170 #else 6171 tr->current_trace = &nop_trace; 6172 #endif 6173 6174 if (t->init) { 6175 ret = tracer_init(t, tr); 6176 if (ret) { 6177 #ifdef CONFIG_TRACER_MAX_TRACE 6178 if (t->use_max_tr) 6179 tracing_disarm_snapshot(tr); 6180 #endif 6181 goto out; 6182 } 6183 } 6184 6185 tr->current_trace = t; 6186 tr->current_trace->enabled++; 6187 trace_branch_enable(tr); 6188 out: 6189 mutex_unlock(&trace_types_lock); 6190 6191 return ret; 6192 } 6193 6194 static ssize_t 6195 tracing_set_trace_write(struct file *filp, const char __user *ubuf, 6196 size_t cnt, loff_t *ppos) 6197 { 6198 struct trace_array *tr = filp->private_data; 6199 char buf[MAX_TRACER_SIZE+1]; 6200 char *name; 6201 size_t ret; 6202 int err; 6203 6204 ret = cnt; 6205 6206 if (cnt > MAX_TRACER_SIZE) 6207 cnt = MAX_TRACER_SIZE; 6208 6209 if (copy_from_user(buf, ubuf, cnt)) 6210 return -EFAULT; 6211 6212 buf[cnt] = 0; 6213 6214 name = strim(buf); 6215 6216 err = tracing_set_tracer(tr, name); 6217 if (err) 6218 return err; 6219 6220 *ppos += ret; 6221 6222 return ret; 6223 } 6224 6225 static ssize_t 6226 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf, 6227 size_t cnt, loff_t *ppos) 6228 { 6229 char buf[64]; 6230 int r; 6231 6232 r = snprintf(buf, sizeof(buf), "%ld\n", 6233 *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr)); 6234 if (r > sizeof(buf)) 6235 r = sizeof(buf); 6236 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 6237 } 6238 6239 static ssize_t 6240 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf, 6241 size_t cnt, loff_t *ppos) 6242 { 6243 unsigned long val; 6244 int ret; 6245 6246 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 6247 if (ret) 6248 return ret; 6249 6250 *ptr = val * 1000; 6251 6252 return cnt; 6253 } 6254 6255 static ssize_t 6256 tracing_thresh_read(struct file *filp, char __user *ubuf, 6257 size_t cnt, loff_t *ppos) 6258 { 6259 return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos); 6260 } 6261 6262 static ssize_t 6263 tracing_thresh_write(struct file *filp, const char __user *ubuf, 6264 size_t cnt, loff_t *ppos) 6265 { 6266 struct trace_array *tr = filp->private_data; 6267 int ret; 6268 6269 mutex_lock(&trace_types_lock); 6270 ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos); 6271 if (ret < 0) 6272 goto out; 6273 6274 if (tr->current_trace->update_thresh) { 6275 ret = tr->current_trace->update_thresh(tr); 6276 if (ret < 0) 6277 goto out; 6278 } 6279 6280 ret = cnt; 6281 out: 6282 mutex_unlock(&trace_types_lock); 6283 6284 return ret; 6285 } 6286 6287 #ifdef CONFIG_TRACER_MAX_TRACE 6288 6289 static ssize_t 6290 tracing_max_lat_read(struct file *filp, char __user *ubuf, 6291 size_t cnt, loff_t *ppos) 6292 { 6293 struct trace_array *tr = filp->private_data; 6294 6295 return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos); 6296 } 6297 6298 static ssize_t 6299 tracing_max_lat_write(struct file *filp, const char __user *ubuf, 6300 size_t cnt, loff_t *ppos) 6301 { 6302 struct trace_array *tr = filp->private_data; 6303 6304 return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos); 6305 } 6306 6307 #endif 6308 6309 static int open_pipe_on_cpu(struct trace_array *tr, int cpu) 6310 { 6311 if (cpu == RING_BUFFER_ALL_CPUS) { 6312 if (cpumask_empty(tr->pipe_cpumask)) { 6313 cpumask_setall(tr->pipe_cpumask); 6314 return 0; 6315 } 6316 } 
else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) { 6317 cpumask_set_cpu(cpu, tr->pipe_cpumask); 6318 return 0; 6319 } 6320 return -EBUSY; 6321 } 6322 6323 static void close_pipe_on_cpu(struct trace_array *tr, int cpu) 6324 { 6325 if (cpu == RING_BUFFER_ALL_CPUS) { 6326 WARN_ON(!cpumask_full(tr->pipe_cpumask)); 6327 cpumask_clear(tr->pipe_cpumask); 6328 } else { 6329 WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask)); 6330 cpumask_clear_cpu(cpu, tr->pipe_cpumask); 6331 } 6332 } 6333 6334 static int tracing_open_pipe(struct inode *inode, struct file *filp) 6335 { 6336 struct trace_array *tr = inode->i_private; 6337 struct trace_iterator *iter; 6338 int cpu; 6339 int ret; 6340 6341 ret = tracing_check_open_get_tr(tr); 6342 if (ret) 6343 return ret; 6344 6345 mutex_lock(&trace_types_lock); 6346 cpu = tracing_get_cpu(inode); 6347 ret = open_pipe_on_cpu(tr, cpu); 6348 if (ret) 6349 goto fail_pipe_on_cpu; 6350 6351 /* create a buffer to store the information to pass to userspace */ 6352 iter = kzalloc(sizeof(*iter), GFP_KERNEL); 6353 if (!iter) { 6354 ret = -ENOMEM; 6355 goto fail_alloc_iter; 6356 } 6357 6358 trace_seq_init(&iter->seq); 6359 iter->trace = tr->current_trace; 6360 6361 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) { 6362 ret = -ENOMEM; 6363 goto fail; 6364 } 6365 6366 /* trace pipe does not show start of buffer */ 6367 cpumask_setall(iter->started); 6368 6369 if (tr->trace_flags & TRACE_ITER_LATENCY_FMT) 6370 iter->iter_flags |= TRACE_FILE_LAT_FMT; 6371 6372 /* Output in nanoseconds only if we are using a clock in nanoseconds. */ 6373 if (trace_clocks[tr->clock_id].in_ns) 6374 iter->iter_flags |= TRACE_FILE_TIME_IN_NS; 6375 6376 iter->tr = tr; 6377 iter->array_buffer = &tr->array_buffer; 6378 iter->cpu_file = cpu; 6379 mutex_init(&iter->mutex); 6380 filp->private_data = iter; 6381 6382 if (iter->trace->pipe_open) 6383 iter->trace->pipe_open(iter); 6384 6385 nonseekable_open(inode, filp); 6386 6387 tr->trace_ref++; 6388 6389 mutex_unlock(&trace_types_lock); 6390 return ret; 6391 6392 fail: 6393 kfree(iter); 6394 fail_alloc_iter: 6395 close_pipe_on_cpu(tr, cpu); 6396 fail_pipe_on_cpu: 6397 __trace_array_put(tr); 6398 mutex_unlock(&trace_types_lock); 6399 return ret; 6400 } 6401 6402 static int tracing_release_pipe(struct inode *inode, struct file *file) 6403 { 6404 struct trace_iterator *iter = file->private_data; 6405 struct trace_array *tr = inode->i_private; 6406 6407 mutex_lock(&trace_types_lock); 6408 6409 tr->trace_ref--; 6410 6411 if (iter->trace->pipe_close) 6412 iter->trace->pipe_close(iter); 6413 close_pipe_on_cpu(tr, iter->cpu_file); 6414 mutex_unlock(&trace_types_lock); 6415 6416 free_trace_iter_content(iter); 6417 kfree(iter); 6418 6419 trace_array_put(tr); 6420 6421 return 0; 6422 } 6423 6424 static __poll_t 6425 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table) 6426 { 6427 struct trace_array *tr = iter->tr; 6428 6429 /* Iterators are static, they should be filled or empty */ 6430 if (trace_buffer_iter(iter, iter->cpu_file)) 6431 return EPOLLIN | EPOLLRDNORM; 6432 6433 if (tr->trace_flags & TRACE_ITER_BLOCK) 6434 /* 6435 * Always select as readable when in blocking mode 6436 */ 6437 return EPOLLIN | EPOLLRDNORM; 6438 else 6439 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file, 6440 filp, poll_table, iter->tr->buffer_percent); 6441 } 6442 6443 static __poll_t 6444 tracing_poll_pipe(struct file *filp, poll_table *poll_table) 6445 { 6446 struct trace_iterator *iter = filp->private_data; 6447 6448 return 
trace_poll(iter, filp, poll_table); 6449 } 6450 6451 /* Must be called with iter->mutex held. */ 6452 static int tracing_wait_pipe(struct file *filp) 6453 { 6454 struct trace_iterator *iter = filp->private_data; 6455 int ret; 6456 6457 while (trace_empty(iter)) { 6458 6459 if ((filp->f_flags & O_NONBLOCK)) { 6460 return -EAGAIN; 6461 } 6462 6463 /* 6464 * We block until we read something and tracing is disabled. 6465 * We still block if tracing is disabled, but we have never 6466 * read anything. This allows a user to cat this file, and 6467 * then enable tracing. But after we have read something, 6468 * we give an EOF when tracing is again disabled. 6469 * 6470 * iter->pos will be 0 if we haven't read anything. 6471 */ 6472 if (!tracer_tracing_is_on(iter->tr) && iter->pos) 6473 break; 6474 6475 mutex_unlock(&iter->mutex); 6476 6477 ret = wait_on_pipe(iter, 0); 6478 6479 mutex_lock(&iter->mutex); 6480 6481 if (ret) 6482 return ret; 6483 } 6484 6485 return 1; 6486 } 6487 6488 /* 6489 * Consumer reader. 6490 */ 6491 static ssize_t 6492 tracing_read_pipe(struct file *filp, char __user *ubuf, 6493 size_t cnt, loff_t *ppos) 6494 { 6495 struct trace_iterator *iter = filp->private_data; 6496 ssize_t sret; 6497 6498 /* 6499 * Avoid more than one consumer on a single file descriptor 6500 * This is just a matter of traces coherency, the ring buffer itself 6501 * is protected. 6502 */ 6503 mutex_lock(&iter->mutex); 6504 6505 /* return any leftover data */ 6506 sret = trace_seq_to_user(&iter->seq, ubuf, cnt); 6507 if (sret != -EBUSY) 6508 goto out; 6509 6510 trace_seq_init(&iter->seq); 6511 6512 if (iter->trace->read) { 6513 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos); 6514 if (sret) 6515 goto out; 6516 } 6517 6518 waitagain: 6519 sret = tracing_wait_pipe(filp); 6520 if (sret <= 0) 6521 goto out; 6522 6523 /* stop when tracing is finished */ 6524 if (trace_empty(iter)) { 6525 sret = 0; 6526 goto out; 6527 } 6528 6529 if (cnt >= TRACE_SEQ_BUFFER_SIZE) 6530 cnt = TRACE_SEQ_BUFFER_SIZE - 1; 6531 6532 /* reset all but tr, trace, and overruns */ 6533 trace_iterator_reset(iter); 6534 cpumask_clear(iter->started); 6535 trace_seq_init(&iter->seq); 6536 6537 trace_event_read_lock(); 6538 trace_access_lock(iter->cpu_file); 6539 while (trace_find_next_entry_inc(iter) != NULL) { 6540 enum print_line_t ret; 6541 int save_len = iter->seq.seq.len; 6542 6543 ret = print_trace_line(iter); 6544 if (ret == TRACE_TYPE_PARTIAL_LINE) { 6545 /* 6546 * If one print_trace_line() fills entire trace_seq in one shot, 6547 * trace_seq_to_user() will returns -EBUSY because save_len == 0, 6548 * In this case, we need to consume it, otherwise, loop will peek 6549 * this event next time, resulting in an infinite loop. 6550 */ 6551 if (save_len == 0) { 6552 iter->seq.full = 0; 6553 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n"); 6554 trace_consume(iter); 6555 break; 6556 } 6557 6558 /* In other cases, don't print partial lines */ 6559 iter->seq.seq.len = save_len; 6560 break; 6561 } 6562 if (ret != TRACE_TYPE_NO_CONSUME) 6563 trace_consume(iter); 6564 6565 if (trace_seq_used(&iter->seq) >= cnt) 6566 break; 6567 6568 /* 6569 * Setting the full flag means we reached the trace_seq buffer 6570 * size and we should leave by partial output condition above. 6571 * One of the trace_seq_* functions is not used properly. 
6572 */ 6573 WARN_ONCE(iter->seq.full, "full flag set for trace type %d", 6574 iter->ent->type); 6575 } 6576 trace_access_unlock(iter->cpu_file); 6577 trace_event_read_unlock(); 6578 6579 /* Now copy what we have to the user */ 6580 sret = trace_seq_to_user(&iter->seq, ubuf, cnt); 6581 if (iter->seq.readpos >= trace_seq_used(&iter->seq)) 6582 trace_seq_init(&iter->seq); 6583 6584 /* 6585 * If there was nothing to send to user, in spite of consuming trace 6586 * entries, go back to wait for more entries. 6587 */ 6588 if (sret == -EBUSY) 6589 goto waitagain; 6590 6591 out: 6592 mutex_unlock(&iter->mutex); 6593 6594 return sret; 6595 } 6596 6597 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd, 6598 unsigned int idx) 6599 { 6600 __free_page(spd->pages[idx]); 6601 } 6602 6603 static size_t 6604 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter) 6605 { 6606 size_t count; 6607 int save_len; 6608 int ret; 6609 6610 /* Seq buffer is page-sized, exactly what we need. */ 6611 for (;;) { 6612 save_len = iter->seq.seq.len; 6613 ret = print_trace_line(iter); 6614 6615 if (trace_seq_has_overflowed(&iter->seq)) { 6616 iter->seq.seq.len = save_len; 6617 break; 6618 } 6619 6620 /* 6621 * This should not be hit, because it should only 6622 * be set if the iter->seq overflowed. But check it 6623 * anyway to be safe. 6624 */ 6625 if (ret == TRACE_TYPE_PARTIAL_LINE) { 6626 iter->seq.seq.len = save_len; 6627 break; 6628 } 6629 6630 count = trace_seq_used(&iter->seq) - save_len; 6631 if (rem < count) { 6632 rem = 0; 6633 iter->seq.seq.len = save_len; 6634 break; 6635 } 6636 6637 if (ret != TRACE_TYPE_NO_CONSUME) 6638 trace_consume(iter); 6639 rem -= count; 6640 if (!trace_find_next_entry_inc(iter)) { 6641 rem = 0; 6642 iter->ent = NULL; 6643 break; 6644 } 6645 } 6646 6647 return rem; 6648 } 6649 6650 static ssize_t tracing_splice_read_pipe(struct file *filp, 6651 loff_t *ppos, 6652 struct pipe_inode_info *pipe, 6653 size_t len, 6654 unsigned int flags) 6655 { 6656 struct page *pages_def[PIPE_DEF_BUFFERS]; 6657 struct partial_page partial_def[PIPE_DEF_BUFFERS]; 6658 struct trace_iterator *iter = filp->private_data; 6659 struct splice_pipe_desc spd = { 6660 .pages = pages_def, 6661 .partial = partial_def, 6662 .nr_pages = 0, /* This gets updated below. */ 6663 .nr_pages_max = PIPE_DEF_BUFFERS, 6664 .ops = &default_pipe_buf_ops, 6665 .spd_release = tracing_spd_release_pipe, 6666 }; 6667 ssize_t ret; 6668 size_t rem; 6669 unsigned int i; 6670 6671 if (splice_grow_spd(pipe, &spd)) 6672 return -ENOMEM; 6673 6674 mutex_lock(&iter->mutex); 6675 6676 if (iter->trace->splice_read) { 6677 ret = iter->trace->splice_read(iter, filp, 6678 ppos, pipe, len, flags); 6679 if (ret) 6680 goto out_err; 6681 } 6682 6683 ret = tracing_wait_pipe(filp); 6684 if (ret <= 0) 6685 goto out_err; 6686 6687 if (!iter->ent && !trace_find_next_entry_inc(iter)) { 6688 ret = -EFAULT; 6689 goto out_err; 6690 } 6691 6692 trace_event_read_lock(); 6693 trace_access_lock(iter->cpu_file); 6694 6695 /* Fill as many pages as possible. */ 6696 for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) { 6697 spd.pages[i] = alloc_page(GFP_KERNEL); 6698 if (!spd.pages[i]) 6699 break; 6700 6701 rem = tracing_fill_pipe_page(rem, iter); 6702 6703 /* Copy the data into the page, so we can start over. 
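 * At most one page of formatted output is copied here, since the seq
 * buffer is page sized (see tracing_fill_pipe_page() above); the seq is
 * re-initialized below before the next page is filled.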
*/ 6704 ret = trace_seq_to_buffer(&iter->seq, 6705 page_address(spd.pages[i]), 6706 trace_seq_used(&iter->seq)); 6707 if (ret < 0) { 6708 __free_page(spd.pages[i]); 6709 break; 6710 } 6711 spd.partial[i].offset = 0; 6712 spd.partial[i].len = trace_seq_used(&iter->seq); 6713 6714 trace_seq_init(&iter->seq); 6715 } 6716 6717 trace_access_unlock(iter->cpu_file); 6718 trace_event_read_unlock(); 6719 mutex_unlock(&iter->mutex); 6720 6721 spd.nr_pages = i; 6722 6723 if (i) 6724 ret = splice_to_pipe(pipe, &spd); 6725 else 6726 ret = 0; 6727 out: 6728 splice_shrink_spd(&spd); 6729 return ret; 6730 6731 out_err: 6732 mutex_unlock(&iter->mutex); 6733 goto out; 6734 } 6735 6736 static ssize_t 6737 tracing_entries_read(struct file *filp, char __user *ubuf, 6738 size_t cnt, loff_t *ppos) 6739 { 6740 struct inode *inode = file_inode(filp); 6741 struct trace_array *tr = inode->i_private; 6742 int cpu = tracing_get_cpu(inode); 6743 char buf[64]; 6744 int r = 0; 6745 ssize_t ret; 6746 6747 mutex_lock(&trace_types_lock); 6748 6749 if (cpu == RING_BUFFER_ALL_CPUS) { 6750 int cpu, buf_size_same; 6751 unsigned long size; 6752 6753 size = 0; 6754 buf_size_same = 1; 6755 /* check if all cpu sizes are same */ 6756 for_each_tracing_cpu(cpu) { 6757 /* fill in the size from first enabled cpu */ 6758 if (size == 0) 6759 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries; 6760 if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) { 6761 buf_size_same = 0; 6762 break; 6763 } 6764 } 6765 6766 if (buf_size_same) { 6767 if (!tr->ring_buffer_expanded) 6768 r = sprintf(buf, "%lu (expanded: %lu)\n", 6769 size >> 10, 6770 trace_buf_size >> 10); 6771 else 6772 r = sprintf(buf, "%lu\n", size >> 10); 6773 } else 6774 r = sprintf(buf, "X\n"); 6775 } else 6776 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10); 6777 6778 mutex_unlock(&trace_types_lock); 6779 6780 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 6781 return ret; 6782 } 6783 6784 static ssize_t 6785 tracing_entries_write(struct file *filp, const char __user *ubuf, 6786 size_t cnt, loff_t *ppos) 6787 { 6788 struct inode *inode = file_inode(filp); 6789 struct trace_array *tr = inode->i_private; 6790 unsigned long val; 6791 int ret; 6792 6793 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 6794 if (ret) 6795 return ret; 6796 6797 /* must have at least 1 entry */ 6798 if (!val) 6799 return -EINVAL; 6800 6801 /* value is in KB */ 6802 val <<= 10; 6803 ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode)); 6804 if (ret < 0) 6805 return ret; 6806 6807 *ppos += cnt; 6808 6809 return cnt; 6810 } 6811 6812 static ssize_t 6813 tracing_total_entries_read(struct file *filp, char __user *ubuf, 6814 size_t cnt, loff_t *ppos) 6815 { 6816 struct trace_array *tr = filp->private_data; 6817 char buf[64]; 6818 int r, cpu; 6819 unsigned long size = 0, expanded_size = 0; 6820 6821 mutex_lock(&trace_types_lock); 6822 for_each_tracing_cpu(cpu) { 6823 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10; 6824 if (!tr->ring_buffer_expanded) 6825 expanded_size += trace_buf_size >> 10; 6826 } 6827 if (tr->ring_buffer_expanded) 6828 r = sprintf(buf, "%lu\n", size); 6829 else 6830 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size); 6831 mutex_unlock(&trace_types_lock); 6832 6833 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 6834 } 6835 6836 static ssize_t 6837 tracing_free_buffer_write(struct file *filp, const char __user *ubuf, 6838 size_t cnt, loff_t *ppos) 6839 { 6840 /* 6841 * There is no need to 
read what the user has written, this function 6842 * is just to make sure that there is no error when "echo" is used 6843 */ 6844 6845 *ppos += cnt; 6846 6847 return cnt; 6848 } 6849 6850 static int 6851 tracing_free_buffer_release(struct inode *inode, struct file *filp) 6852 { 6853 struct trace_array *tr = inode->i_private; 6854 6855 /* disable tracing ? */ 6856 if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE) 6857 tracer_tracing_off(tr); 6858 /* resize the ring buffer to 0 */ 6859 tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS); 6860 6861 trace_array_put(tr); 6862 6863 return 0; 6864 } 6865 6866 #define TRACE_MARKER_MAX_SIZE 4096 6867 6868 static ssize_t 6869 tracing_mark_write(struct file *filp, const char __user *ubuf, 6870 size_t cnt, loff_t *fpos) 6871 { 6872 struct trace_array *tr = filp->private_data; 6873 struct ring_buffer_event *event; 6874 enum event_trigger_type tt = ETT_NONE; 6875 struct trace_buffer *buffer; 6876 struct print_entry *entry; 6877 int meta_size; 6878 ssize_t written; 6879 size_t size; 6880 int len; 6881 6882 /* Used in tracing_mark_raw_write() as well */ 6883 #define FAULTED_STR "<faulted>" 6884 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */ 6885 6886 if (tracing_disabled) 6887 return -EINVAL; 6888 6889 if (!(tr->trace_flags & TRACE_ITER_MARKERS)) 6890 return -EINVAL; 6891 6892 if ((ssize_t)cnt < 0) 6893 return -EINVAL; 6894 6895 if (cnt > TRACE_MARKER_MAX_SIZE) 6896 cnt = TRACE_MARKER_MAX_SIZE; 6897 6898 meta_size = sizeof(*entry) + 2; /* add '\0' and possible '\n' */ 6899 again: 6900 size = cnt + meta_size; 6901 6902 /* If less than "<faulted>", then make sure we can still add that */ 6903 if (cnt < FAULTED_SIZE) 6904 size += FAULTED_SIZE - cnt; 6905 6906 buffer = tr->array_buffer.buffer; 6907 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, 6908 tracing_gen_ctx()); 6909 if (unlikely(!event)) { 6910 /* 6911 * If the size was greater than what was allowed, then 6912 * make it smaller and try again. 
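 * If the reservation failed because the requested size exceeds the ring
 * buffer's maximum event size, cnt is clamped below and the reservation
 * is retried at most once; otherwise the buffer is disabled and -EBADF
 * is returned.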
6913 */ 6914 if (size > ring_buffer_max_event_size(buffer)) { 6915 /* cnt < FAULTED size should never be bigger than max */ 6916 if (WARN_ON_ONCE(cnt < FAULTED_SIZE)) 6917 return -EBADF; 6918 cnt = ring_buffer_max_event_size(buffer) - meta_size; 6919 /* The above should only happen once */ 6920 if (WARN_ON_ONCE(cnt + meta_size == size)) 6921 return -EBADF; 6922 goto again; 6923 } 6924 6925 /* Ring buffer disabled, return as if not open for write */ 6926 return -EBADF; 6927 } 6928 6929 entry = ring_buffer_event_data(event); 6930 entry->ip = _THIS_IP_; 6931 6932 len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt); 6933 if (len) { 6934 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE); 6935 cnt = FAULTED_SIZE; 6936 written = -EFAULT; 6937 } else 6938 written = cnt; 6939 6940 if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) { 6941 /* do not add \n before testing triggers, but add \0 */ 6942 entry->buf[cnt] = '\0'; 6943 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event); 6944 } 6945 6946 if (entry->buf[cnt - 1] != '\n') { 6947 entry->buf[cnt] = '\n'; 6948 entry->buf[cnt + 1] = '\0'; 6949 } else 6950 entry->buf[cnt] = '\0'; 6951 6952 if (static_branch_unlikely(&trace_marker_exports_enabled)) 6953 ftrace_exports(event, TRACE_EXPORT_MARKER); 6954 __buffer_unlock_commit(buffer, event); 6955 6956 if (tt) 6957 event_triggers_post_call(tr->trace_marker_file, tt); 6958 6959 return written; 6960 } 6961 6962 static ssize_t 6963 tracing_mark_raw_write(struct file *filp, const char __user *ubuf, 6964 size_t cnt, loff_t *fpos) 6965 { 6966 struct trace_array *tr = filp->private_data; 6967 struct ring_buffer_event *event; 6968 struct trace_buffer *buffer; 6969 struct raw_data_entry *entry; 6970 ssize_t written; 6971 int size; 6972 int len; 6973 6974 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int)) 6975 6976 if (tracing_disabled) 6977 return -EINVAL; 6978 6979 if (!(tr->trace_flags & TRACE_ITER_MARKERS)) 6980 return -EINVAL; 6981 6982 /* The marker must at least have a tag id */ 6983 if (cnt < sizeof(unsigned int)) 6984 return -EINVAL; 6985 6986 size = sizeof(*entry) + cnt; 6987 if (cnt < FAULT_SIZE_ID) 6988 size += FAULT_SIZE_ID - cnt; 6989 6990 buffer = tr->array_buffer.buffer; 6991 6992 if (size > ring_buffer_max_event_size(buffer)) 6993 return -EINVAL; 6994 6995 event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size, 6996 tracing_gen_ctx()); 6997 if (!event) 6998 /* Ring buffer disabled, return as if not open for write */ 6999 return -EBADF; 7000 7001 entry = ring_buffer_event_data(event); 7002 7003 len = __copy_from_user_inatomic(&entry->id, ubuf, cnt); 7004 if (len) { 7005 entry->id = -1; 7006 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE); 7007 written = -EFAULT; 7008 } else 7009 written = cnt; 7010 7011 __buffer_unlock_commit(buffer, event); 7012 7013 return written; 7014 } 7015 7016 static int tracing_clock_show(struct seq_file *m, void *v) 7017 { 7018 struct trace_array *tr = m->private; 7019 int i; 7020 7021 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) 7022 seq_printf(m, 7023 "%s%s%s%s", i ? " " : "", 7024 i == tr->clock_id ? "[" : "", trace_clocks[i].name, 7025 i == tr->clock_id ? 
"]" : ""); 7026 seq_putc(m, '\n'); 7027 7028 return 0; 7029 } 7030 7031 int tracing_set_clock(struct trace_array *tr, const char *clockstr) 7032 { 7033 int i; 7034 7035 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) { 7036 if (strcmp(trace_clocks[i].name, clockstr) == 0) 7037 break; 7038 } 7039 if (i == ARRAY_SIZE(trace_clocks)) 7040 return -EINVAL; 7041 7042 mutex_lock(&trace_types_lock); 7043 7044 tr->clock_id = i; 7045 7046 ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func); 7047 7048 /* 7049 * New clock may not be consistent with the previous clock. 7050 * Reset the buffer so that it doesn't have incomparable timestamps. 7051 */ 7052 tracing_reset_online_cpus(&tr->array_buffer); 7053 7054 #ifdef CONFIG_TRACER_MAX_TRACE 7055 if (tr->max_buffer.buffer) 7056 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func); 7057 tracing_reset_online_cpus(&tr->max_buffer); 7058 #endif 7059 7060 mutex_unlock(&trace_types_lock); 7061 7062 return 0; 7063 } 7064 7065 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf, 7066 size_t cnt, loff_t *fpos) 7067 { 7068 struct seq_file *m = filp->private_data; 7069 struct trace_array *tr = m->private; 7070 char buf[64]; 7071 const char *clockstr; 7072 int ret; 7073 7074 if (cnt >= sizeof(buf)) 7075 return -EINVAL; 7076 7077 if (copy_from_user(buf, ubuf, cnt)) 7078 return -EFAULT; 7079 7080 buf[cnt] = 0; 7081 7082 clockstr = strstrip(buf); 7083 7084 ret = tracing_set_clock(tr, clockstr); 7085 if (ret) 7086 return ret; 7087 7088 *fpos += cnt; 7089 7090 return cnt; 7091 } 7092 7093 static int tracing_clock_open(struct inode *inode, struct file *file) 7094 { 7095 struct trace_array *tr = inode->i_private; 7096 int ret; 7097 7098 ret = tracing_check_open_get_tr(tr); 7099 if (ret) 7100 return ret; 7101 7102 ret = single_open(file, tracing_clock_show, inode->i_private); 7103 if (ret < 0) 7104 trace_array_put(tr); 7105 7106 return ret; 7107 } 7108 7109 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v) 7110 { 7111 struct trace_array *tr = m->private; 7112 7113 mutex_lock(&trace_types_lock); 7114 7115 if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer)) 7116 seq_puts(m, "delta [absolute]\n"); 7117 else 7118 seq_puts(m, "[delta] absolute\n"); 7119 7120 mutex_unlock(&trace_types_lock); 7121 7122 return 0; 7123 } 7124 7125 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file) 7126 { 7127 struct trace_array *tr = inode->i_private; 7128 int ret; 7129 7130 ret = tracing_check_open_get_tr(tr); 7131 if (ret) 7132 return ret; 7133 7134 ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private); 7135 if (ret < 0) 7136 trace_array_put(tr); 7137 7138 return ret; 7139 } 7140 7141 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe) 7142 { 7143 if (rbe == this_cpu_read(trace_buffered_event)) 7144 return ring_buffer_time_stamp(buffer); 7145 7146 return ring_buffer_event_time_stamp(buffer, rbe); 7147 } 7148 7149 /* 7150 * Set or disable using the per CPU trace_buffer_event when possible. 
7151 */ 7152 int tracing_set_filter_buffering(struct trace_array *tr, bool set) 7153 { 7154 int ret = 0; 7155 7156 mutex_lock(&trace_types_lock); 7157 7158 if (set && tr->no_filter_buffering_ref++) 7159 goto out; 7160 7161 if (!set) { 7162 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) { 7163 ret = -EINVAL; 7164 goto out; 7165 } 7166 7167 --tr->no_filter_buffering_ref; 7168 } 7169 out: 7170 mutex_unlock(&trace_types_lock); 7171 7172 return ret; 7173 } 7174 7175 struct ftrace_buffer_info { 7176 struct trace_iterator iter; 7177 void *spare; 7178 unsigned int spare_cpu; 7179 unsigned int spare_size; 7180 unsigned int read; 7181 }; 7182 7183 #ifdef CONFIG_TRACER_SNAPSHOT 7184 static int tracing_snapshot_open(struct inode *inode, struct file *file) 7185 { 7186 struct trace_array *tr = inode->i_private; 7187 struct trace_iterator *iter; 7188 struct seq_file *m; 7189 int ret; 7190 7191 ret = tracing_check_open_get_tr(tr); 7192 if (ret) 7193 return ret; 7194 7195 if (file->f_mode & FMODE_READ) { 7196 iter = __tracing_open(inode, file, true); 7197 if (IS_ERR(iter)) 7198 ret = PTR_ERR(iter); 7199 } else { 7200 /* Writes still need the seq_file to hold the private data */ 7201 ret = -ENOMEM; 7202 m = kzalloc(sizeof(*m), GFP_KERNEL); 7203 if (!m) 7204 goto out; 7205 iter = kzalloc(sizeof(*iter), GFP_KERNEL); 7206 if (!iter) { 7207 kfree(m); 7208 goto out; 7209 } 7210 ret = 0; 7211 7212 iter->tr = tr; 7213 iter->array_buffer = &tr->max_buffer; 7214 iter->cpu_file = tracing_get_cpu(inode); 7215 m->private = iter; 7216 file->private_data = m; 7217 } 7218 out: 7219 if (ret < 0) 7220 trace_array_put(tr); 7221 7222 return ret; 7223 } 7224 7225 static void tracing_swap_cpu_buffer(void *tr) 7226 { 7227 update_max_tr_single((struct trace_array *)tr, current, smp_processor_id()); 7228 } 7229 7230 static ssize_t 7231 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt, 7232 loff_t *ppos) 7233 { 7234 struct seq_file *m = filp->private_data; 7235 struct trace_iterator *iter = m->private; 7236 struct trace_array *tr = iter->tr; 7237 unsigned long val; 7238 int ret; 7239 7240 ret = tracing_update_buffers(tr); 7241 if (ret < 0) 7242 return ret; 7243 7244 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 7245 if (ret) 7246 return ret; 7247 7248 mutex_lock(&trace_types_lock); 7249 7250 if (tr->current_trace->use_max_tr) { 7251 ret = -EBUSY; 7252 goto out; 7253 } 7254 7255 local_irq_disable(); 7256 arch_spin_lock(&tr->max_lock); 7257 if (tr->cond_snapshot) 7258 ret = -EBUSY; 7259 arch_spin_unlock(&tr->max_lock); 7260 local_irq_enable(); 7261 if (ret) 7262 goto out; 7263 7264 switch (val) { 7265 case 0: 7266 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) { 7267 ret = -EINVAL; 7268 break; 7269 } 7270 if (tr->allocated_snapshot) 7271 free_snapshot(tr); 7272 break; 7273 case 1: 7274 /* Only allow per-cpu swap if the ring buffer supports it */ 7275 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP 7276 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) { 7277 ret = -EINVAL; 7278 break; 7279 } 7280 #endif 7281 if (tr->allocated_snapshot) 7282 ret = resize_buffer_duplicate_size(&tr->max_buffer, 7283 &tr->array_buffer, iter->cpu_file); 7284 7285 ret = tracing_arm_snapshot_locked(tr); 7286 if (ret) 7287 break; 7288 7289 /* Now, we're going to swap */ 7290 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) { 7291 local_irq_disable(); 7292 update_max_tr(tr, current, smp_processor_id(), NULL); 7293 local_irq_enable(); 7294 } else { 7295 smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer, 7296 (void *)tr, 1); 7297 } 7298 
tracing_disarm_snapshot(tr); 7299 break; 7300 default: 7301 if (tr->allocated_snapshot) { 7302 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) 7303 tracing_reset_online_cpus(&tr->max_buffer); 7304 else 7305 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file); 7306 } 7307 break; 7308 } 7309 7310 if (ret >= 0) { 7311 *ppos += cnt; 7312 ret = cnt; 7313 } 7314 out: 7315 mutex_unlock(&trace_types_lock); 7316 return ret; 7317 } 7318 7319 static int tracing_snapshot_release(struct inode *inode, struct file *file) 7320 { 7321 struct seq_file *m = file->private_data; 7322 int ret; 7323 7324 ret = tracing_release(inode, file); 7325 7326 if (file->f_mode & FMODE_READ) 7327 return ret; 7328 7329 /* If write only, the seq_file is just a stub */ 7330 if (m) 7331 kfree(m->private); 7332 kfree(m); 7333 7334 return 0; 7335 } 7336 7337 static int tracing_buffers_open(struct inode *inode, struct file *filp); 7338 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf, 7339 size_t count, loff_t *ppos); 7340 static int tracing_buffers_release(struct inode *inode, struct file *file); 7341 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos, 7342 struct pipe_inode_info *pipe, size_t len, unsigned int flags); 7343 7344 static int snapshot_raw_open(struct inode *inode, struct file *filp) 7345 { 7346 struct ftrace_buffer_info *info; 7347 int ret; 7348 7349 /* The following checks for tracefs lockdown */ 7350 ret = tracing_buffers_open(inode, filp); 7351 if (ret < 0) 7352 return ret; 7353 7354 info = filp->private_data; 7355 7356 if (info->iter.trace->use_max_tr) { 7357 tracing_buffers_release(inode, filp); 7358 return -EBUSY; 7359 } 7360 7361 info->iter.snapshot = true; 7362 info->iter.array_buffer = &info->iter.tr->max_buffer; 7363 7364 return ret; 7365 } 7366 7367 #endif /* CONFIG_TRACER_SNAPSHOT */ 7368 7369 7370 static const struct file_operations tracing_thresh_fops = { 7371 .open = tracing_open_generic, 7372 .read = tracing_thresh_read, 7373 .write = tracing_thresh_write, 7374 .llseek = generic_file_llseek, 7375 }; 7376 7377 #ifdef CONFIG_TRACER_MAX_TRACE 7378 static const struct file_operations tracing_max_lat_fops = { 7379 .open = tracing_open_generic_tr, 7380 .read = tracing_max_lat_read, 7381 .write = tracing_max_lat_write, 7382 .llseek = generic_file_llseek, 7383 .release = tracing_release_generic_tr, 7384 }; 7385 #endif 7386 7387 static const struct file_operations set_tracer_fops = { 7388 .open = tracing_open_generic_tr, 7389 .read = tracing_set_trace_read, 7390 .write = tracing_set_trace_write, 7391 .llseek = generic_file_llseek, 7392 .release = tracing_release_generic_tr, 7393 }; 7394 7395 static const struct file_operations tracing_pipe_fops = { 7396 .open = tracing_open_pipe, 7397 .poll = tracing_poll_pipe, 7398 .read = tracing_read_pipe, 7399 .splice_read = tracing_splice_read_pipe, 7400 .release = tracing_release_pipe, 7401 .llseek = no_llseek, 7402 }; 7403 7404 static const struct file_operations tracing_entries_fops = { 7405 .open = tracing_open_generic_tr, 7406 .read = tracing_entries_read, 7407 .write = tracing_entries_write, 7408 .llseek = generic_file_llseek, 7409 .release = tracing_release_generic_tr, 7410 }; 7411 7412 static const struct file_operations tracing_total_entries_fops = { 7413 .open = tracing_open_generic_tr, 7414 .read = tracing_total_entries_read, 7415 .llseek = generic_file_llseek, 7416 .release = tracing_release_generic_tr, 7417 }; 7418 7419 static const struct file_operations tracing_free_buffer_fops = { 7420 .open = 
tracing_open_generic_tr, 7421 .write = tracing_free_buffer_write, 7422 .release = tracing_free_buffer_release, 7423 }; 7424 7425 static const struct file_operations tracing_mark_fops = { 7426 .open = tracing_mark_open, 7427 .write = tracing_mark_write, 7428 .release = tracing_release_generic_tr, 7429 }; 7430 7431 static const struct file_operations tracing_mark_raw_fops = { 7432 .open = tracing_mark_open, 7433 .write = tracing_mark_raw_write, 7434 .release = tracing_release_generic_tr, 7435 }; 7436 7437 static const struct file_operations trace_clock_fops = { 7438 .open = tracing_clock_open, 7439 .read = seq_read, 7440 .llseek = seq_lseek, 7441 .release = tracing_single_release_tr, 7442 .write = tracing_clock_write, 7443 }; 7444 7445 static const struct file_operations trace_time_stamp_mode_fops = { 7446 .open = tracing_time_stamp_mode_open, 7447 .read = seq_read, 7448 .llseek = seq_lseek, 7449 .release = tracing_single_release_tr, 7450 }; 7451 7452 #ifdef CONFIG_TRACER_SNAPSHOT 7453 static const struct file_operations snapshot_fops = { 7454 .open = tracing_snapshot_open, 7455 .read = seq_read, 7456 .write = tracing_snapshot_write, 7457 .llseek = tracing_lseek, 7458 .release = tracing_snapshot_release, 7459 }; 7460 7461 static const struct file_operations snapshot_raw_fops = { 7462 .open = snapshot_raw_open, 7463 .read = tracing_buffers_read, 7464 .release = tracing_buffers_release, 7465 .splice_read = tracing_buffers_splice_read, 7466 .llseek = no_llseek, 7467 }; 7468 7469 #endif /* CONFIG_TRACER_SNAPSHOT */ 7470 7471 /* 7472 * trace_min_max_write - Write a u64 value to a trace_min_max_param struct 7473 * @filp: The active open file structure 7474 * @ubuf: The userspace provided buffer to read value into 7475 * @cnt: The maximum number of bytes to read 7476 * @ppos: The current "file" position 7477 * 7478 * This function implements the write interface for a struct trace_min_max_param. 7479 * The filp->private_data must point to a trace_min_max_param structure that 7480 * defines where to write the value, the min and the max acceptable values, 7481 * and a lock to protect the write. 7482 */ 7483 static ssize_t 7484 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) 7485 { 7486 struct trace_min_max_param *param = filp->private_data; 7487 u64 val; 7488 int err; 7489 7490 if (!param) 7491 return -EFAULT; 7492 7493 err = kstrtoull_from_user(ubuf, cnt, 10, &val); 7494 if (err) 7495 return err; 7496 7497 if (param->lock) 7498 mutex_lock(param->lock); 7499 7500 if (param->min && val < *param->min) 7501 err = -EINVAL; 7502 7503 if (param->max && val > *param->max) 7504 err = -EINVAL; 7505 7506 if (!err) 7507 *param->val = val; 7508 7509 if (param->lock) 7510 mutex_unlock(param->lock); 7511 7512 if (err) 7513 return err; 7514 7515 return cnt; 7516 } 7517 7518 /* 7519 * trace_min_max_read - Read a u64 value from a trace_min_max_param struct 7520 * @filp: The active open file structure 7521 * @ubuf: The userspace provided buffer to read value into 7522 * @cnt: The maximum number of bytes to read 7523 * @ppos: The current "file" position 7524 * 7525 * This function implements the read interface for a struct trace_min_max_param. 7526 * The filp->private_data must point to a trace_min_max_param struct with valid 7527 * data. 
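 *
 * A minimal sketch of how such a file is typically wired up; the names
 * below are hypothetical and only for illustration:
 *
 *	static u64 example_val;
 *	static u64 example_min;
 *	static u64 example_max = 100;
 *	static DEFINE_MUTEX(example_lock);
 *
 *	static struct trace_min_max_param example_param = {
 *		.lock	= &example_lock,
 *		.val	= &example_val,
 *		.min	= &example_min,
 *		.max	= &example_max,
 *	};
 *
 *	trace_create_file("example", TRACE_MODE_WRITE, parent,
 *			  &example_param, &trace_min_max_fops);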
7528 */ 7529 static ssize_t 7530 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) 7531 { 7532 struct trace_min_max_param *param = filp->private_data; 7533 char buf[U64_STR_SIZE]; 7534 int len; 7535 u64 val; 7536 7537 if (!param) 7538 return -EFAULT; 7539 7540 val = *param->val; 7541 7542 if (cnt > sizeof(buf)) 7543 cnt = sizeof(buf); 7544 7545 len = snprintf(buf, sizeof(buf), "%llu\n", val); 7546 7547 return simple_read_from_buffer(ubuf, cnt, ppos, buf, len); 7548 } 7549 7550 const struct file_operations trace_min_max_fops = { 7551 .open = tracing_open_generic, 7552 .read = trace_min_max_read, 7553 .write = trace_min_max_write, 7554 }; 7555 7556 #define TRACING_LOG_ERRS_MAX 8 7557 #define TRACING_LOG_LOC_MAX 128 7558 7559 #define CMD_PREFIX " Command: " 7560 7561 struct err_info { 7562 const char **errs; /* ptr to loc-specific array of err strings */ 7563 u8 type; /* index into errs -> specific err string */ 7564 u16 pos; /* caret position */ 7565 u64 ts; 7566 }; 7567 7568 struct tracing_log_err { 7569 struct list_head list; 7570 struct err_info info; 7571 char loc[TRACING_LOG_LOC_MAX]; /* err location */ 7572 char *cmd; /* what caused err */ 7573 }; 7574 7575 static DEFINE_MUTEX(tracing_err_log_lock); 7576 7577 static struct tracing_log_err *alloc_tracing_log_err(int len) 7578 { 7579 struct tracing_log_err *err; 7580 7581 err = kzalloc(sizeof(*err), GFP_KERNEL); 7582 if (!err) 7583 return ERR_PTR(-ENOMEM); 7584 7585 err->cmd = kzalloc(len, GFP_KERNEL); 7586 if (!err->cmd) { 7587 kfree(err); 7588 return ERR_PTR(-ENOMEM); 7589 } 7590 7591 return err; 7592 } 7593 7594 static void free_tracing_log_err(struct tracing_log_err *err) 7595 { 7596 kfree(err->cmd); 7597 kfree(err); 7598 } 7599 7600 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr, 7601 int len) 7602 { 7603 struct tracing_log_err *err; 7604 char *cmd; 7605 7606 if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) { 7607 err = alloc_tracing_log_err(len); 7608 if (PTR_ERR(err) != -ENOMEM) 7609 tr->n_err_log_entries++; 7610 7611 return err; 7612 } 7613 cmd = kzalloc(len, GFP_KERNEL); 7614 if (!cmd) 7615 return ERR_PTR(-ENOMEM); 7616 err = list_first_entry(&tr->err_log, struct tracing_log_err, list); 7617 kfree(err->cmd); 7618 err->cmd = cmd; 7619 list_del(&err->list); 7620 7621 return err; 7622 } 7623 7624 /** 7625 * err_pos - find the position of a string within a command for error careting 7626 * @cmd: The tracing command that caused the error 7627 * @str: The string to position the caret at within @cmd 7628 * 7629 * Finds the position of the first occurrence of @str within @cmd. The 7630 * return value can be passed to tracing_log_err() for caret placement 7631 * within @cmd. 7632 * 7633 * Returns the index within @cmd of the first occurrence of @str or 0 7634 * if @str was not found. 
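 *
 * For example, err_pos("snapshot:bad", "bad") returns 9, which places
 * the caret under the 'b' of "bad" when the error is logged.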
7635 */ 7636 unsigned int err_pos(char *cmd, const char *str) 7637 { 7638 char *found; 7639 7640 if (WARN_ON(!strlen(cmd))) 7641 return 0; 7642 7643 found = strstr(cmd, str); 7644 if (found) 7645 return found - cmd; 7646 7647 return 0; 7648 } 7649 7650 /** 7651 * tracing_log_err - write an error to the tracing error log 7652 * @tr: The associated trace array for the error (NULL for top level array) 7653 * @loc: A string describing where the error occurred 7654 * @cmd: The tracing command that caused the error 7655 * @errs: The array of loc-specific static error strings 7656 * @type: The index into errs[], which produces the specific static err string 7657 * @pos: The position the caret should be placed in the cmd 7658 * 7659 * Writes an error into tracing/error_log of the form: 7660 * 7661 * <loc>: error: <text> 7662 * Command: <cmd> 7663 * ^ 7664 * 7665 * tracing/error_log is a small log file containing the last 7666 * TRACING_LOG_ERRS_MAX errors (8). Memory for errors isn't allocated 7667 * unless there has been a tracing error, and the error log can be 7668 * cleared and have its memory freed by writing the empty string in 7669 * truncation mode to it i.e. echo > tracing/error_log. 7670 * 7671 * NOTE: the @errs array along with the @type param are used to 7672 * produce a static error string - this string is not copied and saved 7673 * when the error is logged - only a pointer to it is saved. See 7674 * existing callers for examples of how static strings are typically 7675 * defined for use with tracing_log_err(). 7676 */ 7677 void tracing_log_err(struct trace_array *tr, 7678 const char *loc, const char *cmd, 7679 const char **errs, u8 type, u16 pos) 7680 { 7681 struct tracing_log_err *err; 7682 int len = 0; 7683 7684 if (!tr) 7685 tr = &global_trace; 7686 7687 len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1; 7688 7689 mutex_lock(&tracing_err_log_lock); 7690 err = get_tracing_log_err(tr, len); 7691 if (PTR_ERR(err) == -ENOMEM) { 7692 mutex_unlock(&tracing_err_log_lock); 7693 return; 7694 } 7695 7696 snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc); 7697 snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd); 7698 7699 err->info.errs = errs; 7700 err->info.type = type; 7701 err->info.pos = pos; 7702 err->info.ts = local_clock(); 7703 7704 list_add_tail(&err->list, &tr->err_log); 7705 mutex_unlock(&tracing_err_log_lock); 7706 } 7707 7708 static void clear_tracing_err_log(struct trace_array *tr) 7709 { 7710 struct tracing_log_err *err, *next; 7711 7712 mutex_lock(&tracing_err_log_lock); 7713 list_for_each_entry_safe(err, next, &tr->err_log, list) { 7714 list_del(&err->list); 7715 free_tracing_log_err(err); 7716 } 7717 7718 tr->n_err_log_entries = 0; 7719 mutex_unlock(&tracing_err_log_lock); 7720 } 7721 7722 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos) 7723 { 7724 struct trace_array *tr = m->private; 7725 7726 mutex_lock(&tracing_err_log_lock); 7727 7728 return seq_list_start(&tr->err_log, *pos); 7729 } 7730 7731 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos) 7732 { 7733 struct trace_array *tr = m->private; 7734 7735 return seq_list_next(v, &tr->err_log, pos); 7736 } 7737 7738 static void tracing_err_log_seq_stop(struct seq_file *m, void *v) 7739 { 7740 mutex_unlock(&tracing_err_log_lock); 7741 } 7742 7743 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos) 7744 { 7745 u16 i; 7746 7747 for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++) 7748 seq_putc(m, ' '); 7749 for (i = 0; i < pos; i++) 7750 
seq_putc(m, ' '); 7751 seq_puts(m, "^\n"); 7752 } 7753 7754 static int tracing_err_log_seq_show(struct seq_file *m, void *v) 7755 { 7756 struct tracing_log_err *err = v; 7757 7758 if (err) { 7759 const char *err_text = err->info.errs[err->info.type]; 7760 u64 sec = err->info.ts; 7761 u32 nsec; 7762 7763 nsec = do_div(sec, NSEC_PER_SEC); 7764 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000, 7765 err->loc, err_text); 7766 seq_printf(m, "%s", err->cmd); 7767 tracing_err_log_show_pos(m, err->info.pos); 7768 } 7769 7770 return 0; 7771 } 7772 7773 static const struct seq_operations tracing_err_log_seq_ops = { 7774 .start = tracing_err_log_seq_start, 7775 .next = tracing_err_log_seq_next, 7776 .stop = tracing_err_log_seq_stop, 7777 .show = tracing_err_log_seq_show 7778 }; 7779 7780 static int tracing_err_log_open(struct inode *inode, struct file *file) 7781 { 7782 struct trace_array *tr = inode->i_private; 7783 int ret = 0; 7784 7785 ret = tracing_check_open_get_tr(tr); 7786 if (ret) 7787 return ret; 7788 7789 /* If this file was opened for write, then erase contents */ 7790 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) 7791 clear_tracing_err_log(tr); 7792 7793 if (file->f_mode & FMODE_READ) { 7794 ret = seq_open(file, &tracing_err_log_seq_ops); 7795 if (!ret) { 7796 struct seq_file *m = file->private_data; 7797 m->private = tr; 7798 } else { 7799 trace_array_put(tr); 7800 } 7801 } 7802 return ret; 7803 } 7804 7805 static ssize_t tracing_err_log_write(struct file *file, 7806 const char __user *buffer, 7807 size_t count, loff_t *ppos) 7808 { 7809 return count; 7810 } 7811 7812 static int tracing_err_log_release(struct inode *inode, struct file *file) 7813 { 7814 struct trace_array *tr = inode->i_private; 7815 7816 trace_array_put(tr); 7817 7818 if (file->f_mode & FMODE_READ) 7819 seq_release(inode, file); 7820 7821 return 0; 7822 } 7823 7824 static const struct file_operations tracing_err_log_fops = { 7825 .open = tracing_err_log_open, 7826 .write = tracing_err_log_write, 7827 .read = seq_read, 7828 .llseek = tracing_lseek, 7829 .release = tracing_err_log_release, 7830 }; 7831 7832 static int tracing_buffers_open(struct inode *inode, struct file *filp) 7833 { 7834 struct trace_array *tr = inode->i_private; 7835 struct ftrace_buffer_info *info; 7836 int ret; 7837 7838 ret = tracing_check_open_get_tr(tr); 7839 if (ret) 7840 return ret; 7841 7842 info = kvzalloc(sizeof(*info), GFP_KERNEL); 7843 if (!info) { 7844 trace_array_put(tr); 7845 return -ENOMEM; 7846 } 7847 7848 mutex_lock(&trace_types_lock); 7849 7850 info->iter.tr = tr; 7851 info->iter.cpu_file = tracing_get_cpu(inode); 7852 info->iter.trace = tr->current_trace; 7853 info->iter.array_buffer = &tr->array_buffer; 7854 info->spare = NULL; 7855 /* Force reading ring buffer for first read */ 7856 info->read = (unsigned int)-1; 7857 7858 filp->private_data = info; 7859 7860 tr->trace_ref++; 7861 7862 mutex_unlock(&trace_types_lock); 7863 7864 ret = nonseekable_open(inode, filp); 7865 if (ret < 0) 7866 trace_array_put(tr); 7867 7868 return ret; 7869 } 7870 7871 static __poll_t 7872 tracing_buffers_poll(struct file *filp, poll_table *poll_table) 7873 { 7874 struct ftrace_buffer_info *info = filp->private_data; 7875 struct trace_iterator *iter = &info->iter; 7876 7877 return trace_poll(iter, filp, poll_table); 7878 } 7879 7880 static ssize_t 7881 tracing_buffers_read(struct file *filp, char __user *ubuf, 7882 size_t count, loff_t *ppos) 7883 { 7884 struct ftrace_buffer_info *info = filp->private_data; 7885 struct trace_iterator 
*iter = &info->iter; 7886 void *trace_data; 7887 int page_size; 7888 ssize_t ret = 0; 7889 ssize_t size; 7890 7891 if (!count) 7892 return 0; 7893 7894 #ifdef CONFIG_TRACER_MAX_TRACE 7895 if (iter->snapshot && iter->tr->current_trace->use_max_tr) 7896 return -EBUSY; 7897 #endif 7898 7899 page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer); 7900 7901 /* Make sure the spare matches the current sub buffer size */ 7902 if (info->spare) { 7903 if (page_size != info->spare_size) { 7904 ring_buffer_free_read_page(iter->array_buffer->buffer, 7905 info->spare_cpu, info->spare); 7906 info->spare = NULL; 7907 } 7908 } 7909 7910 if (!info->spare) { 7911 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer, 7912 iter->cpu_file); 7913 if (IS_ERR(info->spare)) { 7914 ret = PTR_ERR(info->spare); 7915 info->spare = NULL; 7916 } else { 7917 info->spare_cpu = iter->cpu_file; 7918 info->spare_size = page_size; 7919 } 7920 } 7921 if (!info->spare) 7922 return ret; 7923 7924 /* Do we have previous read data to read? */ 7925 if (info->read < page_size) 7926 goto read; 7927 7928 again: 7929 trace_access_lock(iter->cpu_file); 7930 ret = ring_buffer_read_page(iter->array_buffer->buffer, 7931 info->spare, 7932 count, 7933 iter->cpu_file, 0); 7934 trace_access_unlock(iter->cpu_file); 7935 7936 if (ret < 0) { 7937 if (trace_empty(iter)) { 7938 if ((filp->f_flags & O_NONBLOCK)) 7939 return -EAGAIN; 7940 7941 ret = wait_on_pipe(iter, 0); 7942 if (ret) 7943 return ret; 7944 7945 goto again; 7946 } 7947 return 0; 7948 } 7949 7950 info->read = 0; 7951 read: 7952 size = page_size - info->read; 7953 if (size > count) 7954 size = count; 7955 trace_data = ring_buffer_read_page_data(info->spare); 7956 ret = copy_to_user(ubuf, trace_data + info->read, size); 7957 if (ret == size) 7958 return -EFAULT; 7959 7960 size -= ret; 7961 7962 *ppos += size; 7963 info->read += size; 7964 7965 return size; 7966 } 7967 7968 static int tracing_buffers_flush(struct file *file, fl_owner_t id) 7969 { 7970 struct ftrace_buffer_info *info = file->private_data; 7971 struct trace_iterator *iter = &info->iter; 7972 7973 iter->closed = true; 7974 /* Make sure the waiters see the new wait_index */ 7975 (void)atomic_fetch_inc_release(&iter->wait_index); 7976 7977 ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file); 7978 7979 return 0; 7980 } 7981 7982 static int tracing_buffers_release(struct inode *inode, struct file *file) 7983 { 7984 struct ftrace_buffer_info *info = file->private_data; 7985 struct trace_iterator *iter = &info->iter; 7986 7987 mutex_lock(&trace_types_lock); 7988 7989 iter->tr->trace_ref--; 7990 7991 __trace_array_put(iter->tr); 7992 7993 if (info->spare) 7994 ring_buffer_free_read_page(iter->array_buffer->buffer, 7995 info->spare_cpu, info->spare); 7996 kvfree(info); 7997 7998 mutex_unlock(&trace_types_lock); 7999 8000 return 0; 8001 } 8002 8003 struct buffer_ref { 8004 struct trace_buffer *buffer; 8005 void *page; 8006 int cpu; 8007 refcount_t refcount; 8008 }; 8009 8010 static void buffer_ref_release(struct buffer_ref *ref) 8011 { 8012 if (!refcount_dec_and_test(&ref->refcount)) 8013 return; 8014 ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page); 8015 kfree(ref); 8016 } 8017 8018 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe, 8019 struct pipe_buffer *buf) 8020 { 8021 struct buffer_ref *ref = (struct buffer_ref *)buf->private; 8022 8023 buffer_ref_release(ref); 8024 buf->private = 0; 8025 } 8026 8027 static bool buffer_pipe_buf_get(struct pipe_inode_info 
*pipe, 8028 struct pipe_buffer *buf) 8029 { 8030 struct buffer_ref *ref = (struct buffer_ref *)buf->private; 8031 8032 if (refcount_read(&ref->refcount) > INT_MAX/2) 8033 return false; 8034 8035 refcount_inc(&ref->refcount); 8036 return true; 8037 } 8038 8039 /* Pipe buffer operations for a buffer. */ 8040 static const struct pipe_buf_operations buffer_pipe_buf_ops = { 8041 .release = buffer_pipe_buf_release, 8042 .get = buffer_pipe_buf_get, 8043 }; 8044 8045 /* 8046 * Callback from splice_to_pipe(), if we need to release some pages 8047 * at the end of the spd in case we error'ed out in filling the pipe. 8048 */ 8049 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i) 8050 { 8051 struct buffer_ref *ref = 8052 (struct buffer_ref *)spd->partial[i].private; 8053 8054 buffer_ref_release(ref); 8055 spd->partial[i].private = 0; 8056 } 8057 8058 static ssize_t 8059 tracing_buffers_splice_read(struct file *file, loff_t *ppos, 8060 struct pipe_inode_info *pipe, size_t len, 8061 unsigned int flags) 8062 { 8063 struct ftrace_buffer_info *info = file->private_data; 8064 struct trace_iterator *iter = &info->iter; 8065 struct partial_page partial_def[PIPE_DEF_BUFFERS]; 8066 struct page *pages_def[PIPE_DEF_BUFFERS]; 8067 struct splice_pipe_desc spd = { 8068 .pages = pages_def, 8069 .partial = partial_def, 8070 .nr_pages_max = PIPE_DEF_BUFFERS, 8071 .ops = &buffer_pipe_buf_ops, 8072 .spd_release = buffer_spd_release, 8073 }; 8074 struct buffer_ref *ref; 8075 bool woken = false; 8076 int page_size; 8077 int entries, i; 8078 ssize_t ret = 0; 8079 8080 #ifdef CONFIG_TRACER_MAX_TRACE 8081 if (iter->snapshot && iter->tr->current_trace->use_max_tr) 8082 return -EBUSY; 8083 #endif 8084 8085 page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer); 8086 if (*ppos & (page_size - 1)) 8087 return -EINVAL; 8088 8089 if (len & (page_size - 1)) { 8090 if (len < page_size) 8091 return -EINVAL; 8092 len &= (~(page_size - 1)); 8093 } 8094 8095 if (splice_grow_spd(pipe, &spd)) 8096 return -ENOMEM; 8097 8098 again: 8099 trace_access_lock(iter->cpu_file); 8100 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file); 8101 8102 for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) { 8103 struct page *page; 8104 int r; 8105 8106 ref = kzalloc(sizeof(*ref), GFP_KERNEL); 8107 if (!ref) { 8108 ret = -ENOMEM; 8109 break; 8110 } 8111 8112 refcount_set(&ref->refcount, 1); 8113 ref->buffer = iter->array_buffer->buffer; 8114 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file); 8115 if (IS_ERR(ref->page)) { 8116 ret = PTR_ERR(ref->page); 8117 ref->page = NULL; 8118 kfree(ref); 8119 break; 8120 } 8121 ref->cpu = iter->cpu_file; 8122 8123 r = ring_buffer_read_page(ref->buffer, ref->page, 8124 len, iter->cpu_file, 1); 8125 if (r < 0) { 8126 ring_buffer_free_read_page(ref->buffer, ref->cpu, 8127 ref->page); 8128 kfree(ref); 8129 break; 8130 } 8131 8132 page = virt_to_page(ring_buffer_read_page_data(ref->page)); 8133 8134 spd.pages[i] = page; 8135 spd.partial[i].len = page_size; 8136 spd.partial[i].offset = 0; 8137 spd.partial[i].private = (unsigned long)ref; 8138 spd.nr_pages++; 8139 *ppos += page_size; 8140 8141 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file); 8142 } 8143 8144 trace_access_unlock(iter->cpu_file); 8145 spd.nr_pages = i; 8146 8147 /* did we read anything? 
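 * If not: propagate any earlier error, return -EAGAIN to non-blocking
 * readers, otherwise wait for data (with the watermark passed to
 * wait_on_pipe() below) and retry exactly once before giving up.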
*/ 8148 if (!spd.nr_pages) { 8149 8150 if (ret) 8151 goto out; 8152 8153 if (woken) 8154 goto out; 8155 8156 ret = -EAGAIN; 8157 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK)) 8158 goto out; 8159 8160 ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent); 8161 if (ret) 8162 goto out; 8163 8164 /* No need to wait after waking up when tracing is off */ 8165 if (!tracer_tracing_is_on(iter->tr)) 8166 goto out; 8167 8168 /* Iterate one more time to collect any new data then exit */ 8169 woken = true; 8170 8171 goto again; 8172 } 8173 8174 ret = splice_to_pipe(pipe, &spd); 8175 out: 8176 splice_shrink_spd(&spd); 8177 8178 return ret; 8179 } 8180 8181 /* An ioctl call with cmd 0 to the ring buffer file will wake up all waiters */ 8182 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 8183 { 8184 struct ftrace_buffer_info *info = file->private_data; 8185 struct trace_iterator *iter = &info->iter; 8186 8187 if (cmd) 8188 return -ENOIOCTLCMD; 8189 8190 mutex_lock(&trace_types_lock); 8191 8192 /* Make sure the waiters see the new wait_index */ 8193 (void)atomic_fetch_inc_release(&iter->wait_index); 8194 8195 ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file); 8196 8197 mutex_unlock(&trace_types_lock); 8198 return 0; 8199 } 8200 8201 static const struct file_operations tracing_buffers_fops = { 8202 .open = tracing_buffers_open, 8203 .read = tracing_buffers_read, 8204 .poll = tracing_buffers_poll, 8205 .release = tracing_buffers_release, 8206 .flush = tracing_buffers_flush, 8207 .splice_read = tracing_buffers_splice_read, 8208 .unlocked_ioctl = tracing_buffers_ioctl, 8209 .llseek = no_llseek, 8210 }; 8211 8212 static ssize_t 8213 tracing_stats_read(struct file *filp, char __user *ubuf, 8214 size_t count, loff_t *ppos) 8215 { 8216 struct inode *inode = file_inode(filp); 8217 struct trace_array *tr = inode->i_private; 8218 struct array_buffer *trace_buf = &tr->array_buffer; 8219 int cpu = tracing_get_cpu(inode); 8220 struct trace_seq *s; 8221 unsigned long cnt; 8222 unsigned long long t; 8223 unsigned long usec_rem; 8224 8225 s = kmalloc(sizeof(*s), GFP_KERNEL); 8226 if (!s) 8227 return -ENOMEM; 8228 8229 trace_seq_init(s); 8230 8231 cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu); 8232 trace_seq_printf(s, "entries: %ld\n", cnt); 8233 8234 cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu); 8235 trace_seq_printf(s, "overrun: %ld\n", cnt); 8236 8237 cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu); 8238 trace_seq_printf(s, "commit overrun: %ld\n", cnt); 8239 8240 cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu); 8241 trace_seq_printf(s, "bytes: %ld\n", cnt); 8242 8243 if (trace_clocks[tr->clock_id].in_ns) { 8244 /* local or global for trace_clock */ 8245 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu)); 8246 usec_rem = do_div(t, USEC_PER_SEC); 8247 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n", 8248 t, usec_rem); 8249 8250 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer)); 8251 usec_rem = do_div(t, USEC_PER_SEC); 8252 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem); 8253 } else { 8254 /* counter or tsc mode for trace_clock */ 8255 trace_seq_printf(s, "oldest event ts: %llu\n", 8256 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu)); 8257 8258 trace_seq_printf(s, "now ts: %llu\n", 8259 ring_buffer_time_stamp(trace_buf->buffer)); 8260 } 8261 8262 cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu); 8263 trace_seq_printf(s, "dropped events: 
%ld\n", cnt); 8264 8265 cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu); 8266 trace_seq_printf(s, "read events: %ld\n", cnt); 8267 8268 count = simple_read_from_buffer(ubuf, count, ppos, 8269 s->buffer, trace_seq_used(s)); 8270 8271 kfree(s); 8272 8273 return count; 8274 } 8275 8276 static const struct file_operations tracing_stats_fops = { 8277 .open = tracing_open_generic_tr, 8278 .read = tracing_stats_read, 8279 .llseek = generic_file_llseek, 8280 .release = tracing_release_generic_tr, 8281 }; 8282 8283 #ifdef CONFIG_DYNAMIC_FTRACE 8284 8285 static ssize_t 8286 tracing_read_dyn_info(struct file *filp, char __user *ubuf, 8287 size_t cnt, loff_t *ppos) 8288 { 8289 ssize_t ret; 8290 char *buf; 8291 int r; 8292 8293 /* 256 should be plenty to hold the amount needed */ 8294 buf = kmalloc(256, GFP_KERNEL); 8295 if (!buf) 8296 return -ENOMEM; 8297 8298 r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n", 8299 ftrace_update_tot_cnt, 8300 ftrace_number_of_pages, 8301 ftrace_number_of_groups); 8302 8303 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 8304 kfree(buf); 8305 return ret; 8306 } 8307 8308 static const struct file_operations tracing_dyn_info_fops = { 8309 .open = tracing_open_generic, 8310 .read = tracing_read_dyn_info, 8311 .llseek = generic_file_llseek, 8312 }; 8313 #endif /* CONFIG_DYNAMIC_FTRACE */ 8314 8315 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) 8316 static void 8317 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, 8318 struct trace_array *tr, struct ftrace_probe_ops *ops, 8319 void *data) 8320 { 8321 tracing_snapshot_instance(tr); 8322 } 8323 8324 static void 8325 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, 8326 struct trace_array *tr, struct ftrace_probe_ops *ops, 8327 void *data) 8328 { 8329 struct ftrace_func_mapper *mapper = data; 8330 long *count = NULL; 8331 8332 if (mapper) 8333 count = (long *)ftrace_func_mapper_find_ip(mapper, ip); 8334 8335 if (count) { 8336 8337 if (*count <= 0) 8338 return; 8339 8340 (*count)--; 8341 } 8342 8343 tracing_snapshot_instance(tr); 8344 } 8345 8346 static int 8347 ftrace_snapshot_print(struct seq_file *m, unsigned long ip, 8348 struct ftrace_probe_ops *ops, void *data) 8349 { 8350 struct ftrace_func_mapper *mapper = data; 8351 long *count = NULL; 8352 8353 seq_printf(m, "%ps:", (void *)ip); 8354 8355 seq_puts(m, "snapshot"); 8356 8357 if (mapper) 8358 count = (long *)ftrace_func_mapper_find_ip(mapper, ip); 8359 8360 if (count) 8361 seq_printf(m, ":count=%ld\n", *count); 8362 else 8363 seq_puts(m, ":unlimited\n"); 8364 8365 return 0; 8366 } 8367 8368 static int 8369 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr, 8370 unsigned long ip, void *init_data, void **data) 8371 { 8372 struct ftrace_func_mapper *mapper = *data; 8373 8374 if (!mapper) { 8375 mapper = allocate_ftrace_func_mapper(); 8376 if (!mapper) 8377 return -ENOMEM; 8378 *data = mapper; 8379 } 8380 8381 return ftrace_func_mapper_add_ip(mapper, ip, init_data); 8382 } 8383 8384 static void 8385 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr, 8386 unsigned long ip, void *data) 8387 { 8388 struct ftrace_func_mapper *mapper = data; 8389 8390 if (!ip) { 8391 if (!mapper) 8392 return; 8393 free_ftrace_func_mapper(mapper, NULL); 8394 return; 8395 } 8396 8397 ftrace_func_mapper_remove_ip(mapper, ip); 8398 } 8399 8400 static struct ftrace_probe_ops snapshot_probe_ops = { 8401 .func = ftrace_snapshot, 8402 .print = ftrace_snapshot_print, 8403 }; 8404 8405 
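/*
 * snapshot_probe_ops above backs the plain "snapshot" function command,
 * snapshot_count_probe_ops below the counted ":snapshot:N" form. Both
 * are selected by ftrace_trace_snapshot_callback() when the command is
 * written to set_ftrace_filter, e.g.:
 *
 *   echo '<function>:snapshot'   > set_ftrace_filter
 *   echo '<function>:snapshot:3' > set_ftrace_filter
 *   echo '!<function>:snapshot'  > set_ftrace_filter	(remove the probe)
 */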
static struct ftrace_probe_ops snapshot_count_probe_ops = { 8406 .func = ftrace_count_snapshot, 8407 .print = ftrace_snapshot_print, 8408 .init = ftrace_snapshot_init, 8409 .free = ftrace_snapshot_free, 8410 }; 8411 8412 static int 8413 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash, 8414 char *glob, char *cmd, char *param, int enable) 8415 { 8416 struct ftrace_probe_ops *ops; 8417 void *count = (void *)-1; 8418 char *number; 8419 int ret; 8420 8421 if (!tr) 8422 return -ENODEV; 8423 8424 /* hash funcs only work with set_ftrace_filter */ 8425 if (!enable) 8426 return -EINVAL; 8427 8428 ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops; 8429 8430 if (glob[0] == '!') { 8431 ret = unregister_ftrace_function_probe_func(glob+1, tr, ops); 8432 if (!ret) 8433 tracing_disarm_snapshot(tr); 8434 8435 return ret; 8436 } 8437 8438 if (!param) 8439 goto out_reg; 8440 8441 number = strsep(¶m, ":"); 8442 8443 if (!strlen(number)) 8444 goto out_reg; 8445 8446 /* 8447 * We use the callback data field (which is a pointer) 8448 * as our counter. 8449 */ 8450 ret = kstrtoul(number, 0, (unsigned long *)&count); 8451 if (ret) 8452 return ret; 8453 8454 out_reg: 8455 ret = tracing_arm_snapshot(tr); 8456 if (ret < 0) 8457 goto out; 8458 8459 ret = register_ftrace_function_probe(glob, tr, ops, count); 8460 if (ret < 0) 8461 tracing_disarm_snapshot(tr); 8462 out: 8463 return ret < 0 ? ret : 0; 8464 } 8465 8466 static struct ftrace_func_command ftrace_snapshot_cmd = { 8467 .name = "snapshot", 8468 .func = ftrace_trace_snapshot_callback, 8469 }; 8470 8471 static __init int register_snapshot_cmd(void) 8472 { 8473 return register_ftrace_command(&ftrace_snapshot_cmd); 8474 } 8475 #else 8476 static inline __init int register_snapshot_cmd(void) { return 0; } 8477 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */ 8478 8479 static struct dentry *tracing_get_dentry(struct trace_array *tr) 8480 { 8481 if (WARN_ON(!tr->dir)) 8482 return ERR_PTR(-ENODEV); 8483 8484 /* Top directory uses NULL as the parent */ 8485 if (tr->flags & TRACE_ARRAY_FL_GLOBAL) 8486 return NULL; 8487 8488 /* All sub buffers have a descriptor */ 8489 return tr->dir; 8490 } 8491 8492 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu) 8493 { 8494 struct dentry *d_tracer; 8495 8496 if (tr->percpu_dir) 8497 return tr->percpu_dir; 8498 8499 d_tracer = tracing_get_dentry(tr); 8500 if (IS_ERR(d_tracer)) 8501 return NULL; 8502 8503 tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer); 8504 8505 MEM_FAIL(!tr->percpu_dir, 8506 "Could not create tracefs directory 'per_cpu/%d'\n", cpu); 8507 8508 return tr->percpu_dir; 8509 } 8510 8511 static struct dentry * 8512 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent, 8513 void *data, long cpu, const struct file_operations *fops) 8514 { 8515 struct dentry *ret = trace_create_file(name, mode, parent, data, fops); 8516 8517 if (ret) /* See tracing_get_cpu() */ 8518 d_inode(ret)->i_cdev = (void *)(cpu + 1); 8519 return ret; 8520 } 8521 8522 static void 8523 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu) 8524 { 8525 struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu); 8526 struct dentry *d_cpu; 8527 char cpu_dir[30]; /* 30 characters should be more than enough */ 8528 8529 if (!d_percpu) 8530 return; 8531 8532 snprintf(cpu_dir, 30, "cpu%ld", cpu); 8533 d_cpu = tracefs_create_dir(cpu_dir, d_percpu); 8534 if (!d_cpu) { 8535 pr_warn("Could not create tracefs '%s' entry\n", 
cpu_dir); 8536 return; 8537 } 8538 8539 /* per cpu trace_pipe */ 8540 trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu, 8541 tr, cpu, &tracing_pipe_fops); 8542 8543 /* per cpu trace */ 8544 trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu, 8545 tr, cpu, &tracing_fops); 8546 8547 trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu, 8548 tr, cpu, &tracing_buffers_fops); 8549 8550 trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu, 8551 tr, cpu, &tracing_stats_fops); 8552 8553 trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu, 8554 tr, cpu, &tracing_entries_fops); 8555 8556 #ifdef CONFIG_TRACER_SNAPSHOT 8557 trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu, 8558 tr, cpu, &snapshot_fops); 8559 8560 trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu, 8561 tr, cpu, &snapshot_raw_fops); 8562 #endif 8563 } 8564 8565 #ifdef CONFIG_FTRACE_SELFTEST 8566 /* Let selftest have access to static functions in this file */ 8567 #include "trace_selftest.c" 8568 #endif 8569 8570 static ssize_t 8571 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt, 8572 loff_t *ppos) 8573 { 8574 struct trace_option_dentry *topt = filp->private_data; 8575 char *buf; 8576 8577 if (topt->flags->val & topt->opt->bit) 8578 buf = "1\n"; 8579 else 8580 buf = "0\n"; 8581 8582 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2); 8583 } 8584 8585 static ssize_t 8586 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt, 8587 loff_t *ppos) 8588 { 8589 struct trace_option_dentry *topt = filp->private_data; 8590 unsigned long val; 8591 int ret; 8592 8593 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 8594 if (ret) 8595 return ret; 8596 8597 if (val != 0 && val != 1) 8598 return -EINVAL; 8599 8600 if (!!(topt->flags->val & topt->opt->bit) != val) { 8601 mutex_lock(&trace_types_lock); 8602 ret = __set_tracer_option(topt->tr, topt->flags, 8603 topt->opt, !val); 8604 mutex_unlock(&trace_types_lock); 8605 if (ret) 8606 return ret; 8607 } 8608 8609 *ppos += cnt; 8610 8611 return cnt; 8612 } 8613 8614 static int tracing_open_options(struct inode *inode, struct file *filp) 8615 { 8616 struct trace_option_dentry *topt = inode->i_private; 8617 int ret; 8618 8619 ret = tracing_check_open_get_tr(topt->tr); 8620 if (ret) 8621 return ret; 8622 8623 filp->private_data = inode->i_private; 8624 return 0; 8625 } 8626 8627 static int tracing_release_options(struct inode *inode, struct file *file) 8628 { 8629 struct trace_option_dentry *topt = file->private_data; 8630 8631 trace_array_put(topt->tr); 8632 return 0; 8633 } 8634 8635 static const struct file_operations trace_options_fops = { 8636 .open = tracing_open_options, 8637 .read = trace_options_read, 8638 .write = trace_options_write, 8639 .llseek = generic_file_llseek, 8640 .release = tracing_release_options, 8641 }; 8642 8643 /* 8644 * In order to pass in both the trace_array descriptor as well as the index 8645 * to the flag that the trace option file represents, the trace_array 8646 * has a character array of trace_flags_index[], which holds the index 8647 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc. 8648 * The address of this character array is passed to the flag option file 8649 * read/write callbacks. 8650 * 8651 * In order to extract both the index and the trace_array descriptor, 8652 * get_tr_index() uses the following algorithm. 8653 * 8654 * idx = *ptr; 8655 * 8656 * As the pointer itself contains the address of the index (remember 8657 * index[1] == 1). 
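 * For example, if index[] starts at address A, the option file for
 * bit 3 is handed ptr == &index[3] == A + 3, and so idx == *ptr == 3.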
8658 * 8659 * Then to get the trace_array descriptor, by subtracting that index 8660 * from the ptr, we get to the start of the index itself. 8661 * 8662 * ptr - idx == &index[0] 8663 * 8664 * Then a simple container_of() from that pointer gets us to the 8665 * trace_array descriptor. 8666 */ 8667 static void get_tr_index(void *data, struct trace_array **ptr, 8668 unsigned int *pindex) 8669 { 8670 *pindex = *(unsigned char *)data; 8671 8672 *ptr = container_of(data - *pindex, struct trace_array, 8673 trace_flags_index); 8674 } 8675 8676 static ssize_t 8677 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt, 8678 loff_t *ppos) 8679 { 8680 void *tr_index = filp->private_data; 8681 struct trace_array *tr; 8682 unsigned int index; 8683 char *buf; 8684 8685 get_tr_index(tr_index, &tr, &index); 8686 8687 if (tr->trace_flags & (1 << index)) 8688 buf = "1\n"; 8689 else 8690 buf = "0\n"; 8691 8692 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2); 8693 } 8694 8695 static ssize_t 8696 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt, 8697 loff_t *ppos) 8698 { 8699 void *tr_index = filp->private_data; 8700 struct trace_array *tr; 8701 unsigned int index; 8702 unsigned long val; 8703 int ret; 8704 8705 get_tr_index(tr_index, &tr, &index); 8706 8707 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 8708 if (ret) 8709 return ret; 8710 8711 if (val != 0 && val != 1) 8712 return -EINVAL; 8713 8714 mutex_lock(&event_mutex); 8715 mutex_lock(&trace_types_lock); 8716 ret = set_tracer_flag(tr, 1 << index, val); 8717 mutex_unlock(&trace_types_lock); 8718 mutex_unlock(&event_mutex); 8719 8720 if (ret < 0) 8721 return ret; 8722 8723 *ppos += cnt; 8724 8725 return cnt; 8726 } 8727 8728 static const struct file_operations trace_options_core_fops = { 8729 .open = tracing_open_generic, 8730 .read = trace_options_core_read, 8731 .write = trace_options_core_write, 8732 .llseek = generic_file_llseek, 8733 }; 8734 8735 struct dentry *trace_create_file(const char *name, 8736 umode_t mode, 8737 struct dentry *parent, 8738 void *data, 8739 const struct file_operations *fops) 8740 { 8741 struct dentry *ret; 8742 8743 ret = tracefs_create_file(name, mode, parent, data, fops); 8744 if (!ret) 8745 pr_warn("Could not create tracefs '%s' entry\n", name); 8746 8747 return ret; 8748 } 8749 8750 8751 static struct dentry *trace_options_init_dentry(struct trace_array *tr) 8752 { 8753 struct dentry *d_tracer; 8754 8755 if (tr->options) 8756 return tr->options; 8757 8758 d_tracer = tracing_get_dentry(tr); 8759 if (IS_ERR(d_tracer)) 8760 return NULL; 8761 8762 tr->options = tracefs_create_dir("options", d_tracer); 8763 if (!tr->options) { 8764 pr_warn("Could not create tracefs directory 'options'\n"); 8765 return NULL; 8766 } 8767 8768 return tr->options; 8769 } 8770 8771 static void 8772 create_trace_option_file(struct trace_array *tr, 8773 struct trace_option_dentry *topt, 8774 struct tracer_flags *flags, 8775 struct tracer_opt *opt) 8776 { 8777 struct dentry *t_options; 8778 8779 t_options = trace_options_init_dentry(tr); 8780 if (!t_options) 8781 return; 8782 8783 topt->flags = flags; 8784 topt->opt = opt; 8785 topt->tr = tr; 8786 8787 topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE, 8788 t_options, topt, &trace_options_fops); 8789 8790 } 8791 8792 static void 8793 create_trace_option_files(struct trace_array *tr, struct tracer *tracer) 8794 { 8795 struct trace_option_dentry *topts; 8796 struct trace_options *tr_topts; 8797 struct tracer_flags *flags; 8798 struct 
tracer_opt *opts; 8799 int cnt; 8800 int i; 8801 8802 if (!tracer) 8803 return; 8804 8805 flags = tracer->flags; 8806 8807 if (!flags || !flags->opts) 8808 return; 8809 8810 /* 8811 * If this is an instance, only create flags for tracers 8812 * the instance may have. 8813 */ 8814 if (!trace_ok_for_array(tracer, tr)) 8815 return; 8816 8817 for (i = 0; i < tr->nr_topts; i++) { 8818 /* Make sure there's no duplicate flags. */ 8819 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags)) 8820 return; 8821 } 8822 8823 opts = flags->opts; 8824 8825 for (cnt = 0; opts[cnt].name; cnt++) 8826 ; 8827 8828 topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL); 8829 if (!topts) 8830 return; 8831 8832 tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1), 8833 GFP_KERNEL); 8834 if (!tr_topts) { 8835 kfree(topts); 8836 return; 8837 } 8838 8839 tr->topts = tr_topts; 8840 tr->topts[tr->nr_topts].tracer = tracer; 8841 tr->topts[tr->nr_topts].topts = topts; 8842 tr->nr_topts++; 8843 8844 for (cnt = 0; opts[cnt].name; cnt++) { 8845 create_trace_option_file(tr, &topts[cnt], flags, 8846 &opts[cnt]); 8847 MEM_FAIL(topts[cnt].entry == NULL, 8848 "Failed to create trace option: %s", 8849 opts[cnt].name); 8850 } 8851 } 8852 8853 static struct dentry * 8854 create_trace_option_core_file(struct trace_array *tr, 8855 const char *option, long index) 8856 { 8857 struct dentry *t_options; 8858 8859 t_options = trace_options_init_dentry(tr); 8860 if (!t_options) 8861 return NULL; 8862 8863 return trace_create_file(option, TRACE_MODE_WRITE, t_options, 8864 (void *)&tr->trace_flags_index[index], 8865 &trace_options_core_fops); 8866 } 8867 8868 static void create_trace_options_dir(struct trace_array *tr) 8869 { 8870 struct dentry *t_options; 8871 bool top_level = tr == &global_trace; 8872 int i; 8873 8874 t_options = trace_options_init_dentry(tr); 8875 if (!t_options) 8876 return; 8877 8878 for (i = 0; trace_options[i]; i++) { 8879 if (top_level || 8880 !((1 << i) & TOP_LEVEL_TRACE_FLAGS)) 8881 create_trace_option_core_file(tr, trace_options[i], i); 8882 } 8883 } 8884 8885 static ssize_t 8886 rb_simple_read(struct file *filp, char __user *ubuf, 8887 size_t cnt, loff_t *ppos) 8888 { 8889 struct trace_array *tr = filp->private_data; 8890 char buf[64]; 8891 int r; 8892 8893 r = tracer_tracing_is_on(tr); 8894 r = sprintf(buf, "%d\n", r); 8895 8896 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 8897 } 8898 8899 static ssize_t 8900 rb_simple_write(struct file *filp, const char __user *ubuf, 8901 size_t cnt, loff_t *ppos) 8902 { 8903 struct trace_array *tr = filp->private_data; 8904 struct trace_buffer *buffer = tr->array_buffer.buffer; 8905 unsigned long val; 8906 int ret; 8907 8908 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 8909 if (ret) 8910 return ret; 8911 8912 if (buffer) { 8913 mutex_lock(&trace_types_lock); 8914 if (!!val == tracer_tracing_is_on(tr)) { 8915 val = 0; /* do nothing */ 8916 } else if (val) { 8917 tracer_tracing_on(tr); 8918 if (tr->current_trace->start) 8919 tr->current_trace->start(tr); 8920 } else { 8921 tracer_tracing_off(tr); 8922 if (tr->current_trace->stop) 8923 tr->current_trace->stop(tr); 8924 /* Wake up any waiters */ 8925 ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS); 8926 } 8927 mutex_unlock(&trace_types_lock); 8928 } 8929 8930 (*ppos)++; 8931 8932 return cnt; 8933 } 8934 8935 static const struct file_operations rb_simple_fops = { 8936 .open = tracing_open_generic_tr, 8937 .read = rb_simple_read, 8938 .write = rb_simple_write, 8939 .release = 
tracing_release_generic_tr, 8940 .llseek = default_llseek, 8941 }; 8942 8943 static ssize_t 8944 buffer_percent_read(struct file *filp, char __user *ubuf, 8945 size_t cnt, loff_t *ppos) 8946 { 8947 struct trace_array *tr = filp->private_data; 8948 char buf[64]; 8949 int r; 8950 8951 r = tr->buffer_percent; 8952 r = sprintf(buf, "%d\n", r); 8953 8954 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 8955 } 8956 8957 static ssize_t 8958 buffer_percent_write(struct file *filp, const char __user *ubuf, 8959 size_t cnt, loff_t *ppos) 8960 { 8961 struct trace_array *tr = filp->private_data; 8962 unsigned long val; 8963 int ret; 8964 8965 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 8966 if (ret) 8967 return ret; 8968 8969 if (val > 100) 8970 return -EINVAL; 8971 8972 tr->buffer_percent = val; 8973 8974 (*ppos)++; 8975 8976 return cnt; 8977 } 8978 8979 static const struct file_operations buffer_percent_fops = { 8980 .open = tracing_open_generic_tr, 8981 .read = buffer_percent_read, 8982 .write = buffer_percent_write, 8983 .release = tracing_release_generic_tr, 8984 .llseek = default_llseek, 8985 }; 8986 8987 static ssize_t 8988 buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) 8989 { 8990 struct trace_array *tr = filp->private_data; 8991 size_t size; 8992 char buf[64]; 8993 int order; 8994 int r; 8995 8996 order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer); 8997 size = (PAGE_SIZE << order) / 1024; 8998 8999 r = sprintf(buf, "%zd\n", size); 9000 9001 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 9002 } 9003 9004 static ssize_t 9005 buffer_subbuf_size_write(struct file *filp, const char __user *ubuf, 9006 size_t cnt, loff_t *ppos) 9007 { 9008 struct trace_array *tr = filp->private_data; 9009 unsigned long val; 9010 int old_order; 9011 int order; 9012 int pages; 9013 int ret; 9014 9015 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 9016 if (ret) 9017 return ret; 9018 9019 val *= 1024; /* value passed in is in KB */ 9020 9021 pages = DIV_ROUND_UP(val, PAGE_SIZE); 9022 order = fls(pages - 1); 9023 9024 /* limit between 1 and 128 system pages */ 9025 if (order < 0 || order > 7) 9026 return -EINVAL; 9027 9028 /* Do not allow tracing while changing the order of the ring buffer */ 9029 tracing_stop_tr(tr); 9030 9031 old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer); 9032 if (old_order == order) 9033 goto out; 9034 9035 ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order); 9036 if (ret) 9037 goto out; 9038 9039 #ifdef CONFIG_TRACER_MAX_TRACE 9040 9041 if (!tr->allocated_snapshot) 9042 goto out_max; 9043 9044 ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order); 9045 if (ret) { 9046 /* Put back the old order */ 9047 cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order); 9048 if (WARN_ON_ONCE(cnt)) { 9049 /* 9050 * AARGH! We are left with different orders! 9051 * The max buffer is our "snapshot" buffer. 9052 * When a tracer needs a snapshot (one of the 9053 * latency tracers), it swaps the max buffer 9054 * with the saved snap shot. We succeeded to 9055 * update the order of the main buffer, but failed to 9056 * update the order of the max buffer. But when we tried 9057 * to reset the main buffer to the original size, we 9058 * failed there too. This is very unlikely to 9059 * happen, but if it does, warn and kill all 9060 * tracing. 
9061 */ 9062 tracing_disabled = 1; 9063 } 9064 goto out; 9065 } 9066 out_max: 9067 #endif 9068 (*ppos)++; 9069 out: 9070 if (ret) 9071 cnt = ret; 9072 tracing_start_tr(tr); 9073 return cnt; 9074 } 9075 9076 static const struct file_operations buffer_subbuf_size_fops = { 9077 .open = tracing_open_generic_tr, 9078 .read = buffer_subbuf_size_read, 9079 .write = buffer_subbuf_size_write, 9080 .release = tracing_release_generic_tr, 9081 .llseek = default_llseek, 9082 }; 9083 9084 static struct dentry *trace_instance_dir; 9085 9086 static void 9087 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer); 9088 9089 static int 9090 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size) 9091 { 9092 enum ring_buffer_flags rb_flags; 9093 9094 rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0; 9095 9096 buf->tr = tr; 9097 9098 buf->buffer = ring_buffer_alloc(size, rb_flags); 9099 if (!buf->buffer) 9100 return -ENOMEM; 9101 9102 buf->data = alloc_percpu(struct trace_array_cpu); 9103 if (!buf->data) { 9104 ring_buffer_free(buf->buffer); 9105 buf->buffer = NULL; 9106 return -ENOMEM; 9107 } 9108 9109 /* Allocate the first page for all buffers */ 9110 set_buffer_entries(&tr->array_buffer, 9111 ring_buffer_size(tr->array_buffer.buffer, 0)); 9112 9113 return 0; 9114 } 9115 9116 static void free_trace_buffer(struct array_buffer *buf) 9117 { 9118 if (buf->buffer) { 9119 ring_buffer_free(buf->buffer); 9120 buf->buffer = NULL; 9121 free_percpu(buf->data); 9122 buf->data = NULL; 9123 } 9124 } 9125 9126 static int allocate_trace_buffers(struct trace_array *tr, int size) 9127 { 9128 int ret; 9129 9130 ret = allocate_trace_buffer(tr, &tr->array_buffer, size); 9131 if (ret) 9132 return ret; 9133 9134 #ifdef CONFIG_TRACER_MAX_TRACE 9135 ret = allocate_trace_buffer(tr, &tr->max_buffer, 9136 allocate_snapshot ? 
size : 1); 9137 if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) { 9138 free_trace_buffer(&tr->array_buffer); 9139 return -ENOMEM; 9140 } 9141 tr->allocated_snapshot = allocate_snapshot; 9142 9143 allocate_snapshot = false; 9144 #endif 9145 9146 return 0; 9147 } 9148 9149 static void free_trace_buffers(struct trace_array *tr) 9150 { 9151 if (!tr) 9152 return; 9153 9154 free_trace_buffer(&tr->array_buffer); 9155 9156 #ifdef CONFIG_TRACER_MAX_TRACE 9157 free_trace_buffer(&tr->max_buffer); 9158 #endif 9159 } 9160 9161 static void init_trace_flags_index(struct trace_array *tr) 9162 { 9163 int i; 9164 9165 /* Used by the trace options files */ 9166 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) 9167 tr->trace_flags_index[i] = i; 9168 } 9169 9170 static void __update_tracer_options(struct trace_array *tr) 9171 { 9172 struct tracer *t; 9173 9174 for (t = trace_types; t; t = t->next) 9175 add_tracer_options(tr, t); 9176 } 9177 9178 static void update_tracer_options(struct trace_array *tr) 9179 { 9180 mutex_lock(&trace_types_lock); 9181 tracer_options_updated = true; 9182 __update_tracer_options(tr); 9183 mutex_unlock(&trace_types_lock); 9184 } 9185 9186 /* Must have trace_types_lock held */ 9187 struct trace_array *trace_array_find(const char *instance) 9188 { 9189 struct trace_array *tr, *found = NULL; 9190 9191 list_for_each_entry(tr, &ftrace_trace_arrays, list) { 9192 if (tr->name && strcmp(tr->name, instance) == 0) { 9193 found = tr; 9194 break; 9195 } 9196 } 9197 9198 return found; 9199 } 9200 9201 struct trace_array *trace_array_find_get(const char *instance) 9202 { 9203 struct trace_array *tr; 9204 9205 mutex_lock(&trace_types_lock); 9206 tr = trace_array_find(instance); 9207 if (tr) 9208 tr->ref++; 9209 mutex_unlock(&trace_types_lock); 9210 9211 return tr; 9212 } 9213 9214 static int trace_array_create_dir(struct trace_array *tr) 9215 { 9216 int ret; 9217 9218 tr->dir = tracefs_create_dir(tr->name, trace_instance_dir); 9219 if (!tr->dir) 9220 return -EINVAL; 9221 9222 ret = event_trace_add_tracer(tr->dir, tr); 9223 if (ret) { 9224 tracefs_remove(tr->dir); 9225 return ret; 9226 } 9227 9228 init_tracer_tracefs(tr, tr->dir); 9229 __update_tracer_options(tr); 9230 9231 return ret; 9232 } 9233 9234 static struct trace_array * 9235 trace_array_create_systems(const char *name, const char *systems) 9236 { 9237 struct trace_array *tr; 9238 int ret; 9239 9240 ret = -ENOMEM; 9241 tr = kzalloc(sizeof(*tr), GFP_KERNEL); 9242 if (!tr) 9243 return ERR_PTR(ret); 9244 9245 tr->name = kstrdup(name, GFP_KERNEL); 9246 if (!tr->name) 9247 goto out_free_tr; 9248 9249 if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL)) 9250 goto out_free_tr; 9251 9252 if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL)) 9253 goto out_free_tr; 9254 9255 if (systems) { 9256 tr->system_names = kstrdup_const(systems, GFP_KERNEL); 9257 if (!tr->system_names) 9258 goto out_free_tr; 9259 } 9260 9261 tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS; 9262 9263 cpumask_copy(tr->tracing_cpumask, cpu_all_mask); 9264 9265 raw_spin_lock_init(&tr->start_lock); 9266 9267 tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; 9268 #ifdef CONFIG_TRACER_MAX_TRACE 9269 spin_lock_init(&tr->snapshot_trigger_lock); 9270 #endif 9271 tr->current_trace = &nop_trace; 9272 9273 INIT_LIST_HEAD(&tr->systems); 9274 INIT_LIST_HEAD(&tr->events); 9275 INIT_LIST_HEAD(&tr->hist_vars); 9276 INIT_LIST_HEAD(&tr->err_log); 9277 9278 if (allocate_trace_buffers(tr, trace_buf_size) < 0) 9279 goto out_free_tr; 9280 9281 /* The ring buffer 
is defaultly expanded */ 9282 trace_set_ring_buffer_expanded(tr); 9283 9284 if (ftrace_allocate_ftrace_ops(tr) < 0) 9285 goto out_free_tr; 9286 9287 ftrace_init_trace_array(tr); 9288 9289 init_trace_flags_index(tr); 9290 9291 if (trace_instance_dir) { 9292 ret = trace_array_create_dir(tr); 9293 if (ret) 9294 goto out_free_tr; 9295 } else 9296 __trace_early_add_events(tr); 9297 9298 list_add(&tr->list, &ftrace_trace_arrays); 9299 9300 tr->ref++; 9301 9302 return tr; 9303 9304 out_free_tr: 9305 ftrace_free_ftrace_ops(tr); 9306 free_trace_buffers(tr); 9307 free_cpumask_var(tr->pipe_cpumask); 9308 free_cpumask_var(tr->tracing_cpumask); 9309 kfree_const(tr->system_names); 9310 kfree(tr->name); 9311 kfree(tr); 9312 9313 return ERR_PTR(ret); 9314 } 9315 9316 static struct trace_array *trace_array_create(const char *name) 9317 { 9318 return trace_array_create_systems(name, NULL); 9319 } 9320 9321 static int instance_mkdir(const char *name) 9322 { 9323 struct trace_array *tr; 9324 int ret; 9325 9326 mutex_lock(&event_mutex); 9327 mutex_lock(&trace_types_lock); 9328 9329 ret = -EEXIST; 9330 if (trace_array_find(name)) 9331 goto out_unlock; 9332 9333 tr = trace_array_create(name); 9334 9335 ret = PTR_ERR_OR_ZERO(tr); 9336 9337 out_unlock: 9338 mutex_unlock(&trace_types_lock); 9339 mutex_unlock(&event_mutex); 9340 return ret; 9341 } 9342 9343 /** 9344 * trace_array_get_by_name - Create/Lookup a trace array, given its name. 9345 * @name: The name of the trace array to be looked up/created. 9346 * @systems: A list of systems to create event directories for (NULL for all) 9347 * 9348 * Returns pointer to trace array with given name. 9349 * NULL, if it cannot be created. 9350 * 9351 * NOTE: This function increments the reference counter associated with the 9352 * trace array returned. This makes sure it cannot be freed while in use. 9353 * Use trace_array_put() once the trace array is no longer needed. 9354 * If the trace_array is to be freed, trace_array_destroy() needs to 9355 * be called after the trace_array_put(), or simply let user space delete 9356 * it from the tracefs instances directory. But until the 9357 * trace_array_put() is called, user space can not delete it. 9358 * 9359 */ 9360 struct trace_array *trace_array_get_by_name(const char *name, const char *systems) 9361 { 9362 struct trace_array *tr; 9363 9364 mutex_lock(&event_mutex); 9365 mutex_lock(&trace_types_lock); 9366 9367 list_for_each_entry(tr, &ftrace_trace_arrays, list) { 9368 if (tr->name && strcmp(tr->name, name) == 0) 9369 goto out_unlock; 9370 } 9371 9372 tr = trace_array_create_systems(name, systems); 9373 9374 if (IS_ERR(tr)) 9375 tr = NULL; 9376 out_unlock: 9377 if (tr) 9378 tr->ref++; 9379 9380 mutex_unlock(&trace_types_lock); 9381 mutex_unlock(&event_mutex); 9382 return tr; 9383 } 9384 EXPORT_SYMBOL_GPL(trace_array_get_by_name); 9385 9386 static int __remove_instance(struct trace_array *tr) 9387 { 9388 int i; 9389 9390 /* Reference counter for a newly created trace array = 1. 
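	 * Anything above that, or a non-zero trace_ref left by a still
	 * open trace file, means the instance is busy and cannot be
	 * removed yet.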
*/ 9391 if (tr->ref > 1 || (tr->current_trace && tr->trace_ref)) 9392 return -EBUSY; 9393 9394 list_del(&tr->list); 9395 9396 /* Disable all the flags that were enabled coming in */ 9397 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) { 9398 if ((1 << i) & ZEROED_TRACE_FLAGS) 9399 set_tracer_flag(tr, 1 << i, 0); 9400 } 9401 9402 tracing_set_nop(tr); 9403 clear_ftrace_function_probes(tr); 9404 event_trace_del_tracer(tr); 9405 ftrace_clear_pids(tr); 9406 ftrace_destroy_function_files(tr); 9407 tracefs_remove(tr->dir); 9408 free_percpu(tr->last_func_repeats); 9409 free_trace_buffers(tr); 9410 clear_tracing_err_log(tr); 9411 9412 for (i = 0; i < tr->nr_topts; i++) { 9413 kfree(tr->topts[i].topts); 9414 } 9415 kfree(tr->topts); 9416 9417 free_cpumask_var(tr->pipe_cpumask); 9418 free_cpumask_var(tr->tracing_cpumask); 9419 kfree_const(tr->system_names); 9420 kfree(tr->name); 9421 kfree(tr); 9422 9423 return 0; 9424 } 9425 9426 int trace_array_destroy(struct trace_array *this_tr) 9427 { 9428 struct trace_array *tr; 9429 int ret; 9430 9431 if (!this_tr) 9432 return -EINVAL; 9433 9434 mutex_lock(&event_mutex); 9435 mutex_lock(&trace_types_lock); 9436 9437 ret = -ENODEV; 9438 9439 /* Making sure trace array exists before destroying it. */ 9440 list_for_each_entry(tr, &ftrace_trace_arrays, list) { 9441 if (tr == this_tr) { 9442 ret = __remove_instance(tr); 9443 break; 9444 } 9445 } 9446 9447 mutex_unlock(&trace_types_lock); 9448 mutex_unlock(&event_mutex); 9449 9450 return ret; 9451 } 9452 EXPORT_SYMBOL_GPL(trace_array_destroy); 9453 9454 static int instance_rmdir(const char *name) 9455 { 9456 struct trace_array *tr; 9457 int ret; 9458 9459 mutex_lock(&event_mutex); 9460 mutex_lock(&trace_types_lock); 9461 9462 ret = -ENODEV; 9463 tr = trace_array_find(name); 9464 if (tr) 9465 ret = __remove_instance(tr); 9466 9467 mutex_unlock(&trace_types_lock); 9468 mutex_unlock(&event_mutex); 9469 9470 return ret; 9471 } 9472 9473 static __init void create_trace_instances(struct dentry *d_tracer) 9474 { 9475 struct trace_array *tr; 9476 9477 trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer, 9478 instance_mkdir, 9479 instance_rmdir); 9480 if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n")) 9481 return; 9482 9483 mutex_lock(&event_mutex); 9484 mutex_lock(&trace_types_lock); 9485 9486 list_for_each_entry(tr, &ftrace_trace_arrays, list) { 9487 if (!tr->name) 9488 continue; 9489 if (MEM_FAIL(trace_array_create_dir(tr) < 0, 9490 "Failed to create instance directory\n")) 9491 break; 9492 } 9493 9494 mutex_unlock(&trace_types_lock); 9495 mutex_unlock(&event_mutex); 9496 } 9497 9498 static void 9499 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer) 9500 { 9501 int cpu; 9502 9503 trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer, 9504 tr, &show_traces_fops); 9505 9506 trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer, 9507 tr, &set_tracer_fops); 9508 9509 trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer, 9510 tr, &tracing_cpumask_fops); 9511 9512 trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer, 9513 tr, &tracing_iter_fops); 9514 9515 trace_create_file("trace", TRACE_MODE_WRITE, d_tracer, 9516 tr, &tracing_fops); 9517 9518 trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer, 9519 tr, &tracing_pipe_fops); 9520 9521 trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer, 9522 tr, &tracing_entries_fops); 9523 9524 trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer, 9525 tr, 
&tracing_total_entries_fops); 9526 9527 trace_create_file("free_buffer", 0200, d_tracer, 9528 tr, &tracing_free_buffer_fops); 9529 9530 trace_create_file("trace_marker", 0220, d_tracer, 9531 tr, &tracing_mark_fops); 9532 9533 tr->trace_marker_file = __find_event_file(tr, "ftrace", "print"); 9534 9535 trace_create_file("trace_marker_raw", 0220, d_tracer, 9536 tr, &tracing_mark_raw_fops); 9537 9538 trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr, 9539 &trace_clock_fops); 9540 9541 trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer, 9542 tr, &rb_simple_fops); 9543 9544 trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr, 9545 &trace_time_stamp_mode_fops); 9546 9547 tr->buffer_percent = 50; 9548 9549 trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer, 9550 tr, &buffer_percent_fops); 9551 9552 trace_create_file("buffer_subbuf_size_kb", TRACE_MODE_WRITE, d_tracer, 9553 tr, &buffer_subbuf_size_fops); 9554 9555 create_trace_options_dir(tr); 9556 9557 #ifdef CONFIG_TRACER_MAX_TRACE 9558 trace_create_maxlat_file(tr, d_tracer); 9559 #endif 9560 9561 if (ftrace_create_function_files(tr, d_tracer)) 9562 MEM_FAIL(1, "Could not allocate function filter files"); 9563 9564 #ifdef CONFIG_TRACER_SNAPSHOT 9565 trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer, 9566 tr, &snapshot_fops); 9567 #endif 9568 9569 trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer, 9570 tr, &tracing_err_log_fops); 9571 9572 for_each_tracing_cpu(cpu) 9573 tracing_init_tracefs_percpu(tr, cpu); 9574 9575 ftrace_init_tracefs(tr, d_tracer); 9576 } 9577 9578 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore) 9579 { 9580 struct vfsmount *mnt; 9581 struct file_system_type *type; 9582 9583 /* 9584 * To maintain backward compatibility for tools that mount 9585 * debugfs to get to the tracing facility, tracefs is automatically 9586 * mounted to the debugfs/tracing directory. 9587 */ 9588 type = get_fs_type("tracefs"); 9589 if (!type) 9590 return NULL; 9591 mnt = vfs_submount(mntpt, type, "tracefs", NULL); 9592 put_filesystem(type); 9593 if (IS_ERR(mnt)) 9594 return NULL; 9595 mntget(mnt); 9596 9597 return mnt; 9598 } 9599 9600 /** 9601 * tracing_init_dentry - initialize top level trace array 9602 * 9603 * This is called when creating files or directories in the tracing 9604 * directory. It is called via fs_initcall() by any of the boot up code 9605 * and expects to return the dentry of the top level tracing directory. 9606 */ 9607 int tracing_init_dentry(void) 9608 { 9609 struct trace_array *tr = &global_trace; 9610 9611 if (security_locked_down(LOCKDOWN_TRACEFS)) { 9612 pr_warn("Tracing disabled due to lockdown\n"); 9613 return -EPERM; 9614 } 9615 9616 /* The top level trace array uses NULL as parent */ 9617 if (tr->dir) 9618 return 0; 9619 9620 if (WARN_ON(!tracefs_initialized())) 9621 return -ENODEV; 9622 9623 /* 9624 * As there may still be users that expect the tracing 9625 * files to exist in debugfs/tracing, we must automount 9626 * the tracefs file system there, so older tools still 9627 * work with the newer kernel. 
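	 * The automount is lazy: trace_automount() above is only invoked
	 * when something actually walks into debugfs/tracing.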
9628 */ 9629 tr->dir = debugfs_create_automount("tracing", NULL, 9630 trace_automount, NULL); 9631 9632 return 0; 9633 } 9634 9635 extern struct trace_eval_map *__start_ftrace_eval_maps[]; 9636 extern struct trace_eval_map *__stop_ftrace_eval_maps[]; 9637 9638 static struct workqueue_struct *eval_map_wq __initdata; 9639 static struct work_struct eval_map_work __initdata; 9640 static struct work_struct tracerfs_init_work __initdata; 9641 9642 static void __init eval_map_work_func(struct work_struct *work) 9643 { 9644 int len; 9645 9646 len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps; 9647 trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len); 9648 } 9649 9650 static int __init trace_eval_init(void) 9651 { 9652 INIT_WORK(&eval_map_work, eval_map_work_func); 9653 9654 eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0); 9655 if (!eval_map_wq) { 9656 pr_err("Unable to allocate eval_map_wq\n"); 9657 /* Do work here */ 9658 eval_map_work_func(&eval_map_work); 9659 return -ENOMEM; 9660 } 9661 9662 queue_work(eval_map_wq, &eval_map_work); 9663 return 0; 9664 } 9665 9666 subsys_initcall(trace_eval_init); 9667 9668 static int __init trace_eval_sync(void) 9669 { 9670 /* Make sure the eval map updates are finished */ 9671 if (eval_map_wq) 9672 destroy_workqueue(eval_map_wq); 9673 return 0; 9674 } 9675 9676 late_initcall_sync(trace_eval_sync); 9677 9678 9679 #ifdef CONFIG_MODULES 9680 static void trace_module_add_evals(struct module *mod) 9681 { 9682 if (!mod->num_trace_evals) 9683 return; 9684 9685 /* 9686 * Modules with bad taint do not have events created, do 9687 * not bother with enums either. 9688 */ 9689 if (trace_module_has_bad_taint(mod)) 9690 return; 9691 9692 trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals); 9693 } 9694 9695 #ifdef CONFIG_TRACE_EVAL_MAP_FILE 9696 static void trace_module_remove_evals(struct module *mod) 9697 { 9698 union trace_eval_map_item *map; 9699 union trace_eval_map_item **last = &trace_eval_maps; 9700 9701 if (!mod->num_trace_evals) 9702 return; 9703 9704 mutex_lock(&trace_eval_mutex); 9705 9706 map = trace_eval_maps; 9707 9708 while (map) { 9709 if (map->head.mod == mod) 9710 break; 9711 map = trace_eval_jmp_to_tail(map); 9712 last = &map->tail.next; 9713 map = map->tail.next; 9714 } 9715 if (!map) 9716 goto out; 9717 9718 *last = trace_eval_jmp_to_tail(map)->tail.next; 9719 kfree(map); 9720 out: 9721 mutex_unlock(&trace_eval_mutex); 9722 } 9723 #else 9724 static inline void trace_module_remove_evals(struct module *mod) { } 9725 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */ 9726 9727 static int trace_module_notify(struct notifier_block *self, 9728 unsigned long val, void *data) 9729 { 9730 struct module *mod = data; 9731 9732 switch (val) { 9733 case MODULE_STATE_COMING: 9734 trace_module_add_evals(mod); 9735 break; 9736 case MODULE_STATE_GOING: 9737 trace_module_remove_evals(mod); 9738 break; 9739 } 9740 9741 return NOTIFY_OK; 9742 } 9743 9744 static struct notifier_block trace_module_nb = { 9745 .notifier_call = trace_module_notify, 9746 .priority = 0, 9747 }; 9748 #endif /* CONFIG_MODULES */ 9749 9750 static __init void tracer_init_tracefs_work_func(struct work_struct *work) 9751 { 9752 9753 event_trace_init(); 9754 9755 init_tracer_tracefs(&global_trace, NULL); 9756 ftrace_init_tracefs_toplevel(&global_trace, NULL); 9757 9758 trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL, 9759 &global_trace, &tracing_thresh_fops); 9760 9761 trace_create_file("README", TRACE_MODE_READ, NULL, 9762 NULL, &tracing_readme_fops); 9763 9764 
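	/*
	 * The saved_cmdlines/saved_tgids files below expose the cached
	 * pid -> comm and pid -> tgid mappings recorded while tracing,
	 * and saved_cmdlines_size adjusts how many entries are kept.
	 */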
trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL, 9765 NULL, &tracing_saved_cmdlines_fops); 9766 9767 trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL, 9768 NULL, &tracing_saved_cmdlines_size_fops); 9769 9770 trace_create_file("saved_tgids", TRACE_MODE_READ, NULL, 9771 NULL, &tracing_saved_tgids_fops); 9772 9773 trace_create_eval_file(NULL); 9774 9775 #ifdef CONFIG_MODULES 9776 register_module_notifier(&trace_module_nb); 9777 #endif 9778 9779 #ifdef CONFIG_DYNAMIC_FTRACE 9780 trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL, 9781 NULL, &tracing_dyn_info_fops); 9782 #endif 9783 9784 create_trace_instances(NULL); 9785 9786 update_tracer_options(&global_trace); 9787 } 9788 9789 static __init int tracer_init_tracefs(void) 9790 { 9791 int ret; 9792 9793 trace_access_lock_init(); 9794 9795 ret = tracing_init_dentry(); 9796 if (ret) 9797 return 0; 9798 9799 if (eval_map_wq) { 9800 INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func); 9801 queue_work(eval_map_wq, &tracerfs_init_work); 9802 } else { 9803 tracer_init_tracefs_work_func(NULL); 9804 } 9805 9806 rv_init_interface(); 9807 9808 return 0; 9809 } 9810 9811 fs_initcall(tracer_init_tracefs); 9812 9813 static int trace_die_panic_handler(struct notifier_block *self, 9814 unsigned long ev, void *unused); 9815 9816 static struct notifier_block trace_panic_notifier = { 9817 .notifier_call = trace_die_panic_handler, 9818 .priority = INT_MAX - 1, 9819 }; 9820 9821 static struct notifier_block trace_die_notifier = { 9822 .notifier_call = trace_die_panic_handler, 9823 .priority = INT_MAX - 1, 9824 }; 9825 9826 /* 9827 * The idea is to execute the following die/panic callback early, in order 9828 * to avoid showing irrelevant information in the trace (like other panic 9829 * notifier functions); we are the 2nd to run, after hung_task/rcu_stall 9830 * warnings get disabled (to prevent potential log flooding). 9831 */ 9832 static int trace_die_panic_handler(struct notifier_block *self, 9833 unsigned long ev, void *unused) 9834 { 9835 if (!ftrace_dump_on_oops) 9836 return NOTIFY_DONE; 9837 9838 /* The die notifier requires DIE_OOPS to trigger */ 9839 if (self == &trace_die_notifier && ev != DIE_OOPS) 9840 return NOTIFY_DONE; 9841 9842 ftrace_dump(ftrace_dump_on_oops); 9843 9844 return NOTIFY_DONE; 9845 } 9846 9847 /* 9848 * printk is set to max of 1024, we really don't need it that big. 9849 * Nothing should be printing 1000 characters anyway. 9850 */ 9851 #define TRACE_MAX_PRINT 1000 9852 9853 /* 9854 * Define here KERN_TRACE so that we have one place to modify 9855 * it if we decide to change what log level the ftrace dump 9856 * should be at. 9857 */ 9858 #define KERN_TRACE KERN_EMERG 9859 9860 void 9861 trace_printk_seq(struct trace_seq *s) 9862 { 9863 /* Probably should print a warning here. */ 9864 if (s->seq.len >= TRACE_MAX_PRINT) 9865 s->seq.len = TRACE_MAX_PRINT; 9866 9867 /* 9868 * More paranoid code. Although the buffer size is set to 9869 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just 9870 * an extra layer of protection. 9871 */ 9872 if (WARN_ON_ONCE(s->seq.len >= s->seq.size)) 9873 s->seq.len = s->seq.size - 1; 9874 9875 /* should be zero ended, but we are paranoid. 
*/ 9876 s->buffer[s->seq.len] = 0; 9877 9878 printk(KERN_TRACE "%s", s->buffer); 9879 9880 trace_seq_init(s); 9881 } 9882 9883 void trace_init_global_iter(struct trace_iterator *iter) 9884 { 9885 iter->tr = &global_trace; 9886 iter->trace = iter->tr->current_trace; 9887 iter->cpu_file = RING_BUFFER_ALL_CPUS; 9888 iter->array_buffer = &global_trace.array_buffer; 9889 9890 if (iter->trace && iter->trace->open) 9891 iter->trace->open(iter); 9892 9893 /* Annotate start of buffers if we had overruns */ 9894 if (ring_buffer_overruns(iter->array_buffer->buffer)) 9895 iter->iter_flags |= TRACE_FILE_ANNOTATE; 9896 9897 /* Output in nanoseconds only if we are using a clock in nanoseconds. */ 9898 if (trace_clocks[iter->tr->clock_id].in_ns) 9899 iter->iter_flags |= TRACE_FILE_TIME_IN_NS; 9900 9901 /* Can not use kmalloc for iter.temp and iter.fmt */ 9902 iter->temp = static_temp_buf; 9903 iter->temp_size = STATIC_TEMP_BUF_SIZE; 9904 iter->fmt = static_fmt_buf; 9905 iter->fmt_size = STATIC_FMT_BUF_SIZE; 9906 } 9907 9908 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) 9909 { 9910 /* use static because iter can be a bit big for the stack */ 9911 static struct trace_iterator iter; 9912 static atomic_t dump_running; 9913 struct trace_array *tr = &global_trace; 9914 unsigned int old_userobj; 9915 unsigned long flags; 9916 int cnt = 0, cpu; 9917 9918 /* Only allow one dump user at a time. */ 9919 if (atomic_inc_return(&dump_running) != 1) { 9920 atomic_dec(&dump_running); 9921 return; 9922 } 9923 9924 /* 9925 * Always turn off tracing when we dump. 9926 * We don't need to show trace output of what happens 9927 * between multiple crashes. 9928 * 9929 * If the user does a sysrq-z, then they can re-enable 9930 * tracing with echo 1 > tracing_on. 9931 */ 9932 tracing_off(); 9933 9934 local_irq_save(flags); 9935 9936 /* Simulate the iterator */ 9937 trace_init_global_iter(&iter); 9938 9939 for_each_tracing_cpu(cpu) { 9940 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled); 9941 } 9942 9943 old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ; 9944 9945 /* don't look at user memory in panic mode */ 9946 tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ; 9947 9948 switch (oops_dump_mode) { 9949 case DUMP_ALL: 9950 iter.cpu_file = RING_BUFFER_ALL_CPUS; 9951 break; 9952 case DUMP_ORIG: 9953 iter.cpu_file = raw_smp_processor_id(); 9954 break; 9955 case DUMP_NONE: 9956 goto out_enable; 9957 default: 9958 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n"); 9959 iter.cpu_file = RING_BUFFER_ALL_CPUS; 9960 } 9961 9962 printk(KERN_TRACE "Dumping ftrace buffer:\n"); 9963 9964 /* Did function tracer already get disabled? */ 9965 if (ftrace_is_dead()) { 9966 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n"); 9967 printk("# MAY BE MISSING FUNCTION EVENTS\n"); 9968 } 9969 9970 /* 9971 * We need to stop all tracing on all CPUS to read 9972 * the next buffer. This is a bit expensive, but is 9973 * not done often. We fill all what we can read, 9974 * and then release the locks again. 
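	 * New entries should not be coming in at this point anyway:
	 * tracing_off() was called and the per-cpu "disabled" counters
	 * were bumped above.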
9975 */ 9976 9977 while (!trace_empty(&iter)) { 9978 9979 if (!cnt) 9980 printk(KERN_TRACE "---------------------------------\n"); 9981 9982 cnt++; 9983 9984 trace_iterator_reset(&iter); 9985 iter.iter_flags |= TRACE_FILE_LAT_FMT; 9986 9987 if (trace_find_next_entry_inc(&iter) != NULL) { 9988 int ret; 9989 9990 ret = print_trace_line(&iter); 9991 if (ret != TRACE_TYPE_NO_CONSUME) 9992 trace_consume(&iter); 9993 } 9994 touch_nmi_watchdog(); 9995 9996 trace_printk_seq(&iter.seq); 9997 } 9998 9999 if (!cnt) 10000 printk(KERN_TRACE " (ftrace buffer empty)\n"); 10001 else 10002 printk(KERN_TRACE "---------------------------------\n"); 10003 10004 out_enable: 10005 tr->trace_flags |= old_userobj; 10006 10007 for_each_tracing_cpu(cpu) { 10008 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled); 10009 } 10010 atomic_dec(&dump_running); 10011 local_irq_restore(flags); 10012 } 10013 EXPORT_SYMBOL_GPL(ftrace_dump); 10014 10015 #define WRITE_BUFSIZE 4096 10016 10017 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer, 10018 size_t count, loff_t *ppos, 10019 int (*createfn)(const char *)) 10020 { 10021 char *kbuf, *buf, *tmp; 10022 int ret = 0; 10023 size_t done = 0; 10024 size_t size; 10025 10026 kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL); 10027 if (!kbuf) 10028 return -ENOMEM; 10029 10030 while (done < count) { 10031 size = count - done; 10032 10033 if (size >= WRITE_BUFSIZE) 10034 size = WRITE_BUFSIZE - 1; 10035 10036 if (copy_from_user(kbuf, buffer + done, size)) { 10037 ret = -EFAULT; 10038 goto out; 10039 } 10040 kbuf[size] = '\0'; 10041 buf = kbuf; 10042 do { 10043 tmp = strchr(buf, '\n'); 10044 if (tmp) { 10045 *tmp = '\0'; 10046 size = tmp - buf + 1; 10047 } else { 10048 size = strlen(buf); 10049 if (done + size < count) { 10050 if (buf != kbuf) 10051 break; 10052 /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */ 10053 pr_warn("Line length is too long: Should be less than %d\n", 10054 WRITE_BUFSIZE - 2); 10055 ret = -EINVAL; 10056 goto out; 10057 } 10058 } 10059 done += size; 10060 10061 /* Remove comments */ 10062 tmp = strchr(buf, '#'); 10063 10064 if (tmp) 10065 *tmp = '\0'; 10066 10067 ret = createfn(buf); 10068 if (ret) 10069 goto out; 10070 buf += size; 10071 10072 } while (done < count); 10073 } 10074 ret = done; 10075 10076 out: 10077 kfree(kbuf); 10078 10079 return ret; 10080 } 10081 10082 #ifdef CONFIG_TRACER_MAX_TRACE 10083 __init static bool tr_needs_alloc_snapshot(const char *name) 10084 { 10085 char *test; 10086 int len = strlen(name); 10087 bool ret; 10088 10089 if (!boot_snapshot_index) 10090 return false; 10091 10092 if (strncmp(name, boot_snapshot_info, len) == 0 && 10093 boot_snapshot_info[len] == '\t') 10094 return true; 10095 10096 test = kmalloc(strlen(name) + 3, GFP_KERNEL); 10097 if (!test) 10098 return false; 10099 10100 sprintf(test, "\t%s\t", name); 10101 ret = strstr(boot_snapshot_info, test) == NULL; 10102 kfree(test); 10103 return ret; 10104 } 10105 10106 __init static void do_allocate_snapshot(const char *name) 10107 { 10108 if (!tr_needs_alloc_snapshot(name)) 10109 return; 10110 10111 /* 10112 * When allocate_snapshot is set, the next call to 10113 * allocate_trace_buffers() (called by trace_array_get_by_name()) 10114 * will allocate the snapshot buffer. That will alse clear 10115 * this flag. 
10116 */ 10117 allocate_snapshot = true; 10118 } 10119 #else 10120 static inline void do_allocate_snapshot(const char *name) { } 10121 #endif 10122 10123 __init static void enable_instances(void) 10124 { 10125 struct trace_array *tr; 10126 char *curr_str; 10127 char *str; 10128 char *tok; 10129 10130 /* A tab is always appended */ 10131 boot_instance_info[boot_instance_index - 1] = '\0'; 10132 str = boot_instance_info; 10133 10134 while ((curr_str = strsep(&str, "\t"))) { 10135 10136 tok = strsep(&curr_str, ","); 10137 10138 if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE)) 10139 do_allocate_snapshot(tok); 10140 10141 tr = trace_array_get_by_name(tok, NULL); 10142 if (!tr) { 10143 pr_warn("Failed to create instance buffer %s\n", curr_str); 10144 continue; 10145 } 10146 /* Allow user space to delete it */ 10147 trace_array_put(tr); 10148 10149 while ((tok = strsep(&curr_str, ","))) { 10150 early_enable_events(tr, tok, true); 10151 } 10152 } 10153 } 10154 10155 __init static int tracer_alloc_buffers(void) 10156 { 10157 int ring_buf_size; 10158 int ret = -ENOMEM; 10159 10160 10161 if (security_locked_down(LOCKDOWN_TRACEFS)) { 10162 pr_warn("Tracing disabled due to lockdown\n"); 10163 return -EPERM; 10164 } 10165 10166 /* 10167 * Make sure we don't accidentally add more trace options 10168 * than we have bits for. 10169 */ 10170 BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE); 10171 10172 if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL)) 10173 goto out; 10174 10175 if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL)) 10176 goto out_free_buffer_mask; 10177 10178 /* Only allocate trace_printk buffers if a trace_printk exists */ 10179 if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt) 10180 /* Must be called before global_trace.buffer is allocated */ 10181 trace_printk_init_buffers(); 10182 10183 /* To save memory, keep the ring buffer size to its minimum */ 10184 if (global_trace.ring_buffer_expanded) 10185 ring_buf_size = trace_buf_size; 10186 else 10187 ring_buf_size = 1; 10188 10189 cpumask_copy(tracing_buffer_mask, cpu_possible_mask); 10190 cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask); 10191 10192 raw_spin_lock_init(&global_trace.start_lock); 10193 10194 /* 10195 * The prepare callbacks allocates some memory for the ring buffer. We 10196 * don't free the buffer if the CPU goes down. If we were to free 10197 * the buffer, then the user would lose any trace that was in the 10198 * buffer. The memory will be removed once the "instance" is removed. 
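	 * Note that cpuhp_setup_state_multi() below only registers the
	 * hotplug state; each ring buffer is expected to add itself as
	 * an instance of that state when it is allocated.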
10199 */ 10200 ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE, 10201 "trace/RB:prepare", trace_rb_cpu_prepare, 10202 NULL); 10203 if (ret < 0) 10204 goto out_free_cpumask; 10205 /* Used for event triggers */ 10206 ret = -ENOMEM; 10207 temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE); 10208 if (!temp_buffer) 10209 goto out_rm_hp_state; 10210 10211 if (trace_create_savedcmd() < 0) 10212 goto out_free_temp_buffer; 10213 10214 if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL)) 10215 goto out_free_savedcmd; 10216 10217 /* TODO: make the number of buffers hot pluggable with CPUS */ 10218 if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) { 10219 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n"); 10220 goto out_free_pipe_cpumask; 10221 } 10222 if (global_trace.buffer_disabled) 10223 tracing_off(); 10224 10225 if (trace_boot_clock) { 10226 ret = tracing_set_clock(&global_trace, trace_boot_clock); 10227 if (ret < 0) 10228 pr_warn("Trace clock %s not defined, going back to default\n", 10229 trace_boot_clock); 10230 } 10231 10232 /* 10233 * register_tracer() might reference current_trace, so it 10234 * needs to be set before we register anything. This is 10235 * just a bootstrap of current_trace anyway. 10236 */ 10237 global_trace.current_trace = &nop_trace; 10238 10239 global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; 10240 #ifdef CONFIG_TRACER_MAX_TRACE 10241 spin_lock_init(&global_trace.snapshot_trigger_lock); 10242 #endif 10243 ftrace_init_global_array_ops(&global_trace); 10244 10245 init_trace_flags_index(&global_trace); 10246 10247 register_tracer(&nop_trace); 10248 10249 /* Function tracing may start here (via kernel command line) */ 10250 init_function_trace(); 10251 10252 /* All seems OK, enable tracing */ 10253 tracing_disabled = 0; 10254 10255 atomic_notifier_chain_register(&panic_notifier_list, 10256 &trace_panic_notifier); 10257 10258 register_die_notifier(&trace_die_notifier); 10259 10260 global_trace.flags = TRACE_ARRAY_FL_GLOBAL; 10261 10262 INIT_LIST_HEAD(&global_trace.systems); 10263 INIT_LIST_HEAD(&global_trace.events); 10264 INIT_LIST_HEAD(&global_trace.hist_vars); 10265 INIT_LIST_HEAD(&global_trace.err_log); 10266 list_add(&global_trace.list, &ftrace_trace_arrays); 10267 10268 apply_trace_boot_options(); 10269 10270 register_snapshot_cmd(); 10271 10272 test_can_verify(); 10273 10274 return 0; 10275 10276 out_free_pipe_cpumask: 10277 free_cpumask_var(global_trace.pipe_cpumask); 10278 out_free_savedcmd: 10279 trace_free_saved_cmdlines_buffer(); 10280 out_free_temp_buffer: 10281 ring_buffer_free(temp_buffer); 10282 out_rm_hp_state: 10283 cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE); 10284 out_free_cpumask: 10285 free_cpumask_var(global_trace.tracing_cpumask); 10286 out_free_buffer_mask: 10287 free_cpumask_var(tracing_buffer_mask); 10288 out: 10289 return ret; 10290 } 10291 10292 void __init ftrace_boot_snapshot(void) 10293 { 10294 #ifdef CONFIG_TRACER_MAX_TRACE 10295 struct trace_array *tr; 10296 10297 if (!snapshot_at_boot) 10298 return; 10299 10300 list_for_each_entry(tr, &ftrace_trace_arrays, list) { 10301 if (!tr->allocated_snapshot) 10302 continue; 10303 10304 tracing_snapshot_instance(tr); 10305 trace_array_puts(tr, "** Boot snapshot taken **\n"); 10306 } 10307 #endif 10308 } 10309 10310 void __init early_trace_init(void) 10311 { 10312 if (tracepoint_printk) { 10313 tracepoint_print_iter = 10314 kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL); 10315 if (MEM_FAIL(!tracepoint_print_iter, 10316 "Failed to 
allocate trace iterator\n")) 10317 tracepoint_printk = 0; 10318 else 10319 static_key_enable(&tracepoint_printk_key.key); 10320 } 10321 tracer_alloc_buffers(); 10322 10323 init_events(); 10324 } 10325 10326 void __init trace_init(void) 10327 { 10328 trace_event_init(); 10329 10330 if (boot_instance_index) 10331 enable_instances(); 10332 } 10333 10334 __init static void clear_boot_tracer(void) 10335 { 10336 /* 10337 * The default tracer at boot buffer is an init section. 10338 * This function is called in lateinit. If we did not 10339 * find the boot tracer, then clear it out, to prevent 10340 * later registration from accessing the buffer that is 10341 * about to be freed. 10342 */ 10343 if (!default_bootup_tracer) 10344 return; 10345 10346 printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n", 10347 default_bootup_tracer); 10348 default_bootup_tracer = NULL; 10349 } 10350 10351 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK 10352 __init static void tracing_set_default_clock(void) 10353 { 10354 /* sched_clock_stable() is determined in late_initcall */ 10355 if (!trace_boot_clock && !sched_clock_stable()) { 10356 if (security_locked_down(LOCKDOWN_TRACEFS)) { 10357 pr_warn("Can not set tracing clock due to lockdown\n"); 10358 return; 10359 } 10360 10361 printk(KERN_WARNING 10362 "Unstable clock detected, switching default tracing clock to \"global\"\n" 10363 "If you want to keep using the local clock, then add:\n" 10364 " \"trace_clock=local\"\n" 10365 "on the kernel command line\n"); 10366 tracing_set_clock(&global_trace, "global"); 10367 } 10368 } 10369 #else 10370 static inline void tracing_set_default_clock(void) { } 10371 #endif 10372 10373 __init static int late_trace_init(void) 10374 { 10375 if (tracepoint_printk && tracepoint_printk_stop_on_boot) { 10376 static_key_disable(&tracepoint_printk_key.key); 10377 tracepoint_printk = 0; 10378 } 10379 10380 tracing_set_default_clock(); 10381 clear_boot_tracer(); 10382 return 0; 10383 } 10384 10385 late_initcall_sync(late_trace_init); 10386