// SPDX-License-Identifier: GPL-2.0
/*
 * ring buffer based function tracer
 *
 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
 *
 * Originally taken from the RT patch by:
 *   Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Based on code from the latency_tracer, that is:
 *  Copyright (C) 2004-2006 Ingo Molnar
 *  Copyright (C) 2004 Nadia Yvette Chambers
 */
#include <linux/ring_buffer.h>
#include <linux/utsname.h>
#include <linux/stacktrace.h>
#include <linux/writeback.h>
#include <linux/kallsyms.h>
#include <linux/security.h>
#include <linux/seq_file.h>
#include <linux/irqflags.h>
#include <linux/debugfs.h>
#include <linux/tracefs.h>
#include <linux/pagemap.h>
#include <linux/hardirq.h>
#include <linux/linkage.h>
#include <linux/uaccess.h>
#include <linux/cleanup.h>
#include <linux/vmalloc.h>
#include <linux/ftrace.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/splice.h>
#include <linux/kdebug.h>
#include <linux/string.h>
#include <linux/mount.h>
#include <linux/rwsem.h>
#include <linux/slab.h>
#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/panic_notifier.h>
#include <linux/poll.h>
#include <linux/nmi.h>
#include <linux/fs.h>
#include <linux/trace.h>
#include <linux/sched/clock.h>
#include <linux/sched/rt.h>
#include <linux/fsnotify.h>
#include <linux/irq_work.h>
#include <linux/workqueue.h>
#include <linux/sort.h>
#include <linux/io.h> /* vmap_page_range() */

#include <asm/setup.h> /* COMMAND_LINE_SIZE */

#include "trace.h"
#include "trace_output.h"

#ifdef CONFIG_FTRACE_STARTUP_TEST
/*
 * We need to change this state when a selftest is running.
 * A selftest will look into the ring buffer to count the
 * entries inserted during the selftest, although concurrent
 * insertions into the ring buffer, such as trace_printk, could occur
 * at the same time, giving false positive or negative results.
 */
static bool __read_mostly tracing_selftest_running;

/*
 * If boot-time tracing (including tracers/events via the kernel cmdline)
 * is running, we do not want to run SELFTEST.
 */
bool __read_mostly tracing_selftest_disabled;

void __init disable_tracing_selftest(const char *reason)
{
	if (!tracing_selftest_disabled) {
		tracing_selftest_disabled = true;
		pr_info("Ftrace startup test is disabled due to %s\n", reason);
	}
}
#else
#define tracing_selftest_running	0
#define tracing_selftest_disabled	0
#endif

/* Pipe tracepoints to printk */
static struct trace_iterator *tracepoint_print_iter;
int tracepoint_printk;
static bool tracepoint_printk_stop_on_boot __initdata;
static bool traceoff_after_boot __initdata;
static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);

/* For tracers that don't implement custom flags */
static struct tracer_opt dummy_tracer_opt[] = {
	{ }
};

static int
dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
{
	return 0;
}

/*
 * To prevent the comm cache from being overwritten when no
 * tracing is active, only save the comm when a trace event
 * occurred.
 */
DEFINE_PER_CPU(bool, trace_taskinfo_save);

/*
 * Kill all tracing for good (never come back).
 * It is initialized to 1 but will turn to zero if the initialization
 * of the tracer is successful. But that is the only place that sets
 * this back to zero.
 */
static int tracing_disabled = 1;

cpumask_var_t __read_mostly tracing_buffer_mask;

#define MAX_TRACER_SIZE		100
/*
 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
 *
 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
 * is set, then ftrace_dump is called. This will output the contents
 * of the ftrace buffers to the console. This is very useful for
 * capturing traces that lead to crashes and outputting them to a
 * serial console.
 *
 * It is off by default, but you can enable it either by specifying
 * "ftrace_dump_on_oops" on the kernel command line, or by setting
 * /proc/sys/kernel/ftrace_dump_on_oops
 * Set 1 if you want to dump buffers of all CPUs
 * Set 2 if you want to dump the buffer of the CPU that triggered oops
 * Set instance name if you want to dump the specific trace instance
 * Multiple instance dump is also supported, and instances are separated
 * by commas.
 */
/* Set to string format zero to disable by default */
char ftrace_dump_on_oops[MAX_TRACER_SIZE] = "0";

/* When set, tracing will stop when a WARN*() is hit */
static int __disable_trace_on_warning;

int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
			     void *buffer, size_t *lenp, loff_t *ppos);
static const struct ctl_table trace_sysctl_table[] = {
	{
		.procname	= "ftrace_dump_on_oops",
		.data		= &ftrace_dump_on_oops,
		.maxlen		= MAX_TRACER_SIZE,
		.mode		= 0644,
		.proc_handler	= proc_dostring,
	},
	{
		.procname	= "traceoff_on_warning",
		.data		= &__disable_trace_on_warning,
		.maxlen		= sizeof(__disable_trace_on_warning),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "tracepoint_printk",
		.data		= &tracepoint_printk,
		.maxlen		= sizeof(tracepoint_printk),
		.mode		= 0644,
		.proc_handler	= tracepoint_printk_sysctl,
	},
};

static int __init init_trace_sysctls(void)
{
	register_sysctl_init("kernel", trace_sysctl_table);
	return 0;
}
subsys_initcall(init_trace_sysctls);

#ifdef CONFIG_TRACE_EVAL_MAP_FILE
/* Map of enums to their values, for "eval_map" file */
struct trace_eval_map_head {
	struct module		*mod;
	unsigned long		length;
};

union trace_eval_map_item;

struct trace_eval_map_tail {
	/*
	 * "end" is first and points to NULL as it must be different
	 * from "mod" or "eval_string"
	 */
	union trace_eval_map_item	*next;
	const char			*end;	/* points to NULL */
};

static DEFINE_MUTEX(trace_eval_mutex);

/*
 * The trace_eval_maps are saved in an array with two extra elements,
 * one at the beginning, and one at the end. The beginning item contains
 * the count of the saved maps (head.length), and the module they
 * belong to if not built in (head.mod). The ending item contains a
 * pointer to the next array of saved eval_map items.
 */
union trace_eval_map_item {
	struct trace_eval_map		map;
	struct trace_eval_map_head	head;
	struct trace_eval_map_tail	tail;
};

static union trace_eval_map_item *trace_eval_maps;
#endif /* CONFIG_TRACE_EVAL_MAP_FILE */

int tracing_set_tracer(struct trace_array *tr, const char *buf);
static void ftrace_trace_userstack(struct trace_array *tr,
				   struct trace_buffer *buffer,
				   unsigned int trace_ctx);

static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
static char *default_bootup_tracer;

static bool allocate_snapshot;
static bool snapshot_at_boot;

static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
static int boot_instance_index;

static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
static int boot_snapshot_index;

static int __init set_cmdline_ftrace(char *str)
{
	strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
	default_bootup_tracer = bootup_tracer_buf;
	/* We are using ftrace early, expand it */
	trace_set_ring_buffer_expanded(NULL);
	return 1;
}
__setup("ftrace=", set_cmdline_ftrace);

int ftrace_dump_on_oops_enabled(void)
{
	if (!strcmp("0", ftrace_dump_on_oops))
		return 0;
	else
		return 1;
}

static int __init set_ftrace_dump_on_oops(char *str)
{
	if (!*str) {
		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
		return 1;
	}

	if (*str == ',') {
		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
		strscpy(ftrace_dump_on_oops + 1, str, MAX_TRACER_SIZE - 1);
		return 1;
	}

	if (*str++ == '=') {
		strscpy(ftrace_dump_on_oops, str, MAX_TRACER_SIZE);
		return 1;
	}

	return 0;
}
__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);

static int __init stop_trace_on_warning(char *str)
{
	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
		__disable_trace_on_warning = 1;
	return 1;
}
__setup("traceoff_on_warning", stop_trace_on_warning);

static int __init boot_alloc_snapshot(char *str)
{
	char *slot = boot_snapshot_info + boot_snapshot_index;
	int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
	int ret;

	if (str[0] == '=') {
		str++;
		if (strlen(str) >= left)
			return -1;

		ret = snprintf(slot, left, "%s\t", str);
		boot_snapshot_index += ret;
	} else {
		allocate_snapshot = true;
		/* We also need the main ring buffer expanded */
		trace_set_ring_buffer_expanded(NULL);
	}
	return 1;
}
__setup("alloc_snapshot", boot_alloc_snapshot);


static int __init boot_snapshot(char *str)
{
	snapshot_at_boot = true;
	boot_alloc_snapshot(str);
	return 1;
}
__setup("ftrace_boot_snapshot", boot_snapshot);


static int __init boot_instance(char *str)
{
	char *slot = boot_instance_info + boot_instance_index;
	int left = sizeof(boot_instance_info) - boot_instance_index;
	int ret;

	if (strlen(str) >= left)
		return -1;

	ret = snprintf(slot, left, "%s\t", str);
	boot_instance_index += ret;

	return 1;
}
__setup("trace_instance=", boot_instance);


static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;

static int __init set_trace_boot_options(char *str)
{
	strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
	return 1;
}
__setup("trace_options=", set_trace_boot_options);

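/*
 * Illustrative example (not from this file): the boot parameters handled
 * above can be combined on the kernel command line.  The tracer, option
 * and instance names below are placeholders chosen for the example:
 *
 *	ftrace=function trace_options=print-parent alloc_snapshot
 *	trace_instance=my_instance ftrace_dump_on_oops=1
 *
 * "ftrace=" selects the boot-up tracer, "trace_options=" seeds the default
 * trace options, "alloc_snapshot" pre-allocates the snapshot buffer, and
 * "trace_instance=" creates an extra trace instance at boot.
 */
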
static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
static char *trace_boot_clock __initdata;

static int __init set_trace_boot_clock(char *str)
{
	strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
	trace_boot_clock = trace_boot_clock_buf;
	return 1;
}
__setup("trace_clock=", set_trace_boot_clock);

static int __init set_tracepoint_printk(char *str)
{
	/* Ignore the "tp_printk_stop_on_boot" param */
	if (*str == '_')
		return 0;

	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
		tracepoint_printk = 1;
	return 1;
}
__setup("tp_printk", set_tracepoint_printk);

static int __init set_tracepoint_printk_stop(char *str)
{
	tracepoint_printk_stop_on_boot = true;
	return 1;
}
__setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);

static int __init set_traceoff_after_boot(char *str)
{
	traceoff_after_boot = true;
	return 1;
}
__setup("traceoff_after_boot", set_traceoff_after_boot);

unsigned long long ns2usecs(u64 nsec)
{
	nsec += 500;
	do_div(nsec, 1000);
	return nsec;
}

static void
trace_process_export(struct trace_export *export,
		     struct ring_buffer_event *event, int flag)
{
	struct trace_entry *entry;
	unsigned int size = 0;

	if (export->flags & flag) {
		entry = ring_buffer_event_data(event);
		size = ring_buffer_event_length(event);
		export->write(export, entry, size);
	}
}

static DEFINE_MUTEX(ftrace_export_lock);

static struct trace_export __rcu *ftrace_exports_list __read_mostly;

static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);

static inline void ftrace_exports_enable(struct trace_export *export)
{
	if (export->flags & TRACE_EXPORT_FUNCTION)
		static_branch_inc(&trace_function_exports_enabled);

	if (export->flags & TRACE_EXPORT_EVENT)
		static_branch_inc(&trace_event_exports_enabled);

	if (export->flags & TRACE_EXPORT_MARKER)
		static_branch_inc(&trace_marker_exports_enabled);
}

static inline void ftrace_exports_disable(struct trace_export *export)
{
	if (export->flags & TRACE_EXPORT_FUNCTION)
		static_branch_dec(&trace_function_exports_enabled);

	if (export->flags & TRACE_EXPORT_EVENT)
		static_branch_dec(&trace_event_exports_enabled);

	if (export->flags & TRACE_EXPORT_MARKER)
		static_branch_dec(&trace_marker_exports_enabled);
}

static void ftrace_exports(struct ring_buffer_event *event, int flag)
{
	struct trace_export *export;

	preempt_disable_notrace();

	export = rcu_dereference_raw_check(ftrace_exports_list);
	while (export) {
		trace_process_export(export, event, flag);
		export = rcu_dereference_raw_check(export->next);
	}

	preempt_enable_notrace();
}

static inline void
add_trace_export(struct trace_export **list, struct trace_export *export)
{
	rcu_assign_pointer(export->next, *list);
	/*
	 * We are entering export into the list but another
	 * CPU might be walking that list. We need to make sure
	 * the export->next pointer is valid before another CPU sees
	 * the export pointer included in the list.
	 */
	rcu_assign_pointer(*list, export);
}

static inline int
rm_trace_export(struct trace_export **list, struct trace_export *export)
{
	struct trace_export **p;

	for (p = list; *p != NULL; p = &(*p)->next)
		if (*p == export)
			break;

	if (*p != export)
		return -1;

	rcu_assign_pointer(*p, (*p)->next);

	return 0;
}

static inline void
add_ftrace_export(struct trace_export **list, struct trace_export *export)
{
	ftrace_exports_enable(export);

	add_trace_export(list, export);
}

static inline int
rm_ftrace_export(struct trace_export **list, struct trace_export *export)
{
	int ret;

	ret = rm_trace_export(list, export);
	ftrace_exports_disable(export);

	return ret;
}

int register_ftrace_export(struct trace_export *export)
{
	if (WARN_ON_ONCE(!export->write))
		return -1;

	mutex_lock(&ftrace_export_lock);

	add_ftrace_export(&ftrace_exports_list, export);

	mutex_unlock(&ftrace_export_lock);

	return 0;
}
EXPORT_SYMBOL_GPL(register_ftrace_export);

int unregister_ftrace_export(struct trace_export *export)
{
	int ret;

	mutex_lock(&ftrace_export_lock);

	ret = rm_ftrace_export(&ftrace_exports_list, export);

	mutex_unlock(&ftrace_export_lock);

	return ret;
}
EXPORT_SYMBOL_GPL(unregister_ftrace_export);

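/*
 * Example (illustrative sketch only, not part of this file): a module
 * that mirrors function trace events to its own sink via the export
 * interface above.  The names my_export_write() and my_export are
 * hypothetical.
 *
 *	#include <linux/trace.h>
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int size)
 *	{
 *		// "entry" points at the raw trace entry, "size" is its length
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write = my_export_write,
 *		.flags = TRACE_EXPORT_FUNCTION,
 *	};
 *
 *	// register_ftrace_export(&my_export);
 *	// ...
 *	// unregister_ftrace_export(&my_export);
 */
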
/* trace_flags holds trace_options default values */
#define TRACE_DEFAULT_FLAGS						\
	(FUNCTION_DEFAULT_FLAGS |					\
	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |			\
	 TRACE_ITER_HASH_PTR | TRACE_ITER_TRACE_PRINTK)

/* trace_options that are only supported by global_trace */
#define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
	TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)

/* trace_flags that are default zero for instances */
#define ZEROED_TRACE_FLAGS \
	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK | TRACE_ITER_TRACE_PRINTK)

/*
 * The global_trace is the descriptor that holds the top-level tracing
 * buffers for the live tracing.
 */
static struct trace_array global_trace = {
	.trace_flags = TRACE_DEFAULT_FLAGS,
};

static struct trace_array *printk_trace = &global_trace;

static __always_inline bool printk_binsafe(struct trace_array *tr)
{
	/*
	 * The binary format of traceprintk can cause a crash if used
	 * by a buffer from another boot. Force the use of the
	 * non binary version of trace_printk if the trace_printk
	 * buffer is a boot mapped ring buffer.
	 */
	return !(tr->flags & TRACE_ARRAY_FL_BOOT);
}

static void update_printk_trace(struct trace_array *tr)
{
	if (printk_trace == tr)
		return;

	printk_trace->trace_flags &= ~TRACE_ITER_TRACE_PRINTK;
	printk_trace = tr;
	tr->trace_flags |= TRACE_ITER_TRACE_PRINTK;
}

void trace_set_ring_buffer_expanded(struct trace_array *tr)
{
	if (!tr)
		tr = &global_trace;
	tr->ring_buffer_expanded = true;
}

LIST_HEAD(ftrace_trace_arrays);

int trace_array_get(struct trace_array *this_tr)
{
	struct trace_array *tr;

	guard(mutex)(&trace_types_lock);
	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
		if (tr == this_tr) {
			tr->ref++;
			return 0;
		}
	}

	return -ENODEV;
}

static void __trace_array_put(struct trace_array *this_tr)
{
	WARN_ON(!this_tr->ref);
	this_tr->ref--;
}

/**
 * trace_array_put - Decrement the reference counter for this trace array.
 * @this_tr : pointer to the trace array
 *
 * NOTE: Use this when we no longer need the trace array returned by
 * trace_array_get_by_name(). This ensures the trace array can be later
 * destroyed.
 *
 */
void trace_array_put(struct trace_array *this_tr)
{
	if (!this_tr)
		return;

	mutex_lock(&trace_types_lock);
	__trace_array_put(this_tr);
	mutex_unlock(&trace_types_lock);
}
EXPORT_SYMBOL_GPL(trace_array_put);

int tracing_check_open_get_tr(struct trace_array *tr)
{
	int ret;

	ret = security_locked_down(LOCKDOWN_TRACEFS);
	if (ret)
		return ret;

	if (tracing_disabled)
		return -ENODEV;

	if (tr && trace_array_get(tr) < 0)
		return -ENODEV;

	return 0;
}

/**
 * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
 * @filtered_pids: The list of pids to check
 * @search_pid: The PID to find in @filtered_pids
 *
 * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
 */
bool
trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
{
	return trace_pid_list_is_set(filtered_pids, search_pid);
}

/**
 * trace_ignore_this_task - should a task be ignored for tracing
 * @filtered_pids: The list of pids to check
 * @filtered_no_pids: The list of pids not to be traced
 * @task: The task that should be ignored if not filtered
 *
 * Checks if @task should be traced or not from @filtered_pids.
 * Returns true if @task should *NOT* be traced.
 * Returns false if @task should be traced.
 */
bool
trace_ignore_this_task(struct trace_pid_list *filtered_pids,
		       struct trace_pid_list *filtered_no_pids,
		       struct task_struct *task)
{
	/*
	 * If filtered_no_pids is not empty, and the task's pid is listed
	 * in filtered_no_pids, then return true.
	 * Otherwise, if filtered_pids is empty, that means we can
	 * trace all tasks. If it has content, then only trace pids
	 * within filtered_pids.
	 */

	return (filtered_pids &&
		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
		(filtered_no_pids &&
		 trace_find_filtered_pid(filtered_no_pids, task->pid));
}

/**
 * trace_filter_add_remove_task - Add or remove a task from a pid_list
 * @pid_list: The list to modify
 * @self: The current task for fork or NULL for exit
 * @task: The task to add or remove
 *
 * If adding a task, if @self is defined, the task is only added if @self
 * is also included in @pid_list. This happens on fork and tasks should
 * only be added when the parent is listed. If @self is NULL, then the
 * @task pid will be removed from the list, which would happen on exit
 * of a task.
 */
void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
				  struct task_struct *self,
				  struct task_struct *task)
{
	if (!pid_list)
		return;

	/* For forks, we only add if the forking task is listed */
	if (self) {
		if (!trace_find_filtered_pid(pid_list, self->pid))
			return;
	}

	/* "self" is set for forks, and NULL for exits */
	if (self)
		trace_pid_list_set(pid_list, task->pid);
	else
		trace_pid_list_clear(pid_list, task->pid);
}

/**
 * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
 * @pid_list: The pid list to show
 * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
 * @pos: The position of the file
 *
 * This is used by the seq_file "next" operation to iterate the pids
 * listed in a trace_pid_list structure.
 *
 * Returns the pid+1 as we want to display pid of zero, but NULL would
 * stop the iteration.
 */
void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
{
	long pid = (unsigned long)v;
	unsigned int next;

	(*pos)++;

	/* pid already is +1 of the actual previous bit */
	if (trace_pid_list_next(pid_list, pid, &next) < 0)
		return NULL;

	pid = next;

	/* Return pid + 1 to allow zero to be represented */
	return (void *)(pid + 1);
}

/**
 * trace_pid_start - Used for seq_file to start reading pid lists
 * @pid_list: The pid list to show
 * @pos: The position of the file
 *
 * This is used by seq_file "start" operation to start the iteration
 * of listing pids.
 *
 * Returns the pid+1 as we want to display pid of zero, but NULL would
 * stop the iteration.
 */
void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
{
	unsigned long pid;
	unsigned int first;
	loff_t l = 0;

	if (trace_pid_list_first(pid_list, &first) < 0)
		return NULL;

	pid = first;

	/* Return pid + 1 so that zero can be the exit value */
	for (pid++; pid && l < *pos;
	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
		;
	return (void *)pid;
}

/**
 * trace_pid_show - show the current pid in seq_file processing
 * @m: The seq_file structure to write into
 * @v: A void pointer of the pid (+1) value to display
 *
 * Can be directly used by seq_file operations to display the current
 * pid value.
 */
int trace_pid_show(struct seq_file *m, void *v)
{
	unsigned long pid = (unsigned long)v - 1;

	seq_printf(m, "%lu\n", pid);
	return 0;
}

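/*
 * Example (illustrative sketch): how the pid helpers above are commonly
 * wired into a seq_file interface for a "set_*_pid" style file.  The
 * callbacks p_start/p_next/p_stop and the my_pid_list pointer are
 * hypothetical; real users look the pid list up from their own data.
 *
 *	static void *p_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return trace_pid_start(my_pid_list, pos);
 *	}
 *
 *	static void *p_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(my_pid_list, v, pos);
 *	}
 *
 *	static void p_stop(struct seq_file *m, void *v)
 *	{
 *	}
 *
 *	static const struct seq_operations my_pid_seq_ops = {
 *		.start	= p_start,
 *		.next	= p_next,
 *		.stop	= p_stop,
 *		.show	= trace_pid_show,
 *	};
 *
 * Every pid is carried through the iterator as (pid + 1) so that a
 * valid pid of 0 is not mistaken for the NULL end-of-iteration marker.
 */
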
/* 128 should be much more than enough */
#define PID_BUF_SIZE		127

int trace_pid_write(struct trace_pid_list *filtered_pids,
		    struct trace_pid_list **new_pid_list,
		    const char __user *ubuf, size_t cnt)
{
	struct trace_pid_list *pid_list;
	struct trace_parser parser;
	unsigned long val;
	int nr_pids = 0;
	ssize_t read = 0;
	ssize_t ret;
	loff_t pos;
	pid_t pid;

	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
		return -ENOMEM;

	/*
	 * Always recreate a new array. The write is an all or nothing
	 * operation. Always create a new array when adding new pids by
	 * the user. If the operation fails, then the current list is
	 * not modified.
	 */
	pid_list = trace_pid_list_alloc();
	if (!pid_list) {
		trace_parser_put(&parser);
		return -ENOMEM;
	}

	if (filtered_pids) {
		/* copy the current bits to the new max */
		ret = trace_pid_list_first(filtered_pids, &pid);
		while (!ret) {
			trace_pid_list_set(pid_list, pid);
			ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
			nr_pids++;
		}
	}

	ret = 0;
	while (cnt > 0) {

		pos = 0;

		ret = trace_get_user(&parser, ubuf, cnt, &pos);
		if (ret < 0)
			break;

		read += ret;
		ubuf += ret;
		cnt -= ret;

		if (!trace_parser_loaded(&parser))
			break;

		ret = -EINVAL;
		if (kstrtoul(parser.buffer, 0, &val))
			break;

		pid = (pid_t)val;

		if (trace_pid_list_set(pid_list, pid) < 0) {
			ret = -1;
			break;
		}
		nr_pids++;

		trace_parser_clear(&parser);
		ret = 0;
	}
	trace_parser_put(&parser);

	if (ret < 0) {
		trace_pid_list_free(pid_list);
		return ret;
	}

	if (!nr_pids) {
		/* Cleared the list of pids */
		trace_pid_list_free(pid_list);
		pid_list = NULL;
	}

	*new_pid_list = pid_list;

	return read;
}

static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
{
	u64 ts;

	/* Early boot up does not have a buffer yet */
	if (!buf->buffer)
		return trace_clock_local();

	ts = ring_buffer_time_stamp(buf->buffer);
	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);

	return ts;
}

u64 ftrace_now(int cpu)
{
	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
}

/**
 * tracing_is_enabled - Show if global_trace has been enabled
 *
 * Shows if the global trace has been enabled or not. It uses the
 * mirror flag "buffer_disabled" to be used in fast paths such as for
 * the irqsoff tracer. But it may be inaccurate due to races. If you
 * need to know the accurate state, use tracing_is_on() which is a little
 * slower, but accurate.
 */
int tracing_is_enabled(void)
{
	/*
	 * For quick access (irqsoff uses this in fast path), just
	 * return the mirror variable of the state of the ring buffer.
	 * It's a little racy, but we don't really care.
	 */
	smp_rmb();
	return !global_trace.buffer_disabled;
}

/*
 * trace_buf_size is the size in bytes that is allocated
 * for a buffer. Note, the number of bytes is always rounded
 * to page size.
 *
 * This number is purposely set to a low number of 16384.
 * If the dump on oops happens, it will be much appreciated
 * to not have to wait for all that output. Anyway, this is
 * configurable at both boot time and run time.
 */
#define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */

static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT;

/* trace_types holds a linked list of available tracers. */
static struct tracer *trace_types __read_mostly;

/*
 * trace_types_lock is used to protect the trace_types list.
 */
DEFINE_MUTEX(trace_types_lock);

/*
 * serialize the access of the ring buffer
 *
 * ring buffer serializes readers, but it is low level protection.
 * The validity of the events (which are returned by ring_buffer_peek() etc.)
 * is not protected by the ring buffer.
 *
 * The content of events may become garbage if we allow other processes to
 * consume these events concurrently:
 *   A) the page of the consumed events may become a normal page
 *      (not a reader page) in the ring buffer, and this page will be
 *      rewritten by the events producer.
 *   B) The page of the consumed events may become a page for splice_read,
 *      and this page will be returned to the system.
 *
 * These primitives allow multiple processes to access different per-CPU
 * ring buffers concurrently.
 *
 * These primitives don't distinguish read-only and read-consume access.
 * Multiple read-only accesses are also serialized.
 */

#ifdef CONFIG_SMP
static DECLARE_RWSEM(all_cpu_access_lock);
static DEFINE_PER_CPU(struct mutex, cpu_access_lock);

static inline void trace_access_lock(int cpu)
{
	if (cpu == RING_BUFFER_ALL_CPUS) {
		/* gain it for accessing the whole ring buffer. */
		down_write(&all_cpu_access_lock);
	} else {
		/* gain it for accessing a cpu ring buffer. */

		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
		down_read(&all_cpu_access_lock);

		/* Secondly block other access to this @cpu ring buffer. */
		mutex_lock(&per_cpu(cpu_access_lock, cpu));
	}
}

static inline void trace_access_unlock(int cpu)
{
	if (cpu == RING_BUFFER_ALL_CPUS) {
		up_write(&all_cpu_access_lock);
	} else {
		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
		up_read(&all_cpu_access_lock);
	}
}

static inline void trace_access_lock_init(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		mutex_init(&per_cpu(cpu_access_lock, cpu));
}

#else

static DEFINE_MUTEX(access_lock);

static inline void trace_access_lock(int cpu)
{
	(void)cpu;
	mutex_lock(&access_lock);
}

static inline void trace_access_unlock(int cpu)
{
	(void)cpu;
	mutex_unlock(&access_lock);
}

static inline void trace_access_lock_init(void)
{
}

#endif

#ifdef CONFIG_STACKTRACE
static void __ftrace_trace_stack(struct trace_array *tr,
				 struct trace_buffer *buffer,
				 unsigned int trace_ctx,
				 int skip, struct pt_regs *regs);
static inline void ftrace_trace_stack(struct trace_array *tr,
				      struct trace_buffer *buffer,
				      unsigned int trace_ctx,
				      int skip, struct pt_regs *regs);

#else
static inline void __ftrace_trace_stack(struct trace_array *tr,
					struct trace_buffer *buffer,
					unsigned int trace_ctx,
					int skip, struct pt_regs *regs)
{
}
static inline void ftrace_trace_stack(struct trace_array *tr,
				      struct trace_buffer *buffer,
				      unsigned long trace_ctx,
				      int skip, struct pt_regs *regs)
{
}

#endif

static __always_inline void
trace_event_setup(struct ring_buffer_event *event,
		  int type, unsigned int trace_ctx)
{
	struct trace_entry *ent = ring_buffer_event_data(event);

	tracing_generic_entry_update(ent, type, trace_ctx);
}

static __always_inline struct ring_buffer_event *
__trace_buffer_lock_reserve(struct trace_buffer *buffer,
			    int type,
			    unsigned long len,
			    unsigned int trace_ctx)
{
	struct ring_buffer_event *event;

	event = ring_buffer_lock_reserve(buffer, len);
	if (event != NULL)
		trace_event_setup(event, type, trace_ctx);

	return event;
}

void tracer_tracing_on(struct trace_array *tr)
{
	if (tr->array_buffer.buffer)
		ring_buffer_record_on(tr->array_buffer.buffer);
	/*
	 * This flag is looked at when buffers haven't been allocated
	 * yet, or by some tracers (like irqsoff), that just want to
	 * know if the ring buffer has been disabled, but it can handle
	 * races of where it gets disabled but we still do a record.
	 * As the check is in the fast path of the tracers, it is more
	 * important to be fast than accurate.
	 */
	tr->buffer_disabled = 0;
	/* Make the flag seen by readers */
	smp_wmb();
}

/**
 * tracing_on - enable tracing buffers
 *
 * This function enables tracing buffers that may have been
 * disabled with tracing_off.
 */
void tracing_on(void)
{
	tracer_tracing_on(&global_trace);
}
EXPORT_SYMBOL_GPL(tracing_on);


static __always_inline void
__buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
{
	__this_cpu_write(trace_taskinfo_save, true);

	/* If this is the temp buffer, we need to commit fully */
	if (this_cpu_read(trace_buffered_event) == event) {
		/* Length is in event->array[0] */
		ring_buffer_write(buffer, event->array[0], &event->array[1]);
		/* Release the temp buffer */
		this_cpu_dec(trace_buffered_event_cnt);
		/* ring_buffer_unlock_commit() enables preemption */
		preempt_enable_notrace();
	} else
		ring_buffer_unlock_commit(buffer);
}

int __trace_array_puts(struct trace_array *tr, unsigned long ip,
		       const char *str, int size)
{
	struct ring_buffer_event *event;
	struct trace_buffer *buffer;
	struct print_entry *entry;
	unsigned int trace_ctx;
	int alloc;

	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
		return 0;

	if (unlikely(tracing_selftest_running && tr == &global_trace))
		return 0;

	if (unlikely(tracing_disabled))
		return 0;

	alloc = sizeof(*entry) + size + 2; /* possible \n added */

	trace_ctx = tracing_gen_ctx();
	buffer = tr->array_buffer.buffer;
	ring_buffer_nest_start(buffer);
	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
					    trace_ctx);
	if (!event) {
		size = 0;
		goto out;
	}

	entry = ring_buffer_event_data(event);
	entry->ip = ip;

	memcpy(&entry->buf, str, size);

	/* Add a newline if necessary */
	if (entry->buf[size - 1] != '\n') {
		entry->buf[size] = '\n';
		entry->buf[size + 1] = '\0';
	} else
		entry->buf[size] = '\0';

	__buffer_unlock_commit(buffer, event);
	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
 out:
	ring_buffer_nest_end(buffer);
	return size;
}
EXPORT_SYMBOL_GPL(__trace_array_puts);

/**
 * __trace_puts - write a constant string into the trace buffer.
 * @ip: The address of the caller
 * @str: The constant string to write
 * @size: The size of the string.
 */
int __trace_puts(unsigned long ip, const char *str, int size)
{
	return __trace_array_puts(printk_trace, ip, str, size);
}
EXPORT_SYMBOL_GPL(__trace_puts);

/**
 * __trace_bputs - write the pointer to a constant string into trace buffer
 * @ip: The address of the caller
 * @str: The constant string to write to the buffer
 */
int __trace_bputs(unsigned long ip, const char *str)
{
	struct trace_array *tr = READ_ONCE(printk_trace);
	struct ring_buffer_event *event;
	struct trace_buffer *buffer;
	struct bputs_entry *entry;
	unsigned int trace_ctx;
	int size = sizeof(struct bputs_entry);
	int ret = 0;

	if (!printk_binsafe(tr))
		return __trace_puts(ip, str, strlen(str));

	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
		return 0;

	if (unlikely(tracing_selftest_running || tracing_disabled))
		return 0;

	trace_ctx = tracing_gen_ctx();
	buffer = tr->array_buffer.buffer;

	ring_buffer_nest_start(buffer);
	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
					    trace_ctx);
	if (!event)
		goto out;

	entry = ring_buffer_event_data(event);
	entry->ip = ip;
	entry->str = str;

	__buffer_unlock_commit(buffer, event);
	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);

	ret = 1;
 out:
	ring_buffer_nest_end(buffer);
	return ret;
}
EXPORT_SYMBOL_GPL(__trace_bputs);

#ifdef CONFIG_TRACER_SNAPSHOT
static void tracing_snapshot_instance_cond(struct trace_array *tr,
					   void *cond_data)
{
	struct tracer *tracer = tr->current_trace;
	unsigned long flags;

	if (in_nmi()) {
		trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
		trace_array_puts(tr, "*** snapshot is being ignored ***\n");
		return;
	}

	if (!tr->allocated_snapshot) {
		trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
		trace_array_puts(tr, "*** stopping trace here! ***\n");
		tracer_tracing_off(tr);
		return;
	}

	/* Note, snapshot can not be used when the tracer uses it */
	if (tracer->use_max_tr) {
		trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
		return;
	}

	if (tr->mapped) {
		trace_array_puts(tr, "*** BUFFER MEMORY MAPPED ***\n");
		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
		return;
	}

	local_irq_save(flags);
	update_max_tr(tr, current, smp_processor_id(), cond_data);
	local_irq_restore(flags);
}

void tracing_snapshot_instance(struct trace_array *tr)
{
	tracing_snapshot_instance_cond(tr, NULL);
}

/**
 * tracing_snapshot - take a snapshot of the current buffer.
 *
 * This causes a swap between the snapshot buffer and the current live
 * tracing buffer. You can use this to take snapshots of the live
 * trace when some condition is triggered, but continue to trace.
 *
 * Note, make sure to allocate the snapshot with either
 * a tracing_snapshot_alloc(), or by doing it manually
 * with: echo 1 > /sys/kernel/tracing/snapshot
 *
 * If the snapshot buffer is not allocated, it will stop tracing.
 * Basically making a permanent snapshot.
 */
void tracing_snapshot(void)
{
	struct trace_array *tr = &global_trace;

	tracing_snapshot_instance(tr);
}
EXPORT_SYMBOL_GPL(tracing_snapshot);

/**
 * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
 * @tr: The tracing instance to snapshot
 * @cond_data: The data to be tested conditionally, and possibly saved
 *
 * This is the same as tracing_snapshot() except that the snapshot is
 * conditional - the snapshot will only happen if the
 * cond_snapshot.update() implementation receiving the cond_data
 * returns true, which means that the trace array's cond_snapshot
 * update() operation used the cond_data to determine whether the
 * snapshot should be taken, and if it was, presumably saved it along
 * with the snapshot.
 */
void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
{
	tracing_snapshot_instance_cond(tr, cond_data);
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond);

/**
 * tracing_cond_snapshot_data - get the user data associated with a snapshot
 * @tr: The tracing instance
 *
 * When the user enables a conditional snapshot using
 * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
 * with the snapshot. This accessor is used to retrieve it.
 *
 * Should not be called from cond_snapshot.update(), since it takes
 * the tr->max_lock lock, which the code calling
 * cond_snapshot.update() has already done.
 *
 * Returns the cond_data associated with the trace array's snapshot.
 */
void *tracing_cond_snapshot_data(struct trace_array *tr)
{
	void *cond_data = NULL;

	local_irq_disable();
	arch_spin_lock(&tr->max_lock);

	if (tr->cond_snapshot)
		cond_data = tr->cond_snapshot->cond_data;

	arch_spin_unlock(&tr->max_lock);
	local_irq_enable();

	return cond_data;
}
EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);

static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
					struct array_buffer *size_buf, int cpu_id);
static void set_buffer_entries(struct array_buffer *buf, unsigned long val);

int tracing_alloc_snapshot_instance(struct trace_array *tr)
{
	int order;
	int ret;

	if (!tr->allocated_snapshot) {

		/* Make the snapshot buffer have the same order as main buffer */
		order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
		ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
		if (ret < 0)
			return ret;

		/* allocate spare buffer */
		ret = resize_buffer_duplicate_size(&tr->max_buffer,
				&tr->array_buffer, RING_BUFFER_ALL_CPUS);
		if (ret < 0)
			return ret;

		tr->allocated_snapshot = true;
	}

	return 0;
}

static void free_snapshot(struct trace_array *tr)
{
	/*
	 * We don't free the ring buffer; instead, we resize it because
	 * the max_tr ring buffer has some state (e.g. ring->clock) and
	 * we want to preserve it.
	 */
	ring_buffer_subbuf_order_set(tr->max_buffer.buffer, 0);
	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
	set_buffer_entries(&tr->max_buffer, 1);
	tracing_reset_online_cpus(&tr->max_buffer);
	tr->allocated_snapshot = false;
}

static int tracing_arm_snapshot_locked(struct trace_array *tr)
{
	int ret;

	lockdep_assert_held(&trace_types_lock);

	spin_lock(&tr->snapshot_trigger_lock);
	if (tr->snapshot == UINT_MAX || tr->mapped) {
		spin_unlock(&tr->snapshot_trigger_lock);
		return -EBUSY;
	}

	tr->snapshot++;
	spin_unlock(&tr->snapshot_trigger_lock);

	ret = tracing_alloc_snapshot_instance(tr);
	if (ret) {
		spin_lock(&tr->snapshot_trigger_lock);
		tr->snapshot--;
		spin_unlock(&tr->snapshot_trigger_lock);
	}

	return ret;
}

int tracing_arm_snapshot(struct trace_array *tr)
{
	int ret;

	mutex_lock(&trace_types_lock);
	ret = tracing_arm_snapshot_locked(tr);
	mutex_unlock(&trace_types_lock);

	return ret;
}

void tracing_disarm_snapshot(struct trace_array *tr)
{
	spin_lock(&tr->snapshot_trigger_lock);
	if (!WARN_ON(!tr->snapshot))
		tr->snapshot--;
	spin_unlock(&tr->snapshot_trigger_lock);
}

/**
 * tracing_alloc_snapshot - allocate snapshot buffer.
 *
 * This only allocates the snapshot buffer if it isn't already
 * allocated - it doesn't also take a snapshot.
 *
 * This is meant to be used in cases where the snapshot buffer needs
 * to be set up for events that can't sleep but need to be able to
 * trigger a snapshot.
 */
int tracing_alloc_snapshot(void)
{
	struct trace_array *tr = &global_trace;
	int ret;

	ret = tracing_alloc_snapshot_instance(tr);
	WARN_ON(ret < 0);

	return ret;
}
EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);

/**
 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
 *
 * This is similar to tracing_snapshot(), but it will allocate the
 * snapshot buffer if it isn't already allocated. Use this only
 * where it is safe to sleep, as the allocation may sleep.
 *
 * This causes a swap between the snapshot buffer and the current live
 * tracing buffer. You can use this to take snapshots of the live
 * trace when some condition is triggered, but continue to trace.
 */
void tracing_snapshot_alloc(void)
{
	int ret;

	ret = tracing_alloc_snapshot();
	if (ret < 0)
		return;

	tracing_snapshot();
}
EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);

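/*
 * Example (illustrative sketch): typical in-kernel use of the snapshot
 * API above.  The trigger my_error_condition() is a placeholder.
 *
 *	// in a context that may sleep, e.g. module init:
 *	tracing_alloc_snapshot();
 *
 *	// later, possibly from atomic context:
 *	if (my_error_condition())
 *		tracing_snapshot();
 *
 * The captured data can then be read from /sys/kernel/tracing/snapshot
 * while the live buffer keeps tracing.
 */
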
/**
 * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
 * @tr: The tracing instance
 * @cond_data: User data to associate with the snapshot
 * @update: Implementation of the cond_snapshot update function
 *
 * Check whether the conditional snapshot for the given instance has
 * already been enabled, or if the current tracer is already using a
 * snapshot; if so, return -EBUSY, else create a cond_snapshot and
 * save the cond_data and update function inside.
 *
 * Returns 0 if successful, error otherwise.
 */
int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
				 cond_update_fn_t update)
{
	struct cond_snapshot *cond_snapshot __free(kfree) =
		kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
	int ret;

	if (!cond_snapshot)
		return -ENOMEM;

	cond_snapshot->cond_data = cond_data;
	cond_snapshot->update = update;

	guard(mutex)(&trace_types_lock);

	if (tr->current_trace->use_max_tr)
		return -EBUSY;

	/*
	 * The cond_snapshot can only change to NULL without the
	 * trace_types_lock. We don't care if we race with it going
	 * to NULL, but we want to make sure that it's not set to
	 * something other than NULL when we get here, which we can
	 * do safely with only holding the trace_types_lock and not
	 * having to take the max_lock.
	 */
	if (tr->cond_snapshot)
		return -EBUSY;

	ret = tracing_arm_snapshot_locked(tr);
	if (ret)
		return ret;

	local_irq_disable();
	arch_spin_lock(&tr->max_lock);
	tr->cond_snapshot = no_free_ptr(cond_snapshot);
	arch_spin_unlock(&tr->max_lock);
	local_irq_enable();

	return 0;
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);

/**
 * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
 * @tr: The tracing instance
 *
 * Check whether the conditional snapshot for the given instance is
 * enabled; if so, free the cond_snapshot associated with it,
 * otherwise return -EINVAL.
 *
 * Returns 0 if successful, error otherwise.
 */
int tracing_snapshot_cond_disable(struct trace_array *tr)
{
	int ret = 0;

	local_irq_disable();
	arch_spin_lock(&tr->max_lock);

	if (!tr->cond_snapshot)
		ret = -EINVAL;
	else {
		kfree(tr->cond_snapshot);
		tr->cond_snapshot = NULL;
	}

	arch_spin_unlock(&tr->max_lock);
	local_irq_enable();

	tracing_disarm_snapshot(tr);

	return ret;
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
#else
void tracing_snapshot(void)
{
	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
}
EXPORT_SYMBOL_GPL(tracing_snapshot);
void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
{
	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
int tracing_alloc_snapshot(void)
{
	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
	return -ENODEV;
}
EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
void tracing_snapshot_alloc(void)
{
	/* Give warning */
	tracing_snapshot();
}
EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
void *tracing_cond_snapshot_data(struct trace_array *tr)
{
	return NULL;
}
EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
{
	return -ENODEV;
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
int tracing_snapshot_cond_disable(struct trace_array *tr)
{
	return false;
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
#define free_snapshot(tr)	do { } while (0)
#define tracing_arm_snapshot_locked(tr) ({ -EBUSY; })
#endif /* CONFIG_TRACER_SNAPSHOT */

void tracer_tracing_off(struct trace_array *tr)
{
	if (tr->array_buffer.buffer)
		ring_buffer_record_off(tr->array_buffer.buffer);
	/*
	 * This flag is looked at when buffers haven't been allocated
	 * yet, or by some tracers (like irqsoff), that just want to
	 * know if the ring buffer has been disabled, but it can handle
	 * races of where it gets disabled but we still do a record.
	 * As the check is in the fast path of the tracers, it is more
	 * important to be fast than accurate.
	 */
	tr->buffer_disabled = 1;
	/* Make the flag seen by readers */
	smp_wmb();
}

/**
 * tracing_off - turn off tracing buffers
 *
 * This function stops the tracing buffers from recording data.
 * It does not disable any overhead the tracers themselves may
 * be causing. This function simply causes all recording to
 * the ring buffers to fail.
 */
void tracing_off(void)
{
	tracer_tracing_off(&global_trace);
}
EXPORT_SYMBOL_GPL(tracing_off);

void disable_trace_on_warning(void)
{
	if (__disable_trace_on_warning) {
		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
			"Disabling tracing due to warning\n");
		tracing_off();
	}
}

/**
 * tracer_tracing_is_on - show real state of ring buffer enabled
 * @tr : the trace array to know if ring buffer is enabled
 *
 * Shows real state of the ring buffer if it is enabled or not.
 */
bool tracer_tracing_is_on(struct trace_array *tr)
{
	if (tr->array_buffer.buffer)
		return ring_buffer_record_is_set_on(tr->array_buffer.buffer);
	return !tr->buffer_disabled;
}

/**
 * tracing_is_on - show state of ring buffers enabled
 */
int tracing_is_on(void)
{
	return tracer_tracing_is_on(&global_trace);
}
EXPORT_SYMBOL_GPL(tracing_is_on);

static int __init set_buf_size(char *str)
{
	unsigned long buf_size;

	if (!str)
		return 0;
	buf_size = memparse(str, &str);
	/*
	 * nr_entries can not be zero and the startup
	 * tests require some buffer space. Therefore
	 * ensure we have at least 4096 bytes of buffer.
	 */
	trace_buf_size = max(4096UL, buf_size);
	return 1;
}
__setup("trace_buf_size=", set_buf_size);

static int __init set_tracing_thresh(char *str)
{
	unsigned long threshold;
	int ret;

	if (!str)
		return 0;
	ret = kstrtoul(str, 0, &threshold);
	if (ret < 0)
		return 0;
	tracing_thresh = threshold * 1000;
	return 1;
}
__setup("tracing_thresh=", set_tracing_thresh);

unsigned long nsecs_to_usecs(unsigned long nsecs)
{
	return nsecs / 1000;
}

/*
 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
 * of strings in the order that the evals (enum) were defined.
 */
#undef C
#define C(a, b) b

/* These must match the bit positions in trace_iterator_flags */
static const char *trace_options[] = {
	TRACE_FLAGS
	NULL
};

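/*
 * Illustration of the C(a, b) trick used above (generic sketch; the real
 * flag list is TRACE_FLAGS in trace.h).  Given a hypothetical list:
 *
 *	#define MY_FLAGS			\
 *		C(PRINT_PARENT, "print-parent"),\
 *		C(SYM_OFFSET, "sym-offset"),
 *
 * expanding it once with a definition like "#define C(a, b) TRACE_ITER_##a"
 * produces the flag (enum) names, and expanding it again with
 * "#define C(a, b) b" (as done above) produces the matching strings,
 * in the same order as the bits.
 */
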
static struct {
	u64 (*func)(void);
	const char *name;
	int in_ns;	/* is this clock in nanoseconds? */
} trace_clocks[] = {
	{ trace_clock_local,		"local",	1 },
	{ trace_clock_global,		"global",	1 },
	{ trace_clock_counter,		"counter",	0 },
	{ trace_clock_jiffies,		"uptime",	0 },
	{ trace_clock,			"perf",		1 },
	{ ktime_get_mono_fast_ns,	"mono",		1 },
	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
	{ ktime_get_boot_fast_ns,	"boot",		1 },
	{ ktime_get_tai_fast_ns,	"tai",		1 },
	ARCH_TRACE_CLOCKS
};

bool trace_clock_in_ns(struct trace_array *tr)
{
	if (trace_clocks[tr->clock_id].in_ns)
		return true;

	return false;
}

/*
 * trace_parser_get_init - gets the buffer for trace parser
 */
int trace_parser_get_init(struct trace_parser *parser, int size)
{
	memset(parser, 0, sizeof(*parser));

	parser->buffer = kmalloc(size, GFP_KERNEL);
	if (!parser->buffer)
		return 1;

	parser->size = size;
	return 0;
}

/*
 * trace_parser_put - frees the buffer for trace parser
 */
void trace_parser_put(struct trace_parser *parser)
{
	kfree(parser->buffer);
	parser->buffer = NULL;
}

/*
 * trace_get_user - reads the user input string separated by space
 * (matched by isspace(ch))
 *
 * For each string found the 'struct trace_parser' is updated,
 * and the function returns.
 *
 * Returns number of bytes read.
 *
 * See kernel/trace/trace.h for 'struct trace_parser' details.
 */
int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
		   size_t cnt, loff_t *ppos)
{
	char ch;
	size_t read = 0;
	ssize_t ret;

	if (!*ppos)
		trace_parser_clear(parser);

	ret = get_user(ch, ubuf++);
	if (ret)
		goto out;

	read++;
	cnt--;

	/*
	 * The parser is not finished with the last write,
	 * continue reading the user input without skipping spaces.
	 */
	if (!parser->cont) {
		/* skip white space */
		while (cnt && isspace(ch)) {
			ret = get_user(ch, ubuf++);
			if (ret)
				goto out;
			read++;
			cnt--;
		}

		parser->idx = 0;

		/* only spaces were written */
		if (isspace(ch) || !ch) {
			*ppos += read;
			ret = read;
			goto out;
		}
	}

	/* read the non-space input */
	while (cnt && !isspace(ch) && ch) {
		if (parser->idx < parser->size - 1)
			parser->buffer[parser->idx++] = ch;
		else {
			ret = -EINVAL;
			goto out;
		}
		ret = get_user(ch, ubuf++);
		if (ret)
			goto out;
		read++;
		cnt--;
	}

	/* We either got finished input or we have to wait for another call. */
	if (isspace(ch) || !ch) {
		parser->buffer[parser->idx] = 0;
		parser->cont = false;
	} else if (parser->idx < parser->size - 1) {
		parser->cont = true;
		parser->buffer[parser->idx++] = ch;
		/* Make sure the parsed string always terminates with '\0'. */
		parser->buffer[parser->idx] = 0;
	} else {
		ret = -EINVAL;
		goto out;
	}

	*ppos += read;
	ret = read;

 out:
	return ret;
}

/* TODO add a seq_buf_to_buffer() */
static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
{
	int len;

	if (trace_seq_used(s) <= s->readpos)
		return -EBUSY;

	len = trace_seq_used(s) - s->readpos;
	if (cnt > len)
		cnt = len;
	memcpy(buf, s->buffer + s->readpos, cnt);

	s->readpos += cnt;
	return cnt;
}

unsigned long __read_mostly	tracing_thresh;

#ifdef CONFIG_TRACER_MAX_TRACE
static const struct file_operations tracing_max_lat_fops;

#ifdef LATENCY_FS_NOTIFY

static struct workqueue_struct *fsnotify_wq;

static void latency_fsnotify_workfn(struct work_struct *work)
{
	struct trace_array *tr = container_of(work, struct trace_array,
					      fsnotify_work);
	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
}

static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
{
	struct trace_array *tr = container_of(iwork, struct trace_array,
					      fsnotify_irqwork);
	queue_work(fsnotify_wq, &tr->fsnotify_work);
}

static void trace_create_maxlat_file(struct trace_array *tr,
				     struct dentry *d_tracer)
{
	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
	tr->d_max_latency = trace_create_file("tracing_max_latency",
					      TRACE_MODE_WRITE,
					      d_tracer, tr,
					      &tracing_max_lat_fops);
}

__init static int latency_fsnotify_init(void)
{
	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
				      WQ_UNBOUND | WQ_HIGHPRI, 0);
	if (!fsnotify_wq) {
		pr_err("Unable to allocate tr_max_lat_wq\n");
		return -ENOMEM;
	}
	return 0;
}

late_initcall_sync(latency_fsnotify_init);

void latency_fsnotify(struct trace_array *tr)
{
	if (!fsnotify_wq)
		return;
	/*
	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
	 * possible that we are called from __schedule() or do_idle(), which
	 * could cause a deadlock.
	 */
	irq_work_queue(&tr->fsnotify_irqwork);
}

#else /* !LATENCY_FS_NOTIFY */

#define trace_create_maxlat_file(tr, d_tracer)				\
	trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,	\
			  d_tracer, tr, &tracing_max_lat_fops)

#endif

/*
 * Copy the new maximum trace into the separate maximum-trace
 * structure. (this way the maximum trace is permanently saved,
 * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
 */
static void
__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
{
	struct array_buffer *trace_buf = &tr->array_buffer;
	struct array_buffer *max_buf = &tr->max_buffer;
	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);

	max_buf->cpu = cpu;
	max_buf->time_start = data->preempt_timestamp;

	max_data->saved_latency = tr->max_latency;
	max_data->critical_start = data->critical_start;
	max_data->critical_end = data->critical_end;

	strscpy(max_data->comm, tsk->comm);
	max_data->pid = tsk->pid;
	/*
	 * If tsk == current, then use current_uid(), as that does not use
	 * RCU. The irq tracer can be called out of RCU scope.
	 */
	if (tsk == current)
		max_data->uid = current_uid();
	else
		max_data->uid = task_uid(tsk);

	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
	max_data->policy = tsk->policy;
	max_data->rt_priority = tsk->rt_priority;

	/* record this task's comm */
	tracing_record_cmdline(tsk);
	latency_fsnotify(tr);
}

/**
 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
 * @tr: tracer
 * @tsk: the task with the latency
 * @cpu: The cpu that initiated the trace.
 * @cond_data: User data associated with a conditional snapshot
 *
 * Flip the buffers between the @tr and the max_tr and record information
 * about which task was the cause of this latency.
 */
void
update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
	      void *cond_data)
{
	if (tr->stop_count)
		return;

	WARN_ON_ONCE(!irqs_disabled());

	if (!tr->allocated_snapshot) {
		/* Only the nop tracer should hit this when disabling */
		WARN_ON_ONCE(tr->current_trace != &nop_trace);
		return;
	}

	arch_spin_lock(&tr->max_lock);

	/* Inherit the recordable setting from array_buffer */
	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
		ring_buffer_record_on(tr->max_buffer.buffer);
	else
		ring_buffer_record_off(tr->max_buffer.buffer);

#ifdef CONFIG_TRACER_SNAPSHOT
	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
		arch_spin_unlock(&tr->max_lock);
		return;
	}
#endif
	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);

	__update_max_tr(tr, tsk, cpu);

	arch_spin_unlock(&tr->max_lock);

	/* Any waiters on the old snapshot buffer need to wake up */
	ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
}

/**
 * update_max_tr_single - only copy one trace over, and reset the rest
 * @tr: tracer
 * @tsk: task with the latency
 * @cpu: the cpu of the buffer to copy.
 *
 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
2033 */ 2034 void 2035 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) 2036 { 2037 int ret; 2038 2039 if (tr->stop_count) 2040 return; 2041 2042 WARN_ON_ONCE(!irqs_disabled()); 2043 if (!tr->allocated_snapshot) { 2044 /* Only the nop tracer should hit this when disabling */ 2045 WARN_ON_ONCE(tr->current_trace != &nop_trace); 2046 return; 2047 } 2048 2049 arch_spin_lock(&tr->max_lock); 2050 2051 ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu); 2052 2053 if (ret == -EBUSY) { 2054 /* 2055 * We failed to swap the buffer due to a commit taking 2056 * place on this CPU. We fail to record, but we reset 2057 * the max trace buffer (no one writes directly to it) 2058 * and flag that it failed. 2059 * Another reason is resize is in progress. 2060 */ 2061 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_, 2062 "Failed to swap buffers due to commit or resize in progress\n"); 2063 } 2064 2065 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY); 2066 2067 __update_max_tr(tr, tsk, cpu); 2068 arch_spin_unlock(&tr->max_lock); 2069 } 2070 2071 #endif /* CONFIG_TRACER_MAX_TRACE */ 2072 2073 struct pipe_wait { 2074 struct trace_iterator *iter; 2075 int wait_index; 2076 }; 2077 2078 static bool wait_pipe_cond(void *data) 2079 { 2080 struct pipe_wait *pwait = data; 2081 struct trace_iterator *iter = pwait->iter; 2082 2083 if (atomic_read_acquire(&iter->wait_index) != pwait->wait_index) 2084 return true; 2085 2086 return iter->closed; 2087 } 2088 2089 static int wait_on_pipe(struct trace_iterator *iter, int full) 2090 { 2091 struct pipe_wait pwait; 2092 int ret; 2093 2094 /* Iterators are static, they should be filled or empty */ 2095 if (trace_buffer_iter(iter, iter->cpu_file)) 2096 return 0; 2097 2098 pwait.wait_index = atomic_read_acquire(&iter->wait_index); 2099 pwait.iter = iter; 2100 2101 ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full, 2102 wait_pipe_cond, &pwait); 2103 2104 #ifdef CONFIG_TRACER_MAX_TRACE 2105 /* 2106 * Make sure this is still the snapshot buffer, as if a snapshot were 2107 * to happen, this would now be the main buffer. 2108 */ 2109 if (iter->snapshot) 2110 iter->array_buffer = &iter->tr->max_buffer; 2111 #endif 2112 return ret; 2113 } 2114 2115 #ifdef CONFIG_FTRACE_STARTUP_TEST 2116 static bool selftests_can_run; 2117 2118 struct trace_selftests { 2119 struct list_head list; 2120 struct tracer *type; 2121 }; 2122 2123 static LIST_HEAD(postponed_selftests); 2124 2125 static int save_selftest(struct tracer *type) 2126 { 2127 struct trace_selftests *selftest; 2128 2129 selftest = kmalloc(sizeof(*selftest), GFP_KERNEL); 2130 if (!selftest) 2131 return -ENOMEM; 2132 2133 selftest->type = type; 2134 list_add(&selftest->list, &postponed_selftests); 2135 return 0; 2136 } 2137 2138 static int run_tracer_selftest(struct tracer *type) 2139 { 2140 struct trace_array *tr = &global_trace; 2141 struct tracer *saved_tracer = tr->current_trace; 2142 int ret; 2143 2144 if (!type->selftest || tracing_selftest_disabled) 2145 return 0; 2146 2147 /* 2148 * If a tracer registers early in boot up (before scheduling is 2149 * initialized and such), then do not run its selftests yet. 2150 * Instead, run it a little later in the boot process. 2151 */ 2152 if (!selftests_can_run) 2153 return save_selftest(type); 2154 2155 if (!tracing_is_on()) { 2156 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n", 2157 type->name); 2158 return 0; 2159 } 2160 2161 /* 2162 * Run a selftest on this tracer. 
2163 * Here we reset the trace buffer, and set the current 2164 * tracer to be this tracer. The tracer can then run some 2165 * internal tracing to verify that everything is in order. 2166 * If we fail, we do not register this tracer. 2167 */ 2168 tracing_reset_online_cpus(&tr->array_buffer); 2169 2170 tr->current_trace = type; 2171 2172 #ifdef CONFIG_TRACER_MAX_TRACE 2173 if (type->use_max_tr) { 2174 /* If we expanded the buffers, make sure the max is expanded too */ 2175 if (tr->ring_buffer_expanded) 2176 ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size, 2177 RING_BUFFER_ALL_CPUS); 2178 tr->allocated_snapshot = true; 2179 } 2180 #endif 2181 2182 /* the test is responsible for initializing and enabling */ 2183 pr_info("Testing tracer %s: ", type->name); 2184 ret = type->selftest(type, tr); 2185 /* the test is responsible for resetting too */ 2186 tr->current_trace = saved_tracer; 2187 if (ret) { 2188 printk(KERN_CONT "FAILED!\n"); 2189 /* Add the warning after printing 'FAILED' */ 2190 WARN_ON(1); 2191 return -1; 2192 } 2193 /* Only reset on passing, to avoid touching corrupted buffers */ 2194 tracing_reset_online_cpus(&tr->array_buffer); 2195 2196 #ifdef CONFIG_TRACER_MAX_TRACE 2197 if (type->use_max_tr) { 2198 tr->allocated_snapshot = false; 2199 2200 /* Shrink the max buffer again */ 2201 if (tr->ring_buffer_expanded) 2202 ring_buffer_resize(tr->max_buffer.buffer, 1, 2203 RING_BUFFER_ALL_CPUS); 2204 } 2205 #endif 2206 2207 printk(KERN_CONT "PASSED\n"); 2208 return 0; 2209 } 2210 2211 static int do_run_tracer_selftest(struct tracer *type) 2212 { 2213 int ret; 2214 2215 /* 2216 * Tests can take a long time, especially if they are run one after the 2217 * other, as does happen during bootup when all the tracers are 2218 * registered. This could cause the soft lockup watchdog to trigger. 2219 */ 2220 cond_resched(); 2221 2222 tracing_selftest_running = true; 2223 ret = run_tracer_selftest(type); 2224 tracing_selftest_running = false; 2225 2226 return ret; 2227 } 2228 2229 static __init int init_trace_selftests(void) 2230 { 2231 struct trace_selftests *p, *n; 2232 struct tracer *t, **last; 2233 int ret; 2234 2235 selftests_can_run = true; 2236 2237 guard(mutex)(&trace_types_lock); 2238 2239 if (list_empty(&postponed_selftests)) 2240 return 0; 2241 2242 pr_info("Running postponed tracer tests:\n"); 2243 2244 tracing_selftest_running = true; 2245 list_for_each_entry_safe(p, n, &postponed_selftests, list) { 2246 /* This loop can take minutes when sanitizers are enabled, so 2247 * lets make sure we allow RCU processing. 2248 */ 2249 cond_resched(); 2250 ret = run_tracer_selftest(p->type); 2251 /* If the test fails, then warn and remove from available_tracers */ 2252 if (ret < 0) { 2253 WARN(1, "tracer: %s failed selftest, disabling\n", 2254 p->type->name); 2255 last = &trace_types; 2256 for (t = trace_types; t; t = t->next) { 2257 if (t == p->type) { 2258 *last = t->next; 2259 break; 2260 } 2261 last = &t->next; 2262 } 2263 } 2264 list_del(&p->list); 2265 kfree(p); 2266 } 2267 tracing_selftest_running = false; 2268 2269 return 0; 2270 } 2271 core_initcall(init_trace_selftests); 2272 #else 2273 static inline int do_run_tracer_selftest(struct tracer *type) 2274 { 2275 return 0; 2276 } 2277 #endif /* CONFIG_FTRACE_STARTUP_TEST */ 2278 2279 static void add_tracer_options(struct trace_array *tr, struct tracer *t); 2280 2281 static void __init apply_trace_boot_options(void); 2282 2283 /** 2284 * register_tracer - register a tracer with the ftrace system. 
2285 * @type: the plugin for the tracer 2286 * 2287 * Register a new plugin tracer. 2288 */ 2289 int __init register_tracer(struct tracer *type) 2290 { 2291 struct tracer *t; 2292 int ret = 0; 2293 2294 if (!type->name) { 2295 pr_info("Tracer must have a name\n"); 2296 return -1; 2297 } 2298 2299 if (strlen(type->name) >= MAX_TRACER_SIZE) { 2300 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE); 2301 return -1; 2302 } 2303 2304 if (security_locked_down(LOCKDOWN_TRACEFS)) { 2305 pr_warn("Can not register tracer %s due to lockdown\n", 2306 type->name); 2307 return -EPERM; 2308 } 2309 2310 mutex_lock(&trace_types_lock); 2311 2312 for (t = trace_types; t; t = t->next) { 2313 if (strcmp(type->name, t->name) == 0) { 2314 /* already found */ 2315 pr_info("Tracer %s already registered\n", 2316 type->name); 2317 ret = -1; 2318 goto out; 2319 } 2320 } 2321 2322 if (!type->set_flag) 2323 type->set_flag = &dummy_set_flag; 2324 if (!type->flags) { 2325 /*allocate a dummy tracer_flags*/ 2326 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL); 2327 if (!type->flags) { 2328 ret = -ENOMEM; 2329 goto out; 2330 } 2331 type->flags->val = 0; 2332 type->flags->opts = dummy_tracer_opt; 2333 } else 2334 if (!type->flags->opts) 2335 type->flags->opts = dummy_tracer_opt; 2336 2337 /* store the tracer for __set_tracer_option */ 2338 type->flags->trace = type; 2339 2340 ret = do_run_tracer_selftest(type); 2341 if (ret < 0) 2342 goto out; 2343 2344 type->next = trace_types; 2345 trace_types = type; 2346 add_tracer_options(&global_trace, type); 2347 2348 out: 2349 mutex_unlock(&trace_types_lock); 2350 2351 if (ret || !default_bootup_tracer) 2352 goto out_unlock; 2353 2354 if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE)) 2355 goto out_unlock; 2356 2357 printk(KERN_INFO "Starting tracer '%s'\n", type->name); 2358 /* Do we want this tracer to start on bootup? */ 2359 tracing_set_tracer(&global_trace, type->name); 2360 default_bootup_tracer = NULL; 2361 2362 apply_trace_boot_options(); 2363 2364 /* disable other selftests, since this will break it. 
*/ 2365 disable_tracing_selftest("running a tracer"); 2366 2367 out_unlock: 2368 return ret; 2369 } 2370 2371 static void tracing_reset_cpu(struct array_buffer *buf, int cpu) 2372 { 2373 struct trace_buffer *buffer = buf->buffer; 2374 2375 if (!buffer) 2376 return; 2377 2378 ring_buffer_record_disable(buffer); 2379 2380 /* Make sure all commits have finished */ 2381 synchronize_rcu(); 2382 ring_buffer_reset_cpu(buffer, cpu); 2383 2384 ring_buffer_record_enable(buffer); 2385 } 2386 2387 void tracing_reset_online_cpus(struct array_buffer *buf) 2388 { 2389 struct trace_buffer *buffer = buf->buffer; 2390 2391 if (!buffer) 2392 return; 2393 2394 ring_buffer_record_disable(buffer); 2395 2396 /* Make sure all commits have finished */ 2397 synchronize_rcu(); 2398 2399 buf->time_start = buffer_ftrace_now(buf, buf->cpu); 2400 2401 ring_buffer_reset_online_cpus(buffer); 2402 2403 ring_buffer_record_enable(buffer); 2404 } 2405 2406 static void tracing_reset_all_cpus(struct array_buffer *buf) 2407 { 2408 struct trace_buffer *buffer = buf->buffer; 2409 2410 if (!buffer) 2411 return; 2412 2413 ring_buffer_record_disable(buffer); 2414 2415 /* Make sure all commits have finished */ 2416 synchronize_rcu(); 2417 2418 buf->time_start = buffer_ftrace_now(buf, buf->cpu); 2419 2420 ring_buffer_reset(buffer); 2421 2422 ring_buffer_record_enable(buffer); 2423 } 2424 2425 /* Must have trace_types_lock held */ 2426 void tracing_reset_all_online_cpus_unlocked(void) 2427 { 2428 struct trace_array *tr; 2429 2430 lockdep_assert_held(&trace_types_lock); 2431 2432 list_for_each_entry(tr, &ftrace_trace_arrays, list) { 2433 if (!tr->clear_trace) 2434 continue; 2435 tr->clear_trace = false; 2436 tracing_reset_online_cpus(&tr->array_buffer); 2437 #ifdef CONFIG_TRACER_MAX_TRACE 2438 tracing_reset_online_cpus(&tr->max_buffer); 2439 #endif 2440 } 2441 } 2442 2443 void tracing_reset_all_online_cpus(void) 2444 { 2445 mutex_lock(&trace_types_lock); 2446 tracing_reset_all_online_cpus_unlocked(); 2447 mutex_unlock(&trace_types_lock); 2448 } 2449 2450 int is_tracing_stopped(void) 2451 { 2452 return global_trace.stop_count; 2453 } 2454 2455 static void tracing_start_tr(struct trace_array *tr) 2456 { 2457 struct trace_buffer *buffer; 2458 unsigned long flags; 2459 2460 if (tracing_disabled) 2461 return; 2462 2463 raw_spin_lock_irqsave(&tr->start_lock, flags); 2464 if (--tr->stop_count) { 2465 if (WARN_ON_ONCE(tr->stop_count < 0)) { 2466 /* Someone screwed up their debugging */ 2467 tr->stop_count = 0; 2468 } 2469 goto out; 2470 } 2471 2472 /* Prevent the buffers from switching */ 2473 arch_spin_lock(&tr->max_lock); 2474 2475 buffer = tr->array_buffer.buffer; 2476 if (buffer) 2477 ring_buffer_record_enable(buffer); 2478 2479 #ifdef CONFIG_TRACER_MAX_TRACE 2480 buffer = tr->max_buffer.buffer; 2481 if (buffer) 2482 ring_buffer_record_enable(buffer); 2483 #endif 2484 2485 arch_spin_unlock(&tr->max_lock); 2486 2487 out: 2488 raw_spin_unlock_irqrestore(&tr->start_lock, flags); 2489 } 2490 2491 /** 2492 * tracing_start - quick start of the tracer 2493 * 2494 * If tracing is enabled but was stopped by tracing_stop, 2495 * this will start the tracer back up. 
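 *
 * A typical (illustrative) pairing with tracing_stop() from kernel code
 * that wants to look at the buffers without new events racing in:
 *
 *	tracing_stop();
 *	... read or dump the trace buffers ...
 *	tracing_start();
 *
 * The stop count nests, so recording only resumes once every
 * tracing_stop() has been balanced by a tracing_start().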
2496 */ 2497 void tracing_start(void) 2498 2499 { 2500 return tracing_start_tr(&global_trace); 2501 } 2502 2503 static void tracing_stop_tr(struct trace_array *tr) 2504 { 2505 struct trace_buffer *buffer; 2506 unsigned long flags; 2507 2508 raw_spin_lock_irqsave(&tr->start_lock, flags); 2509 if (tr->stop_count++) 2510 goto out; 2511 2512 /* Prevent the buffers from switching */ 2513 arch_spin_lock(&tr->max_lock); 2514 2515 buffer = tr->array_buffer.buffer; 2516 if (buffer) 2517 ring_buffer_record_disable(buffer); 2518 2519 #ifdef CONFIG_TRACER_MAX_TRACE 2520 buffer = tr->max_buffer.buffer; 2521 if (buffer) 2522 ring_buffer_record_disable(buffer); 2523 #endif 2524 2525 arch_spin_unlock(&tr->max_lock); 2526 2527 out: 2528 raw_spin_unlock_irqrestore(&tr->start_lock, flags); 2529 } 2530 2531 /** 2532 * tracing_stop - quick stop of the tracer 2533 * 2534 * Light weight way to stop tracing. Use in conjunction with 2535 * tracing_start. 2536 */ 2537 void tracing_stop(void) 2538 { 2539 return tracing_stop_tr(&global_trace); 2540 } 2541 2542 /* 2543 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq 2544 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function 2545 * simplifies those functions and keeps them in sync. 2546 */ 2547 enum print_line_t trace_handle_return(struct trace_seq *s) 2548 { 2549 return trace_seq_has_overflowed(s) ? 2550 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED; 2551 } 2552 EXPORT_SYMBOL_GPL(trace_handle_return); 2553 2554 static unsigned short migration_disable_value(void) 2555 { 2556 #if defined(CONFIG_SMP) 2557 return current->migration_disabled; 2558 #else 2559 return 0; 2560 #endif 2561 } 2562 2563 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status) 2564 { 2565 unsigned int trace_flags = irqs_status; 2566 unsigned int pc; 2567 2568 pc = preempt_count(); 2569 2570 if (pc & NMI_MASK) 2571 trace_flags |= TRACE_FLAG_NMI; 2572 if (pc & HARDIRQ_MASK) 2573 trace_flags |= TRACE_FLAG_HARDIRQ; 2574 if (in_serving_softirq()) 2575 trace_flags |= TRACE_FLAG_SOFTIRQ; 2576 if (softirq_count() >> (SOFTIRQ_SHIFT + 1)) 2577 trace_flags |= TRACE_FLAG_BH_OFF; 2578 2579 if (tif_need_resched()) 2580 trace_flags |= TRACE_FLAG_NEED_RESCHED; 2581 if (test_preempt_need_resched()) 2582 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED; 2583 if (IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY) && tif_test_bit(TIF_NEED_RESCHED_LAZY)) 2584 trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY; 2585 return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) | 2586 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4; 2587 } 2588 2589 struct ring_buffer_event * 2590 trace_buffer_lock_reserve(struct trace_buffer *buffer, 2591 int type, 2592 unsigned long len, 2593 unsigned int trace_ctx) 2594 { 2595 return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx); 2596 } 2597 2598 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event); 2599 DEFINE_PER_CPU(int, trace_buffered_event_cnt); 2600 static int trace_buffered_event_ref; 2601 2602 /** 2603 * trace_buffered_event_enable - enable buffering events 2604 * 2605 * When events are being filtered, it is quicker to use a temporary 2606 * buffer to write the event data into if there's a likely chance 2607 * that it will not be committed. The discard of the ring buffer 2608 * is not as fast as committing, and is much slower than copying 2609 * a commit. 
2610 * 2611 * When an event is to be filtered, allocate per cpu buffers to 2612 * write the event data into, and if the event is filtered and discarded 2613 * it is simply dropped, otherwise, the entire data is to be committed 2614 * in one shot. 2615 */ 2616 void trace_buffered_event_enable(void) 2617 { 2618 struct ring_buffer_event *event; 2619 struct page *page; 2620 int cpu; 2621 2622 WARN_ON_ONCE(!mutex_is_locked(&event_mutex)); 2623 2624 if (trace_buffered_event_ref++) 2625 return; 2626 2627 for_each_tracing_cpu(cpu) { 2628 page = alloc_pages_node(cpu_to_node(cpu), 2629 GFP_KERNEL | __GFP_NORETRY, 0); 2630 /* This is just an optimization and can handle failures */ 2631 if (!page) { 2632 pr_err("Failed to allocate event buffer\n"); 2633 break; 2634 } 2635 2636 event = page_address(page); 2637 memset(event, 0, sizeof(*event)); 2638 2639 per_cpu(trace_buffered_event, cpu) = event; 2640 2641 preempt_disable(); 2642 if (cpu == smp_processor_id() && 2643 __this_cpu_read(trace_buffered_event) != 2644 per_cpu(trace_buffered_event, cpu)) 2645 WARN_ON_ONCE(1); 2646 preempt_enable(); 2647 } 2648 } 2649 2650 static void enable_trace_buffered_event(void *data) 2651 { 2652 /* Probably not needed, but do it anyway */ 2653 smp_rmb(); 2654 this_cpu_dec(trace_buffered_event_cnt); 2655 } 2656 2657 static void disable_trace_buffered_event(void *data) 2658 { 2659 this_cpu_inc(trace_buffered_event_cnt); 2660 } 2661 2662 /** 2663 * trace_buffered_event_disable - disable buffering events 2664 * 2665 * When a filter is removed, it is faster to not use the buffered 2666 * events, and to commit directly into the ring buffer. Free up 2667 * the temp buffers when there are no more users. This requires 2668 * special synchronization with current events. 2669 */ 2670 void trace_buffered_event_disable(void) 2671 { 2672 int cpu; 2673 2674 WARN_ON_ONCE(!mutex_is_locked(&event_mutex)); 2675 2676 if (WARN_ON_ONCE(!trace_buffered_event_ref)) 2677 return; 2678 2679 if (--trace_buffered_event_ref) 2680 return; 2681 2682 /* For each CPU, set the buffer as used. */ 2683 on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event, 2684 NULL, true); 2685 2686 /* Wait for all current users to finish */ 2687 synchronize_rcu(); 2688 2689 for_each_tracing_cpu(cpu) { 2690 free_page((unsigned long)per_cpu(trace_buffered_event, cpu)); 2691 per_cpu(trace_buffered_event, cpu) = NULL; 2692 } 2693 2694 /* 2695 * Wait for all CPUs that potentially started checking if they can use 2696 * their event buffer only after the previous synchronize_rcu() call and 2697 * they still read a valid pointer from trace_buffered_event. It must be 2698 * ensured they don't see cleared trace_buffered_event_cnt else they 2699 * could wrongly decide to use the pointed-to buffer which is now freed. 
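 *
 * Roughly, the window being closed looks like this (an illustrative
 * sketch; the writer is trace_event_buffer_lock_reserve() running with
 * preemption disabled):
 *
 *	writer (some CPU)                     this function
 *	-----------------                     -------------
 *	reads its (stale, still non-NULL)
 *	trace_buffered_event pointer
 *	                                      pages were freed above
 *	                                      decrements the counters   <--
 *	                                        must be delayed
 *	increments trace_buffered_event_cnt,
 *	sees 1, and would write into the
 *	freed page
 *
 * The synchronize_rcu() below makes sure any such writer has left its
 * preemption-disabled section before the counters are decremented.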
2700 */ 2701 synchronize_rcu(); 2702 2703 /* For each CPU, relinquish the buffer */ 2704 on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL, 2705 true); 2706 } 2707 2708 static struct trace_buffer *temp_buffer; 2709 2710 struct ring_buffer_event * 2711 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb, 2712 struct trace_event_file *trace_file, 2713 int type, unsigned long len, 2714 unsigned int trace_ctx) 2715 { 2716 struct ring_buffer_event *entry; 2717 struct trace_array *tr = trace_file->tr; 2718 int val; 2719 2720 *current_rb = tr->array_buffer.buffer; 2721 2722 if (!tr->no_filter_buffering_ref && 2723 (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) { 2724 preempt_disable_notrace(); 2725 /* 2726 * Filtering is on, so try to use the per cpu buffer first. 2727 * This buffer will simulate a ring_buffer_event, 2728 * where the type_len is zero and the array[0] will 2729 * hold the full length. 2730 * (see include/linux/ring-buffer.h for details on 2731 * how the ring_buffer_event is structured). 2732 * 2733 * Using a temp buffer during filtering and copying it 2734 * on a matched filter is quicker than writing directly 2735 * into the ring buffer and then discarding it when 2736 * it doesn't match. That is because the discard 2737 * requires several atomic operations to get right. 2738 * Copying on match and doing nothing on a failed match 2739 * is still quicker than no copy on match, but having 2740 * to discard out of the ring buffer on a failed match. 2741 */ 2742 if ((entry = __this_cpu_read(trace_buffered_event))) { 2743 int max_len = PAGE_SIZE - struct_size(entry, array, 1); 2744 2745 val = this_cpu_inc_return(trace_buffered_event_cnt); 2746 2747 /* 2748 * Preemption is disabled, but interrupts and NMIs 2749 * can still come in now. If that happens after 2750 * the above increment, then it will have to go 2751 * back to the old method of allocating the event 2752 * on the ring buffer, and if the filter fails, it 2753 * will have to call ring_buffer_discard_commit() 2754 * to remove it. 2755 * 2756 * Need to also check the unlikely case that the 2757 * length is bigger than the temp buffer size. 2758 * If that happens, then the reserve is pretty much 2759 * guaranteed to fail, as the ring buffer currently 2760 * only allows events less than a page. But that may 2761 * change in the future, so let the ring buffer reserve 2762 * handle the failure in that case. 2763 */ 2764 if (val == 1 && likely(len <= max_len)) { 2765 trace_event_setup(entry, type, trace_ctx); 2766 entry->array[0] = len; 2767 /* Return with preemption disabled */ 2768 return entry; 2769 } 2770 this_cpu_dec(trace_buffered_event_cnt); 2771 } 2772 /* __trace_buffer_lock_reserve() disables preemption */ 2773 preempt_enable_notrace(); 2774 } 2775 2776 entry = __trace_buffer_lock_reserve(*current_rb, type, len, 2777 trace_ctx); 2778 /* 2779 * If tracing is off, but we have triggers enabled 2780 * we still need to look at the event data. Use the temp_buffer 2781 * to store the trace event for the trigger to use. It's recursive 2782 * safe and will not be recorded anywhere. 
2783 */ 2784 if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) { 2785 *current_rb = temp_buffer; 2786 entry = __trace_buffer_lock_reserve(*current_rb, type, len, 2787 trace_ctx); 2788 } 2789 return entry; 2790 } 2791 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve); 2792 2793 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock); 2794 static DEFINE_MUTEX(tracepoint_printk_mutex); 2795 2796 static void output_printk(struct trace_event_buffer *fbuffer) 2797 { 2798 struct trace_event_call *event_call; 2799 struct trace_event_file *file; 2800 struct trace_event *event; 2801 unsigned long flags; 2802 struct trace_iterator *iter = tracepoint_print_iter; 2803 2804 /* We should never get here if iter is NULL */ 2805 if (WARN_ON_ONCE(!iter)) 2806 return; 2807 2808 event_call = fbuffer->trace_file->event_call; 2809 if (!event_call || !event_call->event.funcs || 2810 !event_call->event.funcs->trace) 2811 return; 2812 2813 file = fbuffer->trace_file; 2814 if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) || 2815 (unlikely(file->flags & EVENT_FILE_FL_FILTERED) && 2816 !filter_match_preds(file->filter, fbuffer->entry))) 2817 return; 2818 2819 event = &fbuffer->trace_file->event_call->event; 2820 2821 raw_spin_lock_irqsave(&tracepoint_iter_lock, flags); 2822 trace_seq_init(&iter->seq); 2823 iter->ent = fbuffer->entry; 2824 event_call->event.funcs->trace(iter, 0, event); 2825 trace_seq_putc(&iter->seq, 0); 2826 printk("%s", iter->seq.buffer); 2827 2828 raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags); 2829 } 2830 2831 int tracepoint_printk_sysctl(const struct ctl_table *table, int write, 2832 void *buffer, size_t *lenp, 2833 loff_t *ppos) 2834 { 2835 int save_tracepoint_printk; 2836 int ret; 2837 2838 guard(mutex)(&tracepoint_printk_mutex); 2839 save_tracepoint_printk = tracepoint_printk; 2840 2841 ret = proc_dointvec(table, write, buffer, lenp, ppos); 2842 2843 /* 2844 * This will force exiting early, as tracepoint_printk 2845 * is always zero when tracepoint_printk_iter is not allocated 2846 */ 2847 if (!tracepoint_print_iter) 2848 tracepoint_printk = 0; 2849 2850 if (save_tracepoint_printk == tracepoint_printk) 2851 return ret; 2852 2853 if (tracepoint_printk) 2854 static_key_enable(&tracepoint_printk_key.key); 2855 else 2856 static_key_disable(&tracepoint_printk_key.key); 2857 2858 return ret; 2859 } 2860 2861 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer) 2862 { 2863 enum event_trigger_type tt = ETT_NONE; 2864 struct trace_event_file *file = fbuffer->trace_file; 2865 2866 if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event, 2867 fbuffer->entry, &tt)) 2868 goto discard; 2869 2870 if (static_key_false(&tracepoint_printk_key.key)) 2871 output_printk(fbuffer); 2872 2873 if (static_branch_unlikely(&trace_event_exports_enabled)) 2874 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT); 2875 2876 trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer, 2877 fbuffer->event, fbuffer->trace_ctx, fbuffer->regs); 2878 2879 discard: 2880 if (tt) 2881 event_triggers_post_call(file, tt); 2882 2883 } 2884 EXPORT_SYMBOL_GPL(trace_event_buffer_commit); 2885 2886 /* 2887 * Skip 3: 2888 * 2889 * trace_buffer_unlock_commit_regs() 2890 * trace_event_buffer_commit() 2891 * trace_event_raw_event_xxx() 2892 */ 2893 # define STACK_SKIP 3 2894 2895 void trace_buffer_unlock_commit_regs(struct trace_array *tr, 2896 struct trace_buffer *buffer, 2897 struct ring_buffer_event *event, 2898 unsigned int trace_ctx, 2899 struct pt_regs *regs) 2900 { 2901 
__buffer_unlock_commit(buffer, event); 2902 2903 /* 2904 * If regs is not set, then skip the necessary functions. 2905 * Note, we can still get here via blktrace, wakeup tracer 2906 * and mmiotrace, but that's ok if they lose a function or 2907 * two. They are not that meaningful. 2908 */ 2909 ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs); 2910 ftrace_trace_userstack(tr, buffer, trace_ctx); 2911 } 2912 2913 /* 2914 * Similar to trace_buffer_unlock_commit_regs() but do not dump stack. 2915 */ 2916 void 2917 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer, 2918 struct ring_buffer_event *event) 2919 { 2920 __buffer_unlock_commit(buffer, event); 2921 } 2922 2923 void 2924 trace_function(struct trace_array *tr, unsigned long ip, unsigned long 2925 parent_ip, unsigned int trace_ctx, struct ftrace_regs *fregs) 2926 { 2927 struct trace_buffer *buffer = tr->array_buffer.buffer; 2928 struct ring_buffer_event *event; 2929 struct ftrace_entry *entry; 2930 int size = sizeof(*entry); 2931 2932 size += FTRACE_REGS_MAX_ARGS * !!fregs * sizeof(long); 2933 2934 event = __trace_buffer_lock_reserve(buffer, TRACE_FN, size, 2935 trace_ctx); 2936 if (!event) 2937 return; 2938 entry = ring_buffer_event_data(event); 2939 entry->ip = ip; 2940 entry->parent_ip = parent_ip; 2941 2942 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API 2943 if (fregs) { 2944 for (int i = 0; i < FTRACE_REGS_MAX_ARGS; i++) 2945 entry->args[i] = ftrace_regs_get_argument(fregs, i); 2946 } 2947 #endif 2948 2949 if (static_branch_unlikely(&trace_function_exports_enabled)) 2950 ftrace_exports(event, TRACE_EXPORT_FUNCTION); 2951 __buffer_unlock_commit(buffer, event); 2952 } 2953 2954 #ifdef CONFIG_STACKTRACE 2955 2956 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */ 2957 #define FTRACE_KSTACK_NESTING 4 2958 2959 #define FTRACE_KSTACK_ENTRIES (SZ_4K / FTRACE_KSTACK_NESTING) 2960 2961 struct ftrace_stack { 2962 unsigned long calls[FTRACE_KSTACK_ENTRIES]; 2963 }; 2964 2965 2966 struct ftrace_stacks { 2967 struct ftrace_stack stacks[FTRACE_KSTACK_NESTING]; 2968 }; 2969 2970 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks); 2971 static DEFINE_PER_CPU(int, ftrace_stack_reserve); 2972 2973 static void __ftrace_trace_stack(struct trace_array *tr, 2974 struct trace_buffer *buffer, 2975 unsigned int trace_ctx, 2976 int skip, struct pt_regs *regs) 2977 { 2978 struct ring_buffer_event *event; 2979 unsigned int size, nr_entries; 2980 struct ftrace_stack *fstack; 2981 struct stack_entry *entry; 2982 int stackidx; 2983 2984 /* 2985 * Add one, for this function and the call to save_stack_trace() 2986 * If regs is set, then these functions will not be in the way. 2987 */ 2988 #ifndef CONFIG_UNWINDER_ORC 2989 if (!regs) 2990 skip++; 2991 #endif 2992 2993 preempt_disable_notrace(); 2994 2995 stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1; 2996 2997 /* This should never happen. If it does, yell once and skip */ 2998 if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING)) 2999 goto out; 3000 3001 /* 3002 * The above __this_cpu_inc_return() is 'atomic' cpu local. An 3003 * interrupt will either see the value pre increment or post 3004 * increment. If the interrupt happens pre increment it will have 3005 * restored the counter when it returns. We just need a barrier to 3006 * keep gcc from moving things around. 
3007 */ 3008 barrier(); 3009 3010 fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx; 3011 size = ARRAY_SIZE(fstack->calls); 3012 3013 if (regs) { 3014 nr_entries = stack_trace_save_regs(regs, fstack->calls, 3015 size, skip); 3016 } else { 3017 nr_entries = stack_trace_save(fstack->calls, size, skip); 3018 } 3019 3020 #ifdef CONFIG_DYNAMIC_FTRACE 3021 /* Mark entry of stack trace as trampoline code */ 3022 if (tr->ops && tr->ops->trampoline) { 3023 unsigned long tramp_start = tr->ops->trampoline; 3024 unsigned long tramp_end = tramp_start + tr->ops->trampoline_size; 3025 unsigned long *calls = fstack->calls; 3026 3027 for (int i = 0; i < nr_entries; i++) { 3028 if (calls[i] >= tramp_start && calls[i] < tramp_end) 3029 calls[i] = FTRACE_TRAMPOLINE_MARKER; 3030 } 3031 } 3032 #endif 3033 3034 event = __trace_buffer_lock_reserve(buffer, TRACE_STACK, 3035 struct_size(entry, caller, nr_entries), 3036 trace_ctx); 3037 if (!event) 3038 goto out; 3039 entry = ring_buffer_event_data(event); 3040 3041 entry->size = nr_entries; 3042 memcpy(&entry->caller, fstack->calls, 3043 flex_array_size(entry, caller, nr_entries)); 3044 3045 __buffer_unlock_commit(buffer, event); 3046 3047 out: 3048 /* Again, don't let gcc optimize things here */ 3049 barrier(); 3050 __this_cpu_dec(ftrace_stack_reserve); 3051 preempt_enable_notrace(); 3052 3053 } 3054 3055 static inline void ftrace_trace_stack(struct trace_array *tr, 3056 struct trace_buffer *buffer, 3057 unsigned int trace_ctx, 3058 int skip, struct pt_regs *regs) 3059 { 3060 if (!(tr->trace_flags & TRACE_ITER_STACKTRACE)) 3061 return; 3062 3063 __ftrace_trace_stack(tr, buffer, trace_ctx, skip, regs); 3064 } 3065 3066 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx, 3067 int skip) 3068 { 3069 struct trace_buffer *buffer = tr->array_buffer.buffer; 3070 3071 if (rcu_is_watching()) { 3072 __ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL); 3073 return; 3074 } 3075 3076 if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY))) 3077 return; 3078 3079 /* 3080 * When an NMI triggers, RCU is enabled via ct_nmi_enter(), 3081 * but if the above rcu_is_watching() failed, then the NMI 3082 * triggered someplace critical, and ct_irq_enter() should 3083 * not be called from NMI. 3084 */ 3085 if (unlikely(in_nmi())) 3086 return; 3087 3088 ct_irq_enter_irqson(); 3089 __ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL); 3090 ct_irq_exit_irqson(); 3091 } 3092 3093 /** 3094 * trace_dump_stack - record a stack back trace in the trace buffer 3095 * @skip: Number of functions to skip (helper handlers) 3096 */ 3097 void trace_dump_stack(int skip) 3098 { 3099 if (tracing_disabled || tracing_selftest_running) 3100 return; 3101 3102 #ifndef CONFIG_UNWINDER_ORC 3103 /* Skip 1 to skip this function. */ 3104 skip++; 3105 #endif 3106 __ftrace_trace_stack(printk_trace, printk_trace->array_buffer.buffer, 3107 tracing_gen_ctx(), skip, NULL); 3108 } 3109 EXPORT_SYMBOL_GPL(trace_dump_stack); 3110 3111 #ifdef CONFIG_USER_STACKTRACE_SUPPORT 3112 static DEFINE_PER_CPU(int, user_stack_count); 3113 3114 static void 3115 ftrace_trace_userstack(struct trace_array *tr, 3116 struct trace_buffer *buffer, unsigned int trace_ctx) 3117 { 3118 struct ring_buffer_event *event; 3119 struct userstack_entry *entry; 3120 3121 if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE)) 3122 return; 3123 3124 /* 3125 * NMIs can not handle page faults, even with fix ups. 3126 * The save user stack can (and often does) fault. 
3127 */ 3128 if (unlikely(in_nmi())) 3129 return; 3130 3131 /* 3132 * prevent recursion, since the user stack tracing may 3133 * trigger other kernel events. 3134 */ 3135 preempt_disable(); 3136 if (__this_cpu_read(user_stack_count)) 3137 goto out; 3138 3139 __this_cpu_inc(user_stack_count); 3140 3141 event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK, 3142 sizeof(*entry), trace_ctx); 3143 if (!event) 3144 goto out_drop_count; 3145 entry = ring_buffer_event_data(event); 3146 3147 entry->tgid = current->tgid; 3148 memset(&entry->caller, 0, sizeof(entry->caller)); 3149 3150 stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES); 3151 __buffer_unlock_commit(buffer, event); 3152 3153 out_drop_count: 3154 __this_cpu_dec(user_stack_count); 3155 out: 3156 preempt_enable(); 3157 } 3158 #else /* CONFIG_USER_STACKTRACE_SUPPORT */ 3159 static void ftrace_trace_userstack(struct trace_array *tr, 3160 struct trace_buffer *buffer, 3161 unsigned int trace_ctx) 3162 { 3163 } 3164 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */ 3165 3166 #endif /* CONFIG_STACKTRACE */ 3167 3168 static inline void 3169 func_repeats_set_delta_ts(struct func_repeats_entry *entry, 3170 unsigned long long delta) 3171 { 3172 entry->bottom_delta_ts = delta & U32_MAX; 3173 entry->top_delta_ts = (delta >> 32); 3174 } 3175 3176 void trace_last_func_repeats(struct trace_array *tr, 3177 struct trace_func_repeats *last_info, 3178 unsigned int trace_ctx) 3179 { 3180 struct trace_buffer *buffer = tr->array_buffer.buffer; 3181 struct func_repeats_entry *entry; 3182 struct ring_buffer_event *event; 3183 u64 delta; 3184 3185 event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS, 3186 sizeof(*entry), trace_ctx); 3187 if (!event) 3188 return; 3189 3190 delta = ring_buffer_event_time_stamp(buffer, event) - 3191 last_info->ts_last_call; 3192 3193 entry = ring_buffer_event_data(event); 3194 entry->ip = last_info->ip; 3195 entry->parent_ip = last_info->parent_ip; 3196 entry->count = last_info->count; 3197 func_repeats_set_delta_ts(entry, delta); 3198 3199 __buffer_unlock_commit(buffer, event); 3200 } 3201 3202 /* created for use with alloc_percpu */ 3203 struct trace_buffer_struct { 3204 int nesting; 3205 char buffer[4][TRACE_BUF_SIZE]; 3206 }; 3207 3208 static struct trace_buffer_struct __percpu *trace_percpu_buffer; 3209 3210 /* 3211 * This allows for lockless recording. If we're nested too deeply, then 3212 * this returns NULL. 
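 *
 * Callers pair it with put_trace_buf() while preemption is disabled;
 * the pattern (a sketch of what trace_vbprintk() below does) is:
 *
 *	preempt_disable_notrace();
 *	buf = get_trace_buf();
 *	if (buf) {
 *		... format the message into buf ...
 *		put_trace_buf();
 *	}
 *	preempt_enable_notrace();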
3213 */ 3214 static char *get_trace_buf(void) 3215 { 3216 struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer); 3217 3218 if (!trace_percpu_buffer || buffer->nesting >= 4) 3219 return NULL; 3220 3221 buffer->nesting++; 3222 3223 /* Interrupts must see nesting incremented before we use the buffer */ 3224 barrier(); 3225 return &buffer->buffer[buffer->nesting - 1][0]; 3226 } 3227 3228 static void put_trace_buf(void) 3229 { 3230 /* Don't let the decrement of nesting leak before this */ 3231 barrier(); 3232 this_cpu_dec(trace_percpu_buffer->nesting); 3233 } 3234 3235 static int alloc_percpu_trace_buffer(void) 3236 { 3237 struct trace_buffer_struct __percpu *buffers; 3238 3239 if (trace_percpu_buffer) 3240 return 0; 3241 3242 buffers = alloc_percpu(struct trace_buffer_struct); 3243 if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer")) 3244 return -ENOMEM; 3245 3246 trace_percpu_buffer = buffers; 3247 return 0; 3248 } 3249 3250 static int buffers_allocated; 3251 3252 void trace_printk_init_buffers(void) 3253 { 3254 if (buffers_allocated) 3255 return; 3256 3257 if (alloc_percpu_trace_buffer()) 3258 return; 3259 3260 /* trace_printk() is for debug use only. Don't use it in production. */ 3261 3262 pr_warn("\n"); 3263 pr_warn("**********************************************************\n"); 3264 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); 3265 pr_warn("** **\n"); 3266 pr_warn("** trace_printk() being used. Allocating extra memory. **\n"); 3267 pr_warn("** **\n"); 3268 pr_warn("** This means that this is a DEBUG kernel and it is **\n"); 3269 pr_warn("** unsafe for production use. **\n"); 3270 pr_warn("** **\n"); 3271 pr_warn("** If you see this message and you are not debugging **\n"); 3272 pr_warn("** the kernel, report this immediately to your vendor! **\n"); 3273 pr_warn("** **\n"); 3274 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); 3275 pr_warn("**********************************************************\n"); 3276 3277 /* Expand the buffers to set size */ 3278 tracing_update_buffers(&global_trace); 3279 3280 buffers_allocated = 1; 3281 3282 /* 3283 * trace_printk_init_buffers() can be called by modules. 3284 * If that happens, then we need to start cmdline recording 3285 * directly here. If the global_trace.buffer is already 3286 * allocated here, then this was called by module code. 
3287 */ 3288 if (global_trace.array_buffer.buffer) 3289 tracing_start_cmdline_record(); 3290 } 3291 EXPORT_SYMBOL_GPL(trace_printk_init_buffers); 3292 3293 void trace_printk_start_comm(void) 3294 { 3295 /* Start tracing comms if trace printk is set */ 3296 if (!buffers_allocated) 3297 return; 3298 tracing_start_cmdline_record(); 3299 } 3300 3301 static void trace_printk_start_stop_comm(int enabled) 3302 { 3303 if (!buffers_allocated) 3304 return; 3305 3306 if (enabled) 3307 tracing_start_cmdline_record(); 3308 else 3309 tracing_stop_cmdline_record(); 3310 } 3311 3312 /** 3313 * trace_vbprintk - write binary msg to tracing buffer 3314 * @ip: The address of the caller 3315 * @fmt: The string format to write to the buffer 3316 * @args: Arguments for @fmt 3317 */ 3318 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) 3319 { 3320 struct ring_buffer_event *event; 3321 struct trace_buffer *buffer; 3322 struct trace_array *tr = READ_ONCE(printk_trace); 3323 struct bprint_entry *entry; 3324 unsigned int trace_ctx; 3325 char *tbuffer; 3326 int len = 0, size; 3327 3328 if (!printk_binsafe(tr)) 3329 return trace_vprintk(ip, fmt, args); 3330 3331 if (unlikely(tracing_selftest_running || tracing_disabled)) 3332 return 0; 3333 3334 /* Don't pollute graph traces with trace_vprintk internals */ 3335 pause_graph_tracing(); 3336 3337 trace_ctx = tracing_gen_ctx(); 3338 preempt_disable_notrace(); 3339 3340 tbuffer = get_trace_buf(); 3341 if (!tbuffer) { 3342 len = 0; 3343 goto out_nobuffer; 3344 } 3345 3346 len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args); 3347 3348 if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0) 3349 goto out_put; 3350 3351 size = sizeof(*entry) + sizeof(u32) * len; 3352 buffer = tr->array_buffer.buffer; 3353 ring_buffer_nest_start(buffer); 3354 event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size, 3355 trace_ctx); 3356 if (!event) 3357 goto out; 3358 entry = ring_buffer_event_data(event); 3359 entry->ip = ip; 3360 entry->fmt = fmt; 3361 3362 memcpy(entry->buf, tbuffer, sizeof(u32) * len); 3363 __buffer_unlock_commit(buffer, event); 3364 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL); 3365 3366 out: 3367 ring_buffer_nest_end(buffer); 3368 out_put: 3369 put_trace_buf(); 3370 3371 out_nobuffer: 3372 preempt_enable_notrace(); 3373 unpause_graph_tracing(); 3374 3375 return len; 3376 } 3377 EXPORT_SYMBOL_GPL(trace_vbprintk); 3378 3379 static __printf(3, 0) 3380 int __trace_array_vprintk(struct trace_buffer *buffer, 3381 unsigned long ip, const char *fmt, va_list args) 3382 { 3383 struct ring_buffer_event *event; 3384 int len = 0, size; 3385 struct print_entry *entry; 3386 unsigned int trace_ctx; 3387 char *tbuffer; 3388 3389 if (tracing_disabled) 3390 return 0; 3391 3392 /* Don't pollute graph traces with trace_vprintk internals */ 3393 pause_graph_tracing(); 3394 3395 trace_ctx = tracing_gen_ctx(); 3396 preempt_disable_notrace(); 3397 3398 3399 tbuffer = get_trace_buf(); 3400 if (!tbuffer) { 3401 len = 0; 3402 goto out_nobuffer; 3403 } 3404 3405 len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args); 3406 3407 size = sizeof(*entry) + len + 1; 3408 ring_buffer_nest_start(buffer); 3409 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, 3410 trace_ctx); 3411 if (!event) 3412 goto out; 3413 entry = ring_buffer_event_data(event); 3414 entry->ip = ip; 3415 3416 memcpy(&entry->buf, tbuffer, len + 1); 3417 __buffer_unlock_commit(buffer, event); 3418 ftrace_trace_stack(printk_trace, buffer, trace_ctx, 6, NULL); 3419 3420 out: 3421 
ring_buffer_nest_end(buffer); 3422 put_trace_buf(); 3423 3424 out_nobuffer: 3425 preempt_enable_notrace(); 3426 unpause_graph_tracing(); 3427 3428 return len; 3429 } 3430 3431 int trace_array_vprintk(struct trace_array *tr, 3432 unsigned long ip, const char *fmt, va_list args) 3433 { 3434 if (tracing_selftest_running && tr == &global_trace) 3435 return 0; 3436 3437 return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args); 3438 } 3439 3440 /** 3441 * trace_array_printk - Print a message to a specific instance 3442 * @tr: The instance trace_array descriptor 3443 * @ip: The instruction pointer that this is called from. 3444 * @fmt: The format to print (printf format) 3445 * 3446 * If a subsystem sets up its own instance, they have the right to 3447 * printk strings into their tracing instance buffer using this 3448 * function. Note, this function will not write into the top level 3449 * buffer (use trace_printk() for that), as writing into the top level 3450 * buffer should only have events that can be individually disabled. 3451 * trace_printk() is only used for debugging a kernel, and should not 3452 * be ever incorporated in normal use. 3453 * 3454 * trace_array_printk() can be used, as it will not add noise to the 3455 * top level tracing buffer. 3456 * 3457 * Note, trace_array_init_printk() must be called on @tr before this 3458 * can be used. 3459 */ 3460 int trace_array_printk(struct trace_array *tr, 3461 unsigned long ip, const char *fmt, ...) 3462 { 3463 int ret; 3464 va_list ap; 3465 3466 if (!tr) 3467 return -ENOENT; 3468 3469 /* This is only allowed for created instances */ 3470 if (tr == &global_trace) 3471 return 0; 3472 3473 if (!(tr->trace_flags & TRACE_ITER_PRINTK)) 3474 return 0; 3475 3476 va_start(ap, fmt); 3477 ret = trace_array_vprintk(tr, ip, fmt, ap); 3478 va_end(ap); 3479 return ret; 3480 } 3481 EXPORT_SYMBOL_GPL(trace_array_printk); 3482 3483 /** 3484 * trace_array_init_printk - Initialize buffers for trace_array_printk() 3485 * @tr: The trace array to initialize the buffers for 3486 * 3487 * As trace_array_printk() only writes into instances, they are OK to 3488 * have in the kernel (unlike trace_printk()). This needs to be called 3489 * before trace_array_printk() can be used on a trace_array. 3490 */ 3491 int trace_array_init_printk(struct trace_array *tr) 3492 { 3493 if (!tr) 3494 return -ENOENT; 3495 3496 /* This is only allowed for created instances */ 3497 if (tr == &global_trace) 3498 return -EINVAL; 3499 3500 return alloc_percpu_trace_buffer(); 3501 } 3502 EXPORT_SYMBOL_GPL(trace_array_init_printk); 3503 3504 int trace_array_printk_buf(struct trace_buffer *buffer, 3505 unsigned long ip, const char *fmt, ...) 
3506 { 3507 int ret; 3508 va_list ap; 3509 3510 if (!(printk_trace->trace_flags & TRACE_ITER_PRINTK)) 3511 return 0; 3512 3513 va_start(ap, fmt); 3514 ret = __trace_array_vprintk(buffer, ip, fmt, ap); 3515 va_end(ap); 3516 return ret; 3517 } 3518 3519 int trace_vprintk(unsigned long ip, const char *fmt, va_list args) 3520 { 3521 return trace_array_vprintk(printk_trace, ip, fmt, args); 3522 } 3523 EXPORT_SYMBOL_GPL(trace_vprintk); 3524 3525 static void trace_iterator_increment(struct trace_iterator *iter) 3526 { 3527 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu); 3528 3529 iter->idx++; 3530 if (buf_iter) 3531 ring_buffer_iter_advance(buf_iter); 3532 } 3533 3534 static struct trace_entry * 3535 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts, 3536 unsigned long *lost_events) 3537 { 3538 struct ring_buffer_event *event; 3539 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu); 3540 3541 if (buf_iter) { 3542 event = ring_buffer_iter_peek(buf_iter, ts); 3543 if (lost_events) 3544 *lost_events = ring_buffer_iter_dropped(buf_iter) ? 3545 (unsigned long)-1 : 0; 3546 } else { 3547 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts, 3548 lost_events); 3549 } 3550 3551 if (event) { 3552 iter->ent_size = ring_buffer_event_length(event); 3553 return ring_buffer_event_data(event); 3554 } 3555 iter->ent_size = 0; 3556 return NULL; 3557 } 3558 3559 static struct trace_entry * 3560 __find_next_entry(struct trace_iterator *iter, int *ent_cpu, 3561 unsigned long *missing_events, u64 *ent_ts) 3562 { 3563 struct trace_buffer *buffer = iter->array_buffer->buffer; 3564 struct trace_entry *ent, *next = NULL; 3565 unsigned long lost_events = 0, next_lost = 0; 3566 int cpu_file = iter->cpu_file; 3567 u64 next_ts = 0, ts; 3568 int next_cpu = -1; 3569 int next_size = 0; 3570 int cpu; 3571 3572 /* 3573 * If we are in a per_cpu trace file, don't bother by iterating over 3574 * all cpu and peek directly. 3575 */ 3576 if (cpu_file > RING_BUFFER_ALL_CPUS) { 3577 if (ring_buffer_empty_cpu(buffer, cpu_file)) 3578 return NULL; 3579 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events); 3580 if (ent_cpu) 3581 *ent_cpu = cpu_file; 3582 3583 return ent; 3584 } 3585 3586 for_each_tracing_cpu(cpu) { 3587 3588 if (ring_buffer_empty_cpu(buffer, cpu)) 3589 continue; 3590 3591 ent = peek_next_entry(iter, cpu, &ts, &lost_events); 3592 3593 /* 3594 * Pick the entry with the smallest timestamp: 3595 */ 3596 if (ent && (!next || ts < next_ts)) { 3597 next = ent; 3598 next_cpu = cpu; 3599 next_ts = ts; 3600 next_lost = lost_events; 3601 next_size = iter->ent_size; 3602 } 3603 } 3604 3605 iter->ent_size = next_size; 3606 3607 if (ent_cpu) 3608 *ent_cpu = next_cpu; 3609 3610 if (ent_ts) 3611 *ent_ts = next_ts; 3612 3613 if (missing_events) 3614 *missing_events = next_lost; 3615 3616 return next; 3617 } 3618 3619 #define STATIC_FMT_BUF_SIZE 128 3620 static char static_fmt_buf[STATIC_FMT_BUF_SIZE]; 3621 3622 char *trace_iter_expand_format(struct trace_iterator *iter) 3623 { 3624 char *tmp; 3625 3626 /* 3627 * iter->tr is NULL when used with tp_printk, which makes 3628 * this get called where it is not safe to call krealloc(). 
 */
	if (!iter->tr || iter->fmt == static_fmt_buf)
		return NULL;

	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
		       GFP_KERNEL);
	if (tmp) {
		iter->fmt_size += STATIC_FMT_BUF_SIZE;
		iter->fmt = tmp;
	}

	return tmp;
}

/* Returns true if the string is safe to dereference from an event */
static bool trace_safe_str(struct trace_iterator *iter, const char *str)
{
	unsigned long addr = (unsigned long)str;
	struct trace_event *trace_event;
	struct trace_event_call *event;

	/* OK if part of the event data */
	if ((addr >= (unsigned long)iter->ent) &&
	    (addr < (unsigned long)iter->ent + iter->ent_size))
		return true;

	/* OK if part of the temp seq buffer */
	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
	    (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE))
		return true;

	/* Core rodata can not be freed */
	if (is_kernel_rodata(addr))
		return true;

	if (trace_is_tracepoint_string(str))
		return true;

	/*
	 * Now this could be a module event, referencing core module
	 * data, which is OK.
	 */
	if (!iter->ent)
		return false;

	trace_event = ftrace_find_event(iter->ent->type);
	if (!trace_event)
		return false;

	event = container_of(trace_event, struct trace_event_call, event);
	if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
		return false;

	/* Would rather have rodata, but this will suffice */
	if (within_module_core(addr, event->module))
		return true;

	return false;
}

/**
 * ignore_event - Check dereferenced fields while writing to the seq buffer
 * @iter: The iterator that holds the seq buffer and the event being printed
 *
 * At boot up, test_event_printk() will flag any event that dereferences
 * a string with "%s" that does not exist in the ring buffer. It may still
 * be valid, as the string may point to a static string in the kernel
 * rodata that never gets freed. But if the string pointer is pointing
 * to something that was allocated, there's a chance that it can be freed
 * by the time the user reads the trace. This would cause a bad memory
 * access by the kernel and possibly crash the system.
 *
 * This function will check if the event has any fields flagged as needing
 * to be checked at runtime and perform those checks.
 *
 * If it is found that a field is unsafe, it will write into the @iter->seq
 * a message stating what was found to be unsafe.
 *
 * @return: true if the event is unsafe and should be ignored,
 *          false otherwise.
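 *
 * The safe way for an event to print a string with "%s" is to copy it
 * into the event itself (a sketch; see
 * samples/trace_events/trace-events-sample.h for the full form):
 *
 *	TP_STRUCT__entry(	__string(name, src)	),
 *	TP_fast_assign(		__assign_str(name);	),
 *	TP_printk("%s", __get_str(name))
 *
 * (older kernels also passed the source string to __assign_str())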
3709 */ 3710 bool ignore_event(struct trace_iterator *iter) 3711 { 3712 struct ftrace_event_field *field; 3713 struct trace_event *trace_event; 3714 struct trace_event_call *event; 3715 struct list_head *head; 3716 struct trace_seq *seq; 3717 const void *ptr; 3718 3719 trace_event = ftrace_find_event(iter->ent->type); 3720 3721 seq = &iter->seq; 3722 3723 if (!trace_event) { 3724 trace_seq_printf(seq, "EVENT ID %d NOT FOUND?\n", iter->ent->type); 3725 return true; 3726 } 3727 3728 event = container_of(trace_event, struct trace_event_call, event); 3729 if (!(event->flags & TRACE_EVENT_FL_TEST_STR)) 3730 return false; 3731 3732 head = trace_get_fields(event); 3733 if (!head) { 3734 trace_seq_printf(seq, "FIELDS FOR EVENT '%s' NOT FOUND?\n", 3735 trace_event_name(event)); 3736 return true; 3737 } 3738 3739 /* Offsets are from the iter->ent that points to the raw event */ 3740 ptr = iter->ent; 3741 3742 list_for_each_entry(field, head, link) { 3743 const char *str; 3744 bool good; 3745 3746 if (!field->needs_test) 3747 continue; 3748 3749 str = *(const char **)(ptr + field->offset); 3750 3751 good = trace_safe_str(iter, str); 3752 3753 /* 3754 * If you hit this warning, it is likely that the 3755 * trace event in question used %s on a string that 3756 * was saved at the time of the event, but may not be 3757 * around when the trace is read. Use __string(), 3758 * __assign_str() and __get_str() helpers in the TRACE_EVENT() 3759 * instead. See samples/trace_events/trace-events-sample.h 3760 * for reference. 3761 */ 3762 if (WARN_ONCE(!good, "event '%s' has unsafe pointer field '%s'", 3763 trace_event_name(event), field->name)) { 3764 trace_seq_printf(seq, "EVENT %s: HAS UNSAFE POINTER FIELD '%s'\n", 3765 trace_event_name(event), field->name); 3766 return true; 3767 } 3768 } 3769 return false; 3770 } 3771 3772 const char *trace_event_format(struct trace_iterator *iter, const char *fmt) 3773 { 3774 const char *p, *new_fmt; 3775 char *q; 3776 3777 if (WARN_ON_ONCE(!fmt)) 3778 return fmt; 3779 3780 if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR) 3781 return fmt; 3782 3783 p = fmt; 3784 new_fmt = q = iter->fmt; 3785 while (*p) { 3786 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) { 3787 if (!trace_iter_expand_format(iter)) 3788 return fmt; 3789 3790 q += iter->fmt - new_fmt; 3791 new_fmt = iter->fmt; 3792 } 3793 3794 *q++ = *p++; 3795 3796 /* Replace %p with %px */ 3797 if (p[-1] == '%') { 3798 if (p[0] == '%') { 3799 *q++ = *p++; 3800 } else if (p[0] == 'p' && !isalnum(p[1])) { 3801 *q++ = *p++; 3802 *q++ = 'x'; 3803 } 3804 } 3805 } 3806 *q = '\0'; 3807 3808 return new_fmt; 3809 } 3810 3811 #define STATIC_TEMP_BUF_SIZE 128 3812 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4); 3813 3814 /* Find the next real entry, without updating the iterator itself */ 3815 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter, 3816 int *ent_cpu, u64 *ent_ts) 3817 { 3818 /* __find_next_entry will reset ent_size */ 3819 int ent_size = iter->ent_size; 3820 struct trace_entry *entry; 3821 3822 /* 3823 * If called from ftrace_dump(), then the iter->temp buffer 3824 * will be the static_temp_buf and not created from kmalloc. 3825 * If the entry size is greater than the buffer, we can 3826 * not save it. Just return NULL in that case. This is only 3827 * used to add markers when two consecutive events' time 3828 * stamps have a large delta. 
See trace_print_lat_context() 3829 */ 3830 if (iter->temp == static_temp_buf && 3831 STATIC_TEMP_BUF_SIZE < ent_size) 3832 return NULL; 3833 3834 /* 3835 * The __find_next_entry() may call peek_next_entry(), which may 3836 * call ring_buffer_peek() that may make the contents of iter->ent 3837 * undefined. Need to copy iter->ent now. 3838 */ 3839 if (iter->ent && iter->ent != iter->temp) { 3840 if ((!iter->temp || iter->temp_size < iter->ent_size) && 3841 !WARN_ON_ONCE(iter->temp == static_temp_buf)) { 3842 void *temp; 3843 temp = kmalloc(iter->ent_size, GFP_KERNEL); 3844 if (!temp) 3845 return NULL; 3846 kfree(iter->temp); 3847 iter->temp = temp; 3848 iter->temp_size = iter->ent_size; 3849 } 3850 memcpy(iter->temp, iter->ent, iter->ent_size); 3851 iter->ent = iter->temp; 3852 } 3853 entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts); 3854 /* Put back the original ent_size */ 3855 iter->ent_size = ent_size; 3856 3857 return entry; 3858 } 3859 3860 /* Find the next real entry, and increment the iterator to the next entry */ 3861 void *trace_find_next_entry_inc(struct trace_iterator *iter) 3862 { 3863 iter->ent = __find_next_entry(iter, &iter->cpu, 3864 &iter->lost_events, &iter->ts); 3865 3866 if (iter->ent) 3867 trace_iterator_increment(iter); 3868 3869 return iter->ent ? iter : NULL; 3870 } 3871 3872 static void trace_consume(struct trace_iterator *iter) 3873 { 3874 ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts, 3875 &iter->lost_events); 3876 } 3877 3878 static void *s_next(struct seq_file *m, void *v, loff_t *pos) 3879 { 3880 struct trace_iterator *iter = m->private; 3881 int i = (int)*pos; 3882 void *ent; 3883 3884 WARN_ON_ONCE(iter->leftover); 3885 3886 (*pos)++; 3887 3888 /* can't go backwards */ 3889 if (iter->idx > i) 3890 return NULL; 3891 3892 if (iter->idx < 0) 3893 ent = trace_find_next_entry_inc(iter); 3894 else 3895 ent = iter; 3896 3897 while (ent && iter->idx < i) 3898 ent = trace_find_next_entry_inc(iter); 3899 3900 iter->pos = *pos; 3901 3902 return ent; 3903 } 3904 3905 void tracing_iter_reset(struct trace_iterator *iter, int cpu) 3906 { 3907 struct ring_buffer_iter *buf_iter; 3908 unsigned long entries = 0; 3909 u64 ts; 3910 3911 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0; 3912 3913 buf_iter = trace_buffer_iter(iter, cpu); 3914 if (!buf_iter) 3915 return; 3916 3917 ring_buffer_iter_reset(buf_iter); 3918 3919 /* 3920 * We could have the case with the max latency tracers 3921 * that a reset never took place on a cpu. This is evident 3922 * by the timestamp being before the start of the buffer. 3923 */ 3924 while (ring_buffer_iter_peek(buf_iter, &ts)) { 3925 if (ts >= iter->array_buffer->time_start) 3926 break; 3927 entries++; 3928 ring_buffer_iter_advance(buf_iter); 3929 /* This could be a big loop */ 3930 cond_resched(); 3931 } 3932 3933 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries; 3934 } 3935 3936 /* 3937 * The current tracer is copied to avoid a global locking 3938 * all around. 
3939 */ 3940 static void *s_start(struct seq_file *m, loff_t *pos) 3941 { 3942 struct trace_iterator *iter = m->private; 3943 struct trace_array *tr = iter->tr; 3944 int cpu_file = iter->cpu_file; 3945 void *p = NULL; 3946 loff_t l = 0; 3947 int cpu; 3948 3949 mutex_lock(&trace_types_lock); 3950 if (unlikely(tr->current_trace != iter->trace)) { 3951 /* Close iter->trace before switching to the new current tracer */ 3952 if (iter->trace->close) 3953 iter->trace->close(iter); 3954 iter->trace = tr->current_trace; 3955 /* Reopen the new current tracer */ 3956 if (iter->trace->open) 3957 iter->trace->open(iter); 3958 } 3959 mutex_unlock(&trace_types_lock); 3960 3961 #ifdef CONFIG_TRACER_MAX_TRACE 3962 if (iter->snapshot && iter->trace->use_max_tr) 3963 return ERR_PTR(-EBUSY); 3964 #endif 3965 3966 if (*pos != iter->pos) { 3967 iter->ent = NULL; 3968 iter->cpu = 0; 3969 iter->idx = -1; 3970 3971 if (cpu_file == RING_BUFFER_ALL_CPUS) { 3972 for_each_tracing_cpu(cpu) 3973 tracing_iter_reset(iter, cpu); 3974 } else 3975 tracing_iter_reset(iter, cpu_file); 3976 3977 iter->leftover = 0; 3978 for (p = iter; p && l < *pos; p = s_next(m, p, &l)) 3979 ; 3980 3981 } else { 3982 /* 3983 * If we overflowed the seq_file before, then we want 3984 * to just reuse the trace_seq buffer again. 3985 */ 3986 if (iter->leftover) 3987 p = iter; 3988 else { 3989 l = *pos - 1; 3990 p = s_next(m, p, &l); 3991 } 3992 } 3993 3994 trace_event_read_lock(); 3995 trace_access_lock(cpu_file); 3996 return p; 3997 } 3998 3999 static void s_stop(struct seq_file *m, void *p) 4000 { 4001 struct trace_iterator *iter = m->private; 4002 4003 #ifdef CONFIG_TRACER_MAX_TRACE 4004 if (iter->snapshot && iter->trace->use_max_tr) 4005 return; 4006 #endif 4007 4008 trace_access_unlock(iter->cpu_file); 4009 trace_event_read_unlock(); 4010 } 4011 4012 static void 4013 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total, 4014 unsigned long *entries, int cpu) 4015 { 4016 unsigned long count; 4017 4018 count = ring_buffer_entries_cpu(buf->buffer, cpu); 4019 /* 4020 * If this buffer has skipped entries, then we hold all 4021 * entries for the trace and we need to ignore the 4022 * ones before the time stamp. 
4023 */ 4024 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) { 4025 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries; 4026 /* total is the same as the entries */ 4027 *total = count; 4028 } else 4029 *total = count + 4030 ring_buffer_overrun_cpu(buf->buffer, cpu); 4031 *entries = count; 4032 } 4033 4034 static void 4035 get_total_entries(struct array_buffer *buf, 4036 unsigned long *total, unsigned long *entries) 4037 { 4038 unsigned long t, e; 4039 int cpu; 4040 4041 *total = 0; 4042 *entries = 0; 4043 4044 for_each_tracing_cpu(cpu) { 4045 get_total_entries_cpu(buf, &t, &e, cpu); 4046 *total += t; 4047 *entries += e; 4048 } 4049 } 4050 4051 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu) 4052 { 4053 unsigned long total, entries; 4054 4055 if (!tr) 4056 tr = &global_trace; 4057 4058 get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu); 4059 4060 return entries; 4061 } 4062 4063 unsigned long trace_total_entries(struct trace_array *tr) 4064 { 4065 unsigned long total, entries; 4066 4067 if (!tr) 4068 tr = &global_trace; 4069 4070 get_total_entries(&tr->array_buffer, &total, &entries); 4071 4072 return entries; 4073 } 4074 4075 static void print_lat_help_header(struct seq_file *m) 4076 { 4077 seq_puts(m, "# _------=> CPU# \n" 4078 "# / _-----=> irqs-off/BH-disabled\n" 4079 "# | / _----=> need-resched \n" 4080 "# || / _---=> hardirq/softirq \n" 4081 "# ||| / _--=> preempt-depth \n" 4082 "# |||| / _-=> migrate-disable \n" 4083 "# ||||| / delay \n" 4084 "# cmd pid |||||| time | caller \n" 4085 "# \\ / |||||| \\ | / \n"); 4086 } 4087 4088 static void print_event_info(struct array_buffer *buf, struct seq_file *m) 4089 { 4090 unsigned long total; 4091 unsigned long entries; 4092 4093 get_total_entries(buf, &total, &entries); 4094 seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n", 4095 entries, total, num_online_cpus()); 4096 seq_puts(m, "#\n"); 4097 } 4098 4099 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m, 4100 unsigned int flags) 4101 { 4102 bool tgid = flags & TRACE_ITER_RECORD_TGID; 4103 4104 print_event_info(buf, m); 4105 4106 seq_printf(m, "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? " TGID " : ""); 4107 seq_printf(m, "# | | %s | | |\n", tgid ? " | " : ""); 4108 } 4109 4110 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m, 4111 unsigned int flags) 4112 { 4113 bool tgid = flags & TRACE_ITER_RECORD_TGID; 4114 static const char space[] = " "; 4115 int prec = tgid ? 
12 : 2; 4116 4117 print_event_info(buf, m); 4118 4119 seq_printf(m, "# %.*s _-----=> irqs-off/BH-disabled\n", prec, space); 4120 seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space); 4121 seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space); 4122 seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space); 4123 seq_printf(m, "# %.*s||| / _-=> migrate-disable\n", prec, space); 4124 seq_printf(m, "# %.*s|||| / delay\n", prec, space); 4125 seq_printf(m, "# TASK-PID %.*s CPU# ||||| TIMESTAMP FUNCTION\n", prec, " TGID "); 4126 seq_printf(m, "# | | %.*s | ||||| | |\n", prec, " | "); 4127 } 4128 4129 void 4130 print_trace_header(struct seq_file *m, struct trace_iterator *iter) 4131 { 4132 unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK); 4133 struct array_buffer *buf = iter->array_buffer; 4134 struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu); 4135 struct tracer *type = iter->trace; 4136 unsigned long entries; 4137 unsigned long total; 4138 const char *name = type->name; 4139 4140 get_total_entries(buf, &total, &entries); 4141 4142 seq_printf(m, "# %s latency trace v1.1.5 on %s\n", 4143 name, init_utsname()->release); 4144 seq_puts(m, "# -----------------------------------" 4145 "---------------------------------\n"); 4146 seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |" 4147 " (M:%s VP:%d, KP:%d, SP:%d HP:%d", 4148 nsecs_to_usecs(data->saved_latency), 4149 entries, 4150 total, 4151 buf->cpu, 4152 preempt_model_str(), 4153 /* These are reserved for later use */ 4154 0, 0, 0, 0); 4155 #ifdef CONFIG_SMP 4156 seq_printf(m, " #P:%d)\n", num_online_cpus()); 4157 #else 4158 seq_puts(m, ")\n"); 4159 #endif 4160 seq_puts(m, "# -----------------\n"); 4161 seq_printf(m, "# | task: %.16s-%d " 4162 "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n", 4163 data->comm, data->pid, 4164 from_kuid_munged(seq_user_ns(m), data->uid), data->nice, 4165 data->policy, data->rt_priority); 4166 seq_puts(m, "# -----------------\n"); 4167 4168 if (data->critical_start) { 4169 seq_puts(m, "# => started at: "); 4170 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags); 4171 trace_print_seq(m, &iter->seq); 4172 seq_puts(m, "\n# => ended at: "); 4173 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags); 4174 trace_print_seq(m, &iter->seq); 4175 seq_puts(m, "\n#\n"); 4176 } 4177 4178 seq_puts(m, "#\n"); 4179 } 4180 4181 static void test_cpu_buff_start(struct trace_iterator *iter) 4182 { 4183 struct trace_seq *s = &iter->seq; 4184 struct trace_array *tr = iter->tr; 4185 4186 if (!(tr->trace_flags & TRACE_ITER_ANNOTATE)) 4187 return; 4188 4189 if (!(iter->iter_flags & TRACE_FILE_ANNOTATE)) 4190 return; 4191 4192 if (cpumask_available(iter->started) && 4193 cpumask_test_cpu(iter->cpu, iter->started)) 4194 return; 4195 4196 if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries) 4197 return; 4198 4199 if (cpumask_available(iter->started)) 4200 cpumask_set_cpu(iter->cpu, iter->started); 4201 4202 /* Don't print started cpu buffer for the first entry of the trace */ 4203 if (iter->idx > 1) 4204 trace_seq_printf(s, "##### CPU %u buffer started ####\n", 4205 iter->cpu); 4206 } 4207 4208 static enum print_line_t print_trace_fmt(struct trace_iterator *iter) 4209 { 4210 struct trace_array *tr = iter->tr; 4211 struct trace_seq *s = &iter->seq; 4212 unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK); 4213 struct trace_entry *entry; 4214 struct trace_event *event; 4215 4216 entry = iter->ent; 4217 4218 test_cpu_buff_start(iter); 4219 
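	/*
	 * Decode and print one entry: look up the trace_event by type,
	 * emit the context columns (latency or default format), then let
	 * the event's ->trace() callback render the payload.  Entries that
	 * cannot be decoded fall through to the "Unknown type" message.
	 *
	 * Illustrative only -- a typical default-format line looks
	 * roughly like:
	 *   <idle>-0     [001] d..2.  123.456789: some_event: ...
	 * (the exact field layout depends on the trace options set).
	 */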
4220 event = ftrace_find_event(entry->type); 4221 4222 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) { 4223 if (iter->iter_flags & TRACE_FILE_LAT_FMT) 4224 trace_print_lat_context(iter); 4225 else 4226 trace_print_context(iter); 4227 } 4228 4229 if (trace_seq_has_overflowed(s)) 4230 return TRACE_TYPE_PARTIAL_LINE; 4231 4232 if (event) { 4233 if (tr->trace_flags & TRACE_ITER_FIELDS) 4234 return print_event_fields(iter, event); 4235 /* 4236 * For TRACE_EVENT() events, the print_fmt is not 4237 * safe to use if the array has delta offsets 4238 * Force printing via the fields. 4239 */ 4240 if ((tr->text_delta) && 4241 event->type > __TRACE_LAST_TYPE) 4242 return print_event_fields(iter, event); 4243 4244 return event->funcs->trace(iter, sym_flags, event); 4245 } 4246 4247 trace_seq_printf(s, "Unknown type %d\n", entry->type); 4248 4249 return trace_handle_return(s); 4250 } 4251 4252 static enum print_line_t print_raw_fmt(struct trace_iterator *iter) 4253 { 4254 struct trace_array *tr = iter->tr; 4255 struct trace_seq *s = &iter->seq; 4256 struct trace_entry *entry; 4257 struct trace_event *event; 4258 4259 entry = iter->ent; 4260 4261 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) 4262 trace_seq_printf(s, "%d %d %llu ", 4263 entry->pid, iter->cpu, iter->ts); 4264 4265 if (trace_seq_has_overflowed(s)) 4266 return TRACE_TYPE_PARTIAL_LINE; 4267 4268 event = ftrace_find_event(entry->type); 4269 if (event) 4270 return event->funcs->raw(iter, 0, event); 4271 4272 trace_seq_printf(s, "%d ?\n", entry->type); 4273 4274 return trace_handle_return(s); 4275 } 4276 4277 static enum print_line_t print_hex_fmt(struct trace_iterator *iter) 4278 { 4279 struct trace_array *tr = iter->tr; 4280 struct trace_seq *s = &iter->seq; 4281 unsigned char newline = '\n'; 4282 struct trace_entry *entry; 4283 struct trace_event *event; 4284 4285 entry = iter->ent; 4286 4287 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) { 4288 SEQ_PUT_HEX_FIELD(s, entry->pid); 4289 SEQ_PUT_HEX_FIELD(s, iter->cpu); 4290 SEQ_PUT_HEX_FIELD(s, iter->ts); 4291 if (trace_seq_has_overflowed(s)) 4292 return TRACE_TYPE_PARTIAL_LINE; 4293 } 4294 4295 event = ftrace_find_event(entry->type); 4296 if (event) { 4297 enum print_line_t ret = event->funcs->hex(iter, 0, event); 4298 if (ret != TRACE_TYPE_HANDLED) 4299 return ret; 4300 } 4301 4302 SEQ_PUT_FIELD(s, newline); 4303 4304 return trace_handle_return(s); 4305 } 4306 4307 static enum print_line_t print_bin_fmt(struct trace_iterator *iter) 4308 { 4309 struct trace_array *tr = iter->tr; 4310 struct trace_seq *s = &iter->seq; 4311 struct trace_entry *entry; 4312 struct trace_event *event; 4313 4314 entry = iter->ent; 4315 4316 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) { 4317 SEQ_PUT_FIELD(s, entry->pid); 4318 SEQ_PUT_FIELD(s, iter->cpu); 4319 SEQ_PUT_FIELD(s, iter->ts); 4320 if (trace_seq_has_overflowed(s)) 4321 return TRACE_TYPE_PARTIAL_LINE; 4322 } 4323 4324 event = ftrace_find_event(entry->type); 4325 return event ? 
event->funcs->binary(iter, 0, event) : 4326 TRACE_TYPE_HANDLED; 4327 } 4328 4329 int trace_empty(struct trace_iterator *iter) 4330 { 4331 struct ring_buffer_iter *buf_iter; 4332 int cpu; 4333 4334 /* If we are looking at one CPU buffer, only check that one */ 4335 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) { 4336 cpu = iter->cpu_file; 4337 buf_iter = trace_buffer_iter(iter, cpu); 4338 if (buf_iter) { 4339 if (!ring_buffer_iter_empty(buf_iter)) 4340 return 0; 4341 } else { 4342 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu)) 4343 return 0; 4344 } 4345 return 1; 4346 } 4347 4348 for_each_tracing_cpu(cpu) { 4349 buf_iter = trace_buffer_iter(iter, cpu); 4350 if (buf_iter) { 4351 if (!ring_buffer_iter_empty(buf_iter)) 4352 return 0; 4353 } else { 4354 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu)) 4355 return 0; 4356 } 4357 } 4358 4359 return 1; 4360 } 4361 4362 /* Called with trace_event_read_lock() held. */ 4363 enum print_line_t print_trace_line(struct trace_iterator *iter) 4364 { 4365 struct trace_array *tr = iter->tr; 4366 unsigned long trace_flags = tr->trace_flags; 4367 enum print_line_t ret; 4368 4369 if (iter->lost_events) { 4370 if (iter->lost_events == (unsigned long)-1) 4371 trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n", 4372 iter->cpu); 4373 else 4374 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n", 4375 iter->cpu, iter->lost_events); 4376 if (trace_seq_has_overflowed(&iter->seq)) 4377 return TRACE_TYPE_PARTIAL_LINE; 4378 } 4379 4380 if (iter->trace && iter->trace->print_line) { 4381 ret = iter->trace->print_line(iter); 4382 if (ret != TRACE_TYPE_UNHANDLED) 4383 return ret; 4384 } 4385 4386 if (iter->ent->type == TRACE_BPUTS && 4387 trace_flags & TRACE_ITER_PRINTK && 4388 trace_flags & TRACE_ITER_PRINTK_MSGONLY) 4389 return trace_print_bputs_msg_only(iter); 4390 4391 if (iter->ent->type == TRACE_BPRINT && 4392 trace_flags & TRACE_ITER_PRINTK && 4393 trace_flags & TRACE_ITER_PRINTK_MSGONLY) 4394 return trace_print_bprintk_msg_only(iter); 4395 4396 if (iter->ent->type == TRACE_PRINT && 4397 trace_flags & TRACE_ITER_PRINTK && 4398 trace_flags & TRACE_ITER_PRINTK_MSGONLY) 4399 return trace_print_printk_msg_only(iter); 4400 4401 if (trace_flags & TRACE_ITER_BIN) 4402 return print_bin_fmt(iter); 4403 4404 if (trace_flags & TRACE_ITER_HEX) 4405 return print_hex_fmt(iter); 4406 4407 if (trace_flags & TRACE_ITER_RAW) 4408 return print_raw_fmt(iter); 4409 4410 return print_trace_fmt(iter); 4411 } 4412 4413 void trace_latency_header(struct seq_file *m) 4414 { 4415 struct trace_iterator *iter = m->private; 4416 struct trace_array *tr = iter->tr; 4417 4418 /* print nothing if the buffers are empty */ 4419 if (trace_empty(iter)) 4420 return; 4421 4422 if (iter->iter_flags & TRACE_FILE_LAT_FMT) 4423 print_trace_header(m, iter); 4424 4425 if (!(tr->trace_flags & TRACE_ITER_VERBOSE)) 4426 print_lat_help_header(m); 4427 } 4428 4429 void trace_default_header(struct seq_file *m) 4430 { 4431 struct trace_iterator *iter = m->private; 4432 struct trace_array *tr = iter->tr; 4433 unsigned long trace_flags = tr->trace_flags; 4434 4435 if (!(trace_flags & TRACE_ITER_CONTEXT_INFO)) 4436 return; 4437 4438 if (iter->iter_flags & TRACE_FILE_LAT_FMT) { 4439 /* print nothing if the buffers are empty */ 4440 if (trace_empty(iter)) 4441 return; 4442 print_trace_header(m, iter); 4443 if (!(trace_flags & TRACE_ITER_VERBOSE)) 4444 print_lat_help_header(m); 4445 } else { 4446 if (!(trace_flags & TRACE_ITER_VERBOSE)) { 4447 if (trace_flags & TRACE_ITER_IRQ_INFO) 4448 
print_func_help_header_irq(iter->array_buffer, 4449 m, trace_flags); 4450 else 4451 print_func_help_header(iter->array_buffer, m, 4452 trace_flags); 4453 } 4454 } 4455 } 4456 4457 static void test_ftrace_alive(struct seq_file *m) 4458 { 4459 if (!ftrace_is_dead()) 4460 return; 4461 seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n" 4462 "# MAY BE MISSING FUNCTION EVENTS\n"); 4463 } 4464 4465 #ifdef CONFIG_TRACER_MAX_TRACE 4466 static void show_snapshot_main_help(struct seq_file *m) 4467 { 4468 seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n" 4469 "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n" 4470 "# Takes a snapshot of the main buffer.\n" 4471 "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n" 4472 "# (Doesn't have to be '2' works with any number that\n" 4473 "# is not a '0' or '1')\n"); 4474 } 4475 4476 static void show_snapshot_percpu_help(struct seq_file *m) 4477 { 4478 seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n"); 4479 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP 4480 seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n" 4481 "# Takes a snapshot of the main buffer for this cpu.\n"); 4482 #else 4483 seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n" 4484 "# Must use main snapshot file to allocate.\n"); 4485 #endif 4486 seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n" 4487 "# (Doesn't have to be '2' works with any number that\n" 4488 "# is not a '0' or '1')\n"); 4489 } 4490 4491 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) 4492 { 4493 if (iter->tr->allocated_snapshot) 4494 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n"); 4495 else 4496 seq_puts(m, "#\n# * Snapshot is freed *\n#\n"); 4497 4498 seq_puts(m, "# Snapshot commands:\n"); 4499 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) 4500 show_snapshot_main_help(m); 4501 else 4502 show_snapshot_percpu_help(m); 4503 } 4504 #else 4505 /* Should never be called */ 4506 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { } 4507 #endif 4508 4509 static int s_show(struct seq_file *m, void *v) 4510 { 4511 struct trace_iterator *iter = v; 4512 int ret; 4513 4514 if (iter->ent == NULL) { 4515 if (iter->tr) { 4516 seq_printf(m, "# tracer: %s\n", iter->trace->name); 4517 seq_puts(m, "#\n"); 4518 test_ftrace_alive(m); 4519 } 4520 if (iter->snapshot && trace_empty(iter)) 4521 print_snapshot_help(m, iter); 4522 else if (iter->trace && iter->trace->print_header) 4523 iter->trace->print_header(m); 4524 else 4525 trace_default_header(m); 4526 4527 } else if (iter->leftover) { 4528 /* 4529 * If we filled the seq_file buffer earlier, we 4530 * want to just show it now. 4531 */ 4532 ret = trace_print_seq(m, &iter->seq); 4533 4534 /* ret should this time be zero, but you never know */ 4535 iter->leftover = ret; 4536 4537 } else { 4538 ret = print_trace_line(iter); 4539 if (ret == TRACE_TYPE_PARTIAL_LINE) { 4540 iter->seq.full = 0; 4541 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n"); 4542 } 4543 ret = trace_print_seq(m, &iter->seq); 4544 /* 4545 * If we overflow the seq_file buffer, then it will 4546 * ask us for this data again at start up. 4547 * Use that instead. 4548 * ret is 0 if seq_file write succeeded. 4549 * -1 otherwise. 
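 *
 * Storing that -1 in iter->leftover makes the next s_show() call replay
 * the same trace_seq instead of advancing the iterator, and s_start()
 * checks it as well, so the entry is not lost when seq_file retries the
 * read with a larger buffer.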
4550 */
4551 iter->leftover = ret;
4552 }
4553
4554 return 0;
4555 }
4556
4557 /*
4558 * Should be used after trace_array_get(), trace_types_lock
4559 * ensures that i_cdev was already initialized.
4560 */
4561 static inline int tracing_get_cpu(struct inode *inode)
4562 {
4563 if (inode->i_cdev) /* See trace_create_cpu_file() */
4564 return (long)inode->i_cdev - 1;
4565 return RING_BUFFER_ALL_CPUS;
4566 }
4567
4568 static const struct seq_operations tracer_seq_ops = {
4569 .start = s_start,
4570 .next = s_next,
4571 .stop = s_stop,
4572 .show = s_show,
4573 };
4574
4575 /*
4576 * Note, as iter itself can be allocated and freed in different
4577 * ways, this function is only used to free its content, and not
4578 * the iterator itself. The only requirement for all the allocations
4579 * is that they must zero all fields (kzalloc), as freeing works with
4580 * either allocated content or NULL.
4581 */
4582 static void free_trace_iter_content(struct trace_iterator *iter)
4583 {
4584 /* The fmt is either NULL, allocated or points to static_fmt_buf */
4585 if (iter->fmt != static_fmt_buf)
4586 kfree(iter->fmt);
4587
4588 kfree(iter->temp);
4589 kfree(iter->buffer_iter);
4590 mutex_destroy(&iter->mutex);
4591 free_cpumask_var(iter->started);
4592 }
4593
4594 static struct trace_iterator *
4595 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4596 {
4597 struct trace_array *tr = inode->i_private;
4598 struct trace_iterator *iter;
4599 int cpu;
4600
4601 if (tracing_disabled)
4602 return ERR_PTR(-ENODEV);
4603
4604 iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4605 if (!iter)
4606 return ERR_PTR(-ENOMEM);
4607
4608 iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4609 GFP_KERNEL);
4610 if (!iter->buffer_iter)
4611 goto release;
4612
4613 /*
4614 * trace_find_next_entry() may need to save off iter->ent.
4615 * It will place it into the iter->temp buffer. As most
4616 * events are less than 128 bytes, allocate a buffer of that size.
4617 * If one is greater, then trace_find_next_entry() will
4618 * allocate a new buffer to adjust for the bigger iter->ent.
4619 * It's not critical if it fails to get allocated here.
4620 */
4621 iter->temp = kmalloc(128, GFP_KERNEL);
4622 if (iter->temp)
4623 iter->temp_size = 128;
4624
4625 /*
4626 * trace_event_printf() may need to modify the given format
4627 * string to replace %p with %px so that it shows the real address
4628 * instead of a hash value. However, that is only for the event
4629 * tracing; other tracers may not need it. Defer the allocation
4630 * until it is needed.
4631 */
4632 iter->fmt = NULL;
4633 iter->fmt_size = 0;
4634
4635 mutex_lock(&trace_types_lock);
4636 iter->trace = tr->current_trace;
4637
4638 if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4639 goto fail;
4640
4641 iter->tr = tr;
4642
4643 #ifdef CONFIG_TRACER_MAX_TRACE
4644 /* Currently only the top directory has a snapshot */
4645 if (tr->current_trace->print_max || snapshot)
4646 iter->array_buffer = &tr->max_buffer;
4647 else
4648 #endif
4649 iter->array_buffer = &tr->array_buffer;
4650 iter->snapshot = snapshot;
4651 iter->pos = -1;
4652 iter->cpu_file = tracing_get_cpu(inode);
4653 mutex_init(&iter->mutex);
4654
4655 /* Notify the tracer early, before we stop tracing.
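 * The ->open() callback gives a tracer a chance to set up per-iterator
 * state (the function graph tracer is one user) before the ring buffer
 * read iterators are created and, when the pause-on-trace option is
 * set, before tracing is stopped below.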
*/ 4656 if (iter->trace->open) 4657 iter->trace->open(iter); 4658 4659 /* Annotate start of buffers if we had overruns */ 4660 if (ring_buffer_overruns(iter->array_buffer->buffer)) 4661 iter->iter_flags |= TRACE_FILE_ANNOTATE; 4662 4663 /* Output in nanoseconds only if we are using a clock in nanoseconds. */ 4664 if (trace_clocks[tr->clock_id].in_ns) 4665 iter->iter_flags |= TRACE_FILE_TIME_IN_NS; 4666 4667 /* 4668 * If pause-on-trace is enabled, then stop the trace while 4669 * dumping, unless this is the "snapshot" file 4670 */ 4671 if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE)) 4672 tracing_stop_tr(tr); 4673 4674 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) { 4675 for_each_tracing_cpu(cpu) { 4676 iter->buffer_iter[cpu] = 4677 ring_buffer_read_prepare(iter->array_buffer->buffer, 4678 cpu, GFP_KERNEL); 4679 } 4680 ring_buffer_read_prepare_sync(); 4681 for_each_tracing_cpu(cpu) { 4682 ring_buffer_read_start(iter->buffer_iter[cpu]); 4683 tracing_iter_reset(iter, cpu); 4684 } 4685 } else { 4686 cpu = iter->cpu_file; 4687 iter->buffer_iter[cpu] = 4688 ring_buffer_read_prepare(iter->array_buffer->buffer, 4689 cpu, GFP_KERNEL); 4690 ring_buffer_read_prepare_sync(); 4691 ring_buffer_read_start(iter->buffer_iter[cpu]); 4692 tracing_iter_reset(iter, cpu); 4693 } 4694 4695 mutex_unlock(&trace_types_lock); 4696 4697 return iter; 4698 4699 fail: 4700 mutex_unlock(&trace_types_lock); 4701 free_trace_iter_content(iter); 4702 release: 4703 seq_release_private(inode, file); 4704 return ERR_PTR(-ENOMEM); 4705 } 4706 4707 int tracing_open_generic(struct inode *inode, struct file *filp) 4708 { 4709 int ret; 4710 4711 ret = tracing_check_open_get_tr(NULL); 4712 if (ret) 4713 return ret; 4714 4715 filp->private_data = inode->i_private; 4716 return 0; 4717 } 4718 4719 bool tracing_is_disabled(void) 4720 { 4721 return (tracing_disabled) ? true: false; 4722 } 4723 4724 /* 4725 * Open and update trace_array ref count. 4726 * Must have the current trace_array passed to it. 4727 */ 4728 int tracing_open_generic_tr(struct inode *inode, struct file *filp) 4729 { 4730 struct trace_array *tr = inode->i_private; 4731 int ret; 4732 4733 ret = tracing_check_open_get_tr(tr); 4734 if (ret) 4735 return ret; 4736 4737 filp->private_data = inode->i_private; 4738 4739 return 0; 4740 } 4741 4742 /* 4743 * The private pointer of the inode is the trace_event_file. 4744 * Update the tr ref count associated to it. 
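 *
 * Besides the trace_array reference, the open below also takes a
 * reference on the event file itself (event_file_get()) under
 * event_mutex, and fails with -ENODEV if the file was already marked
 * EVENT_FILE_FL_FREED, so a file that is being removed cannot gain
 * new readers.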
4745 */ 4746 int tracing_open_file_tr(struct inode *inode, struct file *filp) 4747 { 4748 struct trace_event_file *file = inode->i_private; 4749 int ret; 4750 4751 ret = tracing_check_open_get_tr(file->tr); 4752 if (ret) 4753 return ret; 4754 4755 mutex_lock(&event_mutex); 4756 4757 /* Fail if the file is marked for removal */ 4758 if (file->flags & EVENT_FILE_FL_FREED) { 4759 trace_array_put(file->tr); 4760 ret = -ENODEV; 4761 } else { 4762 event_file_get(file); 4763 } 4764 4765 mutex_unlock(&event_mutex); 4766 if (ret) 4767 return ret; 4768 4769 filp->private_data = inode->i_private; 4770 4771 return 0; 4772 } 4773 4774 int tracing_release_file_tr(struct inode *inode, struct file *filp) 4775 { 4776 struct trace_event_file *file = inode->i_private; 4777 4778 trace_array_put(file->tr); 4779 event_file_put(file); 4780 4781 return 0; 4782 } 4783 4784 int tracing_single_release_file_tr(struct inode *inode, struct file *filp) 4785 { 4786 tracing_release_file_tr(inode, filp); 4787 return single_release(inode, filp); 4788 } 4789 4790 static int tracing_mark_open(struct inode *inode, struct file *filp) 4791 { 4792 stream_open(inode, filp); 4793 return tracing_open_generic_tr(inode, filp); 4794 } 4795 4796 static int tracing_release(struct inode *inode, struct file *file) 4797 { 4798 struct trace_array *tr = inode->i_private; 4799 struct seq_file *m = file->private_data; 4800 struct trace_iterator *iter; 4801 int cpu; 4802 4803 if (!(file->f_mode & FMODE_READ)) { 4804 trace_array_put(tr); 4805 return 0; 4806 } 4807 4808 /* Writes do not use seq_file */ 4809 iter = m->private; 4810 mutex_lock(&trace_types_lock); 4811 4812 for_each_tracing_cpu(cpu) { 4813 if (iter->buffer_iter[cpu]) 4814 ring_buffer_read_finish(iter->buffer_iter[cpu]); 4815 } 4816 4817 if (iter->trace && iter->trace->close) 4818 iter->trace->close(iter); 4819 4820 if (!iter->snapshot && tr->stop_count) 4821 /* reenable tracing if it was previously enabled */ 4822 tracing_start_tr(tr); 4823 4824 __trace_array_put(tr); 4825 4826 mutex_unlock(&trace_types_lock); 4827 4828 free_trace_iter_content(iter); 4829 seq_release_private(inode, file); 4830 4831 return 0; 4832 } 4833 4834 int tracing_release_generic_tr(struct inode *inode, struct file *file) 4835 { 4836 struct trace_array *tr = inode->i_private; 4837 4838 trace_array_put(tr); 4839 return 0; 4840 } 4841 4842 static int tracing_single_release_tr(struct inode *inode, struct file *file) 4843 { 4844 struct trace_array *tr = inode->i_private; 4845 4846 trace_array_put(tr); 4847 4848 return single_release(inode, file); 4849 } 4850 4851 static int tracing_open(struct inode *inode, struct file *file) 4852 { 4853 struct trace_array *tr = inode->i_private; 4854 struct trace_iterator *iter; 4855 int ret; 4856 4857 ret = tracing_check_open_get_tr(tr); 4858 if (ret) 4859 return ret; 4860 4861 /* If this file was open for write, then erase contents */ 4862 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) { 4863 int cpu = tracing_get_cpu(inode); 4864 struct array_buffer *trace_buf = &tr->array_buffer; 4865 4866 #ifdef CONFIG_TRACER_MAX_TRACE 4867 if (tr->current_trace->print_max) 4868 trace_buf = &tr->max_buffer; 4869 #endif 4870 4871 if (cpu == RING_BUFFER_ALL_CPUS) 4872 tracing_reset_online_cpus(trace_buf); 4873 else 4874 tracing_reset_cpu(trace_buf, cpu); 4875 } 4876 4877 if (file->f_mode & FMODE_READ) { 4878 iter = __tracing_open(inode, file, false); 4879 if (IS_ERR(iter)) 4880 ret = PTR_ERR(iter); 4881 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT) 4882 iter->iter_flags |= 
TRACE_FILE_LAT_FMT; 4883 } 4884 4885 if (ret < 0) 4886 trace_array_put(tr); 4887 4888 return ret; 4889 } 4890 4891 /* 4892 * Some tracers are not suitable for instance buffers. 4893 * A tracer is always available for the global array (toplevel) 4894 * or if it explicitly states that it is. 4895 */ 4896 static bool 4897 trace_ok_for_array(struct tracer *t, struct trace_array *tr) 4898 { 4899 #ifdef CONFIG_TRACER_SNAPSHOT 4900 /* arrays with mapped buffer range do not have snapshots */ 4901 if (tr->range_addr_start && t->use_max_tr) 4902 return false; 4903 #endif 4904 return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances; 4905 } 4906 4907 /* Find the next tracer that this trace array may use */ 4908 static struct tracer * 4909 get_tracer_for_array(struct trace_array *tr, struct tracer *t) 4910 { 4911 while (t && !trace_ok_for_array(t, tr)) 4912 t = t->next; 4913 4914 return t; 4915 } 4916 4917 static void * 4918 t_next(struct seq_file *m, void *v, loff_t *pos) 4919 { 4920 struct trace_array *tr = m->private; 4921 struct tracer *t = v; 4922 4923 (*pos)++; 4924 4925 if (t) 4926 t = get_tracer_for_array(tr, t->next); 4927 4928 return t; 4929 } 4930 4931 static void *t_start(struct seq_file *m, loff_t *pos) 4932 { 4933 struct trace_array *tr = m->private; 4934 struct tracer *t; 4935 loff_t l = 0; 4936 4937 mutex_lock(&trace_types_lock); 4938 4939 t = get_tracer_for_array(tr, trace_types); 4940 for (; t && l < *pos; t = t_next(m, t, &l)) 4941 ; 4942 4943 return t; 4944 } 4945 4946 static void t_stop(struct seq_file *m, void *p) 4947 { 4948 mutex_unlock(&trace_types_lock); 4949 } 4950 4951 static int t_show(struct seq_file *m, void *v) 4952 { 4953 struct tracer *t = v; 4954 4955 if (!t) 4956 return 0; 4957 4958 seq_puts(m, t->name); 4959 if (t->next) 4960 seq_putc(m, ' '); 4961 else 4962 seq_putc(m, '\n'); 4963 4964 return 0; 4965 } 4966 4967 static const struct seq_operations show_traces_seq_ops = { 4968 .start = t_start, 4969 .next = t_next, 4970 .stop = t_stop, 4971 .show = t_show, 4972 }; 4973 4974 static int show_traces_open(struct inode *inode, struct file *file) 4975 { 4976 struct trace_array *tr = inode->i_private; 4977 struct seq_file *m; 4978 int ret; 4979 4980 ret = tracing_check_open_get_tr(tr); 4981 if (ret) 4982 return ret; 4983 4984 ret = seq_open(file, &show_traces_seq_ops); 4985 if (ret) { 4986 trace_array_put(tr); 4987 return ret; 4988 } 4989 4990 m = file->private_data; 4991 m->private = tr; 4992 4993 return 0; 4994 } 4995 4996 static int tracing_seq_release(struct inode *inode, struct file *file) 4997 { 4998 struct trace_array *tr = inode->i_private; 4999 5000 trace_array_put(tr); 5001 return seq_release(inode, file); 5002 } 5003 5004 static ssize_t 5005 tracing_write_stub(struct file *filp, const char __user *ubuf, 5006 size_t count, loff_t *ppos) 5007 { 5008 return count; 5009 } 5010 5011 loff_t tracing_lseek(struct file *file, loff_t offset, int whence) 5012 { 5013 int ret; 5014 5015 if (file->f_mode & FMODE_READ) 5016 ret = seq_lseek(file, offset, whence); 5017 else 5018 file->f_pos = ret = 0; 5019 5020 return ret; 5021 } 5022 5023 static const struct file_operations tracing_fops = { 5024 .open = tracing_open, 5025 .read = seq_read, 5026 .read_iter = seq_read_iter, 5027 .splice_read = copy_splice_read, 5028 .write = tracing_write_stub, 5029 .llseek = tracing_lseek, 5030 .release = tracing_release, 5031 }; 5032 5033 static const struct file_operations show_traces_fops = { 5034 .open = show_traces_open, 5035 .read = seq_read, 5036 .llseek = seq_lseek, 5037 .release 
= tracing_seq_release, 5038 }; 5039 5040 static ssize_t 5041 tracing_cpumask_read(struct file *filp, char __user *ubuf, 5042 size_t count, loff_t *ppos) 5043 { 5044 struct trace_array *tr = file_inode(filp)->i_private; 5045 char *mask_str; 5046 int len; 5047 5048 len = snprintf(NULL, 0, "%*pb\n", 5049 cpumask_pr_args(tr->tracing_cpumask)) + 1; 5050 mask_str = kmalloc(len, GFP_KERNEL); 5051 if (!mask_str) 5052 return -ENOMEM; 5053 5054 len = snprintf(mask_str, len, "%*pb\n", 5055 cpumask_pr_args(tr->tracing_cpumask)); 5056 if (len >= count) { 5057 count = -EINVAL; 5058 goto out_err; 5059 } 5060 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len); 5061 5062 out_err: 5063 kfree(mask_str); 5064 5065 return count; 5066 } 5067 5068 int tracing_set_cpumask(struct trace_array *tr, 5069 cpumask_var_t tracing_cpumask_new) 5070 { 5071 int cpu; 5072 5073 if (!tr) 5074 return -EINVAL; 5075 5076 local_irq_disable(); 5077 arch_spin_lock(&tr->max_lock); 5078 for_each_tracing_cpu(cpu) { 5079 /* 5080 * Increase/decrease the disabled counter if we are 5081 * about to flip a bit in the cpumask: 5082 */ 5083 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) && 5084 !cpumask_test_cpu(cpu, tracing_cpumask_new)) { 5085 atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled); 5086 ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu); 5087 #ifdef CONFIG_TRACER_MAX_TRACE 5088 ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu); 5089 #endif 5090 } 5091 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) && 5092 cpumask_test_cpu(cpu, tracing_cpumask_new)) { 5093 atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled); 5094 ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu); 5095 #ifdef CONFIG_TRACER_MAX_TRACE 5096 ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu); 5097 #endif 5098 } 5099 } 5100 arch_spin_unlock(&tr->max_lock); 5101 local_irq_enable(); 5102 5103 cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new); 5104 5105 return 0; 5106 } 5107 5108 static ssize_t 5109 tracing_cpumask_write(struct file *filp, const char __user *ubuf, 5110 size_t count, loff_t *ppos) 5111 { 5112 struct trace_array *tr = file_inode(filp)->i_private; 5113 cpumask_var_t tracing_cpumask_new; 5114 int err; 5115 5116 if (count == 0 || count > KMALLOC_MAX_SIZE) 5117 return -EINVAL; 5118 5119 if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL)) 5120 return -ENOMEM; 5121 5122 err = cpumask_parse_user(ubuf, count, tracing_cpumask_new); 5123 if (err) 5124 goto err_free; 5125 5126 err = tracing_set_cpumask(tr, tracing_cpumask_new); 5127 if (err) 5128 goto err_free; 5129 5130 free_cpumask_var(tracing_cpumask_new); 5131 5132 return count; 5133 5134 err_free: 5135 free_cpumask_var(tracing_cpumask_new); 5136 5137 return err; 5138 } 5139 5140 static const struct file_operations tracing_cpumask_fops = { 5141 .open = tracing_open_generic_tr, 5142 .read = tracing_cpumask_read, 5143 .write = tracing_cpumask_write, 5144 .release = tracing_release_generic_tr, 5145 .llseek = generic_file_llseek, 5146 }; 5147 5148 static int tracing_trace_options_show(struct seq_file *m, void *v) 5149 { 5150 struct tracer_opt *trace_opts; 5151 struct trace_array *tr = m->private; 5152 u32 tracer_flags; 5153 int i; 5154 5155 guard(mutex)(&trace_types_lock); 5156 5157 tracer_flags = tr->current_trace->flags->val; 5158 trace_opts = tr->current_trace->flags->opts; 5159 5160 for (i = 0; trace_options[i]; i++) { 5161 if (tr->trace_flags & (1 << i)) 5162 seq_printf(m, "%s\n", trace_options[i]); 5163 else 5164 seq_printf(m, 
"no%s\n", trace_options[i]); 5165 } 5166 5167 for (i = 0; trace_opts[i].name; i++) { 5168 if (tracer_flags & trace_opts[i].bit) 5169 seq_printf(m, "%s\n", trace_opts[i].name); 5170 else 5171 seq_printf(m, "no%s\n", trace_opts[i].name); 5172 } 5173 5174 return 0; 5175 } 5176 5177 static int __set_tracer_option(struct trace_array *tr, 5178 struct tracer_flags *tracer_flags, 5179 struct tracer_opt *opts, int neg) 5180 { 5181 struct tracer *trace = tracer_flags->trace; 5182 int ret; 5183 5184 ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg); 5185 if (ret) 5186 return ret; 5187 5188 if (neg) 5189 tracer_flags->val &= ~opts->bit; 5190 else 5191 tracer_flags->val |= opts->bit; 5192 return 0; 5193 } 5194 5195 /* Try to assign a tracer specific option */ 5196 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg) 5197 { 5198 struct tracer *trace = tr->current_trace; 5199 struct tracer_flags *tracer_flags = trace->flags; 5200 struct tracer_opt *opts = NULL; 5201 int i; 5202 5203 for (i = 0; tracer_flags->opts[i].name; i++) { 5204 opts = &tracer_flags->opts[i]; 5205 5206 if (strcmp(cmp, opts->name) == 0) 5207 return __set_tracer_option(tr, trace->flags, opts, neg); 5208 } 5209 5210 return -EINVAL; 5211 } 5212 5213 /* Some tracers require overwrite to stay enabled */ 5214 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set) 5215 { 5216 if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set) 5217 return -1; 5218 5219 return 0; 5220 } 5221 5222 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled) 5223 { 5224 if ((mask == TRACE_ITER_RECORD_TGID) || 5225 (mask == TRACE_ITER_RECORD_CMD) || 5226 (mask == TRACE_ITER_TRACE_PRINTK)) 5227 lockdep_assert_held(&event_mutex); 5228 5229 /* do nothing if flag is already set */ 5230 if (!!(tr->trace_flags & mask) == !!enabled) 5231 return 0; 5232 5233 /* Give the tracer a chance to approve the change */ 5234 if (tr->current_trace->flag_changed) 5235 if (tr->current_trace->flag_changed(tr, mask, !!enabled)) 5236 return -EINVAL; 5237 5238 if (mask == TRACE_ITER_TRACE_PRINTK) { 5239 if (enabled) { 5240 update_printk_trace(tr); 5241 } else { 5242 /* 5243 * The global_trace cannot clear this. 5244 * It's flag only gets cleared if another instance sets it. 5245 */ 5246 if (printk_trace == &global_trace) 5247 return -EINVAL; 5248 /* 5249 * An instance must always have it set. 5250 * by default, that's the global_trace instane. 
5251 */ 5252 if (printk_trace == tr) 5253 update_printk_trace(&global_trace); 5254 } 5255 } 5256 5257 if (enabled) 5258 tr->trace_flags |= mask; 5259 else 5260 tr->trace_flags &= ~mask; 5261 5262 if (mask == TRACE_ITER_RECORD_CMD) 5263 trace_event_enable_cmd_record(enabled); 5264 5265 if (mask == TRACE_ITER_RECORD_TGID) { 5266 5267 if (trace_alloc_tgid_map() < 0) { 5268 tr->trace_flags &= ~TRACE_ITER_RECORD_TGID; 5269 return -ENOMEM; 5270 } 5271 5272 trace_event_enable_tgid_record(enabled); 5273 } 5274 5275 if (mask == TRACE_ITER_EVENT_FORK) 5276 trace_event_follow_fork(tr, enabled); 5277 5278 if (mask == TRACE_ITER_FUNC_FORK) 5279 ftrace_pid_follow_fork(tr, enabled); 5280 5281 if (mask == TRACE_ITER_OVERWRITE) { 5282 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled); 5283 #ifdef CONFIG_TRACER_MAX_TRACE 5284 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled); 5285 #endif 5286 } 5287 5288 if (mask == TRACE_ITER_PRINTK) { 5289 trace_printk_start_stop_comm(enabled); 5290 trace_printk_control(enabled); 5291 } 5292 5293 return 0; 5294 } 5295 5296 int trace_set_options(struct trace_array *tr, char *option) 5297 { 5298 char *cmp; 5299 int neg = 0; 5300 int ret; 5301 size_t orig_len = strlen(option); 5302 int len; 5303 5304 cmp = strstrip(option); 5305 5306 len = str_has_prefix(cmp, "no"); 5307 if (len) 5308 neg = 1; 5309 5310 cmp += len; 5311 5312 mutex_lock(&event_mutex); 5313 mutex_lock(&trace_types_lock); 5314 5315 ret = match_string(trace_options, -1, cmp); 5316 /* If no option could be set, test the specific tracer options */ 5317 if (ret < 0) 5318 ret = set_tracer_option(tr, cmp, neg); 5319 else 5320 ret = set_tracer_flag(tr, 1 << ret, !neg); 5321 5322 mutex_unlock(&trace_types_lock); 5323 mutex_unlock(&event_mutex); 5324 5325 /* 5326 * If the first trailing whitespace is replaced with '\0' by strstrip, 5327 * turn it back into a space. 
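 *
 * Illustrative example: for option = "sym-offset\n", strstrip() writes
 * '\0' over the '\n', so strlen() drops from 11 to 10.  Writing ' '
 * back at index 10 restores the caller's buffer to its original length
 * (the original terminator one byte later is still in place), which
 * matters when the caller parses the same buffer more than once, as
 * apply_trace_boot_options() does when it restores its ',' separators.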
5328 */ 5329 if (orig_len > strlen(option)) 5330 option[strlen(option)] = ' '; 5331 5332 return ret; 5333 } 5334 5335 static void __init apply_trace_boot_options(void) 5336 { 5337 char *buf = trace_boot_options_buf; 5338 char *option; 5339 5340 while (true) { 5341 option = strsep(&buf, ","); 5342 5343 if (!option) 5344 break; 5345 5346 if (*option) 5347 trace_set_options(&global_trace, option); 5348 5349 /* Put back the comma to allow this to be called again */ 5350 if (buf) 5351 *(buf - 1) = ','; 5352 } 5353 } 5354 5355 static ssize_t 5356 tracing_trace_options_write(struct file *filp, const char __user *ubuf, 5357 size_t cnt, loff_t *ppos) 5358 { 5359 struct seq_file *m = filp->private_data; 5360 struct trace_array *tr = m->private; 5361 char buf[64]; 5362 int ret; 5363 5364 if (cnt >= sizeof(buf)) 5365 return -EINVAL; 5366 5367 if (copy_from_user(buf, ubuf, cnt)) 5368 return -EFAULT; 5369 5370 buf[cnt] = 0; 5371 5372 ret = trace_set_options(tr, buf); 5373 if (ret < 0) 5374 return ret; 5375 5376 *ppos += cnt; 5377 5378 return cnt; 5379 } 5380 5381 static int tracing_trace_options_open(struct inode *inode, struct file *file) 5382 { 5383 struct trace_array *tr = inode->i_private; 5384 int ret; 5385 5386 ret = tracing_check_open_get_tr(tr); 5387 if (ret) 5388 return ret; 5389 5390 ret = single_open(file, tracing_trace_options_show, inode->i_private); 5391 if (ret < 0) 5392 trace_array_put(tr); 5393 5394 return ret; 5395 } 5396 5397 static const struct file_operations tracing_iter_fops = { 5398 .open = tracing_trace_options_open, 5399 .read = seq_read, 5400 .llseek = seq_lseek, 5401 .release = tracing_single_release_tr, 5402 .write = tracing_trace_options_write, 5403 }; 5404 5405 static const char readme_msg[] = 5406 "tracing mini-HOWTO:\n\n" 5407 "By default tracefs removes all OTH file permission bits.\n" 5408 "When mounting tracefs an optional group id can be specified\n" 5409 "which adds the group to every directory and file in tracefs:\n\n" 5410 "\t e.g. 
mount -t tracefs [-o [gid=<gid>]] nodev /sys/kernel/tracing\n\n" 5411 "# echo 0 > tracing_on : quick way to disable tracing\n" 5412 "# echo 1 > tracing_on : quick way to re-enable tracing\n\n" 5413 " Important files:\n" 5414 " trace\t\t\t- The static contents of the buffer\n" 5415 "\t\t\t To clear the buffer write into this file: echo > trace\n" 5416 " trace_pipe\t\t- A consuming read to see the contents of the buffer\n" 5417 " current_tracer\t- function and latency tracers\n" 5418 " available_tracers\t- list of configured tracers for current_tracer\n" 5419 " error_log\t- error log for failed commands (that support it)\n" 5420 " buffer_size_kb\t- view and modify size of per cpu buffer\n" 5421 " buffer_total_size_kb - view total size of all cpu buffers\n\n" 5422 " trace_clock\t\t- change the clock used to order events\n" 5423 " local: Per cpu clock but may not be synced across CPUs\n" 5424 " global: Synced across CPUs but slows tracing down.\n" 5425 " counter: Not a clock, but just an increment\n" 5426 " uptime: Jiffy counter from time of boot\n" 5427 " perf: Same clock that perf events use\n" 5428 #ifdef CONFIG_X86_64 5429 " x86-tsc: TSC cycle counter\n" 5430 #endif 5431 "\n timestamp_mode\t- view the mode used to timestamp events\n" 5432 " delta: Delta difference against a buffer-wide timestamp\n" 5433 " absolute: Absolute (standalone) timestamp\n" 5434 "\n trace_marker\t\t- Writes into this file writes into the kernel buffer\n" 5435 "\n trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n" 5436 " tracing_cpumask\t- Limit which CPUs to trace\n" 5437 " instances\t\t- Make sub-buffers with: mkdir instances/foo\n" 5438 "\t\t\t Remove sub-buffer with rmdir\n" 5439 " trace_options\t\t- Set format or modify how tracing happens\n" 5440 "\t\t\t Disable an option by prefixing 'no' to the\n" 5441 "\t\t\t option name\n" 5442 " saved_cmdlines_size\t- echo command number in here to store comm-pid list\n" 5443 #ifdef CONFIG_DYNAMIC_FTRACE 5444 "\n available_filter_functions - list of functions that can be filtered on\n" 5445 " set_ftrace_filter\t- echo function name in here to only trace these\n" 5446 "\t\t\t functions\n" 5447 "\t accepts: func_full_name or glob-matching-pattern\n" 5448 "\t modules: Can select a group via module\n" 5449 "\t Format: :mod:<module-name>\n" 5450 "\t example: echo :mod:ext3 > set_ftrace_filter\n" 5451 "\t triggers: a command to perform when function is hit\n" 5452 "\t Format: <function>:<trigger>[:count]\n" 5453 "\t trigger: traceon, traceoff\n" 5454 "\t\t enable_event:<system>:<event>\n" 5455 "\t\t disable_event:<system>:<event>\n" 5456 #ifdef CONFIG_STACKTRACE 5457 "\t\t stacktrace\n" 5458 #endif 5459 #ifdef CONFIG_TRACER_SNAPSHOT 5460 "\t\t snapshot\n" 5461 #endif 5462 "\t\t dump\n" 5463 "\t\t cpudump\n" 5464 "\t example: echo do_fault:traceoff > set_ftrace_filter\n" 5465 "\t echo do_trap:traceoff:3 > set_ftrace_filter\n" 5466 "\t The first one will disable tracing every time do_fault is hit\n" 5467 "\t The second will disable tracing at most 3 times when do_trap is hit\n" 5468 "\t The first time do trap is hit and it disables tracing, the\n" 5469 "\t counter will decrement to 2. If tracing is already disabled,\n" 5470 "\t the counter will not decrement. 
It only decrements when the\n" 5471 "\t trigger did work\n" 5472 "\t To remove trigger without count:\n" 5473 "\t echo '!<function>:<trigger> > set_ftrace_filter\n" 5474 "\t To remove trigger with a count:\n" 5475 "\t echo '!<function>:<trigger>:0 > set_ftrace_filter\n" 5476 " set_ftrace_notrace\t- echo function name in here to never trace.\n" 5477 "\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n" 5478 "\t modules: Can select a group via module command :mod:\n" 5479 "\t Does not accept triggers\n" 5480 #endif /* CONFIG_DYNAMIC_FTRACE */ 5481 #ifdef CONFIG_FUNCTION_TRACER 5482 " set_ftrace_pid\t- Write pid(s) to only function trace those pids\n" 5483 "\t\t (function)\n" 5484 " set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n" 5485 "\t\t (function)\n" 5486 #endif 5487 #ifdef CONFIG_FUNCTION_GRAPH_TRACER 5488 " set_graph_function\t- Trace the nested calls of a function (function_graph)\n" 5489 " set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n" 5490 " max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n" 5491 #endif 5492 #ifdef CONFIG_TRACER_SNAPSHOT 5493 "\n snapshot\t\t- Like 'trace' but shows the content of the static\n" 5494 "\t\t\t snapshot buffer. Read the contents for more\n" 5495 "\t\t\t information\n" 5496 #endif 5497 #ifdef CONFIG_STACK_TRACER 5498 " stack_trace\t\t- Shows the max stack trace when active\n" 5499 " stack_max_size\t- Shows current max stack size that was traced\n" 5500 "\t\t\t Write into this file to reset the max size (trigger a\n" 5501 "\t\t\t new trace)\n" 5502 #ifdef CONFIG_DYNAMIC_FTRACE 5503 " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n" 5504 "\t\t\t traces\n" 5505 #endif 5506 #endif /* CONFIG_STACK_TRACER */ 5507 #ifdef CONFIG_DYNAMIC_EVENTS 5508 " dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n" 5509 "\t\t\t Write into this file to define/undefine new trace events.\n" 5510 #endif 5511 #ifdef CONFIG_KPROBE_EVENTS 5512 " kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n" 5513 "\t\t\t Write into this file to define/undefine new trace events.\n" 5514 #endif 5515 #ifdef CONFIG_UPROBE_EVENTS 5516 " uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n" 5517 "\t\t\t Write into this file to define/undefine new trace events.\n" 5518 #endif 5519 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \ 5520 defined(CONFIG_FPROBE_EVENTS) 5521 "\t accepts: event-definitions (one definition per line)\n" 5522 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) 5523 "\t Format: p[:[<group>/][<event>]] <place> [<args>]\n" 5524 "\t r[maxactive][:[<group>/][<event>]] <place> [<args>]\n" 5525 #endif 5526 #ifdef CONFIG_FPROBE_EVENTS 5527 "\t f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n" 5528 "\t t[:[<group>/][<event>]] <tracepoint> [<args>]\n" 5529 #endif 5530 #ifdef CONFIG_HIST_TRIGGERS 5531 "\t s:[synthetic/]<event> <field> [<field>]\n" 5532 #endif 5533 "\t e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n" 5534 "\t -:[<group>/][<event>]\n" 5535 #ifdef CONFIG_KPROBE_EVENTS 5536 "\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n" 5537 "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n" 5538 #endif 5539 #ifdef CONFIG_UPROBE_EVENTS 5540 " place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n" 5541 #endif 5542 "\t args: <name>=fetcharg[:type]\n" 5543 "\t fetcharg: 
(%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n" 5544 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API 5545 "\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n" 5546 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS 5547 "\t <argname>[->field[->field|.field...]],\n" 5548 #endif 5549 #else 5550 "\t $stack<index>, $stack, $retval, $comm,\n" 5551 #endif 5552 "\t +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n" 5553 "\t kernel return probes support: $retval, $arg<N>, $comm\n" 5554 "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n" 5555 "\t b<bit-width>@<bit-offset>/<container-size>, ustring,\n" 5556 "\t symstr, %pd/%pD, <type>\\[<array-size>\\]\n" 5557 #ifdef CONFIG_HIST_TRIGGERS 5558 "\t field: <stype> <name>;\n" 5559 "\t stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n" 5560 "\t [unsigned] char/int/long\n" 5561 #endif 5562 "\t efield: For event probes ('e' types), the field is on of the fields\n" 5563 "\t of the <attached-group>/<attached-event>.\n" 5564 #endif 5565 " set_event\t\t- Enables events by name written into it\n" 5566 "\t\t\t Can enable module events via: :mod:<module>\n" 5567 " events/\t\t- Directory containing all trace event subsystems:\n" 5568 " enable\t\t- Write 0/1 to enable/disable tracing of all events\n" 5569 " events/<system>/\t- Directory containing all trace events for <system>:\n" 5570 " enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n" 5571 "\t\t\t events\n" 5572 " filter\t\t- If set, only events passing filter are traced\n" 5573 " events/<system>/<event>/\t- Directory containing control files for\n" 5574 "\t\t\t <event>:\n" 5575 " enable\t\t- Write 0/1 to enable/disable tracing of <event>\n" 5576 " filter\t\t- If set, only events passing filter are traced\n" 5577 " trigger\t\t- If set, a command to perform when event is hit\n" 5578 "\t Format: <trigger>[:count][if <filter>]\n" 5579 "\t trigger: traceon, traceoff\n" 5580 "\t enable_event:<system>:<event>\n" 5581 "\t disable_event:<system>:<event>\n" 5582 #ifdef CONFIG_HIST_TRIGGERS 5583 "\t enable_hist:<system>:<event>\n" 5584 "\t disable_hist:<system>:<event>\n" 5585 #endif 5586 #ifdef CONFIG_STACKTRACE 5587 "\t\t stacktrace\n" 5588 #endif 5589 #ifdef CONFIG_TRACER_SNAPSHOT 5590 "\t\t snapshot\n" 5591 #endif 5592 #ifdef CONFIG_HIST_TRIGGERS 5593 "\t\t hist (see below)\n" 5594 #endif 5595 "\t example: echo traceoff > events/block/block_unplug/trigger\n" 5596 "\t echo traceoff:3 > events/block/block_unplug/trigger\n" 5597 "\t echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n" 5598 "\t events/block/block_unplug/trigger\n" 5599 "\t The first disables tracing every time block_unplug is hit.\n" 5600 "\t The second disables tracing the first 3 times block_unplug is hit.\n" 5601 "\t The third enables the kmalloc event the first 3 times block_unplug\n" 5602 "\t is hit and has value of greater than 1 for the 'nr_rq' event field.\n" 5603 "\t Like function triggers, the counter is only decremented if it\n" 5604 "\t enabled or disabled tracing.\n" 5605 "\t To remove a trigger without a count:\n" 5606 "\t echo '!<trigger> > <system>/<event>/trigger\n" 5607 "\t To remove a trigger with a count:\n" 5608 "\t echo '!<trigger>:0 > <system>/<event>/trigger\n" 5609 "\t Filters can be ignored when removing a trigger.\n" 5610 #ifdef CONFIG_HIST_TRIGGERS 5611 " hist trigger\t- If set, event hits are aggregated into a hash table\n" 5612 "\t Format: hist:keys=<field1[,field2,...]>\n" 5613 "\t [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n" 5614 "\t 
[:values=<field1[,field2,...]>]\n" 5615 "\t [:sort=<field1[,field2,...]>]\n" 5616 "\t [:size=#entries]\n" 5617 "\t [:pause][:continue][:clear]\n" 5618 "\t [:name=histname1]\n" 5619 "\t [:nohitcount]\n" 5620 "\t [:<handler>.<action>]\n" 5621 "\t [if <filter>]\n\n" 5622 "\t Note, special fields can be used as well:\n" 5623 "\t common_timestamp - to record current timestamp\n" 5624 "\t common_cpu - to record the CPU the event happened on\n" 5625 "\n" 5626 "\t A hist trigger variable can be:\n" 5627 "\t - a reference to a field e.g. x=current_timestamp,\n" 5628 "\t - a reference to another variable e.g. y=$x,\n" 5629 "\t - a numeric literal: e.g. ms_per_sec=1000,\n" 5630 "\t - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n" 5631 "\n" 5632 "\t hist trigger arithmetic expressions support addition(+), subtraction(-),\n" 5633 "\t multiplication(*) and division(/) operators. An operand can be either a\n" 5634 "\t variable reference, field or numeric literal.\n" 5635 "\n" 5636 "\t When a matching event is hit, an entry is added to a hash\n" 5637 "\t table using the key(s) and value(s) named, and the value of a\n" 5638 "\t sum called 'hitcount' is incremented. Keys and values\n" 5639 "\t correspond to fields in the event's format description. Keys\n" 5640 "\t can be any field, or the special string 'common_stacktrace'.\n" 5641 "\t Compound keys consisting of up to two fields can be specified\n" 5642 "\t by the 'keys' keyword. Values must correspond to numeric\n" 5643 "\t fields. Sort keys consisting of up to two fields can be\n" 5644 "\t specified using the 'sort' keyword. The sort direction can\n" 5645 "\t be modified by appending '.descending' or '.ascending' to a\n" 5646 "\t sort field. The 'size' parameter can be used to specify more\n" 5647 "\t or fewer than the default 2048 entries for the hashtable size.\n" 5648 "\t If a hist trigger is given a name using the 'name' parameter,\n" 5649 "\t its histogram data will be shared with other triggers of the\n" 5650 "\t same name, and trigger hits will update this common data.\n\n" 5651 "\t Reading the 'hist' file for the event will dump the hash\n" 5652 "\t table in its entirety to stdout. If there are multiple hist\n" 5653 "\t triggers attached to an event, there will be a table for each\n" 5654 "\t trigger in the output. The table displayed for a named\n" 5655 "\t trigger will be the same as any other instance having the\n" 5656 "\t same name. The default format used to display a given field\n" 5657 "\t can be modified by appending any of the following modifiers\n" 5658 "\t to the field name, as applicable:\n\n" 5659 "\t .hex display a number as a hex value\n" 5660 "\t .sym display an address as a symbol\n" 5661 "\t .sym-offset display an address as a symbol and offset\n" 5662 "\t .execname display a common_pid as a program name\n" 5663 "\t .syscall display a syscall id as a syscall name\n" 5664 "\t .log2 display log2 value rather than raw number\n" 5665 "\t .buckets=size display values in groups of size rather than raw number\n" 5666 "\t .usecs display a common_timestamp in microseconds\n" 5667 "\t .percent display a number of percentage value\n" 5668 "\t .graph display a bar-graph of a value\n\n" 5669 "\t The 'pause' parameter can be used to pause an existing hist\n" 5670 "\t trigger or to start a hist trigger but not log any events\n" 5671 "\t until told to do so. 
'continue' can be used to start or\n" 5672 "\t restart a paused hist trigger.\n\n" 5673 "\t The 'clear' parameter will clear the contents of a running\n" 5674 "\t hist trigger and leave its current paused/active state\n" 5675 "\t unchanged.\n\n" 5676 "\t The 'nohitcount' (or NOHC) parameter will suppress display of\n" 5677 "\t raw hitcount in the histogram.\n\n" 5678 "\t The enable_hist and disable_hist triggers can be used to\n" 5679 "\t have one event conditionally start and stop another event's\n" 5680 "\t already-attached hist trigger. The syntax is analogous to\n" 5681 "\t the enable_event and disable_event triggers.\n\n" 5682 "\t Hist trigger handlers and actions are executed whenever a\n" 5683 "\t a histogram entry is added or updated. They take the form:\n\n" 5684 "\t <handler>.<action>\n\n" 5685 "\t The available handlers are:\n\n" 5686 "\t onmatch(matching.event) - invoke on addition or update\n" 5687 "\t onmax(var) - invoke if var exceeds current max\n" 5688 "\t onchange(var) - invoke action if var changes\n\n" 5689 "\t The available actions are:\n\n" 5690 "\t trace(<synthetic_event>,param list) - generate synthetic event\n" 5691 "\t save(field,...) - save current event fields\n" 5692 #ifdef CONFIG_TRACER_SNAPSHOT 5693 "\t snapshot() - snapshot the trace buffer\n\n" 5694 #endif 5695 #ifdef CONFIG_SYNTH_EVENTS 5696 " events/synthetic_events\t- Create/append/remove/show synthetic events\n" 5697 "\t Write into this file to define/undefine new synthetic events.\n" 5698 "\t example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n" 5699 #endif 5700 #endif 5701 ; 5702 5703 static ssize_t 5704 tracing_readme_read(struct file *filp, char __user *ubuf, 5705 size_t cnt, loff_t *ppos) 5706 { 5707 return simple_read_from_buffer(ubuf, cnt, ppos, 5708 readme_msg, strlen(readme_msg)); 5709 } 5710 5711 static const struct file_operations tracing_readme_fops = { 5712 .open = tracing_open_generic, 5713 .read = tracing_readme_read, 5714 .llseek = generic_file_llseek, 5715 }; 5716 5717 #ifdef CONFIG_TRACE_EVAL_MAP_FILE 5718 static union trace_eval_map_item * 5719 update_eval_map(union trace_eval_map_item *ptr) 5720 { 5721 if (!ptr->map.eval_string) { 5722 if (ptr->tail.next) { 5723 ptr = ptr->tail.next; 5724 /* Set ptr to the next real item (skip head) */ 5725 ptr++; 5726 } else 5727 return NULL; 5728 } 5729 return ptr; 5730 } 5731 5732 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos) 5733 { 5734 union trace_eval_map_item *ptr = v; 5735 5736 /* 5737 * Paranoid! If ptr points to end, we don't want to increment past it. 5738 * This really should never happen. 
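 *
 * The walk below relies on the saved-map layout described at
 * trace_insert_eval_map_file(): each saved array is
 *   [head][map 0] ... [map len-1][tail]
 * and update_eval_map() hops from a tail item to the first real map of
 * the next array (or returns NULL at the final, zeroed tail).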
5739 */ 5740 (*pos)++; 5741 ptr = update_eval_map(ptr); 5742 if (WARN_ON_ONCE(!ptr)) 5743 return NULL; 5744 5745 ptr++; 5746 ptr = update_eval_map(ptr); 5747 5748 return ptr; 5749 } 5750 5751 static void *eval_map_start(struct seq_file *m, loff_t *pos) 5752 { 5753 union trace_eval_map_item *v; 5754 loff_t l = 0; 5755 5756 mutex_lock(&trace_eval_mutex); 5757 5758 v = trace_eval_maps; 5759 if (v) 5760 v++; 5761 5762 while (v && l < *pos) { 5763 v = eval_map_next(m, v, &l); 5764 } 5765 5766 return v; 5767 } 5768 5769 static void eval_map_stop(struct seq_file *m, void *v) 5770 { 5771 mutex_unlock(&trace_eval_mutex); 5772 } 5773 5774 static int eval_map_show(struct seq_file *m, void *v) 5775 { 5776 union trace_eval_map_item *ptr = v; 5777 5778 seq_printf(m, "%s %ld (%s)\n", 5779 ptr->map.eval_string, ptr->map.eval_value, 5780 ptr->map.system); 5781 5782 return 0; 5783 } 5784 5785 static const struct seq_operations tracing_eval_map_seq_ops = { 5786 .start = eval_map_start, 5787 .next = eval_map_next, 5788 .stop = eval_map_stop, 5789 .show = eval_map_show, 5790 }; 5791 5792 static int tracing_eval_map_open(struct inode *inode, struct file *filp) 5793 { 5794 int ret; 5795 5796 ret = tracing_check_open_get_tr(NULL); 5797 if (ret) 5798 return ret; 5799 5800 return seq_open(filp, &tracing_eval_map_seq_ops); 5801 } 5802 5803 static const struct file_operations tracing_eval_map_fops = { 5804 .open = tracing_eval_map_open, 5805 .read = seq_read, 5806 .llseek = seq_lseek, 5807 .release = seq_release, 5808 }; 5809 5810 static inline union trace_eval_map_item * 5811 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr) 5812 { 5813 /* Return tail of array given the head */ 5814 return ptr + ptr->head.length + 1; 5815 } 5816 5817 static void 5818 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start, 5819 int len) 5820 { 5821 struct trace_eval_map **stop; 5822 struct trace_eval_map **map; 5823 union trace_eval_map_item *map_array; 5824 union trace_eval_map_item *ptr; 5825 5826 stop = start + len; 5827 5828 /* 5829 * The trace_eval_maps contains the map plus a head and tail item, 5830 * where the head holds the module and length of array, and the 5831 * tail holds a pointer to the next list. 
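 *
 * For example (illustrative), with len == 3 the allocation below is
 * laid out as:
 *   map_array[0]    head  (mod, length = 3)
 *   map_array[1..3]       copies of the three trace_eval_map entries
 *   map_array[4]    tail  (zeroed, so tail.next == NULL until another
 *                          module chains its own array onto it)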
5832 */ 5833 map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL); 5834 if (!map_array) { 5835 pr_warn("Unable to allocate trace eval mapping\n"); 5836 return; 5837 } 5838 5839 guard(mutex)(&trace_eval_mutex); 5840 5841 if (!trace_eval_maps) 5842 trace_eval_maps = map_array; 5843 else { 5844 ptr = trace_eval_maps; 5845 for (;;) { 5846 ptr = trace_eval_jmp_to_tail(ptr); 5847 if (!ptr->tail.next) 5848 break; 5849 ptr = ptr->tail.next; 5850 5851 } 5852 ptr->tail.next = map_array; 5853 } 5854 map_array->head.mod = mod; 5855 map_array->head.length = len; 5856 map_array++; 5857 5858 for (map = start; (unsigned long)map < (unsigned long)stop; map++) { 5859 map_array->map = **map; 5860 map_array++; 5861 } 5862 memset(map_array, 0, sizeof(*map_array)); 5863 } 5864 5865 static void trace_create_eval_file(struct dentry *d_tracer) 5866 { 5867 trace_create_file("eval_map", TRACE_MODE_READ, d_tracer, 5868 NULL, &tracing_eval_map_fops); 5869 } 5870 5871 #else /* CONFIG_TRACE_EVAL_MAP_FILE */ 5872 static inline void trace_create_eval_file(struct dentry *d_tracer) { } 5873 static inline void trace_insert_eval_map_file(struct module *mod, 5874 struct trace_eval_map **start, int len) { } 5875 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */ 5876 5877 static void trace_insert_eval_map(struct module *mod, 5878 struct trace_eval_map **start, int len) 5879 { 5880 struct trace_eval_map **map; 5881 5882 if (len <= 0) 5883 return; 5884 5885 map = start; 5886 5887 trace_event_eval_update(map, len); 5888 5889 trace_insert_eval_map_file(mod, start, len); 5890 } 5891 5892 static ssize_t 5893 tracing_set_trace_read(struct file *filp, char __user *ubuf, 5894 size_t cnt, loff_t *ppos) 5895 { 5896 struct trace_array *tr = filp->private_data; 5897 char buf[MAX_TRACER_SIZE+2]; 5898 int r; 5899 5900 mutex_lock(&trace_types_lock); 5901 r = sprintf(buf, "%s\n", tr->current_trace->name); 5902 mutex_unlock(&trace_types_lock); 5903 5904 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 5905 } 5906 5907 int tracer_init(struct tracer *t, struct trace_array *tr) 5908 { 5909 tracing_reset_online_cpus(&tr->array_buffer); 5910 return t->init(tr); 5911 } 5912 5913 static void set_buffer_entries(struct array_buffer *buf, unsigned long val) 5914 { 5915 int cpu; 5916 5917 for_each_tracing_cpu(cpu) 5918 per_cpu_ptr(buf->data, cpu)->entries = val; 5919 } 5920 5921 static void update_buffer_entries(struct array_buffer *buf, int cpu) 5922 { 5923 if (cpu == RING_BUFFER_ALL_CPUS) { 5924 set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0)); 5925 } else { 5926 per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu); 5927 } 5928 } 5929 5930 #ifdef CONFIG_TRACER_MAX_TRACE 5931 /* resize @tr's buffer to the size of @size_tr's entries */ 5932 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf, 5933 struct array_buffer *size_buf, int cpu_id) 5934 { 5935 int cpu, ret = 0; 5936 5937 if (cpu_id == RING_BUFFER_ALL_CPUS) { 5938 for_each_tracing_cpu(cpu) { 5939 ret = ring_buffer_resize(trace_buf->buffer, 5940 per_cpu_ptr(size_buf->data, cpu)->entries, cpu); 5941 if (ret < 0) 5942 break; 5943 per_cpu_ptr(trace_buf->data, cpu)->entries = 5944 per_cpu_ptr(size_buf->data, cpu)->entries; 5945 } 5946 } else { 5947 ret = ring_buffer_resize(trace_buf->buffer, 5948 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id); 5949 if (ret == 0) 5950 per_cpu_ptr(trace_buf->data, cpu_id)->entries = 5951 per_cpu_ptr(size_buf->data, cpu_id)->entries; 5952 } 5953 5954 return ret; 5955 } 5956 #endif /* CONFIG_TRACER_MAX_TRACE 
*/ 5957 5958 static int __tracing_resize_ring_buffer(struct trace_array *tr, 5959 unsigned long size, int cpu) 5960 { 5961 int ret; 5962 5963 /* 5964 * If kernel or user changes the size of the ring buffer 5965 * we use the size that was given, and we can forget about 5966 * expanding it later. 5967 */ 5968 trace_set_ring_buffer_expanded(tr); 5969 5970 /* May be called before buffers are initialized */ 5971 if (!tr->array_buffer.buffer) 5972 return 0; 5973 5974 /* Do not allow tracing while resizing ring buffer */ 5975 tracing_stop_tr(tr); 5976 5977 ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu); 5978 if (ret < 0) 5979 goto out_start; 5980 5981 #ifdef CONFIG_TRACER_MAX_TRACE 5982 if (!tr->allocated_snapshot) 5983 goto out; 5984 5985 ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu); 5986 if (ret < 0) { 5987 int r = resize_buffer_duplicate_size(&tr->array_buffer, 5988 &tr->array_buffer, cpu); 5989 if (r < 0) { 5990 /* 5991 * AARGH! We are left with different 5992 * size max buffer!!!! 5993 * The max buffer is our "snapshot" buffer. 5994 * When a tracer needs a snapshot (one of the 5995 * latency tracers), it swaps the max buffer 5996 * with the saved snap shot. We succeeded to 5997 * update the size of the main buffer, but failed to 5998 * update the size of the max buffer. But when we tried 5999 * to reset the main buffer to the original size, we 6000 * failed there too. This is very unlikely to 6001 * happen, but if it does, warn and kill all 6002 * tracing. 6003 */ 6004 WARN_ON(1); 6005 tracing_disabled = 1; 6006 } 6007 goto out_start; 6008 } 6009 6010 update_buffer_entries(&tr->max_buffer, cpu); 6011 6012 out: 6013 #endif /* CONFIG_TRACER_MAX_TRACE */ 6014 6015 update_buffer_entries(&tr->array_buffer, cpu); 6016 out_start: 6017 tracing_start_tr(tr); 6018 return ret; 6019 } 6020 6021 ssize_t tracing_resize_ring_buffer(struct trace_array *tr, 6022 unsigned long size, int cpu_id) 6023 { 6024 guard(mutex)(&trace_types_lock); 6025 6026 if (cpu_id != RING_BUFFER_ALL_CPUS) { 6027 /* make sure, this cpu is enabled in the mask */ 6028 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) 6029 return -EINVAL; 6030 } 6031 6032 return __tracing_resize_ring_buffer(tr, size, cpu_id); 6033 } 6034 6035 struct trace_mod_entry { 6036 unsigned long mod_addr; 6037 char mod_name[MODULE_NAME_LEN]; 6038 }; 6039 6040 struct trace_scratch { 6041 unsigned long text_addr; 6042 unsigned long nr_entries; 6043 struct trace_mod_entry entries[]; 6044 }; 6045 6046 static DEFINE_MUTEX(scratch_mutex); 6047 6048 static int cmp_mod_entry(const void *key, const void *pivot) 6049 { 6050 unsigned long addr = (unsigned long)key; 6051 const struct trace_mod_entry *ent = pivot; 6052 6053 if (addr >= ent[0].mod_addr && addr < ent[1].mod_addr) 6054 return 0; 6055 else 6056 return addr - ent->mod_addr; 6057 } 6058 6059 /** 6060 * trace_adjust_address() - Adjust prev boot address to current address. 6061 * @tr: Persistent ring buffer's trace_array. 6062 * @addr: Address in @tr which is adjusted. 6063 */ 6064 unsigned long trace_adjust_address(struct trace_array *tr, unsigned long addr) 6065 { 6066 struct trace_module_delta *module_delta; 6067 struct trace_scratch *tscratch; 6068 struct trace_mod_entry *entry; 6069 int idx = 0, nr_entries; 6070 6071 /* If we don't have last boot delta, return the address */ 6072 if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) 6073 return addr; 6074 6075 /* tr->module_delta must be protected by rcu. 
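 * The guard(rcu)() below holds the RCU read lock for the remainder
 * of this function.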
*/ 6076 guard(rcu)(); 6077 tscratch = tr->scratch; 6078 /* if there is no tscrach, module_delta must be NULL. */ 6079 module_delta = READ_ONCE(tr->module_delta); 6080 if (!module_delta || !tscratch->nr_entries || 6081 tscratch->entries[0].mod_addr > addr) { 6082 return addr + tr->text_delta; 6083 } 6084 6085 /* Note that entries must be sorted. */ 6086 nr_entries = tscratch->nr_entries; 6087 if (nr_entries == 1 || 6088 tscratch->entries[nr_entries - 1].mod_addr < addr) 6089 idx = nr_entries - 1; 6090 else { 6091 entry = __inline_bsearch((void *)addr, 6092 tscratch->entries, 6093 nr_entries - 1, 6094 sizeof(tscratch->entries[0]), 6095 cmp_mod_entry); 6096 if (entry) 6097 idx = entry - tscratch->entries; 6098 } 6099 6100 return addr + module_delta->delta[idx]; 6101 } 6102 6103 #ifdef CONFIG_MODULES 6104 static int save_mod(struct module *mod, void *data) 6105 { 6106 struct trace_array *tr = data; 6107 struct trace_scratch *tscratch; 6108 struct trace_mod_entry *entry; 6109 unsigned int size; 6110 6111 tscratch = tr->scratch; 6112 if (!tscratch) 6113 return -1; 6114 size = tr->scratch_size; 6115 6116 if (struct_size(tscratch, entries, tscratch->nr_entries + 1) > size) 6117 return -1; 6118 6119 entry = &tscratch->entries[tscratch->nr_entries]; 6120 6121 tscratch->nr_entries++; 6122 6123 entry->mod_addr = (unsigned long)mod->mem[MOD_TEXT].base; 6124 strscpy(entry->mod_name, mod->name); 6125 6126 return 0; 6127 } 6128 #else 6129 static int save_mod(struct module *mod, void *data) 6130 { 6131 return 0; 6132 } 6133 #endif 6134 6135 static void update_last_data(struct trace_array *tr) 6136 { 6137 struct trace_module_delta *module_delta; 6138 struct trace_scratch *tscratch; 6139 6140 if (!(tr->flags & TRACE_ARRAY_FL_BOOT)) 6141 return; 6142 6143 if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) 6144 return; 6145 6146 /* Only if the buffer has previous boot data clear and update it. */ 6147 tr->flags &= ~TRACE_ARRAY_FL_LAST_BOOT; 6148 6149 /* Reset the module list and reload them */ 6150 if (tr->scratch) { 6151 struct trace_scratch *tscratch = tr->scratch; 6152 6153 memset(tscratch->entries, 0, 6154 flex_array_size(tscratch, entries, tscratch->nr_entries)); 6155 tscratch->nr_entries = 0; 6156 6157 guard(mutex)(&scratch_mutex); 6158 module_for_each_mod(save_mod, tr); 6159 } 6160 6161 /* 6162 * Need to clear all CPU buffers as there cannot be events 6163 * from the previous boot mixed with events with this boot 6164 * as that will cause a confusing trace. Need to clear all 6165 * CPU buffers, even for those that may currently be offline. 6166 */ 6167 tracing_reset_all_cpus(&tr->array_buffer); 6168 6169 /* Using current data now */ 6170 tr->text_delta = 0; 6171 6172 if (!tr->scratch) 6173 return; 6174 6175 tscratch = tr->scratch; 6176 module_delta = READ_ONCE(tr->module_delta); 6177 WRITE_ONCE(tr->module_delta, NULL); 6178 kfree_rcu(module_delta, rcu); 6179 6180 /* Set the persistent ring buffer meta data to this address */ 6181 tscratch->text_addr = (unsigned long)_text; 6182 } 6183 6184 /** 6185 * tracing_update_buffers - used by tracing facility to expand ring buffers 6186 * @tr: The tracing instance 6187 * 6188 * To save on memory when the tracing is never used on a system with it 6189 * configured in. The ring buffers are set to a minimum size. But once 6190 * a user starts to use the tracing facility, then they need to grow 6191 * to their default size. 6192 * 6193 * This function is to be called when a tracer is about to be used. 
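 *
 * Returns 0 on success, or a negative error code if expanding the
 * ring buffer fails.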
6194 */ 6195 int tracing_update_buffers(struct trace_array *tr) 6196 { 6197 int ret = 0; 6198 6199 mutex_lock(&trace_types_lock); 6200 6201 update_last_data(tr); 6202 6203 if (!tr->ring_buffer_expanded) 6204 ret = __tracing_resize_ring_buffer(tr, trace_buf_size, 6205 RING_BUFFER_ALL_CPUS); 6206 mutex_unlock(&trace_types_lock); 6207 6208 return ret; 6209 } 6210 6211 struct trace_option_dentry; 6212 6213 static void 6214 create_trace_option_files(struct trace_array *tr, struct tracer *tracer); 6215 6216 /* 6217 * Used to clear out the tracer before deletion of an instance. 6218 * Must have trace_types_lock held. 6219 */ 6220 static void tracing_set_nop(struct trace_array *tr) 6221 { 6222 if (tr->current_trace == &nop_trace) 6223 return; 6224 6225 tr->current_trace->enabled--; 6226 6227 if (tr->current_trace->reset) 6228 tr->current_trace->reset(tr); 6229 6230 tr->current_trace = &nop_trace; 6231 } 6232 6233 static bool tracer_options_updated; 6234 6235 static void add_tracer_options(struct trace_array *tr, struct tracer *t) 6236 { 6237 /* Only enable if the directory has been created already. */ 6238 if (!tr->dir) 6239 return; 6240 6241 /* Only create trace option files after update_tracer_options finish */ 6242 if (!tracer_options_updated) 6243 return; 6244 6245 create_trace_option_files(tr, t); 6246 } 6247 6248 int tracing_set_tracer(struct trace_array *tr, const char *buf) 6249 { 6250 struct tracer *t; 6251 #ifdef CONFIG_TRACER_MAX_TRACE 6252 bool had_max_tr; 6253 #endif 6254 int ret; 6255 6256 guard(mutex)(&trace_types_lock); 6257 6258 update_last_data(tr); 6259 6260 if (!tr->ring_buffer_expanded) { 6261 ret = __tracing_resize_ring_buffer(tr, trace_buf_size, 6262 RING_BUFFER_ALL_CPUS); 6263 if (ret < 0) 6264 return ret; 6265 ret = 0; 6266 } 6267 6268 for (t = trace_types; t; t = t->next) { 6269 if (strcmp(t->name, buf) == 0) 6270 break; 6271 } 6272 if (!t) 6273 return -EINVAL; 6274 6275 if (t == tr->current_trace) 6276 return 0; 6277 6278 #ifdef CONFIG_TRACER_SNAPSHOT 6279 if (t->use_max_tr) { 6280 local_irq_disable(); 6281 arch_spin_lock(&tr->max_lock); 6282 ret = tr->cond_snapshot ? -EBUSY : 0; 6283 arch_spin_unlock(&tr->max_lock); 6284 local_irq_enable(); 6285 if (ret) 6286 return ret; 6287 } 6288 #endif 6289 /* Some tracers won't work on kernel command line */ 6290 if (system_state < SYSTEM_RUNNING && t->noboot) { 6291 pr_warn("Tracer '%s' is not allowed on command line, ignored\n", 6292 t->name); 6293 return -EINVAL; 6294 } 6295 6296 /* Some tracers are only allowed for the top level buffer */ 6297 if (!trace_ok_for_array(t, tr)) 6298 return -EINVAL; 6299 6300 /* If trace pipe files are being read, we can't change the tracer */ 6301 if (tr->trace_ref) 6302 return -EBUSY; 6303 6304 trace_branch_disable(); 6305 6306 tr->current_trace->enabled--; 6307 6308 if (tr->current_trace->reset) 6309 tr->current_trace->reset(tr); 6310 6311 #ifdef CONFIG_TRACER_MAX_TRACE 6312 had_max_tr = tr->current_trace->use_max_tr; 6313 6314 /* Current trace needs to be nop_trace before synchronize_rcu */ 6315 tr->current_trace = &nop_trace; 6316 6317 if (had_max_tr && !t->use_max_tr) { 6318 /* 6319 * We need to make sure that the update_max_tr sees that 6320 * current_trace changed to nop_trace to keep it from 6321 * swapping the buffers after we resize it. 6322 * The update_max_tr is called from interrupts disabled 6323 * so a synchronized_sched() is sufficient. 
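 * (That synchronization is provided by the synchronize_rcu() call
 * right below.)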
6324 */ 6325 synchronize_rcu(); 6326 free_snapshot(tr); 6327 tracing_disarm_snapshot(tr); 6328 } 6329 6330 if (!had_max_tr && t->use_max_tr) { 6331 ret = tracing_arm_snapshot_locked(tr); 6332 if (ret) 6333 return ret; 6334 } 6335 #else 6336 tr->current_trace = &nop_trace; 6337 #endif 6338 6339 if (t->init) { 6340 ret = tracer_init(t, tr); 6341 if (ret) { 6342 #ifdef CONFIG_TRACER_MAX_TRACE 6343 if (t->use_max_tr) 6344 tracing_disarm_snapshot(tr); 6345 #endif 6346 return ret; 6347 } 6348 } 6349 6350 tr->current_trace = t; 6351 tr->current_trace->enabled++; 6352 trace_branch_enable(tr); 6353 6354 return 0; 6355 } 6356 6357 static ssize_t 6358 tracing_set_trace_write(struct file *filp, const char __user *ubuf, 6359 size_t cnt, loff_t *ppos) 6360 { 6361 struct trace_array *tr = filp->private_data; 6362 char buf[MAX_TRACER_SIZE+1]; 6363 char *name; 6364 size_t ret; 6365 int err; 6366 6367 ret = cnt; 6368 6369 if (cnt > MAX_TRACER_SIZE) 6370 cnt = MAX_TRACER_SIZE; 6371 6372 if (copy_from_user(buf, ubuf, cnt)) 6373 return -EFAULT; 6374 6375 buf[cnt] = 0; 6376 6377 name = strim(buf); 6378 6379 err = tracing_set_tracer(tr, name); 6380 if (err) 6381 return err; 6382 6383 *ppos += ret; 6384 6385 return ret; 6386 } 6387 6388 static ssize_t 6389 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf, 6390 size_t cnt, loff_t *ppos) 6391 { 6392 char buf[64]; 6393 int r; 6394 6395 r = snprintf(buf, sizeof(buf), "%ld\n", 6396 *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr)); 6397 if (r > sizeof(buf)) 6398 r = sizeof(buf); 6399 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 6400 } 6401 6402 static ssize_t 6403 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf, 6404 size_t cnt, loff_t *ppos) 6405 { 6406 unsigned long val; 6407 int ret; 6408 6409 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 6410 if (ret) 6411 return ret; 6412 6413 *ptr = val * 1000; 6414 6415 return cnt; 6416 } 6417 6418 static ssize_t 6419 tracing_thresh_read(struct file *filp, char __user *ubuf, 6420 size_t cnt, loff_t *ppos) 6421 { 6422 return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos); 6423 } 6424 6425 static ssize_t 6426 tracing_thresh_write(struct file *filp, const char __user *ubuf, 6427 size_t cnt, loff_t *ppos) 6428 { 6429 struct trace_array *tr = filp->private_data; 6430 int ret; 6431 6432 guard(mutex)(&trace_types_lock); 6433 ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos); 6434 if (ret < 0) 6435 return ret; 6436 6437 if (tr->current_trace->update_thresh) { 6438 ret = tr->current_trace->update_thresh(tr); 6439 if (ret < 0) 6440 return ret; 6441 } 6442 6443 return cnt; 6444 } 6445 6446 #ifdef CONFIG_TRACER_MAX_TRACE 6447 6448 static ssize_t 6449 tracing_max_lat_read(struct file *filp, char __user *ubuf, 6450 size_t cnt, loff_t *ppos) 6451 { 6452 struct trace_array *tr = filp->private_data; 6453 6454 return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos); 6455 } 6456 6457 static ssize_t 6458 tracing_max_lat_write(struct file *filp, const char __user *ubuf, 6459 size_t cnt, loff_t *ppos) 6460 { 6461 struct trace_array *tr = filp->private_data; 6462 6463 return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos); 6464 } 6465 6466 #endif 6467 6468 static int open_pipe_on_cpu(struct trace_array *tr, int cpu) 6469 { 6470 if (cpu == RING_BUFFER_ALL_CPUS) { 6471 if (cpumask_empty(tr->pipe_cpumask)) { 6472 cpumask_setall(tr->pipe_cpumask); 6473 return 0; 6474 } 6475 } else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) { 6476 cpumask_set_cpu(cpu, tr->pipe_cpumask); 6477 return 
0; 6478 } 6479 return -EBUSY; 6480 } 6481 6482 static void close_pipe_on_cpu(struct trace_array *tr, int cpu) 6483 { 6484 if (cpu == RING_BUFFER_ALL_CPUS) { 6485 WARN_ON(!cpumask_full(tr->pipe_cpumask)); 6486 cpumask_clear(tr->pipe_cpumask); 6487 } else { 6488 WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask)); 6489 cpumask_clear_cpu(cpu, tr->pipe_cpumask); 6490 } 6491 } 6492 6493 static int tracing_open_pipe(struct inode *inode, struct file *filp) 6494 { 6495 struct trace_array *tr = inode->i_private; 6496 struct trace_iterator *iter; 6497 int cpu; 6498 int ret; 6499 6500 ret = tracing_check_open_get_tr(tr); 6501 if (ret) 6502 return ret; 6503 6504 mutex_lock(&trace_types_lock); 6505 cpu = tracing_get_cpu(inode); 6506 ret = open_pipe_on_cpu(tr, cpu); 6507 if (ret) 6508 goto fail_pipe_on_cpu; 6509 6510 /* create a buffer to store the information to pass to userspace */ 6511 iter = kzalloc(sizeof(*iter), GFP_KERNEL); 6512 if (!iter) { 6513 ret = -ENOMEM; 6514 goto fail_alloc_iter; 6515 } 6516 6517 trace_seq_init(&iter->seq); 6518 iter->trace = tr->current_trace; 6519 6520 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) { 6521 ret = -ENOMEM; 6522 goto fail; 6523 } 6524 6525 /* trace pipe does not show start of buffer */ 6526 cpumask_setall(iter->started); 6527 6528 if (tr->trace_flags & TRACE_ITER_LATENCY_FMT) 6529 iter->iter_flags |= TRACE_FILE_LAT_FMT; 6530 6531 /* Output in nanoseconds only if we are using a clock in nanoseconds. */ 6532 if (trace_clocks[tr->clock_id].in_ns) 6533 iter->iter_flags |= TRACE_FILE_TIME_IN_NS; 6534 6535 iter->tr = tr; 6536 iter->array_buffer = &tr->array_buffer; 6537 iter->cpu_file = cpu; 6538 mutex_init(&iter->mutex); 6539 filp->private_data = iter; 6540 6541 if (iter->trace->pipe_open) 6542 iter->trace->pipe_open(iter); 6543 6544 nonseekable_open(inode, filp); 6545 6546 tr->trace_ref++; 6547 6548 mutex_unlock(&trace_types_lock); 6549 return ret; 6550 6551 fail: 6552 kfree(iter); 6553 fail_alloc_iter: 6554 close_pipe_on_cpu(tr, cpu); 6555 fail_pipe_on_cpu: 6556 __trace_array_put(tr); 6557 mutex_unlock(&trace_types_lock); 6558 return ret; 6559 } 6560 6561 static int tracing_release_pipe(struct inode *inode, struct file *file) 6562 { 6563 struct trace_iterator *iter = file->private_data; 6564 struct trace_array *tr = inode->i_private; 6565 6566 mutex_lock(&trace_types_lock); 6567 6568 tr->trace_ref--; 6569 6570 if (iter->trace->pipe_close) 6571 iter->trace->pipe_close(iter); 6572 close_pipe_on_cpu(tr, iter->cpu_file); 6573 mutex_unlock(&trace_types_lock); 6574 6575 free_trace_iter_content(iter); 6576 kfree(iter); 6577 6578 trace_array_put(tr); 6579 6580 return 0; 6581 } 6582 6583 static __poll_t 6584 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table) 6585 { 6586 struct trace_array *tr = iter->tr; 6587 6588 /* Iterators are static, they should be filled or empty */ 6589 if (trace_buffer_iter(iter, iter->cpu_file)) 6590 return EPOLLIN | EPOLLRDNORM; 6591 6592 if (tr->trace_flags & TRACE_ITER_BLOCK) 6593 /* 6594 * Always select as readable when in blocking mode 6595 */ 6596 return EPOLLIN | EPOLLRDNORM; 6597 else 6598 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file, 6599 filp, poll_table, iter->tr->buffer_percent); 6600 } 6601 6602 static __poll_t 6603 tracing_poll_pipe(struct file *filp, poll_table *poll_table) 6604 { 6605 struct trace_iterator *iter = filp->private_data; 6606 6607 return trace_poll(iter, filp, poll_table); 6608 } 6609 6610 /* Must be called with iter->mutex held. 
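 * Returns 1 when the caller should attempt a read, -EAGAIN for
 * non-blocking files with nothing to read, or a negative error
 * returned by wait_on_pipe().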
*/ 6611 static int tracing_wait_pipe(struct file *filp) 6612 { 6613 struct trace_iterator *iter = filp->private_data; 6614 int ret; 6615 6616 while (trace_empty(iter)) { 6617 6618 if ((filp->f_flags & O_NONBLOCK)) { 6619 return -EAGAIN; 6620 } 6621 6622 /* 6623 * We block until we read something and tracing is disabled. 6624 * We still block if tracing is disabled, but we have never 6625 * read anything. This allows a user to cat this file, and 6626 * then enable tracing. But after we have read something, 6627 * we give an EOF when tracing is again disabled. 6628 * 6629 * iter->pos will be 0 if we haven't read anything. 6630 */ 6631 if (!tracer_tracing_is_on(iter->tr) && iter->pos) 6632 break; 6633 6634 mutex_unlock(&iter->mutex); 6635 6636 ret = wait_on_pipe(iter, 0); 6637 6638 mutex_lock(&iter->mutex); 6639 6640 if (ret) 6641 return ret; 6642 } 6643 6644 return 1; 6645 } 6646 6647 /* 6648 * Consumer reader. 6649 */ 6650 static ssize_t 6651 tracing_read_pipe(struct file *filp, char __user *ubuf, 6652 size_t cnt, loff_t *ppos) 6653 { 6654 struct trace_iterator *iter = filp->private_data; 6655 ssize_t sret; 6656 6657 /* 6658 * Avoid more than one consumer on a single file descriptor 6659 * This is just a matter of traces coherency, the ring buffer itself 6660 * is protected. 6661 */ 6662 guard(mutex)(&iter->mutex); 6663 6664 /* return any leftover data */ 6665 sret = trace_seq_to_user(&iter->seq, ubuf, cnt); 6666 if (sret != -EBUSY) 6667 return sret; 6668 6669 trace_seq_init(&iter->seq); 6670 6671 if (iter->trace->read) { 6672 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos); 6673 if (sret) 6674 return sret; 6675 } 6676 6677 waitagain: 6678 sret = tracing_wait_pipe(filp); 6679 if (sret <= 0) 6680 return sret; 6681 6682 /* stop when tracing is finished */ 6683 if (trace_empty(iter)) 6684 return 0; 6685 6686 if (cnt >= TRACE_SEQ_BUFFER_SIZE) 6687 cnt = TRACE_SEQ_BUFFER_SIZE - 1; 6688 6689 /* reset all but tr, trace, and overruns */ 6690 trace_iterator_reset(iter); 6691 cpumask_clear(iter->started); 6692 trace_seq_init(&iter->seq); 6693 6694 trace_event_read_lock(); 6695 trace_access_lock(iter->cpu_file); 6696 while (trace_find_next_entry_inc(iter) != NULL) { 6697 enum print_line_t ret; 6698 int save_len = iter->seq.seq.len; 6699 6700 ret = print_trace_line(iter); 6701 if (ret == TRACE_TYPE_PARTIAL_LINE) { 6702 /* 6703 * If one print_trace_line() fills entire trace_seq in one shot, 6704 * trace_seq_to_user() will returns -EBUSY because save_len == 0, 6705 * In this case, we need to consume it, otherwise, loop will peek 6706 * this event next time, resulting in an infinite loop. 6707 */ 6708 if (save_len == 0) { 6709 iter->seq.full = 0; 6710 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n"); 6711 trace_consume(iter); 6712 break; 6713 } 6714 6715 /* In other cases, don't print partial lines */ 6716 iter->seq.seq.len = save_len; 6717 break; 6718 } 6719 if (ret != TRACE_TYPE_NO_CONSUME) 6720 trace_consume(iter); 6721 6722 if (trace_seq_used(&iter->seq) >= cnt) 6723 break; 6724 6725 /* 6726 * Setting the full flag means we reached the trace_seq buffer 6727 * size and we should leave by partial output condition above. 6728 * One of the trace_seq_* functions is not used properly. 
6729 */ 6730 WARN_ONCE(iter->seq.full, "full flag set for trace type %d", 6731 iter->ent->type); 6732 } 6733 trace_access_unlock(iter->cpu_file); 6734 trace_event_read_unlock(); 6735 6736 /* Now copy what we have to the user */ 6737 sret = trace_seq_to_user(&iter->seq, ubuf, cnt); 6738 if (iter->seq.readpos >= trace_seq_used(&iter->seq)) 6739 trace_seq_init(&iter->seq); 6740 6741 /* 6742 * If there was nothing to send to user, in spite of consuming trace 6743 * entries, go back to wait for more entries. 6744 */ 6745 if (sret == -EBUSY) 6746 goto waitagain; 6747 6748 return sret; 6749 } 6750 6751 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd, 6752 unsigned int idx) 6753 { 6754 __free_page(spd->pages[idx]); 6755 } 6756 6757 static size_t 6758 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter) 6759 { 6760 size_t count; 6761 int save_len; 6762 int ret; 6763 6764 /* Seq buffer is page-sized, exactly what we need. */ 6765 for (;;) { 6766 save_len = iter->seq.seq.len; 6767 ret = print_trace_line(iter); 6768 6769 if (trace_seq_has_overflowed(&iter->seq)) { 6770 iter->seq.seq.len = save_len; 6771 break; 6772 } 6773 6774 /* 6775 * This should not be hit, because it should only 6776 * be set if the iter->seq overflowed. But check it 6777 * anyway to be safe. 6778 */ 6779 if (ret == TRACE_TYPE_PARTIAL_LINE) { 6780 iter->seq.seq.len = save_len; 6781 break; 6782 } 6783 6784 count = trace_seq_used(&iter->seq) - save_len; 6785 if (rem < count) { 6786 rem = 0; 6787 iter->seq.seq.len = save_len; 6788 break; 6789 } 6790 6791 if (ret != TRACE_TYPE_NO_CONSUME) 6792 trace_consume(iter); 6793 rem -= count; 6794 if (!trace_find_next_entry_inc(iter)) { 6795 rem = 0; 6796 iter->ent = NULL; 6797 break; 6798 } 6799 } 6800 6801 return rem; 6802 } 6803 6804 static ssize_t tracing_splice_read_pipe(struct file *filp, 6805 loff_t *ppos, 6806 struct pipe_inode_info *pipe, 6807 size_t len, 6808 unsigned int flags) 6809 { 6810 struct page *pages_def[PIPE_DEF_BUFFERS]; 6811 struct partial_page partial_def[PIPE_DEF_BUFFERS]; 6812 struct trace_iterator *iter = filp->private_data; 6813 struct splice_pipe_desc spd = { 6814 .pages = pages_def, 6815 .partial = partial_def, 6816 .nr_pages = 0, /* This gets updated below. */ 6817 .nr_pages_max = PIPE_DEF_BUFFERS, 6818 .ops = &default_pipe_buf_ops, 6819 .spd_release = tracing_spd_release_pipe, 6820 }; 6821 ssize_t ret; 6822 size_t rem; 6823 unsigned int i; 6824 6825 if (splice_grow_spd(pipe, &spd)) 6826 return -ENOMEM; 6827 6828 mutex_lock(&iter->mutex); 6829 6830 if (iter->trace->splice_read) { 6831 ret = iter->trace->splice_read(iter, filp, 6832 ppos, pipe, len, flags); 6833 if (ret) 6834 goto out_err; 6835 } 6836 6837 ret = tracing_wait_pipe(filp); 6838 if (ret <= 0) 6839 goto out_err; 6840 6841 if (!iter->ent && !trace_find_next_entry_inc(iter)) { 6842 ret = -EFAULT; 6843 goto out_err; 6844 } 6845 6846 trace_event_read_lock(); 6847 trace_access_lock(iter->cpu_file); 6848 6849 /* Fill as many pages as possible. */ 6850 for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) { 6851 spd.pages[i] = alloc_page(GFP_KERNEL); 6852 if (!spd.pages[i]) 6853 break; 6854 6855 rem = tracing_fill_pipe_page(rem, iter); 6856 6857 /* Copy the data into the page, so we can start over. 
*/ 6858 ret = trace_seq_to_buffer(&iter->seq, 6859 page_address(spd.pages[i]), 6860 min((size_t)trace_seq_used(&iter->seq), 6861 PAGE_SIZE)); 6862 if (ret < 0) { 6863 __free_page(spd.pages[i]); 6864 break; 6865 } 6866 spd.partial[i].offset = 0; 6867 spd.partial[i].len = ret; 6868 6869 trace_seq_init(&iter->seq); 6870 } 6871 6872 trace_access_unlock(iter->cpu_file); 6873 trace_event_read_unlock(); 6874 mutex_unlock(&iter->mutex); 6875 6876 spd.nr_pages = i; 6877 6878 if (i) 6879 ret = splice_to_pipe(pipe, &spd); 6880 else 6881 ret = 0; 6882 out: 6883 splice_shrink_spd(&spd); 6884 return ret; 6885 6886 out_err: 6887 mutex_unlock(&iter->mutex); 6888 goto out; 6889 } 6890 6891 static ssize_t 6892 tracing_entries_read(struct file *filp, char __user *ubuf, 6893 size_t cnt, loff_t *ppos) 6894 { 6895 struct inode *inode = file_inode(filp); 6896 struct trace_array *tr = inode->i_private; 6897 int cpu = tracing_get_cpu(inode); 6898 char buf[64]; 6899 int r = 0; 6900 ssize_t ret; 6901 6902 mutex_lock(&trace_types_lock); 6903 6904 if (cpu == RING_BUFFER_ALL_CPUS) { 6905 int cpu, buf_size_same; 6906 unsigned long size; 6907 6908 size = 0; 6909 buf_size_same = 1; 6910 /* check if all cpu sizes are same */ 6911 for_each_tracing_cpu(cpu) { 6912 /* fill in the size from first enabled cpu */ 6913 if (size == 0) 6914 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries; 6915 if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) { 6916 buf_size_same = 0; 6917 break; 6918 } 6919 } 6920 6921 if (buf_size_same) { 6922 if (!tr->ring_buffer_expanded) 6923 r = sprintf(buf, "%lu (expanded: %lu)\n", 6924 size >> 10, 6925 trace_buf_size >> 10); 6926 else 6927 r = sprintf(buf, "%lu\n", size >> 10); 6928 } else 6929 r = sprintf(buf, "X\n"); 6930 } else 6931 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10); 6932 6933 mutex_unlock(&trace_types_lock); 6934 6935 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 6936 return ret; 6937 } 6938 6939 static ssize_t 6940 tracing_entries_write(struct file *filp, const char __user *ubuf, 6941 size_t cnt, loff_t *ppos) 6942 { 6943 struct inode *inode = file_inode(filp); 6944 struct trace_array *tr = inode->i_private; 6945 unsigned long val; 6946 int ret; 6947 6948 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 6949 if (ret) 6950 return ret; 6951 6952 /* must have at least 1 entry */ 6953 if (!val) 6954 return -EINVAL; 6955 6956 /* value is in KB */ 6957 val <<= 10; 6958 ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode)); 6959 if (ret < 0) 6960 return ret; 6961 6962 *ppos += cnt; 6963 6964 return cnt; 6965 } 6966 6967 static ssize_t 6968 tracing_total_entries_read(struct file *filp, char __user *ubuf, 6969 size_t cnt, loff_t *ppos) 6970 { 6971 struct trace_array *tr = filp->private_data; 6972 char buf[64]; 6973 int r, cpu; 6974 unsigned long size = 0, expanded_size = 0; 6975 6976 mutex_lock(&trace_types_lock); 6977 for_each_tracing_cpu(cpu) { 6978 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10; 6979 if (!tr->ring_buffer_expanded) 6980 expanded_size += trace_buf_size >> 10; 6981 } 6982 if (tr->ring_buffer_expanded) 6983 r = sprintf(buf, "%lu\n", size); 6984 else 6985 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size); 6986 mutex_unlock(&trace_types_lock); 6987 6988 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 6989 } 6990 6991 #define LAST_BOOT_HEADER ((void *)1) 6992 6993 static void *l_next(struct seq_file *m, void *v, loff_t *pos) 6994 { 6995 struct trace_array *tr = m->private; 
6996 struct trace_scratch *tscratch = tr->scratch; 6997 unsigned int index = *pos; 6998 6999 (*pos)++; 7000 7001 if (*pos == 1) 7002 return LAST_BOOT_HEADER; 7003 7004 /* Only show offsets of the last boot data */ 7005 if (!tscratch || !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) 7006 return NULL; 7007 7008 /* *pos 0 is for the header, 1 is for the first module */ 7009 index--; 7010 7011 if (index >= tscratch->nr_entries) 7012 return NULL; 7013 7014 return &tscratch->entries[index]; 7015 } 7016 7017 static void *l_start(struct seq_file *m, loff_t *pos) 7018 { 7019 mutex_lock(&scratch_mutex); 7020 7021 return l_next(m, NULL, pos); 7022 } 7023 7024 static void l_stop(struct seq_file *m, void *p) 7025 { 7026 mutex_unlock(&scratch_mutex); 7027 } 7028 7029 static void show_last_boot_header(struct seq_file *m, struct trace_array *tr) 7030 { 7031 struct trace_scratch *tscratch = tr->scratch; 7032 7033 /* 7034 * Do not leak KASLR address. This only shows the KASLR address of 7035 * the last boot. When the ring buffer is started, the LAST_BOOT 7036 * flag gets cleared, and this should only report "current". 7037 * Otherwise it shows the KASLR address from the previous boot which 7038 * should not be the same as the current boot. 7039 */ 7040 if (tscratch && (tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) 7041 seq_printf(m, "%lx\t[kernel]\n", tscratch->text_addr); 7042 else 7043 seq_puts(m, "# Current\n"); 7044 } 7045 7046 static int l_show(struct seq_file *m, void *v) 7047 { 7048 struct trace_array *tr = m->private; 7049 struct trace_mod_entry *entry = v; 7050 7051 if (v == LAST_BOOT_HEADER) { 7052 show_last_boot_header(m, tr); 7053 return 0; 7054 } 7055 7056 seq_printf(m, "%lx\t%s\n", entry->mod_addr, entry->mod_name); 7057 return 0; 7058 } 7059 7060 static const struct seq_operations last_boot_seq_ops = { 7061 .start = l_start, 7062 .next = l_next, 7063 .stop = l_stop, 7064 .show = l_show, 7065 }; 7066 7067 static int tracing_last_boot_open(struct inode *inode, struct file *file) 7068 { 7069 struct trace_array *tr = inode->i_private; 7070 struct seq_file *m; 7071 int ret; 7072 7073 ret = tracing_check_open_get_tr(tr); 7074 if (ret) 7075 return ret; 7076 7077 ret = seq_open(file, &last_boot_seq_ops); 7078 if (ret) { 7079 trace_array_put(tr); 7080 return ret; 7081 } 7082 7083 m = file->private_data; 7084 m->private = tr; 7085 7086 return 0; 7087 } 7088 7089 static int tracing_buffer_meta_open(struct inode *inode, struct file *filp) 7090 { 7091 struct trace_array *tr = inode->i_private; 7092 int cpu = tracing_get_cpu(inode); 7093 int ret; 7094 7095 ret = tracing_check_open_get_tr(tr); 7096 if (ret) 7097 return ret; 7098 7099 ret = ring_buffer_meta_seq_init(filp, tr->array_buffer.buffer, cpu); 7100 if (ret < 0) 7101 __trace_array_put(tr); 7102 return ret; 7103 } 7104 7105 static ssize_t 7106 tracing_free_buffer_write(struct file *filp, const char __user *ubuf, 7107 size_t cnt, loff_t *ppos) 7108 { 7109 /* 7110 * There is no need to read what the user has written, this function 7111 * is just to make sure that there is no error when "echo" is used 7112 */ 7113 7114 *ppos += cnt; 7115 7116 return cnt; 7117 } 7118 7119 static int 7120 tracing_free_buffer_release(struct inode *inode, struct file *filp) 7121 { 7122 struct trace_array *tr = inode->i_private; 7123 7124 /* disable tracing ? 
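 * Tracing is turned off here only when this instance has the
 * TRACE_ITER_STOP_ON_FREE flag set.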
*/ 7125 if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE) 7126 tracer_tracing_off(tr); 7127 /* resize the ring buffer to 0 */ 7128 tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS); 7129 7130 trace_array_put(tr); 7131 7132 return 0; 7133 } 7134 7135 #define TRACE_MARKER_MAX_SIZE 4096 7136 7137 static ssize_t 7138 tracing_mark_write(struct file *filp, const char __user *ubuf, 7139 size_t cnt, loff_t *fpos) 7140 { 7141 struct trace_array *tr = filp->private_data; 7142 struct ring_buffer_event *event; 7143 enum event_trigger_type tt = ETT_NONE; 7144 struct trace_buffer *buffer; 7145 struct print_entry *entry; 7146 int meta_size; 7147 ssize_t written; 7148 size_t size; 7149 int len; 7150 7151 /* Used in tracing_mark_raw_write() as well */ 7152 #define FAULTED_STR "<faulted>" 7153 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */ 7154 7155 if (tracing_disabled) 7156 return -EINVAL; 7157 7158 if (!(tr->trace_flags & TRACE_ITER_MARKERS)) 7159 return -EINVAL; 7160 7161 if ((ssize_t)cnt < 0) 7162 return -EINVAL; 7163 7164 if (cnt > TRACE_MARKER_MAX_SIZE) 7165 cnt = TRACE_MARKER_MAX_SIZE; 7166 7167 meta_size = sizeof(*entry) + 2; /* add '\0' and possible '\n' */ 7168 again: 7169 size = cnt + meta_size; 7170 7171 /* If less than "<faulted>", then make sure we can still add that */ 7172 if (cnt < FAULTED_SIZE) 7173 size += FAULTED_SIZE - cnt; 7174 7175 buffer = tr->array_buffer.buffer; 7176 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, 7177 tracing_gen_ctx()); 7178 if (unlikely(!event)) { 7179 /* 7180 * If the size was greater than what was allowed, then 7181 * make it smaller and try again. 7182 */ 7183 if (size > ring_buffer_max_event_size(buffer)) { 7184 /* cnt < FAULTED size should never be bigger than max */ 7185 if (WARN_ON_ONCE(cnt < FAULTED_SIZE)) 7186 return -EBADF; 7187 cnt = ring_buffer_max_event_size(buffer) - meta_size; 7188 /* The above should only happen once */ 7189 if (WARN_ON_ONCE(cnt + meta_size == size)) 7190 return -EBADF; 7191 goto again; 7192 } 7193 7194 /* Ring buffer disabled, return as if not open for write */ 7195 return -EBADF; 7196 } 7197 7198 entry = ring_buffer_event_data(event); 7199 entry->ip = _THIS_IP_; 7200 7201 len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt); 7202 if (len) { 7203 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE); 7204 cnt = FAULTED_SIZE; 7205 written = -EFAULT; 7206 } else 7207 written = cnt; 7208 7209 if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) { 7210 /* do not add \n before testing triggers, but add \0 */ 7211 entry->buf[cnt] = '\0'; 7212 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event); 7213 } 7214 7215 if (entry->buf[cnt - 1] != '\n') { 7216 entry->buf[cnt] = '\n'; 7217 entry->buf[cnt + 1] = '\0'; 7218 } else 7219 entry->buf[cnt] = '\0'; 7220 7221 if (static_branch_unlikely(&trace_marker_exports_enabled)) 7222 ftrace_exports(event, TRACE_EXPORT_MARKER); 7223 __buffer_unlock_commit(buffer, event); 7224 7225 if (tt) 7226 event_triggers_post_call(tr->trace_marker_file, tt); 7227 7228 return written; 7229 } 7230 7231 static ssize_t 7232 tracing_mark_raw_write(struct file *filp, const char __user *ubuf, 7233 size_t cnt, loff_t *fpos) 7234 { 7235 struct trace_array *tr = filp->private_data; 7236 struct ring_buffer_event *event; 7237 struct trace_buffer *buffer; 7238 struct raw_data_entry *entry; 7239 ssize_t written; 7240 int size; 7241 int len; 7242 7243 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int)) 7244 7245 if (tracing_disabled) 7246 
return -EINVAL; 7247 7248 if (!(tr->trace_flags & TRACE_ITER_MARKERS)) 7249 return -EINVAL; 7250 7251 /* The marker must at least have a tag id */ 7252 if (cnt < sizeof(unsigned int)) 7253 return -EINVAL; 7254 7255 size = sizeof(*entry) + cnt; 7256 if (cnt < FAULT_SIZE_ID) 7257 size += FAULT_SIZE_ID - cnt; 7258 7259 buffer = tr->array_buffer.buffer; 7260 7261 if (size > ring_buffer_max_event_size(buffer)) 7262 return -EINVAL; 7263 7264 event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size, 7265 tracing_gen_ctx()); 7266 if (!event) 7267 /* Ring buffer disabled, return as if not open for write */ 7268 return -EBADF; 7269 7270 entry = ring_buffer_event_data(event); 7271 7272 len = __copy_from_user_inatomic(&entry->id, ubuf, cnt); 7273 if (len) { 7274 entry->id = -1; 7275 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE); 7276 written = -EFAULT; 7277 } else 7278 written = cnt; 7279 7280 __buffer_unlock_commit(buffer, event); 7281 7282 return written; 7283 } 7284 7285 static int tracing_clock_show(struct seq_file *m, void *v) 7286 { 7287 struct trace_array *tr = m->private; 7288 int i; 7289 7290 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) 7291 seq_printf(m, 7292 "%s%s%s%s", i ? " " : "", 7293 i == tr->clock_id ? "[" : "", trace_clocks[i].name, 7294 i == tr->clock_id ? "]" : ""); 7295 seq_putc(m, '\n'); 7296 7297 return 0; 7298 } 7299 7300 int tracing_set_clock(struct trace_array *tr, const char *clockstr) 7301 { 7302 int i; 7303 7304 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) { 7305 if (strcmp(trace_clocks[i].name, clockstr) == 0) 7306 break; 7307 } 7308 if (i == ARRAY_SIZE(trace_clocks)) 7309 return -EINVAL; 7310 7311 mutex_lock(&trace_types_lock); 7312 7313 tr->clock_id = i; 7314 7315 ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func); 7316 7317 /* 7318 * New clock may not be consistent with the previous clock. 7319 * Reset the buffer so that it doesn't have incomparable timestamps. 
7320 */ 7321 tracing_reset_online_cpus(&tr->array_buffer); 7322 7323 #ifdef CONFIG_TRACER_MAX_TRACE 7324 if (tr->max_buffer.buffer) 7325 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func); 7326 tracing_reset_online_cpus(&tr->max_buffer); 7327 #endif 7328 7329 mutex_unlock(&trace_types_lock); 7330 7331 return 0; 7332 } 7333 7334 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf, 7335 size_t cnt, loff_t *fpos) 7336 { 7337 struct seq_file *m = filp->private_data; 7338 struct trace_array *tr = m->private; 7339 char buf[64]; 7340 const char *clockstr; 7341 int ret; 7342 7343 if (cnt >= sizeof(buf)) 7344 return -EINVAL; 7345 7346 if (copy_from_user(buf, ubuf, cnt)) 7347 return -EFAULT; 7348 7349 buf[cnt] = 0; 7350 7351 clockstr = strstrip(buf); 7352 7353 ret = tracing_set_clock(tr, clockstr); 7354 if (ret) 7355 return ret; 7356 7357 *fpos += cnt; 7358 7359 return cnt; 7360 } 7361 7362 static int tracing_clock_open(struct inode *inode, struct file *file) 7363 { 7364 struct trace_array *tr = inode->i_private; 7365 int ret; 7366 7367 ret = tracing_check_open_get_tr(tr); 7368 if (ret) 7369 return ret; 7370 7371 ret = single_open(file, tracing_clock_show, inode->i_private); 7372 if (ret < 0) 7373 trace_array_put(tr); 7374 7375 return ret; 7376 } 7377 7378 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v) 7379 { 7380 struct trace_array *tr = m->private; 7381 7382 mutex_lock(&trace_types_lock); 7383 7384 if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer)) 7385 seq_puts(m, "delta [absolute]\n"); 7386 else 7387 seq_puts(m, "[delta] absolute\n"); 7388 7389 mutex_unlock(&trace_types_lock); 7390 7391 return 0; 7392 } 7393 7394 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file) 7395 { 7396 struct trace_array *tr = inode->i_private; 7397 int ret; 7398 7399 ret = tracing_check_open_get_tr(tr); 7400 if (ret) 7401 return ret; 7402 7403 ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private); 7404 if (ret < 0) 7405 trace_array_put(tr); 7406 7407 return ret; 7408 } 7409 7410 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe) 7411 { 7412 if (rbe == this_cpu_read(trace_buffered_event)) 7413 return ring_buffer_time_stamp(buffer); 7414 7415 return ring_buffer_event_time_stamp(buffer, rbe); 7416 } 7417 7418 /* 7419 * Set or disable using the per CPU trace_buffer_event when possible. 
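 * Requests are reference counted via tr->no_filter_buffering_ref so
 * that nested enable/disable callers balance out.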
7420 */ 7421 int tracing_set_filter_buffering(struct trace_array *tr, bool set) 7422 { 7423 guard(mutex)(&trace_types_lock); 7424 7425 if (set && tr->no_filter_buffering_ref++) 7426 return 0; 7427 7428 if (!set) { 7429 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) 7430 return -EINVAL; 7431 7432 --tr->no_filter_buffering_ref; 7433 } 7434 7435 return 0; 7436 } 7437 7438 struct ftrace_buffer_info { 7439 struct trace_iterator iter; 7440 void *spare; 7441 unsigned int spare_cpu; 7442 unsigned int spare_size; 7443 unsigned int read; 7444 }; 7445 7446 #ifdef CONFIG_TRACER_SNAPSHOT 7447 static int tracing_snapshot_open(struct inode *inode, struct file *file) 7448 { 7449 struct trace_array *tr = inode->i_private; 7450 struct trace_iterator *iter; 7451 struct seq_file *m; 7452 int ret; 7453 7454 ret = tracing_check_open_get_tr(tr); 7455 if (ret) 7456 return ret; 7457 7458 if (file->f_mode & FMODE_READ) { 7459 iter = __tracing_open(inode, file, true); 7460 if (IS_ERR(iter)) 7461 ret = PTR_ERR(iter); 7462 } else { 7463 /* Writes still need the seq_file to hold the private data */ 7464 ret = -ENOMEM; 7465 m = kzalloc(sizeof(*m), GFP_KERNEL); 7466 if (!m) 7467 goto out; 7468 iter = kzalloc(sizeof(*iter), GFP_KERNEL); 7469 if (!iter) { 7470 kfree(m); 7471 goto out; 7472 } 7473 ret = 0; 7474 7475 iter->tr = tr; 7476 iter->array_buffer = &tr->max_buffer; 7477 iter->cpu_file = tracing_get_cpu(inode); 7478 m->private = iter; 7479 file->private_data = m; 7480 } 7481 out: 7482 if (ret < 0) 7483 trace_array_put(tr); 7484 7485 return ret; 7486 } 7487 7488 static void tracing_swap_cpu_buffer(void *tr) 7489 { 7490 update_max_tr_single((struct trace_array *)tr, current, smp_processor_id()); 7491 } 7492 7493 static ssize_t 7494 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt, 7495 loff_t *ppos) 7496 { 7497 struct seq_file *m = filp->private_data; 7498 struct trace_iterator *iter = m->private; 7499 struct trace_array *tr = iter->tr; 7500 unsigned long val; 7501 int ret; 7502 7503 ret = tracing_update_buffers(tr); 7504 if (ret < 0) 7505 return ret; 7506 7507 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 7508 if (ret) 7509 return ret; 7510 7511 guard(mutex)(&trace_types_lock); 7512 7513 if (tr->current_trace->use_max_tr) 7514 return -EBUSY; 7515 7516 local_irq_disable(); 7517 arch_spin_lock(&tr->max_lock); 7518 if (tr->cond_snapshot) 7519 ret = -EBUSY; 7520 arch_spin_unlock(&tr->max_lock); 7521 local_irq_enable(); 7522 if (ret) 7523 return ret; 7524 7525 switch (val) { 7526 case 0: 7527 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) 7528 return -EINVAL; 7529 if (tr->allocated_snapshot) 7530 free_snapshot(tr); 7531 break; 7532 case 1: 7533 /* Only allow per-cpu swap if the ring buffer supports it */ 7534 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP 7535 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) 7536 return -EINVAL; 7537 #endif 7538 if (tr->allocated_snapshot) 7539 ret = resize_buffer_duplicate_size(&tr->max_buffer, 7540 &tr->array_buffer, iter->cpu_file); 7541 7542 ret = tracing_arm_snapshot_locked(tr); 7543 if (ret) 7544 return ret; 7545 7546 /* Now, we're going to swap */ 7547 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) { 7548 local_irq_disable(); 7549 update_max_tr(tr, current, smp_processor_id(), NULL); 7550 local_irq_enable(); 7551 } else { 7552 smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer, 7553 (void *)tr, 1); 7554 } 7555 tracing_disarm_snapshot(tr); 7556 break; 7557 default: 7558 if (tr->allocated_snapshot) { 7559 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) 7560 
tracing_reset_online_cpus(&tr->max_buffer); 7561 else 7562 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file); 7563 } 7564 break; 7565 } 7566 7567 if (ret >= 0) { 7568 *ppos += cnt; 7569 ret = cnt; 7570 } 7571 7572 return ret; 7573 } 7574 7575 static int tracing_snapshot_release(struct inode *inode, struct file *file) 7576 { 7577 struct seq_file *m = file->private_data; 7578 int ret; 7579 7580 ret = tracing_release(inode, file); 7581 7582 if (file->f_mode & FMODE_READ) 7583 return ret; 7584 7585 /* If write only, the seq_file is just a stub */ 7586 if (m) 7587 kfree(m->private); 7588 kfree(m); 7589 7590 return 0; 7591 } 7592 7593 static int tracing_buffers_open(struct inode *inode, struct file *filp); 7594 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf, 7595 size_t count, loff_t *ppos); 7596 static int tracing_buffers_release(struct inode *inode, struct file *file); 7597 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos, 7598 struct pipe_inode_info *pipe, size_t len, unsigned int flags); 7599 7600 static int snapshot_raw_open(struct inode *inode, struct file *filp) 7601 { 7602 struct ftrace_buffer_info *info; 7603 int ret; 7604 7605 /* The following checks for tracefs lockdown */ 7606 ret = tracing_buffers_open(inode, filp); 7607 if (ret < 0) 7608 return ret; 7609 7610 info = filp->private_data; 7611 7612 if (info->iter.trace->use_max_tr) { 7613 tracing_buffers_release(inode, filp); 7614 return -EBUSY; 7615 } 7616 7617 info->iter.snapshot = true; 7618 info->iter.array_buffer = &info->iter.tr->max_buffer; 7619 7620 return ret; 7621 } 7622 7623 #endif /* CONFIG_TRACER_SNAPSHOT */ 7624 7625 7626 static const struct file_operations tracing_thresh_fops = { 7627 .open = tracing_open_generic, 7628 .read = tracing_thresh_read, 7629 .write = tracing_thresh_write, 7630 .llseek = generic_file_llseek, 7631 }; 7632 7633 #ifdef CONFIG_TRACER_MAX_TRACE 7634 static const struct file_operations tracing_max_lat_fops = { 7635 .open = tracing_open_generic_tr, 7636 .read = tracing_max_lat_read, 7637 .write = tracing_max_lat_write, 7638 .llseek = generic_file_llseek, 7639 .release = tracing_release_generic_tr, 7640 }; 7641 #endif 7642 7643 static const struct file_operations set_tracer_fops = { 7644 .open = tracing_open_generic_tr, 7645 .read = tracing_set_trace_read, 7646 .write = tracing_set_trace_write, 7647 .llseek = generic_file_llseek, 7648 .release = tracing_release_generic_tr, 7649 }; 7650 7651 static const struct file_operations tracing_pipe_fops = { 7652 .open = tracing_open_pipe, 7653 .poll = tracing_poll_pipe, 7654 .read = tracing_read_pipe, 7655 .splice_read = tracing_splice_read_pipe, 7656 .release = tracing_release_pipe, 7657 }; 7658 7659 static const struct file_operations tracing_entries_fops = { 7660 .open = tracing_open_generic_tr, 7661 .read = tracing_entries_read, 7662 .write = tracing_entries_write, 7663 .llseek = generic_file_llseek, 7664 .release = tracing_release_generic_tr, 7665 }; 7666 7667 static const struct file_operations tracing_buffer_meta_fops = { 7668 .open = tracing_buffer_meta_open, 7669 .read = seq_read, 7670 .llseek = seq_lseek, 7671 .release = tracing_seq_release, 7672 }; 7673 7674 static const struct file_operations tracing_total_entries_fops = { 7675 .open = tracing_open_generic_tr, 7676 .read = tracing_total_entries_read, 7677 .llseek = generic_file_llseek, 7678 .release = tracing_release_generic_tr, 7679 }; 7680 7681 static const struct file_operations tracing_free_buffer_fops = { 7682 .open = tracing_open_generic_tr, 
7683 .write = tracing_free_buffer_write, 7684 .release = tracing_free_buffer_release, 7685 }; 7686 7687 static const struct file_operations tracing_mark_fops = { 7688 .open = tracing_mark_open, 7689 .write = tracing_mark_write, 7690 .release = tracing_release_generic_tr, 7691 }; 7692 7693 static const struct file_operations tracing_mark_raw_fops = { 7694 .open = tracing_mark_open, 7695 .write = tracing_mark_raw_write, 7696 .release = tracing_release_generic_tr, 7697 }; 7698 7699 static const struct file_operations trace_clock_fops = { 7700 .open = tracing_clock_open, 7701 .read = seq_read, 7702 .llseek = seq_lseek, 7703 .release = tracing_single_release_tr, 7704 .write = tracing_clock_write, 7705 }; 7706 7707 static const struct file_operations trace_time_stamp_mode_fops = { 7708 .open = tracing_time_stamp_mode_open, 7709 .read = seq_read, 7710 .llseek = seq_lseek, 7711 .release = tracing_single_release_tr, 7712 }; 7713 7714 static const struct file_operations last_boot_fops = { 7715 .open = tracing_last_boot_open, 7716 .read = seq_read, 7717 .llseek = seq_lseek, 7718 .release = tracing_seq_release, 7719 }; 7720 7721 #ifdef CONFIG_TRACER_SNAPSHOT 7722 static const struct file_operations snapshot_fops = { 7723 .open = tracing_snapshot_open, 7724 .read = seq_read, 7725 .write = tracing_snapshot_write, 7726 .llseek = tracing_lseek, 7727 .release = tracing_snapshot_release, 7728 }; 7729 7730 static const struct file_operations snapshot_raw_fops = { 7731 .open = snapshot_raw_open, 7732 .read = tracing_buffers_read, 7733 .release = tracing_buffers_release, 7734 .splice_read = tracing_buffers_splice_read, 7735 }; 7736 7737 #endif /* CONFIG_TRACER_SNAPSHOT */ 7738 7739 /* 7740 * trace_min_max_write - Write a u64 value to a trace_min_max_param struct 7741 * @filp: The active open file structure 7742 * @ubuf: The userspace provided buffer to read value into 7743 * @cnt: The maximum number of bytes to read 7744 * @ppos: The current "file" position 7745 * 7746 * This function implements the write interface for a struct trace_min_max_param. 7747 * The filp->private_data must point to a trace_min_max_param structure that 7748 * defines where to write the value, the min and the max acceptable values, 7749 * and a lock to protect the write. 7750 */ 7751 static ssize_t 7752 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) 7753 { 7754 struct trace_min_max_param *param = filp->private_data; 7755 u64 val; 7756 int err; 7757 7758 if (!param) 7759 return -EFAULT; 7760 7761 err = kstrtoull_from_user(ubuf, cnt, 10, &val); 7762 if (err) 7763 return err; 7764 7765 if (param->lock) 7766 mutex_lock(param->lock); 7767 7768 if (param->min && val < *param->min) 7769 err = -EINVAL; 7770 7771 if (param->max && val > *param->max) 7772 err = -EINVAL; 7773 7774 if (!err) 7775 *param->val = val; 7776 7777 if (param->lock) 7778 mutex_unlock(param->lock); 7779 7780 if (err) 7781 return err; 7782 7783 return cnt; 7784 } 7785 7786 /* 7787 * trace_min_max_read - Read a u64 value from a trace_min_max_param struct 7788 * @filp: The active open file structure 7789 * @ubuf: The userspace provided buffer to read value into 7790 * @cnt: The maximum number of bytes to read 7791 * @ppos: The current "file" position 7792 * 7793 * This function implements the read interface for a struct trace_min_max_param. 7794 * The filp->private_data must point to a trace_min_max_param struct with valid 7795 * data. 
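 *
 * Returns the number of bytes copied to @ubuf, or -EFAULT if
 * filp->private_data is NULL.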
7796 */ 7797 static ssize_t 7798 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) 7799 { 7800 struct trace_min_max_param *param = filp->private_data; 7801 char buf[U64_STR_SIZE]; 7802 int len; 7803 u64 val; 7804 7805 if (!param) 7806 return -EFAULT; 7807 7808 val = *param->val; 7809 7810 if (cnt > sizeof(buf)) 7811 cnt = sizeof(buf); 7812 7813 len = snprintf(buf, sizeof(buf), "%llu\n", val); 7814 7815 return simple_read_from_buffer(ubuf, cnt, ppos, buf, len); 7816 } 7817 7818 const struct file_operations trace_min_max_fops = { 7819 .open = tracing_open_generic, 7820 .read = trace_min_max_read, 7821 .write = trace_min_max_write, 7822 }; 7823 7824 #define TRACING_LOG_ERRS_MAX 8 7825 #define TRACING_LOG_LOC_MAX 128 7826 7827 #define CMD_PREFIX " Command: " 7828 7829 struct err_info { 7830 const char **errs; /* ptr to loc-specific array of err strings */ 7831 u8 type; /* index into errs -> specific err string */ 7832 u16 pos; /* caret position */ 7833 u64 ts; 7834 }; 7835 7836 struct tracing_log_err { 7837 struct list_head list; 7838 struct err_info info; 7839 char loc[TRACING_LOG_LOC_MAX]; /* err location */ 7840 char *cmd; /* what caused err */ 7841 }; 7842 7843 static DEFINE_MUTEX(tracing_err_log_lock); 7844 7845 static struct tracing_log_err *alloc_tracing_log_err(int len) 7846 { 7847 struct tracing_log_err *err; 7848 7849 err = kzalloc(sizeof(*err), GFP_KERNEL); 7850 if (!err) 7851 return ERR_PTR(-ENOMEM); 7852 7853 err->cmd = kzalloc(len, GFP_KERNEL); 7854 if (!err->cmd) { 7855 kfree(err); 7856 return ERR_PTR(-ENOMEM); 7857 } 7858 7859 return err; 7860 } 7861 7862 static void free_tracing_log_err(struct tracing_log_err *err) 7863 { 7864 kfree(err->cmd); 7865 kfree(err); 7866 } 7867 7868 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr, 7869 int len) 7870 { 7871 struct tracing_log_err *err; 7872 char *cmd; 7873 7874 if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) { 7875 err = alloc_tracing_log_err(len); 7876 if (PTR_ERR(err) != -ENOMEM) 7877 tr->n_err_log_entries++; 7878 7879 return err; 7880 } 7881 cmd = kzalloc(len, GFP_KERNEL); 7882 if (!cmd) 7883 return ERR_PTR(-ENOMEM); 7884 err = list_first_entry(&tr->err_log, struct tracing_log_err, list); 7885 kfree(err->cmd); 7886 err->cmd = cmd; 7887 list_del(&err->list); 7888 7889 return err; 7890 } 7891 7892 /** 7893 * err_pos - find the position of a string within a command for error careting 7894 * @cmd: The tracing command that caused the error 7895 * @str: The string to position the caret at within @cmd 7896 * 7897 * Finds the position of the first occurrence of @str within @cmd. The 7898 * return value can be passed to tracing_log_err() for caret placement 7899 * within @cmd. 7900 * 7901 * Returns the index within @cmd of the first occurrence of @str or 0 7902 * if @str was not found. 
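 *
 * For example, err_pos("hist:keys=pid", "keys") returns 5, which
 * places the caret under the 'k' of "keys" in the error_log output.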
7903 */ 7904 unsigned int err_pos(char *cmd, const char *str) 7905 { 7906 char *found; 7907 7908 if (WARN_ON(!strlen(cmd))) 7909 return 0; 7910 7911 found = strstr(cmd, str); 7912 if (found) 7913 return found - cmd; 7914 7915 return 0; 7916 } 7917 7918 /** 7919 * tracing_log_err - write an error to the tracing error log 7920 * @tr: The associated trace array for the error (NULL for top level array) 7921 * @loc: A string describing where the error occurred 7922 * @cmd: The tracing command that caused the error 7923 * @errs: The array of loc-specific static error strings 7924 * @type: The index into errs[], which produces the specific static err string 7925 * @pos: The position the caret should be placed in the cmd 7926 * 7927 * Writes an error into tracing/error_log of the form: 7928 * 7929 * <loc>: error: <text> 7930 * Command: <cmd> 7931 * ^ 7932 * 7933 * tracing/error_log is a small log file containing the last 7934 * TRACING_LOG_ERRS_MAX errors (8). Memory for errors isn't allocated 7935 * unless there has been a tracing error, and the error log can be 7936 * cleared and have its memory freed by writing the empty string in 7937 * truncation mode to it i.e. echo > tracing/error_log. 7938 * 7939 * NOTE: the @errs array along with the @type param are used to 7940 * produce a static error string - this string is not copied and saved 7941 * when the error is logged - only a pointer to it is saved. See 7942 * existing callers for examples of how static strings are typically 7943 * defined for use with tracing_log_err(). 7944 */ 7945 void tracing_log_err(struct trace_array *tr, 7946 const char *loc, const char *cmd, 7947 const char **errs, u8 type, u16 pos) 7948 { 7949 struct tracing_log_err *err; 7950 int len = 0; 7951 7952 if (!tr) 7953 tr = &global_trace; 7954 7955 len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1; 7956 7957 guard(mutex)(&tracing_err_log_lock); 7958 7959 err = get_tracing_log_err(tr, len); 7960 if (PTR_ERR(err) == -ENOMEM) 7961 return; 7962 7963 snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc); 7964 snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd); 7965 7966 err->info.errs = errs; 7967 err->info.type = type; 7968 err->info.pos = pos; 7969 err->info.ts = local_clock(); 7970 7971 list_add_tail(&err->list, &tr->err_log); 7972 } 7973 7974 static void clear_tracing_err_log(struct trace_array *tr) 7975 { 7976 struct tracing_log_err *err, *next; 7977 7978 mutex_lock(&tracing_err_log_lock); 7979 list_for_each_entry_safe(err, next, &tr->err_log, list) { 7980 list_del(&err->list); 7981 free_tracing_log_err(err); 7982 } 7983 7984 tr->n_err_log_entries = 0; 7985 mutex_unlock(&tracing_err_log_lock); 7986 } 7987 7988 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos) 7989 { 7990 struct trace_array *tr = m->private; 7991 7992 mutex_lock(&tracing_err_log_lock); 7993 7994 return seq_list_start(&tr->err_log, *pos); 7995 } 7996 7997 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos) 7998 { 7999 struct trace_array *tr = m->private; 8000 8001 return seq_list_next(v, &tr->err_log, pos); 8002 } 8003 8004 static void tracing_err_log_seq_stop(struct seq_file *m, void *v) 8005 { 8006 mutex_unlock(&tracing_err_log_lock); 8007 } 8008 8009 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos) 8010 { 8011 u16 i; 8012 8013 for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++) 8014 seq_putc(m, ' '); 8015 for (i = 0; i < pos; i++) 8016 seq_putc(m, ' '); 8017 seq_puts(m, "^\n"); 8018 } 8019 8020 static int 
tracing_err_log_seq_show(struct seq_file *m, void *v) 8021 { 8022 struct tracing_log_err *err = v; 8023 8024 if (err) { 8025 const char *err_text = err->info.errs[err->info.type]; 8026 u64 sec = err->info.ts; 8027 u32 nsec; 8028 8029 nsec = do_div(sec, NSEC_PER_SEC); 8030 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000, 8031 err->loc, err_text); 8032 seq_printf(m, "%s", err->cmd); 8033 tracing_err_log_show_pos(m, err->info.pos); 8034 } 8035 8036 return 0; 8037 } 8038 8039 static const struct seq_operations tracing_err_log_seq_ops = { 8040 .start = tracing_err_log_seq_start, 8041 .next = tracing_err_log_seq_next, 8042 .stop = tracing_err_log_seq_stop, 8043 .show = tracing_err_log_seq_show 8044 }; 8045 8046 static int tracing_err_log_open(struct inode *inode, struct file *file) 8047 { 8048 struct trace_array *tr = inode->i_private; 8049 int ret = 0; 8050 8051 ret = tracing_check_open_get_tr(tr); 8052 if (ret) 8053 return ret; 8054 8055 /* If this file was opened for write, then erase contents */ 8056 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) 8057 clear_tracing_err_log(tr); 8058 8059 if (file->f_mode & FMODE_READ) { 8060 ret = seq_open(file, &tracing_err_log_seq_ops); 8061 if (!ret) { 8062 struct seq_file *m = file->private_data; 8063 m->private = tr; 8064 } else { 8065 trace_array_put(tr); 8066 } 8067 } 8068 return ret; 8069 } 8070 8071 static ssize_t tracing_err_log_write(struct file *file, 8072 const char __user *buffer, 8073 size_t count, loff_t *ppos) 8074 { 8075 return count; 8076 } 8077 8078 static int tracing_err_log_release(struct inode *inode, struct file *file) 8079 { 8080 struct trace_array *tr = inode->i_private; 8081 8082 trace_array_put(tr); 8083 8084 if (file->f_mode & FMODE_READ) 8085 seq_release(inode, file); 8086 8087 return 0; 8088 } 8089 8090 static const struct file_operations tracing_err_log_fops = { 8091 .open = tracing_err_log_open, 8092 .write = tracing_err_log_write, 8093 .read = seq_read, 8094 .llseek = tracing_lseek, 8095 .release = tracing_err_log_release, 8096 }; 8097 8098 static int tracing_buffers_open(struct inode *inode, struct file *filp) 8099 { 8100 struct trace_array *tr = inode->i_private; 8101 struct ftrace_buffer_info *info; 8102 int ret; 8103 8104 ret = tracing_check_open_get_tr(tr); 8105 if (ret) 8106 return ret; 8107 8108 info = kvzalloc(sizeof(*info), GFP_KERNEL); 8109 if (!info) { 8110 trace_array_put(tr); 8111 return -ENOMEM; 8112 } 8113 8114 mutex_lock(&trace_types_lock); 8115 8116 info->iter.tr = tr; 8117 info->iter.cpu_file = tracing_get_cpu(inode); 8118 info->iter.trace = tr->current_trace; 8119 info->iter.array_buffer = &tr->array_buffer; 8120 info->spare = NULL; 8121 /* Force reading ring buffer for first read */ 8122 info->read = (unsigned int)-1; 8123 8124 filp->private_data = info; 8125 8126 tr->trace_ref++; 8127 8128 mutex_unlock(&trace_types_lock); 8129 8130 ret = nonseekable_open(inode, filp); 8131 if (ret < 0) 8132 trace_array_put(tr); 8133 8134 return ret; 8135 } 8136 8137 static __poll_t 8138 tracing_buffers_poll(struct file *filp, poll_table *poll_table) 8139 { 8140 struct ftrace_buffer_info *info = filp->private_data; 8141 struct trace_iterator *iter = &info->iter; 8142 8143 return trace_poll(iter, filp, poll_table); 8144 } 8145 8146 static ssize_t 8147 tracing_buffers_read(struct file *filp, char __user *ubuf, 8148 size_t count, loff_t *ppos) 8149 { 8150 struct ftrace_buffer_info *info = filp->private_data; 8151 struct trace_iterator *iter = &info->iter; 8152 void *trace_data; 8153 int page_size; 8154 
ssize_t ret = 0; 8155 ssize_t size; 8156 8157 if (!count) 8158 return 0; 8159 8160 #ifdef CONFIG_TRACER_MAX_TRACE 8161 if (iter->snapshot && iter->tr->current_trace->use_max_tr) 8162 return -EBUSY; 8163 #endif 8164 8165 page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer); 8166 8167 /* Make sure the spare matches the current sub buffer size */ 8168 if (info->spare) { 8169 if (page_size != info->spare_size) { 8170 ring_buffer_free_read_page(iter->array_buffer->buffer, 8171 info->spare_cpu, info->spare); 8172 info->spare = NULL; 8173 } 8174 } 8175 8176 if (!info->spare) { 8177 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer, 8178 iter->cpu_file); 8179 if (IS_ERR(info->spare)) { 8180 ret = PTR_ERR(info->spare); 8181 info->spare = NULL; 8182 } else { 8183 info->spare_cpu = iter->cpu_file; 8184 info->spare_size = page_size; 8185 } 8186 } 8187 if (!info->spare) 8188 return ret; 8189 8190 /* Do we have previous read data to read? */ 8191 if (info->read < page_size) 8192 goto read; 8193 8194 again: 8195 trace_access_lock(iter->cpu_file); 8196 ret = ring_buffer_read_page(iter->array_buffer->buffer, 8197 info->spare, 8198 count, 8199 iter->cpu_file, 0); 8200 trace_access_unlock(iter->cpu_file); 8201 8202 if (ret < 0) { 8203 if (trace_empty(iter) && !iter->closed) { 8204 if ((filp->f_flags & O_NONBLOCK)) 8205 return -EAGAIN; 8206 8207 ret = wait_on_pipe(iter, 0); 8208 if (ret) 8209 return ret; 8210 8211 goto again; 8212 } 8213 return 0; 8214 } 8215 8216 info->read = 0; 8217 read: 8218 size = page_size - info->read; 8219 if (size > count) 8220 size = count; 8221 trace_data = ring_buffer_read_page_data(info->spare); 8222 ret = copy_to_user(ubuf, trace_data + info->read, size); 8223 if (ret == size) 8224 return -EFAULT; 8225 8226 size -= ret; 8227 8228 *ppos += size; 8229 info->read += size; 8230 8231 return size; 8232 } 8233 8234 static int tracing_buffers_flush(struct file *file, fl_owner_t id) 8235 { 8236 struct ftrace_buffer_info *info = file->private_data; 8237 struct trace_iterator *iter = &info->iter; 8238 8239 iter->closed = true; 8240 /* Make sure the waiters see the new wait_index */ 8241 (void)atomic_fetch_inc_release(&iter->wait_index); 8242 8243 ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file); 8244 8245 return 0; 8246 } 8247 8248 static int tracing_buffers_release(struct inode *inode, struct file *file) 8249 { 8250 struct ftrace_buffer_info *info = file->private_data; 8251 struct trace_iterator *iter = &info->iter; 8252 8253 mutex_lock(&trace_types_lock); 8254 8255 iter->tr->trace_ref--; 8256 8257 __trace_array_put(iter->tr); 8258 8259 if (info->spare) 8260 ring_buffer_free_read_page(iter->array_buffer->buffer, 8261 info->spare_cpu, info->spare); 8262 kvfree(info); 8263 8264 mutex_unlock(&trace_types_lock); 8265 8266 return 0; 8267 } 8268 8269 struct buffer_ref { 8270 struct trace_buffer *buffer; 8271 void *page; 8272 int cpu; 8273 refcount_t refcount; 8274 }; 8275 8276 static void buffer_ref_release(struct buffer_ref *ref) 8277 { 8278 if (!refcount_dec_and_test(&ref->refcount)) 8279 return; 8280 ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page); 8281 kfree(ref); 8282 } 8283 8284 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe, 8285 struct pipe_buffer *buf) 8286 { 8287 struct buffer_ref *ref = (struct buffer_ref *)buf->private; 8288 8289 buffer_ref_release(ref); 8290 buf->private = 0; 8291 } 8292 8293 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe, 8294 struct pipe_buffer *buf) 8295 { 8296 
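	/*
	 * Note on the check below: refuse to take another reference once the
	 * count is already implausibly large (more than INT_MAX/2), guarding
	 * against reference-count overflow from repeated gets on this page.
	 */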
struct buffer_ref *ref = (struct buffer_ref *)buf->private; 8297 8298 if (refcount_read(&ref->refcount) > INT_MAX/2) 8299 return false; 8300 8301 refcount_inc(&ref->refcount); 8302 return true; 8303 } 8304 8305 /* Pipe buffer operations for a buffer. */ 8306 static const struct pipe_buf_operations buffer_pipe_buf_ops = { 8307 .release = buffer_pipe_buf_release, 8308 .get = buffer_pipe_buf_get, 8309 }; 8310 8311 /* 8312 * Callback from splice_to_pipe(), if we need to release some pages 8313 * at the end of the spd in case we error'ed out in filling the pipe. 8314 */ 8315 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i) 8316 { 8317 struct buffer_ref *ref = 8318 (struct buffer_ref *)spd->partial[i].private; 8319 8320 buffer_ref_release(ref); 8321 spd->partial[i].private = 0; 8322 } 8323 8324 static ssize_t 8325 tracing_buffers_splice_read(struct file *file, loff_t *ppos, 8326 struct pipe_inode_info *pipe, size_t len, 8327 unsigned int flags) 8328 { 8329 struct ftrace_buffer_info *info = file->private_data; 8330 struct trace_iterator *iter = &info->iter; 8331 struct partial_page partial_def[PIPE_DEF_BUFFERS]; 8332 struct page *pages_def[PIPE_DEF_BUFFERS]; 8333 struct splice_pipe_desc spd = { 8334 .pages = pages_def, 8335 .partial = partial_def, 8336 .nr_pages_max = PIPE_DEF_BUFFERS, 8337 .ops = &buffer_pipe_buf_ops, 8338 .spd_release = buffer_spd_release, 8339 }; 8340 struct buffer_ref *ref; 8341 bool woken = false; 8342 int page_size; 8343 int entries, i; 8344 ssize_t ret = 0; 8345 8346 #ifdef CONFIG_TRACER_MAX_TRACE 8347 if (iter->snapshot && iter->tr->current_trace->use_max_tr) 8348 return -EBUSY; 8349 #endif 8350 8351 page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer); 8352 if (*ppos & (page_size - 1)) 8353 return -EINVAL; 8354 8355 if (len & (page_size - 1)) { 8356 if (len < page_size) 8357 return -EINVAL; 8358 len &= (~(page_size - 1)); 8359 } 8360 8361 if (splice_grow_spd(pipe, &spd)) 8362 return -ENOMEM; 8363 8364 again: 8365 trace_access_lock(iter->cpu_file); 8366 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file); 8367 8368 for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) { 8369 struct page *page; 8370 int r; 8371 8372 ref = kzalloc(sizeof(*ref), GFP_KERNEL); 8373 if (!ref) { 8374 ret = -ENOMEM; 8375 break; 8376 } 8377 8378 refcount_set(&ref->refcount, 1); 8379 ref->buffer = iter->array_buffer->buffer; 8380 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file); 8381 if (IS_ERR(ref->page)) { 8382 ret = PTR_ERR(ref->page); 8383 ref->page = NULL; 8384 kfree(ref); 8385 break; 8386 } 8387 ref->cpu = iter->cpu_file; 8388 8389 r = ring_buffer_read_page(ref->buffer, ref->page, 8390 len, iter->cpu_file, 1); 8391 if (r < 0) { 8392 ring_buffer_free_read_page(ref->buffer, ref->cpu, 8393 ref->page); 8394 kfree(ref); 8395 break; 8396 } 8397 8398 page = virt_to_page(ring_buffer_read_page_data(ref->page)); 8399 8400 spd.pages[i] = page; 8401 spd.partial[i].len = page_size; 8402 spd.partial[i].offset = 0; 8403 spd.partial[i].private = (unsigned long)ref; 8404 spd.nr_pages++; 8405 *ppos += page_size; 8406 8407 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file); 8408 } 8409 8410 trace_access_unlock(iter->cpu_file); 8411 spd.nr_pages = i; 8412 8413 /* did we read anything? 
*/ 8414 if (!spd.nr_pages) { 8415 8416 if (ret) 8417 goto out; 8418 8419 if (woken) 8420 goto out; 8421 8422 ret = -EAGAIN; 8423 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK)) 8424 goto out; 8425 8426 ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent); 8427 if (ret) 8428 goto out; 8429 8430 /* No need to wait after waking up when tracing is off */ 8431 if (!tracer_tracing_is_on(iter->tr)) 8432 goto out; 8433 8434 /* Iterate one more time to collect any new data then exit */ 8435 woken = true; 8436 8437 goto again; 8438 } 8439 8440 ret = splice_to_pipe(pipe, &spd); 8441 out: 8442 splice_shrink_spd(&spd); 8443 8444 return ret; 8445 } 8446 8447 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 8448 { 8449 struct ftrace_buffer_info *info = file->private_data; 8450 struct trace_iterator *iter = &info->iter; 8451 int err; 8452 8453 if (cmd == TRACE_MMAP_IOCTL_GET_READER) { 8454 if (!(file->f_flags & O_NONBLOCK)) { 8455 err = ring_buffer_wait(iter->array_buffer->buffer, 8456 iter->cpu_file, 8457 iter->tr->buffer_percent, 8458 NULL, NULL); 8459 if (err) 8460 return err; 8461 } 8462 8463 return ring_buffer_map_get_reader(iter->array_buffer->buffer, 8464 iter->cpu_file); 8465 } else if (cmd) { 8466 return -ENOTTY; 8467 } 8468 8469 /* 8470 * An ioctl call with cmd 0 to the ring buffer file will wake up all 8471 * waiters 8472 */ 8473 mutex_lock(&trace_types_lock); 8474 8475 /* Make sure the waiters see the new wait_index */ 8476 (void)atomic_fetch_inc_release(&iter->wait_index); 8477 8478 ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file); 8479 8480 mutex_unlock(&trace_types_lock); 8481 return 0; 8482 } 8483 8484 #ifdef CONFIG_TRACER_MAX_TRACE 8485 static int get_snapshot_map(struct trace_array *tr) 8486 { 8487 int err = 0; 8488 8489 /* 8490 * Called with mmap_lock held. lockdep would be unhappy if we would now 8491 * take trace_types_lock. Instead use the specific 8492 * snapshot_trigger_lock. 
8493 */ 8494 spin_lock(&tr->snapshot_trigger_lock); 8495 8496 if (tr->snapshot || tr->mapped == UINT_MAX) 8497 err = -EBUSY; 8498 else 8499 tr->mapped++; 8500 8501 spin_unlock(&tr->snapshot_trigger_lock); 8502 8503 /* Wait for update_max_tr() to observe iter->tr->mapped */ 8504 if (tr->mapped == 1) 8505 synchronize_rcu(); 8506 8507 return err; 8508 8509 } 8510 static void put_snapshot_map(struct trace_array *tr) 8511 { 8512 spin_lock(&tr->snapshot_trigger_lock); 8513 if (!WARN_ON(!tr->mapped)) 8514 tr->mapped--; 8515 spin_unlock(&tr->snapshot_trigger_lock); 8516 } 8517 #else 8518 static inline int get_snapshot_map(struct trace_array *tr) { return 0; } 8519 static inline void put_snapshot_map(struct trace_array *tr) { } 8520 #endif 8521 8522 static void tracing_buffers_mmap_close(struct vm_area_struct *vma) 8523 { 8524 struct ftrace_buffer_info *info = vma->vm_file->private_data; 8525 struct trace_iterator *iter = &info->iter; 8526 8527 WARN_ON(ring_buffer_unmap(iter->array_buffer->buffer, iter->cpu_file)); 8528 put_snapshot_map(iter->tr); 8529 } 8530 8531 static const struct vm_operations_struct tracing_buffers_vmops = { 8532 .close = tracing_buffers_mmap_close, 8533 }; 8534 8535 static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma) 8536 { 8537 struct ftrace_buffer_info *info = filp->private_data; 8538 struct trace_iterator *iter = &info->iter; 8539 int ret = 0; 8540 8541 /* A memmap'ed buffer is not supported for user space mmap */ 8542 if (iter->tr->flags & TRACE_ARRAY_FL_MEMMAP) 8543 return -ENODEV; 8544 8545 /* Currently the boot mapped buffer is not supported for mmap */ 8546 if (iter->tr->flags & TRACE_ARRAY_FL_BOOT) 8547 return -ENODEV; 8548 8549 ret = get_snapshot_map(iter->tr); 8550 if (ret) 8551 return ret; 8552 8553 ret = ring_buffer_map(iter->array_buffer->buffer, iter->cpu_file, vma); 8554 if (ret) 8555 put_snapshot_map(iter->tr); 8556 8557 vma->vm_ops = &tracing_buffers_vmops; 8558 8559 return ret; 8560 } 8561 8562 static const struct file_operations tracing_buffers_fops = { 8563 .open = tracing_buffers_open, 8564 .read = tracing_buffers_read, 8565 .poll = tracing_buffers_poll, 8566 .release = tracing_buffers_release, 8567 .flush = tracing_buffers_flush, 8568 .splice_read = tracing_buffers_splice_read, 8569 .unlocked_ioctl = tracing_buffers_ioctl, 8570 .mmap = tracing_buffers_mmap, 8571 }; 8572 8573 static ssize_t 8574 tracing_stats_read(struct file *filp, char __user *ubuf, 8575 size_t count, loff_t *ppos) 8576 { 8577 struct inode *inode = file_inode(filp); 8578 struct trace_array *tr = inode->i_private; 8579 struct array_buffer *trace_buf = &tr->array_buffer; 8580 int cpu = tracing_get_cpu(inode); 8581 struct trace_seq *s; 8582 unsigned long cnt; 8583 unsigned long long t; 8584 unsigned long usec_rem; 8585 8586 s = kmalloc(sizeof(*s), GFP_KERNEL); 8587 if (!s) 8588 return -ENOMEM; 8589 8590 trace_seq_init(s); 8591 8592 cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu); 8593 trace_seq_printf(s, "entries: %ld\n", cnt); 8594 8595 cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu); 8596 trace_seq_printf(s, "overrun: %ld\n", cnt); 8597 8598 cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu); 8599 trace_seq_printf(s, "commit overrun: %ld\n", cnt); 8600 8601 cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu); 8602 trace_seq_printf(s, "bytes: %ld\n", cnt); 8603 8604 if (trace_clocks[tr->clock_id].in_ns) { 8605 /* local or global for trace_clock */ 8606 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu)); 8607 usec_rem = 
do_div(t, USEC_PER_SEC); 8608 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n", 8609 t, usec_rem); 8610 8611 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer)); 8612 usec_rem = do_div(t, USEC_PER_SEC); 8613 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem); 8614 } else { 8615 /* counter or tsc mode for trace_clock */ 8616 trace_seq_printf(s, "oldest event ts: %llu\n", 8617 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu)); 8618 8619 trace_seq_printf(s, "now ts: %llu\n", 8620 ring_buffer_time_stamp(trace_buf->buffer)); 8621 } 8622 8623 cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu); 8624 trace_seq_printf(s, "dropped events: %ld\n", cnt); 8625 8626 cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu); 8627 trace_seq_printf(s, "read events: %ld\n", cnt); 8628 8629 count = simple_read_from_buffer(ubuf, count, ppos, 8630 s->buffer, trace_seq_used(s)); 8631 8632 kfree(s); 8633 8634 return count; 8635 } 8636 8637 static const struct file_operations tracing_stats_fops = { 8638 .open = tracing_open_generic_tr, 8639 .read = tracing_stats_read, 8640 .llseek = generic_file_llseek, 8641 .release = tracing_release_generic_tr, 8642 }; 8643 8644 #ifdef CONFIG_DYNAMIC_FTRACE 8645 8646 static ssize_t 8647 tracing_read_dyn_info(struct file *filp, char __user *ubuf, 8648 size_t cnt, loff_t *ppos) 8649 { 8650 ssize_t ret; 8651 char *buf; 8652 int r; 8653 8654 /* 512 should be plenty to hold the amount needed */ 8655 #define DYN_INFO_BUF_SIZE 512 8656 8657 buf = kmalloc(DYN_INFO_BUF_SIZE, GFP_KERNEL); 8658 if (!buf) 8659 return -ENOMEM; 8660 8661 r = scnprintf(buf, DYN_INFO_BUF_SIZE, 8662 "%ld pages:%ld groups: %ld\n" 8663 "ftrace boot update time = %llu (ns)\n" 8664 "ftrace module total update time = %llu (ns)\n", 8665 ftrace_update_tot_cnt, 8666 ftrace_number_of_pages, 8667 ftrace_number_of_groups, 8668 ftrace_update_time, 8669 ftrace_total_mod_time); 8670 8671 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 8672 kfree(buf); 8673 return ret; 8674 } 8675 8676 static const struct file_operations tracing_dyn_info_fops = { 8677 .open = tracing_open_generic, 8678 .read = tracing_read_dyn_info, 8679 .llseek = generic_file_llseek, 8680 }; 8681 #endif /* CONFIG_DYNAMIC_FTRACE */ 8682 8683 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) 8684 static void 8685 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, 8686 struct trace_array *tr, struct ftrace_probe_ops *ops, 8687 void *data) 8688 { 8689 tracing_snapshot_instance(tr); 8690 } 8691 8692 static void 8693 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, 8694 struct trace_array *tr, struct ftrace_probe_ops *ops, 8695 void *data) 8696 { 8697 struct ftrace_func_mapper *mapper = data; 8698 long *count = NULL; 8699 8700 if (mapper) 8701 count = (long *)ftrace_func_mapper_find_ip(mapper, ip); 8702 8703 if (count) { 8704 8705 if (*count <= 0) 8706 return; 8707 8708 (*count)--; 8709 } 8710 8711 tracing_snapshot_instance(tr); 8712 } 8713 8714 static int 8715 ftrace_snapshot_print(struct seq_file *m, unsigned long ip, 8716 struct ftrace_probe_ops *ops, void *data) 8717 { 8718 struct ftrace_func_mapper *mapper = data; 8719 long *count = NULL; 8720 8721 seq_printf(m, "%ps:", (void *)ip); 8722 8723 seq_puts(m, "snapshot"); 8724 8725 if (mapper) 8726 count = (long *)ftrace_func_mapper_find_ip(mapper, ip); 8727 8728 if (count) 8729 seq_printf(m, ":count=%ld\n", *count); 8730 else 8731 seq_puts(m, ":unlimited\n"); 8732 8733 return 0; 8734 } 8735 8736 static int 8737 
ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr, 8738 unsigned long ip, void *init_data, void **data) 8739 { 8740 struct ftrace_func_mapper *mapper = *data; 8741 8742 if (!mapper) { 8743 mapper = allocate_ftrace_func_mapper(); 8744 if (!mapper) 8745 return -ENOMEM; 8746 *data = mapper; 8747 } 8748 8749 return ftrace_func_mapper_add_ip(mapper, ip, init_data); 8750 } 8751 8752 static void 8753 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr, 8754 unsigned long ip, void *data) 8755 { 8756 struct ftrace_func_mapper *mapper = data; 8757 8758 if (!ip) { 8759 if (!mapper) 8760 return; 8761 free_ftrace_func_mapper(mapper, NULL); 8762 return; 8763 } 8764 8765 ftrace_func_mapper_remove_ip(mapper, ip); 8766 } 8767 8768 static struct ftrace_probe_ops snapshot_probe_ops = { 8769 .func = ftrace_snapshot, 8770 .print = ftrace_snapshot_print, 8771 }; 8772 8773 static struct ftrace_probe_ops snapshot_count_probe_ops = { 8774 .func = ftrace_count_snapshot, 8775 .print = ftrace_snapshot_print, 8776 .init = ftrace_snapshot_init, 8777 .free = ftrace_snapshot_free, 8778 }; 8779 8780 static int 8781 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash, 8782 char *glob, char *cmd, char *param, int enable) 8783 { 8784 struct ftrace_probe_ops *ops; 8785 void *count = (void *)-1; 8786 char *number; 8787 int ret; 8788 8789 if (!tr) 8790 return -ENODEV; 8791 8792 /* hash funcs only work with set_ftrace_filter */ 8793 if (!enable) 8794 return -EINVAL; 8795 8796 ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops; 8797 8798 if (glob[0] == '!') { 8799 ret = unregister_ftrace_function_probe_func(glob+1, tr, ops); 8800 if (!ret) 8801 tracing_disarm_snapshot(tr); 8802 8803 return ret; 8804 } 8805 8806 if (!param) 8807 goto out_reg; 8808 8809 number = strsep(¶m, ":"); 8810 8811 if (!strlen(number)) 8812 goto out_reg; 8813 8814 /* 8815 * We use the callback data field (which is a pointer) 8816 * as our counter. 8817 */ 8818 ret = kstrtoul(number, 0, (unsigned long *)&count); 8819 if (ret) 8820 return ret; 8821 8822 out_reg: 8823 ret = tracing_arm_snapshot(tr); 8824 if (ret < 0) 8825 goto out; 8826 8827 ret = register_ftrace_function_probe(glob, tr, ops, count); 8828 if (ret < 0) 8829 tracing_disarm_snapshot(tr); 8830 out: 8831 return ret < 0 ? 
ret : 0; 8832 } 8833 8834 static struct ftrace_func_command ftrace_snapshot_cmd = { 8835 .name = "snapshot", 8836 .func = ftrace_trace_snapshot_callback, 8837 }; 8838 8839 static __init int register_snapshot_cmd(void) 8840 { 8841 return register_ftrace_command(&ftrace_snapshot_cmd); 8842 } 8843 #else 8844 static inline __init int register_snapshot_cmd(void) { return 0; } 8845 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */ 8846 8847 static struct dentry *tracing_get_dentry(struct trace_array *tr) 8848 { 8849 if (WARN_ON(!tr->dir)) 8850 return ERR_PTR(-ENODEV); 8851 8852 /* Top directory uses NULL as the parent */ 8853 if (tr->flags & TRACE_ARRAY_FL_GLOBAL) 8854 return NULL; 8855 8856 /* All sub buffers have a descriptor */ 8857 return tr->dir; 8858 } 8859 8860 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu) 8861 { 8862 struct dentry *d_tracer; 8863 8864 if (tr->percpu_dir) 8865 return tr->percpu_dir; 8866 8867 d_tracer = tracing_get_dentry(tr); 8868 if (IS_ERR(d_tracer)) 8869 return NULL; 8870 8871 tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer); 8872 8873 MEM_FAIL(!tr->percpu_dir, 8874 "Could not create tracefs directory 'per_cpu/%d'\n", cpu); 8875 8876 return tr->percpu_dir; 8877 } 8878 8879 static struct dentry * 8880 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent, 8881 void *data, long cpu, const struct file_operations *fops) 8882 { 8883 struct dentry *ret = trace_create_file(name, mode, parent, data, fops); 8884 8885 if (ret) /* See tracing_get_cpu() */ 8886 d_inode(ret)->i_cdev = (void *)(cpu + 1); 8887 return ret; 8888 } 8889 8890 static void 8891 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu) 8892 { 8893 struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu); 8894 struct dentry *d_cpu; 8895 char cpu_dir[30]; /* 30 characters should be more than enough */ 8896 8897 if (!d_percpu) 8898 return; 8899 8900 snprintf(cpu_dir, 30, "cpu%ld", cpu); 8901 d_cpu = tracefs_create_dir(cpu_dir, d_percpu); 8902 if (!d_cpu) { 8903 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir); 8904 return; 8905 } 8906 8907 /* per cpu trace_pipe */ 8908 trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu, 8909 tr, cpu, &tracing_pipe_fops); 8910 8911 /* per cpu trace */ 8912 trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu, 8913 tr, cpu, &tracing_fops); 8914 8915 trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu, 8916 tr, cpu, &tracing_buffers_fops); 8917 8918 trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu, 8919 tr, cpu, &tracing_stats_fops); 8920 8921 trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu, 8922 tr, cpu, &tracing_entries_fops); 8923 8924 if (tr->range_addr_start) 8925 trace_create_cpu_file("buffer_meta", TRACE_MODE_READ, d_cpu, 8926 tr, cpu, &tracing_buffer_meta_fops); 8927 #ifdef CONFIG_TRACER_SNAPSHOT 8928 if (!tr->range_addr_start) { 8929 trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu, 8930 tr, cpu, &snapshot_fops); 8931 8932 trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu, 8933 tr, cpu, &snapshot_raw_fops); 8934 } 8935 #endif 8936 } 8937 8938 #ifdef CONFIG_FTRACE_SELFTEST 8939 /* Let selftest have access to static functions in this file */ 8940 #include "trace_selftest.c" 8941 #endif 8942 8943 static ssize_t 8944 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt, 8945 loff_t *ppos) 8946 { 8947 struct trace_option_dentry *topt = filp->private_data; 8948 char *buf; 8949 8950 if 
(topt->flags->val & topt->opt->bit)
		buf = "1\n";
	else
		buf = "0\n";

	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
}

static ssize_t
trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
		    loff_t *ppos)
{
	struct trace_option_dentry *topt = filp->private_data;
	unsigned long val;
	int ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	if (val != 0 && val != 1)
		return -EINVAL;

	if (!!(topt->flags->val & topt->opt->bit) != val) {
		mutex_lock(&trace_types_lock);
		ret = __set_tracer_option(topt->tr, topt->flags,
					  topt->opt, !val);
		mutex_unlock(&trace_types_lock);
		if (ret)
			return ret;
	}

	*ppos += cnt;

	return cnt;
}

static int tracing_open_options(struct inode *inode, struct file *filp)
{
	struct trace_option_dentry *topt = inode->i_private;
	int ret;

	ret = tracing_check_open_get_tr(topt->tr);
	if (ret)
		return ret;

	filp->private_data = inode->i_private;
	return 0;
}

static int tracing_release_options(struct inode *inode, struct file *file)
{
	struct trace_option_dentry *topt = file->private_data;

	trace_array_put(topt->tr);
	return 0;
}

static const struct file_operations trace_options_fops = {
	.open = tracing_open_options,
	.read = trace_options_read,
	.write = trace_options_write,
	.llseek = generic_file_llseek,
	.release = tracing_release_options,
};

/*
 * In order to pass in both the trace_array descriptor as well as the index
 * to the flag that the trace option file represents, the trace_array
 * has a character array of trace_flags_index[], which holds the index
 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
 * The address of this character array is passed to the flag option file
 * read/write callbacks.
 *
 * In order to extract both the index and the trace_array descriptor,
 * get_tr_index() uses the following algorithm:
 *
 *   idx = *ptr;
 *
 * The pointer passed in is the address of one element of that index array,
 * and because index[i] == i, dereferencing it yields the index itself.
 *
 * To get the trace_array descriptor, subtract that index from the pointer,
 * which lands on the start of the array:
 *
 *   ptr - idx == &index[0]
 *
 * A simple container_of() on that pointer then gets us to the
 * trace_array descriptor.
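 *
 * A short worked example (illustrative): if a flag file was created with
 * data == &tr->trace_flags_index[3], then idx = *ptr == 3, ptr - 3 ==
 * &tr->trace_flags_index[0], and container_of() on that address recovers
 * the enclosing trace_array.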
9039 */ 9040 static void get_tr_index(void *data, struct trace_array **ptr, 9041 unsigned int *pindex) 9042 { 9043 *pindex = *(unsigned char *)data; 9044 9045 *ptr = container_of(data - *pindex, struct trace_array, 9046 trace_flags_index); 9047 } 9048 9049 static ssize_t 9050 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt, 9051 loff_t *ppos) 9052 { 9053 void *tr_index = filp->private_data; 9054 struct trace_array *tr; 9055 unsigned int index; 9056 char *buf; 9057 9058 get_tr_index(tr_index, &tr, &index); 9059 9060 if (tr->trace_flags & (1 << index)) 9061 buf = "1\n"; 9062 else 9063 buf = "0\n"; 9064 9065 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2); 9066 } 9067 9068 static ssize_t 9069 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt, 9070 loff_t *ppos) 9071 { 9072 void *tr_index = filp->private_data; 9073 struct trace_array *tr; 9074 unsigned int index; 9075 unsigned long val; 9076 int ret; 9077 9078 get_tr_index(tr_index, &tr, &index); 9079 9080 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 9081 if (ret) 9082 return ret; 9083 9084 if (val != 0 && val != 1) 9085 return -EINVAL; 9086 9087 mutex_lock(&event_mutex); 9088 mutex_lock(&trace_types_lock); 9089 ret = set_tracer_flag(tr, 1 << index, val); 9090 mutex_unlock(&trace_types_lock); 9091 mutex_unlock(&event_mutex); 9092 9093 if (ret < 0) 9094 return ret; 9095 9096 *ppos += cnt; 9097 9098 return cnt; 9099 } 9100 9101 static const struct file_operations trace_options_core_fops = { 9102 .open = tracing_open_generic, 9103 .read = trace_options_core_read, 9104 .write = trace_options_core_write, 9105 .llseek = generic_file_llseek, 9106 }; 9107 9108 struct dentry *trace_create_file(const char *name, 9109 umode_t mode, 9110 struct dentry *parent, 9111 void *data, 9112 const struct file_operations *fops) 9113 { 9114 struct dentry *ret; 9115 9116 ret = tracefs_create_file(name, mode, parent, data, fops); 9117 if (!ret) 9118 pr_warn("Could not create tracefs '%s' entry\n", name); 9119 9120 return ret; 9121 } 9122 9123 9124 static struct dentry *trace_options_init_dentry(struct trace_array *tr) 9125 { 9126 struct dentry *d_tracer; 9127 9128 if (tr->options) 9129 return tr->options; 9130 9131 d_tracer = tracing_get_dentry(tr); 9132 if (IS_ERR(d_tracer)) 9133 return NULL; 9134 9135 tr->options = tracefs_create_dir("options", d_tracer); 9136 if (!tr->options) { 9137 pr_warn("Could not create tracefs directory 'options'\n"); 9138 return NULL; 9139 } 9140 9141 return tr->options; 9142 } 9143 9144 static void 9145 create_trace_option_file(struct trace_array *tr, 9146 struct trace_option_dentry *topt, 9147 struct tracer_flags *flags, 9148 struct tracer_opt *opt) 9149 { 9150 struct dentry *t_options; 9151 9152 t_options = trace_options_init_dentry(tr); 9153 if (!t_options) 9154 return; 9155 9156 topt->flags = flags; 9157 topt->opt = opt; 9158 topt->tr = tr; 9159 9160 topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE, 9161 t_options, topt, &trace_options_fops); 9162 9163 } 9164 9165 static void 9166 create_trace_option_files(struct trace_array *tr, struct tracer *tracer) 9167 { 9168 struct trace_option_dentry *topts; 9169 struct trace_options *tr_topts; 9170 struct tracer_flags *flags; 9171 struct tracer_opt *opts; 9172 int cnt; 9173 int i; 9174 9175 if (!tracer) 9176 return; 9177 9178 flags = tracer->flags; 9179 9180 if (!flags || !flags->opts) 9181 return; 9182 9183 /* 9184 * If this is an instance, only create flags for tracers 9185 * the instance may have. 
9186 */ 9187 if (!trace_ok_for_array(tracer, tr)) 9188 return; 9189 9190 for (i = 0; i < tr->nr_topts; i++) { 9191 /* Make sure there's no duplicate flags. */ 9192 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags)) 9193 return; 9194 } 9195 9196 opts = flags->opts; 9197 9198 for (cnt = 0; opts[cnt].name; cnt++) 9199 ; 9200 9201 topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL); 9202 if (!topts) 9203 return; 9204 9205 tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1), 9206 GFP_KERNEL); 9207 if (!tr_topts) { 9208 kfree(topts); 9209 return; 9210 } 9211 9212 tr->topts = tr_topts; 9213 tr->topts[tr->nr_topts].tracer = tracer; 9214 tr->topts[tr->nr_topts].topts = topts; 9215 tr->nr_topts++; 9216 9217 for (cnt = 0; opts[cnt].name; cnt++) { 9218 create_trace_option_file(tr, &topts[cnt], flags, 9219 &opts[cnt]); 9220 MEM_FAIL(topts[cnt].entry == NULL, 9221 "Failed to create trace option: %s", 9222 opts[cnt].name); 9223 } 9224 } 9225 9226 static struct dentry * 9227 create_trace_option_core_file(struct trace_array *tr, 9228 const char *option, long index) 9229 { 9230 struct dentry *t_options; 9231 9232 t_options = trace_options_init_dentry(tr); 9233 if (!t_options) 9234 return NULL; 9235 9236 return trace_create_file(option, TRACE_MODE_WRITE, t_options, 9237 (void *)&tr->trace_flags_index[index], 9238 &trace_options_core_fops); 9239 } 9240 9241 static void create_trace_options_dir(struct trace_array *tr) 9242 { 9243 struct dentry *t_options; 9244 bool top_level = tr == &global_trace; 9245 int i; 9246 9247 t_options = trace_options_init_dentry(tr); 9248 if (!t_options) 9249 return; 9250 9251 for (i = 0; trace_options[i]; i++) { 9252 if (top_level || 9253 !((1 << i) & TOP_LEVEL_TRACE_FLAGS)) 9254 create_trace_option_core_file(tr, trace_options[i], i); 9255 } 9256 } 9257 9258 static ssize_t 9259 rb_simple_read(struct file *filp, char __user *ubuf, 9260 size_t cnt, loff_t *ppos) 9261 { 9262 struct trace_array *tr = filp->private_data; 9263 char buf[64]; 9264 int r; 9265 9266 r = tracer_tracing_is_on(tr); 9267 r = sprintf(buf, "%d\n", r); 9268 9269 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 9270 } 9271 9272 static ssize_t 9273 rb_simple_write(struct file *filp, const char __user *ubuf, 9274 size_t cnt, loff_t *ppos) 9275 { 9276 struct trace_array *tr = filp->private_data; 9277 struct trace_buffer *buffer = tr->array_buffer.buffer; 9278 unsigned long val; 9279 int ret; 9280 9281 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 9282 if (ret) 9283 return ret; 9284 9285 if (buffer) { 9286 mutex_lock(&trace_types_lock); 9287 if (!!val == tracer_tracing_is_on(tr)) { 9288 val = 0; /* do nothing */ 9289 } else if (val) { 9290 tracer_tracing_on(tr); 9291 if (tr->current_trace->start) 9292 tr->current_trace->start(tr); 9293 } else { 9294 tracer_tracing_off(tr); 9295 if (tr->current_trace->stop) 9296 tr->current_trace->stop(tr); 9297 /* Wake up any waiters */ 9298 ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS); 9299 } 9300 mutex_unlock(&trace_types_lock); 9301 } 9302 9303 (*ppos)++; 9304 9305 return cnt; 9306 } 9307 9308 static const struct file_operations rb_simple_fops = { 9309 .open = tracing_open_generic_tr, 9310 .read = rb_simple_read, 9311 .write = rb_simple_write, 9312 .release = tracing_release_generic_tr, 9313 .llseek = default_llseek, 9314 }; 9315 9316 static ssize_t 9317 buffer_percent_read(struct file *filp, char __user *ubuf, 9318 size_t cnt, loff_t *ppos) 9319 { 9320 struct trace_array *tr = filp->private_data; 9321 char buf[64]; 9322 int r; 9323 
9324 r = tr->buffer_percent; 9325 r = sprintf(buf, "%d\n", r); 9326 9327 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 9328 } 9329 9330 static ssize_t 9331 buffer_percent_write(struct file *filp, const char __user *ubuf, 9332 size_t cnt, loff_t *ppos) 9333 { 9334 struct trace_array *tr = filp->private_data; 9335 unsigned long val; 9336 int ret; 9337 9338 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 9339 if (ret) 9340 return ret; 9341 9342 if (val > 100) 9343 return -EINVAL; 9344 9345 tr->buffer_percent = val; 9346 9347 (*ppos)++; 9348 9349 return cnt; 9350 } 9351 9352 static const struct file_operations buffer_percent_fops = { 9353 .open = tracing_open_generic_tr, 9354 .read = buffer_percent_read, 9355 .write = buffer_percent_write, 9356 .release = tracing_release_generic_tr, 9357 .llseek = default_llseek, 9358 }; 9359 9360 static ssize_t 9361 buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) 9362 { 9363 struct trace_array *tr = filp->private_data; 9364 size_t size; 9365 char buf[64]; 9366 int order; 9367 int r; 9368 9369 order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer); 9370 size = (PAGE_SIZE << order) / 1024; 9371 9372 r = sprintf(buf, "%zd\n", size); 9373 9374 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 9375 } 9376 9377 static ssize_t 9378 buffer_subbuf_size_write(struct file *filp, const char __user *ubuf, 9379 size_t cnt, loff_t *ppos) 9380 { 9381 struct trace_array *tr = filp->private_data; 9382 unsigned long val; 9383 int old_order; 9384 int order; 9385 int pages; 9386 int ret; 9387 9388 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 9389 if (ret) 9390 return ret; 9391 9392 val *= 1024; /* value passed in is in KB */ 9393 9394 pages = DIV_ROUND_UP(val, PAGE_SIZE); 9395 order = fls(pages - 1); 9396 9397 /* limit between 1 and 128 system pages */ 9398 if (order < 0 || order > 7) 9399 return -EINVAL; 9400 9401 /* Do not allow tracing while changing the order of the ring buffer */ 9402 tracing_stop_tr(tr); 9403 9404 old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer); 9405 if (old_order == order) 9406 goto out; 9407 9408 ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order); 9409 if (ret) 9410 goto out; 9411 9412 #ifdef CONFIG_TRACER_MAX_TRACE 9413 9414 if (!tr->allocated_snapshot) 9415 goto out_max; 9416 9417 ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order); 9418 if (ret) { 9419 /* Put back the old order */ 9420 cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order); 9421 if (WARN_ON_ONCE(cnt)) { 9422 /* 9423 * AARGH! We are left with different orders! 9424 * The max buffer is our "snapshot" buffer. 9425 * When a tracer needs a snapshot (one of the 9426 * latency tracers), it swaps the max buffer 9427 * with the saved snap shot. We succeeded to 9428 * update the order of the main buffer, but failed to 9429 * update the order of the max buffer. But when we tried 9430 * to reset the main buffer to the original size, we 9431 * failed there too. This is very unlikely to 9432 * happen, but if it does, warn and kill all 9433 * tracing. 
9434 */ 9435 tracing_disabled = 1; 9436 } 9437 goto out; 9438 } 9439 out_max: 9440 #endif 9441 (*ppos)++; 9442 out: 9443 if (ret) 9444 cnt = ret; 9445 tracing_start_tr(tr); 9446 return cnt; 9447 } 9448 9449 static const struct file_operations buffer_subbuf_size_fops = { 9450 .open = tracing_open_generic_tr, 9451 .read = buffer_subbuf_size_read, 9452 .write = buffer_subbuf_size_write, 9453 .release = tracing_release_generic_tr, 9454 .llseek = default_llseek, 9455 }; 9456 9457 static struct dentry *trace_instance_dir; 9458 9459 static void 9460 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer); 9461 9462 #ifdef CONFIG_MODULES 9463 static int make_mod_delta(struct module *mod, void *data) 9464 { 9465 struct trace_module_delta *module_delta; 9466 struct trace_scratch *tscratch; 9467 struct trace_mod_entry *entry; 9468 struct trace_array *tr = data; 9469 int i; 9470 9471 tscratch = tr->scratch; 9472 module_delta = READ_ONCE(tr->module_delta); 9473 for (i = 0; i < tscratch->nr_entries; i++) { 9474 entry = &tscratch->entries[i]; 9475 if (strcmp(mod->name, entry->mod_name)) 9476 continue; 9477 if (mod->state == MODULE_STATE_GOING) 9478 module_delta->delta[i] = 0; 9479 else 9480 module_delta->delta[i] = (unsigned long)mod->mem[MOD_TEXT].base 9481 - entry->mod_addr; 9482 break; 9483 } 9484 return 0; 9485 } 9486 #else 9487 static int make_mod_delta(struct module *mod, void *data) 9488 { 9489 return 0; 9490 } 9491 #endif 9492 9493 static int mod_addr_comp(const void *a, const void *b, const void *data) 9494 { 9495 const struct trace_mod_entry *e1 = a; 9496 const struct trace_mod_entry *e2 = b; 9497 9498 return e1->mod_addr > e2->mod_addr ? 1 : -1; 9499 } 9500 9501 static void setup_trace_scratch(struct trace_array *tr, 9502 struct trace_scratch *tscratch, unsigned int size) 9503 { 9504 struct trace_module_delta *module_delta; 9505 struct trace_mod_entry *entry; 9506 int i, nr_entries; 9507 9508 if (!tscratch) 9509 return; 9510 9511 tr->scratch = tscratch; 9512 tr->scratch_size = size; 9513 9514 if (tscratch->text_addr) 9515 tr->text_delta = (unsigned long)_text - tscratch->text_addr; 9516 9517 if (struct_size(tscratch, entries, tscratch->nr_entries) > size) 9518 goto reset; 9519 9520 /* Check if each module name is a valid string */ 9521 for (i = 0; i < tscratch->nr_entries; i++) { 9522 int n; 9523 9524 entry = &tscratch->entries[i]; 9525 9526 for (n = 0; n < MODULE_NAME_LEN; n++) { 9527 if (entry->mod_name[n] == '\0') 9528 break; 9529 if (!isprint(entry->mod_name[n])) 9530 goto reset; 9531 } 9532 if (n == MODULE_NAME_LEN) 9533 goto reset; 9534 } 9535 9536 /* Sort the entries so that we can find appropriate module from address. */ 9537 nr_entries = tscratch->nr_entries; 9538 sort_r(tscratch->entries, nr_entries, sizeof(struct trace_mod_entry), 9539 mod_addr_comp, NULL, NULL); 9540 9541 if (IS_ENABLED(CONFIG_MODULES)) { 9542 module_delta = kzalloc(struct_size(module_delta, delta, nr_entries), GFP_KERNEL); 9543 if (!module_delta) { 9544 pr_info("module_delta allocation failed. Not able to decode module address."); 9545 goto reset; 9546 } 9547 init_rcu_head(&module_delta->rcu); 9548 } else 9549 module_delta = NULL; 9550 WRITE_ONCE(tr->module_delta, module_delta); 9551 9552 /* Scan modules to make text delta for modules. 
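	 * The delta stored for each entry is the module's current MOD_TEXT base
	 * address minus the address recorded in the scratch area, so an address
	 * saved in a previous boot can (in principle) be rebased by adding its
	 * module's delta.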
*/ 9553 module_for_each_mod(make_mod_delta, tr); 9554 return; 9555 reset: 9556 /* Invalid trace modules */ 9557 memset(tscratch, 0, size); 9558 } 9559 9560 static int 9561 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size) 9562 { 9563 enum ring_buffer_flags rb_flags; 9564 struct trace_scratch *tscratch; 9565 unsigned int scratch_size = 0; 9566 9567 rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0; 9568 9569 buf->tr = tr; 9570 9571 if (tr->range_addr_start && tr->range_addr_size) { 9572 /* Add scratch buffer to handle 128 modules */ 9573 buf->buffer = ring_buffer_alloc_range(size, rb_flags, 0, 9574 tr->range_addr_start, 9575 tr->range_addr_size, 9576 struct_size(tscratch, entries, 128)); 9577 9578 tscratch = ring_buffer_meta_scratch(buf->buffer, &scratch_size); 9579 setup_trace_scratch(tr, tscratch, scratch_size); 9580 9581 /* 9582 * This is basically the same as a mapped buffer, 9583 * with the same restrictions. 9584 */ 9585 tr->mapped++; 9586 } else { 9587 buf->buffer = ring_buffer_alloc(size, rb_flags); 9588 } 9589 if (!buf->buffer) 9590 return -ENOMEM; 9591 9592 buf->data = alloc_percpu(struct trace_array_cpu); 9593 if (!buf->data) { 9594 ring_buffer_free(buf->buffer); 9595 buf->buffer = NULL; 9596 return -ENOMEM; 9597 } 9598 9599 /* Allocate the first page for all buffers */ 9600 set_buffer_entries(&tr->array_buffer, 9601 ring_buffer_size(tr->array_buffer.buffer, 0)); 9602 9603 return 0; 9604 } 9605 9606 static void free_trace_buffer(struct array_buffer *buf) 9607 { 9608 if (buf->buffer) { 9609 ring_buffer_free(buf->buffer); 9610 buf->buffer = NULL; 9611 free_percpu(buf->data); 9612 buf->data = NULL; 9613 } 9614 } 9615 9616 static int allocate_trace_buffers(struct trace_array *tr, int size) 9617 { 9618 int ret; 9619 9620 ret = allocate_trace_buffer(tr, &tr->array_buffer, size); 9621 if (ret) 9622 return ret; 9623 9624 #ifdef CONFIG_TRACER_MAX_TRACE 9625 /* Fix mapped buffer trace arrays do not have snapshot buffers */ 9626 if (tr->range_addr_start) 9627 return 0; 9628 9629 ret = allocate_trace_buffer(tr, &tr->max_buffer, 9630 allocate_snapshot ? 
size : 1); 9631 if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) { 9632 free_trace_buffer(&tr->array_buffer); 9633 return -ENOMEM; 9634 } 9635 tr->allocated_snapshot = allocate_snapshot; 9636 9637 allocate_snapshot = false; 9638 #endif 9639 9640 return 0; 9641 } 9642 9643 static void free_trace_buffers(struct trace_array *tr) 9644 { 9645 if (!tr) 9646 return; 9647 9648 free_trace_buffer(&tr->array_buffer); 9649 kfree(tr->module_delta); 9650 9651 #ifdef CONFIG_TRACER_MAX_TRACE 9652 free_trace_buffer(&tr->max_buffer); 9653 #endif 9654 } 9655 9656 static void init_trace_flags_index(struct trace_array *tr) 9657 { 9658 int i; 9659 9660 /* Used by the trace options files */ 9661 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) 9662 tr->trace_flags_index[i] = i; 9663 } 9664 9665 static void __update_tracer_options(struct trace_array *tr) 9666 { 9667 struct tracer *t; 9668 9669 for (t = trace_types; t; t = t->next) 9670 add_tracer_options(tr, t); 9671 } 9672 9673 static void update_tracer_options(struct trace_array *tr) 9674 { 9675 mutex_lock(&trace_types_lock); 9676 tracer_options_updated = true; 9677 __update_tracer_options(tr); 9678 mutex_unlock(&trace_types_lock); 9679 } 9680 9681 /* Must have trace_types_lock held */ 9682 struct trace_array *trace_array_find(const char *instance) 9683 { 9684 struct trace_array *tr, *found = NULL; 9685 9686 list_for_each_entry(tr, &ftrace_trace_arrays, list) { 9687 if (tr->name && strcmp(tr->name, instance) == 0) { 9688 found = tr; 9689 break; 9690 } 9691 } 9692 9693 return found; 9694 } 9695 9696 struct trace_array *trace_array_find_get(const char *instance) 9697 { 9698 struct trace_array *tr; 9699 9700 mutex_lock(&trace_types_lock); 9701 tr = trace_array_find(instance); 9702 if (tr) 9703 tr->ref++; 9704 mutex_unlock(&trace_types_lock); 9705 9706 return tr; 9707 } 9708 9709 static int trace_array_create_dir(struct trace_array *tr) 9710 { 9711 int ret; 9712 9713 tr->dir = tracefs_create_dir(tr->name, trace_instance_dir); 9714 if (!tr->dir) 9715 return -EINVAL; 9716 9717 ret = event_trace_add_tracer(tr->dir, tr); 9718 if (ret) { 9719 tracefs_remove(tr->dir); 9720 return ret; 9721 } 9722 9723 init_tracer_tracefs(tr, tr->dir); 9724 __update_tracer_options(tr); 9725 9726 return ret; 9727 } 9728 9729 static struct trace_array * 9730 trace_array_create_systems(const char *name, const char *systems, 9731 unsigned long range_addr_start, 9732 unsigned long range_addr_size) 9733 { 9734 struct trace_array *tr; 9735 int ret; 9736 9737 ret = -ENOMEM; 9738 tr = kzalloc(sizeof(*tr), GFP_KERNEL); 9739 if (!tr) 9740 return ERR_PTR(ret); 9741 9742 tr->name = kstrdup(name, GFP_KERNEL); 9743 if (!tr->name) 9744 goto out_free_tr; 9745 9746 if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL)) 9747 goto out_free_tr; 9748 9749 if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL)) 9750 goto out_free_tr; 9751 9752 if (systems) { 9753 tr->system_names = kstrdup_const(systems, GFP_KERNEL); 9754 if (!tr->system_names) 9755 goto out_free_tr; 9756 } 9757 9758 /* Only for boot up memory mapped ring buffers */ 9759 tr->range_addr_start = range_addr_start; 9760 tr->range_addr_size = range_addr_size; 9761 9762 tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS; 9763 9764 cpumask_copy(tr->tracing_cpumask, cpu_all_mask); 9765 9766 raw_spin_lock_init(&tr->start_lock); 9767 9768 tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; 9769 #ifdef CONFIG_TRACER_MAX_TRACE 9770 spin_lock_init(&tr->snapshot_trigger_lock); 9771 #endif 9772 tr->current_trace = &nop_trace; 9773 
9774 INIT_LIST_HEAD(&tr->systems); 9775 INIT_LIST_HEAD(&tr->events); 9776 INIT_LIST_HEAD(&tr->hist_vars); 9777 INIT_LIST_HEAD(&tr->err_log); 9778 9779 #ifdef CONFIG_MODULES 9780 INIT_LIST_HEAD(&tr->mod_events); 9781 #endif 9782 9783 if (allocate_trace_buffers(tr, trace_buf_size) < 0) 9784 goto out_free_tr; 9785 9786 /* The ring buffer is defaultly expanded */ 9787 trace_set_ring_buffer_expanded(tr); 9788 9789 if (ftrace_allocate_ftrace_ops(tr) < 0) 9790 goto out_free_tr; 9791 9792 ftrace_init_trace_array(tr); 9793 9794 init_trace_flags_index(tr); 9795 9796 if (trace_instance_dir) { 9797 ret = trace_array_create_dir(tr); 9798 if (ret) 9799 goto out_free_tr; 9800 } else 9801 __trace_early_add_events(tr); 9802 9803 list_add(&tr->list, &ftrace_trace_arrays); 9804 9805 tr->ref++; 9806 9807 return tr; 9808 9809 out_free_tr: 9810 ftrace_free_ftrace_ops(tr); 9811 free_trace_buffers(tr); 9812 free_cpumask_var(tr->pipe_cpumask); 9813 free_cpumask_var(tr->tracing_cpumask); 9814 kfree_const(tr->system_names); 9815 kfree(tr->range_name); 9816 kfree(tr->name); 9817 kfree(tr); 9818 9819 return ERR_PTR(ret); 9820 } 9821 9822 static struct trace_array *trace_array_create(const char *name) 9823 { 9824 return trace_array_create_systems(name, NULL, 0, 0); 9825 } 9826 9827 static int instance_mkdir(const char *name) 9828 { 9829 struct trace_array *tr; 9830 int ret; 9831 9832 guard(mutex)(&event_mutex); 9833 guard(mutex)(&trace_types_lock); 9834 9835 ret = -EEXIST; 9836 if (trace_array_find(name)) 9837 return -EEXIST; 9838 9839 tr = trace_array_create(name); 9840 9841 ret = PTR_ERR_OR_ZERO(tr); 9842 9843 return ret; 9844 } 9845 9846 #ifdef CONFIG_MMU 9847 static u64 map_pages(unsigned long start, unsigned long size) 9848 { 9849 unsigned long vmap_start, vmap_end; 9850 struct vm_struct *area; 9851 int ret; 9852 9853 area = get_vm_area(size, VM_IOREMAP); 9854 if (!area) 9855 return 0; 9856 9857 vmap_start = (unsigned long) area->addr; 9858 vmap_end = vmap_start + size; 9859 9860 ret = vmap_page_range(vmap_start, vmap_end, 9861 start, pgprot_nx(PAGE_KERNEL)); 9862 if (ret < 0) { 9863 free_vm_area(area); 9864 return 0; 9865 } 9866 9867 return (u64)vmap_start; 9868 } 9869 #else 9870 static inline u64 map_pages(unsigned long start, unsigned long size) 9871 { 9872 return 0; 9873 } 9874 #endif 9875 9876 /** 9877 * trace_array_get_by_name - Create/Lookup a trace array, given its name. 9878 * @name: The name of the trace array to be looked up/created. 9879 * @systems: A list of systems to create event directories for (NULL for all) 9880 * 9881 * Returns pointer to trace array with given name. 9882 * NULL, if it cannot be created. 9883 * 9884 * NOTE: This function increments the reference counter associated with the 9885 * trace array returned. This makes sure it cannot be freed while in use. 9886 * Use trace_array_put() once the trace array is no longer needed. 9887 * If the trace_array is to be freed, trace_array_destroy() needs to 9888 * be called after the trace_array_put(), or simply let user space delete 9889 * it from the tracefs instances directory. But until the 9890 * trace_array_put() is called, user space can not delete it. 
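 *
 * A minimal in-kernel usage sketch (illustrative only; "my_instance" is a
 * made-up instance name and error handling is elided):
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_instance", NULL);
 *	if (tr) {
 *		trace_array_printk(tr, _THIS_IP_, "hello from my_instance\n");
 *		trace_array_put(tr);
 *	}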
9891 * 9892 */ 9893 struct trace_array *trace_array_get_by_name(const char *name, const char *systems) 9894 { 9895 struct trace_array *tr; 9896 9897 guard(mutex)(&event_mutex); 9898 guard(mutex)(&trace_types_lock); 9899 9900 list_for_each_entry(tr, &ftrace_trace_arrays, list) { 9901 if (tr->name && strcmp(tr->name, name) == 0) { 9902 tr->ref++; 9903 return tr; 9904 } 9905 } 9906 9907 tr = trace_array_create_systems(name, systems, 0, 0); 9908 9909 if (IS_ERR(tr)) 9910 tr = NULL; 9911 else 9912 tr->ref++; 9913 9914 return tr; 9915 } 9916 EXPORT_SYMBOL_GPL(trace_array_get_by_name); 9917 9918 static int __remove_instance(struct trace_array *tr) 9919 { 9920 int i; 9921 9922 /* Reference counter for a newly created trace array = 1. */ 9923 if (tr->ref > 1 || (tr->current_trace && tr->trace_ref)) 9924 return -EBUSY; 9925 9926 list_del(&tr->list); 9927 9928 /* Disable all the flags that were enabled coming in */ 9929 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) { 9930 if ((1 << i) & ZEROED_TRACE_FLAGS) 9931 set_tracer_flag(tr, 1 << i, 0); 9932 } 9933 9934 if (printk_trace == tr) 9935 update_printk_trace(&global_trace); 9936 9937 tracing_set_nop(tr); 9938 clear_ftrace_function_probes(tr); 9939 event_trace_del_tracer(tr); 9940 ftrace_clear_pids(tr); 9941 ftrace_destroy_function_files(tr); 9942 tracefs_remove(tr->dir); 9943 free_percpu(tr->last_func_repeats); 9944 free_trace_buffers(tr); 9945 clear_tracing_err_log(tr); 9946 9947 if (tr->range_name) { 9948 reserve_mem_release_by_name(tr->range_name); 9949 kfree(tr->range_name); 9950 } 9951 9952 for (i = 0; i < tr->nr_topts; i++) { 9953 kfree(tr->topts[i].topts); 9954 } 9955 kfree(tr->topts); 9956 9957 free_cpumask_var(tr->pipe_cpumask); 9958 free_cpumask_var(tr->tracing_cpumask); 9959 kfree_const(tr->system_names); 9960 kfree(tr->name); 9961 kfree(tr); 9962 9963 return 0; 9964 } 9965 9966 int trace_array_destroy(struct trace_array *this_tr) 9967 { 9968 struct trace_array *tr; 9969 9970 if (!this_tr) 9971 return -EINVAL; 9972 9973 guard(mutex)(&event_mutex); 9974 guard(mutex)(&trace_types_lock); 9975 9976 9977 /* Making sure trace array exists before destroying it. 
*/ 9978 list_for_each_entry(tr, &ftrace_trace_arrays, list) { 9979 if (tr == this_tr) 9980 return __remove_instance(tr); 9981 } 9982 9983 return -ENODEV; 9984 } 9985 EXPORT_SYMBOL_GPL(trace_array_destroy); 9986 9987 static int instance_rmdir(const char *name) 9988 { 9989 struct trace_array *tr; 9990 9991 guard(mutex)(&event_mutex); 9992 guard(mutex)(&trace_types_lock); 9993 9994 tr = trace_array_find(name); 9995 if (!tr) 9996 return -ENODEV; 9997 9998 return __remove_instance(tr); 9999 } 10000 10001 static __init void create_trace_instances(struct dentry *d_tracer) 10002 { 10003 struct trace_array *tr; 10004 10005 trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer, 10006 instance_mkdir, 10007 instance_rmdir); 10008 if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n")) 10009 return; 10010 10011 guard(mutex)(&event_mutex); 10012 guard(mutex)(&trace_types_lock); 10013 10014 list_for_each_entry(tr, &ftrace_trace_arrays, list) { 10015 if (!tr->name) 10016 continue; 10017 if (MEM_FAIL(trace_array_create_dir(tr) < 0, 10018 "Failed to create instance directory\n")) 10019 return; 10020 } 10021 } 10022 10023 static void 10024 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer) 10025 { 10026 int cpu; 10027 10028 trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer, 10029 tr, &show_traces_fops); 10030 10031 trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer, 10032 tr, &set_tracer_fops); 10033 10034 trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer, 10035 tr, &tracing_cpumask_fops); 10036 10037 trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer, 10038 tr, &tracing_iter_fops); 10039 10040 trace_create_file("trace", TRACE_MODE_WRITE, d_tracer, 10041 tr, &tracing_fops); 10042 10043 trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer, 10044 tr, &tracing_pipe_fops); 10045 10046 trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer, 10047 tr, &tracing_entries_fops); 10048 10049 trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer, 10050 tr, &tracing_total_entries_fops); 10051 10052 trace_create_file("free_buffer", 0200, d_tracer, 10053 tr, &tracing_free_buffer_fops); 10054 10055 trace_create_file("trace_marker", 0220, d_tracer, 10056 tr, &tracing_mark_fops); 10057 10058 tr->trace_marker_file = __find_event_file(tr, "ftrace", "print"); 10059 10060 trace_create_file("trace_marker_raw", 0220, d_tracer, 10061 tr, &tracing_mark_raw_fops); 10062 10063 trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr, 10064 &trace_clock_fops); 10065 10066 trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer, 10067 tr, &rb_simple_fops); 10068 10069 trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr, 10070 &trace_time_stamp_mode_fops); 10071 10072 tr->buffer_percent = 50; 10073 10074 trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer, 10075 tr, &buffer_percent_fops); 10076 10077 trace_create_file("buffer_subbuf_size_kb", TRACE_MODE_WRITE, d_tracer, 10078 tr, &buffer_subbuf_size_fops); 10079 10080 create_trace_options_dir(tr); 10081 10082 #ifdef CONFIG_TRACER_MAX_TRACE 10083 trace_create_maxlat_file(tr, d_tracer); 10084 #endif 10085 10086 if (ftrace_create_function_files(tr, d_tracer)) 10087 MEM_FAIL(1, "Could not allocate function filter files"); 10088 10089 if (tr->range_addr_start) { 10090 trace_create_file("last_boot_info", TRACE_MODE_READ, d_tracer, 10091 tr, &last_boot_fops); 10092 #ifdef CONFIG_TRACER_SNAPSHOT 10093 } else { 10094 
		trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
				  tr, &snapshot_fops);
#endif
	}

	trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
			  tr, &tracing_err_log_fops);

	for_each_tracing_cpu(cpu)
		tracing_init_tracefs_percpu(tr, cpu);

	ftrace_init_tracefs(tr, d_tracer);
}

static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
{
	struct vfsmount *mnt;
	struct file_system_type *type;

	/*
	 * To maintain backward compatibility for tools that mount
	 * debugfs to get to the tracing facility, tracefs is automatically
	 * mounted to the debugfs/tracing directory.
	 */
	type = get_fs_type("tracefs");
	if (!type)
		return NULL;
	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
	put_filesystem(type);
	if (IS_ERR(mnt))
		return NULL;
	mntget(mnt);

	return mnt;
}

/**
 * tracing_init_dentry - initialize top level trace array
 *
 * This is called when creating files or directories in the tracing
 * directory. It is called via fs_initcall() by any of the boot up code
 * and makes sure the top level tracing directory is available, returning
 * 0 on success and a negative error code otherwise.
 */
int tracing_init_dentry(void)
{
	struct trace_array *tr = &global_trace;

	if (security_locked_down(LOCKDOWN_TRACEFS)) {
		pr_warn("Tracing disabled due to lockdown\n");
		return -EPERM;
	}

	/* The top level trace array uses NULL as parent */
	if (tr->dir)
		return 0;

	if (WARN_ON(!tracefs_initialized()))
		return -ENODEV;

	/*
	 * As there may still be users that expect the tracing
	 * files to exist in debugfs/tracing, we must automount
	 * the tracefs file system there, so older tools still
	 * work with the newer kernel.
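	 * For example, a tool that still opens
	 * /sys/kernel/debug/tracing/trace keeps working even though the
	 * preferred path is now /sys/kernel/tracing/trace.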
10158 */ 10159 tr->dir = debugfs_create_automount("tracing", NULL, 10160 trace_automount, NULL); 10161 10162 return 0; 10163 } 10164 10165 extern struct trace_eval_map *__start_ftrace_eval_maps[]; 10166 extern struct trace_eval_map *__stop_ftrace_eval_maps[]; 10167 10168 static struct workqueue_struct *eval_map_wq __initdata; 10169 static struct work_struct eval_map_work __initdata; 10170 static struct work_struct tracerfs_init_work __initdata; 10171 10172 static void __init eval_map_work_func(struct work_struct *work) 10173 { 10174 int len; 10175 10176 len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps; 10177 trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len); 10178 } 10179 10180 static int __init trace_eval_init(void) 10181 { 10182 INIT_WORK(&eval_map_work, eval_map_work_func); 10183 10184 eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0); 10185 if (!eval_map_wq) { 10186 pr_err("Unable to allocate eval_map_wq\n"); 10187 /* Do work here */ 10188 eval_map_work_func(&eval_map_work); 10189 return -ENOMEM; 10190 } 10191 10192 queue_work(eval_map_wq, &eval_map_work); 10193 return 0; 10194 } 10195 10196 subsys_initcall(trace_eval_init); 10197 10198 static int __init trace_eval_sync(void) 10199 { 10200 /* Make sure the eval map updates are finished */ 10201 if (eval_map_wq) 10202 destroy_workqueue(eval_map_wq); 10203 return 0; 10204 } 10205 10206 late_initcall_sync(trace_eval_sync); 10207 10208 10209 #ifdef CONFIG_MODULES 10210 10211 bool module_exists(const char *module) 10212 { 10213 /* All modules have the symbol __this_module */ 10214 static const char this_mod[] = "__this_module"; 10215 char modname[MAX_PARAM_PREFIX_LEN + sizeof(this_mod) + 2]; 10216 unsigned long val; 10217 int n; 10218 10219 n = snprintf(modname, sizeof(modname), "%s:%s", module, this_mod); 10220 10221 if (n > sizeof(modname) - 1) 10222 return false; 10223 10224 val = module_kallsyms_lookup_name(modname); 10225 return val != 0; 10226 } 10227 10228 static void trace_module_add_evals(struct module *mod) 10229 { 10230 if (!mod->num_trace_evals) 10231 return; 10232 10233 /* 10234 * Modules with bad taint do not have events created, do 10235 * not bother with enums either. 
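	 * (The eval maps are what translate the enum and sizeof() symbols
	 * used in the module's event print formats into numeric values, so
	 * without events they have no users.)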
10236 */ 10237 if (trace_module_has_bad_taint(mod)) 10238 return; 10239 10240 trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals); 10241 } 10242 10243 #ifdef CONFIG_TRACE_EVAL_MAP_FILE 10244 static void trace_module_remove_evals(struct module *mod) 10245 { 10246 union trace_eval_map_item *map; 10247 union trace_eval_map_item **last = &trace_eval_maps; 10248 10249 if (!mod->num_trace_evals) 10250 return; 10251 10252 guard(mutex)(&trace_eval_mutex); 10253 10254 map = trace_eval_maps; 10255 10256 while (map) { 10257 if (map->head.mod == mod) 10258 break; 10259 map = trace_eval_jmp_to_tail(map); 10260 last = &map->tail.next; 10261 map = map->tail.next; 10262 } 10263 if (!map) 10264 return; 10265 10266 *last = trace_eval_jmp_to_tail(map)->tail.next; 10267 kfree(map); 10268 } 10269 #else 10270 static inline void trace_module_remove_evals(struct module *mod) { } 10271 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */ 10272 10273 static void trace_module_record(struct module *mod, bool add) 10274 { 10275 struct trace_array *tr; 10276 unsigned long flags; 10277 10278 list_for_each_entry(tr, &ftrace_trace_arrays, list) { 10279 flags = tr->flags & (TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT); 10280 /* Update any persistent trace array that has already been started */ 10281 if (flags == TRACE_ARRAY_FL_BOOT && add) { 10282 guard(mutex)(&scratch_mutex); 10283 save_mod(mod, tr); 10284 } else if (flags & TRACE_ARRAY_FL_LAST_BOOT) { 10285 /* Update delta if the module loaded in previous boot */ 10286 make_mod_delta(mod, tr); 10287 } 10288 } 10289 } 10290 10291 static int trace_module_notify(struct notifier_block *self, 10292 unsigned long val, void *data) 10293 { 10294 struct module *mod = data; 10295 10296 switch (val) { 10297 case MODULE_STATE_COMING: 10298 trace_module_add_evals(mod); 10299 trace_module_record(mod, true); 10300 break; 10301 case MODULE_STATE_GOING: 10302 trace_module_remove_evals(mod); 10303 trace_module_record(mod, false); 10304 break; 10305 } 10306 10307 return NOTIFY_OK; 10308 } 10309 10310 static struct notifier_block trace_module_nb = { 10311 .notifier_call = trace_module_notify, 10312 .priority = 0, 10313 }; 10314 #endif /* CONFIG_MODULES */ 10315 10316 static __init void tracer_init_tracefs_work_func(struct work_struct *work) 10317 { 10318 10319 event_trace_init(); 10320 10321 init_tracer_tracefs(&global_trace, NULL); 10322 ftrace_init_tracefs_toplevel(&global_trace, NULL); 10323 10324 trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL, 10325 &global_trace, &tracing_thresh_fops); 10326 10327 trace_create_file("README", TRACE_MODE_READ, NULL, 10328 NULL, &tracing_readme_fops); 10329 10330 trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL, 10331 NULL, &tracing_saved_cmdlines_fops); 10332 10333 trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL, 10334 NULL, &tracing_saved_cmdlines_size_fops); 10335 10336 trace_create_file("saved_tgids", TRACE_MODE_READ, NULL, 10337 NULL, &tracing_saved_tgids_fops); 10338 10339 trace_create_eval_file(NULL); 10340 10341 #ifdef CONFIG_MODULES 10342 register_module_notifier(&trace_module_nb); 10343 #endif 10344 10345 #ifdef CONFIG_DYNAMIC_FTRACE 10346 trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL, 10347 NULL, &tracing_dyn_info_fops); 10348 #endif 10349 10350 create_trace_instances(NULL); 10351 10352 update_tracer_options(&global_trace); 10353 } 10354 10355 static __init int tracer_init_tracefs(void) 10356 { 10357 int ret; 10358 10359 trace_access_lock_init(); 10360 10361 ret = 
tracing_init_dentry(); 10362 if (ret) 10363 return 0; 10364 10365 if (eval_map_wq) { 10366 INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func); 10367 queue_work(eval_map_wq, &tracerfs_init_work); 10368 } else { 10369 tracer_init_tracefs_work_func(NULL); 10370 } 10371 10372 rv_init_interface(); 10373 10374 return 0; 10375 } 10376 10377 fs_initcall(tracer_init_tracefs); 10378 10379 static int trace_die_panic_handler(struct notifier_block *self, 10380 unsigned long ev, void *unused); 10381 10382 static struct notifier_block trace_panic_notifier = { 10383 .notifier_call = trace_die_panic_handler, 10384 .priority = INT_MAX - 1, 10385 }; 10386 10387 static struct notifier_block trace_die_notifier = { 10388 .notifier_call = trace_die_panic_handler, 10389 .priority = INT_MAX - 1, 10390 }; 10391 10392 /* 10393 * The idea is to execute the following die/panic callback early, in order 10394 * to avoid showing irrelevant information in the trace (like other panic 10395 * notifier functions); we are the 2nd to run, after hung_task/rcu_stall 10396 * warnings get disabled (to prevent potential log flooding). 10397 */ 10398 static int trace_die_panic_handler(struct notifier_block *self, 10399 unsigned long ev, void *unused) 10400 { 10401 if (!ftrace_dump_on_oops_enabled()) 10402 return NOTIFY_DONE; 10403 10404 /* The die notifier requires DIE_OOPS to trigger */ 10405 if (self == &trace_die_notifier && ev != DIE_OOPS) 10406 return NOTIFY_DONE; 10407 10408 ftrace_dump(DUMP_PARAM); 10409 10410 return NOTIFY_DONE; 10411 } 10412 10413 /* 10414 * printk is set to max of 1024, we really don't need it that big. 10415 * Nothing should be printing 1000 characters anyway. 10416 */ 10417 #define TRACE_MAX_PRINT 1000 10418 10419 /* 10420 * Define here KERN_TRACE so that we have one place to modify 10421 * it if we decide to change what log level the ftrace dump 10422 * should be at. 10423 */ 10424 #define KERN_TRACE KERN_EMERG 10425 10426 void 10427 trace_printk_seq(struct trace_seq *s) 10428 { 10429 /* Probably should print a warning here. */ 10430 if (s->seq.len >= TRACE_MAX_PRINT) 10431 s->seq.len = TRACE_MAX_PRINT; 10432 10433 /* 10434 * More paranoid code. Although the buffer size is set to 10435 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just 10436 * an extra layer of protection. 10437 */ 10438 if (WARN_ON_ONCE(s->seq.len >= s->seq.size)) 10439 s->seq.len = s->seq.size - 1; 10440 10441 /* should be zero ended, but we are paranoid. */ 10442 s->buffer[s->seq.len] = 0; 10443 10444 printk(KERN_TRACE "%s", s->buffer); 10445 10446 trace_seq_init(s); 10447 } 10448 10449 static void trace_init_iter(struct trace_iterator *iter, struct trace_array *tr) 10450 { 10451 iter->tr = tr; 10452 iter->trace = iter->tr->current_trace; 10453 iter->cpu_file = RING_BUFFER_ALL_CPUS; 10454 iter->array_buffer = &tr->array_buffer; 10455 10456 if (iter->trace && iter->trace->open) 10457 iter->trace->open(iter); 10458 10459 /* Annotate start of buffers if we had overruns */ 10460 if (ring_buffer_overruns(iter->array_buffer->buffer)) 10461 iter->iter_flags |= TRACE_FILE_ANNOTATE; 10462 10463 /* Output in nanoseconds only if we are using a clock in nanoseconds. 
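	 * Clocks such as "counter" and "uptime" are not in nanoseconds, so
	 * their raw values are shown instead.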
	 */
	if (trace_clocks[iter->tr->clock_id].in_ns)
		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;

	/* Can not use kmalloc for iter.temp and iter.fmt */
	iter->temp = static_temp_buf;
	iter->temp_size = STATIC_TEMP_BUF_SIZE;
	iter->fmt = static_fmt_buf;
	iter->fmt_size = STATIC_FMT_BUF_SIZE;
}

void trace_init_global_iter(struct trace_iterator *iter)
{
	trace_init_iter(iter, &global_trace);
}

static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_mode)
{
	/* use static because iter can be a bit big for the stack */
	static struct trace_iterator iter;
	unsigned int old_userobj;
	unsigned long flags;
	int cnt = 0, cpu;

	/*
	 * Always turn off tracing when we dump.
	 * We don't need to show trace output of what happens
	 * between multiple crashes.
	 *
	 * If the user does a sysrq-z, then they can re-enable
	 * tracing with echo 1 > tracing_on.
	 */
	tracer_tracing_off(tr);

	local_irq_save(flags);

	/* Simulate the iterator */
	trace_init_iter(&iter, tr);

	for_each_tracing_cpu(cpu) {
		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
	}

	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;

	/* don't look at user memory in panic mode */
	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;

	if (dump_mode == DUMP_ORIG)
		iter.cpu_file = raw_smp_processor_id();
	else
		iter.cpu_file = RING_BUFFER_ALL_CPUS;

	if (tr == &global_trace)
		printk(KERN_TRACE "Dumping ftrace buffer:\n");
	else
		printk(KERN_TRACE "Dumping ftrace instance %s buffer:\n", tr->name);

	/* Did function tracer already get disabled? */
	if (ftrace_is_dead()) {
		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
		printk("# MAY BE MISSING FUNCTION EVENTS\n");
	}

	/*
	 * We need to stop all tracing on all CPUs to read
	 * the next buffer. This is a bit expensive, but is
	 * not done often. We fill all that we can read,
	 * and then release the locks again.
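	 * Note that printing also consumes the events: once the dump
	 * finishes, the entries that were dumped are gone from the buffer.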
10532 */ 10533 10534 while (!trace_empty(&iter)) { 10535 10536 if (!cnt) 10537 printk(KERN_TRACE "---------------------------------\n"); 10538 10539 cnt++; 10540 10541 trace_iterator_reset(&iter); 10542 iter.iter_flags |= TRACE_FILE_LAT_FMT; 10543 10544 if (trace_find_next_entry_inc(&iter) != NULL) { 10545 int ret; 10546 10547 ret = print_trace_line(&iter); 10548 if (ret != TRACE_TYPE_NO_CONSUME) 10549 trace_consume(&iter); 10550 } 10551 touch_nmi_watchdog(); 10552 10553 trace_printk_seq(&iter.seq); 10554 } 10555 10556 if (!cnt) 10557 printk(KERN_TRACE " (ftrace buffer empty)\n"); 10558 else 10559 printk(KERN_TRACE "---------------------------------\n"); 10560 10561 tr->trace_flags |= old_userobj; 10562 10563 for_each_tracing_cpu(cpu) { 10564 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled); 10565 } 10566 local_irq_restore(flags); 10567 } 10568 10569 static void ftrace_dump_by_param(void) 10570 { 10571 bool first_param = true; 10572 char dump_param[MAX_TRACER_SIZE]; 10573 char *buf, *token, *inst_name; 10574 struct trace_array *tr; 10575 10576 strscpy(dump_param, ftrace_dump_on_oops, MAX_TRACER_SIZE); 10577 buf = dump_param; 10578 10579 while ((token = strsep(&buf, ",")) != NULL) { 10580 if (first_param) { 10581 first_param = false; 10582 if (!strcmp("0", token)) 10583 continue; 10584 else if (!strcmp("1", token)) { 10585 ftrace_dump_one(&global_trace, DUMP_ALL); 10586 continue; 10587 } 10588 else if (!strcmp("2", token) || 10589 !strcmp("orig_cpu", token)) { 10590 ftrace_dump_one(&global_trace, DUMP_ORIG); 10591 continue; 10592 } 10593 } 10594 10595 inst_name = strsep(&token, "="); 10596 tr = trace_array_find(inst_name); 10597 if (!tr) { 10598 printk(KERN_TRACE "Instance %s not found\n", inst_name); 10599 continue; 10600 } 10601 10602 if (token && (!strcmp("2", token) || 10603 !strcmp("orig_cpu", token))) 10604 ftrace_dump_one(tr, DUMP_ORIG); 10605 else 10606 ftrace_dump_one(tr, DUMP_ALL); 10607 } 10608 } 10609 10610 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) 10611 { 10612 static atomic_t dump_running; 10613 10614 /* Only allow one dump user at a time. 
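	 * The atomic_inc_return() below acts as a trylock: the first caller
	 * sees 1 and proceeds, while any concurrent caller (e.g. two CPUs
	 * oopsing at the same time) sees a higher count, undoes its
	 * increment and returns without dumping.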
	 */
	if (atomic_inc_return(&dump_running) != 1) {
		atomic_dec(&dump_running);
		return;
	}

	switch (oops_dump_mode) {
	case DUMP_ALL:
		ftrace_dump_one(&global_trace, DUMP_ALL);
		break;
	case DUMP_ORIG:
		ftrace_dump_one(&global_trace, DUMP_ORIG);
		break;
	case DUMP_PARAM:
		ftrace_dump_by_param();
		break;
	case DUMP_NONE:
		break;
	default:
		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
		ftrace_dump_one(&global_trace, DUMP_ALL);
	}

	atomic_dec(&dump_running);
}
EXPORT_SYMBOL_GPL(ftrace_dump);

#define WRITE_BUFSIZE 4096

ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
				size_t count, loff_t *ppos,
				int (*createfn)(const char *))
{
	char *kbuf, *buf, *tmp;
	int ret = 0;
	size_t done = 0;
	size_t size;

	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
	if (!kbuf)
		return -ENOMEM;

	while (done < count) {
		size = count - done;

		if (size >= WRITE_BUFSIZE)
			size = WRITE_BUFSIZE - 1;

		if (copy_from_user(kbuf, buffer + done, size)) {
			ret = -EFAULT;
			goto out;
		}
		kbuf[size] = '\0';
		buf = kbuf;
		do {
			tmp = strchr(buf, '\n');
			if (tmp) {
				*tmp = '\0';
				size = tmp - buf + 1;
			} else {
				size = strlen(buf);
				if (done + size < count) {
					if (buf != kbuf)
						break;
					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
					pr_warn("Line length is too long: Should be less than %d\n",
						WRITE_BUFSIZE - 2);
					ret = -EINVAL;
					goto out;
				}
			}
			done += size;

			/* Remove comments */
			tmp = strchr(buf, '#');

			if (tmp)
				*tmp = '\0';

			ret = createfn(buf);
			if (ret)
				goto out;
			buf += size;

		} while (done < count);
	}
	ret = done;

out:
	kfree(kbuf);

	return ret;
}

#ifdef CONFIG_TRACER_MAX_TRACE
__init static bool tr_needs_alloc_snapshot(const char *name)
{
	char *test;
	int len = strlen(name);
	bool ret;

	if (!boot_snapshot_index)
		return false;

	if (strncmp(name, boot_snapshot_info, len) == 0 &&
	    boot_snapshot_info[len] == '\t')
		return true;

	test = kmalloc(strlen(name) + 3, GFP_KERNEL);
	if (!test)
		return false;

	sprintf(test, "\t%s\t", name);
	ret = strstr(boot_snapshot_info, test) != NULL;
	kfree(test);
	return ret;
}

__init static void do_allocate_snapshot(const char *name)
{
	if (!tr_needs_alloc_snapshot(name))
		return;

	/*
	 * When allocate_snapshot is set, the next call to
	 * allocate_trace_buffers() (called by trace_array_get_by_name())
	 * will allocate the snapshot buffer. That will also clear
	 * this flag.
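	 * (The names checked against boot_snapshot_info typically come from
	 * the "ftrace_boot_snapshot=" kernel command line option.)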
10742 */ 10743 allocate_snapshot = true; 10744 } 10745 #else 10746 static inline void do_allocate_snapshot(const char *name) { } 10747 #endif 10748 10749 __init static void enable_instances(void) 10750 { 10751 struct trace_array *tr; 10752 bool memmap_area = false; 10753 char *curr_str; 10754 char *name; 10755 char *str; 10756 char *tok; 10757 10758 /* A tab is always appended */ 10759 boot_instance_info[boot_instance_index - 1] = '\0'; 10760 str = boot_instance_info; 10761 10762 while ((curr_str = strsep(&str, "\t"))) { 10763 phys_addr_t start = 0; 10764 phys_addr_t size = 0; 10765 unsigned long addr = 0; 10766 bool traceprintk = false; 10767 bool traceoff = false; 10768 char *flag_delim; 10769 char *addr_delim; 10770 char *rname __free(kfree) = NULL; 10771 10772 tok = strsep(&curr_str, ","); 10773 10774 flag_delim = strchr(tok, '^'); 10775 addr_delim = strchr(tok, '@'); 10776 10777 if (addr_delim) 10778 *addr_delim++ = '\0'; 10779 10780 if (flag_delim) 10781 *flag_delim++ = '\0'; 10782 10783 name = tok; 10784 10785 if (flag_delim) { 10786 char *flag; 10787 10788 while ((flag = strsep(&flag_delim, "^"))) { 10789 if (strcmp(flag, "traceoff") == 0) { 10790 traceoff = true; 10791 } else if ((strcmp(flag, "printk") == 0) || 10792 (strcmp(flag, "traceprintk") == 0) || 10793 (strcmp(flag, "trace_printk") == 0)) { 10794 traceprintk = true; 10795 } else { 10796 pr_info("Tracing: Invalid instance flag '%s' for %s\n", 10797 flag, name); 10798 } 10799 } 10800 } 10801 10802 tok = addr_delim; 10803 if (tok && isdigit(*tok)) { 10804 start = memparse(tok, &tok); 10805 if (!start) { 10806 pr_warn("Tracing: Invalid boot instance address for %s\n", 10807 name); 10808 continue; 10809 } 10810 if (*tok != ':') { 10811 pr_warn("Tracing: No size specified for instance %s\n", name); 10812 continue; 10813 } 10814 tok++; 10815 size = memparse(tok, &tok); 10816 if (!size) { 10817 pr_warn("Tracing: Invalid boot instance size for %s\n", 10818 name); 10819 continue; 10820 } 10821 memmap_area = true; 10822 } else if (tok) { 10823 if (!reserve_mem_find_by_name(tok, &start, &size)) { 10824 start = 0; 10825 pr_warn("Failed to map boot instance %s to %s\n", name, tok); 10826 continue; 10827 } 10828 rname = kstrdup(tok, GFP_KERNEL); 10829 } 10830 10831 if (start) { 10832 /* Start and size must be page aligned */ 10833 if (start & ~PAGE_MASK) { 10834 pr_warn("Tracing: mapping start addr %pa is not page aligned\n", &start); 10835 continue; 10836 } 10837 if (size & ~PAGE_MASK) { 10838 pr_warn("Tracing: mapping size %pa is not page aligned\n", &size); 10839 continue; 10840 } 10841 10842 if (memmap_area) 10843 addr = map_pages(start, size); 10844 else 10845 addr = (unsigned long)phys_to_virt(start); 10846 if (addr) { 10847 pr_info("Tracing: mapped boot instance %s at physical memory %pa of size 0x%lx\n", 10848 name, &start, (unsigned long)size); 10849 } else { 10850 pr_warn("Tracing: Failed to map boot instance %s\n", name); 10851 continue; 10852 } 10853 } else { 10854 /* Only non mapped buffers have snapshot buffers */ 10855 if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE)) 10856 do_allocate_snapshot(name); 10857 } 10858 10859 tr = trace_array_create_systems(name, NULL, addr, size); 10860 if (IS_ERR(tr)) { 10861 pr_warn("Tracing: Failed to create instance buffer %s\n", curr_str); 10862 continue; 10863 } 10864 10865 if (traceoff) 10866 tracer_tracing_off(tr); 10867 10868 if (traceprintk) 10869 update_printk_trace(tr); 10870 10871 /* 10872 * memmap'd buffers can not be freed. 
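		 * They were mapped straight from a raw physical address range
		 * rather than taken from the page allocator, so the extra
		 * reference below makes sure the instance can never be removed.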
10873 */ 10874 if (memmap_area) { 10875 tr->flags |= TRACE_ARRAY_FL_MEMMAP; 10876 tr->ref++; 10877 } 10878 10879 if (start) { 10880 tr->flags |= TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT; 10881 tr->range_name = no_free_ptr(rname); 10882 } 10883 10884 while ((tok = strsep(&curr_str, ","))) { 10885 early_enable_events(tr, tok, true); 10886 } 10887 } 10888 } 10889 10890 __init static int tracer_alloc_buffers(void) 10891 { 10892 int ring_buf_size; 10893 int ret = -ENOMEM; 10894 10895 10896 if (security_locked_down(LOCKDOWN_TRACEFS)) { 10897 pr_warn("Tracing disabled due to lockdown\n"); 10898 return -EPERM; 10899 } 10900 10901 /* 10902 * Make sure we don't accidentally add more trace options 10903 * than we have bits for. 10904 */ 10905 BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE); 10906 10907 if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL)) 10908 goto out; 10909 10910 if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL)) 10911 goto out_free_buffer_mask; 10912 10913 /* Only allocate trace_printk buffers if a trace_printk exists */ 10914 if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt) 10915 /* Must be called before global_trace.buffer is allocated */ 10916 trace_printk_init_buffers(); 10917 10918 /* To save memory, keep the ring buffer size to its minimum */ 10919 if (global_trace.ring_buffer_expanded) 10920 ring_buf_size = trace_buf_size; 10921 else 10922 ring_buf_size = 1; 10923 10924 cpumask_copy(tracing_buffer_mask, cpu_possible_mask); 10925 cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask); 10926 10927 raw_spin_lock_init(&global_trace.start_lock); 10928 10929 /* 10930 * The prepare callbacks allocates some memory for the ring buffer. We 10931 * don't free the buffer if the CPU goes down. If we were to free 10932 * the buffer, then the user would lose any trace that was in the 10933 * buffer. The memory will be removed once the "instance" is removed. 10934 */ 10935 ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE, 10936 "trace/RB:prepare", trace_rb_cpu_prepare, 10937 NULL); 10938 if (ret < 0) 10939 goto out_free_cpumask; 10940 /* Used for event triggers */ 10941 ret = -ENOMEM; 10942 temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE); 10943 if (!temp_buffer) 10944 goto out_rm_hp_state; 10945 10946 if (trace_create_savedcmd() < 0) 10947 goto out_free_temp_buffer; 10948 10949 if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL)) 10950 goto out_free_savedcmd; 10951 10952 /* TODO: make the number of buffers hot pluggable with CPUS */ 10953 if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) { 10954 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n"); 10955 goto out_free_pipe_cpumask; 10956 } 10957 if (global_trace.buffer_disabled) 10958 tracing_off(); 10959 10960 if (trace_boot_clock) { 10961 ret = tracing_set_clock(&global_trace, trace_boot_clock); 10962 if (ret < 0) 10963 pr_warn("Trace clock %s not defined, going back to default\n", 10964 trace_boot_clock); 10965 } 10966 10967 /* 10968 * register_tracer() might reference current_trace, so it 10969 * needs to be set before we register anything. This is 10970 * just a bootstrap of current_trace anyway. 
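	 * (This is also why "nop" is what the current_tracer file reports
	 * until a real tracer is selected.)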
10971 */ 10972 global_trace.current_trace = &nop_trace; 10973 10974 global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; 10975 #ifdef CONFIG_TRACER_MAX_TRACE 10976 spin_lock_init(&global_trace.snapshot_trigger_lock); 10977 #endif 10978 ftrace_init_global_array_ops(&global_trace); 10979 10980 #ifdef CONFIG_MODULES 10981 INIT_LIST_HEAD(&global_trace.mod_events); 10982 #endif 10983 10984 init_trace_flags_index(&global_trace); 10985 10986 register_tracer(&nop_trace); 10987 10988 /* Function tracing may start here (via kernel command line) */ 10989 init_function_trace(); 10990 10991 /* All seems OK, enable tracing */ 10992 tracing_disabled = 0; 10993 10994 atomic_notifier_chain_register(&panic_notifier_list, 10995 &trace_panic_notifier); 10996 10997 register_die_notifier(&trace_die_notifier); 10998 10999 global_trace.flags = TRACE_ARRAY_FL_GLOBAL; 11000 11001 INIT_LIST_HEAD(&global_trace.systems); 11002 INIT_LIST_HEAD(&global_trace.events); 11003 INIT_LIST_HEAD(&global_trace.hist_vars); 11004 INIT_LIST_HEAD(&global_trace.err_log); 11005 list_add(&global_trace.list, &ftrace_trace_arrays); 11006 11007 apply_trace_boot_options(); 11008 11009 register_snapshot_cmd(); 11010 11011 return 0; 11012 11013 out_free_pipe_cpumask: 11014 free_cpumask_var(global_trace.pipe_cpumask); 11015 out_free_savedcmd: 11016 trace_free_saved_cmdlines_buffer(); 11017 out_free_temp_buffer: 11018 ring_buffer_free(temp_buffer); 11019 out_rm_hp_state: 11020 cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE); 11021 out_free_cpumask: 11022 free_cpumask_var(global_trace.tracing_cpumask); 11023 out_free_buffer_mask: 11024 free_cpumask_var(tracing_buffer_mask); 11025 out: 11026 return ret; 11027 } 11028 11029 #ifdef CONFIG_FUNCTION_TRACER 11030 /* Used to set module cached ftrace filtering at boot up */ 11031 __init struct trace_array *trace_get_global_array(void) 11032 { 11033 return &global_trace; 11034 } 11035 #endif 11036 11037 void __init ftrace_boot_snapshot(void) 11038 { 11039 #ifdef CONFIG_TRACER_MAX_TRACE 11040 struct trace_array *tr; 11041 11042 if (!snapshot_at_boot) 11043 return; 11044 11045 list_for_each_entry(tr, &ftrace_trace_arrays, list) { 11046 if (!tr->allocated_snapshot) 11047 continue; 11048 11049 tracing_snapshot_instance(tr); 11050 trace_array_puts(tr, "** Boot snapshot taken **\n"); 11051 } 11052 #endif 11053 } 11054 11055 void __init early_trace_init(void) 11056 { 11057 if (tracepoint_printk) { 11058 tracepoint_print_iter = 11059 kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL); 11060 if (MEM_FAIL(!tracepoint_print_iter, 11061 "Failed to allocate trace iterator\n")) 11062 tracepoint_printk = 0; 11063 else 11064 static_key_enable(&tracepoint_printk_key.key); 11065 } 11066 tracer_alloc_buffers(); 11067 11068 init_events(); 11069 } 11070 11071 void __init trace_init(void) 11072 { 11073 trace_event_init(); 11074 11075 if (boot_instance_index) 11076 enable_instances(); 11077 } 11078 11079 __init static void clear_boot_tracer(void) 11080 { 11081 /* 11082 * The default tracer at boot buffer is an init section. 11083 * This function is called in lateinit. If we did not 11084 * find the boot tracer, then clear it out, to prevent 11085 * later registration from accessing the buffer that is 11086 * about to be freed. 
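	 * (default_bootup_tracer points at __initdata storage filled from
	 * the "ftrace=" command line option, so it must not be used after
	 * the init sections are freed.)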
11087 */ 11088 if (!default_bootup_tracer) 11089 return; 11090 11091 printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n", 11092 default_bootup_tracer); 11093 default_bootup_tracer = NULL; 11094 } 11095 11096 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK 11097 __init static void tracing_set_default_clock(void) 11098 { 11099 /* sched_clock_stable() is determined in late_initcall */ 11100 if (!trace_boot_clock && !sched_clock_stable()) { 11101 if (security_locked_down(LOCKDOWN_TRACEFS)) { 11102 pr_warn("Can not set tracing clock due to lockdown\n"); 11103 return; 11104 } 11105 11106 printk(KERN_WARNING 11107 "Unstable clock detected, switching default tracing clock to \"global\"\n" 11108 "If you want to keep using the local clock, then add:\n" 11109 " \"trace_clock=local\"\n" 11110 "on the kernel command line\n"); 11111 tracing_set_clock(&global_trace, "global"); 11112 } 11113 } 11114 #else 11115 static inline void tracing_set_default_clock(void) { } 11116 #endif 11117 11118 __init static int late_trace_init(void) 11119 { 11120 if (tracepoint_printk && tracepoint_printk_stop_on_boot) { 11121 static_key_disable(&tracepoint_printk_key.key); 11122 tracepoint_printk = 0; 11123 } 11124 11125 if (traceoff_after_boot) 11126 tracing_off(); 11127 11128 tracing_set_default_clock(); 11129 clear_boot_tracer(); 11130 return 0; 11131 } 11132 11133 late_initcall_sync(late_trace_init); 11134
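/*
 * Illustrative examples (not taken from the code above) of the boot
 * parameters parsed by enable_instances():
 *
 *   trace_instance=foo^traceoff^traceprintk,sched:sched_switch
 *   trace_instance=boot_map@0x285400000:12M
 *   trace_instance=boot_map@my_reserved_region
 *
 * The first creates an instance "foo" that starts with tracing off,
 * redirects trace_printk() to it and enables the sched_switch event.
 * The other two back the instance's ring buffer with a fixed physical
 * range or a reserve_mem= region so that its contents can be examined
 * after a reboot via the last_boot_info file.
 */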