// SPDX-License-Identifier: GPL-2.0
/*
 * ring buffer based function tracer
 *
 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
 *
 * Originally taken from the RT patch by:
 *   Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Based on code from the latency_tracer, that is:
 *  Copyright (C) 2004-2006 Ingo Molnar
 *  Copyright (C) 2004 Nadia Yvette Chambers
 */
#include <linux/ring_buffer.h>
#include <linux/utsname.h>
#include <linux/stacktrace.h>
#include <linux/writeback.h>
#include <linux/kallsyms.h>
#include <linux/security.h>
#include <linux/seq_file.h>
#include <linux/irqflags.h>
#include <linux/syscalls.h>
#include <linux/debugfs.h>
#include <linux/tracefs.h>
#include <linux/pagemap.h>
#include <linux/hardirq.h>
#include <linux/linkage.h>
#include <linux/uaccess.h>
#include <linux/cleanup.h>
#include <linux/vmalloc.h>
#include <linux/ftrace.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/splice.h>
#include <linux/kdebug.h>
#include <linux/string.h>
#include <linux/mount.h>
#include <linux/rwsem.h>
#include <linux/slab.h>
#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/panic_notifier.h>
#include <linux/poll.h>
#include <linux/nmi.h>
#include <linux/fs.h>
#include <linux/trace.h>
#include <linux/sched/clock.h>
#include <linux/sched/rt.h>
#include <linux/fsnotify.h>
#include <linux/irq_work.h>
#include <linux/workqueue.h>
#include <linux/sort.h>
#include <linux/io.h> /* vmap_page_range() */
#include <linux/fs_context.h>

#include <asm/setup.h> /* COMMAND_LINE_SIZE */

#include "trace.h"
#include "trace_output.h"

#ifdef CONFIG_FTRACE_STARTUP_TEST
/*
 * We need to change this state when a selftest is running.
 * A selftest will look into the ring buffer to count the entries
 * inserted during the selftest, although concurrent insertions into
 * the ring buffer, such as trace_printk(), could occur at the same
 * time, giving false positive or negative results.
 */
static bool __read_mostly tracing_selftest_running;

/*
 * If boot-time tracing (including tracers/events set via the kernel
 * cmdline) is running, we do not want to run SELFTEST.
 */
bool __read_mostly tracing_selftest_disabled;

void __init disable_tracing_selftest(const char *reason)
{
	if (!tracing_selftest_disabled) {
		tracing_selftest_disabled = true;
		pr_info("Ftrace startup test is disabled due to %s\n", reason);
	}
}
#else
#define tracing_selftest_running	0
#define tracing_selftest_disabled	0
#endif

/* Pipe tracepoints to printk */
static struct trace_iterator *tracepoint_print_iter;
int tracepoint_printk;
static bool tracepoint_printk_stop_on_boot __initdata;
static bool traceoff_after_boot __initdata;
static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);

/* Store tracers and their flags per instance */
struct tracers {
	struct list_head	list;
	struct tracer		*tracer;
	struct tracer_flags	*flags;
};

/*
 * To prevent the comm cache from being overwritten when no
 * tracing is active, only save the comm when a trace event
 * occurred.
 */
DEFINE_PER_CPU(bool, trace_taskinfo_save);

/*
 * Kill all tracing for good (never come back).
 * It is initialized to 1 but will turn to zero if the initialization
 * of the tracer is successful. But that is the only place that sets
 * this back to zero.
 */
static int tracing_disabled = 1;

cpumask_var_t __read_mostly	tracing_buffer_mask;

#define MAX_TRACER_SIZE		100
/*
 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
 *
 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
 * is set, then ftrace_dump is called. This will output the contents
 * of the ftrace buffers to the console. This is very useful for
 * capturing traces that lead to crashes and outputting them to a
 * serial console.
 *
 * It is off by default, but you can enable it either by specifying
 * "ftrace_dump_on_oops" on the kernel command line, or by setting
 * /proc/sys/kernel/ftrace_dump_on_oops
 * Set 1 if you want to dump buffers of all CPUs
 * Set 2 if you want to dump the buffer of the CPU that triggered oops
 * Set instance name if you want to dump the specific trace instance
 * Multiple instance dump is also supported, and instances are separated
 * by commas.
 */
/* Set to string format zero to disable by default */
char ftrace_dump_on_oops[MAX_TRACER_SIZE] = "0";

/* When set, tracing will stop when a WARN*() is hit */
static int __disable_trace_on_warning;

int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
			     void *buffer, size_t *lenp, loff_t *ppos);
static const struct ctl_table trace_sysctl_table[] = {
	{
		.procname	= "ftrace_dump_on_oops",
		.data		= &ftrace_dump_on_oops,
		.maxlen		= MAX_TRACER_SIZE,
		.mode		= 0644,
		.proc_handler	= proc_dostring,
	},
	{
		.procname	= "traceoff_on_warning",
		.data		= &__disable_trace_on_warning,
		.maxlen		= sizeof(__disable_trace_on_warning),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "tracepoint_printk",
		.data		= &tracepoint_printk,
		.maxlen		= sizeof(tracepoint_printk),
		.mode		= 0644,
		.proc_handler	= tracepoint_printk_sysctl,
	},
};

static int __init init_trace_sysctls(void)
{
	register_sysctl_init("kernel", trace_sysctl_table);
	return 0;
}
subsys_initcall(init_trace_sysctls);

#ifdef CONFIG_TRACE_EVAL_MAP_FILE
/* Map of enums to their values, for "eval_map" file */
struct trace_eval_map_head {
	struct module			*mod;
	unsigned long			length;
};

union trace_eval_map_item;

struct trace_eval_map_tail {
	/*
	 * "end" is first and points to NULL as it must be different
	 * from "mod" or "eval_string"
	 */
	union trace_eval_map_item	*next;
	const char			*end;	/* points to NULL */
};

static DEFINE_MUTEX(trace_eval_mutex);

/*
 * The trace_eval_maps are saved in an array with two extra elements,
 * one at the beginning, and one at the end. The beginning item contains
 * the count of the saved maps (head.length), and the module they
 * belong to if not built in (head.mod). The ending item contains a
 * pointer to the next array of saved eval_map items.
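 *
 * For reference, a minimal sketch (not part of this file) of how one
 * block laid out this way could be walked; "block" stands for a pointer
 * to the head item of a saved block:
 *
 *	union trace_eval_map_item *ptr = block;
 *	unsigned long len = ptr->head.length;
 *	unsigned long i;
 *
 *	for (i = 0; i < len; i++)
 *		pr_info("%s: %s = %lu\n", ptr[i + 1].map.system,
 *			ptr[i + 1].map.eval_string, ptr[i + 1].map.eval_value);
 *
 *	ptr = ptr[len + 1].tail.next;	// head of the next block, or NULL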
205 */ 206 union trace_eval_map_item { 207 struct trace_eval_map map; 208 struct trace_eval_map_head head; 209 struct trace_eval_map_tail tail; 210 }; 211 212 static union trace_eval_map_item *trace_eval_maps; 213 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */ 214 215 int tracing_set_tracer(struct trace_array *tr, const char *buf); 216 static void ftrace_trace_userstack(struct trace_array *tr, 217 struct trace_buffer *buffer, 218 unsigned int trace_ctx); 219 220 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata; 221 static char *default_bootup_tracer; 222 223 static bool allocate_snapshot; 224 static bool snapshot_at_boot; 225 226 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata; 227 static int boot_instance_index; 228 229 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata; 230 static int boot_snapshot_index; 231 232 static int __init set_cmdline_ftrace(char *str) 233 { 234 strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE); 235 default_bootup_tracer = bootup_tracer_buf; 236 /* We are using ftrace early, expand it */ 237 trace_set_ring_buffer_expanded(NULL); 238 return 1; 239 } 240 __setup("ftrace=", set_cmdline_ftrace); 241 242 int ftrace_dump_on_oops_enabled(void) 243 { 244 if (!strcmp("0", ftrace_dump_on_oops)) 245 return 0; 246 else 247 return 1; 248 } 249 250 static int __init set_ftrace_dump_on_oops(char *str) 251 { 252 if (!*str) { 253 strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE); 254 return 1; 255 } 256 257 if (*str == ',') { 258 strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE); 259 strscpy(ftrace_dump_on_oops + 1, str, MAX_TRACER_SIZE - 1); 260 return 1; 261 } 262 263 if (*str++ == '=') { 264 strscpy(ftrace_dump_on_oops, str, MAX_TRACER_SIZE); 265 return 1; 266 } 267 268 return 0; 269 } 270 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops); 271 272 static int __init stop_trace_on_warning(char *str) 273 { 274 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0)) 275 __disable_trace_on_warning = 1; 276 return 1; 277 } 278 __setup("traceoff_on_warning", stop_trace_on_warning); 279 280 static int __init boot_alloc_snapshot(char *str) 281 { 282 char *slot = boot_snapshot_info + boot_snapshot_index; 283 int left = sizeof(boot_snapshot_info) - boot_snapshot_index; 284 int ret; 285 286 if (str[0] == '=') { 287 str++; 288 if (strlen(str) >= left) 289 return -1; 290 291 ret = snprintf(slot, left, "%s\t", str); 292 boot_snapshot_index += ret; 293 } else { 294 allocate_snapshot = true; 295 /* We also need the main ring buffer expanded */ 296 trace_set_ring_buffer_expanded(NULL); 297 } 298 return 1; 299 } 300 __setup("alloc_snapshot", boot_alloc_snapshot); 301 302 303 static int __init boot_snapshot(char *str) 304 { 305 snapshot_at_boot = true; 306 boot_alloc_snapshot(str); 307 return 1; 308 } 309 __setup("ftrace_boot_snapshot", boot_snapshot); 310 311 312 static int __init boot_instance(char *str) 313 { 314 char *slot = boot_instance_info + boot_instance_index; 315 int left = sizeof(boot_instance_info) - boot_instance_index; 316 int ret; 317 318 if (strlen(str) >= left) 319 return -1; 320 321 ret = snprintf(slot, left, "%s\t", str); 322 boot_instance_index += ret; 323 324 return 1; 325 } 326 __setup("trace_instance=", boot_instance); 327 328 329 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata; 330 331 static int __init set_trace_boot_options(char *str) 332 { 333 strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE); 334 return 1; 335 } 336 __setup("trace_options=", set_trace_boot_options); 337 338 static char 
trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata; 339 static char *trace_boot_clock __initdata; 340 341 static int __init set_trace_boot_clock(char *str) 342 { 343 strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE); 344 trace_boot_clock = trace_boot_clock_buf; 345 return 1; 346 } 347 __setup("trace_clock=", set_trace_boot_clock); 348 349 static int __init set_tracepoint_printk(char *str) 350 { 351 /* Ignore the "tp_printk_stop_on_boot" param */ 352 if (*str == '_') 353 return 0; 354 355 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0)) 356 tracepoint_printk = 1; 357 return 1; 358 } 359 __setup("tp_printk", set_tracepoint_printk); 360 361 static int __init set_tracepoint_printk_stop(char *str) 362 { 363 tracepoint_printk_stop_on_boot = true; 364 return 1; 365 } 366 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop); 367 368 static int __init set_traceoff_after_boot(char *str) 369 { 370 traceoff_after_boot = true; 371 return 1; 372 } 373 __setup("traceoff_after_boot", set_traceoff_after_boot); 374 375 unsigned long long ns2usecs(u64 nsec) 376 { 377 nsec += 500; 378 do_div(nsec, 1000); 379 return nsec; 380 } 381 382 static void 383 trace_process_export(struct trace_export *export, 384 struct ring_buffer_event *event, int flag) 385 { 386 struct trace_entry *entry; 387 unsigned int size = 0; 388 389 if (export->flags & flag) { 390 entry = ring_buffer_event_data(event); 391 size = ring_buffer_event_length(event); 392 export->write(export, entry, size); 393 } 394 } 395 396 static DEFINE_MUTEX(ftrace_export_lock); 397 398 static struct trace_export __rcu *ftrace_exports_list __read_mostly; 399 400 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled); 401 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled); 402 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled); 403 404 static inline void ftrace_exports_enable(struct trace_export *export) 405 { 406 if (export->flags & TRACE_EXPORT_FUNCTION) 407 static_branch_inc(&trace_function_exports_enabled); 408 409 if (export->flags & TRACE_EXPORT_EVENT) 410 static_branch_inc(&trace_event_exports_enabled); 411 412 if (export->flags & TRACE_EXPORT_MARKER) 413 static_branch_inc(&trace_marker_exports_enabled); 414 } 415 416 static inline void ftrace_exports_disable(struct trace_export *export) 417 { 418 if (export->flags & TRACE_EXPORT_FUNCTION) 419 static_branch_dec(&trace_function_exports_enabled); 420 421 if (export->flags & TRACE_EXPORT_EVENT) 422 static_branch_dec(&trace_event_exports_enabled); 423 424 if (export->flags & TRACE_EXPORT_MARKER) 425 static_branch_dec(&trace_marker_exports_enabled); 426 } 427 428 static void ftrace_exports(struct ring_buffer_event *event, int flag) 429 { 430 struct trace_export *export; 431 432 guard(preempt_notrace)(); 433 434 export = rcu_dereference_raw_check(ftrace_exports_list); 435 while (export) { 436 trace_process_export(export, event, flag); 437 export = rcu_dereference_raw_check(export->next); 438 } 439 } 440 441 static inline void 442 add_trace_export(struct trace_export **list, struct trace_export *export) 443 { 444 rcu_assign_pointer(export->next, *list); 445 /* 446 * We are entering export into the list but another 447 * CPU might be walking that list. We need to make sure 448 * the export->next pointer is valid before another CPU sees 449 * the export pointer included into the list. 
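 *
 * An illustrative interleaving (not part of this file) of what could go
 * wrong with plain stores and no ordering, as seen by a reader walking
 * the list in ftrace_exports():
 *
 *	CPU 0 (writer)			CPU 1 (reader)
 *	*list = export;
 *					exp = rcu_dereference_raw_check(*list);
 *					next = exp->next;	// not yet set!
 *	export->next = old_head;
 *
 * rcu_assign_pointer() publishes with release semantics, so the
 * export->next store done above is guaranteed to be visible before the
 * list head update below.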
450 */ 451 rcu_assign_pointer(*list, export); 452 } 453 454 static inline int 455 rm_trace_export(struct trace_export **list, struct trace_export *export) 456 { 457 struct trace_export **p; 458 459 for (p = list; *p != NULL; p = &(*p)->next) 460 if (*p == export) 461 break; 462 463 if (*p != export) 464 return -1; 465 466 rcu_assign_pointer(*p, (*p)->next); 467 468 return 0; 469 } 470 471 static inline void 472 add_ftrace_export(struct trace_export **list, struct trace_export *export) 473 { 474 ftrace_exports_enable(export); 475 476 add_trace_export(list, export); 477 } 478 479 static inline int 480 rm_ftrace_export(struct trace_export **list, struct trace_export *export) 481 { 482 int ret; 483 484 ret = rm_trace_export(list, export); 485 ftrace_exports_disable(export); 486 487 return ret; 488 } 489 490 int register_ftrace_export(struct trace_export *export) 491 { 492 if (WARN_ON_ONCE(!export->write)) 493 return -1; 494 495 guard(mutex)(&ftrace_export_lock); 496 497 add_ftrace_export(&ftrace_exports_list, export); 498 499 return 0; 500 } 501 EXPORT_SYMBOL_GPL(register_ftrace_export); 502 503 int unregister_ftrace_export(struct trace_export *export) 504 { 505 guard(mutex)(&ftrace_export_lock); 506 return rm_ftrace_export(&ftrace_exports_list, export); 507 } 508 EXPORT_SYMBOL_GPL(unregister_ftrace_export); 509 510 /* trace_flags holds trace_options default values */ 511 #define TRACE_DEFAULT_FLAGS \ 512 (FUNCTION_DEFAULT_FLAGS | FPROFILE_DEFAULT_FLAGS | \ 513 TRACE_ITER(PRINT_PARENT) | TRACE_ITER(PRINTK) | \ 514 TRACE_ITER(ANNOTATE) | TRACE_ITER(CONTEXT_INFO) | \ 515 TRACE_ITER(RECORD_CMD) | TRACE_ITER(OVERWRITE) | \ 516 TRACE_ITER(IRQ_INFO) | TRACE_ITER(MARKERS) | \ 517 TRACE_ITER(HASH_PTR) | TRACE_ITER(TRACE_PRINTK) | \ 518 TRACE_ITER(COPY_MARKER)) 519 520 /* trace_options that are only supported by global_trace */ 521 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER(PRINTK) | \ 522 TRACE_ITER(PRINTK_MSGONLY) | TRACE_ITER(RECORD_CMD) | \ 523 TRACE_ITER(PROF_TEXT_OFFSET) | FPROFILE_DEFAULT_FLAGS) 524 525 /* trace_flags that are default zero for instances */ 526 #define ZEROED_TRACE_FLAGS \ 527 (TRACE_ITER(EVENT_FORK) | TRACE_ITER(FUNC_FORK) | TRACE_ITER(TRACE_PRINTK) | \ 528 TRACE_ITER(COPY_MARKER)) 529 530 /* 531 * The global_trace is the descriptor that holds the top-level tracing 532 * buffers for the live tracing. 533 */ 534 static struct trace_array global_trace = { 535 .trace_flags = TRACE_DEFAULT_FLAGS, 536 }; 537 538 static struct trace_array *printk_trace = &global_trace; 539 540 /* List of trace_arrays interested in the top level trace_marker */ 541 static LIST_HEAD(marker_copies); 542 543 static __always_inline bool printk_binsafe(struct trace_array *tr) 544 { 545 /* 546 * The binary format of traceprintk can cause a crash if used 547 * by a buffer from another boot. Force the use of the 548 * non binary version of trace_printk if the trace_printk 549 * buffer is a boot mapped ring buffer. 
550 */ 551 return !(tr->flags & TRACE_ARRAY_FL_BOOT); 552 } 553 554 static void update_printk_trace(struct trace_array *tr) 555 { 556 if (printk_trace == tr) 557 return; 558 559 printk_trace->trace_flags &= ~TRACE_ITER(TRACE_PRINTK); 560 printk_trace = tr; 561 tr->trace_flags |= TRACE_ITER(TRACE_PRINTK); 562 } 563 564 /* Returns true if the status of tr changed */ 565 static bool update_marker_trace(struct trace_array *tr, int enabled) 566 { 567 lockdep_assert_held(&event_mutex); 568 569 if (enabled) { 570 if (!list_empty(&tr->marker_list)) 571 return false; 572 573 list_add_rcu(&tr->marker_list, &marker_copies); 574 tr->trace_flags |= TRACE_ITER(COPY_MARKER); 575 return true; 576 } 577 578 if (list_empty(&tr->marker_list)) 579 return false; 580 581 list_del_init(&tr->marker_list); 582 tr->trace_flags &= ~TRACE_ITER(COPY_MARKER); 583 return true; 584 } 585 586 void trace_set_ring_buffer_expanded(struct trace_array *tr) 587 { 588 if (!tr) 589 tr = &global_trace; 590 tr->ring_buffer_expanded = true; 591 } 592 593 LIST_HEAD(ftrace_trace_arrays); 594 595 int trace_array_get(struct trace_array *this_tr) 596 { 597 struct trace_array *tr; 598 599 guard(mutex)(&trace_types_lock); 600 list_for_each_entry(tr, &ftrace_trace_arrays, list) { 601 if (tr == this_tr) { 602 tr->ref++; 603 return 0; 604 } 605 } 606 607 return -ENODEV; 608 } 609 610 static void __trace_array_put(struct trace_array *this_tr) 611 { 612 WARN_ON(!this_tr->ref); 613 this_tr->ref--; 614 } 615 616 /** 617 * trace_array_put - Decrement the reference counter for this trace array. 618 * @this_tr : pointer to the trace array 619 * 620 * NOTE: Use this when we no longer need the trace array returned by 621 * trace_array_get_by_name(). This ensures the trace array can be later 622 * destroyed. 623 * 624 */ 625 void trace_array_put(struct trace_array *this_tr) 626 { 627 if (!this_tr) 628 return; 629 630 guard(mutex)(&trace_types_lock); 631 __trace_array_put(this_tr); 632 } 633 EXPORT_SYMBOL_GPL(trace_array_put); 634 635 int tracing_check_open_get_tr(struct trace_array *tr) 636 { 637 int ret; 638 639 ret = security_locked_down(LOCKDOWN_TRACEFS); 640 if (ret) 641 return ret; 642 643 if (tracing_disabled) 644 return -ENODEV; 645 646 if (tr && trace_array_get(tr) < 0) 647 return -ENODEV; 648 649 return 0; 650 } 651 652 /** 653 * trace_find_filtered_pid - check if a pid exists in a filtered_pid list 654 * @filtered_pids: The list of pids to check 655 * @search_pid: The PID to find in @filtered_pids 656 * 657 * Returns true if @search_pid is found in @filtered_pids, and false otherwise. 658 */ 659 bool 660 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid) 661 { 662 return trace_pid_list_is_set(filtered_pids, search_pid); 663 } 664 665 /** 666 * trace_ignore_this_task - should a task be ignored for tracing 667 * @filtered_pids: The list of pids to check 668 * @filtered_no_pids: The list of pids not to be traced 669 * @task: The task that should be ignored if not filtered 670 * 671 * Checks if @task should be traced or not from @filtered_pids. 672 * Returns true if @task should *NOT* be traced. 673 * Returns false if @task should be traced. 674 */ 675 bool 676 trace_ignore_this_task(struct trace_pid_list *filtered_pids, 677 struct trace_pid_list *filtered_no_pids, 678 struct task_struct *task) 679 { 680 /* 681 * If filtered_no_pids is not empty, and the task's pid is listed 682 * in filtered_no_pids, then return true. 683 * Otherwise, if filtered_pids is empty, that means we can 684 * trace all tasks. 
If it has content, then only trace pids 685 * within filtered_pids. 686 */ 687 688 return (filtered_pids && 689 !trace_find_filtered_pid(filtered_pids, task->pid)) || 690 (filtered_no_pids && 691 trace_find_filtered_pid(filtered_no_pids, task->pid)); 692 } 693 694 /** 695 * trace_filter_add_remove_task - Add or remove a task from a pid_list 696 * @pid_list: The list to modify 697 * @self: The current task for fork or NULL for exit 698 * @task: The task to add or remove 699 * 700 * If adding a task, if @self is defined, the task is only added if @self 701 * is also included in @pid_list. This happens on fork and tasks should 702 * only be added when the parent is listed. If @self is NULL, then the 703 * @task pid will be removed from the list, which would happen on exit 704 * of a task. 705 */ 706 void trace_filter_add_remove_task(struct trace_pid_list *pid_list, 707 struct task_struct *self, 708 struct task_struct *task) 709 { 710 if (!pid_list) 711 return; 712 713 /* For forks, we only add if the forking task is listed */ 714 if (self) { 715 if (!trace_find_filtered_pid(pid_list, self->pid)) 716 return; 717 } 718 719 /* "self" is set for forks, and NULL for exits */ 720 if (self) 721 trace_pid_list_set(pid_list, task->pid); 722 else 723 trace_pid_list_clear(pid_list, task->pid); 724 } 725 726 /** 727 * trace_pid_next - Used for seq_file to get to the next pid of a pid_list 728 * @pid_list: The pid list to show 729 * @v: The last pid that was shown (+1 the actual pid to let zero be displayed) 730 * @pos: The position of the file 731 * 732 * This is used by the seq_file "next" operation to iterate the pids 733 * listed in a trace_pid_list structure. 734 * 735 * Returns the pid+1 as we want to display pid of zero, but NULL would 736 * stop the iteration. 737 */ 738 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos) 739 { 740 long pid = (unsigned long)v; 741 unsigned int next; 742 743 (*pos)++; 744 745 /* pid already is +1 of the actual previous bit */ 746 if (trace_pid_list_next(pid_list, pid, &next) < 0) 747 return NULL; 748 749 pid = next; 750 751 /* Return pid + 1 to allow zero to be represented */ 752 return (void *)(pid + 1); 753 } 754 755 /** 756 * trace_pid_start - Used for seq_file to start reading pid lists 757 * @pid_list: The pid list to show 758 * @pos: The position of the file 759 * 760 * This is used by seq_file "start" operation to start the iteration 761 * of listing pids. 762 * 763 * Returns the pid+1 as we want to display pid of zero, but NULL would 764 * stop the iteration. 765 */ 766 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos) 767 { 768 unsigned long pid; 769 unsigned int first; 770 loff_t l = 0; 771 772 if (trace_pid_list_first(pid_list, &first) < 0) 773 return NULL; 774 775 pid = first; 776 777 /* Return pid + 1 so that zero can be the exit value */ 778 for (pid++; pid && l < *pos; 779 pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l)) 780 ; 781 return (void *)pid; 782 } 783 784 /** 785 * trace_pid_show - show the current pid in seq_file processing 786 * @m: The seq_file structure to write into 787 * @v: A void pointer of the pid (+1) value to display 788 * 789 * Can be directly used by seq_file operations to display the current 790 * pid value. 
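 *
 * A minimal wiring sketch (hypothetical names such as my_pid_list and
 * my_pids_stop(), not part of this file) showing how trace_pid_start(),
 * trace_pid_next() and trace_pid_show() can back a seq_file:
 *
 *	static void *my_pids_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return trace_pid_start(my_pid_list, pos);
 *	}
 *
 *	static void *my_pids_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(my_pid_list, v, pos);
 *	}
 *
 *	static const struct seq_operations my_pids_seq_ops = {
 *		.start	= my_pids_start,
 *		.next	= my_pids_next,
 *		.stop	= my_pids_stop,	// typically drops any lock taken in start
 *		.show	= trace_pid_show,
 *	};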
791 */ 792 int trace_pid_show(struct seq_file *m, void *v) 793 { 794 unsigned long pid = (unsigned long)v - 1; 795 796 seq_printf(m, "%lu\n", pid); 797 return 0; 798 } 799 800 /* 128 should be much more than enough */ 801 #define PID_BUF_SIZE 127 802 803 int trace_pid_write(struct trace_pid_list *filtered_pids, 804 struct trace_pid_list **new_pid_list, 805 const char __user *ubuf, size_t cnt) 806 { 807 struct trace_pid_list *pid_list; 808 struct trace_parser parser; 809 unsigned long val; 810 int nr_pids = 0; 811 ssize_t read = 0; 812 ssize_t ret; 813 loff_t pos; 814 pid_t pid; 815 816 if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1)) 817 return -ENOMEM; 818 819 /* 820 * Always recreate a new array. The write is an all or nothing 821 * operation. Always create a new array when adding new pids by 822 * the user. If the operation fails, then the current list is 823 * not modified. 824 */ 825 pid_list = trace_pid_list_alloc(); 826 if (!pid_list) { 827 trace_parser_put(&parser); 828 return -ENOMEM; 829 } 830 831 if (filtered_pids) { 832 /* copy the current bits to the new max */ 833 ret = trace_pid_list_first(filtered_pids, &pid); 834 while (!ret) { 835 ret = trace_pid_list_set(pid_list, pid); 836 if (ret < 0) 837 goto out; 838 839 ret = trace_pid_list_next(filtered_pids, pid + 1, &pid); 840 nr_pids++; 841 } 842 } 843 844 ret = 0; 845 while (cnt > 0) { 846 847 pos = 0; 848 849 ret = trace_get_user(&parser, ubuf, cnt, &pos); 850 if (ret < 0) 851 break; 852 853 read += ret; 854 ubuf += ret; 855 cnt -= ret; 856 857 if (!trace_parser_loaded(&parser)) 858 break; 859 860 ret = -EINVAL; 861 if (kstrtoul(parser.buffer, 0, &val)) 862 break; 863 864 pid = (pid_t)val; 865 866 if (trace_pid_list_set(pid_list, pid) < 0) { 867 ret = -1; 868 break; 869 } 870 nr_pids++; 871 872 trace_parser_clear(&parser); 873 ret = 0; 874 } 875 out: 876 trace_parser_put(&parser); 877 878 if (ret < 0) { 879 trace_pid_list_free(pid_list); 880 return ret; 881 } 882 883 if (!nr_pids) { 884 /* Cleared the list of pids */ 885 trace_pid_list_free(pid_list); 886 pid_list = NULL; 887 } 888 889 *new_pid_list = pid_list; 890 891 return read; 892 } 893 894 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu) 895 { 896 u64 ts; 897 898 /* Early boot up does not have a buffer yet */ 899 if (!buf->buffer) 900 return trace_clock_local(); 901 902 ts = ring_buffer_time_stamp(buf->buffer); 903 ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts); 904 905 return ts; 906 } 907 908 u64 ftrace_now(int cpu) 909 { 910 return buffer_ftrace_now(&global_trace.array_buffer, cpu); 911 } 912 913 /** 914 * tracing_is_enabled - Show if global_trace has been enabled 915 * 916 * Shows if the global trace has been enabled or not. It uses the 917 * mirror flag "buffer_disabled" to be used in fast paths such as for 918 * the irqsoff tracer. But it may be inaccurate due to races. If you 919 * need to know the accurate state, use tracing_is_on() which is a little 920 * slower, but accurate. 921 */ 922 int tracing_is_enabled(void) 923 { 924 /* 925 * For quick access (irqsoff uses this in fast path), just 926 * return the mirror variable of the state of the ring buffer. 927 * It's a little racy, but we don't really care. 928 */ 929 return !global_trace.buffer_disabled; 930 } 931 932 /* 933 * trace_buf_size is the size in bytes that is allocated 934 * for a buffer. Note, the number of bytes is always rounded 935 * to page size. 936 * 937 * This number is purposely set to a low number of 16384. 
 * If the dump on oops happens, it will be much appreciated
 * to not have to wait for all that output. Anyway this can be
 * boot time and run time configurable.
 */
#define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */

static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;

/* trace_types holds a linked list of available tracers. */
static struct tracer		*trace_types __read_mostly;

/*
 * trace_types_lock is used to protect the trace_types list.
 */
DEFINE_MUTEX(trace_types_lock);

/*
 * serialize the access of the ring buffer
 *
 * The ring buffer serializes readers, but that is only low level protection.
 * The validity of the events (which are returned by ring_buffer_peek(), etc.)
 * is not protected by the ring buffer.
 *
 * The content of events may become garbage if we allow another process to
 * consume these events concurrently:
 *   A) the page of the consumed events may become a normal page
 *      (not a reader page) in the ring buffer, and this page will be
 *      rewritten by the events producer.
 *   B) The page of the consumed events may become a page for splice_read,
 *      and this page will be returned to the system.
 *
 * These primitives allow multiple processes to access different cpu ring
 * buffers concurrently.
 *
 * These primitives don't distinguish read-only and read-consume access.
 * Multiple read-only accesses are also serialized.
 */

#ifdef CONFIG_SMP
static DECLARE_RWSEM(all_cpu_access_lock);
static DEFINE_PER_CPU(struct mutex, cpu_access_lock);

static inline void trace_access_lock(int cpu)
{
	if (cpu == RING_BUFFER_ALL_CPUS) {
		/* gain it for accessing the whole ring buffer. */
		down_write(&all_cpu_access_lock);
	} else {
		/* gain it for accessing a cpu ring buffer. */

		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
		down_read(&all_cpu_access_lock);

		/* Secondly block other access to this @cpu ring buffer.
*/ 992 mutex_lock(&per_cpu(cpu_access_lock, cpu)); 993 } 994 } 995 996 static inline void trace_access_unlock(int cpu) 997 { 998 if (cpu == RING_BUFFER_ALL_CPUS) { 999 up_write(&all_cpu_access_lock); 1000 } else { 1001 mutex_unlock(&per_cpu(cpu_access_lock, cpu)); 1002 up_read(&all_cpu_access_lock); 1003 } 1004 } 1005 1006 static inline void trace_access_lock_init(void) 1007 { 1008 int cpu; 1009 1010 for_each_possible_cpu(cpu) 1011 mutex_init(&per_cpu(cpu_access_lock, cpu)); 1012 } 1013 1014 #else 1015 1016 static DEFINE_MUTEX(access_lock); 1017 1018 static inline void trace_access_lock(int cpu) 1019 { 1020 (void)cpu; 1021 mutex_lock(&access_lock); 1022 } 1023 1024 static inline void trace_access_unlock(int cpu) 1025 { 1026 (void)cpu; 1027 mutex_unlock(&access_lock); 1028 } 1029 1030 static inline void trace_access_lock_init(void) 1031 { 1032 } 1033 1034 #endif 1035 1036 #ifdef CONFIG_STACKTRACE 1037 static void __ftrace_trace_stack(struct trace_array *tr, 1038 struct trace_buffer *buffer, 1039 unsigned int trace_ctx, 1040 int skip, struct pt_regs *regs); 1041 static inline void ftrace_trace_stack(struct trace_array *tr, 1042 struct trace_buffer *buffer, 1043 unsigned int trace_ctx, 1044 int skip, struct pt_regs *regs); 1045 1046 #else 1047 static inline void __ftrace_trace_stack(struct trace_array *tr, 1048 struct trace_buffer *buffer, 1049 unsigned int trace_ctx, 1050 int skip, struct pt_regs *regs) 1051 { 1052 } 1053 static inline void ftrace_trace_stack(struct trace_array *tr, 1054 struct trace_buffer *buffer, 1055 unsigned long trace_ctx, 1056 int skip, struct pt_regs *regs) 1057 { 1058 } 1059 1060 #endif 1061 1062 static __always_inline void 1063 trace_event_setup(struct ring_buffer_event *event, 1064 int type, unsigned int trace_ctx) 1065 { 1066 struct trace_entry *ent = ring_buffer_event_data(event); 1067 1068 tracing_generic_entry_update(ent, type, trace_ctx); 1069 } 1070 1071 static __always_inline struct ring_buffer_event * 1072 __trace_buffer_lock_reserve(struct trace_buffer *buffer, 1073 int type, 1074 unsigned long len, 1075 unsigned int trace_ctx) 1076 { 1077 struct ring_buffer_event *event; 1078 1079 event = ring_buffer_lock_reserve(buffer, len); 1080 if (event != NULL) 1081 trace_event_setup(event, type, trace_ctx); 1082 1083 return event; 1084 } 1085 1086 void tracer_tracing_on(struct trace_array *tr) 1087 { 1088 if (tr->array_buffer.buffer) 1089 ring_buffer_record_on(tr->array_buffer.buffer); 1090 /* 1091 * This flag is looked at when buffers haven't been allocated 1092 * yet, or by some tracers (like irqsoff), that just want to 1093 * know if the ring buffer has been disabled, but it can handle 1094 * races of where it gets disabled but we still do a record. 1095 * As the check is in the fast path of the tracers, it is more 1096 * important to be fast than accurate. 1097 */ 1098 tr->buffer_disabled = 0; 1099 } 1100 1101 /** 1102 * tracing_on - enable tracing buffers 1103 * 1104 * This function enables tracing buffers that may have been 1105 * disabled with tracing_off. 
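 *
 * A minimal usage sketch (illustrative only, not part of this file):
 * bracket a window of interest so that only it is recorded:
 *
 *	tracing_on();
 *	trace_printk("entering suspect path\n");
 *	do_suspect_work();		// hypothetical function
 *	tracing_off();
 *
 * The recorded window can then be read from /sys/kernel/tracing/trace.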
1106 */ 1107 void tracing_on(void) 1108 { 1109 tracer_tracing_on(&global_trace); 1110 } 1111 EXPORT_SYMBOL_GPL(tracing_on); 1112 1113 1114 static __always_inline void 1115 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event) 1116 { 1117 __this_cpu_write(trace_taskinfo_save, true); 1118 1119 /* If this is the temp buffer, we need to commit fully */ 1120 if (this_cpu_read(trace_buffered_event) == event) { 1121 /* Length is in event->array[0] */ 1122 ring_buffer_write(buffer, event->array[0], &event->array[1]); 1123 /* Release the temp buffer */ 1124 this_cpu_dec(trace_buffered_event_cnt); 1125 /* ring_buffer_unlock_commit() enables preemption */ 1126 preempt_enable_notrace(); 1127 } else 1128 ring_buffer_unlock_commit(buffer); 1129 } 1130 1131 int __trace_array_puts(struct trace_array *tr, unsigned long ip, 1132 const char *str, int size) 1133 { 1134 struct ring_buffer_event *event; 1135 struct trace_buffer *buffer; 1136 struct print_entry *entry; 1137 unsigned int trace_ctx; 1138 int alloc; 1139 1140 if (!(tr->trace_flags & TRACE_ITER(PRINTK))) 1141 return 0; 1142 1143 if (unlikely(tracing_selftest_running && tr == &global_trace)) 1144 return 0; 1145 1146 if (unlikely(tracing_disabled)) 1147 return 0; 1148 1149 alloc = sizeof(*entry) + size + 2; /* possible \n added */ 1150 1151 trace_ctx = tracing_gen_ctx(); 1152 buffer = tr->array_buffer.buffer; 1153 guard(ring_buffer_nest)(buffer); 1154 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 1155 trace_ctx); 1156 if (!event) 1157 return 0; 1158 1159 entry = ring_buffer_event_data(event); 1160 entry->ip = ip; 1161 1162 memcpy(&entry->buf, str, size); 1163 1164 /* Add a newline if necessary */ 1165 if (entry->buf[size - 1] != '\n') { 1166 entry->buf[size] = '\n'; 1167 entry->buf[size + 1] = '\0'; 1168 } else 1169 entry->buf[size] = '\0'; 1170 1171 __buffer_unlock_commit(buffer, event); 1172 ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL); 1173 return size; 1174 } 1175 EXPORT_SYMBOL_GPL(__trace_array_puts); 1176 1177 /** 1178 * __trace_puts - write a constant string into the trace buffer. 1179 * @ip: The address of the caller 1180 * @str: The constant string to write 1181 * @size: The size of the string. 
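 *
 * Callers normally use the trace_puts() macro rather than calling this
 * directly; an illustrative use (not part of this file):
 *
 *	trace_puts("reached the slow path\n");
 *
 * For built-in constant strings the macro resolves to __trace_bputs(),
 * otherwise it falls back to __trace_puts() with strlen() of the string.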
1182 */ 1183 int __trace_puts(unsigned long ip, const char *str, int size) 1184 { 1185 return __trace_array_puts(printk_trace, ip, str, size); 1186 } 1187 EXPORT_SYMBOL_GPL(__trace_puts); 1188 1189 /** 1190 * __trace_bputs - write the pointer to a constant string into trace buffer 1191 * @ip: The address of the caller 1192 * @str: The constant string to write to the buffer to 1193 */ 1194 int __trace_bputs(unsigned long ip, const char *str) 1195 { 1196 struct trace_array *tr = READ_ONCE(printk_trace); 1197 struct ring_buffer_event *event; 1198 struct trace_buffer *buffer; 1199 struct bputs_entry *entry; 1200 unsigned int trace_ctx; 1201 int size = sizeof(struct bputs_entry); 1202 1203 if (!printk_binsafe(tr)) 1204 return __trace_puts(ip, str, strlen(str)); 1205 1206 if (!(tr->trace_flags & TRACE_ITER(PRINTK))) 1207 return 0; 1208 1209 if (unlikely(tracing_selftest_running || tracing_disabled)) 1210 return 0; 1211 1212 trace_ctx = tracing_gen_ctx(); 1213 buffer = tr->array_buffer.buffer; 1214 1215 guard(ring_buffer_nest)(buffer); 1216 event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size, 1217 trace_ctx); 1218 if (!event) 1219 return 0; 1220 1221 entry = ring_buffer_event_data(event); 1222 entry->ip = ip; 1223 entry->str = str; 1224 1225 __buffer_unlock_commit(buffer, event); 1226 ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL); 1227 1228 return 1; 1229 } 1230 EXPORT_SYMBOL_GPL(__trace_bputs); 1231 1232 #ifdef CONFIG_TRACER_SNAPSHOT 1233 static void tracing_snapshot_instance_cond(struct trace_array *tr, 1234 void *cond_data) 1235 { 1236 struct tracer *tracer = tr->current_trace; 1237 unsigned long flags; 1238 1239 if (in_nmi()) { 1240 trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n"); 1241 trace_array_puts(tr, "*** snapshot is being ignored ***\n"); 1242 return; 1243 } 1244 1245 if (!tr->allocated_snapshot) { 1246 trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n"); 1247 trace_array_puts(tr, "*** stopping trace here! ***\n"); 1248 tracer_tracing_off(tr); 1249 return; 1250 } 1251 1252 /* Note, snapshot can not be used when the tracer uses it */ 1253 if (tracer->use_max_tr) { 1254 trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n"); 1255 trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n"); 1256 return; 1257 } 1258 1259 if (tr->mapped) { 1260 trace_array_puts(tr, "*** BUFFER MEMORY MAPPED ***\n"); 1261 trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n"); 1262 return; 1263 } 1264 1265 local_irq_save(flags); 1266 update_max_tr(tr, current, smp_processor_id(), cond_data); 1267 local_irq_restore(flags); 1268 } 1269 1270 void tracing_snapshot_instance(struct trace_array *tr) 1271 { 1272 tracing_snapshot_instance_cond(tr, NULL); 1273 } 1274 1275 /** 1276 * tracing_snapshot - take a snapshot of the current buffer. 1277 * 1278 * This causes a swap between the snapshot buffer and the current live 1279 * tracing buffer. You can use this to take snapshots of the live 1280 * trace when some condition is triggered, but continue to trace. 1281 * 1282 * Note, make sure to allocate the snapshot with either 1283 * a tracing_snapshot_alloc(), or by doing it manually 1284 * with: echo 1 > /sys/kernel/tracing/snapshot 1285 * 1286 * If the snapshot buffer is not allocated, it will stop tracing. 1287 * Basically making a permanent snapshot. 
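 *
 * A minimal usage sketch (illustrative only, not part of this file):
 *
 *	// once, from a context that may sleep
 *	tracing_snapshot_alloc();
 *
 *	// later, at the point of interest (atomic context is fine,
 *	// but not NMI, as noted above)
 *	if (unlikely(saw_bad_latency))		// hypothetical condition
 *		tracing_snapshot();
 *
 * The frozen data is then available in /sys/kernel/tracing/snapshot
 * while the live buffer keeps tracing.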
 */
void tracing_snapshot(void)
{
	struct trace_array *tr = &global_trace;

	tracing_snapshot_instance(tr);
}
EXPORT_SYMBOL_GPL(tracing_snapshot);

/**
 * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
 * @tr:		The tracing instance to snapshot
 * @cond_data:	The data to be tested conditionally, and possibly saved
 *
 * This is the same as tracing_snapshot() except that the snapshot is
 * conditional - the snapshot will only happen if the
 * cond_snapshot.update() implementation receiving the cond_data
 * returns true, which means that the trace array's cond_snapshot
 * update() operation used the cond_data to determine whether the
 * snapshot should be taken, and if it was, presumably saved it along
 * with the snapshot.
 */
void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
{
	tracing_snapshot_instance_cond(tr, cond_data);
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond);

/**
 * tracing_cond_snapshot_data - get the user data associated with a snapshot
 * @tr:		The tracing instance
 *
 * When the user enables a conditional snapshot using
 * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
 * with the snapshot. This accessor is used to retrieve it.
 *
 * Should not be called from cond_snapshot.update(), since it takes
 * the tr->max_lock lock, which the code calling
 * cond_snapshot.update() has already done.
 *
 * Returns the cond_data associated with the trace array's snapshot.
 */
void *tracing_cond_snapshot_data(struct trace_array *tr)
{
	void *cond_data = NULL;

	local_irq_disable();
	arch_spin_lock(&tr->max_lock);

	if (tr->cond_snapshot)
		cond_data = tr->cond_snapshot->cond_data;

	arch_spin_unlock(&tr->max_lock);
	local_irq_enable();

	return cond_data;
}
EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);

static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
					struct array_buffer *size_buf, int cpu_id);
static void set_buffer_entries(struct array_buffer *buf, unsigned long val);

int tracing_alloc_snapshot_instance(struct trace_array *tr)
{
	int order;
	int ret;

	if (!tr->allocated_snapshot) {

		/* Make the snapshot buffer have the same order as main buffer */
		order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
		ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
		if (ret < 0)
			return ret;

		/* allocate spare buffer */
		ret = resize_buffer_duplicate_size(&tr->max_buffer,
				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
		if (ret < 0)
			return ret;

		tr->allocated_snapshot = true;
	}

	return 0;
}

static void free_snapshot(struct trace_array *tr)
{
	/*
	 * We don't free the ring buffer; instead, we resize it because
	 * the max_tr ring buffer has some state (e.g. ring->clock) and
	 * we want to preserve it.
1382 */ 1383 ring_buffer_subbuf_order_set(tr->max_buffer.buffer, 0); 1384 ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS); 1385 set_buffer_entries(&tr->max_buffer, 1); 1386 tracing_reset_online_cpus(&tr->max_buffer); 1387 tr->allocated_snapshot = false; 1388 } 1389 1390 static int tracing_arm_snapshot_locked(struct trace_array *tr) 1391 { 1392 int ret; 1393 1394 lockdep_assert_held(&trace_types_lock); 1395 1396 spin_lock(&tr->snapshot_trigger_lock); 1397 if (tr->snapshot == UINT_MAX || tr->mapped) { 1398 spin_unlock(&tr->snapshot_trigger_lock); 1399 return -EBUSY; 1400 } 1401 1402 tr->snapshot++; 1403 spin_unlock(&tr->snapshot_trigger_lock); 1404 1405 ret = tracing_alloc_snapshot_instance(tr); 1406 if (ret) { 1407 spin_lock(&tr->snapshot_trigger_lock); 1408 tr->snapshot--; 1409 spin_unlock(&tr->snapshot_trigger_lock); 1410 } 1411 1412 return ret; 1413 } 1414 1415 int tracing_arm_snapshot(struct trace_array *tr) 1416 { 1417 guard(mutex)(&trace_types_lock); 1418 return tracing_arm_snapshot_locked(tr); 1419 } 1420 1421 void tracing_disarm_snapshot(struct trace_array *tr) 1422 { 1423 spin_lock(&tr->snapshot_trigger_lock); 1424 if (!WARN_ON(!tr->snapshot)) 1425 tr->snapshot--; 1426 spin_unlock(&tr->snapshot_trigger_lock); 1427 } 1428 1429 /** 1430 * tracing_alloc_snapshot - allocate snapshot buffer. 1431 * 1432 * This only allocates the snapshot buffer if it isn't already 1433 * allocated - it doesn't also take a snapshot. 1434 * 1435 * This is meant to be used in cases where the snapshot buffer needs 1436 * to be set up for events that can't sleep but need to be able to 1437 * trigger a snapshot. 1438 */ 1439 int tracing_alloc_snapshot(void) 1440 { 1441 struct trace_array *tr = &global_trace; 1442 int ret; 1443 1444 ret = tracing_alloc_snapshot_instance(tr); 1445 WARN_ON(ret < 0); 1446 1447 return ret; 1448 } 1449 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot); 1450 1451 /** 1452 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer. 1453 * 1454 * This is similar to tracing_snapshot(), but it will allocate the 1455 * snapshot buffer if it isn't already allocated. Use this only 1456 * where it is safe to sleep, as the allocation may sleep. 1457 * 1458 * This causes a swap between the snapshot buffer and the current live 1459 * tracing buffer. You can use this to take snapshots of the live 1460 * trace when some condition is triggered, but continue to trace. 1461 */ 1462 void tracing_snapshot_alloc(void) 1463 { 1464 int ret; 1465 1466 ret = tracing_alloc_snapshot(); 1467 if (ret < 0) 1468 return; 1469 1470 tracing_snapshot(); 1471 } 1472 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc); 1473 1474 /** 1475 * tracing_snapshot_cond_enable - enable conditional snapshot for an instance 1476 * @tr: The tracing instance 1477 * @cond_data: User data to associate with the snapshot 1478 * @update: Implementation of the cond_snapshot update function 1479 * 1480 * Check whether the conditional snapshot for the given instance has 1481 * already been enabled, or if the current tracer is already using a 1482 * snapshot; if so, return -EBUSY, else create a cond_snapshot and 1483 * save the cond_data and update function inside. 1484 * 1485 * Returns 0 if successful, error otherwise. 
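 *
 * An illustrative sketch (hypothetical names, not part of this file):
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		struct my_cond *c = cond_data;
 *
 *		// snapshot only when a hypothetical threshold is crossed
 *		return my_measured_latency_ns() > c->threshold;
 *	}
 *
 *	...
 *	ret = tracing_snapshot_cond_enable(tr, &my_cond_data, my_update);
 *	...
 *	// later, from the site of interest:
 *	tracing_snapshot_cond(tr, &my_cond_data);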
1486 */ 1487 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, 1488 cond_update_fn_t update) 1489 { 1490 struct cond_snapshot *cond_snapshot __free(kfree) = 1491 kzalloc(sizeof(*cond_snapshot), GFP_KERNEL); 1492 int ret; 1493 1494 if (!cond_snapshot) 1495 return -ENOMEM; 1496 1497 cond_snapshot->cond_data = cond_data; 1498 cond_snapshot->update = update; 1499 1500 guard(mutex)(&trace_types_lock); 1501 1502 if (tr->current_trace->use_max_tr) 1503 return -EBUSY; 1504 1505 /* 1506 * The cond_snapshot can only change to NULL without the 1507 * trace_types_lock. We don't care if we race with it going 1508 * to NULL, but we want to make sure that it's not set to 1509 * something other than NULL when we get here, which we can 1510 * do safely with only holding the trace_types_lock and not 1511 * having to take the max_lock. 1512 */ 1513 if (tr->cond_snapshot) 1514 return -EBUSY; 1515 1516 ret = tracing_arm_snapshot_locked(tr); 1517 if (ret) 1518 return ret; 1519 1520 local_irq_disable(); 1521 arch_spin_lock(&tr->max_lock); 1522 tr->cond_snapshot = no_free_ptr(cond_snapshot); 1523 arch_spin_unlock(&tr->max_lock); 1524 local_irq_enable(); 1525 1526 return 0; 1527 } 1528 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable); 1529 1530 /** 1531 * tracing_snapshot_cond_disable - disable conditional snapshot for an instance 1532 * @tr: The tracing instance 1533 * 1534 * Check whether the conditional snapshot for the given instance is 1535 * enabled; if so, free the cond_snapshot associated with it, 1536 * otherwise return -EINVAL. 1537 * 1538 * Returns 0 if successful, error otherwise. 1539 */ 1540 int tracing_snapshot_cond_disable(struct trace_array *tr) 1541 { 1542 int ret = 0; 1543 1544 local_irq_disable(); 1545 arch_spin_lock(&tr->max_lock); 1546 1547 if (!tr->cond_snapshot) 1548 ret = -EINVAL; 1549 else { 1550 kfree(tr->cond_snapshot); 1551 tr->cond_snapshot = NULL; 1552 } 1553 1554 arch_spin_unlock(&tr->max_lock); 1555 local_irq_enable(); 1556 1557 tracing_disarm_snapshot(tr); 1558 1559 return ret; 1560 } 1561 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable); 1562 #else 1563 void tracing_snapshot(void) 1564 { 1565 WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used"); 1566 } 1567 EXPORT_SYMBOL_GPL(tracing_snapshot); 1568 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data) 1569 { 1570 WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used"); 1571 } 1572 EXPORT_SYMBOL_GPL(tracing_snapshot_cond); 1573 int tracing_alloc_snapshot(void) 1574 { 1575 WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used"); 1576 return -ENODEV; 1577 } 1578 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot); 1579 void tracing_snapshot_alloc(void) 1580 { 1581 /* Give warning */ 1582 tracing_snapshot(); 1583 } 1584 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc); 1585 void *tracing_cond_snapshot_data(struct trace_array *tr) 1586 { 1587 return NULL; 1588 } 1589 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data); 1590 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update) 1591 { 1592 return -ENODEV; 1593 } 1594 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable); 1595 int tracing_snapshot_cond_disable(struct trace_array *tr) 1596 { 1597 return false; 1598 } 1599 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable); 1600 #define free_snapshot(tr) do { } while (0) 1601 #define tracing_arm_snapshot_locked(tr) ({ -EBUSY; }) 1602 #endif /* CONFIG_TRACER_SNAPSHOT */ 1603 1604 void tracer_tracing_off(struct trace_array 
 *tr)
{
	if (tr->array_buffer.buffer)
		ring_buffer_record_off(tr->array_buffer.buffer);
	/*
	 * This flag is looked at when buffers haven't been allocated
	 * yet, or by some tracers (like irqsoff), that just want to
	 * know if the ring buffer has been disabled, but it can handle
	 * races of where it gets disabled but we still do a record.
	 * As the check is in the fast path of the tracers, it is more
	 * important to be fast than accurate.
	 */
	tr->buffer_disabled = 1;
}

/**
 * tracer_tracing_disable() - temporarily disable the buffer from write
 * @tr: The trace array to disable its buffer for
 *
 * Expects tracer_tracing_enable() to re-enable tracing.
 * The difference between this and tracer_tracing_off() is that this
 * is a counter and can nest, whereas, tracer_tracing_off() can
 * be called multiple times and a single tracer_tracing_on() will
 * enable it.
 */
void tracer_tracing_disable(struct trace_array *tr)
{
	if (WARN_ON_ONCE(!tr->array_buffer.buffer))
		return;

	ring_buffer_record_disable(tr->array_buffer.buffer);
}

/**
 * tracer_tracing_enable() - counter part of tracer_tracing_disable()
 * @tr: The trace array that had tracer_tracing_disable() called on it
 *
 * This is called after tracer_tracing_disable() has been called on @tr,
 * when it's safe to re-enable tracing.
 */
void tracer_tracing_enable(struct trace_array *tr)
{
	if (WARN_ON_ONCE(!tr->array_buffer.buffer))
		return;

	ring_buffer_record_enable(tr->array_buffer.buffer);
}

/**
 * tracing_off - turn off tracing buffers
 *
 * This function stops the tracing buffers from recording data.
 * It does not disable any overhead the tracers themselves may
 * be causing. This function simply causes all recording to
 * the ring buffers to fail.
 */
void tracing_off(void)
{
	tracer_tracing_off(&global_trace);
}
EXPORT_SYMBOL_GPL(tracing_off);

void disable_trace_on_warning(void)
{
	if (__disable_trace_on_warning) {
		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
			"Disabling tracing due to warning\n");
		tracing_off();
	}
}

/**
 * tracer_tracing_is_on - show real state of ring buffer enabled
 * @tr : the trace array to know if ring buffer is enabled
 *
 * Shows real state of the ring buffer if it is enabled or not.
 */
bool tracer_tracing_is_on(struct trace_array *tr)
{
	if (tr->array_buffer.buffer)
		return ring_buffer_record_is_set_on(tr->array_buffer.buffer);
	return !tr->buffer_disabled;
}

/**
 * tracing_is_on - show state of ring buffers enabled
 */
int tracing_is_on(void)
{
	return tracer_tracing_is_on(&global_trace);
}
EXPORT_SYMBOL_GPL(tracing_is_on);

static int __init set_buf_size(char *str)
{
	unsigned long buf_size;

	if (!str)
		return 0;
	buf_size = memparse(str, &str);
	/*
	 * nr_entries can not be zero and the startup
	 * tests require some buffer space. Therefore
	 * ensure we have at least 4096 bytes of buffer.
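	 *
	 * For example (illustrative), booting with:
	 *
	 *	trace_buf_size=10M
	 *
	 * asks for roughly a 10 MiB buffer per CPU; memparse() accepts the
	 * usual K/M/G suffixes, and anything smaller than 4096 bytes is
	 * raised to that minimum below.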
1708 */ 1709 trace_buf_size = max(4096UL, buf_size); 1710 return 1; 1711 } 1712 __setup("trace_buf_size=", set_buf_size); 1713 1714 static int __init set_tracing_thresh(char *str) 1715 { 1716 unsigned long threshold; 1717 int ret; 1718 1719 if (!str) 1720 return 0; 1721 ret = kstrtoul(str, 0, &threshold); 1722 if (ret < 0) 1723 return 0; 1724 tracing_thresh = threshold * 1000; 1725 return 1; 1726 } 1727 __setup("tracing_thresh=", set_tracing_thresh); 1728 1729 unsigned long nsecs_to_usecs(unsigned long nsecs) 1730 { 1731 return nsecs / 1000; 1732 } 1733 1734 /* 1735 * TRACE_FLAGS is defined as a tuple matching bit masks with strings. 1736 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that 1737 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list 1738 * of strings in the order that the evals (enum) were defined. 1739 */ 1740 #undef C 1741 #define C(a, b) b 1742 1743 /* These must match the bit positions in trace_iterator_flags */ 1744 static const char *trace_options[] = { 1745 TRACE_FLAGS 1746 NULL 1747 }; 1748 1749 static struct { 1750 u64 (*func)(void); 1751 const char *name; 1752 int in_ns; /* is this clock in nanoseconds? */ 1753 } trace_clocks[] = { 1754 { trace_clock_local, "local", 1 }, 1755 { trace_clock_global, "global", 1 }, 1756 { trace_clock_counter, "counter", 0 }, 1757 { trace_clock_jiffies, "uptime", 0 }, 1758 { trace_clock, "perf", 1 }, 1759 { ktime_get_mono_fast_ns, "mono", 1 }, 1760 { ktime_get_raw_fast_ns, "mono_raw", 1 }, 1761 { ktime_get_boot_fast_ns, "boot", 1 }, 1762 { ktime_get_tai_fast_ns, "tai", 1 }, 1763 ARCH_TRACE_CLOCKS 1764 }; 1765 1766 bool trace_clock_in_ns(struct trace_array *tr) 1767 { 1768 if (trace_clocks[tr->clock_id].in_ns) 1769 return true; 1770 1771 return false; 1772 } 1773 1774 /* 1775 * trace_parser_get_init - gets the buffer for trace parser 1776 */ 1777 int trace_parser_get_init(struct trace_parser *parser, int size) 1778 { 1779 memset(parser, 0, sizeof(*parser)); 1780 1781 parser->buffer = kmalloc(size, GFP_KERNEL); 1782 if (!parser->buffer) 1783 return 1; 1784 1785 parser->size = size; 1786 return 0; 1787 } 1788 1789 /* 1790 * trace_parser_put - frees the buffer for trace parser 1791 */ 1792 void trace_parser_put(struct trace_parser *parser) 1793 { 1794 kfree(parser->buffer); 1795 parser->buffer = NULL; 1796 } 1797 1798 /* 1799 * trace_get_user - reads the user input string separated by space 1800 * (matched by isspace(ch)) 1801 * 1802 * For each string found the 'struct trace_parser' is updated, 1803 * and the function returns. 1804 * 1805 * Returns number of bytes read. 1806 * 1807 * See kernel/trace/trace.h for 'struct trace_parser' details. 1808 */ 1809 int trace_get_user(struct trace_parser *parser, const char __user *ubuf, 1810 size_t cnt, loff_t *ppos) 1811 { 1812 char ch; 1813 size_t read = 0; 1814 ssize_t ret; 1815 1816 if (!*ppos) 1817 trace_parser_clear(parser); 1818 1819 ret = get_user(ch, ubuf++); 1820 if (ret) 1821 goto fail; 1822 1823 read++; 1824 cnt--; 1825 1826 /* 1827 * The parser is not finished with the last write, 1828 * continue reading the user input without skipping spaces. 
1829 */ 1830 if (!parser->cont) { 1831 /* skip white space */ 1832 while (cnt && isspace(ch)) { 1833 ret = get_user(ch, ubuf++); 1834 if (ret) 1835 goto fail; 1836 read++; 1837 cnt--; 1838 } 1839 1840 parser->idx = 0; 1841 1842 /* only spaces were written */ 1843 if (isspace(ch) || !ch) { 1844 *ppos += read; 1845 return read; 1846 } 1847 } 1848 1849 /* read the non-space input */ 1850 while (cnt && !isspace(ch) && ch) { 1851 if (parser->idx < parser->size - 1) 1852 parser->buffer[parser->idx++] = ch; 1853 else { 1854 ret = -EINVAL; 1855 goto fail; 1856 } 1857 1858 ret = get_user(ch, ubuf++); 1859 if (ret) 1860 goto fail; 1861 read++; 1862 cnt--; 1863 } 1864 1865 /* We either got finished input or we have to wait for another call. */ 1866 if (isspace(ch) || !ch) { 1867 parser->buffer[parser->idx] = 0; 1868 parser->cont = false; 1869 } else if (parser->idx < parser->size - 1) { 1870 parser->cont = true; 1871 parser->buffer[parser->idx++] = ch; 1872 /* Make sure the parsed string always terminates with '\0'. */ 1873 parser->buffer[parser->idx] = 0; 1874 } else { 1875 ret = -EINVAL; 1876 goto fail; 1877 } 1878 1879 *ppos += read; 1880 return read; 1881 fail: 1882 trace_parser_fail(parser); 1883 return ret; 1884 } 1885 1886 /* TODO add a seq_buf_to_buffer() */ 1887 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt) 1888 { 1889 int len; 1890 1891 if (trace_seq_used(s) <= s->readpos) 1892 return -EBUSY; 1893 1894 len = trace_seq_used(s) - s->readpos; 1895 if (cnt > len) 1896 cnt = len; 1897 memcpy(buf, s->buffer + s->readpos, cnt); 1898 1899 s->readpos += cnt; 1900 return cnt; 1901 } 1902 1903 unsigned long __read_mostly tracing_thresh; 1904 1905 #ifdef CONFIG_TRACER_MAX_TRACE 1906 static const struct file_operations tracing_max_lat_fops; 1907 1908 #ifdef LATENCY_FS_NOTIFY 1909 1910 static struct workqueue_struct *fsnotify_wq; 1911 1912 static void latency_fsnotify_workfn(struct work_struct *work) 1913 { 1914 struct trace_array *tr = container_of(work, struct trace_array, 1915 fsnotify_work); 1916 fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY); 1917 } 1918 1919 static void latency_fsnotify_workfn_irq(struct irq_work *iwork) 1920 { 1921 struct trace_array *tr = container_of(iwork, struct trace_array, 1922 fsnotify_irqwork); 1923 queue_work(fsnotify_wq, &tr->fsnotify_work); 1924 } 1925 1926 static void trace_create_maxlat_file(struct trace_array *tr, 1927 struct dentry *d_tracer) 1928 { 1929 INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn); 1930 init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq); 1931 tr->d_max_latency = trace_create_file("tracing_max_latency", 1932 TRACE_MODE_WRITE, 1933 d_tracer, tr, 1934 &tracing_max_lat_fops); 1935 } 1936 1937 __init static int latency_fsnotify_init(void) 1938 { 1939 fsnotify_wq = alloc_workqueue("tr_max_lat_wq", 1940 WQ_UNBOUND | WQ_HIGHPRI, 0); 1941 if (!fsnotify_wq) { 1942 pr_err("Unable to allocate tr_max_lat_wq\n"); 1943 return -ENOMEM; 1944 } 1945 return 0; 1946 } 1947 1948 late_initcall_sync(latency_fsnotify_init); 1949 1950 void latency_fsnotify(struct trace_array *tr) 1951 { 1952 if (!fsnotify_wq) 1953 return; 1954 /* 1955 * We cannot call queue_work(&tr->fsnotify_work) from here because it's 1956 * possible that we are called from __schedule() or do_idle(), which 1957 * could cause a deadlock. 
1958 */ 1959 irq_work_queue(&tr->fsnotify_irqwork); 1960 } 1961 1962 #else /* !LATENCY_FS_NOTIFY */ 1963 1964 #define trace_create_maxlat_file(tr, d_tracer) \ 1965 trace_create_file("tracing_max_latency", TRACE_MODE_WRITE, \ 1966 d_tracer, tr, &tracing_max_lat_fops) 1967 1968 #endif 1969 1970 /* 1971 * Copy the new maximum trace into the separate maximum-trace 1972 * structure. (this way the maximum trace is permanently saved, 1973 * for later retrieval via /sys/kernel/tracing/tracing_max_latency) 1974 */ 1975 static void 1976 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) 1977 { 1978 struct array_buffer *trace_buf = &tr->array_buffer; 1979 struct array_buffer *max_buf = &tr->max_buffer; 1980 struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu); 1981 struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu); 1982 1983 max_buf->cpu = cpu; 1984 max_buf->time_start = data->preempt_timestamp; 1985 1986 max_data->saved_latency = tr->max_latency; 1987 max_data->critical_start = data->critical_start; 1988 max_data->critical_end = data->critical_end; 1989 1990 strscpy(max_data->comm, tsk->comm); 1991 max_data->pid = tsk->pid; 1992 /* 1993 * If tsk == current, then use current_uid(), as that does not use 1994 * RCU. The irq tracer can be called out of RCU scope. 1995 */ 1996 if (tsk == current) 1997 max_data->uid = current_uid(); 1998 else 1999 max_data->uid = task_uid(tsk); 2000 2001 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO; 2002 max_data->policy = tsk->policy; 2003 max_data->rt_priority = tsk->rt_priority; 2004 2005 /* record this tasks comm */ 2006 tracing_record_cmdline(tsk); 2007 latency_fsnotify(tr); 2008 } 2009 2010 /** 2011 * update_max_tr - snapshot all trace buffers from global_trace to max_tr 2012 * @tr: tracer 2013 * @tsk: the task with the latency 2014 * @cpu: The cpu that initiated the trace. 2015 * @cond_data: User data associated with a conditional snapshot 2016 * 2017 * Flip the buffers between the @tr and the max_tr and record information 2018 * about which task was the cause of this latency. 2019 */ 2020 void 2021 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu, 2022 void *cond_data) 2023 { 2024 if (tr->stop_count) 2025 return; 2026 2027 WARN_ON_ONCE(!irqs_disabled()); 2028 2029 if (!tr->allocated_snapshot) { 2030 /* Only the nop tracer should hit this when disabling */ 2031 WARN_ON_ONCE(tr->current_trace != &nop_trace); 2032 return; 2033 } 2034 2035 arch_spin_lock(&tr->max_lock); 2036 2037 /* Inherit the recordable setting from array_buffer */ 2038 if (ring_buffer_record_is_set_on(tr->array_buffer.buffer)) 2039 ring_buffer_record_on(tr->max_buffer.buffer); 2040 else 2041 ring_buffer_record_off(tr->max_buffer.buffer); 2042 2043 #ifdef CONFIG_TRACER_SNAPSHOT 2044 if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) { 2045 arch_spin_unlock(&tr->max_lock); 2046 return; 2047 } 2048 #endif 2049 swap(tr->array_buffer.buffer, tr->max_buffer.buffer); 2050 2051 __update_max_tr(tr, tsk, cpu); 2052 2053 arch_spin_unlock(&tr->max_lock); 2054 2055 /* Any waiters on the old snapshot buffer need to wake up */ 2056 ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS); 2057 } 2058 2059 /** 2060 * update_max_tr_single - only copy one trace over, and reset the rest 2061 * @tr: tracer 2062 * @tsk: task with the latency 2063 * @cpu: the cpu of the buffer to copy. 2064 * 2065 * Flip the trace of a single CPU buffer between the @tr and the max_tr. 
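 *
 * Unlike update_max_tr(), only the buffer of @cpu is swapped (via
 * ring_buffer_swap_cpu()).  The swap can fail with -EBUSY when a commit
 * or a resize is in progress on that CPU; in that case the failure is
 * noted in the max buffer instead.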
2066 */ 2067 void 2068 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) 2069 { 2070 int ret; 2071 2072 if (tr->stop_count) 2073 return; 2074 2075 WARN_ON_ONCE(!irqs_disabled()); 2076 if (!tr->allocated_snapshot) { 2077 /* Only the nop tracer should hit this when disabling */ 2078 WARN_ON_ONCE(tr->current_trace != &nop_trace); 2079 return; 2080 } 2081 2082 arch_spin_lock(&tr->max_lock); 2083 2084 ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu); 2085 2086 if (ret == -EBUSY) { 2087 /* 2088 * We failed to swap the buffer due to a commit taking 2089 * place on this CPU. We fail to record, but we reset 2090 * the max trace buffer (no one writes directly to it) 2091 * and flag that it failed. 2092 * Another reason is resize is in progress. 2093 */ 2094 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_, 2095 "Failed to swap buffers due to commit or resize in progress\n"); 2096 } 2097 2098 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY); 2099 2100 __update_max_tr(tr, tsk, cpu); 2101 arch_spin_unlock(&tr->max_lock); 2102 } 2103 2104 #endif /* CONFIG_TRACER_MAX_TRACE */ 2105 2106 struct pipe_wait { 2107 struct trace_iterator *iter; 2108 int wait_index; 2109 }; 2110 2111 static bool wait_pipe_cond(void *data) 2112 { 2113 struct pipe_wait *pwait = data; 2114 struct trace_iterator *iter = pwait->iter; 2115 2116 if (atomic_read_acquire(&iter->wait_index) != pwait->wait_index) 2117 return true; 2118 2119 return iter->closed; 2120 } 2121 2122 static int wait_on_pipe(struct trace_iterator *iter, int full) 2123 { 2124 struct pipe_wait pwait; 2125 int ret; 2126 2127 /* Iterators are static, they should be filled or empty */ 2128 if (trace_buffer_iter(iter, iter->cpu_file)) 2129 return 0; 2130 2131 pwait.wait_index = atomic_read_acquire(&iter->wait_index); 2132 pwait.iter = iter; 2133 2134 ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full, 2135 wait_pipe_cond, &pwait); 2136 2137 #ifdef CONFIG_TRACER_MAX_TRACE 2138 /* 2139 * Make sure this is still the snapshot buffer, as if a snapshot were 2140 * to happen, this would now be the main buffer. 2141 */ 2142 if (iter->snapshot) 2143 iter->array_buffer = &iter->tr->max_buffer; 2144 #endif 2145 return ret; 2146 } 2147 2148 #ifdef CONFIG_FTRACE_STARTUP_TEST 2149 static bool selftests_can_run; 2150 2151 struct trace_selftests { 2152 struct list_head list; 2153 struct tracer *type; 2154 }; 2155 2156 static LIST_HEAD(postponed_selftests); 2157 2158 static int save_selftest(struct tracer *type) 2159 { 2160 struct trace_selftests *selftest; 2161 2162 selftest = kmalloc(sizeof(*selftest), GFP_KERNEL); 2163 if (!selftest) 2164 return -ENOMEM; 2165 2166 selftest->type = type; 2167 list_add(&selftest->list, &postponed_selftests); 2168 return 0; 2169 } 2170 2171 static int run_tracer_selftest(struct tracer *type) 2172 { 2173 struct trace_array *tr = &global_trace; 2174 struct tracer_flags *saved_flags = tr->current_trace_flags; 2175 struct tracer *saved_tracer = tr->current_trace; 2176 int ret; 2177 2178 if (!type->selftest || tracing_selftest_disabled) 2179 return 0; 2180 2181 /* 2182 * If a tracer registers early in boot up (before scheduling is 2183 * initialized and such), then do not run its selftests yet. 2184 * Instead, run it a little later in the boot process. 
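 * Such tracers are queued on postponed_selftests via save_selftest()
 * and are run later from init_trace_selftests(), a core_initcall, once
 * selftests_can_run has been set.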
2185 */ 2186 if (!selftests_can_run) 2187 return save_selftest(type); 2188 2189 if (!tracing_is_on()) { 2190 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n", 2191 type->name); 2192 return 0; 2193 } 2194 2195 /* 2196 * Run a selftest on this tracer. 2197 * Here we reset the trace buffer, and set the current 2198 * tracer to be this tracer. The tracer can then run some 2199 * internal tracing to verify that everything is in order. 2200 * If we fail, we do not register this tracer. 2201 */ 2202 tracing_reset_online_cpus(&tr->array_buffer); 2203 2204 tr->current_trace = type; 2205 tr->current_trace_flags = type->flags ? : type->default_flags; 2206 2207 #ifdef CONFIG_TRACER_MAX_TRACE 2208 if (type->use_max_tr) { 2209 /* If we expanded the buffers, make sure the max is expanded too */ 2210 if (tr->ring_buffer_expanded) 2211 ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size, 2212 RING_BUFFER_ALL_CPUS); 2213 tr->allocated_snapshot = true; 2214 } 2215 #endif 2216 2217 /* the test is responsible for initializing and enabling */ 2218 pr_info("Testing tracer %s: ", type->name); 2219 ret = type->selftest(type, tr); 2220 /* the test is responsible for resetting too */ 2221 tr->current_trace = saved_tracer; 2222 tr->current_trace_flags = saved_flags; 2223 if (ret) { 2224 printk(KERN_CONT "FAILED!\n"); 2225 /* Add the warning after printing 'FAILED' */ 2226 WARN_ON(1); 2227 return -1; 2228 } 2229 /* Only reset on passing, to avoid touching corrupted buffers */ 2230 tracing_reset_online_cpus(&tr->array_buffer); 2231 2232 #ifdef CONFIG_TRACER_MAX_TRACE 2233 if (type->use_max_tr) { 2234 tr->allocated_snapshot = false; 2235 2236 /* Shrink the max buffer again */ 2237 if (tr->ring_buffer_expanded) 2238 ring_buffer_resize(tr->max_buffer.buffer, 1, 2239 RING_BUFFER_ALL_CPUS); 2240 } 2241 #endif 2242 2243 printk(KERN_CONT "PASSED\n"); 2244 return 0; 2245 } 2246 2247 static int do_run_tracer_selftest(struct tracer *type) 2248 { 2249 int ret; 2250 2251 /* 2252 * Tests can take a long time, especially if they are run one after the 2253 * other, as does happen during bootup when all the tracers are 2254 * registered. This could cause the soft lockup watchdog to trigger. 2255 */ 2256 cond_resched(); 2257 2258 tracing_selftest_running = true; 2259 ret = run_tracer_selftest(type); 2260 tracing_selftest_running = false; 2261 2262 return ret; 2263 } 2264 2265 static __init int init_trace_selftests(void) 2266 { 2267 struct trace_selftests *p, *n; 2268 struct tracer *t, **last; 2269 int ret; 2270 2271 selftests_can_run = true; 2272 2273 guard(mutex)(&trace_types_lock); 2274 2275 if (list_empty(&postponed_selftests)) 2276 return 0; 2277 2278 pr_info("Running postponed tracer tests:\n"); 2279 2280 tracing_selftest_running = true; 2281 list_for_each_entry_safe(p, n, &postponed_selftests, list) { 2282 /* This loop can take minutes when sanitizers are enabled, so 2283 * lets make sure we allow RCU processing. 
2284 */ 2285 cond_resched(); 2286 ret = run_tracer_selftest(p->type); 2287 /* If the test fails, then warn and remove from available_tracers */ 2288 if (ret < 0) { 2289 WARN(1, "tracer: %s failed selftest, disabling\n", 2290 p->type->name); 2291 last = &trace_types; 2292 for (t = trace_types; t; t = t->next) { 2293 if (t == p->type) { 2294 *last = t->next; 2295 break; 2296 } 2297 last = &t->next; 2298 } 2299 } 2300 list_del(&p->list); 2301 kfree(p); 2302 } 2303 tracing_selftest_running = false; 2304 2305 return 0; 2306 } 2307 core_initcall(init_trace_selftests); 2308 #else 2309 static inline int do_run_tracer_selftest(struct tracer *type) 2310 { 2311 return 0; 2312 } 2313 #endif /* CONFIG_FTRACE_STARTUP_TEST */ 2314 2315 static int add_tracer(struct trace_array *tr, struct tracer *t); 2316 2317 static void __init apply_trace_boot_options(void); 2318 2319 static void free_tracers(struct trace_array *tr) 2320 { 2321 struct tracers *t, *n; 2322 2323 lockdep_assert_held(&trace_types_lock); 2324 2325 list_for_each_entry_safe(t, n, &tr->tracers, list) { 2326 list_del(&t->list); 2327 kfree(t->flags); 2328 kfree(t); 2329 } 2330 } 2331 2332 /** 2333 * register_tracer - register a tracer with the ftrace system. 2334 * @type: the plugin for the tracer 2335 * 2336 * Register a new plugin tracer. 2337 */ 2338 int __init register_tracer(struct tracer *type) 2339 { 2340 struct trace_array *tr; 2341 struct tracer *t; 2342 int ret = 0; 2343 2344 if (!type->name) { 2345 pr_info("Tracer must have a name\n"); 2346 return -1; 2347 } 2348 2349 if (strlen(type->name) >= MAX_TRACER_SIZE) { 2350 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE); 2351 return -1; 2352 } 2353 2354 if (security_locked_down(LOCKDOWN_TRACEFS)) { 2355 pr_warn("Can not register tracer %s due to lockdown\n", 2356 type->name); 2357 return -EPERM; 2358 } 2359 2360 mutex_lock(&trace_types_lock); 2361 2362 for (t = trace_types; t; t = t->next) { 2363 if (strcmp(type->name, t->name) == 0) { 2364 /* already found */ 2365 pr_info("Tracer %s already registered\n", 2366 type->name); 2367 ret = -1; 2368 goto out; 2369 } 2370 } 2371 2372 /* store the tracer for __set_tracer_option */ 2373 if (type->flags) 2374 type->flags->trace = type; 2375 2376 ret = do_run_tracer_selftest(type); 2377 if (ret < 0) 2378 goto out; 2379 2380 list_for_each_entry(tr, &ftrace_trace_arrays, list) { 2381 ret = add_tracer(tr, type); 2382 if (ret < 0) { 2383 /* The tracer will still exist but without options */ 2384 pr_warn("Failed to create tracer options for %s\n", type->name); 2385 break; 2386 } 2387 } 2388 2389 type->next = trace_types; 2390 trace_types = type; 2391 2392 out: 2393 mutex_unlock(&trace_types_lock); 2394 2395 if (ret || !default_bootup_tracer) 2396 return ret; 2397 2398 if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE)) 2399 return 0; 2400 2401 printk(KERN_INFO "Starting tracer '%s'\n", type->name); 2402 /* Do we want this tracer to start on bootup? */ 2403 WARN_ON(tracing_set_tracer(&global_trace, type->name) < 0); 2404 default_bootup_tracer = NULL; 2405 2406 apply_trace_boot_options(); 2407 2408 /* disable other selftests, since this will break it. 
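 * (a command-line selected tracer is now running and would make the
 * remaining startup selftest results unreliable)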
*/ 2409 disable_tracing_selftest("running a tracer"); 2410 2411 return 0; 2412 } 2413 2414 static void tracing_reset_cpu(struct array_buffer *buf, int cpu) 2415 { 2416 struct trace_buffer *buffer = buf->buffer; 2417 2418 if (!buffer) 2419 return; 2420 2421 ring_buffer_record_disable(buffer); 2422 2423 /* Make sure all commits have finished */ 2424 synchronize_rcu(); 2425 ring_buffer_reset_cpu(buffer, cpu); 2426 2427 ring_buffer_record_enable(buffer); 2428 } 2429 2430 void tracing_reset_online_cpus(struct array_buffer *buf) 2431 { 2432 struct trace_buffer *buffer = buf->buffer; 2433 2434 if (!buffer) 2435 return; 2436 2437 ring_buffer_record_disable(buffer); 2438 2439 /* Make sure all commits have finished */ 2440 synchronize_rcu(); 2441 2442 buf->time_start = buffer_ftrace_now(buf, buf->cpu); 2443 2444 ring_buffer_reset_online_cpus(buffer); 2445 2446 ring_buffer_record_enable(buffer); 2447 } 2448 2449 static void tracing_reset_all_cpus(struct array_buffer *buf) 2450 { 2451 struct trace_buffer *buffer = buf->buffer; 2452 2453 if (!buffer) 2454 return; 2455 2456 ring_buffer_record_disable(buffer); 2457 2458 /* Make sure all commits have finished */ 2459 synchronize_rcu(); 2460 2461 buf->time_start = buffer_ftrace_now(buf, buf->cpu); 2462 2463 ring_buffer_reset(buffer); 2464 2465 ring_buffer_record_enable(buffer); 2466 } 2467 2468 /* Must have trace_types_lock held */ 2469 void tracing_reset_all_online_cpus_unlocked(void) 2470 { 2471 struct trace_array *tr; 2472 2473 lockdep_assert_held(&trace_types_lock); 2474 2475 list_for_each_entry(tr, &ftrace_trace_arrays, list) { 2476 if (!tr->clear_trace) 2477 continue; 2478 tr->clear_trace = false; 2479 tracing_reset_online_cpus(&tr->array_buffer); 2480 #ifdef CONFIG_TRACER_MAX_TRACE 2481 tracing_reset_online_cpus(&tr->max_buffer); 2482 #endif 2483 } 2484 } 2485 2486 void tracing_reset_all_online_cpus(void) 2487 { 2488 guard(mutex)(&trace_types_lock); 2489 tracing_reset_all_online_cpus_unlocked(); 2490 } 2491 2492 int is_tracing_stopped(void) 2493 { 2494 return global_trace.stop_count; 2495 } 2496 2497 static void tracing_start_tr(struct trace_array *tr) 2498 { 2499 struct trace_buffer *buffer; 2500 2501 if (tracing_disabled) 2502 return; 2503 2504 guard(raw_spinlock_irqsave)(&tr->start_lock); 2505 if (--tr->stop_count) { 2506 if (WARN_ON_ONCE(tr->stop_count < 0)) { 2507 /* Someone screwed up their debugging */ 2508 tr->stop_count = 0; 2509 } 2510 return; 2511 } 2512 2513 /* Prevent the buffers from switching */ 2514 arch_spin_lock(&tr->max_lock); 2515 2516 buffer = tr->array_buffer.buffer; 2517 if (buffer) 2518 ring_buffer_record_enable(buffer); 2519 2520 #ifdef CONFIG_TRACER_MAX_TRACE 2521 buffer = tr->max_buffer.buffer; 2522 if (buffer) 2523 ring_buffer_record_enable(buffer); 2524 #endif 2525 2526 arch_spin_unlock(&tr->max_lock); 2527 } 2528 2529 /** 2530 * tracing_start - quick start of the tracer 2531 * 2532 * If tracing is enabled but was stopped by tracing_stop, 2533 * this will start the tracer back up. 
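 *
 * tracing_stop() and tracing_start() nest through a per-array
 * stop_count and must therefore be called in pairs.  Illustrative
 * sketch only (not taken from a particular caller):
 *
 *	tracing_stop();
 *	... inspect the ring buffer with no new events coming in ...
 *	tracing_start();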
2534 */ 2535 void tracing_start(void) 2536 2537 { 2538 return tracing_start_tr(&global_trace); 2539 } 2540 2541 static void tracing_stop_tr(struct trace_array *tr) 2542 { 2543 struct trace_buffer *buffer; 2544 2545 guard(raw_spinlock_irqsave)(&tr->start_lock); 2546 if (tr->stop_count++) 2547 return; 2548 2549 /* Prevent the buffers from switching */ 2550 arch_spin_lock(&tr->max_lock); 2551 2552 buffer = tr->array_buffer.buffer; 2553 if (buffer) 2554 ring_buffer_record_disable(buffer); 2555 2556 #ifdef CONFIG_TRACER_MAX_TRACE 2557 buffer = tr->max_buffer.buffer; 2558 if (buffer) 2559 ring_buffer_record_disable(buffer); 2560 #endif 2561 2562 arch_spin_unlock(&tr->max_lock); 2563 } 2564 2565 /** 2566 * tracing_stop - quick stop of the tracer 2567 * 2568 * Light weight way to stop tracing. Use in conjunction with 2569 * tracing_start. 2570 */ 2571 void tracing_stop(void) 2572 { 2573 return tracing_stop_tr(&global_trace); 2574 } 2575 2576 /* 2577 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq 2578 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function 2579 * simplifies those functions and keeps them in sync. 2580 */ 2581 enum print_line_t trace_handle_return(struct trace_seq *s) 2582 { 2583 return trace_seq_has_overflowed(s) ? 2584 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED; 2585 } 2586 EXPORT_SYMBOL_GPL(trace_handle_return); 2587 2588 static unsigned short migration_disable_value(void) 2589 { 2590 #if defined(CONFIG_SMP) 2591 return current->migration_disabled; 2592 #else 2593 return 0; 2594 #endif 2595 } 2596 2597 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status) 2598 { 2599 unsigned int trace_flags = irqs_status; 2600 unsigned int pc; 2601 2602 pc = preempt_count(); 2603 2604 if (pc & NMI_MASK) 2605 trace_flags |= TRACE_FLAG_NMI; 2606 if (pc & HARDIRQ_MASK) 2607 trace_flags |= TRACE_FLAG_HARDIRQ; 2608 if (in_serving_softirq()) 2609 trace_flags |= TRACE_FLAG_SOFTIRQ; 2610 if (softirq_count() >> (SOFTIRQ_SHIFT + 1)) 2611 trace_flags |= TRACE_FLAG_BH_OFF; 2612 2613 if (tif_need_resched()) 2614 trace_flags |= TRACE_FLAG_NEED_RESCHED; 2615 if (test_preempt_need_resched()) 2616 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED; 2617 if (IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY) && tif_test_bit(TIF_NEED_RESCHED_LAZY)) 2618 trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY; 2619 return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) | 2620 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4; 2621 } 2622 2623 struct ring_buffer_event * 2624 trace_buffer_lock_reserve(struct trace_buffer *buffer, 2625 int type, 2626 unsigned long len, 2627 unsigned int trace_ctx) 2628 { 2629 return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx); 2630 } 2631 2632 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event); 2633 DEFINE_PER_CPU(int, trace_buffered_event_cnt); 2634 static int trace_buffered_event_ref; 2635 2636 /** 2637 * trace_buffered_event_enable - enable buffering events 2638 * 2639 * When events are being filtered, it is quicker to use a temporary 2640 * buffer to write the event data into if there's a likely chance 2641 * that it will not be committed. The discard of the ring buffer 2642 * is not as fast as committing, and is much slower than copying 2643 * a commit. 2644 * 2645 * When an event is to be filtered, allocate per cpu buffers to 2646 * write the event data into, and if the event is filtered and discarded 2647 * it is simply dropped, otherwise, the entire data is to be committed 2648 * in one shot. 
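 *
 * Calls are reference counted (trace_buffered_event_ref) and must be
 * balanced by trace_buffered_event_disable() when the filter is removed.
 * Allocation failures are tolerated: affected CPUs simply fall back to
 * reserving the event directly in the ring buffer.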
2649 */ 2650 void trace_buffered_event_enable(void) 2651 { 2652 struct ring_buffer_event *event; 2653 struct page *page; 2654 int cpu; 2655 2656 WARN_ON_ONCE(!mutex_is_locked(&event_mutex)); 2657 2658 if (trace_buffered_event_ref++) 2659 return; 2660 2661 for_each_tracing_cpu(cpu) { 2662 page = alloc_pages_node(cpu_to_node(cpu), 2663 GFP_KERNEL | __GFP_NORETRY, 0); 2664 /* This is just an optimization and can handle failures */ 2665 if (!page) { 2666 pr_err("Failed to allocate event buffer\n"); 2667 break; 2668 } 2669 2670 event = page_address(page); 2671 memset(event, 0, sizeof(*event)); 2672 2673 per_cpu(trace_buffered_event, cpu) = event; 2674 2675 scoped_guard(preempt,) { 2676 if (cpu == smp_processor_id() && 2677 __this_cpu_read(trace_buffered_event) != 2678 per_cpu(trace_buffered_event, cpu)) 2679 WARN_ON_ONCE(1); 2680 } 2681 } 2682 } 2683 2684 static void enable_trace_buffered_event(void *data) 2685 { 2686 this_cpu_dec(trace_buffered_event_cnt); 2687 } 2688 2689 static void disable_trace_buffered_event(void *data) 2690 { 2691 this_cpu_inc(trace_buffered_event_cnt); 2692 } 2693 2694 /** 2695 * trace_buffered_event_disable - disable buffering events 2696 * 2697 * When a filter is removed, it is faster to not use the buffered 2698 * events, and to commit directly into the ring buffer. Free up 2699 * the temp buffers when there are no more users. This requires 2700 * special synchronization with current events. 2701 */ 2702 void trace_buffered_event_disable(void) 2703 { 2704 int cpu; 2705 2706 WARN_ON_ONCE(!mutex_is_locked(&event_mutex)); 2707 2708 if (WARN_ON_ONCE(!trace_buffered_event_ref)) 2709 return; 2710 2711 if (--trace_buffered_event_ref) 2712 return; 2713 2714 /* For each CPU, set the buffer as used. */ 2715 on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event, 2716 NULL, true); 2717 2718 /* Wait for all current users to finish */ 2719 synchronize_rcu(); 2720 2721 for_each_tracing_cpu(cpu) { 2722 free_page((unsigned long)per_cpu(trace_buffered_event, cpu)); 2723 per_cpu(trace_buffered_event, cpu) = NULL; 2724 } 2725 2726 /* 2727 * Wait for all CPUs that potentially started checking if they can use 2728 * their event buffer only after the previous synchronize_rcu() call and 2729 * they still read a valid pointer from trace_buffered_event. It must be 2730 * ensured they don't see cleared trace_buffered_event_cnt else they 2731 * could wrongly decide to use the pointed-to buffer which is now freed. 2732 */ 2733 synchronize_rcu(); 2734 2735 /* For each CPU, relinquish the buffer */ 2736 on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL, 2737 true); 2738 } 2739 2740 static struct trace_buffer *temp_buffer; 2741 2742 struct ring_buffer_event * 2743 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb, 2744 struct trace_event_file *trace_file, 2745 int type, unsigned long len, 2746 unsigned int trace_ctx) 2747 { 2748 struct ring_buffer_event *entry; 2749 struct trace_array *tr = trace_file->tr; 2750 int val; 2751 2752 *current_rb = tr->array_buffer.buffer; 2753 2754 if (!tr->no_filter_buffering_ref && 2755 (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) { 2756 preempt_disable_notrace(); 2757 /* 2758 * Filtering is on, so try to use the per cpu buffer first. 2759 * This buffer will simulate a ring_buffer_event, 2760 * where the type_len is zero and the array[0] will 2761 * hold the full length. 2762 * (see include/linux/ring-buffer.h for details on 2763 * how the ring_buffer_event is structured). 
2764 * 2765 * Using a temp buffer during filtering and copying it 2766 * on a matched filter is quicker than writing directly 2767 * into the ring buffer and then discarding it when 2768 * it doesn't match. That is because the discard 2769 * requires several atomic operations to get right. 2770 * Copying on match and doing nothing on a failed match 2771 * is still quicker than no copy on match, but having 2772 * to discard out of the ring buffer on a failed match. 2773 */ 2774 if ((entry = __this_cpu_read(trace_buffered_event))) { 2775 int max_len = PAGE_SIZE - struct_size(entry, array, 1); 2776 2777 val = this_cpu_inc_return(trace_buffered_event_cnt); 2778 2779 /* 2780 * Preemption is disabled, but interrupts and NMIs 2781 * can still come in now. If that happens after 2782 * the above increment, then it will have to go 2783 * back to the old method of allocating the event 2784 * on the ring buffer, and if the filter fails, it 2785 * will have to call ring_buffer_discard_commit() 2786 * to remove it. 2787 * 2788 * Need to also check the unlikely case that the 2789 * length is bigger than the temp buffer size. 2790 * If that happens, then the reserve is pretty much 2791 * guaranteed to fail, as the ring buffer currently 2792 * only allows events less than a page. But that may 2793 * change in the future, so let the ring buffer reserve 2794 * handle the failure in that case. 2795 */ 2796 if (val == 1 && likely(len <= max_len)) { 2797 trace_event_setup(entry, type, trace_ctx); 2798 entry->array[0] = len; 2799 /* Return with preemption disabled */ 2800 return entry; 2801 } 2802 this_cpu_dec(trace_buffered_event_cnt); 2803 } 2804 /* __trace_buffer_lock_reserve() disables preemption */ 2805 preempt_enable_notrace(); 2806 } 2807 2808 entry = __trace_buffer_lock_reserve(*current_rb, type, len, 2809 trace_ctx); 2810 /* 2811 * If tracing is off, but we have triggers enabled 2812 * we still need to look at the event data. Use the temp_buffer 2813 * to store the trace event for the trigger to use. It's recursive 2814 * safe and will not be recorded anywhere. 
2815 */ 2816 if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) { 2817 *current_rb = temp_buffer; 2818 entry = __trace_buffer_lock_reserve(*current_rb, type, len, 2819 trace_ctx); 2820 } 2821 return entry; 2822 } 2823 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve); 2824 2825 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock); 2826 static DEFINE_MUTEX(tracepoint_printk_mutex); 2827 2828 static void output_printk(struct trace_event_buffer *fbuffer) 2829 { 2830 struct trace_event_call *event_call; 2831 struct trace_event_file *file; 2832 struct trace_event *event; 2833 unsigned long flags; 2834 struct trace_iterator *iter = tracepoint_print_iter; 2835 2836 /* We should never get here if iter is NULL */ 2837 if (WARN_ON_ONCE(!iter)) 2838 return; 2839 2840 event_call = fbuffer->trace_file->event_call; 2841 if (!event_call || !event_call->event.funcs || 2842 !event_call->event.funcs->trace) 2843 return; 2844 2845 file = fbuffer->trace_file; 2846 if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) || 2847 (unlikely(file->flags & EVENT_FILE_FL_FILTERED) && 2848 !filter_match_preds(file->filter, fbuffer->entry))) 2849 return; 2850 2851 event = &fbuffer->trace_file->event_call->event; 2852 2853 raw_spin_lock_irqsave(&tracepoint_iter_lock, flags); 2854 trace_seq_init(&iter->seq); 2855 iter->ent = fbuffer->entry; 2856 event_call->event.funcs->trace(iter, 0, event); 2857 trace_seq_putc(&iter->seq, 0); 2858 printk("%s", iter->seq.buffer); 2859 2860 raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags); 2861 } 2862 2863 int tracepoint_printk_sysctl(const struct ctl_table *table, int write, 2864 void *buffer, size_t *lenp, 2865 loff_t *ppos) 2866 { 2867 int save_tracepoint_printk; 2868 int ret; 2869 2870 guard(mutex)(&tracepoint_printk_mutex); 2871 save_tracepoint_printk = tracepoint_printk; 2872 2873 ret = proc_dointvec(table, write, buffer, lenp, ppos); 2874 2875 /* 2876 * This will force exiting early, as tracepoint_printk 2877 * is always zero when tracepoint_printk_iter is not allocated 2878 */ 2879 if (!tracepoint_print_iter) 2880 tracepoint_printk = 0; 2881 2882 if (save_tracepoint_printk == tracepoint_printk) 2883 return ret; 2884 2885 if (tracepoint_printk) 2886 static_key_enable(&tracepoint_printk_key.key); 2887 else 2888 static_key_disable(&tracepoint_printk_key.key); 2889 2890 return ret; 2891 } 2892 2893 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer) 2894 { 2895 enum event_trigger_type tt = ETT_NONE; 2896 struct trace_event_file *file = fbuffer->trace_file; 2897 2898 if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event, 2899 fbuffer->entry, &tt)) 2900 goto discard; 2901 2902 if (static_key_false(&tracepoint_printk_key.key)) 2903 output_printk(fbuffer); 2904 2905 if (static_branch_unlikely(&trace_event_exports_enabled)) 2906 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT); 2907 2908 trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer, 2909 fbuffer->event, fbuffer->trace_ctx, fbuffer->regs); 2910 2911 discard: 2912 if (tt) 2913 event_triggers_post_call(file, tt); 2914 2915 } 2916 EXPORT_SYMBOL_GPL(trace_event_buffer_commit); 2917 2918 /* 2919 * Skip 3: 2920 * 2921 * trace_buffer_unlock_commit_regs() 2922 * trace_event_buffer_commit() 2923 * trace_event_raw_event_xxx() 2924 */ 2925 # define STACK_SKIP 3 2926 2927 void trace_buffer_unlock_commit_regs(struct trace_array *tr, 2928 struct trace_buffer *buffer, 2929 struct ring_buffer_event *event, 2930 unsigned int trace_ctx, 2931 struct pt_regs *regs) 2932 { 2933 
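	/*
	 * Commit the event itself first; the kernel and (optional) user
	 * stack traces below are recorded as separate events.
	 */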
__buffer_unlock_commit(buffer, event); 2934 2935 /* 2936 * If regs is not set, then skip the necessary functions. 2937 * Note, we can still get here via blktrace, wakeup tracer 2938 * and mmiotrace, but that's ok if they lose a function or 2939 * two. They are not that meaningful. 2940 */ 2941 ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs); 2942 ftrace_trace_userstack(tr, buffer, trace_ctx); 2943 } 2944 2945 /* 2946 * Similar to trace_buffer_unlock_commit_regs() but do not dump stack. 2947 */ 2948 void 2949 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer, 2950 struct ring_buffer_event *event) 2951 { 2952 __buffer_unlock_commit(buffer, event); 2953 } 2954 2955 void 2956 trace_function(struct trace_array *tr, unsigned long ip, unsigned long 2957 parent_ip, unsigned int trace_ctx, struct ftrace_regs *fregs) 2958 { 2959 struct trace_buffer *buffer = tr->array_buffer.buffer; 2960 struct ring_buffer_event *event; 2961 struct ftrace_entry *entry; 2962 int size = sizeof(*entry); 2963 2964 size += FTRACE_REGS_MAX_ARGS * !!fregs * sizeof(long); 2965 2966 event = __trace_buffer_lock_reserve(buffer, TRACE_FN, size, 2967 trace_ctx); 2968 if (!event) 2969 return; 2970 entry = ring_buffer_event_data(event); 2971 entry->ip = ip; 2972 entry->parent_ip = parent_ip; 2973 2974 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API 2975 if (fregs) { 2976 for (int i = 0; i < FTRACE_REGS_MAX_ARGS; i++) 2977 entry->args[i] = ftrace_regs_get_argument(fregs, i); 2978 } 2979 #endif 2980 2981 if (static_branch_unlikely(&trace_function_exports_enabled)) 2982 ftrace_exports(event, TRACE_EXPORT_FUNCTION); 2983 __buffer_unlock_commit(buffer, event); 2984 } 2985 2986 #ifdef CONFIG_STACKTRACE 2987 2988 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */ 2989 #define FTRACE_KSTACK_NESTING 4 2990 2991 #define FTRACE_KSTACK_ENTRIES (SZ_4K / FTRACE_KSTACK_NESTING) 2992 2993 struct ftrace_stack { 2994 unsigned long calls[FTRACE_KSTACK_ENTRIES]; 2995 }; 2996 2997 2998 struct ftrace_stacks { 2999 struct ftrace_stack stacks[FTRACE_KSTACK_NESTING]; 3000 }; 3001 3002 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks); 3003 static DEFINE_PER_CPU(int, ftrace_stack_reserve); 3004 3005 static void __ftrace_trace_stack(struct trace_array *tr, 3006 struct trace_buffer *buffer, 3007 unsigned int trace_ctx, 3008 int skip, struct pt_regs *regs) 3009 { 3010 struct ring_buffer_event *event; 3011 unsigned int size, nr_entries; 3012 struct ftrace_stack *fstack; 3013 struct stack_entry *entry; 3014 int stackidx; 3015 3016 /* 3017 * Add one, for this function and the call to save_stack_trace() 3018 * If regs is set, then these functions will not be in the way. 3019 */ 3020 #ifndef CONFIG_UNWINDER_ORC 3021 if (!regs) 3022 skip++; 3023 #endif 3024 3025 guard(preempt_notrace)(); 3026 3027 stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1; 3028 3029 /* This should never happen. If it does, yell once and skip */ 3030 if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING)) 3031 goto out; 3032 3033 /* 3034 * The above __this_cpu_inc_return() is 'atomic' cpu local. An 3035 * interrupt will either see the value pre increment or post 3036 * increment. If the interrupt happens pre increment it will have 3037 * restored the counter when it returns. We just need a barrier to 3038 * keep gcc from moving things around. 
3039 */ 3040 barrier(); 3041 3042 fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx; 3043 size = ARRAY_SIZE(fstack->calls); 3044 3045 if (regs) { 3046 nr_entries = stack_trace_save_regs(regs, fstack->calls, 3047 size, skip); 3048 } else { 3049 nr_entries = stack_trace_save(fstack->calls, size, skip); 3050 } 3051 3052 #ifdef CONFIG_DYNAMIC_FTRACE 3053 /* Mark entry of stack trace as trampoline code */ 3054 if (tr->ops && tr->ops->trampoline) { 3055 unsigned long tramp_start = tr->ops->trampoline; 3056 unsigned long tramp_end = tramp_start + tr->ops->trampoline_size; 3057 unsigned long *calls = fstack->calls; 3058 3059 for (int i = 0; i < nr_entries; i++) { 3060 if (calls[i] >= tramp_start && calls[i] < tramp_end) 3061 calls[i] = FTRACE_TRAMPOLINE_MARKER; 3062 } 3063 } 3064 #endif 3065 3066 event = __trace_buffer_lock_reserve(buffer, TRACE_STACK, 3067 struct_size(entry, caller, nr_entries), 3068 trace_ctx); 3069 if (!event) 3070 goto out; 3071 entry = ring_buffer_event_data(event); 3072 3073 entry->size = nr_entries; 3074 memcpy(&entry->caller, fstack->calls, 3075 flex_array_size(entry, caller, nr_entries)); 3076 3077 __buffer_unlock_commit(buffer, event); 3078 3079 out: 3080 /* Again, don't let gcc optimize things here */ 3081 barrier(); 3082 __this_cpu_dec(ftrace_stack_reserve); 3083 } 3084 3085 static inline void ftrace_trace_stack(struct trace_array *tr, 3086 struct trace_buffer *buffer, 3087 unsigned int trace_ctx, 3088 int skip, struct pt_regs *regs) 3089 { 3090 if (!(tr->trace_flags & TRACE_ITER(STACKTRACE))) 3091 return; 3092 3093 __ftrace_trace_stack(tr, buffer, trace_ctx, skip, regs); 3094 } 3095 3096 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx, 3097 int skip) 3098 { 3099 struct trace_buffer *buffer = tr->array_buffer.buffer; 3100 3101 if (rcu_is_watching()) { 3102 __ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL); 3103 return; 3104 } 3105 3106 if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY))) 3107 return; 3108 3109 /* 3110 * When an NMI triggers, RCU is enabled via ct_nmi_enter(), 3111 * but if the above rcu_is_watching() failed, then the NMI 3112 * triggered someplace critical, and ct_irq_enter() should 3113 * not be called from NMI. 3114 */ 3115 if (unlikely(in_nmi())) 3116 return; 3117 3118 ct_irq_enter_irqson(); 3119 __ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL); 3120 ct_irq_exit_irqson(); 3121 } 3122 3123 /** 3124 * trace_dump_stack - record a stack back trace in the trace buffer 3125 * @skip: Number of functions to skip (helper handlers) 3126 */ 3127 void trace_dump_stack(int skip) 3128 { 3129 if (tracing_disabled || tracing_selftest_running) 3130 return; 3131 3132 #ifndef CONFIG_UNWINDER_ORC 3133 /* Skip 1 to skip this function. */ 3134 skip++; 3135 #endif 3136 __ftrace_trace_stack(printk_trace, printk_trace->array_buffer.buffer, 3137 tracing_gen_ctx(), skip, NULL); 3138 } 3139 EXPORT_SYMBOL_GPL(trace_dump_stack); 3140 3141 #ifdef CONFIG_USER_STACKTRACE_SUPPORT 3142 static DEFINE_PER_CPU(int, user_stack_count); 3143 3144 static void 3145 ftrace_trace_userstack(struct trace_array *tr, 3146 struct trace_buffer *buffer, unsigned int trace_ctx) 3147 { 3148 struct ring_buffer_event *event; 3149 struct userstack_entry *entry; 3150 3151 if (!(tr->trace_flags & TRACE_ITER(USERSTACKTRACE))) 3152 return; 3153 3154 /* 3155 * NMIs can not handle page faults, even with fix ups. 3156 * The save user stack can (and often does) fault. 
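 * (stack_trace_save_user() walks user-space memory), so bail out
 * entirely when running in NMI context.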
3157 */ 3158 if (unlikely(in_nmi())) 3159 return; 3160 3161 /* 3162 * prevent recursion, since the user stack tracing may 3163 * trigger other kernel events. 3164 */ 3165 guard(preempt)(); 3166 if (__this_cpu_read(user_stack_count)) 3167 return; 3168 3169 __this_cpu_inc(user_stack_count); 3170 3171 event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK, 3172 sizeof(*entry), trace_ctx); 3173 if (!event) 3174 goto out_drop_count; 3175 entry = ring_buffer_event_data(event); 3176 3177 entry->tgid = current->tgid; 3178 memset(&entry->caller, 0, sizeof(entry->caller)); 3179 3180 stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES); 3181 __buffer_unlock_commit(buffer, event); 3182 3183 out_drop_count: 3184 __this_cpu_dec(user_stack_count); 3185 } 3186 #else /* CONFIG_USER_STACKTRACE_SUPPORT */ 3187 static void ftrace_trace_userstack(struct trace_array *tr, 3188 struct trace_buffer *buffer, 3189 unsigned int trace_ctx) 3190 { 3191 } 3192 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */ 3193 3194 #endif /* CONFIG_STACKTRACE */ 3195 3196 static inline void 3197 func_repeats_set_delta_ts(struct func_repeats_entry *entry, 3198 unsigned long long delta) 3199 { 3200 entry->bottom_delta_ts = delta & U32_MAX; 3201 entry->top_delta_ts = (delta >> 32); 3202 } 3203 3204 void trace_last_func_repeats(struct trace_array *tr, 3205 struct trace_func_repeats *last_info, 3206 unsigned int trace_ctx) 3207 { 3208 struct trace_buffer *buffer = tr->array_buffer.buffer; 3209 struct func_repeats_entry *entry; 3210 struct ring_buffer_event *event; 3211 u64 delta; 3212 3213 event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS, 3214 sizeof(*entry), trace_ctx); 3215 if (!event) 3216 return; 3217 3218 delta = ring_buffer_event_time_stamp(buffer, event) - 3219 last_info->ts_last_call; 3220 3221 entry = ring_buffer_event_data(event); 3222 entry->ip = last_info->ip; 3223 entry->parent_ip = last_info->parent_ip; 3224 entry->count = last_info->count; 3225 func_repeats_set_delta_ts(entry, delta); 3226 3227 __buffer_unlock_commit(buffer, event); 3228 } 3229 3230 /* created for use with alloc_percpu */ 3231 struct trace_buffer_struct { 3232 int nesting; 3233 char buffer[4][TRACE_BUF_SIZE]; 3234 }; 3235 3236 static struct trace_buffer_struct __percpu *trace_percpu_buffer; 3237 3238 /* 3239 * This allows for lockless recording. If we're nested too deeply, then 3240 * this returns NULL. 
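 * The four per-CPU buffers cover the contexts that can nest on one CPU:
 * normal (task), softirq, hard irq and NMI.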
3241 */ 3242 static char *get_trace_buf(void) 3243 { 3244 struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer); 3245 3246 if (!trace_percpu_buffer || buffer->nesting >= 4) 3247 return NULL; 3248 3249 buffer->nesting++; 3250 3251 /* Interrupts must see nesting incremented before we use the buffer */ 3252 barrier(); 3253 return &buffer->buffer[buffer->nesting - 1][0]; 3254 } 3255 3256 static void put_trace_buf(void) 3257 { 3258 /* Don't let the decrement of nesting leak before this */ 3259 barrier(); 3260 this_cpu_dec(trace_percpu_buffer->nesting); 3261 } 3262 3263 static int alloc_percpu_trace_buffer(void) 3264 { 3265 struct trace_buffer_struct __percpu *buffers; 3266 3267 if (trace_percpu_buffer) 3268 return 0; 3269 3270 buffers = alloc_percpu(struct trace_buffer_struct); 3271 if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer")) 3272 return -ENOMEM; 3273 3274 trace_percpu_buffer = buffers; 3275 return 0; 3276 } 3277 3278 static int buffers_allocated; 3279 3280 void trace_printk_init_buffers(void) 3281 { 3282 if (buffers_allocated) 3283 return; 3284 3285 if (alloc_percpu_trace_buffer()) 3286 return; 3287 3288 /* trace_printk() is for debug use only. Don't use it in production. */ 3289 3290 pr_warn("\n"); 3291 pr_warn("**********************************************************\n"); 3292 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); 3293 pr_warn("** **\n"); 3294 pr_warn("** trace_printk() being used. Allocating extra memory. **\n"); 3295 pr_warn("** **\n"); 3296 pr_warn("** This means that this is a DEBUG kernel and it is **\n"); 3297 pr_warn("** unsafe for production use. **\n"); 3298 pr_warn("** **\n"); 3299 pr_warn("** If you see this message and you are not debugging **\n"); 3300 pr_warn("** the kernel, report this immediately to your vendor! **\n"); 3301 pr_warn("** **\n"); 3302 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); 3303 pr_warn("**********************************************************\n"); 3304 3305 /* Expand the buffers to set size */ 3306 tracing_update_buffers(&global_trace); 3307 3308 buffers_allocated = 1; 3309 3310 /* 3311 * trace_printk_init_buffers() can be called by modules. 3312 * If that happens, then we need to start cmdline recording 3313 * directly here. If the global_trace.buffer is already 3314 * allocated here, then this was called by module code. 
3315 */ 3316 if (global_trace.array_buffer.buffer) 3317 tracing_start_cmdline_record(); 3318 } 3319 EXPORT_SYMBOL_GPL(trace_printk_init_buffers); 3320 3321 void trace_printk_start_comm(void) 3322 { 3323 /* Start tracing comms if trace printk is set */ 3324 if (!buffers_allocated) 3325 return; 3326 tracing_start_cmdline_record(); 3327 } 3328 3329 static void trace_printk_start_stop_comm(int enabled) 3330 { 3331 if (!buffers_allocated) 3332 return; 3333 3334 if (enabled) 3335 tracing_start_cmdline_record(); 3336 else 3337 tracing_stop_cmdline_record(); 3338 } 3339 3340 /** 3341 * trace_vbprintk - write binary msg to tracing buffer 3342 * @ip: The address of the caller 3343 * @fmt: The string format to write to the buffer 3344 * @args: Arguments for @fmt 3345 */ 3346 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) 3347 { 3348 struct ring_buffer_event *event; 3349 struct trace_buffer *buffer; 3350 struct trace_array *tr = READ_ONCE(printk_trace); 3351 struct bprint_entry *entry; 3352 unsigned int trace_ctx; 3353 char *tbuffer; 3354 int len = 0, size; 3355 3356 if (!printk_binsafe(tr)) 3357 return trace_vprintk(ip, fmt, args); 3358 3359 if (unlikely(tracing_selftest_running || tracing_disabled)) 3360 return 0; 3361 3362 /* Don't pollute graph traces with trace_vprintk internals */ 3363 pause_graph_tracing(); 3364 3365 trace_ctx = tracing_gen_ctx(); 3366 guard(preempt_notrace)(); 3367 3368 tbuffer = get_trace_buf(); 3369 if (!tbuffer) { 3370 len = 0; 3371 goto out_nobuffer; 3372 } 3373 3374 len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args); 3375 3376 if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0) 3377 goto out_put; 3378 3379 size = sizeof(*entry) + sizeof(u32) * len; 3380 buffer = tr->array_buffer.buffer; 3381 scoped_guard(ring_buffer_nest, buffer) { 3382 event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size, 3383 trace_ctx); 3384 if (!event) 3385 goto out_put; 3386 entry = ring_buffer_event_data(event); 3387 entry->ip = ip; 3388 entry->fmt = fmt; 3389 3390 memcpy(entry->buf, tbuffer, sizeof(u32) * len); 3391 __buffer_unlock_commit(buffer, event); 3392 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL); 3393 } 3394 out_put: 3395 put_trace_buf(); 3396 3397 out_nobuffer: 3398 unpause_graph_tracing(); 3399 3400 return len; 3401 } 3402 EXPORT_SYMBOL_GPL(trace_vbprintk); 3403 3404 static __printf(3, 0) 3405 int __trace_array_vprintk(struct trace_buffer *buffer, 3406 unsigned long ip, const char *fmt, va_list args) 3407 { 3408 struct ring_buffer_event *event; 3409 int len = 0, size; 3410 struct print_entry *entry; 3411 unsigned int trace_ctx; 3412 char *tbuffer; 3413 3414 if (tracing_disabled) 3415 return 0; 3416 3417 /* Don't pollute graph traces with trace_vprintk internals */ 3418 pause_graph_tracing(); 3419 3420 trace_ctx = tracing_gen_ctx(); 3421 guard(preempt_notrace)(); 3422 3423 3424 tbuffer = get_trace_buf(); 3425 if (!tbuffer) { 3426 len = 0; 3427 goto out_nobuffer; 3428 } 3429 3430 len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args); 3431 3432 size = sizeof(*entry) + len + 1; 3433 scoped_guard(ring_buffer_nest, buffer) { 3434 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, 3435 trace_ctx); 3436 if (!event) 3437 goto out; 3438 entry = ring_buffer_event_data(event); 3439 entry->ip = ip; 3440 3441 memcpy(&entry->buf, tbuffer, len + 1); 3442 __buffer_unlock_commit(buffer, event); 3443 ftrace_trace_stack(printk_trace, buffer, trace_ctx, 6, NULL); 3444 } 3445 out: 3446 put_trace_buf(); 3447 3448 out_nobuffer: 3449 
unpause_graph_tracing(); 3450 3451 return len; 3452 } 3453 3454 int trace_array_vprintk(struct trace_array *tr, 3455 unsigned long ip, const char *fmt, va_list args) 3456 { 3457 if (tracing_selftest_running && tr == &global_trace) 3458 return 0; 3459 3460 return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args); 3461 } 3462 3463 /** 3464 * trace_array_printk - Print a message to a specific instance 3465 * @tr: The instance trace_array descriptor 3466 * @ip: The instruction pointer that this is called from. 3467 * @fmt: The format to print (printf format) 3468 * 3469 * If a subsystem sets up its own instance, they have the right to 3470 * printk strings into their tracing instance buffer using this 3471 * function. Note, this function will not write into the top level 3472 * buffer (use trace_printk() for that), as writing into the top level 3473 * buffer should only have events that can be individually disabled. 3474 * trace_printk() is only used for debugging a kernel, and should not 3475 * be ever incorporated in normal use. 3476 * 3477 * trace_array_printk() can be used, as it will not add noise to the 3478 * top level tracing buffer. 3479 * 3480 * Note, trace_array_init_printk() must be called on @tr before this 3481 * can be used. 3482 */ 3483 int trace_array_printk(struct trace_array *tr, 3484 unsigned long ip, const char *fmt, ...) 3485 { 3486 int ret; 3487 va_list ap; 3488 3489 if (!tr) 3490 return -ENOENT; 3491 3492 /* This is only allowed for created instances */ 3493 if (tr == &global_trace) 3494 return 0; 3495 3496 if (!(tr->trace_flags & TRACE_ITER(PRINTK))) 3497 return 0; 3498 3499 va_start(ap, fmt); 3500 ret = trace_array_vprintk(tr, ip, fmt, ap); 3501 va_end(ap); 3502 return ret; 3503 } 3504 EXPORT_SYMBOL_GPL(trace_array_printk); 3505 3506 /** 3507 * trace_array_init_printk - Initialize buffers for trace_array_printk() 3508 * @tr: The trace array to initialize the buffers for 3509 * 3510 * As trace_array_printk() only writes into instances, they are OK to 3511 * have in the kernel (unlike trace_printk()). This needs to be called 3512 * before trace_array_printk() can be used on a trace_array. 3513 */ 3514 int trace_array_init_printk(struct trace_array *tr) 3515 { 3516 if (!tr) 3517 return -ENOENT; 3518 3519 /* This is only allowed for created instances */ 3520 if (tr == &global_trace) 3521 return -EINVAL; 3522 3523 return alloc_percpu_trace_buffer(); 3524 } 3525 EXPORT_SYMBOL_GPL(trace_array_init_printk); 3526 3527 int trace_array_printk_buf(struct trace_buffer *buffer, 3528 unsigned long ip, const char *fmt, ...) 
3529 { 3530 int ret; 3531 va_list ap; 3532 3533 if (!(printk_trace->trace_flags & TRACE_ITER(PRINTK))) 3534 return 0; 3535 3536 va_start(ap, fmt); 3537 ret = __trace_array_vprintk(buffer, ip, fmt, ap); 3538 va_end(ap); 3539 return ret; 3540 } 3541 3542 int trace_vprintk(unsigned long ip, const char *fmt, va_list args) 3543 { 3544 return trace_array_vprintk(printk_trace, ip, fmt, args); 3545 } 3546 EXPORT_SYMBOL_GPL(trace_vprintk); 3547 3548 static void trace_iterator_increment(struct trace_iterator *iter) 3549 { 3550 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu); 3551 3552 iter->idx++; 3553 if (buf_iter) 3554 ring_buffer_iter_advance(buf_iter); 3555 } 3556 3557 static struct trace_entry * 3558 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts, 3559 unsigned long *lost_events) 3560 { 3561 struct ring_buffer_event *event; 3562 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu); 3563 3564 if (buf_iter) { 3565 event = ring_buffer_iter_peek(buf_iter, ts); 3566 if (lost_events) 3567 *lost_events = ring_buffer_iter_dropped(buf_iter) ? 3568 (unsigned long)-1 : 0; 3569 } else { 3570 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts, 3571 lost_events); 3572 } 3573 3574 if (event) { 3575 iter->ent_size = ring_buffer_event_length(event); 3576 return ring_buffer_event_data(event); 3577 } 3578 iter->ent_size = 0; 3579 return NULL; 3580 } 3581 3582 static struct trace_entry * 3583 __find_next_entry(struct trace_iterator *iter, int *ent_cpu, 3584 unsigned long *missing_events, u64 *ent_ts) 3585 { 3586 struct trace_buffer *buffer = iter->array_buffer->buffer; 3587 struct trace_entry *ent, *next = NULL; 3588 unsigned long lost_events = 0, next_lost = 0; 3589 int cpu_file = iter->cpu_file; 3590 u64 next_ts = 0, ts; 3591 int next_cpu = -1; 3592 int next_size = 0; 3593 int cpu; 3594 3595 /* 3596 * If we are in a per_cpu trace file, don't bother by iterating over 3597 * all cpu and peek directly. 3598 */ 3599 if (cpu_file > RING_BUFFER_ALL_CPUS) { 3600 if (ring_buffer_empty_cpu(buffer, cpu_file)) 3601 return NULL; 3602 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events); 3603 if (ent_cpu) 3604 *ent_cpu = cpu_file; 3605 3606 return ent; 3607 } 3608 3609 for_each_tracing_cpu(cpu) { 3610 3611 if (ring_buffer_empty_cpu(buffer, cpu)) 3612 continue; 3613 3614 ent = peek_next_entry(iter, cpu, &ts, &lost_events); 3615 3616 /* 3617 * Pick the entry with the smallest timestamp: 3618 */ 3619 if (ent && (!next || ts < next_ts)) { 3620 next = ent; 3621 next_cpu = cpu; 3622 next_ts = ts; 3623 next_lost = lost_events; 3624 next_size = iter->ent_size; 3625 } 3626 } 3627 3628 iter->ent_size = next_size; 3629 3630 if (ent_cpu) 3631 *ent_cpu = next_cpu; 3632 3633 if (ent_ts) 3634 *ent_ts = next_ts; 3635 3636 if (missing_events) 3637 *missing_events = next_lost; 3638 3639 return next; 3640 } 3641 3642 #define STATIC_FMT_BUF_SIZE 128 3643 static char static_fmt_buf[STATIC_FMT_BUF_SIZE]; 3644 3645 char *trace_iter_expand_format(struct trace_iterator *iter) 3646 { 3647 char *tmp; 3648 3649 /* 3650 * iter->tr is NULL when used with tp_printk, which makes 3651 * this get called where it is not safe to call krealloc(). 
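 * In that case (and when iter->fmt is still the static buffer) the
 * format is not grown and NULL is returned, so callers fall back to the
 * original fmt string.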
3652 */ 3653 if (!iter->tr || iter->fmt == static_fmt_buf) 3654 return NULL; 3655 3656 tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE, 3657 GFP_KERNEL); 3658 if (tmp) { 3659 iter->fmt_size += STATIC_FMT_BUF_SIZE; 3660 iter->fmt = tmp; 3661 } 3662 3663 return tmp; 3664 } 3665 3666 /* Returns true if the string is safe to dereference from an event */ 3667 static bool trace_safe_str(struct trace_iterator *iter, const char *str) 3668 { 3669 unsigned long addr = (unsigned long)str; 3670 struct trace_event *trace_event; 3671 struct trace_event_call *event; 3672 3673 /* OK if part of the event data */ 3674 if ((addr >= (unsigned long)iter->ent) && 3675 (addr < (unsigned long)iter->ent + iter->ent_size)) 3676 return true; 3677 3678 /* OK if part of the temp seq buffer */ 3679 if ((addr >= (unsigned long)iter->tmp_seq.buffer) && 3680 (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE)) 3681 return true; 3682 3683 /* Core rodata can not be freed */ 3684 if (is_kernel_rodata(addr)) 3685 return true; 3686 3687 if (trace_is_tracepoint_string(str)) 3688 return true; 3689 3690 /* 3691 * Now this could be a module event, referencing core module 3692 * data, which is OK. 3693 */ 3694 if (!iter->ent) 3695 return false; 3696 3697 trace_event = ftrace_find_event(iter->ent->type); 3698 if (!trace_event) 3699 return false; 3700 3701 event = container_of(trace_event, struct trace_event_call, event); 3702 if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module) 3703 return false; 3704 3705 /* Would rather have rodata, but this will suffice */ 3706 if (within_module_core(addr, event->module)) 3707 return true; 3708 3709 return false; 3710 } 3711 3712 /** 3713 * ignore_event - Check dereferenced fields while writing to the seq buffer 3714 * @iter: The iterator that holds the seq buffer and the event being printed 3715 * 3716 * At boot up, test_event_printk() will flag any event that dereferences 3717 * a string with "%s" that does exist in the ring buffer. It may still 3718 * be valid, as the string may point to a static string in the kernel 3719 * rodata that never gets freed. But if the string pointer is pointing 3720 * to something that was allocated, there's a chance that it can be freed 3721 * by the time the user reads the trace. This would cause a bad memory 3722 * access by the kernel and possibly crash the system. 3723 * 3724 * This function will check if the event has any fields flagged as needing 3725 * to be checked at runtime and perform those checks. 3726 * 3727 * If it is found that a field is unsafe, it will write into the @iter->seq 3728 * a message stating what was found to be unsafe. 3729 * 3730 * @return: true if the event is unsafe and should be ignored, 3731 * false otherwise. 
3732 */ 3733 bool ignore_event(struct trace_iterator *iter) 3734 { 3735 struct ftrace_event_field *field; 3736 struct trace_event *trace_event; 3737 struct trace_event_call *event; 3738 struct list_head *head; 3739 struct trace_seq *seq; 3740 const void *ptr; 3741 3742 trace_event = ftrace_find_event(iter->ent->type); 3743 3744 seq = &iter->seq; 3745 3746 if (!trace_event) { 3747 trace_seq_printf(seq, "EVENT ID %d NOT FOUND?\n", iter->ent->type); 3748 return true; 3749 } 3750 3751 event = container_of(trace_event, struct trace_event_call, event); 3752 if (!(event->flags & TRACE_EVENT_FL_TEST_STR)) 3753 return false; 3754 3755 head = trace_get_fields(event); 3756 if (!head) { 3757 trace_seq_printf(seq, "FIELDS FOR EVENT '%s' NOT FOUND?\n", 3758 trace_event_name(event)); 3759 return true; 3760 } 3761 3762 /* Offsets are from the iter->ent that points to the raw event */ 3763 ptr = iter->ent; 3764 3765 list_for_each_entry(field, head, link) { 3766 const char *str; 3767 bool good; 3768 3769 if (!field->needs_test) 3770 continue; 3771 3772 str = *(const char **)(ptr + field->offset); 3773 3774 good = trace_safe_str(iter, str); 3775 3776 /* 3777 * If you hit this warning, it is likely that the 3778 * trace event in question used %s on a string that 3779 * was saved at the time of the event, but may not be 3780 * around when the trace is read. Use __string(), 3781 * __assign_str() and __get_str() helpers in the TRACE_EVENT() 3782 * instead. See samples/trace_events/trace-events-sample.h 3783 * for reference. 3784 */ 3785 if (WARN_ONCE(!good, "event '%s' has unsafe pointer field '%s'", 3786 trace_event_name(event), field->name)) { 3787 trace_seq_printf(seq, "EVENT %s: HAS UNSAFE POINTER FIELD '%s'\n", 3788 trace_event_name(event), field->name); 3789 return true; 3790 } 3791 } 3792 return false; 3793 } 3794 3795 const char *trace_event_format(struct trace_iterator *iter, const char *fmt) 3796 { 3797 const char *p, *new_fmt; 3798 char *q; 3799 3800 if (WARN_ON_ONCE(!fmt)) 3801 return fmt; 3802 3803 if (!iter->tr || iter->tr->trace_flags & TRACE_ITER(HASH_PTR)) 3804 return fmt; 3805 3806 p = fmt; 3807 new_fmt = q = iter->fmt; 3808 while (*p) { 3809 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) { 3810 if (!trace_iter_expand_format(iter)) 3811 return fmt; 3812 3813 q += iter->fmt - new_fmt; 3814 new_fmt = iter->fmt; 3815 } 3816 3817 *q++ = *p++; 3818 3819 /* Replace %p with %px */ 3820 if (p[-1] == '%') { 3821 if (p[0] == '%') { 3822 *q++ = *p++; 3823 } else if (p[0] == 'p' && !isalnum(p[1])) { 3824 *q++ = *p++; 3825 *q++ = 'x'; 3826 } 3827 } 3828 } 3829 *q = '\0'; 3830 3831 return new_fmt; 3832 } 3833 3834 #define STATIC_TEMP_BUF_SIZE 128 3835 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4); 3836 3837 /* Find the next real entry, without updating the iterator itself */ 3838 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter, 3839 int *ent_cpu, u64 *ent_ts) 3840 { 3841 /* __find_next_entry will reset ent_size */ 3842 int ent_size = iter->ent_size; 3843 struct trace_entry *entry; 3844 3845 /* 3846 * If called from ftrace_dump(), then the iter->temp buffer 3847 * will be the static_temp_buf and not created from kmalloc. 3848 * If the entry size is greater than the buffer, we can 3849 * not save it. Just return NULL in that case. This is only 3850 * used to add markers when two consecutive events' time 3851 * stamps have a large delta. 
See trace_print_lat_context() 3852 */ 3853 if (iter->temp == static_temp_buf && 3854 STATIC_TEMP_BUF_SIZE < ent_size) 3855 return NULL; 3856 3857 /* 3858 * The __find_next_entry() may call peek_next_entry(), which may 3859 * call ring_buffer_peek() that may make the contents of iter->ent 3860 * undefined. Need to copy iter->ent now. 3861 */ 3862 if (iter->ent && iter->ent != iter->temp) { 3863 if ((!iter->temp || iter->temp_size < iter->ent_size) && 3864 !WARN_ON_ONCE(iter->temp == static_temp_buf)) { 3865 void *temp; 3866 temp = kmalloc(iter->ent_size, GFP_KERNEL); 3867 if (!temp) 3868 return NULL; 3869 kfree(iter->temp); 3870 iter->temp = temp; 3871 iter->temp_size = iter->ent_size; 3872 } 3873 memcpy(iter->temp, iter->ent, iter->ent_size); 3874 iter->ent = iter->temp; 3875 } 3876 entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts); 3877 /* Put back the original ent_size */ 3878 iter->ent_size = ent_size; 3879 3880 return entry; 3881 } 3882 3883 /* Find the next real entry, and increment the iterator to the next entry */ 3884 void *trace_find_next_entry_inc(struct trace_iterator *iter) 3885 { 3886 iter->ent = __find_next_entry(iter, &iter->cpu, 3887 &iter->lost_events, &iter->ts); 3888 3889 if (iter->ent) 3890 trace_iterator_increment(iter); 3891 3892 return iter->ent ? iter : NULL; 3893 } 3894 3895 static void trace_consume(struct trace_iterator *iter) 3896 { 3897 ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts, 3898 &iter->lost_events); 3899 } 3900 3901 static void *s_next(struct seq_file *m, void *v, loff_t *pos) 3902 { 3903 struct trace_iterator *iter = m->private; 3904 int i = (int)*pos; 3905 void *ent; 3906 3907 WARN_ON_ONCE(iter->leftover); 3908 3909 (*pos)++; 3910 3911 /* can't go backwards */ 3912 if (iter->idx > i) 3913 return NULL; 3914 3915 if (iter->idx < 0) 3916 ent = trace_find_next_entry_inc(iter); 3917 else 3918 ent = iter; 3919 3920 while (ent && iter->idx < i) 3921 ent = trace_find_next_entry_inc(iter); 3922 3923 iter->pos = *pos; 3924 3925 return ent; 3926 } 3927 3928 void tracing_iter_reset(struct trace_iterator *iter, int cpu) 3929 { 3930 struct ring_buffer_iter *buf_iter; 3931 unsigned long entries = 0; 3932 u64 ts; 3933 3934 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0; 3935 3936 buf_iter = trace_buffer_iter(iter, cpu); 3937 if (!buf_iter) 3938 return; 3939 3940 ring_buffer_iter_reset(buf_iter); 3941 3942 /* 3943 * We could have the case with the max latency tracers 3944 * that a reset never took place on a cpu. This is evident 3945 * by the timestamp being before the start of the buffer. 3946 */ 3947 while (ring_buffer_iter_peek(buf_iter, &ts)) { 3948 if (ts >= iter->array_buffer->time_start) 3949 break; 3950 entries++; 3951 ring_buffer_iter_advance(buf_iter); 3952 /* This could be a big loop */ 3953 cond_resched(); 3954 } 3955 3956 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries; 3957 } 3958 3959 /* 3960 * The current tracer is copied to avoid a global locking 3961 * all around. 
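 * s_start() re-checks tr->current_trace under trace_types_lock and
 * closes/reopens the iterator if the current tracer changed since the
 * trace file was opened.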
3962 */ 3963 static void *s_start(struct seq_file *m, loff_t *pos) 3964 { 3965 struct trace_iterator *iter = m->private; 3966 struct trace_array *tr = iter->tr; 3967 int cpu_file = iter->cpu_file; 3968 void *p = NULL; 3969 loff_t l = 0; 3970 int cpu; 3971 3972 mutex_lock(&trace_types_lock); 3973 if (unlikely(tr->current_trace != iter->trace)) { 3974 /* Close iter->trace before switching to the new current tracer */ 3975 if (iter->trace->close) 3976 iter->trace->close(iter); 3977 iter->trace = tr->current_trace; 3978 /* Reopen the new current tracer */ 3979 if (iter->trace->open) 3980 iter->trace->open(iter); 3981 } 3982 mutex_unlock(&trace_types_lock); 3983 3984 #ifdef CONFIG_TRACER_MAX_TRACE 3985 if (iter->snapshot && iter->trace->use_max_tr) 3986 return ERR_PTR(-EBUSY); 3987 #endif 3988 3989 if (*pos != iter->pos) { 3990 iter->ent = NULL; 3991 iter->cpu = 0; 3992 iter->idx = -1; 3993 3994 if (cpu_file == RING_BUFFER_ALL_CPUS) { 3995 for_each_tracing_cpu(cpu) 3996 tracing_iter_reset(iter, cpu); 3997 } else 3998 tracing_iter_reset(iter, cpu_file); 3999 4000 iter->leftover = 0; 4001 for (p = iter; p && l < *pos; p = s_next(m, p, &l)) 4002 ; 4003 4004 } else { 4005 /* 4006 * If we overflowed the seq_file before, then we want 4007 * to just reuse the trace_seq buffer again. 4008 */ 4009 if (iter->leftover) 4010 p = iter; 4011 else { 4012 l = *pos - 1; 4013 p = s_next(m, p, &l); 4014 } 4015 } 4016 4017 trace_event_read_lock(); 4018 trace_access_lock(cpu_file); 4019 return p; 4020 } 4021 4022 static void s_stop(struct seq_file *m, void *p) 4023 { 4024 struct trace_iterator *iter = m->private; 4025 4026 #ifdef CONFIG_TRACER_MAX_TRACE 4027 if (iter->snapshot && iter->trace->use_max_tr) 4028 return; 4029 #endif 4030 4031 trace_access_unlock(iter->cpu_file); 4032 trace_event_read_unlock(); 4033 } 4034 4035 static void 4036 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total, 4037 unsigned long *entries, int cpu) 4038 { 4039 unsigned long count; 4040 4041 count = ring_buffer_entries_cpu(buf->buffer, cpu); 4042 /* 4043 * If this buffer has skipped entries, then we hold all 4044 * entries for the trace and we need to ignore the 4045 * ones before the time stamp. 
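 * E.g. if ring_buffer_entries_cpu() reports 1000 entries and 200 of
 * them predate time_start (counted into skipped_entries by
 * tracing_iter_reset()), both *entries and *total come out as 800 and
 * the overrun count is not added in.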
4046 */ 4047 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) { 4048 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries; 4049 /* total is the same as the entries */ 4050 *total = count; 4051 } else 4052 *total = count + 4053 ring_buffer_overrun_cpu(buf->buffer, cpu); 4054 *entries = count; 4055 } 4056 4057 static void 4058 get_total_entries(struct array_buffer *buf, 4059 unsigned long *total, unsigned long *entries) 4060 { 4061 unsigned long t, e; 4062 int cpu; 4063 4064 *total = 0; 4065 *entries = 0; 4066 4067 for_each_tracing_cpu(cpu) { 4068 get_total_entries_cpu(buf, &t, &e, cpu); 4069 *total += t; 4070 *entries += e; 4071 } 4072 } 4073 4074 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu) 4075 { 4076 unsigned long total, entries; 4077 4078 if (!tr) 4079 tr = &global_trace; 4080 4081 get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu); 4082 4083 return entries; 4084 } 4085 4086 unsigned long trace_total_entries(struct trace_array *tr) 4087 { 4088 unsigned long total, entries; 4089 4090 if (!tr) 4091 tr = &global_trace; 4092 4093 get_total_entries(&tr->array_buffer, &total, &entries); 4094 4095 return entries; 4096 } 4097 4098 static void print_lat_help_header(struct seq_file *m) 4099 { 4100 seq_puts(m, "# _------=> CPU# \n" 4101 "# / _-----=> irqs-off/BH-disabled\n" 4102 "# | / _----=> need-resched \n" 4103 "# || / _---=> hardirq/softirq \n" 4104 "# ||| / _--=> preempt-depth \n" 4105 "# |||| / _-=> migrate-disable \n" 4106 "# ||||| / delay \n" 4107 "# cmd pid |||||| time | caller \n" 4108 "# \\ / |||||| \\ | / \n"); 4109 } 4110 4111 static void print_event_info(struct array_buffer *buf, struct seq_file *m) 4112 { 4113 unsigned long total; 4114 unsigned long entries; 4115 4116 get_total_entries(buf, &total, &entries); 4117 seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n", 4118 entries, total, num_online_cpus()); 4119 seq_puts(m, "#\n"); 4120 } 4121 4122 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m, 4123 unsigned int flags) 4124 { 4125 bool tgid = flags & TRACE_ITER(RECORD_TGID); 4126 4127 print_event_info(buf, m); 4128 4129 seq_printf(m, "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? " TGID " : ""); 4130 seq_printf(m, "# | | %s | | |\n", tgid ? " | " : ""); 4131 } 4132 4133 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m, 4134 unsigned int flags) 4135 { 4136 bool tgid = flags & TRACE_ITER(RECORD_TGID); 4137 static const char space[] = " "; 4138 int prec = tgid ? 
12 : 2; 4139 4140 print_event_info(buf, m); 4141 4142 seq_printf(m, "# %.*s _-----=> irqs-off/BH-disabled\n", prec, space); 4143 seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space); 4144 seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space); 4145 seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space); 4146 seq_printf(m, "# %.*s||| / _-=> migrate-disable\n", prec, space); 4147 seq_printf(m, "# %.*s|||| / delay\n", prec, space); 4148 seq_printf(m, "# TASK-PID %.*s CPU# ||||| TIMESTAMP FUNCTION\n", prec, " TGID "); 4149 seq_printf(m, "# | | %.*s | ||||| | |\n", prec, " | "); 4150 } 4151 4152 void 4153 print_trace_header(struct seq_file *m, struct trace_iterator *iter) 4154 { 4155 unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK); 4156 struct array_buffer *buf = iter->array_buffer; 4157 struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu); 4158 struct tracer *type = iter->trace; 4159 unsigned long entries; 4160 unsigned long total; 4161 const char *name = type->name; 4162 4163 get_total_entries(buf, &total, &entries); 4164 4165 seq_printf(m, "# %s latency trace v1.1.5 on %s\n", 4166 name, init_utsname()->release); 4167 seq_puts(m, "# -----------------------------------" 4168 "---------------------------------\n"); 4169 seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |" 4170 " (M:%s VP:%d, KP:%d, SP:%d HP:%d", 4171 nsecs_to_usecs(data->saved_latency), 4172 entries, 4173 total, 4174 buf->cpu, 4175 preempt_model_str(), 4176 /* These are reserved for later use */ 4177 0, 0, 0, 0); 4178 #ifdef CONFIG_SMP 4179 seq_printf(m, " #P:%d)\n", num_online_cpus()); 4180 #else 4181 seq_puts(m, ")\n"); 4182 #endif 4183 seq_puts(m, "# -----------------\n"); 4184 seq_printf(m, "# | task: %.16s-%d " 4185 "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n", 4186 data->comm, data->pid, 4187 from_kuid_munged(seq_user_ns(m), data->uid), data->nice, 4188 data->policy, data->rt_priority); 4189 seq_puts(m, "# -----------------\n"); 4190 4191 if (data->critical_start) { 4192 seq_puts(m, "# => started at: "); 4193 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags); 4194 trace_print_seq(m, &iter->seq); 4195 seq_puts(m, "\n# => ended at: "); 4196 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags); 4197 trace_print_seq(m, &iter->seq); 4198 seq_puts(m, "\n#\n"); 4199 } 4200 4201 seq_puts(m, "#\n"); 4202 } 4203 4204 static void test_cpu_buff_start(struct trace_iterator *iter) 4205 { 4206 struct trace_seq *s = &iter->seq; 4207 struct trace_array *tr = iter->tr; 4208 4209 if (!(tr->trace_flags & TRACE_ITER(ANNOTATE))) 4210 return; 4211 4212 if (!(iter->iter_flags & TRACE_FILE_ANNOTATE)) 4213 return; 4214 4215 if (cpumask_available(iter->started) && 4216 cpumask_test_cpu(iter->cpu, iter->started)) 4217 return; 4218 4219 if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries) 4220 return; 4221 4222 if (cpumask_available(iter->started)) 4223 cpumask_set_cpu(iter->cpu, iter->started); 4224 4225 /* Don't print started cpu buffer for the first entry of the trace */ 4226 if (iter->idx > 1) 4227 trace_seq_printf(s, "##### CPU %u buffer started ####\n", 4228 iter->cpu); 4229 } 4230 4231 #ifdef CONFIG_FTRACE_SYSCALLS 4232 static bool is_syscall_event(struct trace_event *event) 4233 { 4234 return (event->funcs == &enter_syscall_print_funcs) || 4235 (event->funcs == &exit_syscall_print_funcs); 4236 4237 } 4238 #define syscall_buf_size CONFIG_TRACE_SYSCALL_BUF_SIZE_DEFAULT 4239 #else 4240 static inline bool is_syscall_event(struct trace_event 
*event) 4241 { 4242 return false; 4243 } 4244 #define syscall_buf_size 0 4245 #endif /* CONFIG_FTRACE_SYSCALLS */ 4246 4247 static enum print_line_t print_trace_fmt(struct trace_iterator *iter) 4248 { 4249 struct trace_array *tr = iter->tr; 4250 struct trace_seq *s = &iter->seq; 4251 unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK); 4252 struct trace_entry *entry; 4253 struct trace_event *event; 4254 4255 entry = iter->ent; 4256 4257 test_cpu_buff_start(iter); 4258 4259 event = ftrace_find_event(entry->type); 4260 4261 if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) { 4262 if (iter->iter_flags & TRACE_FILE_LAT_FMT) 4263 trace_print_lat_context(iter); 4264 else 4265 trace_print_context(iter); 4266 } 4267 4268 if (trace_seq_has_overflowed(s)) 4269 return TRACE_TYPE_PARTIAL_LINE; 4270 4271 if (event) { 4272 if (tr->trace_flags & TRACE_ITER(FIELDS)) 4273 return print_event_fields(iter, event); 4274 /* 4275 * For TRACE_EVENT() events, the print_fmt is not 4276 * safe to use if the array has delta offsets 4277 * Force printing via the fields. 4278 */ 4279 if ((tr->text_delta)) { 4280 /* ftrace and system call events are still OK */ 4281 if ((event->type > __TRACE_LAST_TYPE) && 4282 !is_syscall_event(event)) 4283 return print_event_fields(iter, event); 4284 } 4285 return event->funcs->trace(iter, sym_flags, event); 4286 } 4287 4288 trace_seq_printf(s, "Unknown type %d\n", entry->type); 4289 4290 return trace_handle_return(s); 4291 } 4292 4293 static enum print_line_t print_raw_fmt(struct trace_iterator *iter) 4294 { 4295 struct trace_array *tr = iter->tr; 4296 struct trace_seq *s = &iter->seq; 4297 struct trace_entry *entry; 4298 struct trace_event *event; 4299 4300 entry = iter->ent; 4301 4302 if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) 4303 trace_seq_printf(s, "%d %d %llu ", 4304 entry->pid, iter->cpu, iter->ts); 4305 4306 if (trace_seq_has_overflowed(s)) 4307 return TRACE_TYPE_PARTIAL_LINE; 4308 4309 event = ftrace_find_event(entry->type); 4310 if (event) 4311 return event->funcs->raw(iter, 0, event); 4312 4313 trace_seq_printf(s, "%d ?\n", entry->type); 4314 4315 return trace_handle_return(s); 4316 } 4317 4318 static enum print_line_t print_hex_fmt(struct trace_iterator *iter) 4319 { 4320 struct trace_array *tr = iter->tr; 4321 struct trace_seq *s = &iter->seq; 4322 unsigned char newline = '\n'; 4323 struct trace_entry *entry; 4324 struct trace_event *event; 4325 4326 entry = iter->ent; 4327 4328 if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) { 4329 SEQ_PUT_HEX_FIELD(s, entry->pid); 4330 SEQ_PUT_HEX_FIELD(s, iter->cpu); 4331 SEQ_PUT_HEX_FIELD(s, iter->ts); 4332 if (trace_seq_has_overflowed(s)) 4333 return TRACE_TYPE_PARTIAL_LINE; 4334 } 4335 4336 event = ftrace_find_event(entry->type); 4337 if (event) { 4338 enum print_line_t ret = event->funcs->hex(iter, 0, event); 4339 if (ret != TRACE_TYPE_HANDLED) 4340 return ret; 4341 } 4342 4343 SEQ_PUT_FIELD(s, newline); 4344 4345 return trace_handle_return(s); 4346 } 4347 4348 static enum print_line_t print_bin_fmt(struct trace_iterator *iter) 4349 { 4350 struct trace_array *tr = iter->tr; 4351 struct trace_seq *s = &iter->seq; 4352 struct trace_entry *entry; 4353 struct trace_event *event; 4354 4355 entry = iter->ent; 4356 4357 if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) { 4358 SEQ_PUT_FIELD(s, entry->pid); 4359 SEQ_PUT_FIELD(s, iter->cpu); 4360 SEQ_PUT_FIELD(s, iter->ts); 4361 if (trace_seq_has_overflowed(s)) 4362 return TRACE_TYPE_PARTIAL_LINE; 4363 } 4364 4365 event = ftrace_find_event(entry->type); 4366 return event ? 
event->funcs->binary(iter, 0, event) : 4367 TRACE_TYPE_HANDLED; 4368 } 4369 4370 int trace_empty(struct trace_iterator *iter) 4371 { 4372 struct ring_buffer_iter *buf_iter; 4373 int cpu; 4374 4375 /* If we are looking at one CPU buffer, only check that one */ 4376 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) { 4377 cpu = iter->cpu_file; 4378 buf_iter = trace_buffer_iter(iter, cpu); 4379 if (buf_iter) { 4380 if (!ring_buffer_iter_empty(buf_iter)) 4381 return 0; 4382 } else { 4383 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu)) 4384 return 0; 4385 } 4386 return 1; 4387 } 4388 4389 for_each_tracing_cpu(cpu) { 4390 buf_iter = trace_buffer_iter(iter, cpu); 4391 if (buf_iter) { 4392 if (!ring_buffer_iter_empty(buf_iter)) 4393 return 0; 4394 } else { 4395 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu)) 4396 return 0; 4397 } 4398 } 4399 4400 return 1; 4401 } 4402 4403 /* Called with trace_event_read_lock() held. */ 4404 enum print_line_t print_trace_line(struct trace_iterator *iter) 4405 { 4406 struct trace_array *tr = iter->tr; 4407 unsigned long trace_flags = tr->trace_flags; 4408 enum print_line_t ret; 4409 4410 if (iter->lost_events) { 4411 if (iter->lost_events == (unsigned long)-1) 4412 trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n", 4413 iter->cpu); 4414 else 4415 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n", 4416 iter->cpu, iter->lost_events); 4417 if (trace_seq_has_overflowed(&iter->seq)) 4418 return TRACE_TYPE_PARTIAL_LINE; 4419 } 4420 4421 if (iter->trace && iter->trace->print_line) { 4422 ret = iter->trace->print_line(iter); 4423 if (ret != TRACE_TYPE_UNHANDLED) 4424 return ret; 4425 } 4426 4427 if (iter->ent->type == TRACE_BPUTS && 4428 trace_flags & TRACE_ITER(PRINTK) && 4429 trace_flags & TRACE_ITER(PRINTK_MSGONLY)) 4430 return trace_print_bputs_msg_only(iter); 4431 4432 if (iter->ent->type == TRACE_BPRINT && 4433 trace_flags & TRACE_ITER(PRINTK) && 4434 trace_flags & TRACE_ITER(PRINTK_MSGONLY)) 4435 return trace_print_bprintk_msg_only(iter); 4436 4437 if (iter->ent->type == TRACE_PRINT && 4438 trace_flags & TRACE_ITER(PRINTK) && 4439 trace_flags & TRACE_ITER(PRINTK_MSGONLY)) 4440 return trace_print_printk_msg_only(iter); 4441 4442 if (trace_flags & TRACE_ITER(BIN)) 4443 return print_bin_fmt(iter); 4444 4445 if (trace_flags & TRACE_ITER(HEX)) 4446 return print_hex_fmt(iter); 4447 4448 if (trace_flags & TRACE_ITER(RAW)) 4449 return print_raw_fmt(iter); 4450 4451 return print_trace_fmt(iter); 4452 } 4453 4454 void trace_latency_header(struct seq_file *m) 4455 { 4456 struct trace_iterator *iter = m->private; 4457 struct trace_array *tr = iter->tr; 4458 4459 /* print nothing if the buffers are empty */ 4460 if (trace_empty(iter)) 4461 return; 4462 4463 if (iter->iter_flags & TRACE_FILE_LAT_FMT) 4464 print_trace_header(m, iter); 4465 4466 if (!(tr->trace_flags & TRACE_ITER(VERBOSE))) 4467 print_lat_help_header(m); 4468 } 4469 4470 void trace_default_header(struct seq_file *m) 4471 { 4472 struct trace_iterator *iter = m->private; 4473 struct trace_array *tr = iter->tr; 4474 unsigned long trace_flags = tr->trace_flags; 4475 4476 if (!(trace_flags & TRACE_ITER(CONTEXT_INFO))) 4477 return; 4478 4479 if (iter->iter_flags & TRACE_FILE_LAT_FMT) { 4480 /* print nothing if the buffers are empty */ 4481 if (trace_empty(iter)) 4482 return; 4483 print_trace_header(m, iter); 4484 if (!(trace_flags & TRACE_ITER(VERBOSE))) 4485 print_lat_help_header(m); 4486 } else { 4487 if (!(trace_flags & TRACE_ITER(VERBOSE))) { 4488 if (trace_flags & 
TRACE_ITER(IRQ_INFO)) 4489 print_func_help_header_irq(iter->array_buffer, 4490 m, trace_flags); 4491 else 4492 print_func_help_header(iter->array_buffer, m, 4493 trace_flags); 4494 } 4495 } 4496 } 4497 4498 static void test_ftrace_alive(struct seq_file *m) 4499 { 4500 if (!ftrace_is_dead()) 4501 return; 4502 seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n" 4503 "# MAY BE MISSING FUNCTION EVENTS\n"); 4504 } 4505 4506 #ifdef CONFIG_TRACER_MAX_TRACE 4507 static void show_snapshot_main_help(struct seq_file *m) 4508 { 4509 seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n" 4510 "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n" 4511 "# Takes a snapshot of the main buffer.\n" 4512 "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n" 4513 "# (Doesn't have to be '2' works with any number that\n" 4514 "# is not a '0' or '1')\n"); 4515 } 4516 4517 static void show_snapshot_percpu_help(struct seq_file *m) 4518 { 4519 seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n"); 4520 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP 4521 seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n" 4522 "# Takes a snapshot of the main buffer for this cpu.\n"); 4523 #else 4524 seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n" 4525 "# Must use main snapshot file to allocate.\n"); 4526 #endif 4527 seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n" 4528 "# (Doesn't have to be '2' works with any number that\n" 4529 "# is not a '0' or '1')\n"); 4530 } 4531 4532 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) 4533 { 4534 if (iter->tr->allocated_snapshot) 4535 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n"); 4536 else 4537 seq_puts(m, "#\n# * Snapshot is freed *\n#\n"); 4538 4539 seq_puts(m, "# Snapshot commands:\n"); 4540 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) 4541 show_snapshot_main_help(m); 4542 else 4543 show_snapshot_percpu_help(m); 4544 } 4545 #else 4546 /* Should never be called */ 4547 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { } 4548 #endif 4549 4550 static int s_show(struct seq_file *m, void *v) 4551 { 4552 struct trace_iterator *iter = v; 4553 int ret; 4554 4555 if (iter->ent == NULL) { 4556 if (iter->tr) { 4557 seq_printf(m, "# tracer: %s\n", iter->trace->name); 4558 seq_puts(m, "#\n"); 4559 test_ftrace_alive(m); 4560 } 4561 if (iter->snapshot && trace_empty(iter)) 4562 print_snapshot_help(m, iter); 4563 else if (iter->trace && iter->trace->print_header) 4564 iter->trace->print_header(m); 4565 else 4566 trace_default_header(m); 4567 4568 } else if (iter->leftover) { 4569 /* 4570 * If we filled the seq_file buffer earlier, we 4571 * want to just show it now. 4572 */ 4573 ret = trace_print_seq(m, &iter->seq); 4574 4575 /* ret should this time be zero, but you never know */ 4576 iter->leftover = ret; 4577 4578 } else { 4579 ret = print_trace_line(iter); 4580 if (ret == TRACE_TYPE_PARTIAL_LINE) { 4581 iter->seq.full = 0; 4582 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n"); 4583 } 4584 ret = trace_print_seq(m, &iter->seq); 4585 /* 4586 * If we overflow the seq_file buffer, then it will 4587 * ask us for this data again at start up. 4588 * Use that instead. 4589 * ret is 0 if seq_file write succeeded. 4590 * -1 otherwise. 
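 * A non-zero leftover makes the next s_start() hand back the iterator
 * unchanged, so s_show() flushes the already-filled iter->seq before
 * any new entries are read.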
4591 */ 4592 iter->leftover = ret; 4593 } 4594 4595 return 0; 4596 } 4597 4598 /* 4599 * Should be used after trace_array_get(), trace_types_lock 4600 * ensures that i_cdev was already initialized. 4601 */ 4602 static inline int tracing_get_cpu(struct inode *inode) 4603 { 4604 if (inode->i_cdev) /* See trace_create_cpu_file() */ 4605 return (long)inode->i_cdev - 1; 4606 return RING_BUFFER_ALL_CPUS; 4607 } 4608 4609 static const struct seq_operations tracer_seq_ops = { 4610 .start = s_start, 4611 .next = s_next, 4612 .stop = s_stop, 4613 .show = s_show, 4614 }; 4615 4616 /* 4617 * Note, as iter itself can be allocated and freed in different 4618 * ways, this function is only used to free its content, and not 4619 * the iterator itself. The only requirement to all the allocations 4620 * is that it must zero all fields (kzalloc), as freeing works with 4621 * ethier allocated content or NULL. 4622 */ 4623 static void free_trace_iter_content(struct trace_iterator *iter) 4624 { 4625 /* The fmt is either NULL, allocated or points to static_fmt_buf */ 4626 if (iter->fmt != static_fmt_buf) 4627 kfree(iter->fmt); 4628 4629 kfree(iter->temp); 4630 kfree(iter->buffer_iter); 4631 mutex_destroy(&iter->mutex); 4632 free_cpumask_var(iter->started); 4633 } 4634 4635 static struct trace_iterator * 4636 __tracing_open(struct inode *inode, struct file *file, bool snapshot) 4637 { 4638 struct trace_array *tr = inode->i_private; 4639 struct trace_iterator *iter; 4640 int cpu; 4641 4642 if (tracing_disabled) 4643 return ERR_PTR(-ENODEV); 4644 4645 iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter)); 4646 if (!iter) 4647 return ERR_PTR(-ENOMEM); 4648 4649 iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter), 4650 GFP_KERNEL); 4651 if (!iter->buffer_iter) 4652 goto release; 4653 4654 /* 4655 * trace_find_next_entry() may need to save off iter->ent. 4656 * It will place it into the iter->temp buffer. As most 4657 * events are less than 128, allocate a buffer of that size. 4658 * If one is greater, then trace_find_next_entry() will 4659 * allocate a new buffer to adjust for the bigger iter->ent. 4660 * It's not critical if it fails to get allocated here. 4661 */ 4662 iter->temp = kmalloc(128, GFP_KERNEL); 4663 if (iter->temp) 4664 iter->temp_size = 128; 4665 4666 /* 4667 * trace_event_printf() may need to modify given format 4668 * string to replace %p with %px so that it shows real address 4669 * instead of hash value. However, that is only for the event 4670 * tracing, other tracer may not need. Defer the allocation 4671 * until it is needed. 4672 */ 4673 iter->fmt = NULL; 4674 iter->fmt_size = 0; 4675 4676 mutex_lock(&trace_types_lock); 4677 iter->trace = tr->current_trace; 4678 4679 if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL)) 4680 goto fail; 4681 4682 iter->tr = tr; 4683 4684 #ifdef CONFIG_TRACER_MAX_TRACE 4685 /* Currently only the top directory has a snapshot */ 4686 if (tr->current_trace->print_max || snapshot) 4687 iter->array_buffer = &tr->max_buffer; 4688 else 4689 #endif 4690 iter->array_buffer = &tr->array_buffer; 4691 iter->snapshot = snapshot; 4692 iter->pos = -1; 4693 iter->cpu_file = tracing_get_cpu(inode); 4694 mutex_init(&iter->mutex); 4695 4696 /* Notify the tracer early; before we stop tracing. 
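 * (When pause-on-trace is set, tracing_stop_tr() runs further below,
 * after the tracer's open() callback has had a chance to run.)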
*/ 4697 if (iter->trace->open) 4698 iter->trace->open(iter); 4699 4700 /* Annotate start of buffers if we had overruns */ 4701 if (ring_buffer_overruns(iter->array_buffer->buffer)) 4702 iter->iter_flags |= TRACE_FILE_ANNOTATE; 4703 4704 /* Output in nanoseconds only if we are using a clock in nanoseconds. */ 4705 if (trace_clocks[tr->clock_id].in_ns) 4706 iter->iter_flags |= TRACE_FILE_TIME_IN_NS; 4707 4708 /* 4709 * If pause-on-trace is enabled, then stop the trace while 4710 * dumping, unless this is the "snapshot" file 4711 */ 4712 if (!iter->snapshot && (tr->trace_flags & TRACE_ITER(PAUSE_ON_TRACE))) 4713 tracing_stop_tr(tr); 4714 4715 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) { 4716 for_each_tracing_cpu(cpu) { 4717 iter->buffer_iter[cpu] = 4718 ring_buffer_read_start(iter->array_buffer->buffer, 4719 cpu, GFP_KERNEL); 4720 tracing_iter_reset(iter, cpu); 4721 } 4722 } else { 4723 cpu = iter->cpu_file; 4724 iter->buffer_iter[cpu] = 4725 ring_buffer_read_start(iter->array_buffer->buffer, 4726 cpu, GFP_KERNEL); 4727 tracing_iter_reset(iter, cpu); 4728 } 4729 4730 mutex_unlock(&trace_types_lock); 4731 4732 return iter; 4733 4734 fail: 4735 mutex_unlock(&trace_types_lock); 4736 free_trace_iter_content(iter); 4737 release: 4738 seq_release_private(inode, file); 4739 return ERR_PTR(-ENOMEM); 4740 } 4741 4742 int tracing_open_generic(struct inode *inode, struct file *filp) 4743 { 4744 int ret; 4745 4746 ret = tracing_check_open_get_tr(NULL); 4747 if (ret) 4748 return ret; 4749 4750 filp->private_data = inode->i_private; 4751 return 0; 4752 } 4753 4754 bool tracing_is_disabled(void) 4755 { 4756 return (tracing_disabled) ? true: false; 4757 } 4758 4759 /* 4760 * Open and update trace_array ref count. 4761 * Must have the current trace_array passed to it. 4762 */ 4763 int tracing_open_generic_tr(struct inode *inode, struct file *filp) 4764 { 4765 struct trace_array *tr = inode->i_private; 4766 int ret; 4767 4768 ret = tracing_check_open_get_tr(tr); 4769 if (ret) 4770 return ret; 4771 4772 filp->private_data = inode->i_private; 4773 4774 return 0; 4775 } 4776 4777 /* 4778 * The private pointer of the inode is the trace_event_file. 4779 * Update the tr ref count associated to it. 
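 * Both references taken here (on file->tr and on the event file
 * itself) are dropped in tracing_release_file_tr().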
4780 */ 4781 int tracing_open_file_tr(struct inode *inode, struct file *filp) 4782 { 4783 struct trace_event_file *file = inode->i_private; 4784 int ret; 4785 4786 ret = tracing_check_open_get_tr(file->tr); 4787 if (ret) 4788 return ret; 4789 4790 guard(mutex)(&event_mutex); 4791 4792 /* Fail if the file is marked for removal */ 4793 if (file->flags & EVENT_FILE_FL_FREED) { 4794 trace_array_put(file->tr); 4795 return -ENODEV; 4796 } else { 4797 event_file_get(file); 4798 } 4799 4800 filp->private_data = inode->i_private; 4801 4802 return 0; 4803 } 4804 4805 int tracing_release_file_tr(struct inode *inode, struct file *filp) 4806 { 4807 struct trace_event_file *file = inode->i_private; 4808 4809 trace_array_put(file->tr); 4810 event_file_put(file); 4811 4812 return 0; 4813 } 4814 4815 int tracing_single_release_file_tr(struct inode *inode, struct file *filp) 4816 { 4817 tracing_release_file_tr(inode, filp); 4818 return single_release(inode, filp); 4819 } 4820 4821 static int tracing_release(struct inode *inode, struct file *file) 4822 { 4823 struct trace_array *tr = inode->i_private; 4824 struct seq_file *m = file->private_data; 4825 struct trace_iterator *iter; 4826 int cpu; 4827 4828 if (!(file->f_mode & FMODE_READ)) { 4829 trace_array_put(tr); 4830 return 0; 4831 } 4832 4833 /* Writes do not use seq_file */ 4834 iter = m->private; 4835 mutex_lock(&trace_types_lock); 4836 4837 for_each_tracing_cpu(cpu) { 4838 if (iter->buffer_iter[cpu]) 4839 ring_buffer_read_finish(iter->buffer_iter[cpu]); 4840 } 4841 4842 if (iter->trace && iter->trace->close) 4843 iter->trace->close(iter); 4844 4845 if (!iter->snapshot && tr->stop_count) 4846 /* reenable tracing if it was previously enabled */ 4847 tracing_start_tr(tr); 4848 4849 __trace_array_put(tr); 4850 4851 mutex_unlock(&trace_types_lock); 4852 4853 free_trace_iter_content(iter); 4854 seq_release_private(inode, file); 4855 4856 return 0; 4857 } 4858 4859 int tracing_release_generic_tr(struct inode *inode, struct file *file) 4860 { 4861 struct trace_array *tr = inode->i_private; 4862 4863 trace_array_put(tr); 4864 return 0; 4865 } 4866 4867 static int tracing_single_release_tr(struct inode *inode, struct file *file) 4868 { 4869 struct trace_array *tr = inode->i_private; 4870 4871 trace_array_put(tr); 4872 4873 return single_release(inode, file); 4874 } 4875 4876 static int tracing_open(struct inode *inode, struct file *file) 4877 { 4878 struct trace_array *tr = inode->i_private; 4879 struct trace_iterator *iter; 4880 int ret; 4881 4882 ret = tracing_check_open_get_tr(tr); 4883 if (ret) 4884 return ret; 4885 4886 /* If this file was open for write, then erase contents */ 4887 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) { 4888 int cpu = tracing_get_cpu(inode); 4889 struct array_buffer *trace_buf = &tr->array_buffer; 4890 4891 #ifdef CONFIG_TRACER_MAX_TRACE 4892 if (tr->current_trace->print_max) 4893 trace_buf = &tr->max_buffer; 4894 #endif 4895 4896 if (cpu == RING_BUFFER_ALL_CPUS) 4897 tracing_reset_online_cpus(trace_buf); 4898 else 4899 tracing_reset_cpu(trace_buf, cpu); 4900 } 4901 4902 if (file->f_mode & FMODE_READ) { 4903 iter = __tracing_open(inode, file, false); 4904 if (IS_ERR(iter)) 4905 ret = PTR_ERR(iter); 4906 else if (tr->trace_flags & TRACE_ITER(LATENCY_FMT)) 4907 iter->iter_flags |= TRACE_FILE_LAT_FMT; 4908 } 4909 4910 if (ret < 0) 4911 trace_array_put(tr); 4912 4913 return ret; 4914 } 4915 4916 /* 4917 * Some tracers are not suitable for instance buffers. 
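 * (For example, an instance backed by a mapped buffer range has no
 * snapshot buffer, so tracers that set use_max_tr are rejected below.)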
4918 * A tracer is always available for the global array (toplevel) 4919 * or if it explicitly states that it is. 4920 */ 4921 static bool 4922 trace_ok_for_array(struct tracer *t, struct trace_array *tr) 4923 { 4924 #ifdef CONFIG_TRACER_SNAPSHOT 4925 /* arrays with mapped buffer range do not have snapshots */ 4926 if (tr->range_addr_start && t->use_max_tr) 4927 return false; 4928 #endif 4929 return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances; 4930 } 4931 4932 /* Find the next tracer that this trace array may use */ 4933 static struct tracer * 4934 get_tracer_for_array(struct trace_array *tr, struct tracer *t) 4935 { 4936 while (t && !trace_ok_for_array(t, tr)) 4937 t = t->next; 4938 4939 return t; 4940 } 4941 4942 static void * 4943 t_next(struct seq_file *m, void *v, loff_t *pos) 4944 { 4945 struct trace_array *tr = m->private; 4946 struct tracer *t = v; 4947 4948 (*pos)++; 4949 4950 if (t) 4951 t = get_tracer_for_array(tr, t->next); 4952 4953 return t; 4954 } 4955 4956 static void *t_start(struct seq_file *m, loff_t *pos) 4957 { 4958 struct trace_array *tr = m->private; 4959 struct tracer *t; 4960 loff_t l = 0; 4961 4962 mutex_lock(&trace_types_lock); 4963 4964 t = get_tracer_for_array(tr, trace_types); 4965 for (; t && l < *pos; t = t_next(m, t, &l)) 4966 ; 4967 4968 return t; 4969 } 4970 4971 static void t_stop(struct seq_file *m, void *p) 4972 { 4973 mutex_unlock(&trace_types_lock); 4974 } 4975 4976 static int t_show(struct seq_file *m, void *v) 4977 { 4978 struct tracer *t = v; 4979 4980 if (!t) 4981 return 0; 4982 4983 seq_puts(m, t->name); 4984 if (t->next) 4985 seq_putc(m, ' '); 4986 else 4987 seq_putc(m, '\n'); 4988 4989 return 0; 4990 } 4991 4992 static const struct seq_operations show_traces_seq_ops = { 4993 .start = t_start, 4994 .next = t_next, 4995 .stop = t_stop, 4996 .show = t_show, 4997 }; 4998 4999 static int show_traces_open(struct inode *inode, struct file *file) 5000 { 5001 struct trace_array *tr = inode->i_private; 5002 struct seq_file *m; 5003 int ret; 5004 5005 ret = tracing_check_open_get_tr(tr); 5006 if (ret) 5007 return ret; 5008 5009 ret = seq_open(file, &show_traces_seq_ops); 5010 if (ret) { 5011 trace_array_put(tr); 5012 return ret; 5013 } 5014 5015 m = file->private_data; 5016 m->private = tr; 5017 5018 return 0; 5019 } 5020 5021 static int tracing_seq_release(struct inode *inode, struct file *file) 5022 { 5023 struct trace_array *tr = inode->i_private; 5024 5025 trace_array_put(tr); 5026 return seq_release(inode, file); 5027 } 5028 5029 static ssize_t 5030 tracing_write_stub(struct file *filp, const char __user *ubuf, 5031 size_t count, loff_t *ppos) 5032 { 5033 return count; 5034 } 5035 5036 loff_t tracing_lseek(struct file *file, loff_t offset, int whence) 5037 { 5038 int ret; 5039 5040 if (file->f_mode & FMODE_READ) 5041 ret = seq_lseek(file, offset, whence); 5042 else 5043 file->f_pos = ret = 0; 5044 5045 return ret; 5046 } 5047 5048 static const struct file_operations tracing_fops = { 5049 .open = tracing_open, 5050 .read = seq_read, 5051 .read_iter = seq_read_iter, 5052 .splice_read = copy_splice_read, 5053 .write = tracing_write_stub, 5054 .llseek = tracing_lseek, 5055 .release = tracing_release, 5056 }; 5057 5058 static const struct file_operations show_traces_fops = { 5059 .open = show_traces_open, 5060 .read = seq_read, 5061 .llseek = seq_lseek, 5062 .release = tracing_seq_release, 5063 }; 5064 5065 static ssize_t 5066 tracing_cpumask_read(struct file *filp, char __user *ubuf, 5067 size_t count, loff_t *ppos) 5068 { 5069 struct 
trace_array *tr = file_inode(filp)->i_private; 5070 char *mask_str __free(kfree) = NULL; 5071 int len; 5072 5073 len = snprintf(NULL, 0, "%*pb\n", 5074 cpumask_pr_args(tr->tracing_cpumask)) + 1; 5075 mask_str = kmalloc(len, GFP_KERNEL); 5076 if (!mask_str) 5077 return -ENOMEM; 5078 5079 len = snprintf(mask_str, len, "%*pb\n", 5080 cpumask_pr_args(tr->tracing_cpumask)); 5081 if (len >= count) 5082 return -EINVAL; 5083 5084 return simple_read_from_buffer(ubuf, count, ppos, mask_str, len); 5085 } 5086 5087 int tracing_set_cpumask(struct trace_array *tr, 5088 cpumask_var_t tracing_cpumask_new) 5089 { 5090 int cpu; 5091 5092 if (!tr) 5093 return -EINVAL; 5094 5095 local_irq_disable(); 5096 arch_spin_lock(&tr->max_lock); 5097 for_each_tracing_cpu(cpu) { 5098 /* 5099 * Increase/decrease the disabled counter if we are 5100 * about to flip a bit in the cpumask: 5101 */ 5102 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) && 5103 !cpumask_test_cpu(cpu, tracing_cpumask_new)) { 5104 ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu); 5105 #ifdef CONFIG_TRACER_MAX_TRACE 5106 ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu); 5107 #endif 5108 } 5109 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) && 5110 cpumask_test_cpu(cpu, tracing_cpumask_new)) { 5111 ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu); 5112 #ifdef CONFIG_TRACER_MAX_TRACE 5113 ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu); 5114 #endif 5115 } 5116 } 5117 arch_spin_unlock(&tr->max_lock); 5118 local_irq_enable(); 5119 5120 cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new); 5121 5122 return 0; 5123 } 5124 5125 static ssize_t 5126 tracing_cpumask_write(struct file *filp, const char __user *ubuf, 5127 size_t count, loff_t *ppos) 5128 { 5129 struct trace_array *tr = file_inode(filp)->i_private; 5130 cpumask_var_t tracing_cpumask_new; 5131 int err; 5132 5133 if (count == 0 || count > KMALLOC_MAX_SIZE) 5134 return -EINVAL; 5135 5136 if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL)) 5137 return -ENOMEM; 5138 5139 err = cpumask_parse_user(ubuf, count, tracing_cpumask_new); 5140 if (err) 5141 goto err_free; 5142 5143 err = tracing_set_cpumask(tr, tracing_cpumask_new); 5144 if (err) 5145 goto err_free; 5146 5147 free_cpumask_var(tracing_cpumask_new); 5148 5149 return count; 5150 5151 err_free: 5152 free_cpumask_var(tracing_cpumask_new); 5153 5154 return err; 5155 } 5156 5157 static const struct file_operations tracing_cpumask_fops = { 5158 .open = tracing_open_generic_tr, 5159 .read = tracing_cpumask_read, 5160 .write = tracing_cpumask_write, 5161 .release = tracing_release_generic_tr, 5162 .llseek = generic_file_llseek, 5163 }; 5164 5165 static int tracing_trace_options_show(struct seq_file *m, void *v) 5166 { 5167 struct tracer_opt *trace_opts; 5168 struct trace_array *tr = m->private; 5169 struct tracer_flags *flags; 5170 u32 tracer_flags; 5171 int i; 5172 5173 guard(mutex)(&trace_types_lock); 5174 5175 for (i = 0; trace_options[i]; i++) { 5176 if (tr->trace_flags & (1ULL << i)) 5177 seq_printf(m, "%s\n", trace_options[i]); 5178 else 5179 seq_printf(m, "no%s\n", trace_options[i]); 5180 } 5181 5182 flags = tr->current_trace_flags; 5183 if (!flags || !flags->opts) 5184 return 0; 5185 5186 tracer_flags = flags->val; 5187 trace_opts = flags->opts; 5188 5189 for (i = 0; trace_opts[i].name; i++) { 5190 if (tracer_flags & trace_opts[i].bit) 5191 seq_printf(m, "%s\n", trace_opts[i].name); 5192 else 5193 seq_printf(m, "no%s\n", trace_opts[i].name); 5194 } 5195 5196 return 0; 5197 } 5198 5199 static 
int __set_tracer_option(struct trace_array *tr, 5200 struct tracer_flags *tracer_flags, 5201 struct tracer_opt *opts, int neg) 5202 { 5203 struct tracer *trace = tracer_flags->trace; 5204 int ret = 0; 5205 5206 if (trace->set_flag) 5207 ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg); 5208 if (ret) 5209 return ret; 5210 5211 if (neg) 5212 tracer_flags->val &= ~opts->bit; 5213 else 5214 tracer_flags->val |= opts->bit; 5215 return 0; 5216 } 5217 5218 /* Try to assign a tracer specific option */ 5219 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg) 5220 { 5221 struct tracer_flags *tracer_flags = tr->current_trace_flags; 5222 struct tracer_opt *opts = NULL; 5223 int i; 5224 5225 if (!tracer_flags || !tracer_flags->opts) 5226 return 0; 5227 5228 for (i = 0; tracer_flags->opts[i].name; i++) { 5229 opts = &tracer_flags->opts[i]; 5230 5231 if (strcmp(cmp, opts->name) == 0) 5232 return __set_tracer_option(tr, tracer_flags, opts, neg); 5233 } 5234 5235 return -EINVAL; 5236 } 5237 5238 /* Some tracers require overwrite to stay enabled */ 5239 int trace_keep_overwrite(struct tracer *tracer, u64 mask, int set) 5240 { 5241 if (tracer->enabled && (mask & TRACE_ITER(OVERWRITE)) && !set) 5242 return -1; 5243 5244 return 0; 5245 } 5246 5247 int set_tracer_flag(struct trace_array *tr, u64 mask, int enabled) 5248 { 5249 switch (mask) { 5250 case TRACE_ITER(RECORD_TGID): 5251 case TRACE_ITER(RECORD_CMD): 5252 case TRACE_ITER(TRACE_PRINTK): 5253 case TRACE_ITER(COPY_MARKER): 5254 lockdep_assert_held(&event_mutex); 5255 } 5256 5257 /* do nothing if flag is already set */ 5258 if (!!(tr->trace_flags & mask) == !!enabled) 5259 return 0; 5260 5261 /* Give the tracer a chance to approve the change */ 5262 if (tr->current_trace->flag_changed) 5263 if (tr->current_trace->flag_changed(tr, mask, !!enabled)) 5264 return -EINVAL; 5265 5266 switch (mask) { 5267 case TRACE_ITER(TRACE_PRINTK): 5268 if (enabled) { 5269 update_printk_trace(tr); 5270 } else { 5271 /* 5272 * The global_trace cannot clear this. 5273 * It's flag only gets cleared if another instance sets it. 5274 */ 5275 if (printk_trace == &global_trace) 5276 return -EINVAL; 5277 /* 5278 * An instance must always have it set. 5279 * by default, that's the global_trace instane. 
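 * Clearing the flag on the instance that currently owns the
 * trace_printk() output therefore hands it back to global_trace below.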
5280 */ 5281 if (printk_trace == tr) 5282 update_printk_trace(&global_trace); 5283 } 5284 break; 5285 5286 case TRACE_ITER(COPY_MARKER): 5287 update_marker_trace(tr, enabled); 5288 /* update_marker_trace updates the tr->trace_flags */ 5289 return 0; 5290 } 5291 5292 if (enabled) 5293 tr->trace_flags |= mask; 5294 else 5295 tr->trace_flags &= ~mask; 5296 5297 switch (mask) { 5298 case TRACE_ITER(RECORD_CMD): 5299 trace_event_enable_cmd_record(enabled); 5300 break; 5301 5302 case TRACE_ITER(RECORD_TGID): 5303 5304 if (trace_alloc_tgid_map() < 0) { 5305 tr->trace_flags &= ~TRACE_ITER(RECORD_TGID); 5306 return -ENOMEM; 5307 } 5308 5309 trace_event_enable_tgid_record(enabled); 5310 break; 5311 5312 case TRACE_ITER(EVENT_FORK): 5313 trace_event_follow_fork(tr, enabled); 5314 break; 5315 5316 case TRACE_ITER(FUNC_FORK): 5317 ftrace_pid_follow_fork(tr, enabled); 5318 break; 5319 5320 case TRACE_ITER(OVERWRITE): 5321 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled); 5322 #ifdef CONFIG_TRACER_MAX_TRACE 5323 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled); 5324 #endif 5325 break; 5326 5327 case TRACE_ITER(PRINTK): 5328 trace_printk_start_stop_comm(enabled); 5329 trace_printk_control(enabled); 5330 break; 5331 5332 #if defined(CONFIG_FUNCTION_PROFILER) && defined(CONFIG_FUNCTION_GRAPH_TRACER) 5333 case TRACE_GRAPH_GRAPH_TIME: 5334 ftrace_graph_graph_time_control(enabled); 5335 break; 5336 #endif 5337 } 5338 5339 return 0; 5340 } 5341 5342 int trace_set_options(struct trace_array *tr, char *option) 5343 { 5344 char *cmp; 5345 int neg = 0; 5346 int ret; 5347 size_t orig_len = strlen(option); 5348 int len; 5349 5350 cmp = strstrip(option); 5351 5352 len = str_has_prefix(cmp, "no"); 5353 if (len) 5354 neg = 1; 5355 5356 cmp += len; 5357 5358 mutex_lock(&event_mutex); 5359 mutex_lock(&trace_types_lock); 5360 5361 ret = match_string(trace_options, -1, cmp); 5362 /* If no option could be set, test the specific tracer options */ 5363 if (ret < 0) 5364 ret = set_tracer_option(tr, cmp, neg); 5365 else 5366 ret = set_tracer_flag(tr, 1ULL << ret, !neg); 5367 5368 mutex_unlock(&trace_types_lock); 5369 mutex_unlock(&event_mutex); 5370 5371 /* 5372 * If the first trailing whitespace is replaced with '\0' by strstrip, 5373 * turn it back into a space. 
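 * E.g. an option written as "overwrite\n" becomes "overwrite\0" after
 * strstrip(); orig_len (10) is then greater than strlen(option) (9),
 * so the NUL is turned back into ' ' and the buffer keeps its
 * original length.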
5374 */ 5375 if (orig_len > strlen(option)) 5376 option[strlen(option)] = ' '; 5377 5378 return ret; 5379 } 5380 5381 static void __init apply_trace_boot_options(void) 5382 { 5383 char *buf = trace_boot_options_buf; 5384 char *option; 5385 5386 while (true) { 5387 option = strsep(&buf, ","); 5388 5389 if (!option) 5390 break; 5391 5392 if (*option) 5393 trace_set_options(&global_trace, option); 5394 5395 /* Put back the comma to allow this to be called again */ 5396 if (buf) 5397 *(buf - 1) = ','; 5398 } 5399 } 5400 5401 static ssize_t 5402 tracing_trace_options_write(struct file *filp, const char __user *ubuf, 5403 size_t cnt, loff_t *ppos) 5404 { 5405 struct seq_file *m = filp->private_data; 5406 struct trace_array *tr = m->private; 5407 char buf[64]; 5408 int ret; 5409 5410 if (cnt >= sizeof(buf)) 5411 return -EINVAL; 5412 5413 if (copy_from_user(buf, ubuf, cnt)) 5414 return -EFAULT; 5415 5416 buf[cnt] = 0; 5417 5418 ret = trace_set_options(tr, buf); 5419 if (ret < 0) 5420 return ret; 5421 5422 *ppos += cnt; 5423 5424 return cnt; 5425 } 5426 5427 static int tracing_trace_options_open(struct inode *inode, struct file *file) 5428 { 5429 struct trace_array *tr = inode->i_private; 5430 int ret; 5431 5432 ret = tracing_check_open_get_tr(tr); 5433 if (ret) 5434 return ret; 5435 5436 ret = single_open(file, tracing_trace_options_show, inode->i_private); 5437 if (ret < 0) 5438 trace_array_put(tr); 5439 5440 return ret; 5441 } 5442 5443 static const struct file_operations tracing_iter_fops = { 5444 .open = tracing_trace_options_open, 5445 .read = seq_read, 5446 .llseek = seq_lseek, 5447 .release = tracing_single_release_tr, 5448 .write = tracing_trace_options_write, 5449 }; 5450 5451 static const char readme_msg[] = 5452 "tracing mini-HOWTO:\n\n" 5453 "By default tracefs removes all OTH file permission bits.\n" 5454 "When mounting tracefs an optional group id can be specified\n" 5455 "which adds the group to every directory and file in tracefs:\n\n" 5456 "\t e.g. 
mount -t tracefs [-o [gid=<gid>]] nodev /sys/kernel/tracing\n\n" 5457 "# echo 0 > tracing_on : quick way to disable tracing\n" 5458 "# echo 1 > tracing_on : quick way to re-enable tracing\n\n" 5459 " Important files:\n" 5460 " trace\t\t\t- The static contents of the buffer\n" 5461 "\t\t\t To clear the buffer write into this file: echo > trace\n" 5462 " trace_pipe\t\t- A consuming read to see the contents of the buffer\n" 5463 " current_tracer\t- function and latency tracers\n" 5464 " available_tracers\t- list of configured tracers for current_tracer\n" 5465 " error_log\t- error log for failed commands (that support it)\n" 5466 " buffer_size_kb\t- view and modify size of per cpu buffer\n" 5467 " buffer_total_size_kb - view total size of all cpu buffers\n\n" 5468 " trace_clock\t\t- change the clock used to order events\n" 5469 " local: Per cpu clock but may not be synced across CPUs\n" 5470 " global: Synced across CPUs but slows tracing down.\n" 5471 " counter: Not a clock, but just an increment\n" 5472 " uptime: Jiffy counter from time of boot\n" 5473 " perf: Same clock that perf events use\n" 5474 #ifdef CONFIG_X86_64 5475 " x86-tsc: TSC cycle counter\n" 5476 #endif 5477 "\n timestamp_mode\t- view the mode used to timestamp events\n" 5478 " delta: Delta difference against a buffer-wide timestamp\n" 5479 " absolute: Absolute (standalone) timestamp\n" 5480 "\n trace_marker\t\t- Writes into this file writes into the kernel buffer\n" 5481 "\n trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n" 5482 " tracing_cpumask\t- Limit which CPUs to trace\n" 5483 " instances\t\t- Make sub-buffers with: mkdir instances/foo\n" 5484 "\t\t\t Remove sub-buffer with rmdir\n" 5485 " trace_options\t\t- Set format or modify how tracing happens\n" 5486 "\t\t\t Disable an option by prefixing 'no' to the\n" 5487 "\t\t\t option name\n" 5488 " saved_cmdlines_size\t- echo command number in here to store comm-pid list\n" 5489 #ifdef CONFIG_DYNAMIC_FTRACE 5490 "\n available_filter_functions - list of functions that can be filtered on\n" 5491 " set_ftrace_filter\t- echo function name in here to only trace these\n" 5492 "\t\t\t functions\n" 5493 "\t accepts: func_full_name or glob-matching-pattern\n" 5494 "\t modules: Can select a group via module\n" 5495 "\t Format: :mod:<module-name>\n" 5496 "\t example: echo :mod:ext3 > set_ftrace_filter\n" 5497 "\t triggers: a command to perform when function is hit\n" 5498 "\t Format: <function>:<trigger>[:count]\n" 5499 "\t trigger: traceon, traceoff\n" 5500 "\t\t enable_event:<system>:<event>\n" 5501 "\t\t disable_event:<system>:<event>\n" 5502 #ifdef CONFIG_STACKTRACE 5503 "\t\t stacktrace\n" 5504 #endif 5505 #ifdef CONFIG_TRACER_SNAPSHOT 5506 "\t\t snapshot\n" 5507 #endif 5508 "\t\t dump\n" 5509 "\t\t cpudump\n" 5510 "\t example: echo do_fault:traceoff > set_ftrace_filter\n" 5511 "\t echo do_trap:traceoff:3 > set_ftrace_filter\n" 5512 "\t The first one will disable tracing every time do_fault is hit\n" 5513 "\t The second will disable tracing at most 3 times when do_trap is hit\n" 5514 "\t The first time do trap is hit and it disables tracing, the\n" 5515 "\t counter will decrement to 2. If tracing is already disabled,\n" 5516 "\t the counter will not decrement. 
It only decrements when the\n" 5517 "\t trigger did work\n" 5518 "\t To remove trigger without count:\n" 5519 "\t echo '!<function>:<trigger> > set_ftrace_filter\n" 5520 "\t To remove trigger with a count:\n" 5521 "\t echo '!<function>:<trigger>:0 > set_ftrace_filter\n" 5522 " set_ftrace_notrace\t- echo function name in here to never trace.\n" 5523 "\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n" 5524 "\t modules: Can select a group via module command :mod:\n" 5525 "\t Does not accept triggers\n" 5526 #endif /* CONFIG_DYNAMIC_FTRACE */ 5527 #ifdef CONFIG_FUNCTION_TRACER 5528 " set_ftrace_pid\t- Write pid(s) to only function trace those pids\n" 5529 "\t\t (function)\n" 5530 " set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n" 5531 "\t\t (function)\n" 5532 #endif 5533 #ifdef CONFIG_FUNCTION_GRAPH_TRACER 5534 " set_graph_function\t- Trace the nested calls of a function (function_graph)\n" 5535 " set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n" 5536 " max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n" 5537 #endif 5538 #ifdef CONFIG_TRACER_SNAPSHOT 5539 "\n snapshot\t\t- Like 'trace' but shows the content of the static\n" 5540 "\t\t\t snapshot buffer. Read the contents for more\n" 5541 "\t\t\t information\n" 5542 #endif 5543 #ifdef CONFIG_STACK_TRACER 5544 " stack_trace\t\t- Shows the max stack trace when active\n" 5545 " stack_max_size\t- Shows current max stack size that was traced\n" 5546 "\t\t\t Write into this file to reset the max size (trigger a\n" 5547 "\t\t\t new trace)\n" 5548 #ifdef CONFIG_DYNAMIC_FTRACE 5549 " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n" 5550 "\t\t\t traces\n" 5551 #endif 5552 #endif /* CONFIG_STACK_TRACER */ 5553 #ifdef CONFIG_DYNAMIC_EVENTS 5554 " dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n" 5555 "\t\t\t Write into this file to define/undefine new trace events.\n" 5556 #endif 5557 #ifdef CONFIG_KPROBE_EVENTS 5558 " kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n" 5559 "\t\t\t Write into this file to define/undefine new trace events.\n" 5560 #endif 5561 #ifdef CONFIG_UPROBE_EVENTS 5562 " uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n" 5563 "\t\t\t Write into this file to define/undefine new trace events.\n" 5564 #endif 5565 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \ 5566 defined(CONFIG_FPROBE_EVENTS) 5567 "\t accepts: event-definitions (one definition per line)\n" 5568 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) 5569 "\t Format: p[:[<group>/][<event>]] <place> [<args>]\n" 5570 "\t r[maxactive][:[<group>/][<event>]] <place> [<args>]\n" 5571 #endif 5572 #ifdef CONFIG_FPROBE_EVENTS 5573 "\t f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n" 5574 "\t t[:[<group>/][<event>]] <tracepoint> [<args>]\n" 5575 #endif 5576 #ifdef CONFIG_HIST_TRIGGERS 5577 "\t s:[synthetic/]<event> <field> [<field>]\n" 5578 #endif 5579 "\t e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n" 5580 "\t -:[<group>/][<event>]\n" 5581 #ifdef CONFIG_KPROBE_EVENTS 5582 "\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n" 5583 "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n" 5584 #endif 5585 #ifdef CONFIG_UPROBE_EVENTS 5586 " place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n" 5587 #endif 5588 "\t args: <name>=fetcharg[:type]\n" 5589 "\t fetcharg: 
(%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n" 5590 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API 5591 "\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n" 5592 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS 5593 "\t <argname>[->field[->field|.field...]],\n" 5594 #endif 5595 #else 5596 "\t $stack<index>, $stack, $retval, $comm,\n" 5597 #endif 5598 "\t +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n" 5599 "\t kernel return probes support: $retval, $arg<N>, $comm\n" 5600 "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n" 5601 "\t b<bit-width>@<bit-offset>/<container-size>, ustring,\n" 5602 "\t symstr, %pd/%pD, <type>\\[<array-size>\\]\n" 5603 #ifdef CONFIG_HIST_TRIGGERS 5604 "\t field: <stype> <name>;\n" 5605 "\t stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n" 5606 "\t [unsigned] char/int/long\n" 5607 #endif 5608 "\t efield: For event probes ('e' types), the field is on of the fields\n" 5609 "\t of the <attached-group>/<attached-event>.\n" 5610 #endif 5611 " set_event\t\t- Enables events by name written into it\n" 5612 "\t\t\t Can enable module events via: :mod:<module>\n" 5613 " events/\t\t- Directory containing all trace event subsystems:\n" 5614 " enable\t\t- Write 0/1 to enable/disable tracing of all events\n" 5615 " events/<system>/\t- Directory containing all trace events for <system>:\n" 5616 " enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n" 5617 "\t\t\t events\n" 5618 " filter\t\t- If set, only events passing filter are traced\n" 5619 " events/<system>/<event>/\t- Directory containing control files for\n" 5620 "\t\t\t <event>:\n" 5621 " enable\t\t- Write 0/1 to enable/disable tracing of <event>\n" 5622 " filter\t\t- If set, only events passing filter are traced\n" 5623 " trigger\t\t- If set, a command to perform when event is hit\n" 5624 "\t Format: <trigger>[:count][if <filter>]\n" 5625 "\t trigger: traceon, traceoff\n" 5626 "\t enable_event:<system>:<event>\n" 5627 "\t disable_event:<system>:<event>\n" 5628 #ifdef CONFIG_HIST_TRIGGERS 5629 "\t enable_hist:<system>:<event>\n" 5630 "\t disable_hist:<system>:<event>\n" 5631 #endif 5632 #ifdef CONFIG_STACKTRACE 5633 "\t\t stacktrace\n" 5634 #endif 5635 #ifdef CONFIG_TRACER_SNAPSHOT 5636 "\t\t snapshot\n" 5637 #endif 5638 #ifdef CONFIG_HIST_TRIGGERS 5639 "\t\t hist (see below)\n" 5640 #endif 5641 "\t example: echo traceoff > events/block/block_unplug/trigger\n" 5642 "\t echo traceoff:3 > events/block/block_unplug/trigger\n" 5643 "\t echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n" 5644 "\t events/block/block_unplug/trigger\n" 5645 "\t The first disables tracing every time block_unplug is hit.\n" 5646 "\t The second disables tracing the first 3 times block_unplug is hit.\n" 5647 "\t The third enables the kmalloc event the first 3 times block_unplug\n" 5648 "\t is hit and has value of greater than 1 for the 'nr_rq' event field.\n" 5649 "\t Like function triggers, the counter is only decremented if it\n" 5650 "\t enabled or disabled tracing.\n" 5651 "\t To remove a trigger without a count:\n" 5652 "\t echo '!<trigger> > <system>/<event>/trigger\n" 5653 "\t To remove a trigger with a count:\n" 5654 "\t echo '!<trigger>:0 > <system>/<event>/trigger\n" 5655 "\t Filters can be ignored when removing a trigger.\n" 5656 #ifdef CONFIG_HIST_TRIGGERS 5657 " hist trigger\t- If set, event hits are aggregated into a hash table\n" 5658 "\t Format: hist:keys=<field1[,field2,...]>\n" 5659 "\t [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n" 5660 "\t 
[:values=<field1[,field2,...]>]\n" 5661 "\t [:sort=<field1[,field2,...]>]\n" 5662 "\t [:size=#entries]\n" 5663 "\t [:pause][:continue][:clear]\n" 5664 "\t [:name=histname1]\n" 5665 "\t [:nohitcount]\n" 5666 "\t [:<handler>.<action>]\n" 5667 "\t [if <filter>]\n\n" 5668 "\t Note, special fields can be used as well:\n" 5669 "\t common_timestamp - to record current timestamp\n" 5670 "\t common_cpu - to record the CPU the event happened on\n" 5671 "\n" 5672 "\t A hist trigger variable can be:\n" 5673 "\t - a reference to a field e.g. x=current_timestamp,\n" 5674 "\t - a reference to another variable e.g. y=$x,\n" 5675 "\t - a numeric literal: e.g. ms_per_sec=1000,\n" 5676 "\t - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n" 5677 "\n" 5678 "\t hist trigger arithmetic expressions support addition(+), subtraction(-),\n" 5679 "\t multiplication(*) and division(/) operators. An operand can be either a\n" 5680 "\t variable reference, field or numeric literal.\n" 5681 "\n" 5682 "\t When a matching event is hit, an entry is added to a hash\n" 5683 "\t table using the key(s) and value(s) named, and the value of a\n" 5684 "\t sum called 'hitcount' is incremented. Keys and values\n" 5685 "\t correspond to fields in the event's format description. Keys\n" 5686 "\t can be any field, or the special string 'common_stacktrace'.\n" 5687 "\t Compound keys consisting of up to two fields can be specified\n" 5688 "\t by the 'keys' keyword. Values must correspond to numeric\n" 5689 "\t fields. Sort keys consisting of up to two fields can be\n" 5690 "\t specified using the 'sort' keyword. The sort direction can\n" 5691 "\t be modified by appending '.descending' or '.ascending' to a\n" 5692 "\t sort field. The 'size' parameter can be used to specify more\n" 5693 "\t or fewer than the default 2048 entries for the hashtable size.\n" 5694 "\t If a hist trigger is given a name using the 'name' parameter,\n" 5695 "\t its histogram data will be shared with other triggers of the\n" 5696 "\t same name, and trigger hits will update this common data.\n\n" 5697 "\t Reading the 'hist' file for the event will dump the hash\n" 5698 "\t table in its entirety to stdout. If there are multiple hist\n" 5699 "\t triggers attached to an event, there will be a table for each\n" 5700 "\t trigger in the output. The table displayed for a named\n" 5701 "\t trigger will be the same as any other instance having the\n" 5702 "\t same name. The default format used to display a given field\n" 5703 "\t can be modified by appending any of the following modifiers\n" 5704 "\t to the field name, as applicable:\n\n" 5705 "\t .hex display a number as a hex value\n" 5706 "\t .sym display an address as a symbol\n" 5707 "\t .sym-offset display an address as a symbol and offset\n" 5708 "\t .execname display a common_pid as a program name\n" 5709 "\t .syscall display a syscall id as a syscall name\n" 5710 "\t .log2 display log2 value rather than raw number\n" 5711 "\t .buckets=size display values in groups of size rather than raw number\n" 5712 "\t .usecs display a common_timestamp in microseconds\n" 5713 "\t .percent display a number of percentage value\n" 5714 "\t .graph display a bar-graph of a value\n\n" 5715 "\t The 'pause' parameter can be used to pause an existing hist\n" 5716 "\t trigger or to start a hist trigger but not log any events\n" 5717 "\t until told to do so. 
'continue' can be used to start or\n" 5718 "\t restart a paused hist trigger.\n\n" 5719 "\t The 'clear' parameter will clear the contents of a running\n" 5720 "\t hist trigger and leave its current paused/active state\n" 5721 "\t unchanged.\n\n" 5722 "\t The 'nohitcount' (or NOHC) parameter will suppress display of\n" 5723 "\t raw hitcount in the histogram.\n\n" 5724 "\t The enable_hist and disable_hist triggers can be used to\n" 5725 "\t have one event conditionally start and stop another event's\n" 5726 "\t already-attached hist trigger. The syntax is analogous to\n" 5727 "\t the enable_event and disable_event triggers.\n\n" 5728 "\t Hist trigger handlers and actions are executed whenever a\n" 5729 "\t a histogram entry is added or updated. They take the form:\n\n" 5730 "\t <handler>.<action>\n\n" 5731 "\t The available handlers are:\n\n" 5732 "\t onmatch(matching.event) - invoke on addition or update\n" 5733 "\t onmax(var) - invoke if var exceeds current max\n" 5734 "\t onchange(var) - invoke action if var changes\n\n" 5735 "\t The available actions are:\n\n" 5736 "\t trace(<synthetic_event>,param list) - generate synthetic event\n" 5737 "\t save(field,...) - save current event fields\n" 5738 #ifdef CONFIG_TRACER_SNAPSHOT 5739 "\t snapshot() - snapshot the trace buffer\n\n" 5740 #endif 5741 #ifdef CONFIG_SYNTH_EVENTS 5742 " events/synthetic_events\t- Create/append/remove/show synthetic events\n" 5743 "\t Write into this file to define/undefine new synthetic events.\n" 5744 "\t example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n" 5745 #endif 5746 #endif 5747 ; 5748 5749 static ssize_t 5750 tracing_readme_read(struct file *filp, char __user *ubuf, 5751 size_t cnt, loff_t *ppos) 5752 { 5753 return simple_read_from_buffer(ubuf, cnt, ppos, 5754 readme_msg, strlen(readme_msg)); 5755 } 5756 5757 static const struct file_operations tracing_readme_fops = { 5758 .open = tracing_open_generic, 5759 .read = tracing_readme_read, 5760 .llseek = generic_file_llseek, 5761 }; 5762 5763 #ifdef CONFIG_TRACE_EVAL_MAP_FILE 5764 static union trace_eval_map_item * 5765 update_eval_map(union trace_eval_map_item *ptr) 5766 { 5767 if (!ptr->map.eval_string) { 5768 if (ptr->tail.next) { 5769 ptr = ptr->tail.next; 5770 /* Set ptr to the next real item (skip head) */ 5771 ptr++; 5772 } else 5773 return NULL; 5774 } 5775 return ptr; 5776 } 5777 5778 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos) 5779 { 5780 union trace_eval_map_item *ptr = v; 5781 5782 /* 5783 * Paranoid! If ptr points to end, we don't want to increment past it. 5784 * This really should never happen. 
5785 */ 5786 (*pos)++; 5787 ptr = update_eval_map(ptr); 5788 if (WARN_ON_ONCE(!ptr)) 5789 return NULL; 5790 5791 ptr++; 5792 ptr = update_eval_map(ptr); 5793 5794 return ptr; 5795 } 5796 5797 static void *eval_map_start(struct seq_file *m, loff_t *pos) 5798 { 5799 union trace_eval_map_item *v; 5800 loff_t l = 0; 5801 5802 mutex_lock(&trace_eval_mutex); 5803 5804 v = trace_eval_maps; 5805 if (v) 5806 v++; 5807 5808 while (v && l < *pos) { 5809 v = eval_map_next(m, v, &l); 5810 } 5811 5812 return v; 5813 } 5814 5815 static void eval_map_stop(struct seq_file *m, void *v) 5816 { 5817 mutex_unlock(&trace_eval_mutex); 5818 } 5819 5820 static int eval_map_show(struct seq_file *m, void *v) 5821 { 5822 union trace_eval_map_item *ptr = v; 5823 5824 seq_printf(m, "%s %ld (%s)\n", 5825 ptr->map.eval_string, ptr->map.eval_value, 5826 ptr->map.system); 5827 5828 return 0; 5829 } 5830 5831 static const struct seq_operations tracing_eval_map_seq_ops = { 5832 .start = eval_map_start, 5833 .next = eval_map_next, 5834 .stop = eval_map_stop, 5835 .show = eval_map_show, 5836 }; 5837 5838 static int tracing_eval_map_open(struct inode *inode, struct file *filp) 5839 { 5840 int ret; 5841 5842 ret = tracing_check_open_get_tr(NULL); 5843 if (ret) 5844 return ret; 5845 5846 return seq_open(filp, &tracing_eval_map_seq_ops); 5847 } 5848 5849 static const struct file_operations tracing_eval_map_fops = { 5850 .open = tracing_eval_map_open, 5851 .read = seq_read, 5852 .llseek = seq_lseek, 5853 .release = seq_release, 5854 }; 5855 5856 static inline union trace_eval_map_item * 5857 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr) 5858 { 5859 /* Return tail of array given the head */ 5860 return ptr + ptr->head.length + 1; 5861 } 5862 5863 static void 5864 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start, 5865 int len) 5866 { 5867 struct trace_eval_map **stop; 5868 struct trace_eval_map **map; 5869 union trace_eval_map_item *map_array; 5870 union trace_eval_map_item *ptr; 5871 5872 stop = start + len; 5873 5874 /* 5875 * The trace_eval_maps contains the map plus a head and tail item, 5876 * where the head holds the module and length of array, and the 5877 * tail holds a pointer to the next list. 
5878 */ 5879 map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL); 5880 if (!map_array) { 5881 pr_warn("Unable to allocate trace eval mapping\n"); 5882 return; 5883 } 5884 5885 guard(mutex)(&trace_eval_mutex); 5886 5887 if (!trace_eval_maps) 5888 trace_eval_maps = map_array; 5889 else { 5890 ptr = trace_eval_maps; 5891 for (;;) { 5892 ptr = trace_eval_jmp_to_tail(ptr); 5893 if (!ptr->tail.next) 5894 break; 5895 ptr = ptr->tail.next; 5896 5897 } 5898 ptr->tail.next = map_array; 5899 } 5900 map_array->head.mod = mod; 5901 map_array->head.length = len; 5902 map_array++; 5903 5904 for (map = start; (unsigned long)map < (unsigned long)stop; map++) { 5905 map_array->map = **map; 5906 map_array++; 5907 } 5908 memset(map_array, 0, sizeof(*map_array)); 5909 } 5910 5911 static void trace_create_eval_file(struct dentry *d_tracer) 5912 { 5913 trace_create_file("eval_map", TRACE_MODE_READ, d_tracer, 5914 NULL, &tracing_eval_map_fops); 5915 } 5916 5917 #else /* CONFIG_TRACE_EVAL_MAP_FILE */ 5918 static inline void trace_create_eval_file(struct dentry *d_tracer) { } 5919 static inline void trace_insert_eval_map_file(struct module *mod, 5920 struct trace_eval_map **start, int len) { } 5921 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */ 5922 5923 static void 5924 trace_event_update_with_eval_map(struct module *mod, 5925 struct trace_eval_map **start, 5926 int len) 5927 { 5928 struct trace_eval_map **map; 5929 5930 /* Always run sanitizer only if btf_type_tag attr exists. */ 5931 if (len <= 0) { 5932 if (!(IS_ENABLED(CONFIG_DEBUG_INFO_BTF) && 5933 IS_ENABLED(CONFIG_PAHOLE_HAS_BTF_TAG) && 5934 __has_attribute(btf_type_tag))) 5935 return; 5936 } 5937 5938 map = start; 5939 5940 trace_event_update_all(map, len); 5941 5942 if (len <= 0) 5943 return; 5944 5945 trace_insert_eval_map_file(mod, start, len); 5946 } 5947 5948 static ssize_t 5949 tracing_set_trace_read(struct file *filp, char __user *ubuf, 5950 size_t cnt, loff_t *ppos) 5951 { 5952 struct trace_array *tr = filp->private_data; 5953 char buf[MAX_TRACER_SIZE+2]; 5954 int r; 5955 5956 scoped_guard(mutex, &trace_types_lock) { 5957 r = sprintf(buf, "%s\n", tr->current_trace->name); 5958 } 5959 5960 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 5961 } 5962 5963 int tracer_init(struct tracer *t, struct trace_array *tr) 5964 { 5965 tracing_reset_online_cpus(&tr->array_buffer); 5966 return t->init(tr); 5967 } 5968 5969 static void set_buffer_entries(struct array_buffer *buf, unsigned long val) 5970 { 5971 int cpu; 5972 5973 for_each_tracing_cpu(cpu) 5974 per_cpu_ptr(buf->data, cpu)->entries = val; 5975 } 5976 5977 static void update_buffer_entries(struct array_buffer *buf, int cpu) 5978 { 5979 if (cpu == RING_BUFFER_ALL_CPUS) { 5980 set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0)); 5981 } else { 5982 per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu); 5983 } 5984 } 5985 5986 #ifdef CONFIG_TRACER_MAX_TRACE 5987 /* resize @tr's buffer to the size of @size_tr's entries */ 5988 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf, 5989 struct array_buffer *size_buf, int cpu_id) 5990 { 5991 int cpu, ret = 0; 5992 5993 if (cpu_id == RING_BUFFER_ALL_CPUS) { 5994 for_each_tracing_cpu(cpu) { 5995 ret = ring_buffer_resize(trace_buf->buffer, 5996 per_cpu_ptr(size_buf->data, cpu)->entries, cpu); 5997 if (ret < 0) 5998 break; 5999 per_cpu_ptr(trace_buf->data, cpu)->entries = 6000 per_cpu_ptr(size_buf->data, cpu)->entries; 6001 } 6002 } else { 6003 ret = ring_buffer_resize(trace_buf->buffer, 6004 
per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id); 6005 if (ret == 0) 6006 per_cpu_ptr(trace_buf->data, cpu_id)->entries = 6007 per_cpu_ptr(size_buf->data, cpu_id)->entries; 6008 } 6009 6010 return ret; 6011 } 6012 #endif /* CONFIG_TRACER_MAX_TRACE */ 6013 6014 static int __tracing_resize_ring_buffer(struct trace_array *tr, 6015 unsigned long size, int cpu) 6016 { 6017 int ret; 6018 6019 /* 6020 * If kernel or user changes the size of the ring buffer 6021 * we use the size that was given, and we can forget about 6022 * expanding it later. 6023 */ 6024 trace_set_ring_buffer_expanded(tr); 6025 6026 /* May be called before buffers are initialized */ 6027 if (!tr->array_buffer.buffer) 6028 return 0; 6029 6030 /* Do not allow tracing while resizing ring buffer */ 6031 tracing_stop_tr(tr); 6032 6033 ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu); 6034 if (ret < 0) 6035 goto out_start; 6036 6037 #ifdef CONFIG_TRACER_MAX_TRACE 6038 if (!tr->allocated_snapshot) 6039 goto out; 6040 6041 ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu); 6042 if (ret < 0) { 6043 int r = resize_buffer_duplicate_size(&tr->array_buffer, 6044 &tr->array_buffer, cpu); 6045 if (r < 0) { 6046 /* 6047 * AARGH! We are left with different 6048 * size max buffer!!!! 6049 * The max buffer is our "snapshot" buffer. 6050 * When a tracer needs a snapshot (one of the 6051 * latency tracers), it swaps the max buffer 6052 * with the saved snap shot. We succeeded to 6053 * update the size of the main buffer, but failed to 6054 * update the size of the max buffer. But when we tried 6055 * to reset the main buffer to the original size, we 6056 * failed there too. This is very unlikely to 6057 * happen, but if it does, warn and kill all 6058 * tracing. 6059 */ 6060 WARN_ON(1); 6061 tracing_disabled = 1; 6062 } 6063 goto out_start; 6064 } 6065 6066 update_buffer_entries(&tr->max_buffer, cpu); 6067 6068 out: 6069 #endif /* CONFIG_TRACER_MAX_TRACE */ 6070 6071 update_buffer_entries(&tr->array_buffer, cpu); 6072 out_start: 6073 tracing_start_tr(tr); 6074 return ret; 6075 } 6076 6077 ssize_t tracing_resize_ring_buffer(struct trace_array *tr, 6078 unsigned long size, int cpu_id) 6079 { 6080 guard(mutex)(&trace_types_lock); 6081 6082 if (cpu_id != RING_BUFFER_ALL_CPUS) { 6083 /* make sure, this cpu is enabled in the mask */ 6084 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) 6085 return -EINVAL; 6086 } 6087 6088 return __tracing_resize_ring_buffer(tr, size, cpu_id); 6089 } 6090 6091 struct trace_mod_entry { 6092 unsigned long mod_addr; 6093 char mod_name[MODULE_NAME_LEN]; 6094 }; 6095 6096 struct trace_scratch { 6097 unsigned int clock_id; 6098 unsigned long text_addr; 6099 unsigned long nr_entries; 6100 struct trace_mod_entry entries[]; 6101 }; 6102 6103 static DEFINE_MUTEX(scratch_mutex); 6104 6105 static int cmp_mod_entry(const void *key, const void *pivot) 6106 { 6107 unsigned long addr = (unsigned long)key; 6108 const struct trace_mod_entry *ent = pivot; 6109 6110 if (addr >= ent[0].mod_addr && addr < ent[1].mod_addr) 6111 return 0; 6112 else 6113 return addr - ent->mod_addr; 6114 } 6115 6116 /** 6117 * trace_adjust_address() - Adjust prev boot address to current address. 6118 * @tr: Persistent ring buffer's trace_array. 6119 * @addr: Address in @tr which is adjusted. 
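 *
 * Returns the previous-boot address @addr translated into the current
 * boot's layout: module text addresses are shifted by the matching
 * module's delta and core kernel addresses by the kernel text delta.
 * If there is no last-boot data, or no translation applies, @addr is
 * returned unchanged.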
6120 */ 6121 unsigned long trace_adjust_address(struct trace_array *tr, unsigned long addr) 6122 { 6123 struct trace_module_delta *module_delta; 6124 struct trace_scratch *tscratch; 6125 struct trace_mod_entry *entry; 6126 unsigned long raddr; 6127 int idx = 0, nr_entries; 6128 6129 /* If we don't have last boot delta, return the address */ 6130 if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) 6131 return addr; 6132 6133 /* tr->module_delta must be protected by rcu. */ 6134 guard(rcu)(); 6135 tscratch = tr->scratch; 6136 /* if there is no tscratch, module_delta must be NULL. */ 6137 module_delta = READ_ONCE(tr->module_delta); 6138 if (!module_delta || !tscratch->nr_entries || 6139 tscratch->entries[0].mod_addr > addr) { 6140 raddr = addr + tr->text_delta; 6141 return __is_kernel(raddr) || is_kernel_core_data(raddr) || 6142 is_kernel_rodata(raddr) ? raddr : addr; 6143 } 6144 6145 /* Note that entries must be sorted. */ 6146 nr_entries = tscratch->nr_entries; 6147 if (nr_entries == 1 || 6148 tscratch->entries[nr_entries - 1].mod_addr < addr) 6149 idx = nr_entries - 1; 6150 else { 6151 entry = __inline_bsearch((void *)addr, 6152 tscratch->entries, 6153 nr_entries - 1, 6154 sizeof(tscratch->entries[0]), 6155 cmp_mod_entry); 6156 if (entry) 6157 idx = entry - tscratch->entries; 6158 } 6159 6160 return addr + module_delta->delta[idx]; 6161 } 6162 6163 #ifdef CONFIG_MODULES 6164 static int save_mod(struct module *mod, void *data) 6165 { 6166 struct trace_array *tr = data; 6167 struct trace_scratch *tscratch; 6168 struct trace_mod_entry *entry; 6169 unsigned int size; 6170 6171 tscratch = tr->scratch; 6172 if (!tscratch) 6173 return -1; 6174 size = tr->scratch_size; 6175 6176 if (struct_size(tscratch, entries, tscratch->nr_entries + 1) > size) 6177 return -1; 6178 6179 entry = &tscratch->entries[tscratch->nr_entries]; 6180 6181 tscratch->nr_entries++; 6182 6183 entry->mod_addr = (unsigned long)mod->mem[MOD_TEXT].base; 6184 strscpy(entry->mod_name, mod->name); 6185 6186 return 0; 6187 } 6188 #else 6189 static int save_mod(struct module *mod, void *data) 6190 { 6191 return 0; 6192 } 6193 #endif 6194 6195 static void update_last_data(struct trace_array *tr) 6196 { 6197 struct trace_module_delta *module_delta; 6198 struct trace_scratch *tscratch; 6199 6200 if (!(tr->flags & TRACE_ARRAY_FL_BOOT)) 6201 return; 6202 6203 if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) 6204 return; 6205 6206 /* Only if the buffer has previous boot data, clear and update it. */ 6207 tr->flags &= ~TRACE_ARRAY_FL_LAST_BOOT; 6208 6209 /* Reset the module list and reload them */ 6210 if (tr->scratch) { 6211 struct trace_scratch *tscratch = tr->scratch; 6212 6213 tscratch->clock_id = tr->clock_id; 6214 memset(tscratch->entries, 0, 6215 flex_array_size(tscratch, entries, tscratch->nr_entries)); 6216 tscratch->nr_entries = 0; 6217 6218 guard(mutex)(&scratch_mutex); 6219 module_for_each_mod(save_mod, tr); 6220 } 6221 6222 /* 6223 * Need to clear all CPU buffers as there cannot be events 6224 * from the previous boot mixed with events from this boot 6225 * as that will cause a confusing trace. Need to clear all 6226 * CPU buffers, even for those that may currently be offline.
6227 */ 6228 tracing_reset_all_cpus(&tr->array_buffer); 6229 6230 /* Using current data now */ 6231 tr->text_delta = 0; 6232 6233 if (!tr->scratch) 6234 return; 6235 6236 tscratch = tr->scratch; 6237 module_delta = READ_ONCE(tr->module_delta); 6238 WRITE_ONCE(tr->module_delta, NULL); 6239 kfree_rcu(module_delta, rcu); 6240 6241 /* Set the persistent ring buffer meta data to this address */ 6242 tscratch->text_addr = (unsigned long)_text; 6243 } 6244 6245 /** 6246 * tracing_update_buffers - used by tracing facility to expand ring buffers 6247 * @tr: The tracing instance 6248 * 6249 * To save on memory when the tracing is never used on a system with it 6250 * configured in. The ring buffers are set to a minimum size. But once 6251 * a user starts to use the tracing facility, then they need to grow 6252 * to their default size. 6253 * 6254 * This function is to be called when a tracer is about to be used. 6255 */ 6256 int tracing_update_buffers(struct trace_array *tr) 6257 { 6258 int ret = 0; 6259 6260 guard(mutex)(&trace_types_lock); 6261 6262 update_last_data(tr); 6263 6264 if (!tr->ring_buffer_expanded) 6265 ret = __tracing_resize_ring_buffer(tr, trace_buf_size, 6266 RING_BUFFER_ALL_CPUS); 6267 return ret; 6268 } 6269 6270 /* 6271 * Used to clear out the tracer before deletion of an instance. 6272 * Must have trace_types_lock held. 6273 */ 6274 static void tracing_set_nop(struct trace_array *tr) 6275 { 6276 if (tr->current_trace == &nop_trace) 6277 return; 6278 6279 tr->current_trace->enabled--; 6280 6281 if (tr->current_trace->reset) 6282 tr->current_trace->reset(tr); 6283 6284 tr->current_trace = &nop_trace; 6285 tr->current_trace_flags = nop_trace.flags; 6286 } 6287 6288 static bool tracer_options_updated; 6289 6290 int tracing_set_tracer(struct trace_array *tr, const char *buf) 6291 { 6292 struct tracer *trace = NULL; 6293 struct tracers *t; 6294 #ifdef CONFIG_TRACER_MAX_TRACE 6295 bool had_max_tr; 6296 #endif 6297 int ret; 6298 6299 guard(mutex)(&trace_types_lock); 6300 6301 update_last_data(tr); 6302 6303 if (!tr->ring_buffer_expanded) { 6304 ret = __tracing_resize_ring_buffer(tr, trace_buf_size, 6305 RING_BUFFER_ALL_CPUS); 6306 if (ret < 0) 6307 return ret; 6308 ret = 0; 6309 } 6310 6311 list_for_each_entry(t, &tr->tracers, list) { 6312 if (strcmp(t->tracer->name, buf) == 0) { 6313 trace = t->tracer; 6314 break; 6315 } 6316 } 6317 if (!trace) 6318 return -EINVAL; 6319 6320 if (trace == tr->current_trace) 6321 return 0; 6322 6323 #ifdef CONFIG_TRACER_SNAPSHOT 6324 if (trace->use_max_tr) { 6325 local_irq_disable(); 6326 arch_spin_lock(&tr->max_lock); 6327 ret = tr->cond_snapshot ? 
-EBUSY : 0; 6328 arch_spin_unlock(&tr->max_lock); 6329 local_irq_enable(); 6330 if (ret) 6331 return ret; 6332 } 6333 #endif 6334 /* Some tracers won't work on kernel command line */ 6335 if (system_state < SYSTEM_RUNNING && trace->noboot) { 6336 pr_warn("Tracer '%s' is not allowed on command line, ignored\n", 6337 trace->name); 6338 return -EINVAL; 6339 } 6340 6341 /* Some tracers are only allowed for the top level buffer */ 6342 if (!trace_ok_for_array(trace, tr)) 6343 return -EINVAL; 6344 6345 /* If trace pipe files are being read, we can't change the tracer */ 6346 if (tr->trace_ref) 6347 return -EBUSY; 6348 6349 trace_branch_disable(); 6350 6351 tr->current_trace->enabled--; 6352 6353 if (tr->current_trace->reset) 6354 tr->current_trace->reset(tr); 6355 6356 #ifdef CONFIG_TRACER_MAX_TRACE 6357 had_max_tr = tr->current_trace->use_max_tr; 6358 6359 /* Current trace needs to be nop_trace before synchronize_rcu */ 6360 tr->current_trace = &nop_trace; 6361 tr->current_trace_flags = nop_trace.flags; 6362 6363 if (had_max_tr && !trace->use_max_tr) { 6364 /* 6365 * We need to make sure that the update_max_tr sees that 6366 * current_trace changed to nop_trace to keep it from 6367 * swapping the buffers after we resize it. 6368 * The update_max_tr is called from interrupts disabled 6369 * so a synchronized_sched() is sufficient. 6370 */ 6371 synchronize_rcu(); 6372 free_snapshot(tr); 6373 tracing_disarm_snapshot(tr); 6374 } 6375 6376 if (!had_max_tr && trace->use_max_tr) { 6377 ret = tracing_arm_snapshot_locked(tr); 6378 if (ret) 6379 return ret; 6380 } 6381 #else 6382 tr->current_trace = &nop_trace; 6383 #endif 6384 6385 tr->current_trace_flags = t->flags ? : t->tracer->flags; 6386 6387 if (trace->init) { 6388 ret = tracer_init(trace, tr); 6389 if (ret) { 6390 #ifdef CONFIG_TRACER_MAX_TRACE 6391 if (trace->use_max_tr) 6392 tracing_disarm_snapshot(tr); 6393 #endif 6394 tr->current_trace_flags = nop_trace.flags; 6395 return ret; 6396 } 6397 } 6398 6399 tr->current_trace = trace; 6400 tr->current_trace->enabled++; 6401 trace_branch_enable(tr); 6402 6403 return 0; 6404 } 6405 6406 static ssize_t 6407 tracing_set_trace_write(struct file *filp, const char __user *ubuf, 6408 size_t cnt, loff_t *ppos) 6409 { 6410 struct trace_array *tr = filp->private_data; 6411 char buf[MAX_TRACER_SIZE+1]; 6412 char *name; 6413 size_t ret; 6414 int err; 6415 6416 ret = cnt; 6417 6418 if (cnt > MAX_TRACER_SIZE) 6419 cnt = MAX_TRACER_SIZE; 6420 6421 if (copy_from_user(buf, ubuf, cnt)) 6422 return -EFAULT; 6423 6424 buf[cnt] = 0; 6425 6426 name = strim(buf); 6427 6428 err = tracing_set_tracer(tr, name); 6429 if (err) 6430 return err; 6431 6432 *ppos += ret; 6433 6434 return ret; 6435 } 6436 6437 static ssize_t 6438 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf, 6439 size_t cnt, loff_t *ppos) 6440 { 6441 char buf[64]; 6442 int r; 6443 6444 r = snprintf(buf, sizeof(buf), "%ld\n", 6445 *ptr == (unsigned long)-1 ? 
-1 : nsecs_to_usecs(*ptr)); 6446 if (r > sizeof(buf)) 6447 r = sizeof(buf); 6448 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 6449 } 6450 6451 static ssize_t 6452 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf, 6453 size_t cnt, loff_t *ppos) 6454 { 6455 unsigned long val; 6456 int ret; 6457 6458 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 6459 if (ret) 6460 return ret; 6461 6462 *ptr = val * 1000; 6463 6464 return cnt; 6465 } 6466 6467 static ssize_t 6468 tracing_thresh_read(struct file *filp, char __user *ubuf, 6469 size_t cnt, loff_t *ppos) 6470 { 6471 return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos); 6472 } 6473 6474 static ssize_t 6475 tracing_thresh_write(struct file *filp, const char __user *ubuf, 6476 size_t cnt, loff_t *ppos) 6477 { 6478 struct trace_array *tr = filp->private_data; 6479 int ret; 6480 6481 guard(mutex)(&trace_types_lock); 6482 ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos); 6483 if (ret < 0) 6484 return ret; 6485 6486 if (tr->current_trace->update_thresh) { 6487 ret = tr->current_trace->update_thresh(tr); 6488 if (ret < 0) 6489 return ret; 6490 } 6491 6492 return cnt; 6493 } 6494 6495 #ifdef CONFIG_TRACER_MAX_TRACE 6496 6497 static ssize_t 6498 tracing_max_lat_read(struct file *filp, char __user *ubuf, 6499 size_t cnt, loff_t *ppos) 6500 { 6501 struct trace_array *tr = filp->private_data; 6502 6503 return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos); 6504 } 6505 6506 static ssize_t 6507 tracing_max_lat_write(struct file *filp, const char __user *ubuf, 6508 size_t cnt, loff_t *ppos) 6509 { 6510 struct trace_array *tr = filp->private_data; 6511 6512 return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos); 6513 } 6514 6515 #endif 6516 6517 static int open_pipe_on_cpu(struct trace_array *tr, int cpu) 6518 { 6519 if (cpu == RING_BUFFER_ALL_CPUS) { 6520 if (cpumask_empty(tr->pipe_cpumask)) { 6521 cpumask_setall(tr->pipe_cpumask); 6522 return 0; 6523 } 6524 } else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) { 6525 cpumask_set_cpu(cpu, tr->pipe_cpumask); 6526 return 0; 6527 } 6528 return -EBUSY; 6529 } 6530 6531 static void close_pipe_on_cpu(struct trace_array *tr, int cpu) 6532 { 6533 if (cpu == RING_BUFFER_ALL_CPUS) { 6534 WARN_ON(!cpumask_full(tr->pipe_cpumask)); 6535 cpumask_clear(tr->pipe_cpumask); 6536 } else { 6537 WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask)); 6538 cpumask_clear_cpu(cpu, tr->pipe_cpumask); 6539 } 6540 } 6541 6542 static int tracing_open_pipe(struct inode *inode, struct file *filp) 6543 { 6544 struct trace_array *tr = inode->i_private; 6545 struct trace_iterator *iter; 6546 int cpu; 6547 int ret; 6548 6549 ret = tracing_check_open_get_tr(tr); 6550 if (ret) 6551 return ret; 6552 6553 guard(mutex)(&trace_types_lock); 6554 cpu = tracing_get_cpu(inode); 6555 ret = open_pipe_on_cpu(tr, cpu); 6556 if (ret) 6557 goto fail_pipe_on_cpu; 6558 6559 /* create a buffer to store the information to pass to userspace */ 6560 iter = kzalloc(sizeof(*iter), GFP_KERNEL); 6561 if (!iter) { 6562 ret = -ENOMEM; 6563 goto fail_alloc_iter; 6564 } 6565 6566 trace_seq_init(&iter->seq); 6567 iter->trace = tr->current_trace; 6568 6569 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) { 6570 ret = -ENOMEM; 6571 goto fail; 6572 } 6573 6574 /* trace pipe does not show start of buffer */ 6575 cpumask_setall(iter->started); 6576 6577 if (tr->trace_flags & TRACE_ITER(LATENCY_FMT)) 6578 iter->iter_flags |= TRACE_FILE_LAT_FMT; 6579 6580 /* Output in nanoseconds only if we are using a clock in nanoseconds. 
*/ 6581 if (trace_clocks[tr->clock_id].in_ns) 6582 iter->iter_flags |= TRACE_FILE_TIME_IN_NS; 6583 6584 iter->tr = tr; 6585 iter->array_buffer = &tr->array_buffer; 6586 iter->cpu_file = cpu; 6587 mutex_init(&iter->mutex); 6588 filp->private_data = iter; 6589 6590 if (iter->trace->pipe_open) 6591 iter->trace->pipe_open(iter); 6592 6593 nonseekable_open(inode, filp); 6594 6595 tr->trace_ref++; 6596 6597 return ret; 6598 6599 fail: 6600 kfree(iter); 6601 fail_alloc_iter: 6602 close_pipe_on_cpu(tr, cpu); 6603 fail_pipe_on_cpu: 6604 __trace_array_put(tr); 6605 return ret; 6606 } 6607 6608 static int tracing_release_pipe(struct inode *inode, struct file *file) 6609 { 6610 struct trace_iterator *iter = file->private_data; 6611 struct trace_array *tr = inode->i_private; 6612 6613 scoped_guard(mutex, &trace_types_lock) { 6614 tr->trace_ref--; 6615 6616 if (iter->trace->pipe_close) 6617 iter->trace->pipe_close(iter); 6618 close_pipe_on_cpu(tr, iter->cpu_file); 6619 } 6620 6621 free_trace_iter_content(iter); 6622 kfree(iter); 6623 6624 trace_array_put(tr); 6625 6626 return 0; 6627 } 6628 6629 static __poll_t 6630 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table) 6631 { 6632 struct trace_array *tr = iter->tr; 6633 6634 /* Iterators are static, they should be filled or empty */ 6635 if (trace_buffer_iter(iter, iter->cpu_file)) 6636 return EPOLLIN | EPOLLRDNORM; 6637 6638 if (tr->trace_flags & TRACE_ITER(BLOCK)) 6639 /* 6640 * Always select as readable when in blocking mode 6641 */ 6642 return EPOLLIN | EPOLLRDNORM; 6643 else 6644 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file, 6645 filp, poll_table, iter->tr->buffer_percent); 6646 } 6647 6648 static __poll_t 6649 tracing_poll_pipe(struct file *filp, poll_table *poll_table) 6650 { 6651 struct trace_iterator *iter = filp->private_data; 6652 6653 return trace_poll(iter, filp, poll_table); 6654 } 6655 6656 /* Must be called with iter->mutex held. */ 6657 static int tracing_wait_pipe(struct file *filp) 6658 { 6659 struct trace_iterator *iter = filp->private_data; 6660 int ret; 6661 6662 while (trace_empty(iter)) { 6663 6664 if ((filp->f_flags & O_NONBLOCK)) { 6665 return -EAGAIN; 6666 } 6667 6668 /* 6669 * We block until we read something and tracing is disabled. 6670 * We still block if tracing is disabled, but we have never 6671 * read anything. This allows a user to cat this file, and 6672 * then enable tracing. But after we have read something, 6673 * we give an EOF when tracing is again disabled. 6674 * 6675 * iter->pos will be 0 if we haven't read anything. 6676 */ 6677 if (!tracer_tracing_is_on(iter->tr) && iter->pos) 6678 break; 6679 6680 mutex_unlock(&iter->mutex); 6681 6682 ret = wait_on_pipe(iter, 0); 6683 6684 mutex_lock(&iter->mutex); 6685 6686 if (ret) 6687 return ret; 6688 } 6689 6690 return 1; 6691 } 6692 6693 static bool update_last_data_if_empty(struct trace_array *tr) 6694 { 6695 if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) 6696 return false; 6697 6698 if (!ring_buffer_empty(tr->array_buffer.buffer)) 6699 return false; 6700 6701 /* 6702 * If the buffer contains the last boot data and all per-cpu 6703 * buffers are empty, reset it from the kernel side. 6704 */ 6705 update_last_data(tr); 6706 return true; 6707 } 6708 6709 /* 6710 * Consumer reader. 
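 *
 * Unlike iterating reads of the "trace" file, reads through this pipe
 * interface (trace_pipe) consume the events they return: each entry is
 * removed from the ring buffer via trace_consume() once it has been
 * formatted, and readers block for new data unless O_NONBLOCK is set.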
6711 */ 6712 static ssize_t 6713 tracing_read_pipe(struct file *filp, char __user *ubuf, 6714 size_t cnt, loff_t *ppos) 6715 { 6716 struct trace_iterator *iter = filp->private_data; 6717 ssize_t sret; 6718 6719 /* 6720 * Avoid more than one consumer on a single file descriptor. 6721 * This is just a matter of trace coherency: the ring buffer itself 6722 * is protected. 6723 */ 6724 guard(mutex)(&iter->mutex); 6725 6726 /* return any leftover data */ 6727 sret = trace_seq_to_user(&iter->seq, ubuf, cnt); 6728 if (sret != -EBUSY) 6729 return sret; 6730 6731 trace_seq_init(&iter->seq); 6732 6733 if (iter->trace->read) { 6734 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos); 6735 if (sret) 6736 return sret; 6737 } 6738 6739 waitagain: 6740 if (update_last_data_if_empty(iter->tr)) 6741 return 0; 6742 6743 sret = tracing_wait_pipe(filp); 6744 if (sret <= 0) 6745 return sret; 6746 6747 /* stop when tracing is finished */ 6748 if (trace_empty(iter)) 6749 return 0; 6750 6751 if (cnt >= TRACE_SEQ_BUFFER_SIZE) 6752 cnt = TRACE_SEQ_BUFFER_SIZE - 1; 6753 6754 /* reset all but tr, trace, and overruns */ 6755 trace_iterator_reset(iter); 6756 cpumask_clear(iter->started); 6757 trace_seq_init(&iter->seq); 6758 6759 trace_event_read_lock(); 6760 trace_access_lock(iter->cpu_file); 6761 while (trace_find_next_entry_inc(iter) != NULL) { 6762 enum print_line_t ret; 6763 int save_len = iter->seq.seq.len; 6764 6765 ret = print_trace_line(iter); 6766 if (ret == TRACE_TYPE_PARTIAL_LINE) { 6767 /* 6768 * If one print_trace_line() fills entire trace_seq in one shot, 6769 * trace_seq_to_user() will return -EBUSY because save_len == 0. 6770 * In this case, we need to consume it, otherwise the loop will peek 6771 * this event next time, resulting in an infinite loop. 6772 */ 6773 if (save_len == 0) { 6774 iter->seq.full = 0; 6775 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n"); 6776 trace_consume(iter); 6777 break; 6778 } 6779 6780 /* In other cases, don't print partial lines */ 6781 iter->seq.seq.len = save_len; 6782 break; 6783 } 6784 if (ret != TRACE_TYPE_NO_CONSUME) 6785 trace_consume(iter); 6786 6787 if (trace_seq_used(&iter->seq) >= cnt) 6788 break; 6789 6790 /* 6791 * Setting the full flag means we reached the trace_seq buffer 6792 * size and we should leave by the partial output condition above. 6793 * One of the trace_seq_* functions is not used properly. 6794 */ 6795 WARN_ONCE(iter->seq.full, "full flag set for trace type %d", 6796 iter->ent->type); 6797 } 6798 trace_access_unlock(iter->cpu_file); 6799 trace_event_read_unlock(); 6800 6801 /* Now copy what we have to the user */ 6802 sret = trace_seq_to_user(&iter->seq, ubuf, cnt); 6803 if (iter->seq.readpos >= trace_seq_used(&iter->seq)) 6804 trace_seq_init(&iter->seq); 6805 6806 /* 6807 * If there was nothing to send to user, in spite of consuming trace 6808 * entries, go back to wait for more entries. 6809 */ 6810 if (sret == -EBUSY) 6811 goto waitagain; 6812 6813 return sret; 6814 } 6815 6816 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd, 6817 unsigned int idx) 6818 { 6819 __free_page(spd->pages[idx]); 6820 } 6821 6822 static size_t 6823 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter) 6824 { 6825 size_t count; 6826 int save_len; 6827 int ret; 6828 6829 /* Seq buffer is page-sized, exactly what we need.
*/ 6830 for (;;) { 6831 save_len = iter->seq.seq.len; 6832 ret = print_trace_line(iter); 6833 6834 if (trace_seq_has_overflowed(&iter->seq)) { 6835 iter->seq.seq.len = save_len; 6836 break; 6837 } 6838 6839 /* 6840 * This should not be hit, because it should only 6841 * be set if the iter->seq overflowed. But check it 6842 * anyway to be safe. 6843 */ 6844 if (ret == TRACE_TYPE_PARTIAL_LINE) { 6845 iter->seq.seq.len = save_len; 6846 break; 6847 } 6848 6849 count = trace_seq_used(&iter->seq) - save_len; 6850 if (rem < count) { 6851 rem = 0; 6852 iter->seq.seq.len = save_len; 6853 break; 6854 } 6855 6856 if (ret != TRACE_TYPE_NO_CONSUME) 6857 trace_consume(iter); 6858 rem -= count; 6859 if (!trace_find_next_entry_inc(iter)) { 6860 rem = 0; 6861 iter->ent = NULL; 6862 break; 6863 } 6864 } 6865 6866 return rem; 6867 } 6868 6869 static ssize_t tracing_splice_read_pipe(struct file *filp, 6870 loff_t *ppos, 6871 struct pipe_inode_info *pipe, 6872 size_t len, 6873 unsigned int flags) 6874 { 6875 struct page *pages_def[PIPE_DEF_BUFFERS]; 6876 struct partial_page partial_def[PIPE_DEF_BUFFERS]; 6877 struct trace_iterator *iter = filp->private_data; 6878 struct splice_pipe_desc spd = { 6879 .pages = pages_def, 6880 .partial = partial_def, 6881 .nr_pages = 0, /* This gets updated below. */ 6882 .nr_pages_max = PIPE_DEF_BUFFERS, 6883 .ops = &default_pipe_buf_ops, 6884 .spd_release = tracing_spd_release_pipe, 6885 }; 6886 ssize_t ret; 6887 size_t rem; 6888 unsigned int i; 6889 6890 if (splice_grow_spd(pipe, &spd)) 6891 return -ENOMEM; 6892 6893 mutex_lock(&iter->mutex); 6894 6895 if (iter->trace->splice_read) { 6896 ret = iter->trace->splice_read(iter, filp, 6897 ppos, pipe, len, flags); 6898 if (ret) 6899 goto out_err; 6900 } 6901 6902 ret = tracing_wait_pipe(filp); 6903 if (ret <= 0) 6904 goto out_err; 6905 6906 if (!iter->ent && !trace_find_next_entry_inc(iter)) { 6907 ret = -EFAULT; 6908 goto out_err; 6909 } 6910 6911 trace_event_read_lock(); 6912 trace_access_lock(iter->cpu_file); 6913 6914 /* Fill as many pages as possible. */ 6915 for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) { 6916 spd.pages[i] = alloc_page(GFP_KERNEL); 6917 if (!spd.pages[i]) 6918 break; 6919 6920 rem = tracing_fill_pipe_page(rem, iter); 6921 6922 /* Copy the data into the page, so we can start over. 
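 * trace_seq_to_buffer() below copies at most a page of formatted text
 * into the freshly allocated page; the seq buffer is then re-initialized
 * so the next iteration of the loop starts with an empty buffer.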
*/ 6923 ret = trace_seq_to_buffer(&iter->seq, 6924 page_address(spd.pages[i]), 6925 min((size_t)trace_seq_used(&iter->seq), 6926 (size_t)PAGE_SIZE)); 6927 if (ret < 0) { 6928 __free_page(spd.pages[i]); 6929 break; 6930 } 6931 spd.partial[i].offset = 0; 6932 spd.partial[i].len = ret; 6933 6934 trace_seq_init(&iter->seq); 6935 } 6936 6937 trace_access_unlock(iter->cpu_file); 6938 trace_event_read_unlock(); 6939 mutex_unlock(&iter->mutex); 6940 6941 spd.nr_pages = i; 6942 6943 if (i) 6944 ret = splice_to_pipe(pipe, &spd); 6945 else 6946 ret = 0; 6947 out: 6948 splice_shrink_spd(&spd); 6949 return ret; 6950 6951 out_err: 6952 mutex_unlock(&iter->mutex); 6953 goto out; 6954 } 6955 6956 static ssize_t 6957 tracing_syscall_buf_read(struct file *filp, char __user *ubuf, 6958 size_t cnt, loff_t *ppos) 6959 { 6960 struct inode *inode = file_inode(filp); 6961 struct trace_array *tr = inode->i_private; 6962 char buf[64]; 6963 int r; 6964 6965 r = snprintf(buf, 64, "%d\n", tr->syscall_buf_sz); 6966 6967 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 6968 } 6969 6970 static ssize_t 6971 tracing_syscall_buf_write(struct file *filp, const char __user *ubuf, 6972 size_t cnt, loff_t *ppos) 6973 { 6974 struct inode *inode = file_inode(filp); 6975 struct trace_array *tr = inode->i_private; 6976 unsigned long val; 6977 int ret; 6978 6979 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 6980 if (ret) 6981 return ret; 6982 6983 if (val > SYSCALL_FAULT_USER_MAX) 6984 val = SYSCALL_FAULT_USER_MAX; 6985 6986 tr->syscall_buf_sz = val; 6987 6988 *ppos += cnt; 6989 6990 return cnt; 6991 } 6992 6993 static ssize_t 6994 tracing_entries_read(struct file *filp, char __user *ubuf, 6995 size_t cnt, loff_t *ppos) 6996 { 6997 struct inode *inode = file_inode(filp); 6998 struct trace_array *tr = inode->i_private; 6999 int cpu = tracing_get_cpu(inode); 7000 char buf[64]; 7001 int r = 0; 7002 ssize_t ret; 7003 7004 mutex_lock(&trace_types_lock); 7005 7006 if (cpu == RING_BUFFER_ALL_CPUS) { 7007 int cpu, buf_size_same; 7008 unsigned long size; 7009 7010 size = 0; 7011 buf_size_same = 1; 7012 /* check if all cpu sizes are same */ 7013 for_each_tracing_cpu(cpu) { 7014 /* fill in the size from first enabled cpu */ 7015 if (size == 0) 7016 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries; 7017 if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) { 7018 buf_size_same = 0; 7019 break; 7020 } 7021 } 7022 7023 if (buf_size_same) { 7024 if (!tr->ring_buffer_expanded) 7025 r = sprintf(buf, "%lu (expanded: %lu)\n", 7026 size >> 10, 7027 trace_buf_size >> 10); 7028 else 7029 r = sprintf(buf, "%lu\n", size >> 10); 7030 } else 7031 r = sprintf(buf, "X\n"); 7032 } else 7033 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10); 7034 7035 mutex_unlock(&trace_types_lock); 7036 7037 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 7038 return ret; 7039 } 7040 7041 static ssize_t 7042 tracing_entries_write(struct file *filp, const char __user *ubuf, 7043 size_t cnt, loff_t *ppos) 7044 { 7045 struct inode *inode = file_inode(filp); 7046 struct trace_array *tr = inode->i_private; 7047 unsigned long val; 7048 int ret; 7049 7050 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 7051 if (ret) 7052 return ret; 7053 7054 /* must have at least 1 entry */ 7055 if (!val) 7056 return -EINVAL; 7057 7058 /* value is in KB */ 7059 val <<= 10; 7060 ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode)); 7061 if (ret < 0) 7062 return ret; 7063 7064 *ppos += cnt; 7065 7066 return cnt; 7067 } 7068 
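/*
 * The two handlers above back the per-instance buffer size control file
 * (exposed in tracefs as "buffer_size_kb", both per CPU and for the whole
 * instance). Values are read and written in KiB, and writing 0 is rejected
 * because the ring buffer must hold at least one entry. A purely
 * illustrative interaction, assuming the default tracefs mount point:
 *
 *	# echo 4096 > /sys/kernel/tracing/buffer_size_kb
 *	# cat /sys/kernel/tracing/buffer_size_kb
 *	4096
 */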
7069 static ssize_t 7070 tracing_total_entries_read(struct file *filp, char __user *ubuf, 7071 size_t cnt, loff_t *ppos) 7072 { 7073 struct trace_array *tr = filp->private_data; 7074 char buf[64]; 7075 int r, cpu; 7076 unsigned long size = 0, expanded_size = 0; 7077 7078 mutex_lock(&trace_types_lock); 7079 for_each_tracing_cpu(cpu) { 7080 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10; 7081 if (!tr->ring_buffer_expanded) 7082 expanded_size += trace_buf_size >> 10; 7083 } 7084 if (tr->ring_buffer_expanded) 7085 r = sprintf(buf, "%lu\n", size); 7086 else 7087 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size); 7088 mutex_unlock(&trace_types_lock); 7089 7090 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 7091 } 7092 7093 #define LAST_BOOT_HEADER ((void *)1) 7094 7095 static void *l_next(struct seq_file *m, void *v, loff_t *pos) 7096 { 7097 struct trace_array *tr = m->private; 7098 struct trace_scratch *tscratch = tr->scratch; 7099 unsigned int index = *pos; 7100 7101 (*pos)++; 7102 7103 if (*pos == 1) 7104 return LAST_BOOT_HEADER; 7105 7106 /* Only show offsets of the last boot data */ 7107 if (!tscratch || !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) 7108 return NULL; 7109 7110 /* *pos 0 is for the header, 1 is for the first module */ 7111 index--; 7112 7113 if (index >= tscratch->nr_entries) 7114 return NULL; 7115 7116 return &tscratch->entries[index]; 7117 } 7118 7119 static void *l_start(struct seq_file *m, loff_t *pos) 7120 { 7121 mutex_lock(&scratch_mutex); 7122 7123 return l_next(m, NULL, pos); 7124 } 7125 7126 static void l_stop(struct seq_file *m, void *p) 7127 { 7128 mutex_unlock(&scratch_mutex); 7129 } 7130 7131 static void show_last_boot_header(struct seq_file *m, struct trace_array *tr) 7132 { 7133 struct trace_scratch *tscratch = tr->scratch; 7134 7135 /* 7136 * Do not leak KASLR address. This only shows the KASLR address of 7137 * the last boot. When the ring buffer is started, the LAST_BOOT 7138 * flag gets cleared, and this should only report "current". 7139 * Otherwise it shows the KASLR address from the previous boot which 7140 * should not be the same as the current boot. 
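 *
 * The header printed here is either the previous boot's saved _text
 * address followed by "[kernel]", or the literal "# Current" line when
 * the buffer only holds data from this boot.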
7141 */ 7142 if (tscratch && (tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) 7143 seq_printf(m, "%lx\t[kernel]\n", tscratch->text_addr); 7144 else 7145 seq_puts(m, "# Current\n"); 7146 } 7147 7148 static int l_show(struct seq_file *m, void *v) 7149 { 7150 struct trace_array *tr = m->private; 7151 struct trace_mod_entry *entry = v; 7152 7153 if (v == LAST_BOOT_HEADER) { 7154 show_last_boot_header(m, tr); 7155 return 0; 7156 } 7157 7158 seq_printf(m, "%lx\t%s\n", entry->mod_addr, entry->mod_name); 7159 return 0; 7160 } 7161 7162 static const struct seq_operations last_boot_seq_ops = { 7163 .start = l_start, 7164 .next = l_next, 7165 .stop = l_stop, 7166 .show = l_show, 7167 }; 7168 7169 static int tracing_last_boot_open(struct inode *inode, struct file *file) 7170 { 7171 struct trace_array *tr = inode->i_private; 7172 struct seq_file *m; 7173 int ret; 7174 7175 ret = tracing_check_open_get_tr(tr); 7176 if (ret) 7177 return ret; 7178 7179 ret = seq_open(file, &last_boot_seq_ops); 7180 if (ret) { 7181 trace_array_put(tr); 7182 return ret; 7183 } 7184 7185 m = file->private_data; 7186 m->private = tr; 7187 7188 return 0; 7189 } 7190 7191 static int tracing_buffer_meta_open(struct inode *inode, struct file *filp) 7192 { 7193 struct trace_array *tr = inode->i_private; 7194 int cpu = tracing_get_cpu(inode); 7195 int ret; 7196 7197 ret = tracing_check_open_get_tr(tr); 7198 if (ret) 7199 return ret; 7200 7201 ret = ring_buffer_meta_seq_init(filp, tr->array_buffer.buffer, cpu); 7202 if (ret < 0) 7203 __trace_array_put(tr); 7204 return ret; 7205 } 7206 7207 static ssize_t 7208 tracing_free_buffer_write(struct file *filp, const char __user *ubuf, 7209 size_t cnt, loff_t *ppos) 7210 { 7211 /* 7212 * There is no need to read what the user has written, this function 7213 * is just to make sure that there is no error when "echo" is used 7214 */ 7215 7216 *ppos += cnt; 7217 7218 return cnt; 7219 } 7220 7221 static int 7222 tracing_free_buffer_release(struct inode *inode, struct file *filp) 7223 { 7224 struct trace_array *tr = inode->i_private; 7225 7226 /* disable tracing ? */ 7227 if (tr->trace_flags & TRACE_ITER(STOP_ON_FREE)) 7228 tracer_tracing_off(tr); 7229 /* resize the ring buffer to 0 */ 7230 tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS); 7231 7232 trace_array_put(tr); 7233 7234 return 0; 7235 } 7236 7237 #define TRACE_MARKER_MAX_SIZE 4096 7238 7239 static ssize_t write_marker_to_buffer(struct trace_array *tr, const char *buf, 7240 size_t cnt, unsigned long ip) 7241 { 7242 struct ring_buffer_event *event; 7243 enum event_trigger_type tt = ETT_NONE; 7244 struct trace_buffer *buffer; 7245 struct print_entry *entry; 7246 int meta_size; 7247 ssize_t written; 7248 size_t size; 7249 7250 meta_size = sizeof(*entry) + 2; /* add '\0' and possible '\n' */ 7251 again: 7252 size = cnt + meta_size; 7253 7254 buffer = tr->array_buffer.buffer; 7255 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, 7256 tracing_gen_ctx()); 7257 if (unlikely(!event)) { 7258 /* 7259 * If the size was greater than what was allowed, then 7260 * make it smaller and try again. 
7261 */ 7262 if (size > ring_buffer_max_event_size(buffer)) { 7263 cnt = ring_buffer_max_event_size(buffer) - meta_size; 7264 /* The above should only happen once */ 7265 if (WARN_ON_ONCE(cnt + meta_size == size)) 7266 return -EBADF; 7267 goto again; 7268 } 7269 7270 /* Ring buffer disabled, return as if not open for write */ 7271 return -EBADF; 7272 } 7273 7274 entry = ring_buffer_event_data(event); 7275 entry->ip = ip; 7276 memcpy(&entry->buf, buf, cnt); 7277 written = cnt; 7278 7279 if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) { 7280 /* do not add \n before testing triggers, but add \0 */ 7281 entry->buf[cnt] = '\0'; 7282 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event); 7283 } 7284 7285 if (entry->buf[cnt - 1] != '\n') { 7286 entry->buf[cnt] = '\n'; 7287 entry->buf[cnt + 1] = '\0'; 7288 } else 7289 entry->buf[cnt] = '\0'; 7290 7291 if (static_branch_unlikely(&trace_marker_exports_enabled)) 7292 ftrace_exports(event, TRACE_EXPORT_MARKER); 7293 __buffer_unlock_commit(buffer, event); 7294 7295 if (tt) 7296 event_triggers_post_call(tr->trace_marker_file, tt); 7297 7298 return written; 7299 } 7300 7301 struct trace_user_buf { 7302 char *buf; 7303 }; 7304 7305 static DEFINE_MUTEX(trace_user_buffer_mutex); 7306 static struct trace_user_buf_info *trace_user_buffer; 7307 7308 /** 7309 * trace_user_fault_destroy - free up allocated memory of a trace user buffer 7310 * @tinfo: The descriptor to free up 7311 * 7312 * Frees any data allocated in the trace info descriptor. 7313 */ 7314 void trace_user_fault_destroy(struct trace_user_buf_info *tinfo) 7315 { 7316 char *buf; 7317 int cpu; 7318 7319 if (!tinfo || !tinfo->tbuf) 7320 return; 7321 7322 for_each_possible_cpu(cpu) { 7323 buf = per_cpu_ptr(tinfo->tbuf, cpu)->buf; 7324 kfree(buf); 7325 } 7326 free_percpu(tinfo->tbuf); 7327 } 7328 7329 static int user_fault_buffer_enable(struct trace_user_buf_info *tinfo, size_t size) 7330 { 7331 char *buf; 7332 int cpu; 7333 7334 lockdep_assert_held(&trace_user_buffer_mutex); 7335 7336 tinfo->tbuf = alloc_percpu(struct trace_user_buf); 7337 if (!tinfo->tbuf) 7338 return -ENOMEM; 7339 7340 tinfo->ref = 1; 7341 tinfo->size = size; 7342 7343 /* Clear each buffer in case of error */ 7344 for_each_possible_cpu(cpu) { 7345 per_cpu_ptr(tinfo->tbuf, cpu)->buf = NULL; 7346 } 7347 7348 for_each_possible_cpu(cpu) { 7349 buf = kmalloc_node(size, GFP_KERNEL, 7350 cpu_to_node(cpu)); 7351 if (!buf) 7352 return -ENOMEM; 7353 per_cpu_ptr(tinfo->tbuf, cpu)->buf = buf; 7354 } 7355 7356 return 0; 7357 } 7358 7359 /* For internal use.
Initialize and allocate */ 7370 static int user_buffer_init(struct trace_user_buf_info **tinfo, size_t size) 7371 { 7372 bool alloc = false; 7373 int ret; 7374 7375 lockdep_assert_held(&trace_user_buffer_mutex); 7376 7377 if (!*tinfo) { 7378 alloc = true; 7379 *tinfo = kzalloc(sizeof(**tinfo), GFP_KERNEL); 7380 if (!*tinfo) 7381 return -ENOMEM; 7382 } 7383 7384 ret = user_fault_buffer_enable(*tinfo, size); 7385 if (ret < 0 && alloc) 7386 user_buffer_free(tinfo); 7387 7388 return ret; 7389 } 7390 7391 /* For internal use, dereference and free if necessary */ 7392 static void user_buffer_put(struct trace_user_buf_info **tinfo) 7393 { 7394 guard(mutex)(&trace_user_buffer_mutex); 7395 7396 if (WARN_ON_ONCE(!*tinfo || !(*tinfo)->ref)) 7397 return; 7398 7399 if (--(*tinfo)->ref) 7400 return; 7401 7402 user_buffer_free(tinfo); 7403 } 7404 7405 /** 7406 * trace_user_fault_init - Allocate or reference a per CPU buffer 7407 * @tinfo: A pointer to the trace buffer descriptor 7408 * @size: The size to allocate each per CPU buffer 7409 * 7410 * Create a per CPU buffer that can be used to copy from user space 7411 * in a task context. When calling trace_user_fault_read(), preemption 7412 * must be disabled, and it will enable preemption and copy user 7413 * space data to the buffer. If any schedule switches occur, it will 7414 * retry until it succeeds without a schedule switch, knowing the buffer 7415 * is still valid. 7416 * 7417 * Returns 0 on success, negative on failure. 7418 */ 7419 int trace_user_fault_init(struct trace_user_buf_info *tinfo, size_t size) 7420 { 7421 int ret; 7422 7423 if (!tinfo) 7424 return -EINVAL; 7425 7426 guard(mutex)(&trace_user_buffer_mutex); 7427 7428 ret = user_buffer_init(&tinfo, size); 7429 if (ret < 0) 7430 trace_user_fault_destroy(tinfo); 7431 7432 return ret; 7433 } 7434 7435 /** 7436 * trace_user_fault_get - up the ref count for the user buffer 7437 * @tinfo: A pointer to the trace buffer descriptor 7438 * 7439 * Ups the ref count of the trace buffer. 7440 * 7441 * Returns the new ref count. 7442 */ 7443 int trace_user_fault_get(struct trace_user_buf_info *tinfo) 7444 { 7445 if (!tinfo) 7446 return -1; 7447 7448 guard(mutex)(&trace_user_buffer_mutex); 7449 7450 tinfo->ref++; 7451 return tinfo->ref; 7452 } 7453 7454 /** 7455 * trace_user_fault_put - drop a reference to a per cpu trace buffer 7456 * @tinfo: The @tinfo that was passed to trace_user_fault_get() 7457 * 7458 * Decrement the ref count of @tinfo. 7459 * 7460 * Returns the new refcount (negative on error). 7461 */ 7462 int trace_user_fault_put(struct trace_user_buf_info *tinfo) 7463 { 7464 guard(mutex)(&trace_user_buffer_mutex); 7465 7466 if (WARN_ON_ONCE(!tinfo || !tinfo->ref)) 7467 return -1; 7468 7469 --tinfo->ref; 7470 return tinfo->ref; 7471 } 7472 7473 /** 7474 * trace_user_fault_read - Read user space into a per CPU buffer 7475 * @tinfo: The @tinfo allocated by trace_user_fault_get() 7476 * @ptr: The user space pointer to read 7477 * @size: The size of user space to read. 7478 * @copy_func: Optional function to use to copy from user space 7479 * @data: Data to pass to copy_func if it was supplied 7480 * 7481 * Preemption must be disabled when this is called, and must not 7482 * be enabled while using the returned buffer. 7483 * This does the copying from user space into a per CPU buffer. 7484 * 7485 * The @size must not be greater than the size passed in to 7486 * trace_user_fault_init().
7487 * 7488 * If @copy_func is NULL, trace_user_fault_read() will use copy_from_user(), 7489 * otherwise it will call @copy_func. It will call @copy_func with: 7490 * 7491 * buffer: the per CPU buffer of the @tinfo. 7492 * ptr: The pointer @ptr to user space to read 7493 * size: The @size of the ptr to read 7494 * data: The @data parameter 7495 * 7496 * It is expected that @copy_func will return 0 on success and non-zero 7497 * if there was a fault. 7498 * 7499 * Returns a pointer to the buffer with the content read from @ptr. 7500 * Preemption must remain disabled while the caller accesses the 7501 * buffer returned by this function. 7502 * Returns NULL if there was a fault, or the size passed in is 7503 * greater than the size passed to trace_user_fault_init(). 7504 */ 7505 char *trace_user_fault_read(struct trace_user_buf_info *tinfo, 7506 const char __user *ptr, size_t size, 7507 trace_user_buf_copy copy_func, void *data) 7508 { 7509 int cpu = smp_processor_id(); 7510 char *buffer = per_cpu_ptr(tinfo->tbuf, cpu)->buf; 7511 unsigned int cnt; 7512 int trys = 0; 7513 int ret; 7514 7515 lockdep_assert_preemption_disabled(); 7516 7517 /* 7518 * It's up to the caller to not try to copy more than it said 7519 * it would. 7520 */ 7521 if (size > tinfo->size) 7522 return NULL; 7523 7524 /* 7525 * This acts similar to a seqcount. The per CPU context switches are 7526 * recorded, migration is disabled and preemption is enabled. The 7527 * user space memory is then copied into the per CPU buffer. 7528 * Preemption is disabled again, and if the per CPU context switches count 7529 * is still the same, it means the buffer has not been corrupted. 7530 * If the count is different, it is assumed the buffer is corrupted 7531 * and reading must be tried again. 7532 */ 7533 7534 do { 7535 /* 7536 * If for some reason, copy_from_user() always causes a context 7537 * switch, this would then cause an infinite loop. 7538 * If this task is preempted by another user space task, it 7539 * will cause this task to try again. But just in case something 7540 * changes where the copying from user space causes another task 7541 * to run, prevent this from going into an infinite loop. 7542 * 100 tries should be plenty. 7543 */ 7544 if (WARN_ONCE(trys++ > 100, "Error: Too many tries to read user space")) 7545 return NULL; 7546 7547 /* Read the current CPU context switch counter */ 7548 cnt = nr_context_switches_cpu(cpu); 7549 7550 /* 7551 * Preemption is going to be enabled, but this task must 7552 * remain on this CPU. 7553 */ 7554 migrate_disable(); 7555 7556 /* 7557 * Now preemption is being enabled and another task can come in 7558 * and use the same buffer and corrupt our data. 7559 */ 7560 preempt_enable_notrace(); 7561 7562 /* Make sure preemption is enabled here */ 7563 lockdep_assert_preemption_enabled(); 7564 7565 if (copy_func) { 7566 ret = copy_func(buffer, ptr, size, data); 7567 } else { 7568 ret = __copy_from_user(buffer, ptr, size); 7569 } 7570 7571 preempt_disable_notrace(); 7572 migrate_enable(); 7573 7574 /* if it faulted, no need to test if the buffer was corrupted */ 7575 if (ret) 7576 return NULL; 7577 7578 /* 7579 * Preemption is disabled again, now check the per CPU context 7580 * switch counter. If it doesn't match, then another user space 7581 * process may have scheduled in and corrupted our buffer. In that 7582 * case the copying must be retried.
7583 */ 7584 } while (nr_context_switches_cpu(cpu) != cnt); 7585 7586 return buffer; 7587 } 7588 7589 static ssize_t 7590 tracing_mark_write(struct file *filp, const char __user *ubuf, 7591 size_t cnt, loff_t *fpos) 7592 { 7593 struct trace_array *tr = filp->private_data; 7594 ssize_t written = -ENODEV; 7595 unsigned long ip; 7596 char *buf; 7597 7598 if (tracing_disabled) 7599 return -EINVAL; 7600 7601 if (!(tr->trace_flags & TRACE_ITER(MARKERS))) 7602 return -EINVAL; 7603 7604 if ((ssize_t)cnt < 0) 7605 return -EINVAL; 7606 7607 if (cnt > TRACE_MARKER_MAX_SIZE) 7608 cnt = TRACE_MARKER_MAX_SIZE; 7609 7610 /* Must have preemption disabled while having access to the buffer */ 7611 guard(preempt_notrace)(); 7612 7613 buf = trace_user_fault_read(trace_user_buffer, ubuf, cnt, NULL, NULL); 7614 if (!buf) 7615 return -EFAULT; 7616 7617 /* The selftests expect this function to be the IP address */ 7618 ip = _THIS_IP_; 7619 7620 /* The global trace_marker can go to multiple instances */ 7621 if (tr == &global_trace) { 7622 guard(rcu)(); 7623 list_for_each_entry_rcu(tr, &marker_copies, marker_list) { 7624 written = write_marker_to_buffer(tr, buf, cnt, ip); 7625 if (written < 0) 7626 break; 7627 } 7628 } else { 7629 written = write_marker_to_buffer(tr, buf, cnt, ip); 7630 } 7631 7632 return written; 7633 } 7634 7635 static ssize_t write_raw_marker_to_buffer(struct trace_array *tr, 7636 const char *buf, size_t cnt) 7637 { 7638 struct ring_buffer_event *event; 7639 struct trace_buffer *buffer; 7640 struct raw_data_entry *entry; 7641 ssize_t written; 7642 size_t size; 7643 7644 /* cnt includes both the entry->id and the data behind it. */ 7645 size = struct_offset(entry, id) + cnt; 7646 7647 buffer = tr->array_buffer.buffer; 7648 7649 if (size > ring_buffer_max_event_size(buffer)) 7650 return -EINVAL; 7651 7652 event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size, 7653 tracing_gen_ctx()); 7654 if (!event) 7655 /* Ring buffer disabled, return as if not open for write */ 7656 return -EBADF; 7657 7658 entry = ring_buffer_event_data(event); 7659 unsafe_memcpy(&entry->id, buf, cnt, 7660 "id and content already reserved on ring buffer" 7661 "'buf' includes the 'id' and the data." 
7662 "'entry' was allocated with cnt from 'id'."); 7663 written = cnt; 7664 7665 __buffer_unlock_commit(buffer, event); 7666 7667 return written; 7668 } 7669 7670 static ssize_t 7671 tracing_mark_raw_write(struct file *filp, const char __user *ubuf, 7672 size_t cnt, loff_t *fpos) 7673 { 7674 struct trace_array *tr = filp->private_data; 7675 ssize_t written = -ENODEV; 7676 char *buf; 7677 7678 if (tracing_disabled) 7679 return -EINVAL; 7680 7681 if (!(tr->trace_flags & TRACE_ITER(MARKERS))) 7682 return -EINVAL; 7683 7684 /* The marker must at least have a tag id */ 7685 if (cnt < sizeof(unsigned int)) 7686 return -EINVAL; 7687 7688 /* raw write is all or nothing */ 7689 if (cnt > TRACE_MARKER_MAX_SIZE) 7690 return -EINVAL; 7691 7692 /* Must have preemption disabled while having access to the buffer */ 7693 guard(preempt_notrace)(); 7694 7695 buf = trace_user_fault_read(trace_user_buffer, ubuf, cnt, NULL, NULL); 7696 if (!buf) 7697 return -EFAULT; 7698 7699 /* The global trace_marker_raw can go to multiple instances */ 7700 if (tr == &global_trace) { 7701 guard(rcu)(); 7702 list_for_each_entry_rcu(tr, &marker_copies, marker_list) { 7703 written = write_raw_marker_to_buffer(tr, buf, cnt); 7704 if (written < 0) 7705 break; 7706 } 7707 } else { 7708 written = write_raw_marker_to_buffer(tr, buf, cnt); 7709 } 7710 7711 return written; 7712 } 7713 7714 static int tracing_mark_open(struct inode *inode, struct file *filp) 7715 { 7716 int ret; 7717 7718 scoped_guard(mutex, &trace_user_buffer_mutex) { 7719 if (!trace_user_buffer) { 7720 ret = user_buffer_init(&trace_user_buffer, TRACE_MARKER_MAX_SIZE); 7721 if (ret < 0) 7722 return ret; 7723 } else { 7724 trace_user_buffer->ref++; 7725 } 7726 } 7727 7728 stream_open(inode, filp); 7729 ret = tracing_open_generic_tr(inode, filp); 7730 if (ret < 0) 7731 user_buffer_put(&trace_user_buffer); 7732 return ret; 7733 } 7734 7735 static int tracing_mark_release(struct inode *inode, struct file *file) 7736 { 7737 user_buffer_put(&trace_user_buffer); 7738 return tracing_release_generic_tr(inode, file); 7739 } 7740 7741 static int tracing_clock_show(struct seq_file *m, void *v) 7742 { 7743 struct trace_array *tr = m->private; 7744 int i; 7745 7746 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) 7747 seq_printf(m, 7748 "%s%s%s%s", i ? " " : "", 7749 i == tr->clock_id ? "[" : "", trace_clocks[i].name, 7750 i == tr->clock_id ? "]" : ""); 7751 seq_putc(m, '\n'); 7752 7753 return 0; 7754 } 7755 7756 int tracing_set_clock(struct trace_array *tr, const char *clockstr) 7757 { 7758 int i; 7759 7760 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) { 7761 if (strcmp(trace_clocks[i].name, clockstr) == 0) 7762 break; 7763 } 7764 if (i == ARRAY_SIZE(trace_clocks)) 7765 return -EINVAL; 7766 7767 guard(mutex)(&trace_types_lock); 7768 7769 tr->clock_id = i; 7770 7771 ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func); 7772 7773 /* 7774 * New clock may not be consistent with the previous clock. 7775 * Reset the buffer so that it doesn't have incomparable timestamps. 
7776 */ 7777 tracing_reset_online_cpus(&tr->array_buffer); 7778 7779 #ifdef CONFIG_TRACER_MAX_TRACE 7780 if (tr->max_buffer.buffer) 7781 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func); 7782 tracing_reset_online_cpus(&tr->max_buffer); 7783 #endif 7784 7785 if (tr->scratch && !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) { 7786 struct trace_scratch *tscratch = tr->scratch; 7787 7788 tscratch->clock_id = i; 7789 } 7790 7791 return 0; 7792 } 7793 7794 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf, 7795 size_t cnt, loff_t *fpos) 7796 { 7797 struct seq_file *m = filp->private_data; 7798 struct trace_array *tr = m->private; 7799 char buf[64]; 7800 const char *clockstr; 7801 int ret; 7802 7803 if (cnt >= sizeof(buf)) 7804 return -EINVAL; 7805 7806 if (copy_from_user(buf, ubuf, cnt)) 7807 return -EFAULT; 7808 7809 buf[cnt] = 0; 7810 7811 clockstr = strstrip(buf); 7812 7813 ret = tracing_set_clock(tr, clockstr); 7814 if (ret) 7815 return ret; 7816 7817 *fpos += cnt; 7818 7819 return cnt; 7820 } 7821 7822 static int tracing_clock_open(struct inode *inode, struct file *file) 7823 { 7824 struct trace_array *tr = inode->i_private; 7825 int ret; 7826 7827 ret = tracing_check_open_get_tr(tr); 7828 if (ret) 7829 return ret; 7830 7831 ret = single_open(file, tracing_clock_show, inode->i_private); 7832 if (ret < 0) 7833 trace_array_put(tr); 7834 7835 return ret; 7836 } 7837 7838 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v) 7839 { 7840 struct trace_array *tr = m->private; 7841 7842 guard(mutex)(&trace_types_lock); 7843 7844 if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer)) 7845 seq_puts(m, "delta [absolute]\n"); 7846 else 7847 seq_puts(m, "[delta] absolute\n"); 7848 7849 return 0; 7850 } 7851 7852 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file) 7853 { 7854 struct trace_array *tr = inode->i_private; 7855 int ret; 7856 7857 ret = tracing_check_open_get_tr(tr); 7858 if (ret) 7859 return ret; 7860 7861 ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private); 7862 if (ret < 0) 7863 trace_array_put(tr); 7864 7865 return ret; 7866 } 7867 7868 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe) 7869 { 7870 if (rbe == this_cpu_read(trace_buffered_event)) 7871 return ring_buffer_time_stamp(buffer); 7872 7873 return ring_buffer_event_time_stamp(buffer, rbe); 7874 } 7875 7876 /* 7877 * Set or disable using the per CPU trace_buffer_event when possible. 
7878 */ 7879 int tracing_set_filter_buffering(struct trace_array *tr, bool set) 7880 { 7881 guard(mutex)(&trace_types_lock); 7882 7883 if (set && tr->no_filter_buffering_ref++) 7884 return 0; 7885 7886 if (!set) { 7887 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) 7888 return -EINVAL; 7889 7890 --tr->no_filter_buffering_ref; 7891 } 7892 7893 return 0; 7894 } 7895 7896 struct ftrace_buffer_info { 7897 struct trace_iterator iter; 7898 void *spare; 7899 unsigned int spare_cpu; 7900 unsigned int spare_size; 7901 unsigned int read; 7902 }; 7903 7904 #ifdef CONFIG_TRACER_SNAPSHOT 7905 static int tracing_snapshot_open(struct inode *inode, struct file *file) 7906 { 7907 struct trace_array *tr = inode->i_private; 7908 struct trace_iterator *iter; 7909 struct seq_file *m; 7910 int ret; 7911 7912 ret = tracing_check_open_get_tr(tr); 7913 if (ret) 7914 return ret; 7915 7916 if (file->f_mode & FMODE_READ) { 7917 iter = __tracing_open(inode, file, true); 7918 if (IS_ERR(iter)) 7919 ret = PTR_ERR(iter); 7920 } else { 7921 /* Writes still need the seq_file to hold the private data */ 7922 ret = -ENOMEM; 7923 m = kzalloc(sizeof(*m), GFP_KERNEL); 7924 if (!m) 7925 goto out; 7926 iter = kzalloc(sizeof(*iter), GFP_KERNEL); 7927 if (!iter) { 7928 kfree(m); 7929 goto out; 7930 } 7931 ret = 0; 7932 7933 iter->tr = tr; 7934 iter->array_buffer = &tr->max_buffer; 7935 iter->cpu_file = tracing_get_cpu(inode); 7936 m->private = iter; 7937 file->private_data = m; 7938 } 7939 out: 7940 if (ret < 0) 7941 trace_array_put(tr); 7942 7943 return ret; 7944 } 7945 7946 static void tracing_swap_cpu_buffer(void *tr) 7947 { 7948 update_max_tr_single((struct trace_array *)tr, current, smp_processor_id()); 7949 } 7950 7951 static ssize_t 7952 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt, 7953 loff_t *ppos) 7954 { 7955 struct seq_file *m = filp->private_data; 7956 struct trace_iterator *iter = m->private; 7957 struct trace_array *tr = iter->tr; 7958 unsigned long val; 7959 int ret; 7960 7961 ret = tracing_update_buffers(tr); 7962 if (ret < 0) 7963 return ret; 7964 7965 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 7966 if (ret) 7967 return ret; 7968 7969 guard(mutex)(&trace_types_lock); 7970 7971 if (tr->current_trace->use_max_tr) 7972 return -EBUSY; 7973 7974 local_irq_disable(); 7975 arch_spin_lock(&tr->max_lock); 7976 if (tr->cond_snapshot) 7977 ret = -EBUSY; 7978 arch_spin_unlock(&tr->max_lock); 7979 local_irq_enable(); 7980 if (ret) 7981 return ret; 7982 7983 switch (val) { 7984 case 0: 7985 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) 7986 return -EINVAL; 7987 if (tr->allocated_snapshot) 7988 free_snapshot(tr); 7989 break; 7990 case 1: 7991 /* Only allow per-cpu swap if the ring buffer supports it */ 7992 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP 7993 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) 7994 return -EINVAL; 7995 #endif 7996 if (tr->allocated_snapshot) 7997 ret = resize_buffer_duplicate_size(&tr->max_buffer, 7998 &tr->array_buffer, iter->cpu_file); 7999 8000 ret = tracing_arm_snapshot_locked(tr); 8001 if (ret) 8002 return ret; 8003 8004 /* Now, we're going to swap */ 8005 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) { 8006 local_irq_disable(); 8007 update_max_tr(tr, current, smp_processor_id(), NULL); 8008 local_irq_enable(); 8009 } else { 8010 smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer, 8011 (void *)tr, 1); 8012 } 8013 tracing_disarm_snapshot(tr); 8014 break; 8015 default: 8016 if (tr->allocated_snapshot) { 8017 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) 8018 
tracing_reset_online_cpus(&tr->max_buffer); 8019 else 8020 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file); 8021 } 8022 break; 8023 } 8024 8025 if (ret >= 0) { 8026 *ppos += cnt; 8027 ret = cnt; 8028 } 8029 8030 return ret; 8031 } 8032 8033 static int tracing_snapshot_release(struct inode *inode, struct file *file) 8034 { 8035 struct seq_file *m = file->private_data; 8036 int ret; 8037 8038 ret = tracing_release(inode, file); 8039 8040 if (file->f_mode & FMODE_READ) 8041 return ret; 8042 8043 /* If write only, the seq_file is just a stub */ 8044 if (m) 8045 kfree(m->private); 8046 kfree(m); 8047 8048 return 0; 8049 } 8050 8051 static int tracing_buffers_open(struct inode *inode, struct file *filp); 8052 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf, 8053 size_t count, loff_t *ppos); 8054 static int tracing_buffers_release(struct inode *inode, struct file *file); 8055 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos, 8056 struct pipe_inode_info *pipe, size_t len, unsigned int flags); 8057 8058 static int snapshot_raw_open(struct inode *inode, struct file *filp) 8059 { 8060 struct ftrace_buffer_info *info; 8061 int ret; 8062 8063 /* The following checks for tracefs lockdown */ 8064 ret = tracing_buffers_open(inode, filp); 8065 if (ret < 0) 8066 return ret; 8067 8068 info = filp->private_data; 8069 8070 if (info->iter.trace->use_max_tr) { 8071 tracing_buffers_release(inode, filp); 8072 return -EBUSY; 8073 } 8074 8075 info->iter.snapshot = true; 8076 info->iter.array_buffer = &info->iter.tr->max_buffer; 8077 8078 return ret; 8079 } 8080 8081 #endif /* CONFIG_TRACER_SNAPSHOT */ 8082 8083 8084 static const struct file_operations tracing_thresh_fops = { 8085 .open = tracing_open_generic, 8086 .read = tracing_thresh_read, 8087 .write = tracing_thresh_write, 8088 .llseek = generic_file_llseek, 8089 }; 8090 8091 #ifdef CONFIG_TRACER_MAX_TRACE 8092 static const struct file_operations tracing_max_lat_fops = { 8093 .open = tracing_open_generic_tr, 8094 .read = tracing_max_lat_read, 8095 .write = tracing_max_lat_write, 8096 .llseek = generic_file_llseek, 8097 .release = tracing_release_generic_tr, 8098 }; 8099 #endif 8100 8101 static const struct file_operations set_tracer_fops = { 8102 .open = tracing_open_generic_tr, 8103 .read = tracing_set_trace_read, 8104 .write = tracing_set_trace_write, 8105 .llseek = generic_file_llseek, 8106 .release = tracing_release_generic_tr, 8107 }; 8108 8109 static const struct file_operations tracing_pipe_fops = { 8110 .open = tracing_open_pipe, 8111 .poll = tracing_poll_pipe, 8112 .read = tracing_read_pipe, 8113 .splice_read = tracing_splice_read_pipe, 8114 .release = tracing_release_pipe, 8115 }; 8116 8117 static const struct file_operations tracing_entries_fops = { 8118 .open = tracing_open_generic_tr, 8119 .read = tracing_entries_read, 8120 .write = tracing_entries_write, 8121 .llseek = generic_file_llseek, 8122 .release = tracing_release_generic_tr, 8123 }; 8124 8125 static const struct file_operations tracing_syscall_buf_fops = { 8126 .open = tracing_open_generic_tr, 8127 .read = tracing_syscall_buf_read, 8128 .write = tracing_syscall_buf_write, 8129 .llseek = generic_file_llseek, 8130 .release = tracing_release_generic_tr, 8131 }; 8132 8133 static const struct file_operations tracing_buffer_meta_fops = { 8134 .open = tracing_buffer_meta_open, 8135 .read = seq_read, 8136 .llseek = seq_lseek, 8137 .release = tracing_seq_release, 8138 }; 8139 8140 static const struct file_operations tracing_total_entries_fops = 
{ 8141 .open = tracing_open_generic_tr, 8142 .read = tracing_total_entries_read, 8143 .llseek = generic_file_llseek, 8144 .release = tracing_release_generic_tr, 8145 }; 8146 8147 static const struct file_operations tracing_free_buffer_fops = { 8148 .open = tracing_open_generic_tr, 8149 .write = tracing_free_buffer_write, 8150 .release = tracing_free_buffer_release, 8151 }; 8152 8153 static const struct file_operations tracing_mark_fops = { 8154 .open = tracing_mark_open, 8155 .write = tracing_mark_write, 8156 .release = tracing_mark_release, 8157 }; 8158 8159 static const struct file_operations tracing_mark_raw_fops = { 8160 .open = tracing_mark_open, 8161 .write = tracing_mark_raw_write, 8162 .release = tracing_mark_release, 8163 }; 8164 8165 static const struct file_operations trace_clock_fops = { 8166 .open = tracing_clock_open, 8167 .read = seq_read, 8168 .llseek = seq_lseek, 8169 .release = tracing_single_release_tr, 8170 .write = tracing_clock_write, 8171 }; 8172 8173 static const struct file_operations trace_time_stamp_mode_fops = { 8174 .open = tracing_time_stamp_mode_open, 8175 .read = seq_read, 8176 .llseek = seq_lseek, 8177 .release = tracing_single_release_tr, 8178 }; 8179 8180 static const struct file_operations last_boot_fops = { 8181 .open = tracing_last_boot_open, 8182 .read = seq_read, 8183 .llseek = seq_lseek, 8184 .release = tracing_seq_release, 8185 }; 8186 8187 #ifdef CONFIG_TRACER_SNAPSHOT 8188 static const struct file_operations snapshot_fops = { 8189 .open = tracing_snapshot_open, 8190 .read = seq_read, 8191 .write = tracing_snapshot_write, 8192 .llseek = tracing_lseek, 8193 .release = tracing_snapshot_release, 8194 }; 8195 8196 static const struct file_operations snapshot_raw_fops = { 8197 .open = snapshot_raw_open, 8198 .read = tracing_buffers_read, 8199 .release = tracing_buffers_release, 8200 .splice_read = tracing_buffers_splice_read, 8201 }; 8202 8203 #endif /* CONFIG_TRACER_SNAPSHOT */ 8204 8205 /* 8206 * trace_min_max_write - Write a u64 value to a trace_min_max_param struct 8207 * @filp: The active open file structure 8208 * @ubuf: The userspace provided buffer to read value into 8209 * @cnt: The maximum number of bytes to read 8210 * @ppos: The current "file" position 8211 * 8212 * This function implements the write interface for a struct trace_min_max_param. 8213 * The filp->private_data must point to a trace_min_max_param structure that 8214 * defines where to write the value, the min and the max acceptable values, 8215 * and a lock to protect the write. 
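 *
 * A minimal usage sketch (every name below is hypothetical); the field
 * layout simply mirrors what trace_min_max_write()/trace_min_max_read()
 * dereference:
 *
 *	static u64 my_val, my_min = 1, my_max = 100;
 *	static DEFINE_MUTEX(my_lock);
 *
 *	static struct trace_min_max_param my_param = {
 *		.lock	= &my_lock,
 *		.val	= &my_val,
 *		.min	= &my_min,
 *		.max	= &my_max,
 *	};
 *
 *	trace_create_file("my_knob", TRACE_MODE_WRITE, parent,
 *			  &my_param, &trace_min_max_fops);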
8216 */ 8217 static ssize_t 8218 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) 8219 { 8220 struct trace_min_max_param *param = filp->private_data; 8221 u64 val; 8222 int err; 8223 8224 if (!param) 8225 return -EFAULT; 8226 8227 err = kstrtoull_from_user(ubuf, cnt, 10, &val); 8228 if (err) 8229 return err; 8230 8231 if (param->lock) 8232 mutex_lock(param->lock); 8233 8234 if (param->min && val < *param->min) 8235 err = -EINVAL; 8236 8237 if (param->max && val > *param->max) 8238 err = -EINVAL; 8239 8240 if (!err) 8241 *param->val = val; 8242 8243 if (param->lock) 8244 mutex_unlock(param->lock); 8245 8246 if (err) 8247 return err; 8248 8249 return cnt; 8250 } 8251 8252 /* 8253 * trace_min_max_read - Read a u64 value from a trace_min_max_param struct 8254 * @filp: The active open file structure 8255 * @ubuf: The userspace provided buffer to read value into 8256 * @cnt: The maximum number of bytes to read 8257 * @ppos: The current "file" position 8258 * 8259 * This function implements the read interface for a struct trace_min_max_param. 8260 * The filp->private_data must point to a trace_min_max_param struct with valid 8261 * data. 8262 */ 8263 static ssize_t 8264 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) 8265 { 8266 struct trace_min_max_param *param = filp->private_data; 8267 char buf[U64_STR_SIZE]; 8268 int len; 8269 u64 val; 8270 8271 if (!param) 8272 return -EFAULT; 8273 8274 val = *param->val; 8275 8276 if (cnt > sizeof(buf)) 8277 cnt = sizeof(buf); 8278 8279 len = snprintf(buf, sizeof(buf), "%llu\n", val); 8280 8281 return simple_read_from_buffer(ubuf, cnt, ppos, buf, len); 8282 } 8283 8284 const struct file_operations trace_min_max_fops = { 8285 .open = tracing_open_generic, 8286 .read = trace_min_max_read, 8287 .write = trace_min_max_write, 8288 }; 8289 8290 #define TRACING_LOG_ERRS_MAX 8 8291 #define TRACING_LOG_LOC_MAX 128 8292 8293 #define CMD_PREFIX " Command: " 8294 8295 struct err_info { 8296 const char **errs; /* ptr to loc-specific array of err strings */ 8297 u8 type; /* index into errs -> specific err string */ 8298 u16 pos; /* caret position */ 8299 u64 ts; 8300 }; 8301 8302 struct tracing_log_err { 8303 struct list_head list; 8304 struct err_info info; 8305 char loc[TRACING_LOG_LOC_MAX]; /* err location */ 8306 char *cmd; /* what caused err */ 8307 }; 8308 8309 static DEFINE_MUTEX(tracing_err_log_lock); 8310 8311 static struct tracing_log_err *alloc_tracing_log_err(int len) 8312 { 8313 struct tracing_log_err *err; 8314 8315 err = kzalloc(sizeof(*err), GFP_KERNEL); 8316 if (!err) 8317 return ERR_PTR(-ENOMEM); 8318 8319 err->cmd = kzalloc(len, GFP_KERNEL); 8320 if (!err->cmd) { 8321 kfree(err); 8322 return ERR_PTR(-ENOMEM); 8323 } 8324 8325 return err; 8326 } 8327 8328 static void free_tracing_log_err(struct tracing_log_err *err) 8329 { 8330 kfree(err->cmd); 8331 kfree(err); 8332 } 8333 8334 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr, 8335 int len) 8336 { 8337 struct tracing_log_err *err; 8338 char *cmd; 8339 8340 if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) { 8341 err = alloc_tracing_log_err(len); 8342 if (PTR_ERR(err) != -ENOMEM) 8343 tr->n_err_log_entries++; 8344 8345 return err; 8346 } 8347 cmd = kzalloc(len, GFP_KERNEL); 8348 if (!cmd) 8349 return ERR_PTR(-ENOMEM); 8350 err = list_first_entry(&tr->err_log, struct tracing_log_err, list); 8351 kfree(err->cmd); 8352 err->cmd = cmd; 8353 list_del(&err->list); 8354 8355 return err; 8356 } 8357 8358 /** 8359 
* err_pos - find the position of a string within a command for error careting 8360 * @cmd: The tracing command that caused the error 8361 * @str: The string to position the caret at within @cmd 8362 * 8363 * Finds the position of the first occurrence of @str within @cmd. The 8364 * return value can be passed to tracing_log_err() for caret placement 8365 * within @cmd. 8366 * 8367 * Returns the index within @cmd of the first occurrence of @str or 0 8368 * if @str was not found. 8369 */ 8370 unsigned int err_pos(char *cmd, const char *str) 8371 { 8372 char *found; 8373 8374 if (WARN_ON(!strlen(cmd))) 8375 return 0; 8376 8377 found = strstr(cmd, str); 8378 if (found) 8379 return found - cmd; 8380 8381 return 0; 8382 } 8383 8384 /** 8385 * tracing_log_err - write an error to the tracing error log 8386 * @tr: The associated trace array for the error (NULL for top level array) 8387 * @loc: A string describing where the error occurred 8388 * @cmd: The tracing command that caused the error 8389 * @errs: The array of loc-specific static error strings 8390 * @type: The index into errs[], which produces the specific static err string 8391 * @pos: The position the caret should be placed in the cmd 8392 * 8393 * Writes an error into tracing/error_log of the form: 8394 * 8395 * <loc>: error: <text> 8396 * Command: <cmd> 8397 * ^ 8398 * 8399 * tracing/error_log is a small log file containing the last 8400 * TRACING_LOG_ERRS_MAX errors (8). Memory for errors isn't allocated 8401 * unless there has been a tracing error, and the error log can be 8402 * cleared and have its memory freed by writing the empty string in 8403 * truncation mode to it i.e. echo > tracing/error_log. 8404 * 8405 * NOTE: the @errs array along with the @type param are used to 8406 * produce a static error string - this string is not copied and saved 8407 * when the error is logged - only a pointer to it is saved. See 8408 * existing callers for examples of how static strings are typically 8409 * defined for use with tracing_log_err(). 
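 *
 * For illustration only (the names below are hypothetical), a caller
 * usually keeps a static array of messages, selects one by index and
 * uses err_pos() to place the caret:
 *
 *	static const char *foo_errs[] = { "Duplicate name", "Bad field" };
 *
 *	tracing_log_err(tr, "foo: bar", cmd, foo_errs,
 *			1, err_pos(cmd, field_str));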
8410 */ 8411 void tracing_log_err(struct trace_array *tr, 8412 const char *loc, const char *cmd, 8413 const char **errs, u8 type, u16 pos) 8414 { 8415 struct tracing_log_err *err; 8416 int len = 0; 8417 8418 if (!tr) 8419 tr = &global_trace; 8420 8421 len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1; 8422 8423 guard(mutex)(&tracing_err_log_lock); 8424 8425 err = get_tracing_log_err(tr, len); 8426 if (PTR_ERR(err) == -ENOMEM) 8427 return; 8428 8429 snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc); 8430 snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd); 8431 8432 err->info.errs = errs; 8433 err->info.type = type; 8434 err->info.pos = pos; 8435 err->info.ts = local_clock(); 8436 8437 list_add_tail(&err->list, &tr->err_log); 8438 } 8439 8440 static void clear_tracing_err_log(struct trace_array *tr) 8441 { 8442 struct tracing_log_err *err, *next; 8443 8444 guard(mutex)(&tracing_err_log_lock); 8445 8446 list_for_each_entry_safe(err, next, &tr->err_log, list) { 8447 list_del(&err->list); 8448 free_tracing_log_err(err); 8449 } 8450 8451 tr->n_err_log_entries = 0; 8452 } 8453 8454 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos) 8455 { 8456 struct trace_array *tr = m->private; 8457 8458 mutex_lock(&tracing_err_log_lock); 8459 8460 return seq_list_start(&tr->err_log, *pos); 8461 } 8462 8463 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos) 8464 { 8465 struct trace_array *tr = m->private; 8466 8467 return seq_list_next(v, &tr->err_log, pos); 8468 } 8469 8470 static void tracing_err_log_seq_stop(struct seq_file *m, void *v) 8471 { 8472 mutex_unlock(&tracing_err_log_lock); 8473 } 8474 8475 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos) 8476 { 8477 u16 i; 8478 8479 for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++) 8480 seq_putc(m, ' '); 8481 for (i = 0; i < pos; i++) 8482 seq_putc(m, ' '); 8483 seq_puts(m, "^\n"); 8484 } 8485 8486 static int tracing_err_log_seq_show(struct seq_file *m, void *v) 8487 { 8488 struct tracing_log_err *err = v; 8489 8490 if (err) { 8491 const char *err_text = err->info.errs[err->info.type]; 8492 u64 sec = err->info.ts; 8493 u32 nsec; 8494 8495 nsec = do_div(sec, NSEC_PER_SEC); 8496 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000, 8497 err->loc, err_text); 8498 seq_printf(m, "%s", err->cmd); 8499 tracing_err_log_show_pos(m, err->info.pos); 8500 } 8501 8502 return 0; 8503 } 8504 8505 static const struct seq_operations tracing_err_log_seq_ops = { 8506 .start = tracing_err_log_seq_start, 8507 .next = tracing_err_log_seq_next, 8508 .stop = tracing_err_log_seq_stop, 8509 .show = tracing_err_log_seq_show 8510 }; 8511 8512 static int tracing_err_log_open(struct inode *inode, struct file *file) 8513 { 8514 struct trace_array *tr = inode->i_private; 8515 int ret = 0; 8516 8517 ret = tracing_check_open_get_tr(tr); 8518 if (ret) 8519 return ret; 8520 8521 /* If this file was opened for write, then erase contents */ 8522 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) 8523 clear_tracing_err_log(tr); 8524 8525 if (file->f_mode & FMODE_READ) { 8526 ret = seq_open(file, &tracing_err_log_seq_ops); 8527 if (!ret) { 8528 struct seq_file *m = file->private_data; 8529 m->private = tr; 8530 } else { 8531 trace_array_put(tr); 8532 } 8533 } 8534 return ret; 8535 } 8536 8537 static ssize_t tracing_err_log_write(struct file *file, 8538 const char __user *buffer, 8539 size_t count, loff_t *ppos) 8540 { 8541 return count; 8542 } 8543 8544 static int tracing_err_log_release(struct inode 
*inode, struct file *file) 8545 { 8546 struct trace_array *tr = inode->i_private; 8547 8548 trace_array_put(tr); 8549 8550 if (file->f_mode & FMODE_READ) 8551 seq_release(inode, file); 8552 8553 return 0; 8554 } 8555 8556 static const struct file_operations tracing_err_log_fops = { 8557 .open = tracing_err_log_open, 8558 .write = tracing_err_log_write, 8559 .read = seq_read, 8560 .llseek = tracing_lseek, 8561 .release = tracing_err_log_release, 8562 }; 8563 8564 static int tracing_buffers_open(struct inode *inode, struct file *filp) 8565 { 8566 struct trace_array *tr = inode->i_private; 8567 struct ftrace_buffer_info *info; 8568 int ret; 8569 8570 ret = tracing_check_open_get_tr(tr); 8571 if (ret) 8572 return ret; 8573 8574 info = kvzalloc(sizeof(*info), GFP_KERNEL); 8575 if (!info) { 8576 trace_array_put(tr); 8577 return -ENOMEM; 8578 } 8579 8580 mutex_lock(&trace_types_lock); 8581 8582 info->iter.tr = tr; 8583 info->iter.cpu_file = tracing_get_cpu(inode); 8584 info->iter.trace = tr->current_trace; 8585 info->iter.array_buffer = &tr->array_buffer; 8586 info->spare = NULL; 8587 /* Force reading ring buffer for first read */ 8588 info->read = (unsigned int)-1; 8589 8590 filp->private_data = info; 8591 8592 tr->trace_ref++; 8593 8594 mutex_unlock(&trace_types_lock); 8595 8596 ret = nonseekable_open(inode, filp); 8597 if (ret < 0) 8598 trace_array_put(tr); 8599 8600 return ret; 8601 } 8602 8603 static __poll_t 8604 tracing_buffers_poll(struct file *filp, poll_table *poll_table) 8605 { 8606 struct ftrace_buffer_info *info = filp->private_data; 8607 struct trace_iterator *iter = &info->iter; 8608 8609 return trace_poll(iter, filp, poll_table); 8610 } 8611 8612 static ssize_t 8613 tracing_buffers_read(struct file *filp, char __user *ubuf, 8614 size_t count, loff_t *ppos) 8615 { 8616 struct ftrace_buffer_info *info = filp->private_data; 8617 struct trace_iterator *iter = &info->iter; 8618 void *trace_data; 8619 int page_size; 8620 ssize_t ret = 0; 8621 ssize_t size; 8622 8623 if (!count) 8624 return 0; 8625 8626 #ifdef CONFIG_TRACER_MAX_TRACE 8627 if (iter->snapshot && iter->tr->current_trace->use_max_tr) 8628 return -EBUSY; 8629 #endif 8630 8631 page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer); 8632 8633 /* Make sure the spare matches the current sub buffer size */ 8634 if (info->spare) { 8635 if (page_size != info->spare_size) { 8636 ring_buffer_free_read_page(iter->array_buffer->buffer, 8637 info->spare_cpu, info->spare); 8638 info->spare = NULL; 8639 } 8640 } 8641 8642 if (!info->spare) { 8643 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer, 8644 iter->cpu_file); 8645 if (IS_ERR(info->spare)) { 8646 ret = PTR_ERR(info->spare); 8647 info->spare = NULL; 8648 } else { 8649 info->spare_cpu = iter->cpu_file; 8650 info->spare_size = page_size; 8651 } 8652 } 8653 if (!info->spare) 8654 return ret; 8655 8656 /* Do we have previous read data to read? 
*/ 8657 if (info->read < page_size) 8658 goto read; 8659 8660 again: 8661 trace_access_lock(iter->cpu_file); 8662 ret = ring_buffer_read_page(iter->array_buffer->buffer, 8663 info->spare, 8664 count, 8665 iter->cpu_file, 0); 8666 trace_access_unlock(iter->cpu_file); 8667 8668 if (ret < 0) { 8669 if (trace_empty(iter) && !iter->closed) { 8670 if (update_last_data_if_empty(iter->tr)) 8671 return 0; 8672 8673 if ((filp->f_flags & O_NONBLOCK)) 8674 return -EAGAIN; 8675 8676 ret = wait_on_pipe(iter, 0); 8677 if (ret) 8678 return ret; 8679 8680 goto again; 8681 } 8682 return 0; 8683 } 8684 8685 info->read = 0; 8686 read: 8687 size = page_size - info->read; 8688 if (size > count) 8689 size = count; 8690 trace_data = ring_buffer_read_page_data(info->spare); 8691 ret = copy_to_user(ubuf, trace_data + info->read, size); 8692 if (ret == size) 8693 return -EFAULT; 8694 8695 size -= ret; 8696 8697 *ppos += size; 8698 info->read += size; 8699 8700 return size; 8701 } 8702 8703 static int tracing_buffers_flush(struct file *file, fl_owner_t id) 8704 { 8705 struct ftrace_buffer_info *info = file->private_data; 8706 struct trace_iterator *iter = &info->iter; 8707 8708 iter->closed = true; 8709 /* Make sure the waiters see the new wait_index */ 8710 (void)atomic_fetch_inc_release(&iter->wait_index); 8711 8712 ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file); 8713 8714 return 0; 8715 } 8716 8717 static int tracing_buffers_release(struct inode *inode, struct file *file) 8718 { 8719 struct ftrace_buffer_info *info = file->private_data; 8720 struct trace_iterator *iter = &info->iter; 8721 8722 guard(mutex)(&trace_types_lock); 8723 8724 iter->tr->trace_ref--; 8725 8726 __trace_array_put(iter->tr); 8727 8728 if (info->spare) 8729 ring_buffer_free_read_page(iter->array_buffer->buffer, 8730 info->spare_cpu, info->spare); 8731 kvfree(info); 8732 8733 return 0; 8734 } 8735 8736 struct buffer_ref { 8737 struct trace_buffer *buffer; 8738 void *page; 8739 int cpu; 8740 refcount_t refcount; 8741 }; 8742 8743 static void buffer_ref_release(struct buffer_ref *ref) 8744 { 8745 if (!refcount_dec_and_test(&ref->refcount)) 8746 return; 8747 ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page); 8748 kfree(ref); 8749 } 8750 8751 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe, 8752 struct pipe_buffer *buf) 8753 { 8754 struct buffer_ref *ref = (struct buffer_ref *)buf->private; 8755 8756 buffer_ref_release(ref); 8757 buf->private = 0; 8758 } 8759 8760 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe, 8761 struct pipe_buffer *buf) 8762 { 8763 struct buffer_ref *ref = (struct buffer_ref *)buf->private; 8764 8765 if (refcount_read(&ref->refcount) > INT_MAX/2) 8766 return false; 8767 8768 refcount_inc(&ref->refcount); 8769 return true; 8770 } 8771 8772 /* Pipe buffer operations for a buffer. */ 8773 static const struct pipe_buf_operations buffer_pipe_buf_ops = { 8774 .release = buffer_pipe_buf_release, 8775 .get = buffer_pipe_buf_get, 8776 }; 8777 8778 /* 8779 * Callback from splice_to_pipe(), if we need to release some pages 8780 * at the end of the spd in case we error'ed out in filling the pipe. 
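 * Each partial page handed to the pipe carries a buffer_ref in its
 * ->private; dropping that reference here (as buffer_pipe_buf_release()
 * does on the consumer side) frees the read page once the last user
 * is gone.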
8781 */ 8782 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i) 8783 { 8784 struct buffer_ref *ref = 8785 (struct buffer_ref *)spd->partial[i].private; 8786 8787 buffer_ref_release(ref); 8788 spd->partial[i].private = 0; 8789 } 8790 8791 static ssize_t 8792 tracing_buffers_splice_read(struct file *file, loff_t *ppos, 8793 struct pipe_inode_info *pipe, size_t len, 8794 unsigned int flags) 8795 { 8796 struct ftrace_buffer_info *info = file->private_data; 8797 struct trace_iterator *iter = &info->iter; 8798 struct partial_page partial_def[PIPE_DEF_BUFFERS]; 8799 struct page *pages_def[PIPE_DEF_BUFFERS]; 8800 struct splice_pipe_desc spd = { 8801 .pages = pages_def, 8802 .partial = partial_def, 8803 .nr_pages_max = PIPE_DEF_BUFFERS, 8804 .ops = &buffer_pipe_buf_ops, 8805 .spd_release = buffer_spd_release, 8806 }; 8807 struct buffer_ref *ref; 8808 bool woken = false; 8809 int page_size; 8810 int entries, i; 8811 ssize_t ret = 0; 8812 8813 #ifdef CONFIG_TRACER_MAX_TRACE 8814 if (iter->snapshot && iter->tr->current_trace->use_max_tr) 8815 return -EBUSY; 8816 #endif 8817 8818 page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer); 8819 if (*ppos & (page_size - 1)) 8820 return -EINVAL; 8821 8822 if (len & (page_size - 1)) { 8823 if (len < page_size) 8824 return -EINVAL; 8825 len &= (~(page_size - 1)); 8826 } 8827 8828 if (splice_grow_spd(pipe, &spd)) 8829 return -ENOMEM; 8830 8831 again: 8832 trace_access_lock(iter->cpu_file); 8833 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file); 8834 8835 for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) { 8836 struct page *page; 8837 int r; 8838 8839 ref = kzalloc(sizeof(*ref), GFP_KERNEL); 8840 if (!ref) { 8841 ret = -ENOMEM; 8842 break; 8843 } 8844 8845 refcount_set(&ref->refcount, 1); 8846 ref->buffer = iter->array_buffer->buffer; 8847 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file); 8848 if (IS_ERR(ref->page)) { 8849 ret = PTR_ERR(ref->page); 8850 ref->page = NULL; 8851 kfree(ref); 8852 break; 8853 } 8854 ref->cpu = iter->cpu_file; 8855 8856 r = ring_buffer_read_page(ref->buffer, ref->page, 8857 len, iter->cpu_file, 1); 8858 if (r < 0) { 8859 ring_buffer_free_read_page(ref->buffer, ref->cpu, 8860 ref->page); 8861 kfree(ref); 8862 break; 8863 } 8864 8865 page = virt_to_page(ring_buffer_read_page_data(ref->page)); 8866 8867 spd.pages[i] = page; 8868 spd.partial[i].len = page_size; 8869 spd.partial[i].offset = 0; 8870 spd.partial[i].private = (unsigned long)ref; 8871 spd.nr_pages++; 8872 *ppos += page_size; 8873 8874 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file); 8875 } 8876 8877 trace_access_unlock(iter->cpu_file); 8878 spd.nr_pages = i; 8879 8880 /* did we read anything? */ 8881 if (!spd.nr_pages) { 8882 8883 if (ret) 8884 goto out; 8885 8886 if (woken) 8887 goto out; 8888 8889 ret = -EAGAIN; 8890 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK)) 8891 goto out; 8892 8893 ret = wait_on_pipe(iter, iter->snapshot ? 
0 : iter->tr->buffer_percent); 8894 if (ret) 8895 goto out; 8896 8897 /* No need to wait after waking up when tracing is off */ 8898 if (!tracer_tracing_is_on(iter->tr)) 8899 goto out; 8900 8901 /* Iterate one more time to collect any new data then exit */ 8902 woken = true; 8903 8904 goto again; 8905 } 8906 8907 ret = splice_to_pipe(pipe, &spd); 8908 out: 8909 splice_shrink_spd(&spd); 8910 8911 return ret; 8912 } 8913 8914 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 8915 { 8916 struct ftrace_buffer_info *info = file->private_data; 8917 struct trace_iterator *iter = &info->iter; 8918 int err; 8919 8920 if (cmd == TRACE_MMAP_IOCTL_GET_READER) { 8921 if (!(file->f_flags & O_NONBLOCK)) { 8922 err = ring_buffer_wait(iter->array_buffer->buffer, 8923 iter->cpu_file, 8924 iter->tr->buffer_percent, 8925 NULL, NULL); 8926 if (err) 8927 return err; 8928 } 8929 8930 return ring_buffer_map_get_reader(iter->array_buffer->buffer, 8931 iter->cpu_file); 8932 } else if (cmd) { 8933 return -ENOTTY; 8934 } 8935 8936 /* 8937 * An ioctl call with cmd 0 to the ring buffer file will wake up all 8938 * waiters 8939 */ 8940 guard(mutex)(&trace_types_lock); 8941 8942 /* Make sure the waiters see the new wait_index */ 8943 (void)atomic_fetch_inc_release(&iter->wait_index); 8944 8945 ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file); 8946 8947 return 0; 8948 } 8949 8950 #ifdef CONFIG_TRACER_MAX_TRACE 8951 static int get_snapshot_map(struct trace_array *tr) 8952 { 8953 int err = 0; 8954 8955 /* 8956 * Called with mmap_lock held. lockdep would be unhappy if we would now 8957 * take trace_types_lock. Instead use the specific 8958 * snapshot_trigger_lock. 8959 */ 8960 spin_lock(&tr->snapshot_trigger_lock); 8961 8962 if (tr->snapshot || tr->mapped == UINT_MAX) 8963 err = -EBUSY; 8964 else 8965 tr->mapped++; 8966 8967 spin_unlock(&tr->snapshot_trigger_lock); 8968 8969 /* Wait for update_max_tr() to observe iter->tr->mapped */ 8970 if (tr->mapped == 1) 8971 synchronize_rcu(); 8972 8973 return err; 8974 8975 } 8976 static void put_snapshot_map(struct trace_array *tr) 8977 { 8978 spin_lock(&tr->snapshot_trigger_lock); 8979 if (!WARN_ON(!tr->mapped)) 8980 tr->mapped--; 8981 spin_unlock(&tr->snapshot_trigger_lock); 8982 } 8983 #else 8984 static inline int get_snapshot_map(struct trace_array *tr) { return 0; } 8985 static inline void put_snapshot_map(struct trace_array *tr) { } 8986 #endif 8987 8988 static void tracing_buffers_mmap_close(struct vm_area_struct *vma) 8989 { 8990 struct ftrace_buffer_info *info = vma->vm_file->private_data; 8991 struct trace_iterator *iter = &info->iter; 8992 8993 WARN_ON(ring_buffer_unmap(iter->array_buffer->buffer, iter->cpu_file)); 8994 put_snapshot_map(iter->tr); 8995 } 8996 8997 static int tracing_buffers_may_split(struct vm_area_struct *vma, unsigned long addr) 8998 { 8999 /* 9000 * Trace buffer mappings require the complete buffer including 9001 * the meta page. Partial mappings are not supported. 
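 * Rejecting ->may_split here means a partial munmap() (or anything else
 * that would split the VMA) is refused rather than leaving a torn
 * mapping behind; the range can only be unmapped as a whole.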
9002 */ 9003 return -EINVAL; 9004 } 9005 9006 static const struct vm_operations_struct tracing_buffers_vmops = { 9007 .close = tracing_buffers_mmap_close, 9008 .may_split = tracing_buffers_may_split, 9009 }; 9010 9011 static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma) 9012 { 9013 struct ftrace_buffer_info *info = filp->private_data; 9014 struct trace_iterator *iter = &info->iter; 9015 int ret = 0; 9016 9017 /* A memmap'ed and backup buffers are not supported for user space mmap */ 9018 if (iter->tr->flags & (TRACE_ARRAY_FL_MEMMAP | TRACE_ARRAY_FL_VMALLOC)) 9019 return -ENODEV; 9020 9021 ret = get_snapshot_map(iter->tr); 9022 if (ret) 9023 return ret; 9024 9025 ret = ring_buffer_map(iter->array_buffer->buffer, iter->cpu_file, vma); 9026 if (ret) 9027 put_snapshot_map(iter->tr); 9028 9029 vma->vm_ops = &tracing_buffers_vmops; 9030 9031 return ret; 9032 } 9033 9034 static const struct file_operations tracing_buffers_fops = { 9035 .open = tracing_buffers_open, 9036 .read = tracing_buffers_read, 9037 .poll = tracing_buffers_poll, 9038 .release = tracing_buffers_release, 9039 .flush = tracing_buffers_flush, 9040 .splice_read = tracing_buffers_splice_read, 9041 .unlocked_ioctl = tracing_buffers_ioctl, 9042 .mmap = tracing_buffers_mmap, 9043 }; 9044 9045 static ssize_t 9046 tracing_stats_read(struct file *filp, char __user *ubuf, 9047 size_t count, loff_t *ppos) 9048 { 9049 struct inode *inode = file_inode(filp); 9050 struct trace_array *tr = inode->i_private; 9051 struct array_buffer *trace_buf = &tr->array_buffer; 9052 int cpu = tracing_get_cpu(inode); 9053 struct trace_seq *s; 9054 unsigned long cnt; 9055 unsigned long long t; 9056 unsigned long usec_rem; 9057 9058 s = kmalloc(sizeof(*s), GFP_KERNEL); 9059 if (!s) 9060 return -ENOMEM; 9061 9062 trace_seq_init(s); 9063 9064 cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu); 9065 trace_seq_printf(s, "entries: %ld\n", cnt); 9066 9067 cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu); 9068 trace_seq_printf(s, "overrun: %ld\n", cnt); 9069 9070 cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu); 9071 trace_seq_printf(s, "commit overrun: %ld\n", cnt); 9072 9073 cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu); 9074 trace_seq_printf(s, "bytes: %ld\n", cnt); 9075 9076 if (trace_clocks[tr->clock_id].in_ns) { 9077 /* local or global for trace_clock */ 9078 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu)); 9079 usec_rem = do_div(t, USEC_PER_SEC); 9080 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n", 9081 t, usec_rem); 9082 9083 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer)); 9084 usec_rem = do_div(t, USEC_PER_SEC); 9085 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem); 9086 } else { 9087 /* counter or tsc mode for trace_clock */ 9088 trace_seq_printf(s, "oldest event ts: %llu\n", 9089 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu)); 9090 9091 trace_seq_printf(s, "now ts: %llu\n", 9092 ring_buffer_time_stamp(trace_buf->buffer)); 9093 } 9094 9095 cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu); 9096 trace_seq_printf(s, "dropped events: %ld\n", cnt); 9097 9098 cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu); 9099 trace_seq_printf(s, "read events: %ld\n", cnt); 9100 9101 count = simple_read_from_buffer(ubuf, count, ppos, 9102 s->buffer, trace_seq_used(s)); 9103 9104 kfree(s); 9105 9106 return count; 9107 } 9108 9109 static const struct file_operations tracing_stats_fops = { 9110 .open = tracing_open_generic_tr, 9111 .read = tracing_stats_read, 
9112 .llseek = generic_file_llseek, 9113 .release = tracing_release_generic_tr, 9114 }; 9115 9116 #ifdef CONFIG_DYNAMIC_FTRACE 9117 9118 static ssize_t 9119 tracing_read_dyn_info(struct file *filp, char __user *ubuf, 9120 size_t cnt, loff_t *ppos) 9121 { 9122 ssize_t ret; 9123 char *buf; 9124 int r; 9125 9126 /* 512 should be plenty to hold the amount needed */ 9127 #define DYN_INFO_BUF_SIZE 512 9128 9129 buf = kmalloc(DYN_INFO_BUF_SIZE, GFP_KERNEL); 9130 if (!buf) 9131 return -ENOMEM; 9132 9133 r = scnprintf(buf, DYN_INFO_BUF_SIZE, 9134 "%ld pages:%ld groups: %ld\n" 9135 "ftrace boot update time = %llu (ns)\n" 9136 "ftrace module total update time = %llu (ns)\n", 9137 ftrace_update_tot_cnt, 9138 ftrace_number_of_pages, 9139 ftrace_number_of_groups, 9140 ftrace_update_time, 9141 ftrace_total_mod_time); 9142 9143 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 9144 kfree(buf); 9145 return ret; 9146 } 9147 9148 static const struct file_operations tracing_dyn_info_fops = { 9149 .open = tracing_open_generic, 9150 .read = tracing_read_dyn_info, 9151 .llseek = generic_file_llseek, 9152 }; 9153 #endif /* CONFIG_DYNAMIC_FTRACE */ 9154 9155 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) 9156 static void 9157 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, 9158 struct trace_array *tr, struct ftrace_probe_ops *ops, 9159 void *data) 9160 { 9161 tracing_snapshot_instance(tr); 9162 } 9163 9164 static void 9165 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, 9166 struct trace_array *tr, struct ftrace_probe_ops *ops, 9167 void *data) 9168 { 9169 struct ftrace_func_mapper *mapper = data; 9170 long *count = NULL; 9171 9172 if (mapper) 9173 count = (long *)ftrace_func_mapper_find_ip(mapper, ip); 9174 9175 if (count) { 9176 9177 if (*count <= 0) 9178 return; 9179 9180 (*count)--; 9181 } 9182 9183 tracing_snapshot_instance(tr); 9184 } 9185 9186 static int 9187 ftrace_snapshot_print(struct seq_file *m, unsigned long ip, 9188 struct ftrace_probe_ops *ops, void *data) 9189 { 9190 struct ftrace_func_mapper *mapper = data; 9191 long *count = NULL; 9192 9193 seq_printf(m, "%ps:", (void *)ip); 9194 9195 seq_puts(m, "snapshot"); 9196 9197 if (mapper) 9198 count = (long *)ftrace_func_mapper_find_ip(mapper, ip); 9199 9200 if (count) 9201 seq_printf(m, ":count=%ld\n", *count); 9202 else 9203 seq_puts(m, ":unlimited\n"); 9204 9205 return 0; 9206 } 9207 9208 static int 9209 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr, 9210 unsigned long ip, void *init_data, void **data) 9211 { 9212 struct ftrace_func_mapper *mapper = *data; 9213 9214 if (!mapper) { 9215 mapper = allocate_ftrace_func_mapper(); 9216 if (!mapper) 9217 return -ENOMEM; 9218 *data = mapper; 9219 } 9220 9221 return ftrace_func_mapper_add_ip(mapper, ip, init_data); 9222 } 9223 9224 static void 9225 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr, 9226 unsigned long ip, void *data) 9227 { 9228 struct ftrace_func_mapper *mapper = data; 9229 9230 if (!ip) { 9231 if (!mapper) 9232 return; 9233 free_ftrace_func_mapper(mapper, NULL); 9234 return; 9235 } 9236 9237 ftrace_func_mapper_remove_ip(mapper, ip); 9238 } 9239 9240 static struct ftrace_probe_ops snapshot_probe_ops = { 9241 .func = ftrace_snapshot, 9242 .print = ftrace_snapshot_print, 9243 }; 9244 9245 static struct ftrace_probe_ops snapshot_count_probe_ops = { 9246 .func = ftrace_count_snapshot, 9247 .print = ftrace_snapshot_print, 9248 .init = ftrace_snapshot_init, 9249 .free = ftrace_snapshot_free, 
9250 }; 9251 9252 static int 9253 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash, 9254 char *glob, char *cmd, char *param, int enable) 9255 { 9256 struct ftrace_probe_ops *ops; 9257 void *count = (void *)-1; 9258 char *number; 9259 int ret; 9260 9261 if (!tr) 9262 return -ENODEV; 9263 9264 /* hash funcs only work with set_ftrace_filter */ 9265 if (!enable) 9266 return -EINVAL; 9267 9268 ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops; 9269 9270 if (glob[0] == '!') { 9271 ret = unregister_ftrace_function_probe_func(glob+1, tr, ops); 9272 if (!ret) 9273 tracing_disarm_snapshot(tr); 9274 9275 return ret; 9276 } 9277 9278 if (!param) 9279 goto out_reg; 9280 9281 number = strsep(&param, ":"); 9282 9283 if (!strlen(number)) 9284 goto out_reg; 9285 9286 /* 9287 * We use the callback data field (which is a pointer) 9288 * as our counter. 9289 */ 9290 ret = kstrtoul(number, 0, (unsigned long *)&count); 9291 if (ret) 9292 return ret; 9293 9294 out_reg: 9295 ret = tracing_arm_snapshot(tr); 9296 if (ret < 0) 9297 return ret; 9298 9299 ret = register_ftrace_function_probe(glob, tr, ops, count); 9300 if (ret < 0) 9301 tracing_disarm_snapshot(tr); 9302 9303 return ret < 0 ? ret : 0; 9304 } 9305 9306 static struct ftrace_func_command ftrace_snapshot_cmd = { 9307 .name = "snapshot", 9308 .func = ftrace_trace_snapshot_callback, 9309 }; 9310 9311 static __init int register_snapshot_cmd(void) 9312 { 9313 return register_ftrace_command(&ftrace_snapshot_cmd); 9314 } 9315 #else 9316 static inline __init int register_snapshot_cmd(void) { return 0; } 9317 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */ 9318 9319 static struct dentry *tracing_get_dentry(struct trace_array *tr) 9320 { 9321 /* Top directory uses NULL as the parent */ 9322 if (tr->flags & TRACE_ARRAY_FL_GLOBAL) 9323 return NULL; 9324 9325 if (WARN_ON(!tr->dir)) 9326 return ERR_PTR(-ENODEV); 9327 9328 /* All sub buffers have a descriptor */ 9329 return tr->dir; 9330 } 9331 9332 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu) 9333 { 9334 struct dentry *d_tracer; 9335 9336 if (tr->percpu_dir) 9337 return tr->percpu_dir; 9338 9339 d_tracer = tracing_get_dentry(tr); 9340 if (IS_ERR(d_tracer)) 9341 return NULL; 9342 9343 tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer); 9344 9345 MEM_FAIL(!tr->percpu_dir, 9346 "Could not create tracefs directory 'per_cpu/%d'\n", cpu); 9347 9348 return tr->percpu_dir; 9349 } 9350 9351 static struct dentry * 9352 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent, 9353 void *data, long cpu, const struct file_operations *fops) 9354 { 9355 struct dentry *ret = trace_create_file(name, mode, parent, data, fops); 9356 9357 if (ret) /* See tracing_get_cpu() */ 9358 d_inode(ret)->i_cdev = (void *)(cpu + 1); 9359 return ret; 9360 } 9361 9362 static void 9363 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu) 9364 { 9365 struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu); 9366 struct dentry *d_cpu; 9367 char cpu_dir[30]; /* 30 characters should be more than enough */ 9368 9369 if (!d_percpu) 9370 return; 9371 9372 snprintf(cpu_dir, 30, "cpu%ld", cpu); 9373 d_cpu = tracefs_create_dir(cpu_dir, d_percpu); 9374 if (!d_cpu) { 9375 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir); 9376 return; 9377 } 9378 9379 /* per cpu trace_pipe */ 9380 trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu, 9381 tr, cpu, &tracing_pipe_fops); 9382 9383 /* per cpu trace */ 9384
trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu, 9385 tr, cpu, &tracing_fops); 9386 9387 trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu, 9388 tr, cpu, &tracing_buffers_fops); 9389 9390 trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu, 9391 tr, cpu, &tracing_stats_fops); 9392 9393 trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu, 9394 tr, cpu, &tracing_entries_fops); 9395 9396 if (tr->range_addr_start) 9397 trace_create_cpu_file("buffer_meta", TRACE_MODE_READ, d_cpu, 9398 tr, cpu, &tracing_buffer_meta_fops); 9399 #ifdef CONFIG_TRACER_SNAPSHOT 9400 if (!tr->range_addr_start) { 9401 trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu, 9402 tr, cpu, &snapshot_fops); 9403 9404 trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu, 9405 tr, cpu, &snapshot_raw_fops); 9406 } 9407 #endif 9408 } 9409 9410 #ifdef CONFIG_FTRACE_SELFTEST 9411 /* Let selftest have access to static functions in this file */ 9412 #include "trace_selftest.c" 9413 #endif 9414 9415 static ssize_t 9416 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt, 9417 loff_t *ppos) 9418 { 9419 struct trace_option_dentry *topt = filp->private_data; 9420 char *buf; 9421 9422 if (topt->flags->val & topt->opt->bit) 9423 buf = "1\n"; 9424 else 9425 buf = "0\n"; 9426 9427 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2); 9428 } 9429 9430 static ssize_t 9431 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt, 9432 loff_t *ppos) 9433 { 9434 struct trace_option_dentry *topt = filp->private_data; 9435 unsigned long val; 9436 int ret; 9437 9438 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 9439 if (ret) 9440 return ret; 9441 9442 if (val != 0 && val != 1) 9443 return -EINVAL; 9444 9445 if (!!(topt->flags->val & topt->opt->bit) != val) { 9446 guard(mutex)(&trace_types_lock); 9447 ret = __set_tracer_option(topt->tr, topt->flags, 9448 topt->opt, !val); 9449 if (ret) 9450 return ret; 9451 } 9452 9453 *ppos += cnt; 9454 9455 return cnt; 9456 } 9457 9458 static int tracing_open_options(struct inode *inode, struct file *filp) 9459 { 9460 struct trace_option_dentry *topt = inode->i_private; 9461 int ret; 9462 9463 ret = tracing_check_open_get_tr(topt->tr); 9464 if (ret) 9465 return ret; 9466 9467 filp->private_data = inode->i_private; 9468 return 0; 9469 } 9470 9471 static int tracing_release_options(struct inode *inode, struct file *file) 9472 { 9473 struct trace_option_dentry *topt = file->private_data; 9474 9475 trace_array_put(topt->tr); 9476 return 0; 9477 } 9478 9479 static const struct file_operations trace_options_fops = { 9480 .open = tracing_open_options, 9481 .read = trace_options_read, 9482 .write = trace_options_write, 9483 .llseek = generic_file_llseek, 9484 .release = tracing_release_options, 9485 }; 9486 9487 /* 9488 * In order to pass in both the trace_array descriptor as well as the index 9489 * to the flag that the trace option file represents, the trace_array 9490 * has a character array of trace_flags_index[], which holds the index 9491 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc. 9492 * The address of this character array is passed to the flag option file 9493 * read/write callbacks. 9494 * 9495 * In order to extract both the index and the trace_array descriptor, 9496 * get_tr_index() uses the following algorithm. 9497 * 9498 * idx = *ptr; 9499 * 9500 * As the pointer itself contains the address of the index (remember 9501 * index[1] == 1). 
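 *
 * For example, the option file for flag bit 3 is handed
 * &tr->trace_flags_index[3]; init_trace_flags_index() stored 3 in that
 * slot, so dereferencing the pointer yields idx == 3.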
9502 * 9503 * Then to get the trace_array descriptor, by subtracting that index 9504 * from the ptr, we get to the start of the index itself. 9505 * 9506 * ptr - idx == &index[0] 9507 * 9508 * Then a simple container_of() from that pointer gets us to the 9509 * trace_array descriptor. 9510 */ 9511 static void get_tr_index(void *data, struct trace_array **ptr, 9512 unsigned int *pindex) 9513 { 9514 *pindex = *(unsigned char *)data; 9515 9516 *ptr = container_of(data - *pindex, struct trace_array, 9517 trace_flags_index); 9518 } 9519 9520 static ssize_t 9521 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt, 9522 loff_t *ppos) 9523 { 9524 void *tr_index = filp->private_data; 9525 struct trace_array *tr; 9526 unsigned int index; 9527 char *buf; 9528 9529 get_tr_index(tr_index, &tr, &index); 9530 9531 if (tr->trace_flags & (1ULL << index)) 9532 buf = "1\n"; 9533 else 9534 buf = "0\n"; 9535 9536 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2); 9537 } 9538 9539 static ssize_t 9540 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt, 9541 loff_t *ppos) 9542 { 9543 void *tr_index = filp->private_data; 9544 struct trace_array *tr; 9545 unsigned int index; 9546 unsigned long val; 9547 int ret; 9548 9549 get_tr_index(tr_index, &tr, &index); 9550 9551 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 9552 if (ret) 9553 return ret; 9554 9555 if (val != 0 && val != 1) 9556 return -EINVAL; 9557 9558 mutex_lock(&event_mutex); 9559 mutex_lock(&trace_types_lock); 9560 ret = set_tracer_flag(tr, 1ULL << index, val); 9561 mutex_unlock(&trace_types_lock); 9562 mutex_unlock(&event_mutex); 9563 9564 if (ret < 0) 9565 return ret; 9566 9567 *ppos += cnt; 9568 9569 return cnt; 9570 } 9571 9572 static const struct file_operations trace_options_core_fops = { 9573 .open = tracing_open_generic, 9574 .read = trace_options_core_read, 9575 .write = trace_options_core_write, 9576 .llseek = generic_file_llseek, 9577 }; 9578 9579 struct dentry *trace_create_file(const char *name, 9580 umode_t mode, 9581 struct dentry *parent, 9582 void *data, 9583 const struct file_operations *fops) 9584 { 9585 struct dentry *ret; 9586 9587 ret = tracefs_create_file(name, mode, parent, data, fops); 9588 if (!ret) 9589 pr_warn("Could not create tracefs '%s' entry\n", name); 9590 9591 return ret; 9592 } 9593 9594 9595 static struct dentry *trace_options_init_dentry(struct trace_array *tr) 9596 { 9597 struct dentry *d_tracer; 9598 9599 if (tr->options) 9600 return tr->options; 9601 9602 d_tracer = tracing_get_dentry(tr); 9603 if (IS_ERR(d_tracer)) 9604 return NULL; 9605 9606 tr->options = tracefs_create_dir("options", d_tracer); 9607 if (!tr->options) { 9608 pr_warn("Could not create tracefs directory 'options'\n"); 9609 return NULL; 9610 } 9611 9612 return tr->options; 9613 } 9614 9615 static void 9616 create_trace_option_file(struct trace_array *tr, 9617 struct trace_option_dentry *topt, 9618 struct tracer_flags *flags, 9619 struct tracer_opt *opt) 9620 { 9621 struct dentry *t_options; 9622 9623 t_options = trace_options_init_dentry(tr); 9624 if (!t_options) 9625 return; 9626 9627 topt->flags = flags; 9628 topt->opt = opt; 9629 topt->tr = tr; 9630 9631 topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE, 9632 t_options, topt, &trace_options_fops); 9633 } 9634 9635 static int 9636 create_trace_option_files(struct trace_array *tr, struct tracer *tracer, 9637 struct tracer_flags *flags) 9638 { 9639 struct trace_option_dentry *topts; 9640 struct trace_options *tr_topts; 9641 struct 
tracer_opt *opts; 9642 int cnt; 9643 9644 if (!flags || !flags->opts) 9645 return 0; 9646 9647 opts = flags->opts; 9648 9649 for (cnt = 0; opts[cnt].name; cnt++) 9650 ; 9651 9652 topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL); 9653 if (!topts) 9654 return 0; 9655 9656 tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1), 9657 GFP_KERNEL); 9658 if (!tr_topts) { 9659 kfree(topts); 9660 return -ENOMEM; 9661 } 9662 9663 tr->topts = tr_topts; 9664 tr->topts[tr->nr_topts].tracer = tracer; 9665 tr->topts[tr->nr_topts].topts = topts; 9666 tr->nr_topts++; 9667 9668 for (cnt = 0; opts[cnt].name; cnt++) { 9669 create_trace_option_file(tr, &topts[cnt], flags, 9670 &opts[cnt]); 9671 MEM_FAIL(topts[cnt].entry == NULL, 9672 "Failed to create trace option: %s", 9673 opts[cnt].name); 9674 } 9675 return 0; 9676 } 9677 9678 static int get_global_flags_val(struct tracer *tracer) 9679 { 9680 struct tracers *t; 9681 9682 list_for_each_entry(t, &global_trace.tracers, list) { 9683 if (t->tracer != tracer) 9684 continue; 9685 if (!t->flags) 9686 return -1; 9687 return t->flags->val; 9688 } 9689 return -1; 9690 } 9691 9692 static int add_tracer_options(struct trace_array *tr, struct tracers *t) 9693 { 9694 struct tracer *tracer = t->tracer; 9695 struct tracer_flags *flags = t->flags ?: tracer->flags; 9696 9697 if (!flags) 9698 return 0; 9699 9700 /* Only add tracer options after update_tracer_options finish */ 9701 if (!tracer_options_updated) 9702 return 0; 9703 9704 return create_trace_option_files(tr, tracer, flags); 9705 } 9706 9707 static int add_tracer(struct trace_array *tr, struct tracer *tracer) 9708 { 9709 struct tracer_flags *flags; 9710 struct tracers *t; 9711 int ret; 9712 9713 /* Only enable if the directory has been created already. */ 9714 if (!tr->dir && !(tr->flags & TRACE_ARRAY_FL_GLOBAL)) 9715 return 0; 9716 9717 /* 9718 * If this is an instance, only create flags for tracers 9719 * the instance may have. 9720 */ 9721 if (!trace_ok_for_array(tracer, tr)) 9722 return 0; 9723 9724 t = kmalloc(sizeof(*t), GFP_KERNEL); 9725 if (!t) 9726 return -ENOMEM; 9727 9728 t->tracer = tracer; 9729 t->flags = NULL; 9730 list_add(&t->list, &tr->tracers); 9731 9732 flags = tracer->flags; 9733 if (!flags) { 9734 if (!tracer->default_flags) 9735 return 0; 9736 9737 /* 9738 * If the tracer defines default flags, it means the flags are 9739 * per trace instance. 
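 * A private copy is made below so each trace instance gets its own
 * flag state; for sub-instances the initial value is seeded from the
 * top level instance via get_global_flags_val(), so toggling an option
 * in one instance does not leak into another.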
9740 */ 9741 flags = kmalloc(sizeof(*flags), GFP_KERNEL); 9742 if (!flags) 9743 return -ENOMEM; 9744 9745 *flags = *tracer->default_flags; 9746 flags->trace = tracer; 9747 9748 t->flags = flags; 9749 9750 /* If this is an instance, inherit the global_trace flags */ 9751 if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL)) { 9752 int val = get_global_flags_val(tracer); 9753 if (!WARN_ON_ONCE(val < 0)) 9754 flags->val = val; 9755 } 9756 } 9757 9758 ret = add_tracer_options(tr, t); 9759 if (ret < 0) { 9760 list_del(&t->list); 9761 kfree(t->flags); 9762 kfree(t); 9763 } 9764 9765 return ret; 9766 } 9767 9768 static struct dentry * 9769 create_trace_option_core_file(struct trace_array *tr, 9770 const char *option, long index) 9771 { 9772 struct dentry *t_options; 9773 9774 t_options = trace_options_init_dentry(tr); 9775 if (!t_options) 9776 return NULL; 9777 9778 return trace_create_file(option, TRACE_MODE_WRITE, t_options, 9779 (void *)&tr->trace_flags_index[index], 9780 &trace_options_core_fops); 9781 } 9782 9783 static void create_trace_options_dir(struct trace_array *tr) 9784 { 9785 struct dentry *t_options; 9786 bool top_level = tr == &global_trace; 9787 int i; 9788 9789 t_options = trace_options_init_dentry(tr); 9790 if (!t_options) 9791 return; 9792 9793 for (i = 0; trace_options[i]; i++) { 9794 if (top_level || 9795 !((1ULL << i) & TOP_LEVEL_TRACE_FLAGS)) { 9796 create_trace_option_core_file(tr, trace_options[i], i); 9797 } 9798 } 9799 } 9800 9801 static ssize_t 9802 rb_simple_read(struct file *filp, char __user *ubuf, 9803 size_t cnt, loff_t *ppos) 9804 { 9805 struct trace_array *tr = filp->private_data; 9806 char buf[64]; 9807 int r; 9808 9809 r = tracer_tracing_is_on(tr); 9810 r = sprintf(buf, "%d\n", r); 9811 9812 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 9813 } 9814 9815 static ssize_t 9816 rb_simple_write(struct file *filp, const char __user *ubuf, 9817 size_t cnt, loff_t *ppos) 9818 { 9819 struct trace_array *tr = filp->private_data; 9820 struct trace_buffer *buffer = tr->array_buffer.buffer; 9821 unsigned long val; 9822 int ret; 9823 9824 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 9825 if (ret) 9826 return ret; 9827 9828 if (buffer) { 9829 guard(mutex)(&trace_types_lock); 9830 if (!!val == tracer_tracing_is_on(tr)) { 9831 val = 0; /* do nothing */ 9832 } else if (val) { 9833 tracer_tracing_on(tr); 9834 if (tr->current_trace->start) 9835 tr->current_trace->start(tr); 9836 } else { 9837 tracer_tracing_off(tr); 9838 if (tr->current_trace->stop) 9839 tr->current_trace->stop(tr); 9840 /* Wake up any waiters */ 9841 ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS); 9842 } 9843 } 9844 9845 (*ppos)++; 9846 9847 return cnt; 9848 } 9849 9850 static const struct file_operations rb_simple_fops = { 9851 .open = tracing_open_generic_tr, 9852 .read = rb_simple_read, 9853 .write = rb_simple_write, 9854 .release = tracing_release_generic_tr, 9855 .llseek = default_llseek, 9856 }; 9857 9858 static ssize_t 9859 buffer_percent_read(struct file *filp, char __user *ubuf, 9860 size_t cnt, loff_t *ppos) 9861 { 9862 struct trace_array *tr = filp->private_data; 9863 char buf[64]; 9864 int r; 9865 9866 r = tr->buffer_percent; 9867 r = sprintf(buf, "%d\n", r); 9868 9869 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 9870 } 9871 9872 static ssize_t 9873 buffer_percent_write(struct file *filp, const char __user *ubuf, 9874 size_t cnt, loff_t *ppos) 9875 { 9876 struct trace_array *tr = filp->private_data; 9877 unsigned long val; 9878 int ret; 9879 9880 ret = kstrtoul_from_user(ubuf, 
cnt, 10, &val); 9881 if (ret) 9882 return ret; 9883 9884 if (val > 100) 9885 return -EINVAL; 9886 9887 tr->buffer_percent = val; 9888 9889 (*ppos)++; 9890 9891 return cnt; 9892 } 9893 9894 static const struct file_operations buffer_percent_fops = { 9895 .open = tracing_open_generic_tr, 9896 .read = buffer_percent_read, 9897 .write = buffer_percent_write, 9898 .release = tracing_release_generic_tr, 9899 .llseek = default_llseek, 9900 }; 9901 9902 static ssize_t 9903 buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) 9904 { 9905 struct trace_array *tr = filp->private_data; 9906 size_t size; 9907 char buf[64]; 9908 int order; 9909 int r; 9910 9911 order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer); 9912 size = (PAGE_SIZE << order) / 1024; 9913 9914 r = sprintf(buf, "%zd\n", size); 9915 9916 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 9917 } 9918 9919 static ssize_t 9920 buffer_subbuf_size_write(struct file *filp, const char __user *ubuf, 9921 size_t cnt, loff_t *ppos) 9922 { 9923 struct trace_array *tr = filp->private_data; 9924 unsigned long val; 9925 int old_order; 9926 int order; 9927 int pages; 9928 int ret; 9929 9930 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 9931 if (ret) 9932 return ret; 9933 9934 val *= 1024; /* value passed in is in KB */ 9935 9936 pages = DIV_ROUND_UP(val, PAGE_SIZE); 9937 order = fls(pages - 1); 9938 9939 /* limit between 1 and 128 system pages */ 9940 if (order < 0 || order > 7) 9941 return -EINVAL; 9942 9943 /* Do not allow tracing while changing the order of the ring buffer */ 9944 tracing_stop_tr(tr); 9945 9946 old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer); 9947 if (old_order == order) 9948 goto out; 9949 9950 ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order); 9951 if (ret) 9952 goto out; 9953 9954 #ifdef CONFIG_TRACER_MAX_TRACE 9955 9956 if (!tr->allocated_snapshot) 9957 goto out_max; 9958 9959 ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order); 9960 if (ret) { 9961 /* Put back the old order */ 9962 cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order); 9963 if (WARN_ON_ONCE(cnt)) { 9964 /* 9965 * AARGH! We are left with different orders! 9966 * The max buffer is our "snapshot" buffer. 9967 * When a tracer needs a snapshot (one of the 9968 * latency tracers), it swaps the max buffer 9969 * with the saved snap shot. We succeeded to 9970 * update the order of the main buffer, but failed to 9971 * update the order of the max buffer. But when we tried 9972 * to reset the main buffer to the original size, we 9973 * failed there too. This is very unlikely to 9974 * happen, but if it does, warn and kill all 9975 * tracing. 
9976 */ 9977 tracing_disabled = 1; 9978 } 9979 goto out; 9980 } 9981 out_max: 9982 #endif 9983 (*ppos)++; 9984 out: 9985 if (ret) 9986 cnt = ret; 9987 tracing_start_tr(tr); 9988 return cnt; 9989 } 9990 9991 static const struct file_operations buffer_subbuf_size_fops = { 9992 .open = tracing_open_generic_tr, 9993 .read = buffer_subbuf_size_read, 9994 .write = buffer_subbuf_size_write, 9995 .release = tracing_release_generic_tr, 9996 .llseek = default_llseek, 9997 }; 9998 9999 static struct dentry *trace_instance_dir; 10000 10001 static void 10002 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer); 10003 10004 #ifdef CONFIG_MODULES 10005 static int make_mod_delta(struct module *mod, void *data) 10006 { 10007 struct trace_module_delta *module_delta; 10008 struct trace_scratch *tscratch; 10009 struct trace_mod_entry *entry; 10010 struct trace_array *tr = data; 10011 int i; 10012 10013 tscratch = tr->scratch; 10014 module_delta = READ_ONCE(tr->module_delta); 10015 for (i = 0; i < tscratch->nr_entries; i++) { 10016 entry = &tscratch->entries[i]; 10017 if (strcmp(mod->name, entry->mod_name)) 10018 continue; 10019 if (mod->state == MODULE_STATE_GOING) 10020 module_delta->delta[i] = 0; 10021 else 10022 module_delta->delta[i] = (unsigned long)mod->mem[MOD_TEXT].base 10023 - entry->mod_addr; 10024 break; 10025 } 10026 return 0; 10027 } 10028 #else 10029 static int make_mod_delta(struct module *mod, void *data) 10030 { 10031 return 0; 10032 } 10033 #endif 10034 10035 static int mod_addr_comp(const void *a, const void *b, const void *data) 10036 { 10037 const struct trace_mod_entry *e1 = a; 10038 const struct trace_mod_entry *e2 = b; 10039 10040 return e1->mod_addr > e2->mod_addr ? 1 : -1; 10041 } 10042 10043 static void setup_trace_scratch(struct trace_array *tr, 10044 struct trace_scratch *tscratch, unsigned int size) 10045 { 10046 struct trace_module_delta *module_delta; 10047 struct trace_mod_entry *entry; 10048 int i, nr_entries; 10049 10050 if (!tscratch) 10051 return; 10052 10053 tr->scratch = tscratch; 10054 tr->scratch_size = size; 10055 10056 if (tscratch->text_addr) 10057 tr->text_delta = (unsigned long)_text - tscratch->text_addr; 10058 10059 if (struct_size(tscratch, entries, tscratch->nr_entries) > size) 10060 goto reset; 10061 10062 /* Check if each module name is a valid string */ 10063 for (i = 0; i < tscratch->nr_entries; i++) { 10064 int n; 10065 10066 entry = &tscratch->entries[i]; 10067 10068 for (n = 0; n < MODULE_NAME_LEN; n++) { 10069 if (entry->mod_name[n] == '\0') 10070 break; 10071 if (!isprint(entry->mod_name[n])) 10072 goto reset; 10073 } 10074 if (n == MODULE_NAME_LEN) 10075 goto reset; 10076 } 10077 10078 /* Sort the entries so that we can find appropriate module from address. */ 10079 nr_entries = tscratch->nr_entries; 10080 sort_r(tscratch->entries, nr_entries, sizeof(struct trace_mod_entry), 10081 mod_addr_comp, NULL, NULL); 10082 10083 if (IS_ENABLED(CONFIG_MODULES)) { 10084 module_delta = kzalloc(struct_size(module_delta, delta, nr_entries), GFP_KERNEL); 10085 if (!module_delta) { 10086 pr_info("module_delta allocation failed. Not able to decode module address."); 10087 goto reset; 10088 } 10089 init_rcu_head(&module_delta->rcu); 10090 } else 10091 module_delta = NULL; 10092 WRITE_ONCE(tr->module_delta, module_delta); 10093 10094 /* Scan modules to make text delta for modules. */ 10095 module_for_each_mod(make_mod_delta, tr); 10096 10097 /* Set trace_clock as the same of the previous boot. 
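 * (Timestamps already recorded in a boot-persistent buffer only make
 * sense when read with the clock that produced them, so restore the
 * saved clock_id here rather than keeping the default.)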
*/ 10098 if (tscratch->clock_id != tr->clock_id) { 10099 if (tscratch->clock_id >= ARRAY_SIZE(trace_clocks) || 10100 tracing_set_clock(tr, trace_clocks[tscratch->clock_id].name) < 0) { 10101 pr_info("the previous trace_clock info is not valid."); 10102 goto reset; 10103 } 10104 } 10105 return; 10106 reset: 10107 /* Invalid trace modules */ 10108 memset(tscratch, 0, size); 10109 } 10110 10111 static int 10112 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size) 10113 { 10114 enum ring_buffer_flags rb_flags; 10115 struct trace_scratch *tscratch; 10116 unsigned int scratch_size = 0; 10117 10118 rb_flags = tr->trace_flags & TRACE_ITER(OVERWRITE) ? RB_FL_OVERWRITE : 0; 10119 10120 buf->tr = tr; 10121 10122 if (tr->range_addr_start && tr->range_addr_size) { 10123 /* Add scratch buffer to handle 128 modules */ 10124 buf->buffer = ring_buffer_alloc_range(size, rb_flags, 0, 10125 tr->range_addr_start, 10126 tr->range_addr_size, 10127 struct_size(tscratch, entries, 128)); 10128 10129 tscratch = ring_buffer_meta_scratch(buf->buffer, &scratch_size); 10130 setup_trace_scratch(tr, tscratch, scratch_size); 10131 10132 /* 10133 * This is basically the same as a mapped buffer, 10134 * with the same restrictions. 10135 */ 10136 tr->mapped++; 10137 } else { 10138 buf->buffer = ring_buffer_alloc(size, rb_flags); 10139 } 10140 if (!buf->buffer) 10141 return -ENOMEM; 10142 10143 buf->data = alloc_percpu(struct trace_array_cpu); 10144 if (!buf->data) { 10145 ring_buffer_free(buf->buffer); 10146 buf->buffer = NULL; 10147 return -ENOMEM; 10148 } 10149 10150 /* Allocate the first page for all buffers */ 10151 set_buffer_entries(&tr->array_buffer, 10152 ring_buffer_size(tr->array_buffer.buffer, 0)); 10153 10154 return 0; 10155 } 10156 10157 static void free_trace_buffer(struct array_buffer *buf) 10158 { 10159 if (buf->buffer) { 10160 ring_buffer_free(buf->buffer); 10161 buf->buffer = NULL; 10162 free_percpu(buf->data); 10163 buf->data = NULL; 10164 } 10165 } 10166 10167 static int allocate_trace_buffers(struct trace_array *tr, int size) 10168 { 10169 int ret; 10170 10171 ret = allocate_trace_buffer(tr, &tr->array_buffer, size); 10172 if (ret) 10173 return ret; 10174 10175 #ifdef CONFIG_TRACER_MAX_TRACE 10176 /* Fix mapped buffer trace arrays do not have snapshot buffers */ 10177 if (tr->range_addr_start) 10178 return 0; 10179 10180 ret = allocate_trace_buffer(tr, &tr->max_buffer, 10181 allocate_snapshot ? 
size : 1); 10182 if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) { 10183 free_trace_buffer(&tr->array_buffer); 10184 return -ENOMEM; 10185 } 10186 tr->allocated_snapshot = allocate_snapshot; 10187 10188 allocate_snapshot = false; 10189 #endif 10190 10191 return 0; 10192 } 10193 10194 static void free_trace_buffers(struct trace_array *tr) 10195 { 10196 if (!tr) 10197 return; 10198 10199 free_trace_buffer(&tr->array_buffer); 10200 kfree(tr->module_delta); 10201 10202 #ifdef CONFIG_TRACER_MAX_TRACE 10203 free_trace_buffer(&tr->max_buffer); 10204 #endif 10205 } 10206 10207 static void init_trace_flags_index(struct trace_array *tr) 10208 { 10209 int i; 10210 10211 /* Used by the trace options files */ 10212 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) 10213 tr->trace_flags_index[i] = i; 10214 } 10215 10216 static int __update_tracer(struct trace_array *tr) 10217 { 10218 struct tracer *t; 10219 int ret = 0; 10220 10221 for (t = trace_types; t && !ret; t = t->next) 10222 ret = add_tracer(tr, t); 10223 10224 return ret; 10225 } 10226 10227 static __init int __update_tracer_options(struct trace_array *tr) 10228 { 10229 struct tracers *t; 10230 int ret = 0; 10231 10232 list_for_each_entry(t, &tr->tracers, list) { 10233 ret = add_tracer_options(tr, t); 10234 if (ret < 0) 10235 break; 10236 } 10237 10238 return ret; 10239 } 10240 10241 static __init void update_tracer_options(void) 10242 { 10243 struct trace_array *tr; 10244 10245 guard(mutex)(&trace_types_lock); 10246 tracer_options_updated = true; 10247 list_for_each_entry(tr, &ftrace_trace_arrays, list) 10248 __update_tracer_options(tr); 10249 } 10250 10251 /* Must have trace_types_lock held */ 10252 struct trace_array *trace_array_find(const char *instance) 10253 { 10254 struct trace_array *tr, *found = NULL; 10255 10256 list_for_each_entry(tr, &ftrace_trace_arrays, list) { 10257 if (tr->name && strcmp(tr->name, instance) == 0) { 10258 found = tr; 10259 break; 10260 } 10261 } 10262 10263 return found; 10264 } 10265 10266 struct trace_array *trace_array_find_get(const char *instance) 10267 { 10268 struct trace_array *tr; 10269 10270 guard(mutex)(&trace_types_lock); 10271 tr = trace_array_find(instance); 10272 if (tr) 10273 tr->ref++; 10274 10275 return tr; 10276 } 10277 10278 static int trace_array_create_dir(struct trace_array *tr) 10279 { 10280 int ret; 10281 10282 tr->dir = tracefs_create_dir(tr->name, trace_instance_dir); 10283 if (!tr->dir) 10284 return -EINVAL; 10285 10286 ret = event_trace_add_tracer(tr->dir, tr); 10287 if (ret) { 10288 tracefs_remove(tr->dir); 10289 return ret; 10290 } 10291 10292 init_tracer_tracefs(tr, tr->dir); 10293 ret = __update_tracer(tr); 10294 if (ret) { 10295 event_trace_del_tracer(tr); 10296 tracefs_remove(tr->dir); 10297 return ret; 10298 } 10299 return 0; 10300 } 10301 10302 static struct trace_array * 10303 trace_array_create_systems(const char *name, const char *systems, 10304 unsigned long range_addr_start, 10305 unsigned long range_addr_size) 10306 { 10307 struct trace_array *tr; 10308 int ret; 10309 10310 ret = -ENOMEM; 10311 tr = kzalloc(sizeof(*tr), GFP_KERNEL); 10312 if (!tr) 10313 return ERR_PTR(ret); 10314 10315 tr->name = kstrdup(name, GFP_KERNEL); 10316 if (!tr->name) 10317 goto out_free_tr; 10318 10319 if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL)) 10320 goto out_free_tr; 10321 10322 if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL)) 10323 goto out_free_tr; 10324 10325 if (systems) { 10326 tr->system_names = kstrdup_const(systems, GFP_KERNEL); 10327 if (!tr->system_names) 
10328 goto out_free_tr; 10329 } 10330 10331 /* Only for boot up memory mapped ring buffers */ 10332 tr->range_addr_start = range_addr_start; 10333 tr->range_addr_size = range_addr_size; 10334 10335 tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS; 10336 10337 cpumask_copy(tr->tracing_cpumask, cpu_all_mask); 10338 10339 raw_spin_lock_init(&tr->start_lock); 10340 10341 tr->syscall_buf_sz = global_trace.syscall_buf_sz; 10342 10343 tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; 10344 #ifdef CONFIG_TRACER_MAX_TRACE 10345 spin_lock_init(&tr->snapshot_trigger_lock); 10346 #endif 10347 tr->current_trace = &nop_trace; 10348 tr->current_trace_flags = nop_trace.flags; 10349 10350 INIT_LIST_HEAD(&tr->systems); 10351 INIT_LIST_HEAD(&tr->events); 10352 INIT_LIST_HEAD(&tr->hist_vars); 10353 INIT_LIST_HEAD(&tr->err_log); 10354 INIT_LIST_HEAD(&tr->tracers); 10355 INIT_LIST_HEAD(&tr->marker_list); 10356 10357 #ifdef CONFIG_MODULES 10358 INIT_LIST_HEAD(&tr->mod_events); 10359 #endif 10360 10361 if (allocate_trace_buffers(tr, trace_buf_size) < 0) 10362 goto out_free_tr; 10363 10364 /* The ring buffer is defaultly expanded */ 10365 trace_set_ring_buffer_expanded(tr); 10366 10367 if (ftrace_allocate_ftrace_ops(tr) < 0) 10368 goto out_free_tr; 10369 10370 ftrace_init_trace_array(tr); 10371 10372 init_trace_flags_index(tr); 10373 10374 if (trace_instance_dir) { 10375 ret = trace_array_create_dir(tr); 10376 if (ret) 10377 goto out_free_tr; 10378 } else 10379 __trace_early_add_events(tr); 10380 10381 list_add(&tr->list, &ftrace_trace_arrays); 10382 10383 tr->ref++; 10384 10385 return tr; 10386 10387 out_free_tr: 10388 ftrace_free_ftrace_ops(tr); 10389 free_trace_buffers(tr); 10390 free_cpumask_var(tr->pipe_cpumask); 10391 free_cpumask_var(tr->tracing_cpumask); 10392 kfree_const(tr->system_names); 10393 kfree(tr->range_name); 10394 kfree(tr->name); 10395 kfree(tr); 10396 10397 return ERR_PTR(ret); 10398 } 10399 10400 static struct trace_array *trace_array_create(const char *name) 10401 { 10402 return trace_array_create_systems(name, NULL, 0, 0); 10403 } 10404 10405 static int instance_mkdir(const char *name) 10406 { 10407 struct trace_array *tr; 10408 int ret; 10409 10410 guard(mutex)(&event_mutex); 10411 guard(mutex)(&trace_types_lock); 10412 10413 ret = -EEXIST; 10414 if (trace_array_find(name)) 10415 return -EEXIST; 10416 10417 tr = trace_array_create(name); 10418 10419 ret = PTR_ERR_OR_ZERO(tr); 10420 10421 return ret; 10422 } 10423 10424 #ifdef CONFIG_MMU 10425 static u64 map_pages(unsigned long start, unsigned long size) 10426 { 10427 unsigned long vmap_start, vmap_end; 10428 struct vm_struct *area; 10429 int ret; 10430 10431 area = get_vm_area(size, VM_IOREMAP); 10432 if (!area) 10433 return 0; 10434 10435 vmap_start = (unsigned long) area->addr; 10436 vmap_end = vmap_start + size; 10437 10438 ret = vmap_page_range(vmap_start, vmap_end, 10439 start, pgprot_nx(PAGE_KERNEL)); 10440 if (ret < 0) { 10441 free_vm_area(area); 10442 return 0; 10443 } 10444 10445 return (u64)vmap_start; 10446 } 10447 #else 10448 static inline u64 map_pages(unsigned long start, unsigned long size) 10449 { 10450 return 0; 10451 } 10452 #endif 10453 10454 /** 10455 * trace_array_get_by_name - Create/Lookup a trace array, given its name. 10456 * @name: The name of the trace array to be looked up/created. 10457 * @systems: A list of systems to create event directories for (NULL for all) 10458 * 10459 * Returns pointer to trace array with given name. 10460 * NULL, if it cannot be created. 
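 *
 * A minimal usage sketch (illustrative only; the instance name, the chosen
 * event and the error handling are placeholders, not a required sequence):
 *
 *	tr = trace_array_get_by_name("my_instance", NULL);
 *	if (tr) {
 *		trace_array_set_clr_event(tr, "sched", "sched_switch", true);
 *		trace_array_put(tr);
 *	}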
10461 * 10462 * NOTE: This function increments the reference counter associated with the 10463 * trace array returned. This makes sure it cannot be freed while in use. 10464 * Use trace_array_put() once the trace array is no longer needed. 10465 * If the trace_array is to be freed, trace_array_destroy() needs to 10466 * be called after the trace_array_put(), or simply let user space delete 10467 * it from the tracefs instances directory. But until the 10468 * trace_array_put() is called, user space can not delete it. 10469 * 10470 */ 10471 struct trace_array *trace_array_get_by_name(const char *name, const char *systems) 10472 { 10473 struct trace_array *tr; 10474 10475 guard(mutex)(&event_mutex); 10476 guard(mutex)(&trace_types_lock); 10477 10478 list_for_each_entry(tr, &ftrace_trace_arrays, list) { 10479 if (tr->name && strcmp(tr->name, name) == 0) { 10480 tr->ref++; 10481 return tr; 10482 } 10483 } 10484 10485 tr = trace_array_create_systems(name, systems, 0, 0); 10486 10487 if (IS_ERR(tr)) 10488 tr = NULL; 10489 else 10490 tr->ref++; 10491 10492 return tr; 10493 } 10494 EXPORT_SYMBOL_GPL(trace_array_get_by_name); 10495 10496 static int __remove_instance(struct trace_array *tr) 10497 { 10498 int i; 10499 10500 /* Reference counter for a newly created trace array = 1. */ 10501 if (tr->ref > 1 || (tr->current_trace && tr->trace_ref)) 10502 return -EBUSY; 10503 10504 list_del(&tr->list); 10505 10506 /* Disable all the flags that were enabled coming in */ 10507 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) { 10508 if ((1 << i) & ZEROED_TRACE_FLAGS) 10509 set_tracer_flag(tr, 1ULL << i, 0); 10510 } 10511 10512 if (printk_trace == tr) 10513 update_printk_trace(&global_trace); 10514 10515 if (update_marker_trace(tr, 0)) 10516 synchronize_rcu(); 10517 10518 tracing_set_nop(tr); 10519 clear_ftrace_function_probes(tr); 10520 event_trace_del_tracer(tr); 10521 ftrace_clear_pids(tr); 10522 ftrace_destroy_function_files(tr); 10523 tracefs_remove(tr->dir); 10524 free_percpu(tr->last_func_repeats); 10525 free_trace_buffers(tr); 10526 clear_tracing_err_log(tr); 10527 free_tracers(tr); 10528 10529 if (tr->range_name) { 10530 reserve_mem_release_by_name(tr->range_name); 10531 kfree(tr->range_name); 10532 } 10533 if (tr->flags & TRACE_ARRAY_FL_VMALLOC) 10534 vfree((void *)tr->range_addr_start); 10535 10536 for (i = 0; i < tr->nr_topts; i++) { 10537 kfree(tr->topts[i].topts); 10538 } 10539 kfree(tr->topts); 10540 10541 free_cpumask_var(tr->pipe_cpumask); 10542 free_cpumask_var(tr->tracing_cpumask); 10543 kfree_const(tr->system_names); 10544 kfree(tr->name); 10545 kfree(tr); 10546 10547 return 0; 10548 } 10549 10550 int trace_array_destroy(struct trace_array *this_tr) 10551 { 10552 struct trace_array *tr; 10553 10554 if (!this_tr) 10555 return -EINVAL; 10556 10557 guard(mutex)(&event_mutex); 10558 guard(mutex)(&trace_types_lock); 10559 10560 10561 /* Making sure trace array exists before destroying it. 
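 * A caller is expected to have dropped its own reference with
 * trace_array_put() first, at which point user space could already have
 * removed the instance, so only tear it down if it is still on the list.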
*/ 10562 list_for_each_entry(tr, &ftrace_trace_arrays, list) { 10563 if (tr == this_tr) 10564 return __remove_instance(tr); 10565 } 10566 10567 return -ENODEV; 10568 } 10569 EXPORT_SYMBOL_GPL(trace_array_destroy); 10570 10571 static int instance_rmdir(const char *name) 10572 { 10573 struct trace_array *tr; 10574 10575 guard(mutex)(&event_mutex); 10576 guard(mutex)(&trace_types_lock); 10577 10578 tr = trace_array_find(name); 10579 if (!tr) 10580 return -ENODEV; 10581 10582 return __remove_instance(tr); 10583 } 10584 10585 static __init void create_trace_instances(struct dentry *d_tracer) 10586 { 10587 struct trace_array *tr; 10588 10589 trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer, 10590 instance_mkdir, 10591 instance_rmdir); 10592 if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n")) 10593 return; 10594 10595 guard(mutex)(&event_mutex); 10596 guard(mutex)(&trace_types_lock); 10597 10598 list_for_each_entry(tr, &ftrace_trace_arrays, list) { 10599 if (!tr->name) 10600 continue; 10601 if (MEM_FAIL(trace_array_create_dir(tr) < 0, 10602 "Failed to create instance directory\n")) 10603 return; 10604 } 10605 } 10606 10607 static void 10608 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer) 10609 { 10610 int cpu; 10611 10612 trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer, 10613 tr, &show_traces_fops); 10614 10615 trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer, 10616 tr, &set_tracer_fops); 10617 10618 trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer, 10619 tr, &tracing_cpumask_fops); 10620 10621 trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer, 10622 tr, &tracing_iter_fops); 10623 10624 trace_create_file("trace", TRACE_MODE_WRITE, d_tracer, 10625 tr, &tracing_fops); 10626 10627 trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer, 10628 tr, &tracing_pipe_fops); 10629 10630 trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer, 10631 tr, &tracing_entries_fops); 10632 10633 trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer, 10634 tr, &tracing_total_entries_fops); 10635 10636 trace_create_file("free_buffer", 0200, d_tracer, 10637 tr, &tracing_free_buffer_fops); 10638 10639 trace_create_file("trace_marker", 0220, d_tracer, 10640 tr, &tracing_mark_fops); 10641 10642 tr->trace_marker_file = __find_event_file(tr, "ftrace", "print"); 10643 10644 trace_create_file("trace_marker_raw", 0220, d_tracer, 10645 tr, &tracing_mark_raw_fops); 10646 10647 trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr, 10648 &trace_clock_fops); 10649 10650 trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer, 10651 tr, &rb_simple_fops); 10652 10653 trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr, 10654 &trace_time_stamp_mode_fops); 10655 10656 tr->buffer_percent = 50; 10657 10658 trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer, 10659 tr, &buffer_percent_fops); 10660 10661 trace_create_file("buffer_subbuf_size_kb", TRACE_MODE_WRITE, d_tracer, 10662 tr, &buffer_subbuf_size_fops); 10663 10664 trace_create_file("syscall_user_buf_size", TRACE_MODE_WRITE, d_tracer, 10665 tr, &tracing_syscall_buf_fops); 10666 10667 create_trace_options_dir(tr); 10668 10669 #ifdef CONFIG_TRACER_MAX_TRACE 10670 trace_create_maxlat_file(tr, d_tracer); 10671 #endif 10672 10673 if (ftrace_create_function_files(tr, d_tracer)) 10674 MEM_FAIL(1, "Could not allocate function filter files"); 10675 10676 if (tr->range_addr_start) { 10677 
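/*
 * Boot-mapped (persistent) instances expose "last_boot_info"; other
 * instances get a "snapshot" file instead (CONFIG_TRACER_SNAPSHOT only).
 */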
trace_create_file("last_boot_info", TRACE_MODE_READ, d_tracer, 10678 tr, &last_boot_fops); 10679 #ifdef CONFIG_TRACER_SNAPSHOT 10680 } else { 10681 trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer, 10682 tr, &snapshot_fops); 10683 #endif 10684 } 10685 10686 trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer, 10687 tr, &tracing_err_log_fops); 10688 10689 for_each_tracing_cpu(cpu) 10690 tracing_init_tracefs_percpu(tr, cpu); 10691 10692 ftrace_init_tracefs(tr, d_tracer); 10693 } 10694 10695 #ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED 10696 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore) 10697 { 10698 struct vfsmount *mnt; 10699 struct file_system_type *type; 10700 struct fs_context *fc; 10701 int ret; 10702 10703 /* 10704 * To maintain backward compatibility for tools that mount 10705 * debugfs to get to the tracing facility, tracefs is automatically 10706 * mounted to the debugfs/tracing directory. 10707 */ 10708 type = get_fs_type("tracefs"); 10709 if (!type) 10710 return NULL; 10711 10712 fc = fs_context_for_submount(type, mntpt); 10713 put_filesystem(type); 10714 if (IS_ERR(fc)) 10715 return ERR_CAST(fc); 10716 10717 pr_warn("NOTICE: Automounting of tracing to debugfs is deprecated and will be removed in 2030\n"); 10718 10719 ret = vfs_parse_fs_string(fc, "source", "tracefs"); 10720 if (!ret) 10721 mnt = fc_mount(fc); 10722 else 10723 mnt = ERR_PTR(ret); 10724 10725 put_fs_context(fc); 10726 return mnt; 10727 } 10728 #endif 10729 10730 /** 10731 * tracing_init_dentry - initialize top level trace array 10732 * 10733 * This is called when creating files or directories in the tracing 10734 * directory. It is called via fs_initcall() by any of the boot up code 10735 * and expects to return the dentry of the top level tracing directory. 10736 */ 10737 int tracing_init_dentry(void) 10738 { 10739 struct trace_array *tr = &global_trace; 10740 10741 if (security_locked_down(LOCKDOWN_TRACEFS)) { 10742 pr_warn("Tracing disabled due to lockdown\n"); 10743 return -EPERM; 10744 } 10745 10746 /* The top level trace array uses NULL as parent */ 10747 if (tr->dir) 10748 return 0; 10749 10750 if (WARN_ON(!tracefs_initialized())) 10751 return -ENODEV; 10752 10753 #ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED 10754 /* 10755 * As there may still be users that expect the tracing 10756 * files to exist in debugfs/tracing, we must automount 10757 * the tracefs file system there, so older tools still 10758 * work with the newer kernel. 
10759 */ 10760 tr->dir = debugfs_create_automount("tracing", NULL, 10761 trace_automount, NULL); 10762 #endif 10763 10764 return 0; 10765 } 10766 10767 extern struct trace_eval_map *__start_ftrace_eval_maps[]; 10768 extern struct trace_eval_map *__stop_ftrace_eval_maps[]; 10769 10770 static struct workqueue_struct *eval_map_wq __initdata; 10771 static struct work_struct eval_map_work __initdata; 10772 static struct work_struct tracerfs_init_work __initdata; 10773 10774 static void __init eval_map_work_func(struct work_struct *work) 10775 { 10776 int len; 10777 10778 len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps; 10779 trace_event_update_with_eval_map(NULL, __start_ftrace_eval_maps, len); 10780 } 10781 10782 static int __init trace_eval_init(void) 10783 { 10784 INIT_WORK(&eval_map_work, eval_map_work_func); 10785 10786 eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0); 10787 if (!eval_map_wq) { 10788 pr_err("Unable to allocate eval_map_wq\n"); 10789 /* Do work here */ 10790 eval_map_work_func(&eval_map_work); 10791 return -ENOMEM; 10792 } 10793 10794 queue_work(eval_map_wq, &eval_map_work); 10795 return 0; 10796 } 10797 10798 subsys_initcall(trace_eval_init); 10799 10800 static int __init trace_eval_sync(void) 10801 { 10802 /* Make sure the eval map updates are finished */ 10803 if (eval_map_wq) 10804 destroy_workqueue(eval_map_wq); 10805 return 0; 10806 } 10807 10808 late_initcall_sync(trace_eval_sync); 10809 10810 10811 #ifdef CONFIG_MODULES 10812 10813 bool module_exists(const char *module) 10814 { 10815 /* All modules have the symbol __this_module */ 10816 static const char this_mod[] = "__this_module"; 10817 char modname[MODULE_NAME_LEN + sizeof(this_mod) + 2]; 10818 unsigned long val; 10819 int n; 10820 10821 n = snprintf(modname, sizeof(modname), "%s:%s", module, this_mod); 10822 10823 if (n > sizeof(modname) - 1) 10824 return false; 10825 10826 val = module_kallsyms_lookup_name(modname); 10827 return val != 0; 10828 } 10829 10830 static void trace_module_add_evals(struct module *mod) 10831 { 10832 /* 10833 * Modules with bad taint do not have events created, do 10834 * not bother with enums either. 10835 */ 10836 if (trace_module_has_bad_taint(mod)) 10837 return; 10838 10839 /* Even if no trace_evals, this need to sanitize field types. 
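 * trace_event_update_with_eval_map() is therefore called unconditionally
 * below, even when num_trace_evals is zero.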
*/ 10840 trace_event_update_with_eval_map(mod, mod->trace_evals, mod->num_trace_evals); 10841 } 10842 10843 #ifdef CONFIG_TRACE_EVAL_MAP_FILE 10844 static void trace_module_remove_evals(struct module *mod) 10845 { 10846 union trace_eval_map_item *map; 10847 union trace_eval_map_item **last = &trace_eval_maps; 10848 10849 if (!mod->num_trace_evals) 10850 return; 10851 10852 guard(mutex)(&trace_eval_mutex); 10853 10854 map = trace_eval_maps; 10855 10856 while (map) { 10857 if (map->head.mod == mod) 10858 break; 10859 map = trace_eval_jmp_to_tail(map); 10860 last = &map->tail.next; 10861 map = map->tail.next; 10862 } 10863 if (!map) 10864 return; 10865 10866 *last = trace_eval_jmp_to_tail(map)->tail.next; 10867 kfree(map); 10868 } 10869 #else 10870 static inline void trace_module_remove_evals(struct module *mod) { } 10871 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */ 10872 10873 static void trace_module_record(struct module *mod, bool add) 10874 { 10875 struct trace_array *tr; 10876 unsigned long flags; 10877 10878 list_for_each_entry(tr, &ftrace_trace_arrays, list) { 10879 flags = tr->flags & (TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT); 10880 /* Update any persistent trace array that has already been started */ 10881 if (flags == TRACE_ARRAY_FL_BOOT && add) { 10882 guard(mutex)(&scratch_mutex); 10883 save_mod(mod, tr); 10884 } else if (flags & TRACE_ARRAY_FL_LAST_BOOT) { 10885 /* Update delta if the module loaded in previous boot */ 10886 make_mod_delta(mod, tr); 10887 } 10888 } 10889 } 10890 10891 static int trace_module_notify(struct notifier_block *self, 10892 unsigned long val, void *data) 10893 { 10894 struct module *mod = data; 10895 10896 switch (val) { 10897 case MODULE_STATE_COMING: 10898 trace_module_add_evals(mod); 10899 trace_module_record(mod, true); 10900 break; 10901 case MODULE_STATE_GOING: 10902 trace_module_remove_evals(mod); 10903 trace_module_record(mod, false); 10904 break; 10905 } 10906 10907 return NOTIFY_OK; 10908 } 10909 10910 static struct notifier_block trace_module_nb = { 10911 .notifier_call = trace_module_notify, 10912 .priority = 0, 10913 }; 10914 #endif /* CONFIG_MODULES */ 10915 10916 static __init void tracer_init_tracefs_work_func(struct work_struct *work) 10917 { 10918 10919 event_trace_init(); 10920 10921 init_tracer_tracefs(&global_trace, NULL); 10922 ftrace_init_tracefs_toplevel(&global_trace, NULL); 10923 10924 trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL, 10925 &global_trace, &tracing_thresh_fops); 10926 10927 trace_create_file("README", TRACE_MODE_READ, NULL, 10928 NULL, &tracing_readme_fops); 10929 10930 trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL, 10931 NULL, &tracing_saved_cmdlines_fops); 10932 10933 trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL, 10934 NULL, &tracing_saved_cmdlines_size_fops); 10935 10936 trace_create_file("saved_tgids", TRACE_MODE_READ, NULL, 10937 NULL, &tracing_saved_tgids_fops); 10938 10939 trace_create_eval_file(NULL); 10940 10941 #ifdef CONFIG_MODULES 10942 register_module_notifier(&trace_module_nb); 10943 #endif 10944 10945 #ifdef CONFIG_DYNAMIC_FTRACE 10946 trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL, 10947 NULL, &tracing_dyn_info_fops); 10948 #endif 10949 10950 create_trace_instances(NULL); 10951 10952 update_tracer_options(); 10953 } 10954 10955 static __init int tracer_init_tracefs(void) 10956 { 10957 int ret; 10958 10959 trace_access_lock_init(); 10960 10961 ret = tracing_init_dentry(); 10962 if (ret) 10963 return 0; 10964 10965 if (eval_map_wq) { 10966 
INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func); 10967 queue_work(eval_map_wq, &tracerfs_init_work); 10968 } else { 10969 tracer_init_tracefs_work_func(NULL); 10970 } 10971 10972 if (rv_init_interface()) 10973 pr_err("RV: Error while creating the RV interface\n"); 10974 10975 return 0; 10976 } 10977 10978 fs_initcall(tracer_init_tracefs); 10979 10980 static int trace_die_panic_handler(struct notifier_block *self, 10981 unsigned long ev, void *unused); 10982 10983 static struct notifier_block trace_panic_notifier = { 10984 .notifier_call = trace_die_panic_handler, 10985 .priority = INT_MAX - 1, 10986 }; 10987 10988 static struct notifier_block trace_die_notifier = { 10989 .notifier_call = trace_die_panic_handler, 10990 .priority = INT_MAX - 1, 10991 }; 10992 10993 /* 10994 * The idea is to execute the following die/panic callback early, in order 10995 * to avoid showing irrelevant information in the trace (like other panic 10996 * notifier functions); we are the 2nd to run, after hung_task/rcu_stall 10997 * warnings get disabled (to prevent potential log flooding). 10998 */ 10999 static int trace_die_panic_handler(struct notifier_block *self, 11000 unsigned long ev, void *unused) 11001 { 11002 if (!ftrace_dump_on_oops_enabled()) 11003 return NOTIFY_DONE; 11004 11005 /* The die notifier requires DIE_OOPS to trigger */ 11006 if (self == &trace_die_notifier && ev != DIE_OOPS) 11007 return NOTIFY_DONE; 11008 11009 ftrace_dump(DUMP_PARAM); 11010 11011 return NOTIFY_DONE; 11012 } 11013 11014 /* 11015 * printk is set to max of 1024, we really don't need it that big. 11016 * Nothing should be printing 1000 characters anyway. 11017 */ 11018 #define TRACE_MAX_PRINT 1000 11019 11020 /* 11021 * Define here KERN_TRACE so that we have one place to modify 11022 * it if we decide to change what log level the ftrace dump 11023 * should be at. 11024 */ 11025 #define KERN_TRACE KERN_EMERG 11026 11027 void 11028 trace_printk_seq(struct trace_seq *s) 11029 { 11030 /* Probably should print a warning here. */ 11031 if (s->seq.len >= TRACE_MAX_PRINT) 11032 s->seq.len = TRACE_MAX_PRINT; 11033 11034 /* 11035 * More paranoid code. Although the buffer size is set to 11036 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just 11037 * an extra layer of protection. 11038 */ 11039 if (WARN_ON_ONCE(s->seq.len >= s->seq.size)) 11040 s->seq.len = s->seq.size - 1; 11041 11042 /* should be zero ended, but we are paranoid. */ 11043 s->buffer[s->seq.len] = 0; 11044 11045 printk(KERN_TRACE "%s", s->buffer); 11046 11047 trace_seq_init(s); 11048 } 11049 11050 static void trace_init_iter(struct trace_iterator *iter, struct trace_array *tr) 11051 { 11052 iter->tr = tr; 11053 iter->trace = iter->tr->current_trace; 11054 iter->cpu_file = RING_BUFFER_ALL_CPUS; 11055 iter->array_buffer = &tr->array_buffer; 11056 11057 if (iter->trace && iter->trace->open) 11058 iter->trace->open(iter); 11059 11060 /* Annotate start of buffers if we had overruns */ 11061 if (ring_buffer_overruns(iter->array_buffer->buffer)) 11062 iter->iter_flags |= TRACE_FILE_ANNOTATE; 11063 11064 /* Output in nanoseconds only if we are using a clock in nanoseconds. 
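 * (For example, the "local", "global" and "mono" clocks count in
 * nanoseconds, while "counter" and "uptime" do not.)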
*/
11065 if (trace_clocks[iter->tr->clock_id].in_ns)
11066 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
11067
11068 /* Can not use kmalloc for iter.temp and iter.fmt */
11069 iter->temp = static_temp_buf;
11070 iter->temp_size = STATIC_TEMP_BUF_SIZE;
11071 iter->fmt = static_fmt_buf;
11072 iter->fmt_size = STATIC_FMT_BUF_SIZE;
11073 }
11074
11075 void trace_init_global_iter(struct trace_iterator *iter)
11076 {
11077 trace_init_iter(iter, &global_trace);
11078 }
11079
11080 static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_mode)
11081 {
11082 /* use static because iter can be a bit big for the stack */
11083 static struct trace_iterator iter;
11084 unsigned int old_userobj;
11085 unsigned long flags;
11086 int cnt = 0;
11087
11088 /*
11089 * Always turn off tracing when we dump.
11090 * We don't need to show trace output of what happens
11091 * between multiple crashes.
11092 *
11093 * If the user does a sysrq-z, then they can re-enable
11094 * tracing with echo 1 > tracing_on.
11095 */
11096 tracer_tracing_off(tr);
11097
11098 local_irq_save(flags);
11099
11100 /* Simulate the iterator */
11101 trace_init_iter(&iter, tr);
11102
11103 /* While dumping, do not allow the buffer to be enabled */
11104 tracer_tracing_disable(tr);
11105
11106 old_userobj = tr->trace_flags & TRACE_ITER(SYM_USEROBJ);
11107
11108 /* don't look at user memory in panic mode */
11109 tr->trace_flags &= ~TRACE_ITER(SYM_USEROBJ);
11110
11111 if (dump_mode == DUMP_ORIG)
11112 iter.cpu_file = raw_smp_processor_id();
11113 else
11114 iter.cpu_file = RING_BUFFER_ALL_CPUS;
11115
11116 if (tr == &global_trace)
11117 printk(KERN_TRACE "Dumping ftrace buffer:\n");
11118 else
11119 printk(KERN_TRACE "Dumping ftrace instance %s buffer:\n", tr->name);
11120
11121 /* Did function tracer already get disabled? */
11122 if (ftrace_is_dead()) {
11123 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
11124 printk("# MAY BE MISSING FUNCTION EVENTS\n");
11125 }
11126
11127 /*
11128 * We need to stop all tracing on all CPUs to read
11129 * the next buffer. This is a bit expensive, but is
11130 * not done often. We fill all that we can read,
11131 * and then release the locks again.
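 * (Recording was already disabled above, so the buffer contents stay
 * stable while they are printed.)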
11132 */ 11133 11134 while (!trace_empty(&iter)) { 11135 11136 if (!cnt) 11137 printk(KERN_TRACE "---------------------------------\n"); 11138 11139 cnt++; 11140 11141 trace_iterator_reset(&iter); 11142 iter.iter_flags |= TRACE_FILE_LAT_FMT; 11143 11144 if (trace_find_next_entry_inc(&iter) != NULL) { 11145 int ret; 11146 11147 ret = print_trace_line(&iter); 11148 if (ret != TRACE_TYPE_NO_CONSUME) 11149 trace_consume(&iter); 11150 11151 trace_printk_seq(&iter.seq); 11152 } 11153 touch_nmi_watchdog(); 11154 } 11155 11156 if (!cnt) 11157 printk(KERN_TRACE " (ftrace buffer empty)\n"); 11158 else 11159 printk(KERN_TRACE "---------------------------------\n"); 11160 11161 tr->trace_flags |= old_userobj; 11162 11163 tracer_tracing_enable(tr); 11164 local_irq_restore(flags); 11165 } 11166 11167 static void ftrace_dump_by_param(void) 11168 { 11169 bool first_param = true; 11170 char dump_param[MAX_TRACER_SIZE]; 11171 char *buf, *token, *inst_name; 11172 struct trace_array *tr; 11173 11174 strscpy(dump_param, ftrace_dump_on_oops, MAX_TRACER_SIZE); 11175 buf = dump_param; 11176 11177 while ((token = strsep(&buf, ",")) != NULL) { 11178 if (first_param) { 11179 first_param = false; 11180 if (!strcmp("0", token)) 11181 continue; 11182 else if (!strcmp("1", token)) { 11183 ftrace_dump_one(&global_trace, DUMP_ALL); 11184 continue; 11185 } 11186 else if (!strcmp("2", token) || 11187 !strcmp("orig_cpu", token)) { 11188 ftrace_dump_one(&global_trace, DUMP_ORIG); 11189 continue; 11190 } 11191 } 11192 11193 inst_name = strsep(&token, "="); 11194 tr = trace_array_find(inst_name); 11195 if (!tr) { 11196 printk(KERN_TRACE "Instance %s not found\n", inst_name); 11197 continue; 11198 } 11199 11200 if (token && (!strcmp("2", token) || 11201 !strcmp("orig_cpu", token))) 11202 ftrace_dump_one(tr, DUMP_ORIG); 11203 else 11204 ftrace_dump_one(tr, DUMP_ALL); 11205 } 11206 } 11207 11208 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) 11209 { 11210 static atomic_t dump_running; 11211 11212 /* Only allow one dump user at a time. 
*/ 11213 if (atomic_inc_return(&dump_running) != 1) { 11214 atomic_dec(&dump_running); 11215 return; 11216 } 11217 11218 switch (oops_dump_mode) { 11219 case DUMP_ALL: 11220 ftrace_dump_one(&global_trace, DUMP_ALL); 11221 break; 11222 case DUMP_ORIG: 11223 ftrace_dump_one(&global_trace, DUMP_ORIG); 11224 break; 11225 case DUMP_PARAM: 11226 ftrace_dump_by_param(); 11227 break; 11228 case DUMP_NONE: 11229 break; 11230 default: 11231 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n"); 11232 ftrace_dump_one(&global_trace, DUMP_ALL); 11233 } 11234 11235 atomic_dec(&dump_running); 11236 } 11237 EXPORT_SYMBOL_GPL(ftrace_dump); 11238 11239 #define WRITE_BUFSIZE 4096 11240 11241 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer, 11242 size_t count, loff_t *ppos, 11243 int (*createfn)(const char *)) 11244 { 11245 char *kbuf __free(kfree) = NULL; 11246 char *buf, *tmp; 11247 int ret = 0; 11248 size_t done = 0; 11249 size_t size; 11250 11251 kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL); 11252 if (!kbuf) 11253 return -ENOMEM; 11254 11255 while (done < count) { 11256 size = count - done; 11257 11258 if (size >= WRITE_BUFSIZE) 11259 size = WRITE_BUFSIZE - 1; 11260 11261 if (copy_from_user(kbuf, buffer + done, size)) 11262 return -EFAULT; 11263 11264 kbuf[size] = '\0'; 11265 buf = kbuf; 11266 do { 11267 tmp = strchr(buf, '\n'); 11268 if (tmp) { 11269 *tmp = '\0'; 11270 size = tmp - buf + 1; 11271 } else { 11272 size = strlen(buf); 11273 if (done + size < count) { 11274 if (buf != kbuf) 11275 break; 11276 /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */ 11277 pr_warn("Line length is too long: Should be less than %d\n", 11278 WRITE_BUFSIZE - 2); 11279 return -EINVAL; 11280 } 11281 } 11282 done += size; 11283 11284 /* Remove comments */ 11285 tmp = strchr(buf, '#'); 11286 11287 if (tmp) 11288 *tmp = '\0'; 11289 11290 ret = createfn(buf); 11291 if (ret) 11292 return ret; 11293 buf += size; 11294 11295 } while (done < count); 11296 } 11297 return done; 11298 } 11299 11300 #ifdef CONFIG_TRACER_MAX_TRACE 11301 __init static bool tr_needs_alloc_snapshot(const char *name) 11302 { 11303 char *test; 11304 int len = strlen(name); 11305 bool ret; 11306 11307 if (!boot_snapshot_index) 11308 return false; 11309 11310 if (strncmp(name, boot_snapshot_info, len) == 0 && 11311 boot_snapshot_info[len] == '\t') 11312 return true; 11313 11314 test = kmalloc(strlen(name) + 3, GFP_KERNEL); 11315 if (!test) 11316 return false; 11317 11318 sprintf(test, "\t%s\t", name); 11319 ret = strstr(boot_snapshot_info, test) == NULL; 11320 kfree(test); 11321 return ret; 11322 } 11323 11324 __init static void do_allocate_snapshot(const char *name) 11325 { 11326 if (!tr_needs_alloc_snapshot(name)) 11327 return; 11328 11329 /* 11330 * When allocate_snapshot is set, the next call to 11331 * allocate_trace_buffers() (called by trace_array_get_by_name()) 11332 * will allocate the snapshot buffer. That will alse clear 11333 * this flag. 
11334 */ 11335 allocate_snapshot = true; 11336 } 11337 #else 11338 static inline void do_allocate_snapshot(const char *name) { } 11339 #endif 11340 11341 __init static int backup_instance_area(const char *backup, 11342 unsigned long *addr, phys_addr_t *size) 11343 { 11344 struct trace_array *backup_tr; 11345 void *allocated_vaddr = NULL; 11346 11347 backup_tr = trace_array_get_by_name(backup, NULL); 11348 if (!backup_tr) { 11349 pr_warn("Tracing: Instance %s is not found.\n", backup); 11350 return -ENOENT; 11351 } 11352 11353 if (!(backup_tr->flags & TRACE_ARRAY_FL_BOOT)) { 11354 pr_warn("Tracing: Instance %s is not boot mapped.\n", backup); 11355 trace_array_put(backup_tr); 11356 return -EINVAL; 11357 } 11358 11359 *size = backup_tr->range_addr_size; 11360 11361 allocated_vaddr = vzalloc(*size); 11362 if (!allocated_vaddr) { 11363 pr_warn("Tracing: Failed to allocate memory for copying instance %s (size 0x%lx)\n", 11364 backup, (unsigned long)*size); 11365 trace_array_put(backup_tr); 11366 return -ENOMEM; 11367 } 11368 11369 memcpy(allocated_vaddr, 11370 (void *)backup_tr->range_addr_start, (size_t)*size); 11371 *addr = (unsigned long)allocated_vaddr; 11372 11373 trace_array_put(backup_tr); 11374 return 0; 11375 } 11376 11377 __init static void enable_instances(void) 11378 { 11379 struct trace_array *tr; 11380 bool memmap_area = false; 11381 char *curr_str; 11382 char *name; 11383 char *str; 11384 char *tok; 11385 11386 /* A tab is always appended */ 11387 boot_instance_info[boot_instance_index - 1] = '\0'; 11388 str = boot_instance_info; 11389 11390 while ((curr_str = strsep(&str, "\t"))) { 11391 phys_addr_t start = 0; 11392 phys_addr_t size = 0; 11393 unsigned long addr = 0; 11394 bool traceprintk = false; 11395 bool traceoff = false; 11396 char *flag_delim; 11397 char *addr_delim; 11398 char *rname __free(kfree) = NULL; 11399 char *backup; 11400 11401 tok = strsep(&curr_str, ","); 11402 11403 name = strsep(&tok, "="); 11404 backup = tok; 11405 11406 flag_delim = strchr(name, '^'); 11407 addr_delim = strchr(name, '@'); 11408 11409 if (addr_delim) 11410 *addr_delim++ = '\0'; 11411 11412 if (flag_delim) 11413 *flag_delim++ = '\0'; 11414 11415 if (backup) { 11416 if (backup_instance_area(backup, &addr, &size) < 0) 11417 continue; 11418 } 11419 11420 if (flag_delim) { 11421 char *flag; 11422 11423 while ((flag = strsep(&flag_delim, "^"))) { 11424 if (strcmp(flag, "traceoff") == 0) { 11425 traceoff = true; 11426 } else if ((strcmp(flag, "printk") == 0) || 11427 (strcmp(flag, "traceprintk") == 0) || 11428 (strcmp(flag, "trace_printk") == 0)) { 11429 traceprintk = true; 11430 } else { 11431 pr_info("Tracing: Invalid instance flag '%s' for %s\n", 11432 flag, name); 11433 } 11434 } 11435 } 11436 11437 tok = addr_delim; 11438 if (tok && isdigit(*tok)) { 11439 start = memparse(tok, &tok); 11440 if (!start) { 11441 pr_warn("Tracing: Invalid boot instance address for %s\n", 11442 name); 11443 continue; 11444 } 11445 if (*tok != ':') { 11446 pr_warn("Tracing: No size specified for instance %s\n", name); 11447 continue; 11448 } 11449 tok++; 11450 size = memparse(tok, &tok); 11451 if (!size) { 11452 pr_warn("Tracing: Invalid boot instance size for %s\n", 11453 name); 11454 continue; 11455 } 11456 memmap_area = true; 11457 } else if (tok) { 11458 if (!reserve_mem_find_by_name(tok, &start, &size)) { 11459 start = 0; 11460 pr_warn("Failed to map boot instance %s to %s\n", name, tok); 11461 continue; 11462 } 11463 rname = kstrdup(tok, GFP_KERNEL); 11464 } 11465 11466 if (start) { 11467 /* Start and size 
must be page aligned */ 11468 if (start & ~PAGE_MASK) { 11469 pr_warn("Tracing: mapping start addr %pa is not page aligned\n", &start); 11470 continue; 11471 } 11472 if (size & ~PAGE_MASK) { 11473 pr_warn("Tracing: mapping size %pa is not page aligned\n", &size); 11474 continue; 11475 } 11476 11477 if (memmap_area) 11478 addr = map_pages(start, size); 11479 else 11480 addr = (unsigned long)phys_to_virt(start); 11481 if (addr) { 11482 pr_info("Tracing: mapped boot instance %s at physical memory %pa of size 0x%lx\n", 11483 name, &start, (unsigned long)size); 11484 } else { 11485 pr_warn("Tracing: Failed to map boot instance %s\n", name); 11486 continue; 11487 } 11488 } else { 11489 /* Only non mapped buffers have snapshot buffers */ 11490 if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE)) 11491 do_allocate_snapshot(name); 11492 } 11493 11494 tr = trace_array_create_systems(name, NULL, addr, size); 11495 if (IS_ERR(tr)) { 11496 pr_warn("Tracing: Failed to create instance buffer %s\n", curr_str); 11497 continue; 11498 } 11499 11500 if (traceoff) 11501 tracer_tracing_off(tr); 11502 11503 if (traceprintk) 11504 update_printk_trace(tr); 11505 11506 /* 11507 * memmap'd buffers can not be freed. 11508 */ 11509 if (memmap_area) { 11510 tr->flags |= TRACE_ARRAY_FL_MEMMAP; 11511 tr->ref++; 11512 } 11513 11514 /* 11515 * Backup buffers can be freed but need vfree(). 11516 */ 11517 if (backup) 11518 tr->flags |= TRACE_ARRAY_FL_VMALLOC; 11519 11520 if (start || backup) { 11521 tr->flags |= TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT; 11522 tr->range_name = no_free_ptr(rname); 11523 } 11524 11525 while ((tok = strsep(&curr_str, ","))) { 11526 early_enable_events(tr, tok, true); 11527 } 11528 } 11529 } 11530 11531 __init static int tracer_alloc_buffers(void) 11532 { 11533 int ring_buf_size; 11534 int ret = -ENOMEM; 11535 11536 11537 if (security_locked_down(LOCKDOWN_TRACEFS)) { 11538 pr_warn("Tracing disabled due to lockdown\n"); 11539 return -EPERM; 11540 } 11541 11542 /* 11543 * Make sure we don't accidentally add more trace options 11544 * than we have bits for. 11545 */ 11546 BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE); 11547 11548 if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL)) 11549 return -ENOMEM; 11550 11551 if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL)) 11552 goto out_free_buffer_mask; 11553 11554 /* Only allocate trace_printk buffers if a trace_printk exists */ 11555 if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt) 11556 /* Must be called before global_trace.buffer is allocated */ 11557 trace_printk_init_buffers(); 11558 11559 /* To save memory, keep the ring buffer size to its minimum */ 11560 if (global_trace.ring_buffer_expanded) 11561 ring_buf_size = trace_buf_size; 11562 else 11563 ring_buf_size = 1; 11564 11565 cpumask_copy(tracing_buffer_mask, cpu_possible_mask); 11566 cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask); 11567 11568 raw_spin_lock_init(&global_trace.start_lock); 11569 11570 /* 11571 * The prepare callbacks allocates some memory for the ring buffer. We 11572 * don't free the buffer if the CPU goes down. If we were to free 11573 * the buffer, then the user would lose any trace that was in the 11574 * buffer. The memory will be removed once the "instance" is removed. 
11575 */ 11576 ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE, 11577 "trace/RB:prepare", trace_rb_cpu_prepare, 11578 NULL); 11579 if (ret < 0) 11580 goto out_free_cpumask; 11581 /* Used for event triggers */ 11582 ret = -ENOMEM; 11583 temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE); 11584 if (!temp_buffer) 11585 goto out_rm_hp_state; 11586 11587 if (trace_create_savedcmd() < 0) 11588 goto out_free_temp_buffer; 11589 11590 if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL)) 11591 goto out_free_savedcmd; 11592 11593 /* TODO: make the number of buffers hot pluggable with CPUS */ 11594 if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) { 11595 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n"); 11596 goto out_free_pipe_cpumask; 11597 } 11598 if (global_trace.buffer_disabled) 11599 tracing_off(); 11600 11601 if (trace_boot_clock) { 11602 ret = tracing_set_clock(&global_trace, trace_boot_clock); 11603 if (ret < 0) 11604 pr_warn("Trace clock %s not defined, going back to default\n", 11605 trace_boot_clock); 11606 } 11607 11608 /* 11609 * register_tracer() might reference current_trace, so it 11610 * needs to be set before we register anything. This is 11611 * just a bootstrap of current_trace anyway. 11612 */ 11613 global_trace.current_trace = &nop_trace; 11614 global_trace.current_trace_flags = nop_trace.flags; 11615 11616 global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; 11617 #ifdef CONFIG_TRACER_MAX_TRACE 11618 spin_lock_init(&global_trace.snapshot_trigger_lock); 11619 #endif 11620 ftrace_init_global_array_ops(&global_trace); 11621 11622 #ifdef CONFIG_MODULES 11623 INIT_LIST_HEAD(&global_trace.mod_events); 11624 #endif 11625 11626 init_trace_flags_index(&global_trace); 11627 11628 INIT_LIST_HEAD(&global_trace.tracers); 11629 11630 /* All seems OK, enable tracing */ 11631 tracing_disabled = 0; 11632 11633 atomic_notifier_chain_register(&panic_notifier_list, 11634 &trace_panic_notifier); 11635 11636 register_die_notifier(&trace_die_notifier); 11637 11638 global_trace.flags = TRACE_ARRAY_FL_GLOBAL; 11639 11640 global_trace.syscall_buf_sz = syscall_buf_size; 11641 11642 INIT_LIST_HEAD(&global_trace.systems); 11643 INIT_LIST_HEAD(&global_trace.events); 11644 INIT_LIST_HEAD(&global_trace.hist_vars); 11645 INIT_LIST_HEAD(&global_trace.err_log); 11646 list_add(&global_trace.marker_list, &marker_copies); 11647 list_add(&global_trace.list, &ftrace_trace_arrays); 11648 11649 register_tracer(&nop_trace); 11650 11651 /* Function tracing may start here (via kernel command line) */ 11652 init_function_trace(); 11653 11654 apply_trace_boot_options(); 11655 11656 register_snapshot_cmd(); 11657 11658 return 0; 11659 11660 out_free_pipe_cpumask: 11661 free_cpumask_var(global_trace.pipe_cpumask); 11662 out_free_savedcmd: 11663 trace_free_saved_cmdlines_buffer(); 11664 out_free_temp_buffer: 11665 ring_buffer_free(temp_buffer); 11666 out_rm_hp_state: 11667 cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE); 11668 out_free_cpumask: 11669 free_cpumask_var(global_trace.tracing_cpumask); 11670 out_free_buffer_mask: 11671 free_cpumask_var(tracing_buffer_mask); 11672 return ret; 11673 } 11674 11675 #ifdef CONFIG_FUNCTION_TRACER 11676 /* Used to set module cached ftrace filtering at boot up */ 11677 struct trace_array *trace_get_global_array(void) 11678 { 11679 return &global_trace; 11680 } 11681 #endif 11682 11683 void __init ftrace_boot_snapshot(void) 11684 { 11685 #ifdef CONFIG_TRACER_MAX_TRACE 11686 struct trace_array *tr; 11687 11688 if 
(!snapshot_at_boot) 11689 return; 11690 11691 list_for_each_entry(tr, &ftrace_trace_arrays, list) { 11692 if (!tr->allocated_snapshot) 11693 continue; 11694 11695 tracing_snapshot_instance(tr); 11696 trace_array_puts(tr, "** Boot snapshot taken **\n"); 11697 } 11698 #endif 11699 } 11700 11701 void __init early_trace_init(void) 11702 { 11703 if (tracepoint_printk) { 11704 tracepoint_print_iter = 11705 kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL); 11706 if (MEM_FAIL(!tracepoint_print_iter, 11707 "Failed to allocate trace iterator\n")) 11708 tracepoint_printk = 0; 11709 else 11710 static_key_enable(&tracepoint_printk_key.key); 11711 } 11712 tracer_alloc_buffers(); 11713 11714 init_events(); 11715 } 11716 11717 void __init trace_init(void) 11718 { 11719 trace_event_init(); 11720 11721 if (boot_instance_index) 11722 enable_instances(); 11723 } 11724 11725 __init static void clear_boot_tracer(void) 11726 { 11727 /* 11728 * The default tracer at boot buffer is an init section. 11729 * This function is called in lateinit. If we did not 11730 * find the boot tracer, then clear it out, to prevent 11731 * later registration from accessing the buffer that is 11732 * about to be freed. 11733 */ 11734 if (!default_bootup_tracer) 11735 return; 11736 11737 printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n", 11738 default_bootup_tracer); 11739 default_bootup_tracer = NULL; 11740 } 11741 11742 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK 11743 __init static void tracing_set_default_clock(void) 11744 { 11745 /* sched_clock_stable() is determined in late_initcall */ 11746 if (!trace_boot_clock && !sched_clock_stable()) { 11747 if (security_locked_down(LOCKDOWN_TRACEFS)) { 11748 pr_warn("Can not set tracing clock due to lockdown\n"); 11749 return; 11750 } 11751 11752 printk(KERN_WARNING 11753 "Unstable clock detected, switching default tracing clock to \"global\"\n" 11754 "If you want to keep using the local clock, then add:\n" 11755 " \"trace_clock=local\"\n" 11756 "on the kernel command line\n"); 11757 tracing_set_clock(&global_trace, "global"); 11758 } 11759 } 11760 #else 11761 static inline void tracing_set_default_clock(void) { } 11762 #endif 11763 11764 __init static int late_trace_init(void) 11765 { 11766 if (tracepoint_printk && tracepoint_printk_stop_on_boot) { 11767 static_key_disable(&tracepoint_printk_key.key); 11768 tracepoint_printk = 0; 11769 } 11770 11771 if (traceoff_after_boot) 11772 tracing_off(); 11773 11774 tracing_set_default_clock(); 11775 clear_boot_tracer(); 11776 return 0; 11777 } 11778 11779 late_initcall_sync(late_trace_init); 11780