// SPDX-License-Identifier: GPL-2.0
/*
 * ring buffer based function tracer
 *
 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
 *
 * Originally taken from the RT patch by:
 *   Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Based on code from the latency_tracer, that is:
 *  Copyright (C) 2004-2006 Ingo Molnar
 *  Copyright (C) 2004 Nadia Yvette Chambers
 */
#include <linux/ring_buffer.h>
#include <linux/utsname.h>
#include <linux/stacktrace.h>
#include <linux/writeback.h>
#include <linux/kallsyms.h>
#include <linux/security.h>
#include <linux/seq_file.h>
#include <linux/irqflags.h>
#include <linux/syscalls.h>
#include <linux/debugfs.h>
#include <linux/tracefs.h>
#include <linux/pagemap.h>
#include <linux/hardirq.h>
#include <linux/linkage.h>
#include <linux/uaccess.h>
#include <linux/cleanup.h>
#include <linux/vmalloc.h>
#include <linux/ftrace.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/splice.h>
#include <linux/kdebug.h>
#include <linux/string.h>
#include <linux/mount.h>
#include <linux/rwsem.h>
#include <linux/slab.h>
#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/panic_notifier.h>
#include <linux/poll.h>
#include <linux/nmi.h>
#include <linux/fs.h>
#include <linux/trace.h>
#include <linux/sched/clock.h>
#include <linux/sched/rt.h>
#include <linux/fsnotify.h>
#include <linux/irq_work.h>
#include <linux/workqueue.h>
#include <linux/sort.h>
#include <linux/io.h> /* vmap_page_range() */
#include <linux/fs_context.h>

#include <asm/setup.h> /* COMMAND_LINE_SIZE */

#include "trace.h"
#include "trace_output.h"

#ifdef CONFIG_FTRACE_STARTUP_TEST
/*
 * We need to change this state when a selftest is running.
 * A selftest will look into the ring buffer to count the
 * entries inserted during the selftest, although concurrent
 * insertions into the ring buffer, such as trace_printk, could occur
 * at the same time, giving false positive or negative results.
 */
static bool __read_mostly tracing_selftest_running;

/*
 * If boot-time tracing (including tracers/events set via the kernel
 * cmdline) is running, we do not want to run SELFTEST.
 */
bool __read_mostly tracing_selftest_disabled;

void __init disable_tracing_selftest(const char *reason)
{
	if (!tracing_selftest_disabled) {
		tracing_selftest_disabled = true;
		pr_info("Ftrace startup test is disabled due to %s\n", reason);
	}
}
#else
#define tracing_selftest_running	0
#define tracing_selftest_disabled	0
#endif

/* Pipe tracepoints to printk */
static struct trace_iterator *tracepoint_print_iter;
int tracepoint_printk;
static bool tracepoint_printk_stop_on_boot __initdata;
static bool traceoff_after_boot __initdata;
static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);

/* Store tracers and their flags per instance */
struct tracers {
	struct list_head	list;
	struct tracer		*tracer;
	struct tracer_flags	*flags;
};

/*
 * To prevent the comm cache from being overwritten when no
 * tracing is active, only save the comm when a trace event
 * occurred.
 */
DEFINE_PER_CPU(bool, trace_taskinfo_save);

/*
 * Kill all tracing for good (never come back).
 * It is initialized to 1 but will turn to zero if the initialization
 * of the tracer is successful. But that is the only place that sets
 * this back to zero.
 */
static int tracing_disabled = 1;

cpumask_var_t __read_mostly	tracing_buffer_mask;

#define MAX_TRACER_SIZE		100
/*
 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
 *
 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
 * is set, then ftrace_dump is called. This will output the contents
 * of the ftrace buffers to the console. This is very useful for
 * capturing traces that lead to crashes and outputting them to a
 * serial console.
 *
 * It is off by default, but you can enable it either by specifying
 * "ftrace_dump_on_oops" on the kernel command line, or by setting
 * /proc/sys/kernel/ftrace_dump_on_oops
 * Set 1 if you want to dump buffers of all CPUs
 * Set 2 if you want to dump the buffer of the CPU that triggered the oops
 * Set an instance name if you want to dump a specific trace instance
 * Multiple instance dump is also supported, and instances are separated
 * by commas.
 */
/* Set to the string "0" to disable by default */
static char ftrace_dump_on_oops[MAX_TRACER_SIZE] = "0";

/* When set, tracing will stop when a WARN*() is hit */
static int __disable_trace_on_warning;

int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
			     void *buffer, size_t *lenp, loff_t *ppos);
static const struct ctl_table trace_sysctl_table[] = {
	{
		.procname	= "ftrace_dump_on_oops",
		.data		= &ftrace_dump_on_oops,
		.maxlen		= MAX_TRACER_SIZE,
		.mode		= 0644,
		.proc_handler	= proc_dostring,
	},
	{
		.procname	= "traceoff_on_warning",
		.data		= &__disable_trace_on_warning,
		.maxlen		= sizeof(__disable_trace_on_warning),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "tracepoint_printk",
		.data		= &tracepoint_printk,
		.maxlen		= sizeof(tracepoint_printk),
		.mode		= 0644,
		.proc_handler	= tracepoint_printk_sysctl,
	},
};

static int __init init_trace_sysctls(void)
{
	register_sysctl_init("kernel", trace_sysctl_table);
	return 0;
}
subsys_initcall(init_trace_sysctls);

#ifdef CONFIG_TRACE_EVAL_MAP_FILE
/* Map of enums to their values, for "eval_map" file */
struct trace_eval_map_head {
	struct module			*mod;
	unsigned long			length;
};

union trace_eval_map_item;

struct trace_eval_map_tail {
	/*
	 * "end" is first and points to NULL as it must be different
	 * from "mod" or "eval_string"
	 */
	union trace_eval_map_item	*next;
	const char			*end;	/* points to NULL */
};

static DEFINE_MUTEX(trace_eval_mutex);

/*
 * The trace_eval_maps are saved in an array with two extra elements,
 * one at the beginning, and one at the end. The beginning item contains
 * the count of the saved maps (head.length), and the module they
 * belong to if not built in (head.mod). The ending item contains a
 * pointer to the next array of saved eval_map items.
 */
union trace_eval_map_item {
	struct trace_eval_map		map;
	struct trace_eval_map_head	head;
	struct trace_eval_map_tail	tail;
};

static union trace_eval_map_item *trace_eval_maps;
#endif /* CONFIG_TRACE_EVAL_MAP_FILE */

int tracing_set_tracer(struct trace_array *tr, const char *buf);
static void ftrace_trace_userstack(struct trace_array *tr,
				   struct trace_buffer *buffer,
				   unsigned int trace_ctx);

static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
static char *default_bootup_tracer;

static bool allocate_snapshot;
static bool snapshot_at_boot;

static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
static int boot_instance_index;

static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
static int boot_snapshot_index;

static int __init set_cmdline_ftrace(char *str)
{
	strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
	default_bootup_tracer = bootup_tracer_buf;
	/* We are using ftrace early, expand it */
	trace_set_ring_buffer_expanded(NULL);
	return 1;
}
__setup("ftrace=", set_cmdline_ftrace);

int ftrace_dump_on_oops_enabled(void)
{
	if (!strcmp("0", ftrace_dump_on_oops))
		return 0;
	else
		return 1;
}

static int __init set_ftrace_dump_on_oops(char *str)
{
	if (!*str) {
		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
		return 1;
	}

	if (*str == ',') {
		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
		strscpy(ftrace_dump_on_oops + 1, str, MAX_TRACER_SIZE - 1);
		return 1;
	}

	if (*str++ == '=') {
		strscpy(ftrace_dump_on_oops, str, MAX_TRACER_SIZE);
		return 1;
	}

	return 0;
}
__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);

static int __init stop_trace_on_warning(char *str)
{
	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
		__disable_trace_on_warning = 1;
	return 1;
}
__setup("traceoff_on_warning", stop_trace_on_warning);

static int __init boot_alloc_snapshot(char *str)
{
	char *slot = boot_snapshot_info + boot_snapshot_index;
	int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
	int ret;

	if (str[0] == '=') {
		str++;
		if (strlen(str) >= left)
			return -1;

		ret = snprintf(slot, left, "%s\t", str);
		boot_snapshot_index += ret;
	} else {
		allocate_snapshot = true;
		/* We also need the main ring buffer expanded */
		trace_set_ring_buffer_expanded(NULL);
	}
	return 1;
}
__setup("alloc_snapshot", boot_alloc_snapshot);


static int __init boot_snapshot(char *str)
{
	snapshot_at_boot = true;
	boot_alloc_snapshot(str);
	return 1;
}
__setup("ftrace_boot_snapshot", boot_snapshot);


static int __init boot_instance(char *str)
{
	char *slot = boot_instance_info + boot_instance_index;
	int left = sizeof(boot_instance_info) - boot_instance_index;
	int ret;

	if (strlen(str) >= left)
		return -1;

	ret = snprintf(slot, left, "%s\t", str);
	boot_instance_index += ret;

	return 1;
}
__setup("trace_instance=", boot_instance);


static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;

static int __init set_trace_boot_options(char *str)
{
	strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
	return 1;
}
__setup("trace_options=", set_trace_boot_options);

static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
static char *trace_boot_clock __initdata;

static int __init set_trace_boot_clock(char *str)
{
	strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
	trace_boot_clock = trace_boot_clock_buf;
	return 1;
}
__setup("trace_clock=", set_trace_boot_clock);

static int __init set_tracepoint_printk(char *str)
{
	/* Ignore the "tp_printk_stop_on_boot" param */
	if (*str == '_')
		return 0;

	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
		tracepoint_printk = 1;
	return 1;
}
__setup("tp_printk", set_tracepoint_printk);

static int __init set_tracepoint_printk_stop(char *str)
{
	tracepoint_printk_stop_on_boot = true;
	return 1;
}
__setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);

static int __init set_traceoff_after_boot(char *str)
{
	traceoff_after_boot = true;
	return 1;
}
__setup("traceoff_after_boot", set_traceoff_after_boot);

unsigned long long ns2usecs(u64 nsec)
{
	nsec += 500;
	do_div(nsec, 1000);
	return nsec;
}

static void
trace_process_export(struct trace_export *export,
		     struct ring_buffer_event *event, int flag)
{
	struct trace_entry *entry;
	unsigned int size = 0;

	if (export->flags & flag) {
		entry = ring_buffer_event_data(event);
		size = ring_buffer_event_length(event);
		export->write(export, entry, size);
	}
}

static DEFINE_MUTEX(ftrace_export_lock);

static struct trace_export __rcu *ftrace_exports_list __read_mostly;

static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);

static inline void ftrace_exports_enable(struct trace_export *export)
{
	if (export->flags & TRACE_EXPORT_FUNCTION)
		static_branch_inc(&trace_function_exports_enabled);

	if (export->flags & TRACE_EXPORT_EVENT)
		static_branch_inc(&trace_event_exports_enabled);

	if (export->flags & TRACE_EXPORT_MARKER)
		static_branch_inc(&trace_marker_exports_enabled);
}

static inline void ftrace_exports_disable(struct trace_export *export)
{
	if (export->flags & TRACE_EXPORT_FUNCTION)
		static_branch_dec(&trace_function_exports_enabled);

	if (export->flags & TRACE_EXPORT_EVENT)
		static_branch_dec(&trace_event_exports_enabled);

	if (export->flags & TRACE_EXPORT_MARKER)
		static_branch_dec(&trace_marker_exports_enabled);
}

static void ftrace_exports(struct ring_buffer_event *event, int flag)
{
	struct trace_export *export;

	guard(preempt_notrace)();

	export = rcu_dereference_raw_check(ftrace_exports_list);
	while (export) {
		trace_process_export(export, event, flag);
		export = rcu_dereference_raw_check(export->next);
	}
}

static inline void
add_trace_export(struct trace_export **list, struct trace_export *export)
{
	rcu_assign_pointer(export->next, *list);
	/*
	 * We are entering export into the list but another
	 * CPU might be walking that list. We need to make sure
	 * the export->next pointer is valid before another CPU sees
	 * the export pointer included in the list.
	 */
	rcu_assign_pointer(*list, export);
}

static inline int
rm_trace_export(struct trace_export **list, struct trace_export *export)
{
	struct trace_export **p;

	for (p = list; *p != NULL; p = &(*p)->next)
		if (*p == export)
			break;

	if (*p != export)
		return -1;

	rcu_assign_pointer(*p, (*p)->next);

	return 0;
}

static inline void
add_ftrace_export(struct trace_export **list, struct trace_export *export)
{
	ftrace_exports_enable(export);

	add_trace_export(list, export);
}

static inline int
rm_ftrace_export(struct trace_export **list, struct trace_export *export)
{
	int ret;

	ret = rm_trace_export(list, export);
	ftrace_exports_disable(export);

	return ret;
}

int register_ftrace_export(struct trace_export *export)
{
	if (WARN_ON_ONCE(!export->write))
		return -1;

	guard(mutex)(&ftrace_export_lock);

	add_ftrace_export(&ftrace_exports_list, export);

	return 0;
}
EXPORT_SYMBOL_GPL(register_ftrace_export);

int unregister_ftrace_export(struct trace_export *export)
{
	guard(mutex)(&ftrace_export_lock);
	return rm_ftrace_export(&ftrace_exports_list, export);
}
EXPORT_SYMBOL_GPL(unregister_ftrace_export);

/* trace_flags holds trace_options default values */
#define TRACE_DEFAULT_FLAGS						\
	(FUNCTION_DEFAULT_FLAGS | FPROFILE_DEFAULT_FLAGS |		\
	 TRACE_ITER(PRINT_PARENT) | TRACE_ITER(PRINTK) |		\
	 TRACE_ITER(ANNOTATE) | TRACE_ITER(CONTEXT_INFO) |		\
	 TRACE_ITER(RECORD_CMD) | TRACE_ITER(OVERWRITE) |		\
	 TRACE_ITER(IRQ_INFO) | TRACE_ITER(MARKERS) |			\
	 TRACE_ITER(HASH_PTR) | TRACE_ITER(TRACE_PRINTK) |		\
	 TRACE_ITER(COPY_MARKER))

/* trace_options that are only supported by global_trace */
#define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER(PRINTK) |			\
	TRACE_ITER(PRINTK_MSGONLY) | TRACE_ITER(RECORD_CMD) |		\
	TRACE_ITER(PROF_TEXT_OFFSET) | FPROFILE_DEFAULT_FLAGS)

/* trace_flags that are default zero for instances */
#define ZEROED_TRACE_FLAGS \
	(TRACE_ITER(EVENT_FORK) | TRACE_ITER(FUNC_FORK) | TRACE_ITER(TRACE_PRINTK) | \
	 TRACE_ITER(COPY_MARKER))

/*
 * The global_trace is the descriptor that holds the top-level tracing
 * buffers for the live tracing.
 */
static struct trace_array global_trace = {
	.trace_flags = TRACE_DEFAULT_FLAGS,
};

static struct trace_array *printk_trace = &global_trace;

/* List of trace_arrays interested in the top level trace_marker */
static LIST_HEAD(marker_copies);

static __always_inline bool printk_binsafe(struct trace_array *tr)
{
	/*
	 * The binary format of trace_printk can cause a crash if used
	 * by a buffer from another boot. Force the use of the
	 * non-binary version of trace_printk if the trace_printk
	 * buffer is a boot-mapped ring buffer.
	 */
	return !(tr->flags & TRACE_ARRAY_FL_BOOT);
}

static void update_printk_trace(struct trace_array *tr)
{
	if (printk_trace == tr)
		return;

	printk_trace->trace_flags &= ~TRACE_ITER(TRACE_PRINTK);
	printk_trace = tr;
	tr->trace_flags |= TRACE_ITER(TRACE_PRINTK);
}

/* Returns true if the status of tr changed */
static bool update_marker_trace(struct trace_array *tr, int enabled)
{
	lockdep_assert_held(&event_mutex);

	if (enabled) {
		if (!list_empty(&tr->marker_list))
			return false;

		list_add_rcu(&tr->marker_list, &marker_copies);
		tr->trace_flags |= TRACE_ITER(COPY_MARKER);
		return true;
	}

	if (list_empty(&tr->marker_list))
		return false;

	list_del_init(&tr->marker_list);
	tr->trace_flags &= ~TRACE_ITER(COPY_MARKER);
	return true;
}

void trace_set_ring_buffer_expanded(struct trace_array *tr)
{
	if (!tr)
		tr = &global_trace;
	tr->ring_buffer_expanded = true;
}

LIST_HEAD(ftrace_trace_arrays);

int trace_array_get(struct trace_array *this_tr)
{
	struct trace_array *tr;

	guard(mutex)(&trace_types_lock);
	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
		if (tr == this_tr) {
			tr->ref++;
			return 0;
		}
	}

	return -ENODEV;
}

static void __trace_array_put(struct trace_array *this_tr)
{
	WARN_ON(!this_tr->ref);
	this_tr->ref--;
}

/**
 * trace_array_put - Decrement the reference counter for this trace array.
 * @this_tr : pointer to the trace array
 *
 * NOTE: Use this when we no longer need the trace array returned by
 * trace_array_get_by_name(). This ensures the trace array can be later
 * destroyed.
 *
 */
void trace_array_put(struct trace_array *this_tr)
{
	if (!this_tr)
		return;

	guard(mutex)(&trace_types_lock);
	__trace_array_put(this_tr);
}
EXPORT_SYMBOL_GPL(trace_array_put);

int tracing_check_open_get_tr(struct trace_array *tr)
{
	int ret;

	ret = security_locked_down(LOCKDOWN_TRACEFS);
	if (ret)
		return ret;

	if (tracing_disabled)
		return -ENODEV;

	if (tr && trace_array_get(tr) < 0)
		return -ENODEV;

	return 0;
}

/**
 * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
 * @filtered_pids: The list of pids to check
 * @search_pid: The PID to find in @filtered_pids
 *
 * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
 */
bool
trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
{
	return trace_pid_list_is_set(filtered_pids, search_pid);
}

/**
 * trace_ignore_this_task - should a task be ignored for tracing
 * @filtered_pids: The list of pids to check
 * @filtered_no_pids: The list of pids not to be traced
 * @task: The task that should be ignored if not filtered
 *
 * Checks if @task should be traced or not from @filtered_pids.
 * Returns true if @task should *NOT* be traced.
 * Returns false if @task should be traced.
 */
bool
trace_ignore_this_task(struct trace_pid_list *filtered_pids,
		       struct trace_pid_list *filtered_no_pids,
		       struct task_struct *task)
{
	/*
	 * If filtered_no_pids is not empty, and the task's pid is listed
	 * in filtered_no_pids, then return true.
	 * Otherwise, if filtered_pids is empty, that means we can
	 * trace all tasks. If it has content, then only trace pids
	 * within filtered_pids.
	 */

	return (filtered_pids &&
		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
		(filtered_no_pids &&
		 trace_find_filtered_pid(filtered_no_pids, task->pid));
}

/**
 * trace_filter_add_remove_task - Add or remove a task from a pid_list
 * @pid_list: The list to modify
 * @self: The current task for fork or NULL for exit
 * @task: The task to add or remove
 *
 * If adding a task, if @self is defined, the task is only added if @self
 * is also included in @pid_list. This happens on fork and tasks should
 * only be added when the parent is listed. If @self is NULL, then the
 * @task pid will be removed from the list, which would happen on exit
 * of a task.
 */
void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
				  struct task_struct *self,
				  struct task_struct *task)
{
	if (!pid_list)
		return;

	/* For forks, we only add if the forking task is listed */
	if (self) {
		if (!trace_find_filtered_pid(pid_list, self->pid))
			return;
	}

	/* "self" is set for forks, and NULL for exits */
	if (self)
		trace_pid_list_set(pid_list, task->pid);
	else
		trace_pid_list_clear(pid_list, task->pid);
}

/**
 * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
 * @pid_list: The pid list to show
 * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
 * @pos: The position of the file
 *
 * This is used by the seq_file "next" operation to iterate the pids
 * listed in a trace_pid_list structure.
 *
 * Returns the pid+1 as we want to display pid of zero, but NULL would
 * stop the iteration.
 */
void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
{
	long pid = (unsigned long)v;
	unsigned int next;

	(*pos)++;

	/* pid already is +1 of the actual previous bit */
	if (trace_pid_list_next(pid_list, pid, &next) < 0)
		return NULL;

	pid = next;

	/* Return pid + 1 to allow zero to be represented */
	return (void *)(pid + 1);
}

/**
 * trace_pid_start - Used for seq_file to start reading pid lists
 * @pid_list: The pid list to show
 * @pos: The position of the file
 *
 * This is used by the seq_file "start" operation to start the iteration
 * of listing pids.
 *
 * Returns the pid+1 as we want to display pid of zero, but NULL would
 * stop the iteration.
 */
void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
{
	unsigned long pid;
	unsigned int first;
	loff_t l = 0;

	if (trace_pid_list_first(pid_list, &first) < 0)
		return NULL;

	pid = first;

	/* Return pid + 1 so that zero can be the exit value */
	for (pid++; pid && l < *pos;
	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
		;
	return (void *)pid;
}

/**
 * trace_pid_show - show the current pid in seq_file processing
 * @m: The seq_file structure to write into
 * @v: A void pointer of the pid (+1) value to display
 *
 * Can be directly used by seq_file operations to display the current
 * pid value.
 */
int trace_pid_show(struct seq_file *m, void *v)
{
	unsigned long pid = (unsigned long)v - 1;

	seq_printf(m, "%lu\n", pid);
	return 0;
}

/* 128 should be much more than enough */
#define PID_BUF_SIZE		127

int trace_pid_write(struct trace_pid_list *filtered_pids,
		    struct trace_pid_list **new_pid_list,
		    const char __user *ubuf, size_t cnt)
{
	struct trace_pid_list *pid_list;
	struct trace_parser parser;
	unsigned long val;
	int nr_pids = 0;
	ssize_t read = 0;
	ssize_t ret;
	loff_t pos;
	pid_t pid;

	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
		return -ENOMEM;

	/*
	 * Always recreate a new array. The write is an all or nothing
	 * operation. Always create a new array when adding new pids by
	 * the user. If the operation fails, then the current list is
	 * not modified.
	 */
	pid_list = trace_pid_list_alloc();
	if (!pid_list) {
		trace_parser_put(&parser);
		return -ENOMEM;
	}

	if (filtered_pids) {
		/* copy the current bits to the new max */
		ret = trace_pid_list_first(filtered_pids, &pid);
		while (!ret) {
			ret = trace_pid_list_set(pid_list, pid);
			if (ret < 0)
				goto out;

			ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
			nr_pids++;
		}
	}

	ret = 0;
	while (cnt > 0) {

		pos = 0;

		ret = trace_get_user(&parser, ubuf, cnt, &pos);
		if (ret < 0)
			break;

		read += ret;
		ubuf += ret;
		cnt -= ret;

		if (!trace_parser_loaded(&parser))
			break;

		ret = -EINVAL;
		if (kstrtoul(parser.buffer, 0, &val))
			break;

		pid = (pid_t)val;

		if (trace_pid_list_set(pid_list, pid) < 0) {
			ret = -1;
			break;
		}
		nr_pids++;

		trace_parser_clear(&parser);
		ret = 0;
	}
 out:
	trace_parser_put(&parser);

	if (ret < 0) {
		trace_pid_list_free(pid_list);
		return ret;
	}

	if (!nr_pids) {
		/* Cleared the list of pids */
		trace_pid_list_free(pid_list);
		pid_list = NULL;
	}

	*new_pid_list = pid_list;

	return read;
}

static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
{
	u64 ts;

	/* Early boot up does not have a buffer yet */
	if (!buf->buffer)
		return trace_clock_local();

	ts = ring_buffer_time_stamp(buf->buffer);
	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);

	return ts;
}

u64 ftrace_now(int cpu)
{
	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
}

/**
 * tracing_is_enabled - Show if global_trace has been enabled
 *
 * Shows if the global trace has been enabled or not. It uses the
 * mirror flag "buffer_disabled" to be used in fast paths such as for
 * the irqsoff tracer. But it may be inaccurate due to races. If you
 * need to know the accurate state, use tracing_is_on() which is a little
 * slower, but accurate.
 */
int tracing_is_enabled(void)
{
	/*
	 * For quick access (irqsoff uses this in fast path), just
	 * return the mirror variable of the state of the ring buffer.
	 * It's a little racy, but we don't really care.
	 */
	return !global_trace.buffer_disabled;
}

/*
 * trace_buf_size is the size in bytes that is allocated
 * for a buffer. Note, the number of bytes is always rounded
 * to page size.
 *
 * This number is purposely set to a low number of 16384.
 * If the dump on oops happens, it will be much appreciated
 * to not have to wait for all that output. Anyway this can be
 * boot time and run time configurable.
 */
#define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */

static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;

/* trace_types holds a link list of available tracers. */
static struct tracer		*trace_types __read_mostly;

/*
 * trace_types_lock is used to protect the trace_types list.
 */
DEFINE_MUTEX(trace_types_lock);

/*
 * serialize the access of the ring buffer
 *
 * The ring buffer serializes readers, but that is only low-level protection.
 * The validity of the events (returned by ring_buffer_peek() etc.)
 * is not protected by the ring buffer.
 *
 * The content of events may become garbage if we allow another process
 * to consume these events concurrently:
 *   A) the page of the consumed events may become a normal page
 *      (not reader page) in the ring buffer, and this page will be rewritten
 *      by the events producer.
 *   B) The page of the consumed events may become a page for splice_read,
 *      and this page will be returned to the system.
 *
 * These primitives allow multiple processes to access different CPU ring
 * buffers concurrently.
 *
 * These primitives don't distinguish read-only and read-consume access.
 * Multiple read-only accesses are also serialized.
 */

#ifdef CONFIG_SMP
static DECLARE_RWSEM(all_cpu_access_lock);
static DEFINE_PER_CPU(struct mutex, cpu_access_lock);

static inline void trace_access_lock(int cpu)
{
	if (cpu == RING_BUFFER_ALL_CPUS) {
		/* gain it for accessing the whole ring buffer. */
		down_write(&all_cpu_access_lock);
	} else {
		/* gain it for accessing a cpu ring buffer. */

		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
		down_read(&all_cpu_access_lock);

		/* Secondly block other access to this @cpu ring buffer. */
		mutex_lock(&per_cpu(cpu_access_lock, cpu));
	}
}

static inline void trace_access_unlock(int cpu)
{
	if (cpu == RING_BUFFER_ALL_CPUS) {
		up_write(&all_cpu_access_lock);
	} else {
		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
		up_read(&all_cpu_access_lock);
	}
}

static inline void trace_access_lock_init(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		mutex_init(&per_cpu(cpu_access_lock, cpu));
}

#else

static DEFINE_MUTEX(access_lock);

static inline void trace_access_lock(int cpu)
{
	(void)cpu;
	mutex_lock(&access_lock);
}

static inline void trace_access_unlock(int cpu)
{
	(void)cpu;
	mutex_unlock(&access_lock);
}

static inline void trace_access_lock_init(void)
{
}

#endif

#ifdef CONFIG_STACKTRACE
static void __ftrace_trace_stack(struct trace_array *tr,
				 struct trace_buffer *buffer,
				 unsigned int trace_ctx,
				 int skip, struct pt_regs *regs);
static inline void ftrace_trace_stack(struct trace_array *tr,
				      struct trace_buffer *buffer,
				      unsigned int trace_ctx,
				      int skip, struct pt_regs *regs);

#else
static inline void __ftrace_trace_stack(struct trace_array *tr,
					struct trace_buffer *buffer,
					unsigned int trace_ctx,
					int skip, struct pt_regs *regs)
{
}
static inline void ftrace_trace_stack(struct trace_array *tr,
				      struct trace_buffer *buffer,
				      unsigned int trace_ctx,
				      int skip, struct pt_regs *regs)
{
}

#endif

static __always_inline void
trace_event_setup(struct ring_buffer_event *event,
		  int type, unsigned int trace_ctx)
{
	struct trace_entry *ent = ring_buffer_event_data(event);

	tracing_generic_entry_update(ent, type, trace_ctx);
}

static __always_inline struct ring_buffer_event *
__trace_buffer_lock_reserve(struct trace_buffer *buffer,
			    int type,
			    unsigned long len,
			    unsigned int trace_ctx)
{
	struct ring_buffer_event *event;

	event = ring_buffer_lock_reserve(buffer, len);
	if (event != NULL)
		trace_event_setup(event, type, trace_ctx);

	return event;
}

void tracer_tracing_on(struct trace_array *tr)
{
	if (tr->array_buffer.buffer)
		ring_buffer_record_on(tr->array_buffer.buffer);
	/*
	 * This flag is looked at when buffers haven't been allocated
	 * yet, or by some tracers (like irqsoff), that just want to
	 * know if the ring buffer has been disabled, but it can handle
	 * races of where it gets disabled but we still do a record.
	 * As the check is in the fast path of the tracers, it is more
	 * important to be fast than accurate.
	 */
	tr->buffer_disabled = 0;
}

/**
 * tracing_on - enable tracing buffers
 *
 * This function enables tracing buffers that may have been
 * disabled with tracing_off.
 */
void tracing_on(void)
{
	tracer_tracing_on(&global_trace);
}
EXPORT_SYMBOL_GPL(tracing_on);


static __always_inline void
__buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
{
	__this_cpu_write(trace_taskinfo_save, true);

	/* If this is the temp buffer, we need to commit fully */
	if (this_cpu_read(trace_buffered_event) == event) {
		/* Length is in event->array[0] */
		ring_buffer_write(buffer, event->array[0], &event->array[1]);
		/* Release the temp buffer */
		this_cpu_dec(trace_buffered_event_cnt);
		/* ring_buffer_unlock_commit() enables preemption */
		preempt_enable_notrace();
	} else
		ring_buffer_unlock_commit(buffer);
}

int __trace_array_puts(struct trace_array *tr, unsigned long ip,
		       const char *str, int size)
{
	struct ring_buffer_event *event;
	struct trace_buffer *buffer;
	struct print_entry *entry;
	unsigned int trace_ctx;
	int alloc;

	if (!(tr->trace_flags & TRACE_ITER(PRINTK)))
		return 0;

	if (unlikely(tracing_selftest_running && tr == &global_trace))
		return 0;

	if (unlikely(tracing_disabled))
		return 0;

	alloc = sizeof(*entry) + size + 2; /* possible \n added */

	trace_ctx = tracing_gen_ctx();
	buffer = tr->array_buffer.buffer;
	guard(ring_buffer_nest)(buffer);
	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
					    trace_ctx);
	if (!event)
		return 0;

	entry = ring_buffer_event_data(event);
	entry->ip = ip;

	memcpy(&entry->buf, str, size);

	/* Add a newline if necessary */
	if (entry->buf[size - 1] != '\n') {
		entry->buf[size] = '\n';
		entry->buf[size + 1] = '\0';
	} else
		entry->buf[size] = '\0';

	__buffer_unlock_commit(buffer, event);
	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
	return size;
}
EXPORT_SYMBOL_GPL(__trace_array_puts);

/**
 * __trace_puts - write a constant string into the trace buffer.
 * @ip: The address of the caller
 * @str: The constant string to write
 */
int __trace_puts(unsigned long ip, const char *str)
{
	return __trace_array_puts(printk_trace, ip, str, strlen(str));
}
EXPORT_SYMBOL_GPL(__trace_puts);

/**
 * __trace_bputs - write the pointer to a constant string into trace buffer
 * @ip: The address of the caller
 * @str: The constant string to write to the buffer
 */
int __trace_bputs(unsigned long ip, const char *str)
{
	struct trace_array *tr = READ_ONCE(printk_trace);
	struct ring_buffer_event *event;
	struct trace_buffer *buffer;
	struct bputs_entry *entry;
	unsigned int trace_ctx;
	int size = sizeof(struct bputs_entry);

	if (!printk_binsafe(tr))
		return __trace_puts(ip, str);

	if (!(tr->trace_flags & TRACE_ITER(PRINTK)))
		return 0;

	if (unlikely(tracing_selftest_running || tracing_disabled))
		return 0;

	trace_ctx = tracing_gen_ctx();
	buffer = tr->array_buffer.buffer;

	guard(ring_buffer_nest)(buffer);
	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
					    trace_ctx);
	if (!event)
		return 0;

	entry = ring_buffer_event_data(event);
	entry->ip = ip;
	entry->str = str;

	__buffer_unlock_commit(buffer, event);
	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);

	return 1;
}
EXPORT_SYMBOL_GPL(__trace_bputs);

#ifdef CONFIG_TRACER_SNAPSHOT
static void tracing_snapshot_instance_cond(struct trace_array *tr,
					   void *cond_data)
{
	struct tracer *tracer = tr->current_trace;
	unsigned long flags;

	if (in_nmi()) {
		trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
		trace_array_puts(tr, "*** snapshot is being ignored ***\n");
		return;
	}

	if (!tr->allocated_snapshot) {
		trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
		trace_array_puts(tr, "*** stopping trace here! ***\n");
		tracer_tracing_off(tr);
		return;
	}

	/* Note, snapshot can not be used when the tracer uses it */
	if (tracer->use_max_tr) {
		trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
		return;
	}

	if (tr->mapped) {
		trace_array_puts(tr, "*** BUFFER MEMORY MAPPED ***\n");
		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
		return;
	}

	local_irq_save(flags);
	update_max_tr(tr, current, smp_processor_id(), cond_data);
	local_irq_restore(flags);
}

void tracing_snapshot_instance(struct trace_array *tr)
{
	tracing_snapshot_instance_cond(tr, NULL);
}

/**
 * tracing_snapshot - take a snapshot of the current buffer.
 *
 * This causes a swap between the snapshot buffer and the current live
 * tracing buffer. You can use this to take snapshots of the live
 * trace when some condition is triggered, but continue to trace.
 *
 * Note, make sure to allocate the snapshot with either
 * a tracing_snapshot_alloc(), or by doing it manually
 * with: echo 1 > /sys/kernel/tracing/snapshot
 *
 * If the snapshot buffer is not allocated, it will stop tracing.
 * Basically making a permanent snapshot.
 */
void tracing_snapshot(void)
{
	struct trace_array *tr = &global_trace;

	tracing_snapshot_instance(tr);
}
EXPORT_SYMBOL_GPL(tracing_snapshot);

/**
 * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
 * @tr: The tracing instance to snapshot
 * @cond_data: The data to be tested conditionally, and possibly saved
 *
 * This is the same as tracing_snapshot() except that the snapshot is
 * conditional - the snapshot will only happen if the
 * cond_snapshot.update() implementation receiving the cond_data
 * returns true, which means that the trace array's cond_snapshot
 * update() operation used the cond_data to determine whether the
 * snapshot should be taken, and if it was, presumably saved it along
 * with the snapshot.
 */
void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
{
	tracing_snapshot_instance_cond(tr, cond_data);
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond);

/**
 * tracing_cond_snapshot_data - get the user data associated with a snapshot
 * @tr: The tracing instance
 *
 * When the user enables a conditional snapshot using
 * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
 * with the snapshot. This accessor is used to retrieve it.
 *
 * Should not be called from cond_snapshot.update(), since it takes
 * the tr->max_lock lock, which the code calling
 * cond_snapshot.update() has already done.
 *
 * Returns the cond_data associated with the trace array's snapshot.
 */
void *tracing_cond_snapshot_data(struct trace_array *tr)
{
	void *cond_data = NULL;

	local_irq_disable();
	arch_spin_lock(&tr->max_lock);

	if (tr->cond_snapshot)
		cond_data = tr->cond_snapshot->cond_data;

	arch_spin_unlock(&tr->max_lock);
	local_irq_enable();

	return cond_data;
}
EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);

static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
					struct array_buffer *size_buf, int cpu_id);
static void set_buffer_entries(struct array_buffer *buf, unsigned long val);

int tracing_alloc_snapshot_instance(struct trace_array *tr)
{
	int order;
	int ret;

	if (!tr->allocated_snapshot) {

		/* Make the snapshot buffer have the same order as main buffer */
		order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
		ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
		if (ret < 0)
			return ret;

		/* allocate spare buffer */
		ret = resize_buffer_duplicate_size(&tr->max_buffer,
				&tr->array_buffer, RING_BUFFER_ALL_CPUS);
		if (ret < 0)
			return ret;

		tr->allocated_snapshot = true;
	}

	return 0;
}

static void free_snapshot(struct trace_array *tr)
{
	/*
	 * We don't free the ring buffer; instead, we resize it because
	 * the max_tr ring buffer has some state (e.g. ring->clock) and
	 * we want to preserve it.
	 */
	ring_buffer_subbuf_order_set(tr->max_buffer.buffer, 0);
	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
	set_buffer_entries(&tr->max_buffer, 1);
	tracing_reset_online_cpus(&tr->max_buffer);
	tr->allocated_snapshot = false;
}

static int tracing_arm_snapshot_locked(struct trace_array *tr)
{
	int ret;

	lockdep_assert_held(&trace_types_lock);

	spin_lock(&tr->snapshot_trigger_lock);
	if (tr->snapshot == UINT_MAX || tr->mapped) {
		spin_unlock(&tr->snapshot_trigger_lock);
		return -EBUSY;
	}

	tr->snapshot++;
	spin_unlock(&tr->snapshot_trigger_lock);

	ret = tracing_alloc_snapshot_instance(tr);
	if (ret) {
		spin_lock(&tr->snapshot_trigger_lock);
		tr->snapshot--;
		spin_unlock(&tr->snapshot_trigger_lock);
	}

	return ret;
}

int tracing_arm_snapshot(struct trace_array *tr)
{
	guard(mutex)(&trace_types_lock);
	return tracing_arm_snapshot_locked(tr);
}

void tracing_disarm_snapshot(struct trace_array *tr)
{
	spin_lock(&tr->snapshot_trigger_lock);
	if (!WARN_ON(!tr->snapshot))
		tr->snapshot--;
	spin_unlock(&tr->snapshot_trigger_lock);
}

/**
 * tracing_alloc_snapshot - allocate snapshot buffer.
 *
 * This only allocates the snapshot buffer if it isn't already
 * allocated - it doesn't also take a snapshot.
 *
 * This is meant to be used in cases where the snapshot buffer needs
 * to be set up for events that can't sleep but need to be able to
 * trigger a snapshot.
 */
int tracing_alloc_snapshot(void)
{
	struct trace_array *tr = &global_trace;
	int ret;

	ret = tracing_alloc_snapshot_instance(tr);
	WARN_ON(ret < 0);

	return ret;
}
EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);

/**
 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
 *
 * This is similar to tracing_snapshot(), but it will allocate the
 * snapshot buffer if it isn't already allocated. Use this only
 * where it is safe to sleep, as the allocation may sleep.
 *
 * This causes a swap between the snapshot buffer and the current live
 * tracing buffer. You can use this to take snapshots of the live
 * trace when some condition is triggered, but continue to trace.
 */
void tracing_snapshot_alloc(void)
{
	int ret;

	ret = tracing_alloc_snapshot();
	if (ret < 0)
		return;

	tracing_snapshot();
}
EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);

/**
 * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
 * @tr: The tracing instance
 * @cond_data: User data to associate with the snapshot
 * @update: Implementation of the cond_snapshot update function
 *
 * Check whether the conditional snapshot for the given instance has
 * already been enabled, or if the current tracer is already using a
 * snapshot; if so, return -EBUSY, else create a cond_snapshot and
 * save the cond_data and update function inside.
 *
 * Returns 0 if successful, error otherwise.
 */
int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
				 cond_update_fn_t update)
{
	struct cond_snapshot *cond_snapshot __free(kfree) =
		kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
	int ret;

	if (!cond_snapshot)
		return -ENOMEM;

	cond_snapshot->cond_data = cond_data;
	cond_snapshot->update = update;

	guard(mutex)(&trace_types_lock);

	if (tr->current_trace->use_max_tr)
		return -EBUSY;

	/*
	 * The cond_snapshot can only change to NULL without the
	 * trace_types_lock. We don't care if we race with it going
	 * to NULL, but we want to make sure that it's not set to
	 * something other than NULL when we get here, which we can
	 * do safely with only holding the trace_types_lock and not
	 * having to take the max_lock.
	 */
	if (tr->cond_snapshot)
		return -EBUSY;

	ret = tracing_arm_snapshot_locked(tr);
	if (ret)
		return ret;

	local_irq_disable();
	arch_spin_lock(&tr->max_lock);
	tr->cond_snapshot = no_free_ptr(cond_snapshot);
	arch_spin_unlock(&tr->max_lock);
	local_irq_enable();

	return 0;
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);

/**
 * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
 * @tr: The tracing instance
 *
 * Check whether the conditional snapshot for the given instance is
 * enabled; if so, free the cond_snapshot associated with it,
 * otherwise return -EINVAL.
 *
 * Returns 0 if successful, error otherwise.
 */
int tracing_snapshot_cond_disable(struct trace_array *tr)
{
	int ret = 0;

	local_irq_disable();
	arch_spin_lock(&tr->max_lock);

	if (!tr->cond_snapshot)
		ret = -EINVAL;
	else {
		kfree(tr->cond_snapshot);
		tr->cond_snapshot = NULL;
	}

	arch_spin_unlock(&tr->max_lock);
	local_irq_enable();

	tracing_disarm_snapshot(tr);

	return ret;
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
#else
void tracing_snapshot(void)
{
	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
}
EXPORT_SYMBOL_GPL(tracing_snapshot);
void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
{
	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
int tracing_alloc_snapshot(void)
{
	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
	return -ENODEV;
}
EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
void tracing_snapshot_alloc(void)
{
	/* Give warning */
	tracing_snapshot();
}
EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
void *tracing_cond_snapshot_data(struct trace_array *tr)
{
	return NULL;
}
EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
{
	return -ENODEV;
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
int tracing_snapshot_cond_disable(struct trace_array *tr)
{
	return false;
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
#define free_snapshot(tr)	do { } while (0)
#define tracing_arm_snapshot_locked(tr) ({ -EBUSY; })
#endif /* CONFIG_TRACER_SNAPSHOT */

void tracer_tracing_off(struct trace_array *tr)
{
	if (tr->array_buffer.buffer)
		ring_buffer_record_off(tr->array_buffer.buffer);
	/*
	 * This flag is looked at when buffers haven't been allocated
	 * yet, or by some tracers (like irqsoff), that just want to
	 * know if the ring buffer has been disabled, but it can handle
	 * races of where it gets disabled but we still do a record.
	 * As the check is in the fast path of the tracers, it is more
	 * important to be fast than accurate.
	 */
	tr->buffer_disabled = 1;
}

/**
 * tracer_tracing_disable() - temporarily disable writes to the buffer
 * @tr: The trace array to disable its buffer for
 *
 * Expects tracer_tracing_enable() to re-enable tracing.
 * The difference between this and tracer_tracing_off() is that this
 * is a counter and can nest, whereas tracer_tracing_off() can
 * be called multiple times and a single tracer_tracing_on() will
 * enable it.
 */
void tracer_tracing_disable(struct trace_array *tr)
{
	if (WARN_ON_ONCE(!tr->array_buffer.buffer))
		return;

	ring_buffer_record_disable(tr->array_buffer.buffer);
}

/**
 * tracer_tracing_enable() - counter part of tracer_tracing_disable()
 * @tr: The trace array that had tracer_tracing_disable() called on it
 *
 * This is called after tracer_tracing_disable() has been called on @tr,
 * when it's safe to re-enable tracing.
 */
void tracer_tracing_enable(struct trace_array *tr)
{
	if (WARN_ON_ONCE(!tr->array_buffer.buffer))
		return;

	ring_buffer_record_enable(tr->array_buffer.buffer);
}

/**
 * tracing_off - turn off tracing buffers
 *
 * This function stops the tracing buffers from recording data.
 * It does not disable any overhead the tracers themselves may
 * be causing. This function simply causes all recording to
 * the ring buffers to fail.
 */
void tracing_off(void)
{
	tracer_tracing_off(&global_trace);
}
EXPORT_SYMBOL_GPL(tracing_off);

void disable_trace_on_warning(void)
{
	if (__disable_trace_on_warning) {
		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
			"Disabling tracing due to warning\n");
		tracing_off();
	}
}

/**
 * tracer_tracing_is_on - show real state of ring buffer enabled
 * @tr : the trace array to know if ring buffer is enabled
 *
 * Shows real state of the ring buffer if it is enabled or not.
 */
bool tracer_tracing_is_on(struct trace_array *tr)
{
	if (tr->array_buffer.buffer)
		return ring_buffer_record_is_set_on(tr->array_buffer.buffer);
	return !tr->buffer_disabled;
}

/**
 * tracing_is_on - show state of ring buffers enabled
 */
int tracing_is_on(void)
{
	return tracer_tracing_is_on(&global_trace);
}
EXPORT_SYMBOL_GPL(tracing_is_on);

static int __init set_buf_size(char *str)
{
	unsigned long buf_size;

	if (!str)
		return 0;
	buf_size = memparse(str, &str);
	/*
	 * nr_entries can not be zero and the startup
	 * tests require some buffer space. Therefore
	 * ensure we have at least 4096 bytes of buffer.
	 */
	trace_buf_size = max(4096UL, buf_size);
	return 1;
}
__setup("trace_buf_size=", set_buf_size);

static int __init set_tracing_thresh(char *str)
{
	unsigned long threshold;
	int ret;

	if (!str)
		return 0;
	ret = kstrtoul(str, 0, &threshold);
	if (ret < 0)
		return 0;
	tracing_thresh = threshold * 1000;
	return 1;
}
__setup("tracing_thresh=", set_tracing_thresh);

unsigned long nsecs_to_usecs(unsigned long nsecs)
{
	return nsecs / 1000;
}

/*
 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
 * of strings in the order that the evals (enum) were defined.
 */
#undef C
#define C(a, b) b

/* These must match the bit positions in trace_iterator_flags */
static const char *trace_options[] = {
	TRACE_FLAGS
	NULL
};

static struct {
	u64 (*func)(void);
	const char *name;
	int in_ns;		/* is this clock in nanoseconds? */
} trace_clocks[] = {
	{ trace_clock_local,		"local",	1 },
	{ trace_clock_global,		"global",	1 },
	{ trace_clock_counter,		"counter",	0 },
	{ trace_clock_jiffies,		"uptime",	0 },
	{ trace_clock,			"perf",		1 },
	{ ktime_get_mono_fast_ns,	"mono",		1 },
	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
	{ ktime_get_boot_fast_ns,	"boot",		1 },
	{ ktime_get_tai_fast_ns,	"tai",		1 },
	ARCH_TRACE_CLOCKS
};

bool trace_clock_in_ns(struct trace_array *tr)
{
	if (trace_clocks[tr->clock_id].in_ns)
		return true;

	return false;
}

/*
 * trace_parser_get_init - gets the buffer for trace parser
 */
int trace_parser_get_init(struct trace_parser *parser, int size)
{
	memset(parser, 0, sizeof(*parser));

	parser->buffer = kmalloc(size, GFP_KERNEL);
	if (!parser->buffer)
		return 1;

	parser->size = size;
	return 0;
}

/*
 * trace_parser_put - frees the buffer for trace parser
 */
void trace_parser_put(struct trace_parser *parser)
{
	kfree(parser->buffer);
	parser->buffer = NULL;
}

/*
 * trace_get_user - reads the user input string separated by space
 * (matched by isspace(ch))
 *
 * For each string found the 'struct trace_parser' is updated,
 * and the function returns.
 *
 * Returns number of bytes read.
 *
 * See kernel/trace/trace.h for 'struct trace_parser' details.
 */
int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
		   size_t cnt, loff_t *ppos)
{
	char ch;
	size_t read = 0;
	ssize_t ret;

	if (!*ppos)
		trace_parser_clear(parser);

	ret = get_user(ch, ubuf++);
	if (ret)
		goto fail;

	read++;
	cnt--;

	/*
	 * The parser is not finished with the last write,
	 * continue reading the user input without skipping spaces.
	 */
	if (!parser->cont) {
		/* skip white space */
		while (cnt && isspace(ch)) {
			ret = get_user(ch, ubuf++);
			if (ret)
				goto fail;
			read++;
			cnt--;
		}

		parser->idx = 0;

		/* only spaces were written */
		if (isspace(ch) || !ch) {
			*ppos += read;
			return read;
		}
	}

	/* read the non-space input */
	while (cnt && !isspace(ch) && ch) {
		if (parser->idx < parser->size - 1)
			parser->buffer[parser->idx++] = ch;
		else {
			ret = -EINVAL;
			goto fail;
		}

		ret = get_user(ch, ubuf++);
		if (ret)
			goto fail;
		read++;
		cnt--;
	}

	/* We either got finished input or we have to wait for another call. */
	if (isspace(ch) || !ch) {
		parser->buffer[parser->idx] = 0;
		parser->cont = false;
	} else if (parser->idx < parser->size - 1) {
		parser->cont = true;
		parser->buffer[parser->idx++] = ch;
		/* Make sure the parsed string always terminates with '\0'. */
		parser->buffer[parser->idx] = 0;
	} else {
		ret = -EINVAL;
		goto fail;
	}

	*ppos += read;
	return read;
fail:
	trace_parser_fail(parser);
	return ret;
}

/* TODO add a seq_buf_to_buffer() */
static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
{
	int len;

	if (trace_seq_used(s) <= s->readpos)
		return -EBUSY;

	len = trace_seq_used(s) - s->readpos;
	if (cnt > len)
		cnt = len;
	memcpy(buf, s->buffer + s->readpos, cnt);

	s->readpos += cnt;
	return cnt;
}

unsigned long __read_mostly	tracing_thresh;

#ifdef CONFIG_TRACER_MAX_TRACE
static const struct file_operations tracing_max_lat_fops;

#ifdef LATENCY_FS_NOTIFY

static struct workqueue_struct *fsnotify_wq;

static void latency_fsnotify_workfn(struct work_struct *work)
{
	struct trace_array *tr = container_of(work, struct trace_array,
					      fsnotify_work);
	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
}

static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
{
	struct trace_array *tr = container_of(iwork, struct trace_array,
					      fsnotify_irqwork);
	queue_work(fsnotify_wq, &tr->fsnotify_work);
}

static void trace_create_maxlat_file(struct trace_array *tr,
				     struct dentry *d_tracer)
{
	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
	tr->d_max_latency = trace_create_file("tracing_max_latency",
					      TRACE_MODE_WRITE,
					      d_tracer, tr,
					      &tracing_max_lat_fops);
}

__init static int latency_fsnotify_init(void)
{
	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
				      WQ_UNBOUND | WQ_HIGHPRI, 0);
	if (!fsnotify_wq) {
		pr_err("Unable to allocate tr_max_lat_wq\n");
		return -ENOMEM;
	}
	return 0;
}

late_initcall_sync(latency_fsnotify_init);

void latency_fsnotify(struct trace_array *tr)
{
	if (!fsnotify_wq)
		return;
	/*
	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
	 * possible that we are called from __schedule() or do_idle(), which
	 * could cause a deadlock.
1957 */ 1958 irq_work_queue(&tr->fsnotify_irqwork); 1959 } 1960 1961 #else /* !LATENCY_FS_NOTIFY */ 1962 1963 #define trace_create_maxlat_file(tr, d_tracer) \ 1964 trace_create_file("tracing_max_latency", TRACE_MODE_WRITE, \ 1965 d_tracer, tr, &tracing_max_lat_fops) 1966 1967 #endif 1968 1969 /* 1970 * Copy the new maximum trace into the separate maximum-trace 1971 * structure. (this way the maximum trace is permanently saved, 1972 * for later retrieval via /sys/kernel/tracing/tracing_max_latency) 1973 */ 1974 static void 1975 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) 1976 { 1977 struct array_buffer *trace_buf = &tr->array_buffer; 1978 struct array_buffer *max_buf = &tr->max_buffer; 1979 struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu); 1980 struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu); 1981 1982 max_buf->cpu = cpu; 1983 max_buf->time_start = data->preempt_timestamp; 1984 1985 max_data->saved_latency = tr->max_latency; 1986 max_data->critical_start = data->critical_start; 1987 max_data->critical_end = data->critical_end; 1988 1989 strscpy(max_data->comm, tsk->comm); 1990 max_data->pid = tsk->pid; 1991 /* 1992 * If tsk == current, then use current_uid(), as that does not use 1993 * RCU. The irq tracer can be called out of RCU scope. 1994 */ 1995 if (tsk == current) 1996 max_data->uid = current_uid(); 1997 else 1998 max_data->uid = task_uid(tsk); 1999 2000 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO; 2001 max_data->policy = tsk->policy; 2002 max_data->rt_priority = tsk->rt_priority; 2003 2004 /* record this tasks comm */ 2005 tracing_record_cmdline(tsk); 2006 latency_fsnotify(tr); 2007 } 2008 2009 /** 2010 * update_max_tr - snapshot all trace buffers from global_trace to max_tr 2011 * @tr: tracer 2012 * @tsk: the task with the latency 2013 * @cpu: The cpu that initiated the trace. 2014 * @cond_data: User data associated with a conditional snapshot 2015 * 2016 * Flip the buffers between the @tr and the max_tr and record information 2017 * about which task was the cause of this latency. 2018 */ 2019 void 2020 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu, 2021 void *cond_data) 2022 { 2023 if (tr->stop_count) 2024 return; 2025 2026 WARN_ON_ONCE(!irqs_disabled()); 2027 2028 if (!tr->allocated_snapshot) { 2029 /* Only the nop tracer should hit this when disabling */ 2030 WARN_ON_ONCE(tr->current_trace != &nop_trace); 2031 return; 2032 } 2033 2034 arch_spin_lock(&tr->max_lock); 2035 2036 /* Inherit the recordable setting from array_buffer */ 2037 if (ring_buffer_record_is_set_on(tr->array_buffer.buffer)) 2038 ring_buffer_record_on(tr->max_buffer.buffer); 2039 else 2040 ring_buffer_record_off(tr->max_buffer.buffer); 2041 2042 #ifdef CONFIG_TRACER_SNAPSHOT 2043 if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) { 2044 arch_spin_unlock(&tr->max_lock); 2045 return; 2046 } 2047 #endif 2048 swap(tr->array_buffer.buffer, tr->max_buffer.buffer); 2049 2050 __update_max_tr(tr, tsk, cpu); 2051 2052 arch_spin_unlock(&tr->max_lock); 2053 2054 /* Any waiters on the old snapshot buffer need to wake up */ 2055 ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS); 2056 } 2057 2058 /** 2059 * update_max_tr_single - only copy one trace over, and reset the rest 2060 * @tr: tracer 2061 * @tsk: task with the latency 2062 * @cpu: the cpu of the buffer to copy. 2063 * 2064 * Flip the trace of a single CPU buffer between the @tr and the max_tr. 
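 *
 * If the per-CPU swap fails because a commit or a resize is in progress,
 * a note is printed into the max buffer instead, and the latency
 * information is still recorded via __update_max_tr().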
2065 */ 2066 void 2067 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) 2068 { 2069 int ret; 2070 2071 if (tr->stop_count) 2072 return; 2073 2074 WARN_ON_ONCE(!irqs_disabled()); 2075 if (!tr->allocated_snapshot) { 2076 /* Only the nop tracer should hit this when disabling */ 2077 WARN_ON_ONCE(tr->current_trace != &nop_trace); 2078 return; 2079 } 2080 2081 arch_spin_lock(&tr->max_lock); 2082 2083 ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu); 2084 2085 if (ret == -EBUSY) { 2086 /* 2087 * We failed to swap the buffer due to a commit taking 2088 * place on this CPU. We fail to record, but we reset 2089 * the max trace buffer (no one writes directly to it) 2090 * and flag that it failed. 2091 * Another reason is resize is in progress. 2092 */ 2093 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_, 2094 "Failed to swap buffers due to commit or resize in progress\n"); 2095 } 2096 2097 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY); 2098 2099 __update_max_tr(tr, tsk, cpu); 2100 arch_spin_unlock(&tr->max_lock); 2101 } 2102 2103 #endif /* CONFIG_TRACER_MAX_TRACE */ 2104 2105 struct pipe_wait { 2106 struct trace_iterator *iter; 2107 int wait_index; 2108 }; 2109 2110 static bool wait_pipe_cond(void *data) 2111 { 2112 struct pipe_wait *pwait = data; 2113 struct trace_iterator *iter = pwait->iter; 2114 2115 if (atomic_read_acquire(&iter->wait_index) != pwait->wait_index) 2116 return true; 2117 2118 return iter->closed; 2119 } 2120 2121 static int wait_on_pipe(struct trace_iterator *iter, int full) 2122 { 2123 struct pipe_wait pwait; 2124 int ret; 2125 2126 /* Iterators are static, they should be filled or empty */ 2127 if (trace_buffer_iter(iter, iter->cpu_file)) 2128 return 0; 2129 2130 pwait.wait_index = atomic_read_acquire(&iter->wait_index); 2131 pwait.iter = iter; 2132 2133 ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full, 2134 wait_pipe_cond, &pwait); 2135 2136 #ifdef CONFIG_TRACER_MAX_TRACE 2137 /* 2138 * Make sure this is still the snapshot buffer, as if a snapshot were 2139 * to happen, this would now be the main buffer. 2140 */ 2141 if (iter->snapshot) 2142 iter->array_buffer = &iter->tr->max_buffer; 2143 #endif 2144 return ret; 2145 } 2146 2147 #ifdef CONFIG_FTRACE_STARTUP_TEST 2148 static bool selftests_can_run; 2149 2150 struct trace_selftests { 2151 struct list_head list; 2152 struct tracer *type; 2153 }; 2154 2155 static LIST_HEAD(postponed_selftests); 2156 2157 static int save_selftest(struct tracer *type) 2158 { 2159 struct trace_selftests *selftest; 2160 2161 selftest = kmalloc(sizeof(*selftest), GFP_KERNEL); 2162 if (!selftest) 2163 return -ENOMEM; 2164 2165 selftest->type = type; 2166 list_add(&selftest->list, &postponed_selftests); 2167 return 0; 2168 } 2169 2170 static int run_tracer_selftest(struct tracer *type) 2171 { 2172 struct trace_array *tr = &global_trace; 2173 struct tracer_flags *saved_flags = tr->current_trace_flags; 2174 struct tracer *saved_tracer = tr->current_trace; 2175 int ret; 2176 2177 if (!type->selftest || tracing_selftest_disabled) 2178 return 0; 2179 2180 /* 2181 * If a tracer registers early in boot up (before scheduling is 2182 * initialized and such), then do not run its selftests yet. 2183 * Instead, run it a little later in the boot process. 
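 *
 * Such early tracers are queued on postponed_selftests by
 * save_selftest() and are run later from init_trace_selftests(),
 * once selftests_can_run has been set at core_initcall time.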
2184 */ 2185 if (!selftests_can_run) 2186 return save_selftest(type); 2187 2188 if (!tracing_is_on()) { 2189 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n", 2190 type->name); 2191 return 0; 2192 } 2193 2194 /* 2195 * Run a selftest on this tracer. 2196 * Here we reset the trace buffer, and set the current 2197 * tracer to be this tracer. The tracer can then run some 2198 * internal tracing to verify that everything is in order. 2199 * If we fail, we do not register this tracer. 2200 */ 2201 tracing_reset_online_cpus(&tr->array_buffer); 2202 2203 tr->current_trace = type; 2204 tr->current_trace_flags = type->flags ? : type->default_flags; 2205 2206 #ifdef CONFIG_TRACER_MAX_TRACE 2207 if (type->use_max_tr) { 2208 /* If we expanded the buffers, make sure the max is expanded too */ 2209 if (tr->ring_buffer_expanded) 2210 ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size, 2211 RING_BUFFER_ALL_CPUS); 2212 tr->allocated_snapshot = true; 2213 } 2214 #endif 2215 2216 /* the test is responsible for initializing and enabling */ 2217 pr_info("Testing tracer %s: ", type->name); 2218 ret = type->selftest(type, tr); 2219 /* the test is responsible for resetting too */ 2220 tr->current_trace = saved_tracer; 2221 tr->current_trace_flags = saved_flags; 2222 if (ret) { 2223 printk(KERN_CONT "FAILED!\n"); 2224 /* Add the warning after printing 'FAILED' */ 2225 WARN_ON(1); 2226 return -1; 2227 } 2228 /* Only reset on passing, to avoid touching corrupted buffers */ 2229 tracing_reset_online_cpus(&tr->array_buffer); 2230 2231 #ifdef CONFIG_TRACER_MAX_TRACE 2232 if (type->use_max_tr) { 2233 tr->allocated_snapshot = false; 2234 2235 /* Shrink the max buffer again */ 2236 if (tr->ring_buffer_expanded) 2237 ring_buffer_resize(tr->max_buffer.buffer, 1, 2238 RING_BUFFER_ALL_CPUS); 2239 } 2240 #endif 2241 2242 printk(KERN_CONT "PASSED\n"); 2243 return 0; 2244 } 2245 2246 static int do_run_tracer_selftest(struct tracer *type) 2247 { 2248 int ret; 2249 2250 /* 2251 * Tests can take a long time, especially if they are run one after the 2252 * other, as does happen during bootup when all the tracers are 2253 * registered. This could cause the soft lockup watchdog to trigger. 2254 */ 2255 cond_resched(); 2256 2257 tracing_selftest_running = true; 2258 ret = run_tracer_selftest(type); 2259 tracing_selftest_running = false; 2260 2261 return ret; 2262 } 2263 2264 static __init int init_trace_selftests(void) 2265 { 2266 struct trace_selftests *p, *n; 2267 struct tracer *t, **last; 2268 int ret; 2269 2270 selftests_can_run = true; 2271 2272 guard(mutex)(&trace_types_lock); 2273 2274 if (list_empty(&postponed_selftests)) 2275 return 0; 2276 2277 pr_info("Running postponed tracer tests:\n"); 2278 2279 tracing_selftest_running = true; 2280 list_for_each_entry_safe(p, n, &postponed_selftests, list) { 2281 /* This loop can take minutes when sanitizers are enabled, so 2282 * lets make sure we allow RCU processing. 
2283 */ 2284 cond_resched(); 2285 ret = run_tracer_selftest(p->type); 2286 /* If the test fails, then warn and remove from available_tracers */ 2287 if (ret < 0) { 2288 WARN(1, "tracer: %s failed selftest, disabling\n", 2289 p->type->name); 2290 last = &trace_types; 2291 for (t = trace_types; t; t = t->next) { 2292 if (t == p->type) { 2293 *last = t->next; 2294 break; 2295 } 2296 last = &t->next; 2297 } 2298 } 2299 list_del(&p->list); 2300 kfree(p); 2301 } 2302 tracing_selftest_running = false; 2303 2304 return 0; 2305 } 2306 core_initcall(init_trace_selftests); 2307 #else 2308 static inline int do_run_tracer_selftest(struct tracer *type) 2309 { 2310 return 0; 2311 } 2312 #endif /* CONFIG_FTRACE_STARTUP_TEST */ 2313 2314 static int add_tracer(struct trace_array *tr, struct tracer *t); 2315 2316 static void __init apply_trace_boot_options(void); 2317 2318 static void free_tracers(struct trace_array *tr) 2319 { 2320 struct tracers *t, *n; 2321 2322 lockdep_assert_held(&trace_types_lock); 2323 2324 list_for_each_entry_safe(t, n, &tr->tracers, list) { 2325 list_del(&t->list); 2326 kfree(t->flags); 2327 kfree(t); 2328 } 2329 } 2330 2331 /** 2332 * register_tracer - register a tracer with the ftrace system. 2333 * @type: the plugin for the tracer 2334 * 2335 * Register a new plugin tracer. 2336 */ 2337 int __init register_tracer(struct tracer *type) 2338 { 2339 struct trace_array *tr; 2340 struct tracer *t; 2341 int ret = 0; 2342 2343 if (!type->name) { 2344 pr_info("Tracer must have a name\n"); 2345 return -1; 2346 } 2347 2348 if (strlen(type->name) >= MAX_TRACER_SIZE) { 2349 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE); 2350 return -1; 2351 } 2352 2353 if (security_locked_down(LOCKDOWN_TRACEFS)) { 2354 pr_warn("Can not register tracer %s due to lockdown\n", 2355 type->name); 2356 return -EPERM; 2357 } 2358 2359 mutex_lock(&trace_types_lock); 2360 2361 for (t = trace_types; t; t = t->next) { 2362 if (strcmp(type->name, t->name) == 0) { 2363 /* already found */ 2364 pr_info("Tracer %s already registered\n", 2365 type->name); 2366 ret = -1; 2367 goto out; 2368 } 2369 } 2370 2371 /* store the tracer for __set_tracer_option */ 2372 if (type->flags) 2373 type->flags->trace = type; 2374 2375 ret = do_run_tracer_selftest(type); 2376 if (ret < 0) 2377 goto out; 2378 2379 list_for_each_entry(tr, &ftrace_trace_arrays, list) { 2380 ret = add_tracer(tr, type); 2381 if (ret < 0) { 2382 /* The tracer will still exist but without options */ 2383 pr_warn("Failed to create tracer options for %s\n", type->name); 2384 break; 2385 } 2386 } 2387 2388 type->next = trace_types; 2389 trace_types = type; 2390 2391 out: 2392 mutex_unlock(&trace_types_lock); 2393 2394 if (ret || !default_bootup_tracer) 2395 return ret; 2396 2397 if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE)) 2398 return 0; 2399 2400 printk(KERN_INFO "Starting tracer '%s'\n", type->name); 2401 /* Do we want this tracer to start on bootup? */ 2402 WARN_ON(tracing_set_tracer(&global_trace, type->name) < 0); 2403 default_bootup_tracer = NULL; 2404 2405 apply_trace_boot_options(); 2406 2407 /* disable other selftests, since this will break it. 
*/ 2408 disable_tracing_selftest("running a tracer"); 2409 2410 return 0; 2411 } 2412 2413 static void tracing_reset_cpu(struct array_buffer *buf, int cpu) 2414 { 2415 struct trace_buffer *buffer = buf->buffer; 2416 2417 if (!buffer) 2418 return; 2419 2420 ring_buffer_record_disable(buffer); 2421 2422 /* Make sure all commits have finished */ 2423 synchronize_rcu(); 2424 ring_buffer_reset_cpu(buffer, cpu); 2425 2426 ring_buffer_record_enable(buffer); 2427 } 2428 2429 void tracing_reset_online_cpus(struct array_buffer *buf) 2430 { 2431 struct trace_buffer *buffer = buf->buffer; 2432 2433 if (!buffer) 2434 return; 2435 2436 ring_buffer_record_disable(buffer); 2437 2438 /* Make sure all commits have finished */ 2439 synchronize_rcu(); 2440 2441 buf->time_start = buffer_ftrace_now(buf, buf->cpu); 2442 2443 ring_buffer_reset_online_cpus(buffer); 2444 2445 ring_buffer_record_enable(buffer); 2446 } 2447 2448 static void tracing_reset_all_cpus(struct array_buffer *buf) 2449 { 2450 struct trace_buffer *buffer = buf->buffer; 2451 2452 if (!buffer) 2453 return; 2454 2455 ring_buffer_record_disable(buffer); 2456 2457 /* Make sure all commits have finished */ 2458 synchronize_rcu(); 2459 2460 buf->time_start = buffer_ftrace_now(buf, buf->cpu); 2461 2462 ring_buffer_reset(buffer); 2463 2464 ring_buffer_record_enable(buffer); 2465 } 2466 2467 /* Must have trace_types_lock held */ 2468 void tracing_reset_all_online_cpus_unlocked(void) 2469 { 2470 struct trace_array *tr; 2471 2472 lockdep_assert_held(&trace_types_lock); 2473 2474 list_for_each_entry(tr, &ftrace_trace_arrays, list) { 2475 if (!tr->clear_trace) 2476 continue; 2477 tr->clear_trace = false; 2478 tracing_reset_online_cpus(&tr->array_buffer); 2479 #ifdef CONFIG_TRACER_MAX_TRACE 2480 tracing_reset_online_cpus(&tr->max_buffer); 2481 #endif 2482 } 2483 } 2484 2485 void tracing_reset_all_online_cpus(void) 2486 { 2487 guard(mutex)(&trace_types_lock); 2488 tracing_reset_all_online_cpus_unlocked(); 2489 } 2490 2491 int is_tracing_stopped(void) 2492 { 2493 return global_trace.stop_count; 2494 } 2495 2496 static void tracing_start_tr(struct trace_array *tr) 2497 { 2498 struct trace_buffer *buffer; 2499 2500 if (tracing_disabled) 2501 return; 2502 2503 guard(raw_spinlock_irqsave)(&tr->start_lock); 2504 if (--tr->stop_count) { 2505 if (WARN_ON_ONCE(tr->stop_count < 0)) { 2506 /* Someone screwed up their debugging */ 2507 tr->stop_count = 0; 2508 } 2509 return; 2510 } 2511 2512 /* Prevent the buffers from switching */ 2513 arch_spin_lock(&tr->max_lock); 2514 2515 buffer = tr->array_buffer.buffer; 2516 if (buffer) 2517 ring_buffer_record_enable(buffer); 2518 2519 #ifdef CONFIG_TRACER_MAX_TRACE 2520 buffer = tr->max_buffer.buffer; 2521 if (buffer) 2522 ring_buffer_record_enable(buffer); 2523 #endif 2524 2525 arch_spin_unlock(&tr->max_lock); 2526 } 2527 2528 /** 2529 * tracing_start - quick start of the tracer 2530 * 2531 * If tracing is enabled but was stopped by tracing_stop, 2532 * this will start the tracer back up. 
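 *
 * The stop/start calls nest via the trace_array stop_count, so a
 * caller can, for example, temporarily silence the buffer with:
 *
 *	tracing_stop();
 *	...  section that should not be recorded  ...
 *	tracing_start();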
2533 */ 2534 void tracing_start(void) 2535 2536 { 2537 return tracing_start_tr(&global_trace); 2538 } 2539 2540 static void tracing_stop_tr(struct trace_array *tr) 2541 { 2542 struct trace_buffer *buffer; 2543 2544 guard(raw_spinlock_irqsave)(&tr->start_lock); 2545 if (tr->stop_count++) 2546 return; 2547 2548 /* Prevent the buffers from switching */ 2549 arch_spin_lock(&tr->max_lock); 2550 2551 buffer = tr->array_buffer.buffer; 2552 if (buffer) 2553 ring_buffer_record_disable(buffer); 2554 2555 #ifdef CONFIG_TRACER_MAX_TRACE 2556 buffer = tr->max_buffer.buffer; 2557 if (buffer) 2558 ring_buffer_record_disable(buffer); 2559 #endif 2560 2561 arch_spin_unlock(&tr->max_lock); 2562 } 2563 2564 /** 2565 * tracing_stop - quick stop of the tracer 2566 * 2567 * Light weight way to stop tracing. Use in conjunction with 2568 * tracing_start. 2569 */ 2570 void tracing_stop(void) 2571 { 2572 return tracing_stop_tr(&global_trace); 2573 } 2574 2575 /* 2576 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq 2577 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function 2578 * simplifies those functions and keeps them in sync. 2579 */ 2580 enum print_line_t trace_handle_return(struct trace_seq *s) 2581 { 2582 return trace_seq_has_overflowed(s) ? 2583 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED; 2584 } 2585 EXPORT_SYMBOL_GPL(trace_handle_return); 2586 2587 static unsigned short migration_disable_value(void) 2588 { 2589 #if defined(CONFIG_SMP) 2590 return current->migration_disabled; 2591 #else 2592 return 0; 2593 #endif 2594 } 2595 2596 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status) 2597 { 2598 unsigned int trace_flags = irqs_status; 2599 unsigned int pc; 2600 2601 pc = preempt_count(); 2602 2603 if (pc & NMI_MASK) 2604 trace_flags |= TRACE_FLAG_NMI; 2605 if (pc & HARDIRQ_MASK) 2606 trace_flags |= TRACE_FLAG_HARDIRQ; 2607 if (in_serving_softirq()) 2608 trace_flags |= TRACE_FLAG_SOFTIRQ; 2609 if (softirq_count() >> (SOFTIRQ_SHIFT + 1)) 2610 trace_flags |= TRACE_FLAG_BH_OFF; 2611 2612 if (tif_need_resched()) 2613 trace_flags |= TRACE_FLAG_NEED_RESCHED; 2614 if (test_preempt_need_resched()) 2615 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED; 2616 if (IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY) && tif_test_bit(TIF_NEED_RESCHED_LAZY)) 2617 trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY; 2618 return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) | 2619 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4; 2620 } 2621 2622 struct ring_buffer_event * 2623 trace_buffer_lock_reserve(struct trace_buffer *buffer, 2624 int type, 2625 unsigned long len, 2626 unsigned int trace_ctx) 2627 { 2628 return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx); 2629 } 2630 2631 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event); 2632 DEFINE_PER_CPU(int, trace_buffered_event_cnt); 2633 static int trace_buffered_event_ref; 2634 2635 /** 2636 * trace_buffered_event_enable - enable buffering events 2637 * 2638 * When events are being filtered, it is quicker to use a temporary 2639 * buffer to write the event data into if there's a likely chance 2640 * that it will not be committed. The discard of the ring buffer 2641 * is not as fast as committing, and is much slower than copying 2642 * a commit. 2643 * 2644 * When an event is to be filtered, allocate per cpu buffers to 2645 * write the event data into, and if the event is filtered and discarded 2646 * it is simply dropped, otherwise, the entire data is to be committed 2647 * in one shot. 
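 *
 * Calls are reference counted via trace_buffered_event_ref, must be
 * made with event_mutex held, and are paired with
 * trace_buffered_event_disable() once the filter is removed.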
2648 */ 2649 void trace_buffered_event_enable(void) 2650 { 2651 struct ring_buffer_event *event; 2652 struct page *page; 2653 int cpu; 2654 2655 WARN_ON_ONCE(!mutex_is_locked(&event_mutex)); 2656 2657 if (trace_buffered_event_ref++) 2658 return; 2659 2660 for_each_tracing_cpu(cpu) { 2661 page = alloc_pages_node(cpu_to_node(cpu), 2662 GFP_KERNEL | __GFP_NORETRY, 0); 2663 /* This is just an optimization and can handle failures */ 2664 if (!page) { 2665 pr_err("Failed to allocate event buffer\n"); 2666 break; 2667 } 2668 2669 event = page_address(page); 2670 memset(event, 0, sizeof(*event)); 2671 2672 per_cpu(trace_buffered_event, cpu) = event; 2673 2674 scoped_guard(preempt,) { 2675 if (cpu == smp_processor_id() && 2676 __this_cpu_read(trace_buffered_event) != 2677 per_cpu(trace_buffered_event, cpu)) 2678 WARN_ON_ONCE(1); 2679 } 2680 } 2681 } 2682 2683 static void enable_trace_buffered_event(void *data) 2684 { 2685 this_cpu_dec(trace_buffered_event_cnt); 2686 } 2687 2688 static void disable_trace_buffered_event(void *data) 2689 { 2690 this_cpu_inc(trace_buffered_event_cnt); 2691 } 2692 2693 /** 2694 * trace_buffered_event_disable - disable buffering events 2695 * 2696 * When a filter is removed, it is faster to not use the buffered 2697 * events, and to commit directly into the ring buffer. Free up 2698 * the temp buffers when there are no more users. This requires 2699 * special synchronization with current events. 2700 */ 2701 void trace_buffered_event_disable(void) 2702 { 2703 int cpu; 2704 2705 WARN_ON_ONCE(!mutex_is_locked(&event_mutex)); 2706 2707 if (WARN_ON_ONCE(!trace_buffered_event_ref)) 2708 return; 2709 2710 if (--trace_buffered_event_ref) 2711 return; 2712 2713 /* For each CPU, set the buffer as used. */ 2714 on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event, 2715 NULL, true); 2716 2717 /* Wait for all current users to finish */ 2718 synchronize_rcu(); 2719 2720 for_each_tracing_cpu(cpu) { 2721 free_page((unsigned long)per_cpu(trace_buffered_event, cpu)); 2722 per_cpu(trace_buffered_event, cpu) = NULL; 2723 } 2724 2725 /* 2726 * Wait for all CPUs that potentially started checking if they can use 2727 * their event buffer only after the previous synchronize_rcu() call and 2728 * they still read a valid pointer from trace_buffered_event. It must be 2729 * ensured they don't see cleared trace_buffered_event_cnt else they 2730 * could wrongly decide to use the pointed-to buffer which is now freed. 2731 */ 2732 synchronize_rcu(); 2733 2734 /* For each CPU, relinquish the buffer */ 2735 on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL, 2736 true); 2737 } 2738 2739 static struct trace_buffer *temp_buffer; 2740 2741 struct ring_buffer_event * 2742 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb, 2743 struct trace_event_file *trace_file, 2744 int type, unsigned long len, 2745 unsigned int trace_ctx) 2746 { 2747 struct ring_buffer_event *entry; 2748 struct trace_array *tr = trace_file->tr; 2749 int val; 2750 2751 *current_rb = tr->array_buffer.buffer; 2752 2753 if (!tr->no_filter_buffering_ref && 2754 (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) { 2755 preempt_disable_notrace(); 2756 /* 2757 * Filtering is on, so try to use the per cpu buffer first. 2758 * This buffer will simulate a ring_buffer_event, 2759 * where the type_len is zero and the array[0] will 2760 * hold the full length. 2761 * (see include/linux/ring-buffer.h for details on 2762 * how the ring_buffer_event is structured). 
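 *
 * Roughly, the per-CPU page then looks like a single oversized event:
 *
 *	type_len = 0          (length carried out of line)
 *	array[0] = len        (size of the payload)
 *	array[1] onwards      (the trace_entry payload itself)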
2763 * 2764 * Using a temp buffer during filtering and copying it 2765 * on a matched filter is quicker than writing directly 2766 * into the ring buffer and then discarding it when 2767 * it doesn't match. That is because the discard 2768 * requires several atomic operations to get right. 2769 * Copying on match and doing nothing on a failed match 2770 * is still quicker than no copy on match, but having 2771 * to discard out of the ring buffer on a failed match. 2772 */ 2773 if ((entry = __this_cpu_read(trace_buffered_event))) { 2774 int max_len = PAGE_SIZE - struct_size(entry, array, 1); 2775 2776 val = this_cpu_inc_return(trace_buffered_event_cnt); 2777 2778 /* 2779 * Preemption is disabled, but interrupts and NMIs 2780 * can still come in now. If that happens after 2781 * the above increment, then it will have to go 2782 * back to the old method of allocating the event 2783 * on the ring buffer, and if the filter fails, it 2784 * will have to call ring_buffer_discard_commit() 2785 * to remove it. 2786 * 2787 * Need to also check the unlikely case that the 2788 * length is bigger than the temp buffer size. 2789 * If that happens, then the reserve is pretty much 2790 * guaranteed to fail, as the ring buffer currently 2791 * only allows events less than a page. But that may 2792 * change in the future, so let the ring buffer reserve 2793 * handle the failure in that case. 2794 */ 2795 if (val == 1 && likely(len <= max_len)) { 2796 trace_event_setup(entry, type, trace_ctx); 2797 entry->array[0] = len; 2798 /* Return with preemption disabled */ 2799 return entry; 2800 } 2801 this_cpu_dec(trace_buffered_event_cnt); 2802 } 2803 /* __trace_buffer_lock_reserve() disables preemption */ 2804 preempt_enable_notrace(); 2805 } 2806 2807 entry = __trace_buffer_lock_reserve(*current_rb, type, len, 2808 trace_ctx); 2809 /* 2810 * If tracing is off, but we have triggers enabled 2811 * we still need to look at the event data. Use the temp_buffer 2812 * to store the trace event for the trigger to use. It's recursive 2813 * safe and will not be recorded anywhere. 
2814 */ 2815 if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) { 2816 *current_rb = temp_buffer; 2817 entry = __trace_buffer_lock_reserve(*current_rb, type, len, 2818 trace_ctx); 2819 } 2820 return entry; 2821 } 2822 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve); 2823 2824 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock); 2825 static DEFINE_MUTEX(tracepoint_printk_mutex); 2826 2827 static void output_printk(struct trace_event_buffer *fbuffer) 2828 { 2829 struct trace_event_call *event_call; 2830 struct trace_event_file *file; 2831 struct trace_event *event; 2832 unsigned long flags; 2833 struct trace_iterator *iter = tracepoint_print_iter; 2834 2835 /* We should never get here if iter is NULL */ 2836 if (WARN_ON_ONCE(!iter)) 2837 return; 2838 2839 event_call = fbuffer->trace_file->event_call; 2840 if (!event_call || !event_call->event.funcs || 2841 !event_call->event.funcs->trace) 2842 return; 2843 2844 file = fbuffer->trace_file; 2845 if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) || 2846 (unlikely(file->flags & EVENT_FILE_FL_FILTERED) && 2847 !filter_match_preds(file->filter, fbuffer->entry))) 2848 return; 2849 2850 event = &fbuffer->trace_file->event_call->event; 2851 2852 raw_spin_lock_irqsave(&tracepoint_iter_lock, flags); 2853 trace_seq_init(&iter->seq); 2854 iter->ent = fbuffer->entry; 2855 event_call->event.funcs->trace(iter, 0, event); 2856 trace_seq_putc(&iter->seq, 0); 2857 printk("%s", iter->seq.buffer); 2858 2859 raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags); 2860 } 2861 2862 int tracepoint_printk_sysctl(const struct ctl_table *table, int write, 2863 void *buffer, size_t *lenp, 2864 loff_t *ppos) 2865 { 2866 int save_tracepoint_printk; 2867 int ret; 2868 2869 guard(mutex)(&tracepoint_printk_mutex); 2870 save_tracepoint_printk = tracepoint_printk; 2871 2872 ret = proc_dointvec(table, write, buffer, lenp, ppos); 2873 2874 /* 2875 * This will force exiting early, as tracepoint_printk 2876 * is always zero when tracepoint_printk_iter is not allocated 2877 */ 2878 if (!tracepoint_print_iter) 2879 tracepoint_printk = 0; 2880 2881 if (save_tracepoint_printk == tracepoint_printk) 2882 return ret; 2883 2884 if (tracepoint_printk) 2885 static_key_enable(&tracepoint_printk_key.key); 2886 else 2887 static_key_disable(&tracepoint_printk_key.key); 2888 2889 return ret; 2890 } 2891 2892 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer) 2893 { 2894 enum event_trigger_type tt = ETT_NONE; 2895 struct trace_event_file *file = fbuffer->trace_file; 2896 2897 if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event, 2898 fbuffer->entry, &tt)) 2899 goto discard; 2900 2901 if (static_key_false(&tracepoint_printk_key.key)) 2902 output_printk(fbuffer); 2903 2904 if (static_branch_unlikely(&trace_event_exports_enabled)) 2905 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT); 2906 2907 trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer, 2908 fbuffer->event, fbuffer->trace_ctx, fbuffer->regs); 2909 2910 discard: 2911 if (tt) 2912 event_triggers_post_call(file, tt); 2913 2914 } 2915 EXPORT_SYMBOL_GPL(trace_event_buffer_commit); 2916 2917 /* 2918 * Skip 3: 2919 * 2920 * trace_buffer_unlock_commit_regs() 2921 * trace_event_buffer_commit() 2922 * trace_event_raw_event_xxx() 2923 */ 2924 # define STACK_SKIP 3 2925 2926 void trace_buffer_unlock_commit_regs(struct trace_array *tr, 2927 struct trace_buffer *buffer, 2928 struct ring_buffer_event *event, 2929 unsigned int trace_ctx, 2930 struct pt_regs *regs) 2931 { 2932 
__buffer_unlock_commit(buffer, event); 2933 2934 /* 2935 * If regs is not set, then skip the necessary functions. 2936 * Note, we can still get here via blktrace, wakeup tracer 2937 * and mmiotrace, but that's ok if they lose a function or 2938 * two. They are not that meaningful. 2939 */ 2940 ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs); 2941 ftrace_trace_userstack(tr, buffer, trace_ctx); 2942 } 2943 2944 /* 2945 * Similar to trace_buffer_unlock_commit_regs() but do not dump stack. 2946 */ 2947 void 2948 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer, 2949 struct ring_buffer_event *event) 2950 { 2951 __buffer_unlock_commit(buffer, event); 2952 } 2953 2954 void 2955 trace_function(struct trace_array *tr, unsigned long ip, unsigned long 2956 parent_ip, unsigned int trace_ctx, struct ftrace_regs *fregs) 2957 { 2958 struct trace_buffer *buffer = tr->array_buffer.buffer; 2959 struct ring_buffer_event *event; 2960 struct ftrace_entry *entry; 2961 int size = sizeof(*entry); 2962 2963 size += FTRACE_REGS_MAX_ARGS * !!fregs * sizeof(long); 2964 2965 event = __trace_buffer_lock_reserve(buffer, TRACE_FN, size, 2966 trace_ctx); 2967 if (!event) 2968 return; 2969 entry = ring_buffer_event_data(event); 2970 entry->ip = ip; 2971 entry->parent_ip = parent_ip; 2972 2973 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API 2974 if (fregs) { 2975 for (int i = 0; i < FTRACE_REGS_MAX_ARGS; i++) 2976 entry->args[i] = ftrace_regs_get_argument(fregs, i); 2977 } 2978 #endif 2979 2980 if (static_branch_unlikely(&trace_function_exports_enabled)) 2981 ftrace_exports(event, TRACE_EXPORT_FUNCTION); 2982 __buffer_unlock_commit(buffer, event); 2983 } 2984 2985 #ifdef CONFIG_STACKTRACE 2986 2987 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */ 2988 #define FTRACE_KSTACK_NESTING 4 2989 2990 #define FTRACE_KSTACK_ENTRIES (SZ_4K / FTRACE_KSTACK_NESTING) 2991 2992 struct ftrace_stack { 2993 unsigned long calls[FTRACE_KSTACK_ENTRIES]; 2994 }; 2995 2996 2997 struct ftrace_stacks { 2998 struct ftrace_stack stacks[FTRACE_KSTACK_NESTING]; 2999 }; 3000 3001 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks); 3002 static DEFINE_PER_CPU(int, ftrace_stack_reserve); 3003 3004 static void __ftrace_trace_stack(struct trace_array *tr, 3005 struct trace_buffer *buffer, 3006 unsigned int trace_ctx, 3007 int skip, struct pt_regs *regs) 3008 { 3009 struct ring_buffer_event *event; 3010 unsigned int size, nr_entries; 3011 struct ftrace_stack *fstack; 3012 struct stack_entry *entry; 3013 int stackidx; 3014 int bit; 3015 3016 bit = trace_test_and_set_recursion(_THIS_IP_, _RET_IP_, TRACE_EVENT_START); 3017 if (bit < 0) 3018 return; 3019 3020 /* 3021 * Add one, for this function and the call to save_stack_trace() 3022 * If regs is set, then these functions will not be in the way. 3023 */ 3024 #ifndef CONFIG_UNWINDER_ORC 3025 if (!regs) 3026 skip++; 3027 #endif 3028 3029 guard(preempt_notrace)(); 3030 3031 stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1; 3032 3033 /* This should never happen. If it does, yell once and skip */ 3034 if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING)) 3035 goto out; 3036 3037 /* 3038 * The above __this_cpu_inc_return() is 'atomic' cpu local. An 3039 * interrupt will either see the value pre increment or post 3040 * increment. If the interrupt happens pre increment it will have 3041 * restored the counter when it returns. We just need a barrier to 3042 * keep gcc from moving things around. 
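 *
 * The resulting stackidx selects one of the FTRACE_KSTACK_NESTING
 * per-CPU stacks, so the normal, softirq, irq and NMI contexts each
 * write into their own ftrace_stack rather than each other's.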
3043 */ 3044 barrier(); 3045 3046 fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx; 3047 size = ARRAY_SIZE(fstack->calls); 3048 3049 if (regs) { 3050 nr_entries = stack_trace_save_regs(regs, fstack->calls, 3051 size, skip); 3052 } else { 3053 nr_entries = stack_trace_save(fstack->calls, size, skip); 3054 } 3055 3056 #ifdef CONFIG_DYNAMIC_FTRACE 3057 /* Mark entry of stack trace as trampoline code */ 3058 if (tr->ops && tr->ops->trampoline) { 3059 unsigned long tramp_start = tr->ops->trampoline; 3060 unsigned long tramp_end = tramp_start + tr->ops->trampoline_size; 3061 unsigned long *calls = fstack->calls; 3062 3063 for (int i = 0; i < nr_entries; i++) { 3064 if (calls[i] >= tramp_start && calls[i] < tramp_end) 3065 calls[i] = FTRACE_TRAMPOLINE_MARKER; 3066 } 3067 } 3068 #endif 3069 3070 event = __trace_buffer_lock_reserve(buffer, TRACE_STACK, 3071 struct_size(entry, caller, nr_entries), 3072 trace_ctx); 3073 if (!event) 3074 goto out; 3075 entry = ring_buffer_event_data(event); 3076 3077 entry->size = nr_entries; 3078 memcpy(&entry->caller, fstack->calls, 3079 flex_array_size(entry, caller, nr_entries)); 3080 3081 __buffer_unlock_commit(buffer, event); 3082 3083 out: 3084 /* Again, don't let gcc optimize things here */ 3085 barrier(); 3086 __this_cpu_dec(ftrace_stack_reserve); 3087 trace_clear_recursion(bit); 3088 } 3089 3090 static inline void ftrace_trace_stack(struct trace_array *tr, 3091 struct trace_buffer *buffer, 3092 unsigned int trace_ctx, 3093 int skip, struct pt_regs *regs) 3094 { 3095 if (!(tr->trace_flags & TRACE_ITER(STACKTRACE))) 3096 return; 3097 3098 __ftrace_trace_stack(tr, buffer, trace_ctx, skip, regs); 3099 } 3100 3101 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx, 3102 int skip) 3103 { 3104 struct trace_buffer *buffer = tr->array_buffer.buffer; 3105 3106 if (rcu_is_watching()) { 3107 __ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL); 3108 return; 3109 } 3110 3111 if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY))) 3112 return; 3113 3114 /* 3115 * When an NMI triggers, RCU is enabled via ct_nmi_enter(), 3116 * but if the above rcu_is_watching() failed, then the NMI 3117 * triggered someplace critical, and ct_irq_enter() should 3118 * not be called from NMI. 3119 */ 3120 if (unlikely(in_nmi())) 3121 return; 3122 3123 ct_irq_enter_irqson(); 3124 __ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL); 3125 ct_irq_exit_irqson(); 3126 } 3127 3128 /** 3129 * trace_dump_stack - record a stack back trace in the trace buffer 3130 * @skip: Number of functions to skip (helper handlers) 3131 */ 3132 void trace_dump_stack(int skip) 3133 { 3134 if (tracing_disabled || tracing_selftest_running) 3135 return; 3136 3137 #ifndef CONFIG_UNWINDER_ORC 3138 /* Skip 1 to skip this function. */ 3139 skip++; 3140 #endif 3141 __ftrace_trace_stack(printk_trace, printk_trace->array_buffer.buffer, 3142 tracing_gen_ctx(), skip, NULL); 3143 } 3144 EXPORT_SYMBOL_GPL(trace_dump_stack); 3145 3146 #ifdef CONFIG_USER_STACKTRACE_SUPPORT 3147 static DEFINE_PER_CPU(int, user_stack_count); 3148 3149 static void 3150 ftrace_trace_userstack(struct trace_array *tr, 3151 struct trace_buffer *buffer, unsigned int trace_ctx) 3152 { 3153 struct ring_buffer_event *event; 3154 struct userstack_entry *entry; 3155 3156 if (!(tr->trace_flags & TRACE_ITER(USERSTACKTRACE))) 3157 return; 3158 3159 /* 3160 * NMIs can not handle page faults, even with fix ups. 3161 * The save user stack can (and often does) fault. 
3162 */ 3163 if (unlikely(in_nmi())) 3164 return; 3165 3166 /* 3167 * prevent recursion, since the user stack tracing may 3168 * trigger other kernel events. 3169 */ 3170 guard(preempt)(); 3171 if (__this_cpu_read(user_stack_count)) 3172 return; 3173 3174 __this_cpu_inc(user_stack_count); 3175 3176 event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK, 3177 sizeof(*entry), trace_ctx); 3178 if (!event) 3179 goto out_drop_count; 3180 entry = ring_buffer_event_data(event); 3181 3182 entry->tgid = current->tgid; 3183 memset(&entry->caller, 0, sizeof(entry->caller)); 3184 3185 stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES); 3186 __buffer_unlock_commit(buffer, event); 3187 3188 out_drop_count: 3189 __this_cpu_dec(user_stack_count); 3190 } 3191 #else /* CONFIG_USER_STACKTRACE_SUPPORT */ 3192 static void ftrace_trace_userstack(struct trace_array *tr, 3193 struct trace_buffer *buffer, 3194 unsigned int trace_ctx) 3195 { 3196 } 3197 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */ 3198 3199 #endif /* CONFIG_STACKTRACE */ 3200 3201 static inline void 3202 func_repeats_set_delta_ts(struct func_repeats_entry *entry, 3203 unsigned long long delta) 3204 { 3205 entry->bottom_delta_ts = delta & U32_MAX; 3206 entry->top_delta_ts = (delta >> 32); 3207 } 3208 3209 void trace_last_func_repeats(struct trace_array *tr, 3210 struct trace_func_repeats *last_info, 3211 unsigned int trace_ctx) 3212 { 3213 struct trace_buffer *buffer = tr->array_buffer.buffer; 3214 struct func_repeats_entry *entry; 3215 struct ring_buffer_event *event; 3216 u64 delta; 3217 3218 event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS, 3219 sizeof(*entry), trace_ctx); 3220 if (!event) 3221 return; 3222 3223 delta = ring_buffer_event_time_stamp(buffer, event) - 3224 last_info->ts_last_call; 3225 3226 entry = ring_buffer_event_data(event); 3227 entry->ip = last_info->ip; 3228 entry->parent_ip = last_info->parent_ip; 3229 entry->count = last_info->count; 3230 func_repeats_set_delta_ts(entry, delta); 3231 3232 __buffer_unlock_commit(buffer, event); 3233 } 3234 3235 /* created for use with alloc_percpu */ 3236 struct trace_buffer_struct { 3237 int nesting; 3238 char buffer[4][TRACE_BUF_SIZE]; 3239 }; 3240 3241 static struct trace_buffer_struct __percpu *trace_percpu_buffer; 3242 3243 /* 3244 * This allows for lockless recording. If we're nested too deeply, then 3245 * this returns NULL. 
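 *
 * Callers follow the pattern used by trace_vbprintk() below:
 *
 *	buf = get_trace_buf();
 *	if (!buf)
 *		goto out_nobuffer;
 *	...  format at most TRACE_BUF_SIZE bytes into buf  ...
 *	put_trace_buf();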
3246 */ 3247 static char *get_trace_buf(void) 3248 { 3249 struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer); 3250 3251 if (!trace_percpu_buffer || buffer->nesting >= 4) 3252 return NULL; 3253 3254 buffer->nesting++; 3255 3256 /* Interrupts must see nesting incremented before we use the buffer */ 3257 barrier(); 3258 return &buffer->buffer[buffer->nesting - 1][0]; 3259 } 3260 3261 static void put_trace_buf(void) 3262 { 3263 /* Don't let the decrement of nesting leak before this */ 3264 barrier(); 3265 this_cpu_dec(trace_percpu_buffer->nesting); 3266 } 3267 3268 static int alloc_percpu_trace_buffer(void) 3269 { 3270 struct trace_buffer_struct __percpu *buffers; 3271 3272 if (trace_percpu_buffer) 3273 return 0; 3274 3275 buffers = alloc_percpu(struct trace_buffer_struct); 3276 if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer")) 3277 return -ENOMEM; 3278 3279 trace_percpu_buffer = buffers; 3280 return 0; 3281 } 3282 3283 static int buffers_allocated; 3284 3285 void trace_printk_init_buffers(void) 3286 { 3287 if (buffers_allocated) 3288 return; 3289 3290 if (alloc_percpu_trace_buffer()) 3291 return; 3292 3293 /* trace_printk() is for debug use only. Don't use it in production. */ 3294 3295 pr_warn("\n"); 3296 pr_warn("**********************************************************\n"); 3297 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); 3298 pr_warn("** **\n"); 3299 pr_warn("** trace_printk() being used. Allocating extra memory. **\n"); 3300 pr_warn("** **\n"); 3301 pr_warn("** This means that this is a DEBUG kernel and it is **\n"); 3302 pr_warn("** unsafe for production use. **\n"); 3303 pr_warn("** **\n"); 3304 pr_warn("** If you see this message and you are not debugging **\n"); 3305 pr_warn("** the kernel, report this immediately to your vendor! **\n"); 3306 pr_warn("** **\n"); 3307 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); 3308 pr_warn("**********************************************************\n"); 3309 3310 /* Expand the buffers to set size */ 3311 tracing_update_buffers(&global_trace); 3312 3313 buffers_allocated = 1; 3314 3315 /* 3316 * trace_printk_init_buffers() can be called by modules. 3317 * If that happens, then we need to start cmdline recording 3318 * directly here. If the global_trace.buffer is already 3319 * allocated here, then this was called by module code. 
3320 */ 3321 if (global_trace.array_buffer.buffer) 3322 tracing_start_cmdline_record(); 3323 } 3324 EXPORT_SYMBOL_GPL(trace_printk_init_buffers); 3325 3326 void trace_printk_start_comm(void) 3327 { 3328 /* Start tracing comms if trace printk is set */ 3329 if (!buffers_allocated) 3330 return; 3331 tracing_start_cmdline_record(); 3332 } 3333 3334 static void trace_printk_start_stop_comm(int enabled) 3335 { 3336 if (!buffers_allocated) 3337 return; 3338 3339 if (enabled) 3340 tracing_start_cmdline_record(); 3341 else 3342 tracing_stop_cmdline_record(); 3343 } 3344 3345 /** 3346 * trace_vbprintk - write binary msg to tracing buffer 3347 * @ip: The address of the caller 3348 * @fmt: The string format to write to the buffer 3349 * @args: Arguments for @fmt 3350 */ 3351 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) 3352 { 3353 struct ring_buffer_event *event; 3354 struct trace_buffer *buffer; 3355 struct trace_array *tr = READ_ONCE(printk_trace); 3356 struct bprint_entry *entry; 3357 unsigned int trace_ctx; 3358 char *tbuffer; 3359 int len = 0, size; 3360 3361 if (!printk_binsafe(tr)) 3362 return trace_vprintk(ip, fmt, args); 3363 3364 if (unlikely(tracing_selftest_running || tracing_disabled)) 3365 return 0; 3366 3367 /* Don't pollute graph traces with trace_vprintk internals */ 3368 pause_graph_tracing(); 3369 3370 trace_ctx = tracing_gen_ctx(); 3371 guard(preempt_notrace)(); 3372 3373 tbuffer = get_trace_buf(); 3374 if (!tbuffer) { 3375 len = 0; 3376 goto out_nobuffer; 3377 } 3378 3379 len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args); 3380 3381 if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0) 3382 goto out_put; 3383 3384 size = sizeof(*entry) + sizeof(u32) * len; 3385 buffer = tr->array_buffer.buffer; 3386 scoped_guard(ring_buffer_nest, buffer) { 3387 event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size, 3388 trace_ctx); 3389 if (!event) 3390 goto out_put; 3391 entry = ring_buffer_event_data(event); 3392 entry->ip = ip; 3393 entry->fmt = fmt; 3394 3395 memcpy(entry->buf, tbuffer, sizeof(u32) * len); 3396 __buffer_unlock_commit(buffer, event); 3397 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL); 3398 } 3399 out_put: 3400 put_trace_buf(); 3401 3402 out_nobuffer: 3403 unpause_graph_tracing(); 3404 3405 return len; 3406 } 3407 EXPORT_SYMBOL_GPL(trace_vbprintk); 3408 3409 static __printf(3, 0) 3410 int __trace_array_vprintk(struct trace_buffer *buffer, 3411 unsigned long ip, const char *fmt, va_list args) 3412 { 3413 struct ring_buffer_event *event; 3414 int len = 0, size; 3415 struct print_entry *entry; 3416 unsigned int trace_ctx; 3417 char *tbuffer; 3418 3419 if (tracing_disabled) 3420 return 0; 3421 3422 /* Don't pollute graph traces with trace_vprintk internals */ 3423 pause_graph_tracing(); 3424 3425 trace_ctx = tracing_gen_ctx(); 3426 guard(preempt_notrace)(); 3427 3428 3429 tbuffer = get_trace_buf(); 3430 if (!tbuffer) { 3431 len = 0; 3432 goto out_nobuffer; 3433 } 3434 3435 len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args); 3436 3437 size = sizeof(*entry) + len + 1; 3438 scoped_guard(ring_buffer_nest, buffer) { 3439 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, 3440 trace_ctx); 3441 if (!event) 3442 goto out; 3443 entry = ring_buffer_event_data(event); 3444 entry->ip = ip; 3445 3446 memcpy(&entry->buf, tbuffer, len + 1); 3447 __buffer_unlock_commit(buffer, event); 3448 ftrace_trace_stack(printk_trace, buffer, trace_ctx, 6, NULL); 3449 } 3450 out: 3451 put_trace_buf(); 3452 3453 out_nobuffer: 3454 
unpause_graph_tracing(); 3455 3456 return len; 3457 } 3458 3459 int trace_array_vprintk(struct trace_array *tr, 3460 unsigned long ip, const char *fmt, va_list args) 3461 { 3462 if (tracing_selftest_running && tr == &global_trace) 3463 return 0; 3464 3465 return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args); 3466 } 3467 3468 /** 3469 * trace_array_printk - Print a message to a specific instance 3470 * @tr: The instance trace_array descriptor 3471 * @ip: The instruction pointer that this is called from. 3472 * @fmt: The format to print (printf format) 3473 * 3474 * If a subsystem sets up its own instance, they have the right to 3475 * printk strings into their tracing instance buffer using this 3476 * function. Note, this function will not write into the top level 3477 * buffer (use trace_printk() for that), as writing into the top level 3478 * buffer should only have events that can be individually disabled. 3479 * trace_printk() is only used for debugging a kernel, and should not 3480 * be ever incorporated in normal use. 3481 * 3482 * trace_array_printk() can be used, as it will not add noise to the 3483 * top level tracing buffer. 3484 * 3485 * Note, trace_array_init_printk() must be called on @tr before this 3486 * can be used. 3487 */ 3488 int trace_array_printk(struct trace_array *tr, 3489 unsigned long ip, const char *fmt, ...) 3490 { 3491 int ret; 3492 va_list ap; 3493 3494 if (!tr) 3495 return -ENOENT; 3496 3497 /* This is only allowed for created instances */ 3498 if (tr == &global_trace) 3499 return 0; 3500 3501 if (!(tr->trace_flags & TRACE_ITER(PRINTK))) 3502 return 0; 3503 3504 va_start(ap, fmt); 3505 ret = trace_array_vprintk(tr, ip, fmt, ap); 3506 va_end(ap); 3507 return ret; 3508 } 3509 EXPORT_SYMBOL_GPL(trace_array_printk); 3510 3511 /** 3512 * trace_array_init_printk - Initialize buffers for trace_array_printk() 3513 * @tr: The trace array to initialize the buffers for 3514 * 3515 * As trace_array_printk() only writes into instances, they are OK to 3516 * have in the kernel (unlike trace_printk()). This needs to be called 3517 * before trace_array_printk() can be used on a trace_array. 3518 */ 3519 int trace_array_init_printk(struct trace_array *tr) 3520 { 3521 if (!tr) 3522 return -ENOENT; 3523 3524 /* This is only allowed for created instances */ 3525 if (tr == &global_trace) 3526 return -EINVAL; 3527 3528 return alloc_percpu_trace_buffer(); 3529 } 3530 EXPORT_SYMBOL_GPL(trace_array_init_printk); 3531 3532 int trace_array_printk_buf(struct trace_buffer *buffer, 3533 unsigned long ip, const char *fmt, ...) 
3534 { 3535 int ret; 3536 va_list ap; 3537 3538 if (!(printk_trace->trace_flags & TRACE_ITER(PRINTK))) 3539 return 0; 3540 3541 va_start(ap, fmt); 3542 ret = __trace_array_vprintk(buffer, ip, fmt, ap); 3543 va_end(ap); 3544 return ret; 3545 } 3546 3547 int trace_vprintk(unsigned long ip, const char *fmt, va_list args) 3548 { 3549 return trace_array_vprintk(printk_trace, ip, fmt, args); 3550 } 3551 EXPORT_SYMBOL_GPL(trace_vprintk); 3552 3553 static void trace_iterator_increment(struct trace_iterator *iter) 3554 { 3555 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu); 3556 3557 iter->idx++; 3558 if (buf_iter) 3559 ring_buffer_iter_advance(buf_iter); 3560 } 3561 3562 static struct trace_entry * 3563 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts, 3564 unsigned long *lost_events) 3565 { 3566 struct ring_buffer_event *event; 3567 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu); 3568 3569 if (buf_iter) { 3570 event = ring_buffer_iter_peek(buf_iter, ts); 3571 if (lost_events) 3572 *lost_events = ring_buffer_iter_dropped(buf_iter) ? 3573 (unsigned long)-1 : 0; 3574 } else { 3575 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts, 3576 lost_events); 3577 } 3578 3579 if (event) { 3580 iter->ent_size = ring_buffer_event_length(event); 3581 return ring_buffer_event_data(event); 3582 } 3583 iter->ent_size = 0; 3584 return NULL; 3585 } 3586 3587 static struct trace_entry * 3588 __find_next_entry(struct trace_iterator *iter, int *ent_cpu, 3589 unsigned long *missing_events, u64 *ent_ts) 3590 { 3591 struct trace_buffer *buffer = iter->array_buffer->buffer; 3592 struct trace_entry *ent, *next = NULL; 3593 unsigned long lost_events = 0, next_lost = 0; 3594 int cpu_file = iter->cpu_file; 3595 u64 next_ts = 0, ts; 3596 int next_cpu = -1; 3597 int next_size = 0; 3598 int cpu; 3599 3600 /* 3601 * If we are in a per_cpu trace file, don't bother by iterating over 3602 * all cpu and peek directly. 3603 */ 3604 if (cpu_file > RING_BUFFER_ALL_CPUS) { 3605 if (ring_buffer_empty_cpu(buffer, cpu_file)) 3606 return NULL; 3607 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events); 3608 if (ent_cpu) 3609 *ent_cpu = cpu_file; 3610 3611 return ent; 3612 } 3613 3614 for_each_tracing_cpu(cpu) { 3615 3616 if (ring_buffer_empty_cpu(buffer, cpu)) 3617 continue; 3618 3619 ent = peek_next_entry(iter, cpu, &ts, &lost_events); 3620 3621 /* 3622 * Pick the entry with the smallest timestamp: 3623 */ 3624 if (ent && (!next || ts < next_ts)) { 3625 next = ent; 3626 next_cpu = cpu; 3627 next_ts = ts; 3628 next_lost = lost_events; 3629 next_size = iter->ent_size; 3630 } 3631 } 3632 3633 iter->ent_size = next_size; 3634 3635 if (ent_cpu) 3636 *ent_cpu = next_cpu; 3637 3638 if (ent_ts) 3639 *ent_ts = next_ts; 3640 3641 if (missing_events) 3642 *missing_events = next_lost; 3643 3644 return next; 3645 } 3646 3647 #define STATIC_FMT_BUF_SIZE 128 3648 static char static_fmt_buf[STATIC_FMT_BUF_SIZE]; 3649 3650 char *trace_iter_expand_format(struct trace_iterator *iter) 3651 { 3652 char *tmp; 3653 3654 /* 3655 * iter->tr is NULL when used with tp_printk, which makes 3656 * this get called where it is not safe to call krealloc(). 
3657 */ 3658 if (!iter->tr || iter->fmt == static_fmt_buf) 3659 return NULL; 3660 3661 tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE, 3662 GFP_KERNEL); 3663 if (tmp) { 3664 iter->fmt_size += STATIC_FMT_BUF_SIZE; 3665 iter->fmt = tmp; 3666 } 3667 3668 return tmp; 3669 } 3670 3671 /* Returns true if the string is safe to dereference from an event */ 3672 static bool trace_safe_str(struct trace_iterator *iter, const char *str) 3673 { 3674 unsigned long addr = (unsigned long)str; 3675 struct trace_event *trace_event; 3676 struct trace_event_call *event; 3677 3678 /* OK if part of the event data */ 3679 if ((addr >= (unsigned long)iter->ent) && 3680 (addr < (unsigned long)iter->ent + iter->ent_size)) 3681 return true; 3682 3683 /* OK if part of the temp seq buffer */ 3684 if ((addr >= (unsigned long)iter->tmp_seq.buffer) && 3685 (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE)) 3686 return true; 3687 3688 /* Core rodata can not be freed */ 3689 if (is_kernel_rodata(addr)) 3690 return true; 3691 3692 if (trace_is_tracepoint_string(str)) 3693 return true; 3694 3695 /* 3696 * Now this could be a module event, referencing core module 3697 * data, which is OK. 3698 */ 3699 if (!iter->ent) 3700 return false; 3701 3702 trace_event = ftrace_find_event(iter->ent->type); 3703 if (!trace_event) 3704 return false; 3705 3706 event = container_of(trace_event, struct trace_event_call, event); 3707 if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module) 3708 return false; 3709 3710 /* Would rather have rodata, but this will suffice */ 3711 if (within_module_core(addr, event->module)) 3712 return true; 3713 3714 return false; 3715 } 3716 3717 /** 3718 * ignore_event - Check dereferenced fields while writing to the seq buffer 3719 * @iter: The iterator that holds the seq buffer and the event being printed 3720 * 3721 * At boot up, test_event_printk() will flag any event that dereferences 3722 * a string with "%s" that does not exist in the ring buffer. It may still 3723 * be valid, as the string may point to a static string in the kernel 3724 * rodata that never gets freed. But if the string pointer is pointing 3725 * to something that was allocated, there's a chance that it can be freed 3726 * by the time the user reads the trace. This would cause a bad memory 3727 * access by the kernel and possibly crash the system. 3728 * 3729 * This function will check if the event has any fields flagged as needing 3730 * to be checked at runtime and perform those checks. 3731 * 3732 * If it is found that a field is unsafe, it will write into the @iter->seq 3733 * a message stating what was found to be unsafe. 3734 * 3735 * @return: true if the event is unsafe and should be ignored, 3736 * false otherwise.
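 *
 * A typical offender is a TP_printk() that prints a saved pointer with
 * "%s" instead of copying the string into the event with the
 * __string()/__assign_str()/__get_str() helpers.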
3737 */ 3738 bool ignore_event(struct trace_iterator *iter) 3739 { 3740 struct ftrace_event_field *field; 3741 struct trace_event *trace_event; 3742 struct trace_event_call *event; 3743 struct list_head *head; 3744 struct trace_seq *seq; 3745 const void *ptr; 3746 3747 trace_event = ftrace_find_event(iter->ent->type); 3748 3749 seq = &iter->seq; 3750 3751 if (!trace_event) { 3752 trace_seq_printf(seq, "EVENT ID %d NOT FOUND?\n", iter->ent->type); 3753 return true; 3754 } 3755 3756 event = container_of(trace_event, struct trace_event_call, event); 3757 if (!(event->flags & TRACE_EVENT_FL_TEST_STR)) 3758 return false; 3759 3760 head = trace_get_fields(event); 3761 if (!head) { 3762 trace_seq_printf(seq, "FIELDS FOR EVENT '%s' NOT FOUND?\n", 3763 trace_event_name(event)); 3764 return true; 3765 } 3766 3767 /* Offsets are from the iter->ent that points to the raw event */ 3768 ptr = iter->ent; 3769 3770 list_for_each_entry(field, head, link) { 3771 const char *str; 3772 bool good; 3773 3774 if (!field->needs_test) 3775 continue; 3776 3777 str = *(const char **)(ptr + field->offset); 3778 3779 good = trace_safe_str(iter, str); 3780 3781 /* 3782 * If you hit this warning, it is likely that the 3783 * trace event in question used %s on a string that 3784 * was saved at the time of the event, but may not be 3785 * around when the trace is read. Use __string(), 3786 * __assign_str() and __get_str() helpers in the TRACE_EVENT() 3787 * instead. See samples/trace_events/trace-events-sample.h 3788 * for reference. 3789 */ 3790 if (WARN_ONCE(!good, "event '%s' has unsafe pointer field '%s'", 3791 trace_event_name(event), field->name)) { 3792 trace_seq_printf(seq, "EVENT %s: HAS UNSAFE POINTER FIELD '%s'\n", 3793 trace_event_name(event), field->name); 3794 return true; 3795 } 3796 } 3797 return false; 3798 } 3799 3800 const char *trace_event_format(struct trace_iterator *iter, const char *fmt) 3801 { 3802 const char *p, *new_fmt; 3803 char *q; 3804 3805 if (WARN_ON_ONCE(!fmt)) 3806 return fmt; 3807 3808 if (!iter->tr || iter->tr->trace_flags & TRACE_ITER(HASH_PTR)) 3809 return fmt; 3810 3811 p = fmt; 3812 new_fmt = q = iter->fmt; 3813 while (*p) { 3814 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) { 3815 if (!trace_iter_expand_format(iter)) 3816 return fmt; 3817 3818 q += iter->fmt - new_fmt; 3819 new_fmt = iter->fmt; 3820 } 3821 3822 *q++ = *p++; 3823 3824 /* Replace %p with %px */ 3825 if (p[-1] == '%') { 3826 if (p[0] == '%') { 3827 *q++ = *p++; 3828 } else if (p[0] == 'p' && !isalnum(p[1])) { 3829 *q++ = *p++; 3830 *q++ = 'x'; 3831 } 3832 } 3833 } 3834 *q = '\0'; 3835 3836 return new_fmt; 3837 } 3838 3839 #define STATIC_TEMP_BUF_SIZE 128 3840 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4); 3841 3842 /* Find the next real entry, without updating the iterator itself */ 3843 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter, 3844 int *ent_cpu, u64 *ent_ts) 3845 { 3846 /* __find_next_entry will reset ent_size */ 3847 int ent_size = iter->ent_size; 3848 struct trace_entry *entry; 3849 3850 /* 3851 * If called from ftrace_dump(), then the iter->temp buffer 3852 * will be the static_temp_buf and not created from kmalloc. 3853 * If the entry size is greater than the buffer, we can 3854 * not save it. Just return NULL in that case. This is only 3855 * used to add markers when two consecutive events' time 3856 * stamps have a large delta. 
See trace_print_lat_context() 3857 */ 3858 if (iter->temp == static_temp_buf && 3859 STATIC_TEMP_BUF_SIZE < ent_size) 3860 return NULL; 3861 3862 /* 3863 * The __find_next_entry() may call peek_next_entry(), which may 3864 * call ring_buffer_peek() that may make the contents of iter->ent 3865 * undefined. Need to copy iter->ent now. 3866 */ 3867 if (iter->ent && iter->ent != iter->temp) { 3868 if ((!iter->temp || iter->temp_size < iter->ent_size) && 3869 !WARN_ON_ONCE(iter->temp == static_temp_buf)) { 3870 void *temp; 3871 temp = kmalloc(iter->ent_size, GFP_KERNEL); 3872 if (!temp) 3873 return NULL; 3874 kfree(iter->temp); 3875 iter->temp = temp; 3876 iter->temp_size = iter->ent_size; 3877 } 3878 memcpy(iter->temp, iter->ent, iter->ent_size); 3879 iter->ent = iter->temp; 3880 } 3881 entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts); 3882 /* Put back the original ent_size */ 3883 iter->ent_size = ent_size; 3884 3885 return entry; 3886 } 3887 3888 /* Find the next real entry, and increment the iterator to the next entry */ 3889 void *trace_find_next_entry_inc(struct trace_iterator *iter) 3890 { 3891 iter->ent = __find_next_entry(iter, &iter->cpu, 3892 &iter->lost_events, &iter->ts); 3893 3894 if (iter->ent) 3895 trace_iterator_increment(iter); 3896 3897 return iter->ent ? iter : NULL; 3898 } 3899 3900 static void trace_consume(struct trace_iterator *iter) 3901 { 3902 ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts, 3903 &iter->lost_events); 3904 } 3905 3906 static void *s_next(struct seq_file *m, void *v, loff_t *pos) 3907 { 3908 struct trace_iterator *iter = m->private; 3909 int i = (int)*pos; 3910 void *ent; 3911 3912 WARN_ON_ONCE(iter->leftover); 3913 3914 (*pos)++; 3915 3916 /* can't go backwards */ 3917 if (iter->idx > i) 3918 return NULL; 3919 3920 if (iter->idx < 0) 3921 ent = trace_find_next_entry_inc(iter); 3922 else 3923 ent = iter; 3924 3925 while (ent && iter->idx < i) 3926 ent = trace_find_next_entry_inc(iter); 3927 3928 iter->pos = *pos; 3929 3930 return ent; 3931 } 3932 3933 void tracing_iter_reset(struct trace_iterator *iter, int cpu) 3934 { 3935 struct ring_buffer_iter *buf_iter; 3936 unsigned long entries = 0; 3937 u64 ts; 3938 3939 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0; 3940 3941 buf_iter = trace_buffer_iter(iter, cpu); 3942 if (!buf_iter) 3943 return; 3944 3945 ring_buffer_iter_reset(buf_iter); 3946 3947 /* 3948 * We could have the case with the max latency tracers 3949 * that a reset never took place on a cpu. This is evident 3950 * by the timestamp being before the start of the buffer. 3951 */ 3952 while (ring_buffer_iter_peek(buf_iter, &ts)) { 3953 if (ts >= iter->array_buffer->time_start) 3954 break; 3955 entries++; 3956 ring_buffer_iter_advance(buf_iter); 3957 /* This could be a big loop */ 3958 cond_resched(); 3959 } 3960 3961 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries; 3962 } 3963 3964 /* 3965 * The current tracer is copied to avoid a global locking 3966 * all around. 
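 *
 * s_start(), s_next() and s_stop() below implement the seq_file
 * iteration over the ring buffer: s_start() resyncs iter->trace with
 * the current tracer and takes the read locks, s_next() returns
 * entries in timestamp order, and s_stop() drops the locks again.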
3967 */ 3968 static void *s_start(struct seq_file *m, loff_t *pos) 3969 { 3970 struct trace_iterator *iter = m->private; 3971 struct trace_array *tr = iter->tr; 3972 int cpu_file = iter->cpu_file; 3973 void *p = NULL; 3974 loff_t l = 0; 3975 int cpu; 3976 3977 mutex_lock(&trace_types_lock); 3978 if (unlikely(tr->current_trace != iter->trace)) { 3979 /* Close iter->trace before switching to the new current tracer */ 3980 if (iter->trace->close) 3981 iter->trace->close(iter); 3982 iter->trace = tr->current_trace; 3983 /* Reopen the new current tracer */ 3984 if (iter->trace->open) 3985 iter->trace->open(iter); 3986 } 3987 mutex_unlock(&trace_types_lock); 3988 3989 #ifdef CONFIG_TRACER_MAX_TRACE 3990 if (iter->snapshot && iter->trace->use_max_tr) 3991 return ERR_PTR(-EBUSY); 3992 #endif 3993 3994 if (*pos != iter->pos) { 3995 iter->ent = NULL; 3996 iter->cpu = 0; 3997 iter->idx = -1; 3998 3999 if (cpu_file == RING_BUFFER_ALL_CPUS) { 4000 for_each_tracing_cpu(cpu) 4001 tracing_iter_reset(iter, cpu); 4002 } else 4003 tracing_iter_reset(iter, cpu_file); 4004 4005 iter->leftover = 0; 4006 for (p = iter; p && l < *pos; p = s_next(m, p, &l)) 4007 ; 4008 4009 } else { 4010 /* 4011 * If we overflowed the seq_file before, then we want 4012 * to just reuse the trace_seq buffer again. 4013 */ 4014 if (iter->leftover) 4015 p = iter; 4016 else { 4017 l = *pos - 1; 4018 p = s_next(m, p, &l); 4019 } 4020 } 4021 4022 trace_event_read_lock(); 4023 trace_access_lock(cpu_file); 4024 return p; 4025 } 4026 4027 static void s_stop(struct seq_file *m, void *p) 4028 { 4029 struct trace_iterator *iter = m->private; 4030 4031 #ifdef CONFIG_TRACER_MAX_TRACE 4032 if (iter->snapshot && iter->trace->use_max_tr) 4033 return; 4034 #endif 4035 4036 trace_access_unlock(iter->cpu_file); 4037 trace_event_read_unlock(); 4038 } 4039 4040 static void 4041 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total, 4042 unsigned long *entries, int cpu) 4043 { 4044 unsigned long count; 4045 4046 count = ring_buffer_entries_cpu(buf->buffer, cpu); 4047 /* 4048 * If this buffer has skipped entries, then we hold all 4049 * entries for the trace and we need to ignore the 4050 * ones before the time stamp. 
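 *
 * As an illustration (made-up numbers): with 1000 entries reported
 * by the ring buffer and 40 of them skipped at reset time, both
 * *entries and *total end up at 960; with no skipped entries and an
 * overrun of 40, *entries is 1000 and *total is 1040.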
4051 */ 4052 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) { 4053 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries; 4054 /* total is the same as the entries */ 4055 *total = count; 4056 } else 4057 *total = count + 4058 ring_buffer_overrun_cpu(buf->buffer, cpu); 4059 *entries = count; 4060 } 4061 4062 static void 4063 get_total_entries(struct array_buffer *buf, 4064 unsigned long *total, unsigned long *entries) 4065 { 4066 unsigned long t, e; 4067 int cpu; 4068 4069 *total = 0; 4070 *entries = 0; 4071 4072 for_each_tracing_cpu(cpu) { 4073 get_total_entries_cpu(buf, &t, &e, cpu); 4074 *total += t; 4075 *entries += e; 4076 } 4077 } 4078 4079 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu) 4080 { 4081 unsigned long total, entries; 4082 4083 if (!tr) 4084 tr = &global_trace; 4085 4086 get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu); 4087 4088 return entries; 4089 } 4090 4091 unsigned long trace_total_entries(struct trace_array *tr) 4092 { 4093 unsigned long total, entries; 4094 4095 if (!tr) 4096 tr = &global_trace; 4097 4098 get_total_entries(&tr->array_buffer, &total, &entries); 4099 4100 return entries; 4101 } 4102 4103 static void print_lat_help_header(struct seq_file *m) 4104 { 4105 seq_puts(m, "# _------=> CPU# \n" 4106 "# / _-----=> irqs-off/BH-disabled\n" 4107 "# | / _----=> need-resched \n" 4108 "# || / _---=> hardirq/softirq \n" 4109 "# ||| / _--=> preempt-depth \n" 4110 "# |||| / _-=> migrate-disable \n" 4111 "# ||||| / delay \n" 4112 "# cmd pid |||||| time | caller \n" 4113 "# \\ / |||||| \\ | / \n"); 4114 } 4115 4116 static void print_event_info(struct array_buffer *buf, struct seq_file *m) 4117 { 4118 unsigned long total; 4119 unsigned long entries; 4120 4121 get_total_entries(buf, &total, &entries); 4122 seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n", 4123 entries, total, num_online_cpus()); 4124 seq_puts(m, "#\n"); 4125 } 4126 4127 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m, 4128 unsigned int flags) 4129 { 4130 bool tgid = flags & TRACE_ITER(RECORD_TGID); 4131 4132 print_event_info(buf, m); 4133 4134 seq_printf(m, "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? " TGID " : ""); 4135 seq_printf(m, "# | | %s | | |\n", tgid ? " | " : ""); 4136 } 4137 4138 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m, 4139 unsigned int flags) 4140 { 4141 bool tgid = flags & TRACE_ITER(RECORD_TGID); 4142 static const char space[] = " "; 4143 int prec = tgid ? 
12 : 2; 4144 4145 print_event_info(buf, m); 4146 4147 seq_printf(m, "# %.*s _-----=> irqs-off/BH-disabled\n", prec, space); 4148 seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space); 4149 seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space); 4150 seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space); 4151 seq_printf(m, "# %.*s||| / _-=> migrate-disable\n", prec, space); 4152 seq_printf(m, "# %.*s|||| / delay\n", prec, space); 4153 seq_printf(m, "# TASK-PID %.*s CPU# ||||| TIMESTAMP FUNCTION\n", prec, " TGID "); 4154 seq_printf(m, "# | | %.*s | ||||| | |\n", prec, " | "); 4155 } 4156 4157 void 4158 print_trace_header(struct seq_file *m, struct trace_iterator *iter) 4159 { 4160 unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK); 4161 struct array_buffer *buf = iter->array_buffer; 4162 struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu); 4163 struct tracer *type = iter->trace; 4164 unsigned long entries; 4165 unsigned long total; 4166 const char *name = type->name; 4167 4168 get_total_entries(buf, &total, &entries); 4169 4170 seq_printf(m, "# %s latency trace v1.1.5 on %s\n", 4171 name, init_utsname()->release); 4172 seq_puts(m, "# -----------------------------------" 4173 "---------------------------------\n"); 4174 seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |" 4175 " (M:%s VP:%d, KP:%d, SP:%d HP:%d", 4176 nsecs_to_usecs(data->saved_latency), 4177 entries, 4178 total, 4179 buf->cpu, 4180 preempt_model_str(), 4181 /* These are reserved for later use */ 4182 0, 0, 0, 0); 4183 #ifdef CONFIG_SMP 4184 seq_printf(m, " #P:%d)\n", num_online_cpus()); 4185 #else 4186 seq_puts(m, ")\n"); 4187 #endif 4188 seq_puts(m, "# -----------------\n"); 4189 seq_printf(m, "# | task: %.16s-%d " 4190 "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n", 4191 data->comm, data->pid, 4192 from_kuid_munged(seq_user_ns(m), data->uid), data->nice, 4193 data->policy, data->rt_priority); 4194 seq_puts(m, "# -----------------\n"); 4195 4196 if (data->critical_start) { 4197 seq_puts(m, "# => started at: "); 4198 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags); 4199 trace_print_seq(m, &iter->seq); 4200 seq_puts(m, "\n# => ended at: "); 4201 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags); 4202 trace_print_seq(m, &iter->seq); 4203 seq_puts(m, "\n#\n"); 4204 } 4205 4206 seq_puts(m, "#\n"); 4207 } 4208 4209 static void test_cpu_buff_start(struct trace_iterator *iter) 4210 { 4211 struct trace_seq *s = &iter->seq; 4212 struct trace_array *tr = iter->tr; 4213 4214 if (!(tr->trace_flags & TRACE_ITER(ANNOTATE))) 4215 return; 4216 4217 if (!(iter->iter_flags & TRACE_FILE_ANNOTATE)) 4218 return; 4219 4220 if (cpumask_available(iter->started) && 4221 cpumask_test_cpu(iter->cpu, iter->started)) 4222 return; 4223 4224 if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries) 4225 return; 4226 4227 if (cpumask_available(iter->started)) 4228 cpumask_set_cpu(iter->cpu, iter->started); 4229 4230 /* Don't print started cpu buffer for the first entry of the trace */ 4231 if (iter->idx > 1) 4232 trace_seq_printf(s, "##### CPU %u buffer started ####\n", 4233 iter->cpu); 4234 } 4235 4236 #ifdef CONFIG_FTRACE_SYSCALLS 4237 static bool is_syscall_event(struct trace_event *event) 4238 { 4239 return (event->funcs == &enter_syscall_print_funcs) || 4240 (event->funcs == &exit_syscall_print_funcs); 4241 4242 } 4243 #define syscall_buf_size CONFIG_TRACE_SYSCALL_BUF_SIZE_DEFAULT 4244 #else 4245 static inline bool is_syscall_event(struct trace_event 
*event) 4246 { 4247 return false; 4248 } 4249 #define syscall_buf_size 0 4250 #endif /* CONFIG_FTRACE_SYSCALLS */ 4251 4252 static enum print_line_t print_trace_fmt(struct trace_iterator *iter) 4253 { 4254 struct trace_array *tr = iter->tr; 4255 struct trace_seq *s = &iter->seq; 4256 unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK); 4257 struct trace_entry *entry; 4258 struct trace_event *event; 4259 4260 entry = iter->ent; 4261 4262 test_cpu_buff_start(iter); 4263 4264 event = ftrace_find_event(entry->type); 4265 4266 if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) { 4267 if (iter->iter_flags & TRACE_FILE_LAT_FMT) 4268 trace_print_lat_context(iter); 4269 else 4270 trace_print_context(iter); 4271 } 4272 4273 if (trace_seq_has_overflowed(s)) 4274 return TRACE_TYPE_PARTIAL_LINE; 4275 4276 if (event) { 4277 if (tr->trace_flags & TRACE_ITER(FIELDS)) 4278 return print_event_fields(iter, event); 4279 /* 4280 * For TRACE_EVENT() events, the print_fmt is not 4281 * safe to use if the array has delta offsets 4282 * Force printing via the fields. 4283 */ 4284 if ((tr->text_delta)) { 4285 /* ftrace and system call events are still OK */ 4286 if ((event->type > __TRACE_LAST_TYPE) && 4287 !is_syscall_event(event)) 4288 return print_event_fields(iter, event); 4289 } 4290 return event->funcs->trace(iter, sym_flags, event); 4291 } 4292 4293 trace_seq_printf(s, "Unknown type %d\n", entry->type); 4294 4295 return trace_handle_return(s); 4296 } 4297 4298 static enum print_line_t print_raw_fmt(struct trace_iterator *iter) 4299 { 4300 struct trace_array *tr = iter->tr; 4301 struct trace_seq *s = &iter->seq; 4302 struct trace_entry *entry; 4303 struct trace_event *event; 4304 4305 entry = iter->ent; 4306 4307 if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) 4308 trace_seq_printf(s, "%d %d %llu ", 4309 entry->pid, iter->cpu, iter->ts); 4310 4311 if (trace_seq_has_overflowed(s)) 4312 return TRACE_TYPE_PARTIAL_LINE; 4313 4314 event = ftrace_find_event(entry->type); 4315 if (event) 4316 return event->funcs->raw(iter, 0, event); 4317 4318 trace_seq_printf(s, "%d ?\n", entry->type); 4319 4320 return trace_handle_return(s); 4321 } 4322 4323 static enum print_line_t print_hex_fmt(struct trace_iterator *iter) 4324 { 4325 struct trace_array *tr = iter->tr; 4326 struct trace_seq *s = &iter->seq; 4327 unsigned char newline = '\n'; 4328 struct trace_entry *entry; 4329 struct trace_event *event; 4330 4331 entry = iter->ent; 4332 4333 if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) { 4334 SEQ_PUT_HEX_FIELD(s, entry->pid); 4335 SEQ_PUT_HEX_FIELD(s, iter->cpu); 4336 SEQ_PUT_HEX_FIELD(s, iter->ts); 4337 if (trace_seq_has_overflowed(s)) 4338 return TRACE_TYPE_PARTIAL_LINE; 4339 } 4340 4341 event = ftrace_find_event(entry->type); 4342 if (event) { 4343 enum print_line_t ret = event->funcs->hex(iter, 0, event); 4344 if (ret != TRACE_TYPE_HANDLED) 4345 return ret; 4346 } 4347 4348 SEQ_PUT_FIELD(s, newline); 4349 4350 return trace_handle_return(s); 4351 } 4352 4353 static enum print_line_t print_bin_fmt(struct trace_iterator *iter) 4354 { 4355 struct trace_array *tr = iter->tr; 4356 struct trace_seq *s = &iter->seq; 4357 struct trace_entry *entry; 4358 struct trace_event *event; 4359 4360 entry = iter->ent; 4361 4362 if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) { 4363 SEQ_PUT_FIELD(s, entry->pid); 4364 SEQ_PUT_FIELD(s, iter->cpu); 4365 SEQ_PUT_FIELD(s, iter->ts); 4366 if (trace_seq_has_overflowed(s)) 4367 return TRACE_TYPE_PARTIAL_LINE; 4368 } 4369 4370 event = ftrace_find_event(entry->type); 4371 return event ? 
event->funcs->binary(iter, 0, event) : 4372 TRACE_TYPE_HANDLED; 4373 } 4374 4375 int trace_empty(struct trace_iterator *iter) 4376 { 4377 struct ring_buffer_iter *buf_iter; 4378 int cpu; 4379 4380 /* If we are looking at one CPU buffer, only check that one */ 4381 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) { 4382 cpu = iter->cpu_file; 4383 buf_iter = trace_buffer_iter(iter, cpu); 4384 if (buf_iter) { 4385 if (!ring_buffer_iter_empty(buf_iter)) 4386 return 0; 4387 } else { 4388 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu)) 4389 return 0; 4390 } 4391 return 1; 4392 } 4393 4394 for_each_tracing_cpu(cpu) { 4395 buf_iter = trace_buffer_iter(iter, cpu); 4396 if (buf_iter) { 4397 if (!ring_buffer_iter_empty(buf_iter)) 4398 return 0; 4399 } else { 4400 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu)) 4401 return 0; 4402 } 4403 } 4404 4405 return 1; 4406 } 4407 4408 /* Called with trace_event_read_lock() held. */ 4409 enum print_line_t print_trace_line(struct trace_iterator *iter) 4410 { 4411 struct trace_array *tr = iter->tr; 4412 unsigned long trace_flags = tr->trace_flags; 4413 enum print_line_t ret; 4414 4415 if (iter->lost_events) { 4416 if (iter->lost_events == (unsigned long)-1) 4417 trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n", 4418 iter->cpu); 4419 else 4420 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n", 4421 iter->cpu, iter->lost_events); 4422 if (trace_seq_has_overflowed(&iter->seq)) 4423 return TRACE_TYPE_PARTIAL_LINE; 4424 } 4425 4426 if (iter->trace && iter->trace->print_line) { 4427 ret = iter->trace->print_line(iter); 4428 if (ret != TRACE_TYPE_UNHANDLED) 4429 return ret; 4430 } 4431 4432 if (iter->ent->type == TRACE_BPUTS && 4433 trace_flags & TRACE_ITER(PRINTK) && 4434 trace_flags & TRACE_ITER(PRINTK_MSGONLY)) 4435 return trace_print_bputs_msg_only(iter); 4436 4437 if (iter->ent->type == TRACE_BPRINT && 4438 trace_flags & TRACE_ITER(PRINTK) && 4439 trace_flags & TRACE_ITER(PRINTK_MSGONLY)) 4440 return trace_print_bprintk_msg_only(iter); 4441 4442 if (iter->ent->type == TRACE_PRINT && 4443 trace_flags & TRACE_ITER(PRINTK) && 4444 trace_flags & TRACE_ITER(PRINTK_MSGONLY)) 4445 return trace_print_printk_msg_only(iter); 4446 4447 if (trace_flags & TRACE_ITER(BIN)) 4448 return print_bin_fmt(iter); 4449 4450 if (trace_flags & TRACE_ITER(HEX)) 4451 return print_hex_fmt(iter); 4452 4453 if (trace_flags & TRACE_ITER(RAW)) 4454 return print_raw_fmt(iter); 4455 4456 return print_trace_fmt(iter); 4457 } 4458 4459 void trace_latency_header(struct seq_file *m) 4460 { 4461 struct trace_iterator *iter = m->private; 4462 struct trace_array *tr = iter->tr; 4463 4464 /* print nothing if the buffers are empty */ 4465 if (trace_empty(iter)) 4466 return; 4467 4468 if (iter->iter_flags & TRACE_FILE_LAT_FMT) 4469 print_trace_header(m, iter); 4470 4471 if (!(tr->trace_flags & TRACE_ITER(VERBOSE))) 4472 print_lat_help_header(m); 4473 } 4474 4475 void trace_default_header(struct seq_file *m) 4476 { 4477 struct trace_iterator *iter = m->private; 4478 struct trace_array *tr = iter->tr; 4479 unsigned long trace_flags = tr->trace_flags; 4480 4481 if (!(trace_flags & TRACE_ITER(CONTEXT_INFO))) 4482 return; 4483 4484 if (iter->iter_flags & TRACE_FILE_LAT_FMT) { 4485 /* print nothing if the buffers are empty */ 4486 if (trace_empty(iter)) 4487 return; 4488 print_trace_header(m, iter); 4489 if (!(trace_flags & TRACE_ITER(VERBOSE))) 4490 print_lat_help_header(m); 4491 } else { 4492 if (!(trace_flags & TRACE_ITER(VERBOSE))) { 4493 if (trace_flags & 
TRACE_ITER(IRQ_INFO)) 4494 print_func_help_header_irq(iter->array_buffer, 4495 m, trace_flags); 4496 else 4497 print_func_help_header(iter->array_buffer, m, 4498 trace_flags); 4499 } 4500 } 4501 } 4502 4503 static void test_ftrace_alive(struct seq_file *m) 4504 { 4505 if (!ftrace_is_dead()) 4506 return; 4507 seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n" 4508 "# MAY BE MISSING FUNCTION EVENTS\n"); 4509 } 4510 4511 #ifdef CONFIG_TRACER_MAX_TRACE 4512 static void show_snapshot_main_help(struct seq_file *m) 4513 { 4514 seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n" 4515 "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n" 4516 "# Takes a snapshot of the main buffer.\n" 4517 "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n" 4518 "# (Doesn't have to be '2' works with any number that\n" 4519 "# is not a '0' or '1')\n"); 4520 } 4521 4522 static void show_snapshot_percpu_help(struct seq_file *m) 4523 { 4524 seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n"); 4525 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP 4526 seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n" 4527 "# Takes a snapshot of the main buffer for this cpu.\n"); 4528 #else 4529 seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n" 4530 "# Must use main snapshot file to allocate.\n"); 4531 #endif 4532 seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n" 4533 "# (Doesn't have to be '2' works with any number that\n" 4534 "# is not a '0' or '1')\n"); 4535 } 4536 4537 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) 4538 { 4539 if (iter->tr->allocated_snapshot) 4540 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n"); 4541 else 4542 seq_puts(m, "#\n# * Snapshot is freed *\n#\n"); 4543 4544 seq_puts(m, "# Snapshot commands:\n"); 4545 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) 4546 show_snapshot_main_help(m); 4547 else 4548 show_snapshot_percpu_help(m); 4549 } 4550 #else 4551 /* Should never be called */ 4552 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { } 4553 #endif 4554 4555 static int s_show(struct seq_file *m, void *v) 4556 { 4557 struct trace_iterator *iter = v; 4558 int ret; 4559 4560 if (iter->ent == NULL) { 4561 if (iter->tr) { 4562 seq_printf(m, "# tracer: %s\n", iter->trace->name); 4563 seq_puts(m, "#\n"); 4564 test_ftrace_alive(m); 4565 } 4566 if (iter->snapshot && trace_empty(iter)) 4567 print_snapshot_help(m, iter); 4568 else if (iter->trace && iter->trace->print_header) 4569 iter->trace->print_header(m); 4570 else 4571 trace_default_header(m); 4572 4573 } else if (iter->leftover) { 4574 /* 4575 * If we filled the seq_file buffer earlier, we 4576 * want to just show it now. 4577 */ 4578 ret = trace_print_seq(m, &iter->seq); 4579 4580 /* ret should this time be zero, but you never know */ 4581 iter->leftover = ret; 4582 4583 } else { 4584 ret = print_trace_line(iter); 4585 if (ret == TRACE_TYPE_PARTIAL_LINE) { 4586 iter->seq.full = 0; 4587 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n"); 4588 } 4589 ret = trace_print_seq(m, &iter->seq); 4590 /* 4591 * If we overflow the seq_file buffer, then it will 4592 * ask us for this data again at start up. 4593 * Use that instead. 4594 * ret is 0 if seq_file write succeeded. 4595 * -1 otherwise. 
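 *
 * A non-zero ret therefore marks this line as "leftover": the next
 * s_show() call sees iter->leftover set and re-emits iter->seq via
 * trace_print_seq() instead of advancing to a new entry (see the
 * iter->leftover branch above).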
4596 */ 4597 iter->leftover = ret; 4598 } 4599 4600 return 0; 4601 } 4602 4603 /* 4604 * Should be used after trace_array_get(), trace_types_lock 4605 * ensures that i_cdev was already initialized. 4606 */ 4607 static inline int tracing_get_cpu(struct inode *inode) 4608 { 4609 if (inode->i_cdev) /* See trace_create_cpu_file() */ 4610 return (long)inode->i_cdev - 1; 4611 return RING_BUFFER_ALL_CPUS; 4612 } 4613 4614 static const struct seq_operations tracer_seq_ops = { 4615 .start = s_start, 4616 .next = s_next, 4617 .stop = s_stop, 4618 .show = s_show, 4619 }; 4620 4621 /* 4622 * Note, as iter itself can be allocated and freed in different 4623 * ways, this function is only used to free its content, and not 4624 * the iterator itself. The only requirement to all the allocations 4625 * is that it must zero all fields (kzalloc), as freeing works with 4626 * ethier allocated content or NULL. 4627 */ 4628 static void free_trace_iter_content(struct trace_iterator *iter) 4629 { 4630 /* The fmt is either NULL, allocated or points to static_fmt_buf */ 4631 if (iter->fmt != static_fmt_buf) 4632 kfree(iter->fmt); 4633 4634 kfree(iter->temp); 4635 kfree(iter->buffer_iter); 4636 mutex_destroy(&iter->mutex); 4637 free_cpumask_var(iter->started); 4638 } 4639 4640 static struct trace_iterator * 4641 __tracing_open(struct inode *inode, struct file *file, bool snapshot) 4642 { 4643 struct trace_array *tr = inode->i_private; 4644 struct trace_iterator *iter; 4645 int cpu; 4646 4647 if (tracing_disabled) 4648 return ERR_PTR(-ENODEV); 4649 4650 iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter)); 4651 if (!iter) 4652 return ERR_PTR(-ENOMEM); 4653 4654 iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter), 4655 GFP_KERNEL); 4656 if (!iter->buffer_iter) 4657 goto release; 4658 4659 /* 4660 * trace_find_next_entry() may need to save off iter->ent. 4661 * It will place it into the iter->temp buffer. As most 4662 * events are less than 128, allocate a buffer of that size. 4663 * If one is greater, then trace_find_next_entry() will 4664 * allocate a new buffer to adjust for the bigger iter->ent. 4665 * It's not critical if it fails to get allocated here. 4666 */ 4667 iter->temp = kmalloc(128, GFP_KERNEL); 4668 if (iter->temp) 4669 iter->temp_size = 128; 4670 4671 /* 4672 * trace_event_printf() may need to modify given format 4673 * string to replace %p with %px so that it shows real address 4674 * instead of hash value. However, that is only for the event 4675 * tracing, other tracer may not need. Defer the allocation 4676 * until it is needed. 4677 */ 4678 iter->fmt = NULL; 4679 iter->fmt_size = 0; 4680 4681 mutex_lock(&trace_types_lock); 4682 iter->trace = tr->current_trace; 4683 4684 if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL)) 4685 goto fail; 4686 4687 iter->tr = tr; 4688 4689 #ifdef CONFIG_TRACER_MAX_TRACE 4690 /* Currently only the top directory has a snapshot */ 4691 if (tr->current_trace->print_max || snapshot) 4692 iter->array_buffer = &tr->max_buffer; 4693 else 4694 #endif 4695 iter->array_buffer = &tr->array_buffer; 4696 iter->snapshot = snapshot; 4697 iter->pos = -1; 4698 iter->cpu_file = tracing_get_cpu(inode); 4699 mutex_init(&iter->mutex); 4700 4701 /* Notify the tracer early; before we stop tracing. 
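 *
 * A tracer's ->open() callback typically sets up per-iterator state
 * (the function_graph tracer, for instance, allocates its per-CPU
 * data here); the matching ->close() runs from tracing_release() or
 * from s_start() when the current tracer has changed underneath us.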
*/ 4702 if (iter->trace->open) 4703 iter->trace->open(iter); 4704 4705 /* Annotate start of buffers if we had overruns */ 4706 if (ring_buffer_overruns(iter->array_buffer->buffer)) 4707 iter->iter_flags |= TRACE_FILE_ANNOTATE; 4708 4709 /* Output in nanoseconds only if we are using a clock in nanoseconds. */ 4710 if (trace_clocks[tr->clock_id].in_ns) 4711 iter->iter_flags |= TRACE_FILE_TIME_IN_NS; 4712 4713 /* 4714 * If pause-on-trace is enabled, then stop the trace while 4715 * dumping, unless this is the "snapshot" file 4716 */ 4717 if (!iter->snapshot && (tr->trace_flags & TRACE_ITER(PAUSE_ON_TRACE))) { 4718 iter->iter_flags |= TRACE_FILE_PAUSE; 4719 tracing_stop_tr(tr); 4720 } 4721 4722 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) { 4723 for_each_tracing_cpu(cpu) { 4724 iter->buffer_iter[cpu] = 4725 ring_buffer_read_start(iter->array_buffer->buffer, 4726 cpu, GFP_KERNEL); 4727 tracing_iter_reset(iter, cpu); 4728 } 4729 } else { 4730 cpu = iter->cpu_file; 4731 iter->buffer_iter[cpu] = 4732 ring_buffer_read_start(iter->array_buffer->buffer, 4733 cpu, GFP_KERNEL); 4734 tracing_iter_reset(iter, cpu); 4735 } 4736 4737 mutex_unlock(&trace_types_lock); 4738 4739 return iter; 4740 4741 fail: 4742 mutex_unlock(&trace_types_lock); 4743 free_trace_iter_content(iter); 4744 release: 4745 seq_release_private(inode, file); 4746 return ERR_PTR(-ENOMEM); 4747 } 4748 4749 int tracing_open_generic(struct inode *inode, struct file *filp) 4750 { 4751 int ret; 4752 4753 ret = tracing_check_open_get_tr(NULL); 4754 if (ret) 4755 return ret; 4756 4757 filp->private_data = inode->i_private; 4758 return 0; 4759 } 4760 4761 bool tracing_is_disabled(void) 4762 { 4763 return (tracing_disabled) ? true: false; 4764 } 4765 4766 /* 4767 * Open and update trace_array ref count. 4768 * Must have the current trace_array passed to it. 4769 */ 4770 int tracing_open_generic_tr(struct inode *inode, struct file *filp) 4771 { 4772 struct trace_array *tr = inode->i_private; 4773 int ret; 4774 4775 ret = tracing_check_open_get_tr(tr); 4776 if (ret) 4777 return ret; 4778 4779 filp->private_data = inode->i_private; 4780 4781 return 0; 4782 } 4783 4784 /* 4785 * The private pointer of the inode is the trace_event_file. 4786 * Update the tr ref count associated to it. 
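 *
 * Roughly, tracing_open_file_tr() and tracing_release_file_tr() pair
 * up as:
 *
 *	open:     trace_array_get(file->tr);  event_file_get(file);
 *	release:  trace_array_put(file->tr);  event_file_put(file);
 *
 * so both the trace_array and the event file stay pinned while the
 * file is open.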
4787 */ 4788 int tracing_open_file_tr(struct inode *inode, struct file *filp) 4789 { 4790 struct trace_event_file *file = inode->i_private; 4791 int ret; 4792 4793 ret = tracing_check_open_get_tr(file->tr); 4794 if (ret) 4795 return ret; 4796 4797 guard(mutex)(&event_mutex); 4798 4799 /* Fail if the file is marked for removal */ 4800 if (file->flags & EVENT_FILE_FL_FREED) { 4801 trace_array_put(file->tr); 4802 return -ENODEV; 4803 } else { 4804 event_file_get(file); 4805 } 4806 4807 filp->private_data = inode->i_private; 4808 4809 return 0; 4810 } 4811 4812 int tracing_release_file_tr(struct inode *inode, struct file *filp) 4813 { 4814 struct trace_event_file *file = inode->i_private; 4815 4816 trace_array_put(file->tr); 4817 event_file_put(file); 4818 4819 return 0; 4820 } 4821 4822 int tracing_single_release_file_tr(struct inode *inode, struct file *filp) 4823 { 4824 tracing_release_file_tr(inode, filp); 4825 return single_release(inode, filp); 4826 } 4827 4828 static int tracing_release(struct inode *inode, struct file *file) 4829 { 4830 struct trace_array *tr = inode->i_private; 4831 struct seq_file *m = file->private_data; 4832 struct trace_iterator *iter; 4833 int cpu; 4834 4835 if (!(file->f_mode & FMODE_READ)) { 4836 trace_array_put(tr); 4837 return 0; 4838 } 4839 4840 /* Writes do not use seq_file */ 4841 iter = m->private; 4842 mutex_lock(&trace_types_lock); 4843 4844 for_each_tracing_cpu(cpu) { 4845 if (iter->buffer_iter[cpu]) 4846 ring_buffer_read_finish(iter->buffer_iter[cpu]); 4847 } 4848 4849 if (iter->trace && iter->trace->close) 4850 iter->trace->close(iter); 4851 4852 if (iter->iter_flags & TRACE_FILE_PAUSE) 4853 /* reenable tracing if it was previously enabled */ 4854 tracing_start_tr(tr); 4855 4856 __trace_array_put(tr); 4857 4858 mutex_unlock(&trace_types_lock); 4859 4860 free_trace_iter_content(iter); 4861 seq_release_private(inode, file); 4862 4863 return 0; 4864 } 4865 4866 int tracing_release_generic_tr(struct inode *inode, struct file *file) 4867 { 4868 struct trace_array *tr = inode->i_private; 4869 4870 trace_array_put(tr); 4871 return 0; 4872 } 4873 4874 static int tracing_single_release_tr(struct inode *inode, struct file *file) 4875 { 4876 struct trace_array *tr = inode->i_private; 4877 4878 trace_array_put(tr); 4879 4880 return single_release(inode, file); 4881 } 4882 4883 static int tracing_open(struct inode *inode, struct file *file) 4884 { 4885 struct trace_array *tr = inode->i_private; 4886 struct trace_iterator *iter; 4887 int ret; 4888 4889 ret = tracing_check_open_get_tr(tr); 4890 if (ret) 4891 return ret; 4892 4893 /* If this file was open for write, then erase contents */ 4894 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) { 4895 int cpu = tracing_get_cpu(inode); 4896 struct array_buffer *trace_buf = &tr->array_buffer; 4897 4898 #ifdef CONFIG_TRACER_MAX_TRACE 4899 if (tr->current_trace->print_max) 4900 trace_buf = &tr->max_buffer; 4901 #endif 4902 4903 if (cpu == RING_BUFFER_ALL_CPUS) 4904 tracing_reset_online_cpus(trace_buf); 4905 else 4906 tracing_reset_cpu(trace_buf, cpu); 4907 } 4908 4909 if (file->f_mode & FMODE_READ) { 4910 iter = __tracing_open(inode, file, false); 4911 if (IS_ERR(iter)) 4912 ret = PTR_ERR(iter); 4913 else if (tr->trace_flags & TRACE_ITER(LATENCY_FMT)) 4914 iter->iter_flags |= TRACE_FILE_LAT_FMT; 4915 } 4916 4917 if (ret < 0) 4918 trace_array_put(tr); 4919 4920 return ret; 4921 } 4922 4923 /* 4924 * Some tracers are not suitable for instance buffers. 
4925 * A tracer is always available for the global array (toplevel) 4926 * or if it explicitly states that it is. 4927 */ 4928 static bool 4929 trace_ok_for_array(struct tracer *t, struct trace_array *tr) 4930 { 4931 #ifdef CONFIG_TRACER_SNAPSHOT 4932 /* arrays with mapped buffer range do not have snapshots */ 4933 if (tr->range_addr_start && t->use_max_tr) 4934 return false; 4935 #endif 4936 return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances; 4937 } 4938 4939 /* Find the next tracer that this trace array may use */ 4940 static struct tracer * 4941 get_tracer_for_array(struct trace_array *tr, struct tracer *t) 4942 { 4943 while (t && !trace_ok_for_array(t, tr)) 4944 t = t->next; 4945 4946 return t; 4947 } 4948 4949 static void * 4950 t_next(struct seq_file *m, void *v, loff_t *pos) 4951 { 4952 struct trace_array *tr = m->private; 4953 struct tracer *t = v; 4954 4955 (*pos)++; 4956 4957 if (t) 4958 t = get_tracer_for_array(tr, t->next); 4959 4960 return t; 4961 } 4962 4963 static void *t_start(struct seq_file *m, loff_t *pos) 4964 { 4965 struct trace_array *tr = m->private; 4966 struct tracer *t; 4967 loff_t l = 0; 4968 4969 mutex_lock(&trace_types_lock); 4970 4971 t = get_tracer_for_array(tr, trace_types); 4972 for (; t && l < *pos; t = t_next(m, t, &l)) 4973 ; 4974 4975 return t; 4976 } 4977 4978 static void t_stop(struct seq_file *m, void *p) 4979 { 4980 mutex_unlock(&trace_types_lock); 4981 } 4982 4983 static int t_show(struct seq_file *m, void *v) 4984 { 4985 struct tracer *t = v; 4986 4987 if (!t) 4988 return 0; 4989 4990 seq_puts(m, t->name); 4991 if (t->next) 4992 seq_putc(m, ' '); 4993 else 4994 seq_putc(m, '\n'); 4995 4996 return 0; 4997 } 4998 4999 static const struct seq_operations show_traces_seq_ops = { 5000 .start = t_start, 5001 .next = t_next, 5002 .stop = t_stop, 5003 .show = t_show, 5004 }; 5005 5006 static int show_traces_open(struct inode *inode, struct file *file) 5007 { 5008 struct trace_array *tr = inode->i_private; 5009 struct seq_file *m; 5010 int ret; 5011 5012 ret = tracing_check_open_get_tr(tr); 5013 if (ret) 5014 return ret; 5015 5016 ret = seq_open(file, &show_traces_seq_ops); 5017 if (ret) { 5018 trace_array_put(tr); 5019 return ret; 5020 } 5021 5022 m = file->private_data; 5023 m->private = tr; 5024 5025 return 0; 5026 } 5027 5028 static int tracing_seq_release(struct inode *inode, struct file *file) 5029 { 5030 struct trace_array *tr = inode->i_private; 5031 5032 trace_array_put(tr); 5033 return seq_release(inode, file); 5034 } 5035 5036 static ssize_t 5037 tracing_write_stub(struct file *filp, const char __user *ubuf, 5038 size_t count, loff_t *ppos) 5039 { 5040 return count; 5041 } 5042 5043 loff_t tracing_lseek(struct file *file, loff_t offset, int whence) 5044 { 5045 int ret; 5046 5047 if (file->f_mode & FMODE_READ) 5048 ret = seq_lseek(file, offset, whence); 5049 else 5050 file->f_pos = ret = 0; 5051 5052 return ret; 5053 } 5054 5055 static const struct file_operations tracing_fops = { 5056 .open = tracing_open, 5057 .read = seq_read, 5058 .read_iter = seq_read_iter, 5059 .splice_read = copy_splice_read, 5060 .write = tracing_write_stub, 5061 .llseek = tracing_lseek, 5062 .release = tracing_release, 5063 }; 5064 5065 static const struct file_operations show_traces_fops = { 5066 .open = show_traces_open, 5067 .read = seq_read, 5068 .llseek = seq_lseek, 5069 .release = tracing_seq_release, 5070 }; 5071 5072 static ssize_t 5073 tracing_cpumask_read(struct file *filp, char __user *ubuf, 5074 size_t count, loff_t *ppos) 5075 { 5076 struct 
trace_array *tr = file_inode(filp)->i_private; 5077 char *mask_str __free(kfree) = NULL; 5078 int len; 5079 5080 len = snprintf(NULL, 0, "%*pb\n", 5081 cpumask_pr_args(tr->tracing_cpumask)) + 1; 5082 mask_str = kmalloc(len, GFP_KERNEL); 5083 if (!mask_str) 5084 return -ENOMEM; 5085 5086 len = snprintf(mask_str, len, "%*pb\n", 5087 cpumask_pr_args(tr->tracing_cpumask)); 5088 if (len >= count) 5089 return -EINVAL; 5090 5091 return simple_read_from_buffer(ubuf, count, ppos, mask_str, len); 5092 } 5093 5094 int tracing_set_cpumask(struct trace_array *tr, 5095 cpumask_var_t tracing_cpumask_new) 5096 { 5097 int cpu; 5098 5099 if (!tr) 5100 return -EINVAL; 5101 5102 local_irq_disable(); 5103 arch_spin_lock(&tr->max_lock); 5104 for_each_tracing_cpu(cpu) { 5105 /* 5106 * Increase/decrease the disabled counter if we are 5107 * about to flip a bit in the cpumask: 5108 */ 5109 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) && 5110 !cpumask_test_cpu(cpu, tracing_cpumask_new)) { 5111 ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu); 5112 #ifdef CONFIG_TRACER_MAX_TRACE 5113 ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu); 5114 #endif 5115 } 5116 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) && 5117 cpumask_test_cpu(cpu, tracing_cpumask_new)) { 5118 ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu); 5119 #ifdef CONFIG_TRACER_MAX_TRACE 5120 ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu); 5121 #endif 5122 } 5123 } 5124 arch_spin_unlock(&tr->max_lock); 5125 local_irq_enable(); 5126 5127 cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new); 5128 5129 return 0; 5130 } 5131 5132 static ssize_t 5133 tracing_cpumask_write(struct file *filp, const char __user *ubuf, 5134 size_t count, loff_t *ppos) 5135 { 5136 struct trace_array *tr = file_inode(filp)->i_private; 5137 cpumask_var_t tracing_cpumask_new; 5138 int err; 5139 5140 if (count == 0 || count > KMALLOC_MAX_SIZE) 5141 return -EINVAL; 5142 5143 if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL)) 5144 return -ENOMEM; 5145 5146 err = cpumask_parse_user(ubuf, count, tracing_cpumask_new); 5147 if (err) 5148 goto err_free; 5149 5150 err = tracing_set_cpumask(tr, tracing_cpumask_new); 5151 if (err) 5152 goto err_free; 5153 5154 free_cpumask_var(tracing_cpumask_new); 5155 5156 return count; 5157 5158 err_free: 5159 free_cpumask_var(tracing_cpumask_new); 5160 5161 return err; 5162 } 5163 5164 static const struct file_operations tracing_cpumask_fops = { 5165 .open = tracing_open_generic_tr, 5166 .read = tracing_cpumask_read, 5167 .write = tracing_cpumask_write, 5168 .release = tracing_release_generic_tr, 5169 .llseek = generic_file_llseek, 5170 }; 5171 5172 static int tracing_trace_options_show(struct seq_file *m, void *v) 5173 { 5174 struct tracer_opt *trace_opts; 5175 struct trace_array *tr = m->private; 5176 struct tracer_flags *flags; 5177 u32 tracer_flags; 5178 int i; 5179 5180 guard(mutex)(&trace_types_lock); 5181 5182 for (i = 0; trace_options[i]; i++) { 5183 if (tr->trace_flags & (1ULL << i)) 5184 seq_printf(m, "%s\n", trace_options[i]); 5185 else 5186 seq_printf(m, "no%s\n", trace_options[i]); 5187 } 5188 5189 flags = tr->current_trace_flags; 5190 if (!flags || !flags->opts) 5191 return 0; 5192 5193 tracer_flags = flags->val; 5194 trace_opts = flags->opts; 5195 5196 for (i = 0; trace_opts[i].name; i++) { 5197 if (tracer_flags & trace_opts[i].bit) 5198 seq_printf(m, "%s\n", trace_opts[i].name); 5199 else 5200 seq_printf(m, "no%s\n", trace_opts[i].name); 5201 } 5202 5203 return 0; 5204 } 5205 5206 static 
int __set_tracer_option(struct trace_array *tr, 5207 struct tracer_flags *tracer_flags, 5208 struct tracer_opt *opts, int neg) 5209 { 5210 struct tracer *trace = tracer_flags->trace; 5211 int ret = 0; 5212 5213 if (trace->set_flag) 5214 ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg); 5215 if (ret) 5216 return ret; 5217 5218 if (neg) 5219 tracer_flags->val &= ~opts->bit; 5220 else 5221 tracer_flags->val |= opts->bit; 5222 return 0; 5223 } 5224 5225 /* Try to assign a tracer specific option */ 5226 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg) 5227 { 5228 struct tracer_flags *tracer_flags = tr->current_trace_flags; 5229 struct tracer_opt *opts = NULL; 5230 int i; 5231 5232 if (!tracer_flags || !tracer_flags->opts) 5233 return 0; 5234 5235 for (i = 0; tracer_flags->opts[i].name; i++) { 5236 opts = &tracer_flags->opts[i]; 5237 5238 if (strcmp(cmp, opts->name) == 0) 5239 return __set_tracer_option(tr, tracer_flags, opts, neg); 5240 } 5241 5242 return -EINVAL; 5243 } 5244 5245 /* Some tracers require overwrite to stay enabled */ 5246 int trace_keep_overwrite(struct tracer *tracer, u64 mask, int set) 5247 { 5248 if (tracer->enabled && (mask & TRACE_ITER(OVERWRITE)) && !set) 5249 return -1; 5250 5251 return 0; 5252 } 5253 5254 int set_tracer_flag(struct trace_array *tr, u64 mask, int enabled) 5255 { 5256 switch (mask) { 5257 case TRACE_ITER(RECORD_TGID): 5258 case TRACE_ITER(RECORD_CMD): 5259 case TRACE_ITER(TRACE_PRINTK): 5260 case TRACE_ITER(COPY_MARKER): 5261 lockdep_assert_held(&event_mutex); 5262 } 5263 5264 /* do nothing if flag is already set */ 5265 if (!!(tr->trace_flags & mask) == !!enabled) 5266 return 0; 5267 5268 /* Give the tracer a chance to approve the change */ 5269 if (tr->current_trace->flag_changed) 5270 if (tr->current_trace->flag_changed(tr, mask, !!enabled)) 5271 return -EINVAL; 5272 5273 switch (mask) { 5274 case TRACE_ITER(TRACE_PRINTK): 5275 if (enabled) { 5276 update_printk_trace(tr); 5277 } else { 5278 /* 5279 * The global_trace cannot clear this. 5280 * It's flag only gets cleared if another instance sets it. 5281 */ 5282 if (printk_trace == &global_trace) 5283 return -EINVAL; 5284 /* 5285 * An instance must always have it set. 5286 * by default, that's the global_trace instance. 
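 *
 * In other words, clearing TRACE_PRINTK on the instance that
 * currently owns trace_printk() output hands that output back to the
 * top-level global_trace rather than leaving trace_printk() with no
 * destination.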
5287 */ 5288 if (printk_trace == tr) 5289 update_printk_trace(&global_trace); 5290 } 5291 break; 5292 5293 case TRACE_ITER(COPY_MARKER): 5294 update_marker_trace(tr, enabled); 5295 /* update_marker_trace updates the tr->trace_flags */ 5296 return 0; 5297 } 5298 5299 if (enabled) 5300 tr->trace_flags |= mask; 5301 else 5302 tr->trace_flags &= ~mask; 5303 5304 switch (mask) { 5305 case TRACE_ITER(RECORD_CMD): 5306 trace_event_enable_cmd_record(enabled); 5307 break; 5308 5309 case TRACE_ITER(RECORD_TGID): 5310 5311 if (trace_alloc_tgid_map() < 0) { 5312 tr->trace_flags &= ~TRACE_ITER(RECORD_TGID); 5313 return -ENOMEM; 5314 } 5315 5316 trace_event_enable_tgid_record(enabled); 5317 break; 5318 5319 case TRACE_ITER(EVENT_FORK): 5320 trace_event_follow_fork(tr, enabled); 5321 break; 5322 5323 case TRACE_ITER(FUNC_FORK): 5324 ftrace_pid_follow_fork(tr, enabled); 5325 break; 5326 5327 case TRACE_ITER(OVERWRITE): 5328 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled); 5329 #ifdef CONFIG_TRACER_MAX_TRACE 5330 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled); 5331 #endif 5332 break; 5333 5334 case TRACE_ITER(PRINTK): 5335 trace_printk_start_stop_comm(enabled); 5336 trace_printk_control(enabled); 5337 break; 5338 5339 #if defined(CONFIG_FUNCTION_PROFILER) && defined(CONFIG_FUNCTION_GRAPH_TRACER) 5340 case TRACE_GRAPH_GRAPH_TIME: 5341 ftrace_graph_graph_time_control(enabled); 5342 break; 5343 #endif 5344 } 5345 5346 return 0; 5347 } 5348 5349 int trace_set_options(struct trace_array *tr, char *option) 5350 { 5351 char *cmp; 5352 int neg = 0; 5353 int ret; 5354 size_t orig_len = strlen(option); 5355 int len; 5356 5357 cmp = strstrip(option); 5358 5359 len = str_has_prefix(cmp, "no"); 5360 if (len) 5361 neg = 1; 5362 5363 cmp += len; 5364 5365 mutex_lock(&event_mutex); 5366 mutex_lock(&trace_types_lock); 5367 5368 ret = match_string(trace_options, -1, cmp); 5369 /* If no option could be set, test the specific tracer options */ 5370 if (ret < 0) 5371 ret = set_tracer_option(tr, cmp, neg); 5372 else 5373 ret = set_tracer_flag(tr, 1ULL << ret, !neg); 5374 5375 mutex_unlock(&trace_types_lock); 5376 mutex_unlock(&event_mutex); 5377 5378 /* 5379 * If the first trailing whitespace is replaced with '\0' by strstrip, 5380 * turn it back into a space. 
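 *
 * For example, if the caller passed "trace_printk " (with a trailing
 * space), strstrip() terminated the string where that space was;
 * putting a single space back leaves the caller's buffer exactly as
 * it was handed in, in case the same buffer is parsed again.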
5381 */ 5382 if (orig_len > strlen(option)) 5383 option[strlen(option)] = ' '; 5384 5385 return ret; 5386 } 5387 5388 static void __init apply_trace_boot_options(void) 5389 { 5390 char *buf = trace_boot_options_buf; 5391 char *option; 5392 5393 while (true) { 5394 option = strsep(&buf, ","); 5395 5396 if (!option) 5397 break; 5398 5399 if (*option) 5400 trace_set_options(&global_trace, option); 5401 5402 /* Put back the comma to allow this to be called again */ 5403 if (buf) 5404 *(buf - 1) = ','; 5405 } 5406 } 5407 5408 static ssize_t 5409 tracing_trace_options_write(struct file *filp, const char __user *ubuf, 5410 size_t cnt, loff_t *ppos) 5411 { 5412 struct seq_file *m = filp->private_data; 5413 struct trace_array *tr = m->private; 5414 char buf[64]; 5415 int ret; 5416 5417 if (cnt >= sizeof(buf)) 5418 return -EINVAL; 5419 5420 if (copy_from_user(buf, ubuf, cnt)) 5421 return -EFAULT; 5422 5423 buf[cnt] = 0; 5424 5425 ret = trace_set_options(tr, buf); 5426 if (ret < 0) 5427 return ret; 5428 5429 *ppos += cnt; 5430 5431 return cnt; 5432 } 5433 5434 static int tracing_trace_options_open(struct inode *inode, struct file *file) 5435 { 5436 struct trace_array *tr = inode->i_private; 5437 int ret; 5438 5439 ret = tracing_check_open_get_tr(tr); 5440 if (ret) 5441 return ret; 5442 5443 ret = single_open(file, tracing_trace_options_show, inode->i_private); 5444 if (ret < 0) 5445 trace_array_put(tr); 5446 5447 return ret; 5448 } 5449 5450 static const struct file_operations tracing_iter_fops = { 5451 .open = tracing_trace_options_open, 5452 .read = seq_read, 5453 .llseek = seq_lseek, 5454 .release = tracing_single_release_tr, 5455 .write = tracing_trace_options_write, 5456 }; 5457 5458 static const char readme_msg[] = 5459 "tracing mini-HOWTO:\n\n" 5460 "By default tracefs removes all OTH file permission bits.\n" 5461 "When mounting tracefs an optional group id can be specified\n" 5462 "which adds the group to every directory and file in tracefs:\n\n" 5463 "\t e.g. 
mount -t tracefs [-o [gid=<gid>]] nodev /sys/kernel/tracing\n\n" 5464 "# echo 0 > tracing_on : quick way to disable tracing\n" 5465 "# echo 1 > tracing_on : quick way to re-enable tracing\n\n" 5466 " Important files:\n" 5467 " trace\t\t\t- The static contents of the buffer\n" 5468 "\t\t\t To clear the buffer write into this file: echo > trace\n" 5469 " trace_pipe\t\t- A consuming read to see the contents of the buffer\n" 5470 " current_tracer\t- function and latency tracers\n" 5471 " available_tracers\t- list of configured tracers for current_tracer\n" 5472 " error_log\t- error log for failed commands (that support it)\n" 5473 " buffer_size_kb\t- view and modify size of per cpu buffer\n" 5474 " buffer_total_size_kb - view total size of all cpu buffers\n\n" 5475 " trace_clock\t\t- change the clock used to order events\n" 5476 " local: Per cpu clock but may not be synced across CPUs\n" 5477 " global: Synced across CPUs but slows tracing down.\n" 5478 " counter: Not a clock, but just an increment\n" 5479 " uptime: Jiffy counter from time of boot\n" 5480 " perf: Same clock that perf events use\n" 5481 #ifdef CONFIG_X86_64 5482 " x86-tsc: TSC cycle counter\n" 5483 #endif 5484 "\n timestamp_mode\t- view the mode used to timestamp events\n" 5485 " delta: Delta difference against a buffer-wide timestamp\n" 5486 " absolute: Absolute (standalone) timestamp\n" 5487 "\n trace_marker\t\t- Writes into this file writes into the kernel buffer\n" 5488 "\n trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n" 5489 " tracing_cpumask\t- Limit which CPUs to trace\n" 5490 " instances\t\t- Make sub-buffers with: mkdir instances/foo\n" 5491 "\t\t\t Remove sub-buffer with rmdir\n" 5492 " trace_options\t\t- Set format or modify how tracing happens\n" 5493 "\t\t\t Disable an option by prefixing 'no' to the\n" 5494 "\t\t\t option name\n" 5495 " saved_cmdlines_size\t- echo command number in here to store comm-pid list\n" 5496 #ifdef CONFIG_DYNAMIC_FTRACE 5497 "\n available_filter_functions - list of functions that can be filtered on\n" 5498 " set_ftrace_filter\t- echo function name in here to only trace these\n" 5499 "\t\t\t functions\n" 5500 "\t accepts: func_full_name or glob-matching-pattern\n" 5501 "\t modules: Can select a group via module\n" 5502 "\t Format: :mod:<module-name>\n" 5503 "\t example: echo :mod:ext3 > set_ftrace_filter\n" 5504 "\t triggers: a command to perform when function is hit\n" 5505 "\t Format: <function>:<trigger>[:count]\n" 5506 "\t trigger: traceon, traceoff\n" 5507 "\t\t enable_event:<system>:<event>\n" 5508 "\t\t disable_event:<system>:<event>\n" 5509 #ifdef CONFIG_STACKTRACE 5510 "\t\t stacktrace\n" 5511 #endif 5512 #ifdef CONFIG_TRACER_SNAPSHOT 5513 "\t\t snapshot\n" 5514 #endif 5515 "\t\t dump\n" 5516 "\t\t cpudump\n" 5517 "\t example: echo do_fault:traceoff > set_ftrace_filter\n" 5518 "\t echo do_trap:traceoff:3 > set_ftrace_filter\n" 5519 "\t The first one will disable tracing every time do_fault is hit\n" 5520 "\t The second will disable tracing at most 3 times when do_trap is hit\n" 5521 "\t The first time do trap is hit and it disables tracing, the\n" 5522 "\t counter will decrement to 2. If tracing is already disabled,\n" 5523 "\t the counter will not decrement. 
It only decrements when the\n" 5524 "\t trigger did work\n" 5525 "\t To remove trigger without count:\n" 5526 "\t echo '!<function>:<trigger> > set_ftrace_filter\n" 5527 "\t To remove trigger with a count:\n" 5528 "\t echo '!<function>:<trigger>:0 > set_ftrace_filter\n" 5529 " set_ftrace_notrace\t- echo function name in here to never trace.\n" 5530 "\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n" 5531 "\t modules: Can select a group via module command :mod:\n" 5532 "\t Does not accept triggers\n" 5533 #endif /* CONFIG_DYNAMIC_FTRACE */ 5534 #ifdef CONFIG_FUNCTION_TRACER 5535 " set_ftrace_pid\t- Write pid(s) to only function trace those pids\n" 5536 "\t\t (function)\n" 5537 " set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n" 5538 "\t\t (function)\n" 5539 #endif 5540 #ifdef CONFIG_FUNCTION_GRAPH_TRACER 5541 " set_graph_function\t- Trace the nested calls of a function (function_graph)\n" 5542 " set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n" 5543 " max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n" 5544 #endif 5545 #ifdef CONFIG_TRACER_SNAPSHOT 5546 "\n snapshot\t\t- Like 'trace' but shows the content of the static\n" 5547 "\t\t\t snapshot buffer. Read the contents for more\n" 5548 "\t\t\t information\n" 5549 #endif 5550 #ifdef CONFIG_STACK_TRACER 5551 " stack_trace\t\t- Shows the max stack trace when active\n" 5552 " stack_max_size\t- Shows current max stack size that was traced\n" 5553 "\t\t\t Write into this file to reset the max size (trigger a\n" 5554 "\t\t\t new trace)\n" 5555 #ifdef CONFIG_DYNAMIC_FTRACE 5556 " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n" 5557 "\t\t\t traces\n" 5558 #endif 5559 #endif /* CONFIG_STACK_TRACER */ 5560 #ifdef CONFIG_DYNAMIC_EVENTS 5561 " dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n" 5562 "\t\t\t Write into this file to define/undefine new trace events.\n" 5563 #endif 5564 #ifdef CONFIG_KPROBE_EVENTS 5565 " kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n" 5566 "\t\t\t Write into this file to define/undefine new trace events.\n" 5567 #endif 5568 #ifdef CONFIG_UPROBE_EVENTS 5569 " uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n" 5570 "\t\t\t Write into this file to define/undefine new trace events.\n" 5571 #endif 5572 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \ 5573 defined(CONFIG_FPROBE_EVENTS) 5574 "\t accepts: event-definitions (one definition per line)\n" 5575 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) 5576 "\t Format: p[:[<group>/][<event>]] <place> [<args>]\n" 5577 "\t r[maxactive][:[<group>/][<event>]] <place> [<args>]\n" 5578 #endif 5579 #ifdef CONFIG_FPROBE_EVENTS 5580 "\t f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n" 5581 "\t t[:[<group>/][<event>]] <tracepoint> [<args>]\n" 5582 #endif 5583 #ifdef CONFIG_HIST_TRIGGERS 5584 "\t s:[synthetic/]<event> <field> [<field>]\n" 5585 #endif 5586 "\t e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n" 5587 "\t -:[<group>/][<event>]\n" 5588 #ifdef CONFIG_KPROBE_EVENTS 5589 "\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n" 5590 "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n" 5591 #endif 5592 #ifdef CONFIG_UPROBE_EVENTS 5593 " place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n" 5594 #endif 5595 "\t args: <name>=fetcharg[:type]\n" 5596 "\t fetcharg: 
(%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n" 5597 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API 5598 "\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n" 5599 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS 5600 "\t <argname>[->field[->field|.field...]],\n" 5601 #endif 5602 #else 5603 "\t $stack<index>, $stack, $retval, $comm,\n" 5604 #endif 5605 "\t +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n" 5606 "\t kernel return probes support: $retval, $arg<N>, $comm\n" 5607 "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n" 5608 "\t b<bit-width>@<bit-offset>/<container-size>, ustring,\n" 5609 "\t symstr, %pd/%pD, <type>\\[<array-size>\\]\n" 5610 #ifdef CONFIG_HIST_TRIGGERS 5611 "\t field: <stype> <name>;\n" 5612 "\t stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n" 5613 "\t [unsigned] char/int/long\n" 5614 #endif 5615 "\t efield: For event probes ('e' types), the field is on of the fields\n" 5616 "\t of the <attached-group>/<attached-event>.\n" 5617 #endif 5618 " set_event\t\t- Enables events by name written into it\n" 5619 "\t\t\t Can enable module events via: :mod:<module>\n" 5620 " events/\t\t- Directory containing all trace event subsystems:\n" 5621 " enable\t\t- Write 0/1 to enable/disable tracing of all events\n" 5622 " events/<system>/\t- Directory containing all trace events for <system>:\n" 5623 " enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n" 5624 "\t\t\t events\n" 5625 " filter\t\t- If set, only events passing filter are traced\n" 5626 " events/<system>/<event>/\t- Directory containing control files for\n" 5627 "\t\t\t <event>:\n" 5628 " enable\t\t- Write 0/1 to enable/disable tracing of <event>\n" 5629 " filter\t\t- If set, only events passing filter are traced\n" 5630 " trigger\t\t- If set, a command to perform when event is hit\n" 5631 "\t Format: <trigger>[:count][if <filter>]\n" 5632 "\t trigger: traceon, traceoff\n" 5633 "\t enable_event:<system>:<event>\n" 5634 "\t disable_event:<system>:<event>\n" 5635 #ifdef CONFIG_HIST_TRIGGERS 5636 "\t enable_hist:<system>:<event>\n" 5637 "\t disable_hist:<system>:<event>\n" 5638 #endif 5639 #ifdef CONFIG_STACKTRACE 5640 "\t\t stacktrace\n" 5641 #endif 5642 #ifdef CONFIG_TRACER_SNAPSHOT 5643 "\t\t snapshot\n" 5644 #endif 5645 #ifdef CONFIG_HIST_TRIGGERS 5646 "\t\t hist (see below)\n" 5647 #endif 5648 "\t example: echo traceoff > events/block/block_unplug/trigger\n" 5649 "\t echo traceoff:3 > events/block/block_unplug/trigger\n" 5650 "\t echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n" 5651 "\t events/block/block_unplug/trigger\n" 5652 "\t The first disables tracing every time block_unplug is hit.\n" 5653 "\t The second disables tracing the first 3 times block_unplug is hit.\n" 5654 "\t The third enables the kmalloc event the first 3 times block_unplug\n" 5655 "\t is hit and has value of greater than 1 for the 'nr_rq' event field.\n" 5656 "\t Like function triggers, the counter is only decremented if it\n" 5657 "\t enabled or disabled tracing.\n" 5658 "\t To remove a trigger without a count:\n" 5659 "\t echo '!<trigger> > <system>/<event>/trigger\n" 5660 "\t To remove a trigger with a count:\n" 5661 "\t echo '!<trigger>:0 > <system>/<event>/trigger\n" 5662 "\t Filters can be ignored when removing a trigger.\n" 5663 #ifdef CONFIG_HIST_TRIGGERS 5664 " hist trigger\t- If set, event hits are aggregated into a hash table\n" 5665 "\t Format: hist:keys=<field1[,field2,...]>\n" 5666 "\t [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n" 5667 "\t 
[:values=<field1[,field2,...]>]\n" 5668 "\t [:sort=<field1[,field2,...]>]\n" 5669 "\t [:size=#entries]\n" 5670 "\t [:pause][:continue][:clear]\n" 5671 "\t [:name=histname1]\n" 5672 "\t [:nohitcount]\n" 5673 "\t [:<handler>.<action>]\n" 5674 "\t [if <filter>]\n\n" 5675 "\t Note, special fields can be used as well:\n" 5676 "\t common_timestamp - to record current timestamp\n" 5677 "\t common_cpu - to record the CPU the event happened on\n" 5678 "\n" 5679 "\t A hist trigger variable can be:\n" 5680 "\t - a reference to a field e.g. x=current_timestamp,\n" 5681 "\t - a reference to another variable e.g. y=$x,\n" 5682 "\t - a numeric literal: e.g. ms_per_sec=1000,\n" 5683 "\t - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n" 5684 "\n" 5685 "\t hist trigger arithmetic expressions support addition(+), subtraction(-),\n" 5686 "\t multiplication(*) and division(/) operators. An operand can be either a\n" 5687 "\t variable reference, field or numeric literal.\n" 5688 "\n" 5689 "\t When a matching event is hit, an entry is added to a hash\n" 5690 "\t table using the key(s) and value(s) named, and the value of a\n" 5691 "\t sum called 'hitcount' is incremented. Keys and values\n" 5692 "\t correspond to fields in the event's format description. Keys\n" 5693 "\t can be any field, or the special string 'common_stacktrace'.\n" 5694 "\t Compound keys consisting of up to two fields can be specified\n" 5695 "\t by the 'keys' keyword. Values must correspond to numeric\n" 5696 "\t fields. Sort keys consisting of up to two fields can be\n" 5697 "\t specified using the 'sort' keyword. The sort direction can\n" 5698 "\t be modified by appending '.descending' or '.ascending' to a\n" 5699 "\t sort field. The 'size' parameter can be used to specify more\n" 5700 "\t or fewer than the default 2048 entries for the hashtable size.\n" 5701 "\t If a hist trigger is given a name using the 'name' parameter,\n" 5702 "\t its histogram data will be shared with other triggers of the\n" 5703 "\t same name, and trigger hits will update this common data.\n\n" 5704 "\t Reading the 'hist' file for the event will dump the hash\n" 5705 "\t table in its entirety to stdout. If there are multiple hist\n" 5706 "\t triggers attached to an event, there will be a table for each\n" 5707 "\t trigger in the output. The table displayed for a named\n" 5708 "\t trigger will be the same as any other instance having the\n" 5709 "\t same name. The default format used to display a given field\n" 5710 "\t can be modified by appending any of the following modifiers\n" 5711 "\t to the field name, as applicable:\n\n" 5712 "\t .hex display a number as a hex value\n" 5713 "\t .sym display an address as a symbol\n" 5714 "\t .sym-offset display an address as a symbol and offset\n" 5715 "\t .execname display a common_pid as a program name\n" 5716 "\t .syscall display a syscall id as a syscall name\n" 5717 "\t .log2 display log2 value rather than raw number\n" 5718 "\t .buckets=size display values in groups of size rather than raw number\n" 5719 "\t .usecs display a common_timestamp in microseconds\n" 5720 "\t .percent display a number of percentage value\n" 5721 "\t .graph display a bar-graph of a value\n\n" 5722 "\t The 'pause' parameter can be used to pause an existing hist\n" 5723 "\t trigger or to start a hist trigger but not log any events\n" 5724 "\t until told to do so. 
'continue' can be used to start or\n" 5725 "\t restart a paused hist trigger.\n\n" 5726 "\t The 'clear' parameter will clear the contents of a running\n" 5727 "\t hist trigger and leave its current paused/active state\n" 5728 "\t unchanged.\n\n" 5729 "\t The 'nohitcount' (or NOHC) parameter will suppress display of\n" 5730 "\t raw hitcount in the histogram.\n\n" 5731 "\t The enable_hist and disable_hist triggers can be used to\n" 5732 "\t have one event conditionally start and stop another event's\n" 5733 "\t already-attached hist trigger. The syntax is analogous to\n" 5734 "\t the enable_event and disable_event triggers.\n\n" 5735 "\t Hist trigger handlers and actions are executed whenever a\n" 5736 "\t a histogram entry is added or updated. They take the form:\n\n" 5737 "\t <handler>.<action>\n\n" 5738 "\t The available handlers are:\n\n" 5739 "\t onmatch(matching.event) - invoke on addition or update\n" 5740 "\t onmax(var) - invoke if var exceeds current max\n" 5741 "\t onchange(var) - invoke action if var changes\n\n" 5742 "\t The available actions are:\n\n" 5743 "\t trace(<synthetic_event>,param list) - generate synthetic event\n" 5744 "\t save(field,...) - save current event fields\n" 5745 #ifdef CONFIG_TRACER_SNAPSHOT 5746 "\t snapshot() - snapshot the trace buffer\n\n" 5747 #endif 5748 #ifdef CONFIG_SYNTH_EVENTS 5749 " events/synthetic_events\t- Create/append/remove/show synthetic events\n" 5750 "\t Write into this file to define/undefine new synthetic events.\n" 5751 "\t example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n" 5752 #endif 5753 #endif 5754 ; 5755 5756 static ssize_t 5757 tracing_readme_read(struct file *filp, char __user *ubuf, 5758 size_t cnt, loff_t *ppos) 5759 { 5760 return simple_read_from_buffer(ubuf, cnt, ppos, 5761 readme_msg, strlen(readme_msg)); 5762 } 5763 5764 static const struct file_operations tracing_readme_fops = { 5765 .open = tracing_open_generic, 5766 .read = tracing_readme_read, 5767 .llseek = generic_file_llseek, 5768 }; 5769 5770 #ifdef CONFIG_TRACE_EVAL_MAP_FILE 5771 static union trace_eval_map_item * 5772 update_eval_map(union trace_eval_map_item *ptr) 5773 { 5774 if (!ptr->map.eval_string) { 5775 if (ptr->tail.next) { 5776 ptr = ptr->tail.next; 5777 /* Set ptr to the next real item (skip head) */ 5778 ptr++; 5779 } else 5780 return NULL; 5781 } 5782 return ptr; 5783 } 5784 5785 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos) 5786 { 5787 union trace_eval_map_item *ptr = v; 5788 5789 /* 5790 * Paranoid! If ptr points to end, we don't want to increment past it. 5791 * This really should never happen. 
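 *
 * update_eval_map() is what hops from the tail of one saved array to
 * the first real entry of the next one (skipping that array's head),
 * so the WARN_ON_ONCE() below only fires if the current position was
 * already a terminating tail with no next array, a state the
 * start/next callbacks should never produce.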
5792 */ 5793 (*pos)++; 5794 ptr = update_eval_map(ptr); 5795 if (WARN_ON_ONCE(!ptr)) 5796 return NULL; 5797 5798 ptr++; 5799 ptr = update_eval_map(ptr); 5800 5801 return ptr; 5802 } 5803 5804 static void *eval_map_start(struct seq_file *m, loff_t *pos) 5805 { 5806 union trace_eval_map_item *v; 5807 loff_t l = 0; 5808 5809 mutex_lock(&trace_eval_mutex); 5810 5811 v = trace_eval_maps; 5812 if (v) 5813 v++; 5814 5815 while (v && l < *pos) { 5816 v = eval_map_next(m, v, &l); 5817 } 5818 5819 return v; 5820 } 5821 5822 static void eval_map_stop(struct seq_file *m, void *v) 5823 { 5824 mutex_unlock(&trace_eval_mutex); 5825 } 5826 5827 static int eval_map_show(struct seq_file *m, void *v) 5828 { 5829 union trace_eval_map_item *ptr = v; 5830 5831 seq_printf(m, "%s %ld (%s)\n", 5832 ptr->map.eval_string, ptr->map.eval_value, 5833 ptr->map.system); 5834 5835 return 0; 5836 } 5837 5838 static const struct seq_operations tracing_eval_map_seq_ops = { 5839 .start = eval_map_start, 5840 .next = eval_map_next, 5841 .stop = eval_map_stop, 5842 .show = eval_map_show, 5843 }; 5844 5845 static int tracing_eval_map_open(struct inode *inode, struct file *filp) 5846 { 5847 int ret; 5848 5849 ret = tracing_check_open_get_tr(NULL); 5850 if (ret) 5851 return ret; 5852 5853 return seq_open(filp, &tracing_eval_map_seq_ops); 5854 } 5855 5856 static const struct file_operations tracing_eval_map_fops = { 5857 .open = tracing_eval_map_open, 5858 .read = seq_read, 5859 .llseek = seq_lseek, 5860 .release = seq_release, 5861 }; 5862 5863 static inline union trace_eval_map_item * 5864 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr) 5865 { 5866 /* Return tail of array given the head */ 5867 return ptr + ptr->head.length + 1; 5868 } 5869 5870 static void 5871 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start, 5872 int len) 5873 { 5874 struct trace_eval_map **stop; 5875 struct trace_eval_map **map; 5876 union trace_eval_map_item *map_array; 5877 union trace_eval_map_item *ptr; 5878 5879 stop = start + len; 5880 5881 /* 5882 * The trace_eval_maps contains the map plus a head and tail item, 5883 * where the head holds the module and length of array, and the 5884 * tail holds a pointer to the next list. 
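 *
 * For example (purely illustrative), with len == 2 the allocated array is:
 *
 *   map_array[0]  head  (head.mod = mod, head.length = 2)
 *   map_array[1]  map   (copy of the first trace_eval_map)
 *   map_array[2]  map   (copy of the second trace_eval_map)
 *   map_array[3]  tail  (zeroed below, so tail.next stays NULL until
 *                        another module's array is chained to it)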
5885 */ 5886 map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL); 5887 if (!map_array) { 5888 pr_warn("Unable to allocate trace eval mapping\n"); 5889 return; 5890 } 5891 5892 guard(mutex)(&trace_eval_mutex); 5893 5894 if (!trace_eval_maps) 5895 trace_eval_maps = map_array; 5896 else { 5897 ptr = trace_eval_maps; 5898 for (;;) { 5899 ptr = trace_eval_jmp_to_tail(ptr); 5900 if (!ptr->tail.next) 5901 break; 5902 ptr = ptr->tail.next; 5903 5904 } 5905 ptr->tail.next = map_array; 5906 } 5907 map_array->head.mod = mod; 5908 map_array->head.length = len; 5909 map_array++; 5910 5911 for (map = start; (unsigned long)map < (unsigned long)stop; map++) { 5912 map_array->map = **map; 5913 map_array++; 5914 } 5915 memset(map_array, 0, sizeof(*map_array)); 5916 } 5917 5918 static void trace_create_eval_file(struct dentry *d_tracer) 5919 { 5920 trace_create_file("eval_map", TRACE_MODE_READ, d_tracer, 5921 NULL, &tracing_eval_map_fops); 5922 } 5923 5924 #else /* CONFIG_TRACE_EVAL_MAP_FILE */ 5925 static inline void trace_create_eval_file(struct dentry *d_tracer) { } 5926 static inline void trace_insert_eval_map_file(struct module *mod, 5927 struct trace_eval_map **start, int len) { } 5928 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */ 5929 5930 static void 5931 trace_event_update_with_eval_map(struct module *mod, 5932 struct trace_eval_map **start, 5933 int len) 5934 { 5935 struct trace_eval_map **map; 5936 5937 /* Always run sanitizer only if btf_type_tag attr exists. */ 5938 if (len <= 0) { 5939 if (!(IS_ENABLED(CONFIG_DEBUG_INFO_BTF) && 5940 IS_ENABLED(CONFIG_PAHOLE_HAS_BTF_TAG) && 5941 __has_attribute(btf_type_tag))) 5942 return; 5943 } 5944 5945 map = start; 5946 5947 trace_event_update_all(map, len); 5948 5949 if (len <= 0) 5950 return; 5951 5952 trace_insert_eval_map_file(mod, start, len); 5953 } 5954 5955 static ssize_t 5956 tracing_set_trace_read(struct file *filp, char __user *ubuf, 5957 size_t cnt, loff_t *ppos) 5958 { 5959 struct trace_array *tr = filp->private_data; 5960 char buf[MAX_TRACER_SIZE+2]; 5961 int r; 5962 5963 scoped_guard(mutex, &trace_types_lock) { 5964 r = sprintf(buf, "%s\n", tr->current_trace->name); 5965 } 5966 5967 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 5968 } 5969 5970 int tracer_init(struct tracer *t, struct trace_array *tr) 5971 { 5972 tracing_reset_online_cpus(&tr->array_buffer); 5973 return t->init(tr); 5974 } 5975 5976 static void set_buffer_entries(struct array_buffer *buf, unsigned long val) 5977 { 5978 int cpu; 5979 5980 for_each_tracing_cpu(cpu) 5981 per_cpu_ptr(buf->data, cpu)->entries = val; 5982 } 5983 5984 static void update_buffer_entries(struct array_buffer *buf, int cpu) 5985 { 5986 if (cpu == RING_BUFFER_ALL_CPUS) { 5987 set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0)); 5988 } else { 5989 per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu); 5990 } 5991 } 5992 5993 #ifdef CONFIG_TRACER_MAX_TRACE 5994 /* resize @tr's buffer to the size of @size_tr's entries */ 5995 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf, 5996 struct array_buffer *size_buf, int cpu_id) 5997 { 5998 int cpu, ret = 0; 5999 6000 if (cpu_id == RING_BUFFER_ALL_CPUS) { 6001 for_each_tracing_cpu(cpu) { 6002 ret = ring_buffer_resize(trace_buf->buffer, 6003 per_cpu_ptr(size_buf->data, cpu)->entries, cpu); 6004 if (ret < 0) 6005 break; 6006 per_cpu_ptr(trace_buf->data, cpu)->entries = 6007 per_cpu_ptr(size_buf->data, cpu)->entries; 6008 } 6009 } else { 6010 ret = ring_buffer_resize(trace_buf->buffer, 6011 
per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id); 6012 if (ret == 0) 6013 per_cpu_ptr(trace_buf->data, cpu_id)->entries = 6014 per_cpu_ptr(size_buf->data, cpu_id)->entries; 6015 } 6016 6017 return ret; 6018 } 6019 #endif /* CONFIG_TRACER_MAX_TRACE */ 6020 6021 static int __tracing_resize_ring_buffer(struct trace_array *tr, 6022 unsigned long size, int cpu) 6023 { 6024 int ret; 6025 6026 /* 6027 * If kernel or user changes the size of the ring buffer 6028 * we use the size that was given, and we can forget about 6029 * expanding it later. 6030 */ 6031 trace_set_ring_buffer_expanded(tr); 6032 6033 /* May be called before buffers are initialized */ 6034 if (!tr->array_buffer.buffer) 6035 return 0; 6036 6037 /* Do not allow tracing while resizing ring buffer */ 6038 tracing_stop_tr(tr); 6039 6040 ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu); 6041 if (ret < 0) 6042 goto out_start; 6043 6044 #ifdef CONFIG_TRACER_MAX_TRACE 6045 if (!tr->allocated_snapshot) 6046 goto out; 6047 6048 ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu); 6049 if (ret < 0) { 6050 int r = resize_buffer_duplicate_size(&tr->array_buffer, 6051 &tr->array_buffer, cpu); 6052 if (r < 0) { 6053 /* 6054 * AARGH! We are left with different 6055 * size max buffer!!!! 6056 * The max buffer is our "snapshot" buffer. 6057 * When a tracer needs a snapshot (one of the 6058 * latency tracers), it swaps the max buffer 6059 * with the saved snap shot. We succeeded to 6060 * update the size of the main buffer, but failed to 6061 * update the size of the max buffer. But when we tried 6062 * to reset the main buffer to the original size, we 6063 * failed there too. This is very unlikely to 6064 * happen, but if it does, warn and kill all 6065 * tracing. 6066 */ 6067 WARN_ON(1); 6068 tracing_disabled = 1; 6069 } 6070 goto out_start; 6071 } 6072 6073 update_buffer_entries(&tr->max_buffer, cpu); 6074 6075 out: 6076 #endif /* CONFIG_TRACER_MAX_TRACE */ 6077 6078 update_buffer_entries(&tr->array_buffer, cpu); 6079 out_start: 6080 tracing_start_tr(tr); 6081 return ret; 6082 } 6083 6084 ssize_t tracing_resize_ring_buffer(struct trace_array *tr, 6085 unsigned long size, int cpu_id) 6086 { 6087 guard(mutex)(&trace_types_lock); 6088 6089 if (cpu_id != RING_BUFFER_ALL_CPUS) { 6090 /* make sure, this cpu is enabled in the mask */ 6091 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) 6092 return -EINVAL; 6093 } 6094 6095 return __tracing_resize_ring_buffer(tr, size, cpu_id); 6096 } 6097 6098 struct trace_mod_entry { 6099 unsigned long mod_addr; 6100 char mod_name[MODULE_NAME_LEN]; 6101 }; 6102 6103 struct trace_scratch { 6104 unsigned int clock_id; 6105 unsigned long text_addr; 6106 unsigned long nr_entries; 6107 struct trace_mod_entry entries[]; 6108 }; 6109 6110 static DEFINE_MUTEX(scratch_mutex); 6111 6112 static int cmp_mod_entry(const void *key, const void *pivot) 6113 { 6114 unsigned long addr = (unsigned long)key; 6115 const struct trace_mod_entry *ent = pivot; 6116 6117 if (addr < ent[0].mod_addr) 6118 return -1; 6119 6120 return addr >= ent[1].mod_addr; 6121 } 6122 6123 /** 6124 * trace_adjust_address() - Adjust prev boot address to current address. 6125 * @tr: Persistent ring buffer's trace_array. 6126 * @addr: Address in @tr which is adjusted. 
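 *
 * Returns: @addr translated from the previous boot's layout to the current
 * boot using the saved kernel text or per-module deltas, or @addr unchanged
 * when no last-boot data applies to it.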
6127 */ 6128 unsigned long trace_adjust_address(struct trace_array *tr, unsigned long addr) 6129 { 6130 struct trace_module_delta *module_delta; 6131 struct trace_scratch *tscratch; 6132 struct trace_mod_entry *entry; 6133 unsigned long raddr; 6134 int idx = 0, nr_entries; 6135 6136 /* If we don't have last boot delta, return the address */ 6137 if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) 6138 return addr; 6139 6140 /* tr->module_delta must be protected by rcu. */ 6141 guard(rcu)(); 6142 tscratch = tr->scratch; 6143 /* if there is no tscrach, module_delta must be NULL. */ 6144 module_delta = READ_ONCE(tr->module_delta); 6145 if (!module_delta || !tscratch->nr_entries || 6146 tscratch->entries[0].mod_addr > addr) { 6147 raddr = addr + tr->text_delta; 6148 return __is_kernel(raddr) || is_kernel_core_data(raddr) || 6149 is_kernel_rodata(raddr) ? raddr : addr; 6150 } 6151 6152 /* Note that entries must be sorted. */ 6153 nr_entries = tscratch->nr_entries; 6154 if (nr_entries == 1 || 6155 tscratch->entries[nr_entries - 1].mod_addr < addr) 6156 idx = nr_entries - 1; 6157 else { 6158 entry = __inline_bsearch((void *)addr, 6159 tscratch->entries, 6160 nr_entries - 1, 6161 sizeof(tscratch->entries[0]), 6162 cmp_mod_entry); 6163 if (entry) 6164 idx = entry - tscratch->entries; 6165 } 6166 6167 return addr + module_delta->delta[idx]; 6168 } 6169 6170 #ifdef CONFIG_MODULES 6171 static int save_mod(struct module *mod, void *data) 6172 { 6173 struct trace_array *tr = data; 6174 struct trace_scratch *tscratch; 6175 struct trace_mod_entry *entry; 6176 unsigned int size; 6177 6178 tscratch = tr->scratch; 6179 if (!tscratch) 6180 return -1; 6181 size = tr->scratch_size; 6182 6183 if (struct_size(tscratch, entries, tscratch->nr_entries + 1) > size) 6184 return -1; 6185 6186 entry = &tscratch->entries[tscratch->nr_entries]; 6187 6188 tscratch->nr_entries++; 6189 6190 entry->mod_addr = (unsigned long)mod->mem[MOD_TEXT].base; 6191 strscpy(entry->mod_name, mod->name); 6192 6193 return 0; 6194 } 6195 #else 6196 static int save_mod(struct module *mod, void *data) 6197 { 6198 return 0; 6199 } 6200 #endif 6201 6202 static void update_last_data(struct trace_array *tr) 6203 { 6204 struct trace_module_delta *module_delta; 6205 struct trace_scratch *tscratch; 6206 6207 if (!(tr->flags & TRACE_ARRAY_FL_BOOT)) 6208 return; 6209 6210 if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) 6211 return; 6212 6213 /* Only if the buffer has previous boot data clear and update it. */ 6214 tr->flags &= ~TRACE_ARRAY_FL_LAST_BOOT; 6215 6216 /* Reset the module list and reload them */ 6217 if (tr->scratch) { 6218 struct trace_scratch *tscratch = tr->scratch; 6219 6220 tscratch->clock_id = tr->clock_id; 6221 memset(tscratch->entries, 0, 6222 flex_array_size(tscratch, entries, tscratch->nr_entries)); 6223 tscratch->nr_entries = 0; 6224 6225 guard(mutex)(&scratch_mutex); 6226 module_for_each_mod(save_mod, tr); 6227 } 6228 6229 /* 6230 * Need to clear all CPU buffers as there cannot be events 6231 * from the previous boot mixed with events with this boot 6232 * as that will cause a confusing trace. Need to clear all 6233 * CPU buffers, even for those that may currently be offline. 
6234 */ 6235 tracing_reset_all_cpus(&tr->array_buffer); 6236 6237 /* Using current data now */ 6238 tr->text_delta = 0; 6239 6240 if (!tr->scratch) 6241 return; 6242 6243 tscratch = tr->scratch; 6244 module_delta = READ_ONCE(tr->module_delta); 6245 WRITE_ONCE(tr->module_delta, NULL); 6246 kfree_rcu(module_delta, rcu); 6247 6248 /* Set the persistent ring buffer meta data to this address */ 6249 tscratch->text_addr = (unsigned long)_text; 6250 } 6251 6252 /** 6253 * tracing_update_buffers - used by tracing facility to expand ring buffers 6254 * @tr: The tracing instance 6255 * 6256 * To save on memory when the tracing is never used on a system with it 6257 * configured in. The ring buffers are set to a minimum size. But once 6258 * a user starts to use the tracing facility, then they need to grow 6259 * to their default size. 6260 * 6261 * This function is to be called when a tracer is about to be used. 6262 */ 6263 int tracing_update_buffers(struct trace_array *tr) 6264 { 6265 int ret = 0; 6266 6267 guard(mutex)(&trace_types_lock); 6268 6269 update_last_data(tr); 6270 6271 if (!tr->ring_buffer_expanded) 6272 ret = __tracing_resize_ring_buffer(tr, trace_buf_size, 6273 RING_BUFFER_ALL_CPUS); 6274 return ret; 6275 } 6276 6277 /* 6278 * Used to clear out the tracer before deletion of an instance. 6279 * Must have trace_types_lock held. 6280 */ 6281 static void tracing_set_nop(struct trace_array *tr) 6282 { 6283 if (tr->current_trace == &nop_trace) 6284 return; 6285 6286 tr->current_trace->enabled--; 6287 6288 if (tr->current_trace->reset) 6289 tr->current_trace->reset(tr); 6290 6291 tr->current_trace = &nop_trace; 6292 tr->current_trace_flags = nop_trace.flags; 6293 } 6294 6295 static bool tracer_options_updated; 6296 6297 int tracing_set_tracer(struct trace_array *tr, const char *buf) 6298 { 6299 struct tracer *trace = NULL; 6300 struct tracers *t; 6301 #ifdef CONFIG_TRACER_MAX_TRACE 6302 bool had_max_tr; 6303 #endif 6304 int ret; 6305 6306 guard(mutex)(&trace_types_lock); 6307 6308 update_last_data(tr); 6309 6310 if (!tr->ring_buffer_expanded) { 6311 ret = __tracing_resize_ring_buffer(tr, trace_buf_size, 6312 RING_BUFFER_ALL_CPUS); 6313 if (ret < 0) 6314 return ret; 6315 ret = 0; 6316 } 6317 6318 list_for_each_entry(t, &tr->tracers, list) { 6319 if (strcmp(t->tracer->name, buf) == 0) { 6320 trace = t->tracer; 6321 break; 6322 } 6323 } 6324 if (!trace) 6325 return -EINVAL; 6326 6327 if (trace == tr->current_trace) 6328 return 0; 6329 6330 #ifdef CONFIG_TRACER_SNAPSHOT 6331 if (trace->use_max_tr) { 6332 local_irq_disable(); 6333 arch_spin_lock(&tr->max_lock); 6334 ret = tr->cond_snapshot ? 
-EBUSY : 0; 6335 arch_spin_unlock(&tr->max_lock); 6336 local_irq_enable(); 6337 if (ret) 6338 return ret; 6339 } 6340 #endif 6341 /* Some tracers won't work on kernel command line */ 6342 if (system_state < SYSTEM_RUNNING && trace->noboot) { 6343 pr_warn("Tracer '%s' is not allowed on command line, ignored\n", 6344 trace->name); 6345 return -EINVAL; 6346 } 6347 6348 /* Some tracers are only allowed for the top level buffer */ 6349 if (!trace_ok_for_array(trace, tr)) 6350 return -EINVAL; 6351 6352 /* If trace pipe files are being read, we can't change the tracer */ 6353 if (tr->trace_ref) 6354 return -EBUSY; 6355 6356 trace_branch_disable(); 6357 6358 tr->current_trace->enabled--; 6359 6360 if (tr->current_trace->reset) 6361 tr->current_trace->reset(tr); 6362 6363 #ifdef CONFIG_TRACER_MAX_TRACE 6364 had_max_tr = tr->current_trace->use_max_tr; 6365 6366 /* Current trace needs to be nop_trace before synchronize_rcu */ 6367 tr->current_trace = &nop_trace; 6368 tr->current_trace_flags = nop_trace.flags; 6369 6370 if (had_max_tr && !trace->use_max_tr) { 6371 /* 6372 * We need to make sure that the update_max_tr sees that 6373 * current_trace changed to nop_trace to keep it from 6374 * swapping the buffers after we resize it. 6375 * The update_max_tr is called from interrupts disabled 6376 * so a synchronized_sched() is sufficient. 6377 */ 6378 synchronize_rcu(); 6379 free_snapshot(tr); 6380 tracing_disarm_snapshot(tr); 6381 } 6382 6383 if (!had_max_tr && trace->use_max_tr) { 6384 ret = tracing_arm_snapshot_locked(tr); 6385 if (ret) 6386 return ret; 6387 } 6388 #else 6389 tr->current_trace = &nop_trace; 6390 #endif 6391 6392 tr->current_trace_flags = t->flags ? : t->tracer->flags; 6393 6394 if (trace->init) { 6395 ret = tracer_init(trace, tr); 6396 if (ret) { 6397 #ifdef CONFIG_TRACER_MAX_TRACE 6398 if (trace->use_max_tr) 6399 tracing_disarm_snapshot(tr); 6400 #endif 6401 tr->current_trace_flags = nop_trace.flags; 6402 return ret; 6403 } 6404 } 6405 6406 tr->current_trace = trace; 6407 tr->current_trace->enabled++; 6408 trace_branch_enable(tr); 6409 6410 return 0; 6411 } 6412 6413 static ssize_t 6414 tracing_set_trace_write(struct file *filp, const char __user *ubuf, 6415 size_t cnt, loff_t *ppos) 6416 { 6417 struct trace_array *tr = filp->private_data; 6418 char buf[MAX_TRACER_SIZE+1]; 6419 char *name; 6420 size_t ret; 6421 int err; 6422 6423 ret = cnt; 6424 6425 if (cnt > MAX_TRACER_SIZE) 6426 cnt = MAX_TRACER_SIZE; 6427 6428 if (copy_from_user(buf, ubuf, cnt)) 6429 return -EFAULT; 6430 6431 buf[cnt] = 0; 6432 6433 name = strim(buf); 6434 6435 err = tracing_set_tracer(tr, name); 6436 if (err) 6437 return err; 6438 6439 *ppos += ret; 6440 6441 return ret; 6442 } 6443 6444 static ssize_t 6445 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf, 6446 size_t cnt, loff_t *ppos) 6447 { 6448 char buf[64]; 6449 int r; 6450 6451 r = snprintf(buf, sizeof(buf), "%ld\n", 6452 *ptr == (unsigned long)-1 ? 
-1 : nsecs_to_usecs(*ptr)); 6453 if (r > sizeof(buf)) 6454 r = sizeof(buf); 6455 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 6456 } 6457 6458 static ssize_t 6459 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf, 6460 size_t cnt, loff_t *ppos) 6461 { 6462 unsigned long val; 6463 int ret; 6464 6465 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 6466 if (ret) 6467 return ret; 6468 6469 *ptr = val * 1000; 6470 6471 return cnt; 6472 } 6473 6474 static ssize_t 6475 tracing_thresh_read(struct file *filp, char __user *ubuf, 6476 size_t cnt, loff_t *ppos) 6477 { 6478 return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos); 6479 } 6480 6481 static ssize_t 6482 tracing_thresh_write(struct file *filp, const char __user *ubuf, 6483 size_t cnt, loff_t *ppos) 6484 { 6485 struct trace_array *tr = filp->private_data; 6486 int ret; 6487 6488 guard(mutex)(&trace_types_lock); 6489 ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos); 6490 if (ret < 0) 6491 return ret; 6492 6493 if (tr->current_trace->update_thresh) { 6494 ret = tr->current_trace->update_thresh(tr); 6495 if (ret < 0) 6496 return ret; 6497 } 6498 6499 return cnt; 6500 } 6501 6502 #ifdef CONFIG_TRACER_MAX_TRACE 6503 6504 static ssize_t 6505 tracing_max_lat_read(struct file *filp, char __user *ubuf, 6506 size_t cnt, loff_t *ppos) 6507 { 6508 struct trace_array *tr = filp->private_data; 6509 6510 return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos); 6511 } 6512 6513 static ssize_t 6514 tracing_max_lat_write(struct file *filp, const char __user *ubuf, 6515 size_t cnt, loff_t *ppos) 6516 { 6517 struct trace_array *tr = filp->private_data; 6518 6519 return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos); 6520 } 6521 6522 #endif 6523 6524 static int open_pipe_on_cpu(struct trace_array *tr, int cpu) 6525 { 6526 if (cpu == RING_BUFFER_ALL_CPUS) { 6527 if (cpumask_empty(tr->pipe_cpumask)) { 6528 cpumask_setall(tr->pipe_cpumask); 6529 return 0; 6530 } 6531 } else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) { 6532 cpumask_set_cpu(cpu, tr->pipe_cpumask); 6533 return 0; 6534 } 6535 return -EBUSY; 6536 } 6537 6538 static void close_pipe_on_cpu(struct trace_array *tr, int cpu) 6539 { 6540 if (cpu == RING_BUFFER_ALL_CPUS) { 6541 WARN_ON(!cpumask_full(tr->pipe_cpumask)); 6542 cpumask_clear(tr->pipe_cpumask); 6543 } else { 6544 WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask)); 6545 cpumask_clear_cpu(cpu, tr->pipe_cpumask); 6546 } 6547 } 6548 6549 static int tracing_open_pipe(struct inode *inode, struct file *filp) 6550 { 6551 struct trace_array *tr = inode->i_private; 6552 struct trace_iterator *iter; 6553 int cpu; 6554 int ret; 6555 6556 ret = tracing_check_open_get_tr(tr); 6557 if (ret) 6558 return ret; 6559 6560 guard(mutex)(&trace_types_lock); 6561 cpu = tracing_get_cpu(inode); 6562 ret = open_pipe_on_cpu(tr, cpu); 6563 if (ret) 6564 goto fail_pipe_on_cpu; 6565 6566 /* create a buffer to store the information to pass to userspace */ 6567 iter = kzalloc(sizeof(*iter), GFP_KERNEL); 6568 if (!iter) { 6569 ret = -ENOMEM; 6570 goto fail_alloc_iter; 6571 } 6572 6573 trace_seq_init(&iter->seq); 6574 iter->trace = tr->current_trace; 6575 6576 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) { 6577 ret = -ENOMEM; 6578 goto fail; 6579 } 6580 6581 /* trace pipe does not show start of buffer */ 6582 cpumask_setall(iter->started); 6583 6584 if (tr->trace_flags & TRACE_ITER(LATENCY_FMT)) 6585 iter->iter_flags |= TRACE_FILE_LAT_FMT; 6586 6587 /* Output in nanoseconds only if we are using a clock in nanoseconds. 
*/ 6588 if (trace_clocks[tr->clock_id].in_ns) 6589 iter->iter_flags |= TRACE_FILE_TIME_IN_NS; 6590 6591 iter->tr = tr; 6592 iter->array_buffer = &tr->array_buffer; 6593 iter->cpu_file = cpu; 6594 mutex_init(&iter->mutex); 6595 filp->private_data = iter; 6596 6597 if (iter->trace->pipe_open) 6598 iter->trace->pipe_open(iter); 6599 6600 nonseekable_open(inode, filp); 6601 6602 tr->trace_ref++; 6603 6604 return ret; 6605 6606 fail: 6607 kfree(iter); 6608 fail_alloc_iter: 6609 close_pipe_on_cpu(tr, cpu); 6610 fail_pipe_on_cpu: 6611 __trace_array_put(tr); 6612 return ret; 6613 } 6614 6615 static int tracing_release_pipe(struct inode *inode, struct file *file) 6616 { 6617 struct trace_iterator *iter = file->private_data; 6618 struct trace_array *tr = inode->i_private; 6619 6620 scoped_guard(mutex, &trace_types_lock) { 6621 tr->trace_ref--; 6622 6623 if (iter->trace->pipe_close) 6624 iter->trace->pipe_close(iter); 6625 close_pipe_on_cpu(tr, iter->cpu_file); 6626 } 6627 6628 free_trace_iter_content(iter); 6629 kfree(iter); 6630 6631 trace_array_put(tr); 6632 6633 return 0; 6634 } 6635 6636 static __poll_t 6637 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table) 6638 { 6639 struct trace_array *tr = iter->tr; 6640 6641 /* Iterators are static, they should be filled or empty */ 6642 if (trace_buffer_iter(iter, iter->cpu_file)) 6643 return EPOLLIN | EPOLLRDNORM; 6644 6645 if (tr->trace_flags & TRACE_ITER(BLOCK)) 6646 /* 6647 * Always select as readable when in blocking mode 6648 */ 6649 return EPOLLIN | EPOLLRDNORM; 6650 else 6651 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file, 6652 filp, poll_table, iter->tr->buffer_percent); 6653 } 6654 6655 static __poll_t 6656 tracing_poll_pipe(struct file *filp, poll_table *poll_table) 6657 { 6658 struct trace_iterator *iter = filp->private_data; 6659 6660 return trace_poll(iter, filp, poll_table); 6661 } 6662 6663 /* Must be called with iter->mutex held. */ 6664 static int tracing_wait_pipe(struct file *filp) 6665 { 6666 struct trace_iterator *iter = filp->private_data; 6667 int ret; 6668 6669 while (trace_empty(iter)) { 6670 6671 if ((filp->f_flags & O_NONBLOCK)) { 6672 return -EAGAIN; 6673 } 6674 6675 /* 6676 * We block until we read something and tracing is disabled. 6677 * We still block if tracing is disabled, but we have never 6678 * read anything. This allows a user to cat this file, and 6679 * then enable tracing. But after we have read something, 6680 * we give an EOF when tracing is again disabled. 6681 * 6682 * iter->pos will be 0 if we haven't read anything. 6683 */ 6684 if (!tracer_tracing_is_on(iter->tr) && iter->pos) 6685 break; 6686 6687 mutex_unlock(&iter->mutex); 6688 6689 ret = wait_on_pipe(iter, 0); 6690 6691 mutex_lock(&iter->mutex); 6692 6693 if (ret) 6694 return ret; 6695 } 6696 6697 return 1; 6698 } 6699 6700 static bool update_last_data_if_empty(struct trace_array *tr) 6701 { 6702 if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) 6703 return false; 6704 6705 if (!ring_buffer_empty(tr->array_buffer.buffer)) 6706 return false; 6707 6708 /* 6709 * If the buffer contains the last boot data and all per-cpu 6710 * buffers are empty, reset it from the kernel side. 6711 */ 6712 update_last_data(tr); 6713 return true; 6714 } 6715 6716 /* 6717 * Consumer reader. 
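 * Unlike the "trace" file, every event copied to user space here is
 * consumed (removed) from the ring buffer via trace_consume().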
6718 */ 6719 static ssize_t 6720 tracing_read_pipe(struct file *filp, char __user *ubuf, 6721 size_t cnt, loff_t *ppos) 6722 { 6723 struct trace_iterator *iter = filp->private_data; 6724 ssize_t sret; 6725 6726 /* 6727 * Avoid more than one consumer on a single file descriptor 6728 * This is just a matter of traces coherency, the ring buffer itself 6729 * is protected. 6730 */ 6731 guard(mutex)(&iter->mutex); 6732 6733 /* return any leftover data */ 6734 sret = trace_seq_to_user(&iter->seq, ubuf, cnt); 6735 if (sret != -EBUSY) 6736 return sret; 6737 6738 trace_seq_init(&iter->seq); 6739 6740 if (iter->trace->read) { 6741 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos); 6742 if (sret) 6743 return sret; 6744 } 6745 6746 waitagain: 6747 if (update_last_data_if_empty(iter->tr)) 6748 return 0; 6749 6750 sret = tracing_wait_pipe(filp); 6751 if (sret <= 0) 6752 return sret; 6753 6754 /* stop when tracing is finished */ 6755 if (trace_empty(iter)) 6756 return 0; 6757 6758 if (cnt >= TRACE_SEQ_BUFFER_SIZE) 6759 cnt = TRACE_SEQ_BUFFER_SIZE - 1; 6760 6761 /* reset all but tr, trace, and overruns */ 6762 trace_iterator_reset(iter); 6763 cpumask_clear(iter->started); 6764 trace_seq_init(&iter->seq); 6765 6766 trace_event_read_lock(); 6767 trace_access_lock(iter->cpu_file); 6768 while (trace_find_next_entry_inc(iter) != NULL) { 6769 enum print_line_t ret; 6770 int save_len = iter->seq.seq.len; 6771 6772 ret = print_trace_line(iter); 6773 if (ret == TRACE_TYPE_PARTIAL_LINE) { 6774 /* 6775 * If one print_trace_line() fills entire trace_seq in one shot, 6776 * trace_seq_to_user() will returns -EBUSY because save_len == 0, 6777 * In this case, we need to consume it, otherwise, loop will peek 6778 * this event next time, resulting in an infinite loop. 6779 */ 6780 if (save_len == 0) { 6781 iter->seq.full = 0; 6782 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n"); 6783 trace_consume(iter); 6784 break; 6785 } 6786 6787 /* In other cases, don't print partial lines */ 6788 iter->seq.seq.len = save_len; 6789 break; 6790 } 6791 if (ret != TRACE_TYPE_NO_CONSUME) 6792 trace_consume(iter); 6793 6794 if (trace_seq_used(&iter->seq) >= cnt) 6795 break; 6796 6797 /* 6798 * Setting the full flag means we reached the trace_seq buffer 6799 * size and we should leave by partial output condition above. 6800 * One of the trace_seq_* functions is not used properly. 6801 */ 6802 WARN_ONCE(iter->seq.full, "full flag set for trace type %d", 6803 iter->ent->type); 6804 } 6805 trace_access_unlock(iter->cpu_file); 6806 trace_event_read_unlock(); 6807 6808 /* Now copy what we have to the user */ 6809 sret = trace_seq_to_user(&iter->seq, ubuf, cnt); 6810 if (iter->seq.readpos >= trace_seq_used(&iter->seq)) 6811 trace_seq_init(&iter->seq); 6812 6813 /* 6814 * If there was nothing to send to user, in spite of consuming trace 6815 * entries, go back to wait for more entries. 6816 */ 6817 if (sret == -EBUSY) 6818 goto waitagain; 6819 6820 return sret; 6821 } 6822 6823 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd, 6824 unsigned int idx) 6825 { 6826 __free_page(spd->pages[idx]); 6827 } 6828 6829 static size_t 6830 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter) 6831 { 6832 size_t count; 6833 int save_len; 6834 int ret; 6835 6836 /* Seq buffer is page-sized, exactly what we need. 
*/ 6837 for (;;) { 6838 save_len = iter->seq.seq.len; 6839 ret = print_trace_line(iter); 6840 6841 if (trace_seq_has_overflowed(&iter->seq)) { 6842 iter->seq.seq.len = save_len; 6843 break; 6844 } 6845 6846 /* 6847 * This should not be hit, because it should only 6848 * be set if the iter->seq overflowed. But check it 6849 * anyway to be safe. 6850 */ 6851 if (ret == TRACE_TYPE_PARTIAL_LINE) { 6852 iter->seq.seq.len = save_len; 6853 break; 6854 } 6855 6856 count = trace_seq_used(&iter->seq) - save_len; 6857 if (rem < count) { 6858 rem = 0; 6859 iter->seq.seq.len = save_len; 6860 break; 6861 } 6862 6863 if (ret != TRACE_TYPE_NO_CONSUME) 6864 trace_consume(iter); 6865 rem -= count; 6866 if (!trace_find_next_entry_inc(iter)) { 6867 rem = 0; 6868 iter->ent = NULL; 6869 break; 6870 } 6871 } 6872 6873 return rem; 6874 } 6875 6876 static ssize_t tracing_splice_read_pipe(struct file *filp, 6877 loff_t *ppos, 6878 struct pipe_inode_info *pipe, 6879 size_t len, 6880 unsigned int flags) 6881 { 6882 struct page *pages_def[PIPE_DEF_BUFFERS]; 6883 struct partial_page partial_def[PIPE_DEF_BUFFERS]; 6884 struct trace_iterator *iter = filp->private_data; 6885 struct splice_pipe_desc spd = { 6886 .pages = pages_def, 6887 .partial = partial_def, 6888 .nr_pages = 0, /* This gets updated below. */ 6889 .nr_pages_max = PIPE_DEF_BUFFERS, 6890 .ops = &default_pipe_buf_ops, 6891 .spd_release = tracing_spd_release_pipe, 6892 }; 6893 ssize_t ret; 6894 size_t rem; 6895 unsigned int i; 6896 6897 if (splice_grow_spd(pipe, &spd)) 6898 return -ENOMEM; 6899 6900 mutex_lock(&iter->mutex); 6901 6902 if (iter->trace->splice_read) { 6903 ret = iter->trace->splice_read(iter, filp, 6904 ppos, pipe, len, flags); 6905 if (ret) 6906 goto out_err; 6907 } 6908 6909 ret = tracing_wait_pipe(filp); 6910 if (ret <= 0) 6911 goto out_err; 6912 6913 if (!iter->ent && !trace_find_next_entry_inc(iter)) { 6914 ret = -EFAULT; 6915 goto out_err; 6916 } 6917 6918 trace_event_read_lock(); 6919 trace_access_lock(iter->cpu_file); 6920 6921 /* Fill as many pages as possible. */ 6922 for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) { 6923 spd.pages[i] = alloc_page(GFP_KERNEL); 6924 if (!spd.pages[i]) 6925 break; 6926 6927 rem = tracing_fill_pipe_page(rem, iter); 6928 6929 /* Copy the data into the page, so we can start over. 
*/ 6930 ret = trace_seq_to_buffer(&iter->seq, 6931 page_address(spd.pages[i]), 6932 min((size_t)trace_seq_used(&iter->seq), 6933 (size_t)PAGE_SIZE)); 6934 if (ret < 0) { 6935 __free_page(spd.pages[i]); 6936 break; 6937 } 6938 spd.partial[i].offset = 0; 6939 spd.partial[i].len = ret; 6940 6941 trace_seq_init(&iter->seq); 6942 } 6943 6944 trace_access_unlock(iter->cpu_file); 6945 trace_event_read_unlock(); 6946 mutex_unlock(&iter->mutex); 6947 6948 spd.nr_pages = i; 6949 6950 if (i) 6951 ret = splice_to_pipe(pipe, &spd); 6952 else 6953 ret = 0; 6954 out: 6955 splice_shrink_spd(&spd); 6956 return ret; 6957 6958 out_err: 6959 mutex_unlock(&iter->mutex); 6960 goto out; 6961 } 6962 6963 static ssize_t 6964 tracing_syscall_buf_read(struct file *filp, char __user *ubuf, 6965 size_t cnt, loff_t *ppos) 6966 { 6967 struct inode *inode = file_inode(filp); 6968 struct trace_array *tr = inode->i_private; 6969 char buf[64]; 6970 int r; 6971 6972 r = snprintf(buf, 64, "%d\n", tr->syscall_buf_sz); 6973 6974 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 6975 } 6976 6977 static ssize_t 6978 tracing_syscall_buf_write(struct file *filp, const char __user *ubuf, 6979 size_t cnt, loff_t *ppos) 6980 { 6981 struct inode *inode = file_inode(filp); 6982 struct trace_array *tr = inode->i_private; 6983 unsigned long val; 6984 int ret; 6985 6986 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 6987 if (ret) 6988 return ret; 6989 6990 if (val > SYSCALL_FAULT_USER_MAX) 6991 val = SYSCALL_FAULT_USER_MAX; 6992 6993 tr->syscall_buf_sz = val; 6994 6995 *ppos += cnt; 6996 6997 return cnt; 6998 } 6999 7000 static ssize_t 7001 tracing_entries_read(struct file *filp, char __user *ubuf, 7002 size_t cnt, loff_t *ppos) 7003 { 7004 struct inode *inode = file_inode(filp); 7005 struct trace_array *tr = inode->i_private; 7006 int cpu = tracing_get_cpu(inode); 7007 char buf[64]; 7008 int r = 0; 7009 ssize_t ret; 7010 7011 mutex_lock(&trace_types_lock); 7012 7013 if (cpu == RING_BUFFER_ALL_CPUS) { 7014 int cpu, buf_size_same; 7015 unsigned long size; 7016 7017 size = 0; 7018 buf_size_same = 1; 7019 /* check if all cpu sizes are same */ 7020 for_each_tracing_cpu(cpu) { 7021 /* fill in the size from first enabled cpu */ 7022 if (size == 0) 7023 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries; 7024 if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) { 7025 buf_size_same = 0; 7026 break; 7027 } 7028 } 7029 7030 if (buf_size_same) { 7031 if (!tr->ring_buffer_expanded) 7032 r = sprintf(buf, "%lu (expanded: %lu)\n", 7033 size >> 10, 7034 trace_buf_size >> 10); 7035 else 7036 r = sprintf(buf, "%lu\n", size >> 10); 7037 } else 7038 r = sprintf(buf, "X\n"); 7039 } else 7040 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10); 7041 7042 mutex_unlock(&trace_types_lock); 7043 7044 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 7045 return ret; 7046 } 7047 7048 static ssize_t 7049 tracing_entries_write(struct file *filp, const char __user *ubuf, 7050 size_t cnt, loff_t *ppos) 7051 { 7052 struct inode *inode = file_inode(filp); 7053 struct trace_array *tr = inode->i_private; 7054 unsigned long val; 7055 int ret; 7056 7057 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 7058 if (ret) 7059 return ret; 7060 7061 /* must have at least 1 entry */ 7062 if (!val) 7063 return -EINVAL; 7064 7065 /* value is in KB */ 7066 val <<= 10; 7067 ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode)); 7068 if (ret < 0) 7069 return ret; 7070 7071 *ppos += cnt; 7072 7073 return cnt; 7074 } 7075 
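/*
 * Usage sketch for the "buffer_size_kb" file implemented by
 * tracing_entries_read()/tracing_entries_write() above. Paths and sizes are
 * illustrative and assume tracefs is mounted at /sys/kernel/tracing:
 *
 *   # cat buffer_size_kb
 *   7 (expanded: 1408)           <- not yet expanded; target size in parens
 *   # echo 4096 > buffer_size_kb <- value is in KB, resizes all CPU buffers
 *
 * Writing to per_cpu/cpuN/buffer_size_kb resizes only that CPU's buffer, and
 * the top-level file then reports "X" until all per-CPU sizes match again.
 */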
7076 static ssize_t 7077 tracing_total_entries_read(struct file *filp, char __user *ubuf, 7078 size_t cnt, loff_t *ppos) 7079 { 7080 struct trace_array *tr = filp->private_data; 7081 char buf[64]; 7082 int r, cpu; 7083 unsigned long size = 0, expanded_size = 0; 7084 7085 mutex_lock(&trace_types_lock); 7086 for_each_tracing_cpu(cpu) { 7087 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10; 7088 if (!tr->ring_buffer_expanded) 7089 expanded_size += trace_buf_size >> 10; 7090 } 7091 if (tr->ring_buffer_expanded) 7092 r = sprintf(buf, "%lu\n", size); 7093 else 7094 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size); 7095 mutex_unlock(&trace_types_lock); 7096 7097 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 7098 } 7099 7100 #define LAST_BOOT_HEADER ((void *)1) 7101 7102 static void *l_next(struct seq_file *m, void *v, loff_t *pos) 7103 { 7104 struct trace_array *tr = m->private; 7105 struct trace_scratch *tscratch = tr->scratch; 7106 unsigned int index = *pos; 7107 7108 (*pos)++; 7109 7110 if (*pos == 1) 7111 return LAST_BOOT_HEADER; 7112 7113 /* Only show offsets of the last boot data */ 7114 if (!tscratch || !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) 7115 return NULL; 7116 7117 /* *pos 0 is for the header, 1 is for the first module */ 7118 index--; 7119 7120 if (index >= tscratch->nr_entries) 7121 return NULL; 7122 7123 return &tscratch->entries[index]; 7124 } 7125 7126 static void *l_start(struct seq_file *m, loff_t *pos) 7127 { 7128 mutex_lock(&scratch_mutex); 7129 7130 return l_next(m, NULL, pos); 7131 } 7132 7133 static void l_stop(struct seq_file *m, void *p) 7134 { 7135 mutex_unlock(&scratch_mutex); 7136 } 7137 7138 static void show_last_boot_header(struct seq_file *m, struct trace_array *tr) 7139 { 7140 struct trace_scratch *tscratch = tr->scratch; 7141 7142 /* 7143 * Do not leak KASLR address. This only shows the KASLR address of 7144 * the last boot. When the ring buffer is started, the LAST_BOOT 7145 * flag gets cleared, and this should only report "current". 7146 * Otherwise it shows the KASLR address from the previous boot which 7147 * should not be the same as the current boot. 
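 *
 * The header line is either "<text_addr>\t[kernel]" when last-boot data is
 * present, or "# Current" otherwise; l_show() below then emits one
 * "<mod_addr>\t<mod_name>" line per module recorded in the scratch area.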
7148 */ 7149 if (tscratch && (tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) 7150 seq_printf(m, "%lx\t[kernel]\n", tscratch->text_addr); 7151 else 7152 seq_puts(m, "# Current\n"); 7153 } 7154 7155 static int l_show(struct seq_file *m, void *v) 7156 { 7157 struct trace_array *tr = m->private; 7158 struct trace_mod_entry *entry = v; 7159 7160 if (v == LAST_BOOT_HEADER) { 7161 show_last_boot_header(m, tr); 7162 return 0; 7163 } 7164 7165 seq_printf(m, "%lx\t%s\n", entry->mod_addr, entry->mod_name); 7166 return 0; 7167 } 7168 7169 static const struct seq_operations last_boot_seq_ops = { 7170 .start = l_start, 7171 .next = l_next, 7172 .stop = l_stop, 7173 .show = l_show, 7174 }; 7175 7176 static int tracing_last_boot_open(struct inode *inode, struct file *file) 7177 { 7178 struct trace_array *tr = inode->i_private; 7179 struct seq_file *m; 7180 int ret; 7181 7182 ret = tracing_check_open_get_tr(tr); 7183 if (ret) 7184 return ret; 7185 7186 ret = seq_open(file, &last_boot_seq_ops); 7187 if (ret) { 7188 trace_array_put(tr); 7189 return ret; 7190 } 7191 7192 m = file->private_data; 7193 m->private = tr; 7194 7195 return 0; 7196 } 7197 7198 static int tracing_buffer_meta_open(struct inode *inode, struct file *filp) 7199 { 7200 struct trace_array *tr = inode->i_private; 7201 int cpu = tracing_get_cpu(inode); 7202 int ret; 7203 7204 ret = tracing_check_open_get_tr(tr); 7205 if (ret) 7206 return ret; 7207 7208 ret = ring_buffer_meta_seq_init(filp, tr->array_buffer.buffer, cpu); 7209 if (ret < 0) 7210 __trace_array_put(tr); 7211 return ret; 7212 } 7213 7214 static ssize_t 7215 tracing_free_buffer_write(struct file *filp, const char __user *ubuf, 7216 size_t cnt, loff_t *ppos) 7217 { 7218 /* 7219 * There is no need to read what the user has written, this function 7220 * is just to make sure that there is no error when "echo" is used 7221 */ 7222 7223 *ppos += cnt; 7224 7225 return cnt; 7226 } 7227 7228 static int 7229 tracing_free_buffer_release(struct inode *inode, struct file *filp) 7230 { 7231 struct trace_array *tr = inode->i_private; 7232 7233 /* disable tracing ? */ 7234 if (tr->trace_flags & TRACE_ITER(STOP_ON_FREE)) 7235 tracer_tracing_off(tr); 7236 /* resize the ring buffer to 0 */ 7237 tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS); 7238 7239 trace_array_put(tr); 7240 7241 return 0; 7242 } 7243 7244 #define TRACE_MARKER_MAX_SIZE 4096 7245 7246 static ssize_t write_marker_to_buffer(struct trace_array *tr, const char *buf, 7247 size_t cnt, unsigned long ip) 7248 { 7249 struct ring_buffer_event *event; 7250 enum event_trigger_type tt = ETT_NONE; 7251 struct trace_buffer *buffer; 7252 struct print_entry *entry; 7253 int meta_size; 7254 ssize_t written; 7255 size_t size; 7256 7257 meta_size = sizeof(*entry) + 2; /* add '\0' and possible '\n' */ 7258 again: 7259 size = cnt + meta_size; 7260 7261 buffer = tr->array_buffer.buffer; 7262 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, 7263 tracing_gen_ctx()); 7264 if (unlikely(!event)) { 7265 /* 7266 * If the size was greater than what was allowed, then 7267 * make it smaller and try again. 
7268 */ 7269 if (size > ring_buffer_max_event_size(buffer)) { 7270 cnt = ring_buffer_max_event_size(buffer) - meta_size; 7271 /* The above should only happen once */ 7272 if (WARN_ON_ONCE(cnt + meta_size == size)) 7273 return -EBADF; 7274 goto again; 7275 } 7276 7277 /* Ring buffer disabled, return as if not open for write */ 7278 return -EBADF; 7279 } 7280 7281 entry = ring_buffer_event_data(event); 7282 entry->ip = ip; 7283 memcpy(&entry->buf, buf, cnt); 7284 written = cnt; 7285 7286 if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) { 7287 /* do not add \n before testing triggers, but add \0 */ 7288 entry->buf[cnt] = '\0'; 7289 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event); 7290 } 7291 7292 if (entry->buf[cnt - 1] != '\n') { 7293 entry->buf[cnt] = '\n'; 7294 entry->buf[cnt + 1] = '\0'; 7295 } else 7296 entry->buf[cnt] = '\0'; 7297 7298 if (static_branch_unlikely(&trace_marker_exports_enabled)) 7299 ftrace_exports(event, TRACE_EXPORT_MARKER); 7300 __buffer_unlock_commit(buffer, event); 7301 7302 if (tt) 7303 event_triggers_post_call(tr->trace_marker_file, tt); 7304 7305 return written; 7306 } 7307 7308 struct trace_user_buf { 7309 char *buf; 7310 }; 7311 7312 static DEFINE_MUTEX(trace_user_buffer_mutex); 7313 static struct trace_user_buf_info *trace_user_buffer; 7314 7315 /** 7316 * trace_user_fault_destroy - free up allocated memory of a trace user buffer 7317 * @tinfo: The descriptor to free up 7318 * 7319 * Frees any data allocated in the trace info dsecriptor. 7320 */ 7321 void trace_user_fault_destroy(struct trace_user_buf_info *tinfo) 7322 { 7323 char *buf; 7324 int cpu; 7325 7326 if (!tinfo || !tinfo->tbuf) 7327 return; 7328 7329 for_each_possible_cpu(cpu) { 7330 buf = per_cpu_ptr(tinfo->tbuf, cpu)->buf; 7331 kfree(buf); 7332 } 7333 free_percpu(tinfo->tbuf); 7334 } 7335 7336 static int user_fault_buffer_enable(struct trace_user_buf_info *tinfo, size_t size) 7337 { 7338 char *buf; 7339 int cpu; 7340 7341 lockdep_assert_held(&trace_user_buffer_mutex); 7342 7343 tinfo->tbuf = alloc_percpu(struct trace_user_buf); 7344 if (!tinfo->tbuf) 7345 return -ENOMEM; 7346 7347 tinfo->ref = 1; 7348 tinfo->size = size; 7349 7350 /* Clear each buffer in case of error */ 7351 for_each_possible_cpu(cpu) { 7352 per_cpu_ptr(tinfo->tbuf, cpu)->buf = NULL; 7353 } 7354 7355 for_each_possible_cpu(cpu) { 7356 buf = kmalloc_node(size, GFP_KERNEL, 7357 cpu_to_node(cpu)); 7358 if (!buf) 7359 return -ENOMEM; 7360 per_cpu_ptr(tinfo->tbuf, cpu)->buf = buf; 7361 } 7362 7363 return 0; 7364 } 7365 7366 /* For internal use. Free and reinitialize */ 7367 static void user_buffer_free(struct trace_user_buf_info **tinfo) 7368 { 7369 lockdep_assert_held(&trace_user_buffer_mutex); 7370 7371 trace_user_fault_destroy(*tinfo); 7372 kfree(*tinfo); 7373 *tinfo = NULL; 7374 } 7375 7376 /* For internal use. 
Initialize and allocate */ 7377 static int user_buffer_init(struct trace_user_buf_info **tinfo, size_t size) 7378 { 7379 bool alloc = false; 7380 int ret; 7381 7382 lockdep_assert_held(&trace_user_buffer_mutex); 7383 7384 if (!*tinfo) { 7385 alloc = true; 7386 *tinfo = kzalloc(sizeof(**tinfo), GFP_KERNEL); 7387 if (!*tinfo) 7388 return -ENOMEM; 7389 } 7390 7391 ret = user_fault_buffer_enable(*tinfo, size); 7392 if (ret < 0 && alloc) 7393 user_buffer_free(tinfo); 7394 7395 return ret; 7396 } 7397 7398 /* For internal use, derefrence and free if necessary */ 7399 static void user_buffer_put(struct trace_user_buf_info **tinfo) 7400 { 7401 guard(mutex)(&trace_user_buffer_mutex); 7402 7403 if (WARN_ON_ONCE(!*tinfo || !(*tinfo)->ref)) 7404 return; 7405 7406 if (--(*tinfo)->ref) 7407 return; 7408 7409 user_buffer_free(tinfo); 7410 } 7411 7412 /** 7413 * trace_user_fault_init - Allocated or reference a per CPU buffer 7414 * @tinfo: A pointer to the trace buffer descriptor 7415 * @size: The size to allocate each per CPU buffer 7416 * 7417 * Create a per CPU buffer that can be used to copy from user space 7418 * in a task context. When calling trace_user_fault_read(), preemption 7419 * must be disabled, and it will enable preemption and copy user 7420 * space data to the buffer. If any schedule switches occur, it will 7421 * retry until it succeeds without a schedule switch knowing the buffer 7422 * is still valid. 7423 * 7424 * Returns 0 on success, negative on failure. 7425 */ 7426 int trace_user_fault_init(struct trace_user_buf_info *tinfo, size_t size) 7427 { 7428 int ret; 7429 7430 if (!tinfo) 7431 return -EINVAL; 7432 7433 guard(mutex)(&trace_user_buffer_mutex); 7434 7435 ret = user_buffer_init(&tinfo, size); 7436 if (ret < 0) 7437 trace_user_fault_destroy(tinfo); 7438 7439 return ret; 7440 } 7441 7442 /** 7443 * trace_user_fault_get - up the ref count for the user buffer 7444 * @tinfo: A pointer to a pointer to the trace buffer descriptor 7445 * 7446 * Ups the ref count of the trace buffer. 7447 * 7448 * Returns the new ref count. 7449 */ 7450 int trace_user_fault_get(struct trace_user_buf_info *tinfo) 7451 { 7452 if (!tinfo) 7453 return -1; 7454 7455 guard(mutex)(&trace_user_buffer_mutex); 7456 7457 tinfo->ref++; 7458 return tinfo->ref; 7459 } 7460 7461 /** 7462 * trace_user_fault_put - dereference a per cpu trace buffer 7463 * @tinfo: The @tinfo that was passed to trace_user_fault_get() 7464 * 7465 * Decrement the ref count of @tinfo. 7466 * 7467 * Returns the new refcount (negative on error). 7468 */ 7469 int trace_user_fault_put(struct trace_user_buf_info *tinfo) 7470 { 7471 guard(mutex)(&trace_user_buffer_mutex); 7472 7473 if (WARN_ON_ONCE(!tinfo || !tinfo->ref)) 7474 return -1; 7475 7476 --tinfo->ref; 7477 return tinfo->ref; 7478 } 7479 7480 /** 7481 * trace_user_fault_read - Read user space into a per CPU buffer 7482 * @tinfo: The @tinfo allocated by trace_user_fault_get() 7483 * @ptr: The user space pointer to read 7484 * @size: The size of user space to read. 7485 * @copy_func: Optional function to use to copy from user space 7486 * @data: Data to pass to copy_func if it was supplied 7487 * 7488 * Preemption must be disabled when this is called, and must not 7489 * be enabled while using the returned buffer. 7490 * This does the copying from user space into a per CPU buffer. 7491 * 7492 * The @size must not be greater than the size passed in to 7493 * trace_user_fault_init(). 
7494 * 7495 * If @copy_func is NULL, trace_user_fault_read() will use copy_from_user(), 7496 * otherwise it will call @copy_func. It will call @copy_func with: 7497 * 7498 * buffer: the per CPU buffer of the @tinfo. 7499 * ptr: The pointer @ptr to user space to read 7500 * size: The @size of the ptr to read 7501 * data: The @data parameter 7502 * 7503 * It is expected that @copy_func will return 0 on success and non zero 7504 * if there was a fault. 7505 * 7506 * Returns a pointer to the buffer with the content read from @ptr. 7507 * Preemption must remain disabled while the caller accesses the 7508 * buffer returned by this function. 7509 * Returns NULL if there was a fault, or the size passed in is 7510 * greater than the size passed to trace_user_fault_init(). 7511 */ 7512 char *trace_user_fault_read(struct trace_user_buf_info *tinfo, 7513 const char __user *ptr, size_t size, 7514 trace_user_buf_copy copy_func, void *data) 7515 { 7516 int cpu = smp_processor_id(); 7517 char *buffer = per_cpu_ptr(tinfo->tbuf, cpu)->buf; 7518 unsigned int cnt; 7519 int trys = 0; 7520 int ret; 7521 7522 lockdep_assert_preemption_disabled(); 7523 7524 /* 7525 * It's up to the caller to not try to copy more than it said 7526 * it would. 7527 */ 7528 if (size > tinfo->size) 7529 return NULL; 7530 7531 /* 7532 * This acts similar to a seqcount. The per CPU context switches are 7533 * recorded, migration is disabled and preemption is enabled. The 7534 * read of the user space memory is copied into the per CPU buffer. 7535 * Preemption is disabled again, and if the per CPU context switches count 7536 * is still the same, it means the buffer has not been corrupted. 7537 * If the count is different, it is assumed the buffer is corrupted 7538 * and reading must be tried again. 7539 */ 7540 7541 do { 7542 /* 7543 * If for some reason, copy_from_user() always causes a context 7544 * switch, this would then cause an infinite loop. 7545 * If this task is preempted by another user space task, it 7546 * will cause this task to try again. But just in case something 7547 * changes where the copying from user space causes another task 7548 * to run, prevent this from going into an infinite loop. 7549 * 100 tries should be plenty. 7550 */ 7551 if (WARN_ONCE(trys++ > 100, "Error: Too many tries to read user space")) 7552 return NULL; 7553 7554 /* Read the current CPU context switch counter */ 7555 cnt = nr_context_switches_cpu(cpu); 7556 7557 /* 7558 * Preemption is going to be enabled, but this task must 7559 * remain on this CPU. 7560 */ 7561 migrate_disable(); 7562 7563 /* 7564 * Now preemption is being enabled and another task can come in 7565 * and use the same buffer and corrupt our data. 7566 */ 7567 preempt_enable_notrace(); 7568 7569 /* Make sure preemption is enabled here */ 7570 lockdep_assert_preemption_enabled(); 7571 7572 if (copy_func) { 7573 ret = copy_func(buffer, ptr, size, data); 7574 } else { 7575 ret = __copy_from_user(buffer, ptr, size); 7576 } 7577 7578 preempt_disable_notrace(); 7579 migrate_enable(); 7580 7581 /* if it faulted, no need to test if the buffer was corrupted */ 7582 if (ret) 7583 return NULL; 7584 7585 /* 7586 * Preemption is disabled again, now check the per CPU context 7587 * switch counter. If it doesn't match, then another user space 7588 * process may have schedule in and corrupted our buffer. In that 7589 * case the copying must be retried. 
7590 */ 7591 } while (nr_context_switches_cpu(cpu) != cnt); 7592 7593 return buffer; 7594 } 7595 7596 static ssize_t 7597 tracing_mark_write(struct file *filp, const char __user *ubuf, 7598 size_t cnt, loff_t *fpos) 7599 { 7600 struct trace_array *tr = filp->private_data; 7601 ssize_t written = -ENODEV; 7602 unsigned long ip; 7603 char *buf; 7604 7605 if (tracing_disabled) 7606 return -EINVAL; 7607 7608 if (!(tr->trace_flags & TRACE_ITER(MARKERS))) 7609 return -EINVAL; 7610 7611 if ((ssize_t)cnt < 0) 7612 return -EINVAL; 7613 7614 if (cnt > TRACE_MARKER_MAX_SIZE) 7615 cnt = TRACE_MARKER_MAX_SIZE; 7616 7617 /* Must have preemption disabled while having access to the buffer */ 7618 guard(preempt_notrace)(); 7619 7620 buf = trace_user_fault_read(trace_user_buffer, ubuf, cnt, NULL, NULL); 7621 if (!buf) 7622 return -EFAULT; 7623 7624 /* The selftests expect this function to be the IP address */ 7625 ip = _THIS_IP_; 7626 7627 /* The global trace_marker can go to multiple instances */ 7628 if (tr == &global_trace) { 7629 guard(rcu)(); 7630 list_for_each_entry_rcu(tr, &marker_copies, marker_list) { 7631 written = write_marker_to_buffer(tr, buf, cnt, ip); 7632 if (written < 0) 7633 break; 7634 } 7635 } else { 7636 written = write_marker_to_buffer(tr, buf, cnt, ip); 7637 } 7638 7639 return written; 7640 } 7641 7642 static ssize_t write_raw_marker_to_buffer(struct trace_array *tr, 7643 const char *buf, size_t cnt) 7644 { 7645 struct ring_buffer_event *event; 7646 struct trace_buffer *buffer; 7647 struct raw_data_entry *entry; 7648 ssize_t written; 7649 size_t size; 7650 7651 /* cnt includes both the entry->id and the data behind it. */ 7652 size = struct_offset(entry, id) + cnt; 7653 7654 buffer = tr->array_buffer.buffer; 7655 7656 if (size > ring_buffer_max_event_size(buffer)) 7657 return -EINVAL; 7658 7659 event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size, 7660 tracing_gen_ctx()); 7661 if (!event) 7662 /* Ring buffer disabled, return as if not open for write */ 7663 return -EBADF; 7664 7665 entry = ring_buffer_event_data(event); 7666 unsafe_memcpy(&entry->id, buf, cnt, 7667 "id and content already reserved on ring buffer" 7668 "'buf' includes the 'id' and the data." 
7669 "'entry' was allocated with cnt from 'id'."); 7670 written = cnt; 7671 7672 __buffer_unlock_commit(buffer, event); 7673 7674 return written; 7675 } 7676 7677 static ssize_t 7678 tracing_mark_raw_write(struct file *filp, const char __user *ubuf, 7679 size_t cnt, loff_t *fpos) 7680 { 7681 struct trace_array *tr = filp->private_data; 7682 ssize_t written = -ENODEV; 7683 char *buf; 7684 7685 if (tracing_disabled) 7686 return -EINVAL; 7687 7688 if (!(tr->trace_flags & TRACE_ITER(MARKERS))) 7689 return -EINVAL; 7690 7691 /* The marker must at least have a tag id */ 7692 if (cnt < sizeof(unsigned int)) 7693 return -EINVAL; 7694 7695 /* raw write is all or nothing */ 7696 if (cnt > TRACE_MARKER_MAX_SIZE) 7697 return -EINVAL; 7698 7699 /* Must have preemption disabled while having access to the buffer */ 7700 guard(preempt_notrace)(); 7701 7702 buf = trace_user_fault_read(trace_user_buffer, ubuf, cnt, NULL, NULL); 7703 if (!buf) 7704 return -EFAULT; 7705 7706 /* The global trace_marker_raw can go to multiple instances */ 7707 if (tr == &global_trace) { 7708 guard(rcu)(); 7709 list_for_each_entry_rcu(tr, &marker_copies, marker_list) { 7710 written = write_raw_marker_to_buffer(tr, buf, cnt); 7711 if (written < 0) 7712 break; 7713 } 7714 } else { 7715 written = write_raw_marker_to_buffer(tr, buf, cnt); 7716 } 7717 7718 return written; 7719 } 7720 7721 static int tracing_mark_open(struct inode *inode, struct file *filp) 7722 { 7723 int ret; 7724 7725 scoped_guard(mutex, &trace_user_buffer_mutex) { 7726 if (!trace_user_buffer) { 7727 ret = user_buffer_init(&trace_user_buffer, TRACE_MARKER_MAX_SIZE); 7728 if (ret < 0) 7729 return ret; 7730 } else { 7731 trace_user_buffer->ref++; 7732 } 7733 } 7734 7735 stream_open(inode, filp); 7736 ret = tracing_open_generic_tr(inode, filp); 7737 if (ret < 0) 7738 user_buffer_put(&trace_user_buffer); 7739 return ret; 7740 } 7741 7742 static int tracing_mark_release(struct inode *inode, struct file *file) 7743 { 7744 user_buffer_put(&trace_user_buffer); 7745 return tracing_release_generic_tr(inode, file); 7746 } 7747 7748 static int tracing_clock_show(struct seq_file *m, void *v) 7749 { 7750 struct trace_array *tr = m->private; 7751 int i; 7752 7753 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) 7754 seq_printf(m, 7755 "%s%s%s%s", i ? " " : "", 7756 i == tr->clock_id ? "[" : "", trace_clocks[i].name, 7757 i == tr->clock_id ? "]" : ""); 7758 seq_putc(m, '\n'); 7759 7760 return 0; 7761 } 7762 7763 int tracing_set_clock(struct trace_array *tr, const char *clockstr) 7764 { 7765 int i; 7766 7767 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) { 7768 if (strcmp(trace_clocks[i].name, clockstr) == 0) 7769 break; 7770 } 7771 if (i == ARRAY_SIZE(trace_clocks)) 7772 return -EINVAL; 7773 7774 guard(mutex)(&trace_types_lock); 7775 7776 tr->clock_id = i; 7777 7778 ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func); 7779 7780 /* 7781 * New clock may not be consistent with the previous clock. 7782 * Reset the buffer so that it doesn't have incomparable timestamps. 
7783 */ 7784 tracing_reset_online_cpus(&tr->array_buffer); 7785 7786 #ifdef CONFIG_TRACER_MAX_TRACE 7787 if (tr->max_buffer.buffer) 7788 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func); 7789 tracing_reset_online_cpus(&tr->max_buffer); 7790 #endif 7791 7792 if (tr->scratch && !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) { 7793 struct trace_scratch *tscratch = tr->scratch; 7794 7795 tscratch->clock_id = i; 7796 } 7797 7798 return 0; 7799 } 7800 7801 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf, 7802 size_t cnt, loff_t *fpos) 7803 { 7804 struct seq_file *m = filp->private_data; 7805 struct trace_array *tr = m->private; 7806 char buf[64]; 7807 const char *clockstr; 7808 int ret; 7809 7810 if (cnt >= sizeof(buf)) 7811 return -EINVAL; 7812 7813 if (copy_from_user(buf, ubuf, cnt)) 7814 return -EFAULT; 7815 7816 buf[cnt] = 0; 7817 7818 clockstr = strstrip(buf); 7819 7820 ret = tracing_set_clock(tr, clockstr); 7821 if (ret) 7822 return ret; 7823 7824 *fpos += cnt; 7825 7826 return cnt; 7827 } 7828 7829 static int tracing_clock_open(struct inode *inode, struct file *file) 7830 { 7831 struct trace_array *tr = inode->i_private; 7832 int ret; 7833 7834 ret = tracing_check_open_get_tr(tr); 7835 if (ret) 7836 return ret; 7837 7838 ret = single_open(file, tracing_clock_show, inode->i_private); 7839 if (ret < 0) 7840 trace_array_put(tr); 7841 7842 return ret; 7843 } 7844 7845 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v) 7846 { 7847 struct trace_array *tr = m->private; 7848 7849 guard(mutex)(&trace_types_lock); 7850 7851 if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer)) 7852 seq_puts(m, "delta [absolute]\n"); 7853 else 7854 seq_puts(m, "[delta] absolute\n"); 7855 7856 return 0; 7857 } 7858 7859 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file) 7860 { 7861 struct trace_array *tr = inode->i_private; 7862 int ret; 7863 7864 ret = tracing_check_open_get_tr(tr); 7865 if (ret) 7866 return ret; 7867 7868 ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private); 7869 if (ret < 0) 7870 trace_array_put(tr); 7871 7872 return ret; 7873 } 7874 7875 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe) 7876 { 7877 if (rbe == this_cpu_read(trace_buffered_event)) 7878 return ring_buffer_time_stamp(buffer); 7879 7880 return ring_buffer_event_time_stamp(buffer, rbe); 7881 } 7882 7883 /* 7884 * Set or disable using the per CPU trace_buffer_event when possible. 
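 * Calls nest: each call with @set true must later be balanced by a call
 * with @set false, tracked by tr->no_filter_buffering_ref below.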
7885 */ 7886 int tracing_set_filter_buffering(struct trace_array *tr, bool set) 7887 { 7888 guard(mutex)(&trace_types_lock); 7889 7890 if (set && tr->no_filter_buffering_ref++) 7891 return 0; 7892 7893 if (!set) { 7894 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) 7895 return -EINVAL; 7896 7897 --tr->no_filter_buffering_ref; 7898 } 7899 7900 return 0; 7901 } 7902 7903 struct ftrace_buffer_info { 7904 struct trace_iterator iter; 7905 void *spare; 7906 unsigned int spare_cpu; 7907 unsigned int spare_size; 7908 unsigned int read; 7909 }; 7910 7911 #ifdef CONFIG_TRACER_SNAPSHOT 7912 static int tracing_snapshot_open(struct inode *inode, struct file *file) 7913 { 7914 struct trace_array *tr = inode->i_private; 7915 struct trace_iterator *iter; 7916 struct seq_file *m; 7917 int ret; 7918 7919 ret = tracing_check_open_get_tr(tr); 7920 if (ret) 7921 return ret; 7922 7923 if (file->f_mode & FMODE_READ) { 7924 iter = __tracing_open(inode, file, true); 7925 if (IS_ERR(iter)) 7926 ret = PTR_ERR(iter); 7927 } else { 7928 /* Writes still need the seq_file to hold the private data */ 7929 ret = -ENOMEM; 7930 m = kzalloc(sizeof(*m), GFP_KERNEL); 7931 if (!m) 7932 goto out; 7933 iter = kzalloc(sizeof(*iter), GFP_KERNEL); 7934 if (!iter) { 7935 kfree(m); 7936 goto out; 7937 } 7938 ret = 0; 7939 7940 iter->tr = tr; 7941 iter->array_buffer = &tr->max_buffer; 7942 iter->cpu_file = tracing_get_cpu(inode); 7943 m->private = iter; 7944 file->private_data = m; 7945 } 7946 out: 7947 if (ret < 0) 7948 trace_array_put(tr); 7949 7950 return ret; 7951 } 7952 7953 static void tracing_swap_cpu_buffer(void *tr) 7954 { 7955 update_max_tr_single((struct trace_array *)tr, current, smp_processor_id()); 7956 } 7957 7958 static ssize_t 7959 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt, 7960 loff_t *ppos) 7961 { 7962 struct seq_file *m = filp->private_data; 7963 struct trace_iterator *iter = m->private; 7964 struct trace_array *tr = iter->tr; 7965 unsigned long val; 7966 int ret; 7967 7968 ret = tracing_update_buffers(tr); 7969 if (ret < 0) 7970 return ret; 7971 7972 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 7973 if (ret) 7974 return ret; 7975 7976 guard(mutex)(&trace_types_lock); 7977 7978 if (tr->current_trace->use_max_tr) 7979 return -EBUSY; 7980 7981 local_irq_disable(); 7982 arch_spin_lock(&tr->max_lock); 7983 if (tr->cond_snapshot) 7984 ret = -EBUSY; 7985 arch_spin_unlock(&tr->max_lock); 7986 local_irq_enable(); 7987 if (ret) 7988 return ret; 7989 7990 switch (val) { 7991 case 0: 7992 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) 7993 return -EINVAL; 7994 if (tr->allocated_snapshot) 7995 free_snapshot(tr); 7996 break; 7997 case 1: 7998 /* Only allow per-cpu swap if the ring buffer supports it */ 7999 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP 8000 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) 8001 return -EINVAL; 8002 #endif 8003 if (tr->allocated_snapshot) 8004 ret = resize_buffer_duplicate_size(&tr->max_buffer, 8005 &tr->array_buffer, iter->cpu_file); 8006 8007 ret = tracing_arm_snapshot_locked(tr); 8008 if (ret) 8009 return ret; 8010 8011 /* Now, we're going to swap */ 8012 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) { 8013 local_irq_disable(); 8014 update_max_tr(tr, current, smp_processor_id(), NULL); 8015 local_irq_enable(); 8016 } else { 8017 smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer, 8018 (void *)tr, 1); 8019 } 8020 tracing_disarm_snapshot(tr); 8021 break; 8022 default: 8023 if (tr->allocated_snapshot) { 8024 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) 8025 
tracing_reset_online_cpus(&tr->max_buffer); 8026 else 8027 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file); 8028 } 8029 break; 8030 } 8031 8032 if (ret >= 0) { 8033 *ppos += cnt; 8034 ret = cnt; 8035 } 8036 8037 return ret; 8038 } 8039 8040 static int tracing_snapshot_release(struct inode *inode, struct file *file) 8041 { 8042 struct seq_file *m = file->private_data; 8043 int ret; 8044 8045 ret = tracing_release(inode, file); 8046 8047 if (file->f_mode & FMODE_READ) 8048 return ret; 8049 8050 /* If write only, the seq_file is just a stub */ 8051 if (m) 8052 kfree(m->private); 8053 kfree(m); 8054 8055 return 0; 8056 } 8057 8058 static int tracing_buffers_open(struct inode *inode, struct file *filp); 8059 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf, 8060 size_t count, loff_t *ppos); 8061 static int tracing_buffers_release(struct inode *inode, struct file *file); 8062 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos, 8063 struct pipe_inode_info *pipe, size_t len, unsigned int flags); 8064 8065 static int snapshot_raw_open(struct inode *inode, struct file *filp) 8066 { 8067 struct ftrace_buffer_info *info; 8068 int ret; 8069 8070 /* The following checks for tracefs lockdown */ 8071 ret = tracing_buffers_open(inode, filp); 8072 if (ret < 0) 8073 return ret; 8074 8075 info = filp->private_data; 8076 8077 if (info->iter.trace->use_max_tr) { 8078 tracing_buffers_release(inode, filp); 8079 return -EBUSY; 8080 } 8081 8082 info->iter.snapshot = true; 8083 info->iter.array_buffer = &info->iter.tr->max_buffer; 8084 8085 return ret; 8086 } 8087 8088 #endif /* CONFIG_TRACER_SNAPSHOT */ 8089 8090 8091 static const struct file_operations tracing_thresh_fops = { 8092 .open = tracing_open_generic, 8093 .read = tracing_thresh_read, 8094 .write = tracing_thresh_write, 8095 .llseek = generic_file_llseek, 8096 }; 8097 8098 #ifdef CONFIG_TRACER_MAX_TRACE 8099 static const struct file_operations tracing_max_lat_fops = { 8100 .open = tracing_open_generic_tr, 8101 .read = tracing_max_lat_read, 8102 .write = tracing_max_lat_write, 8103 .llseek = generic_file_llseek, 8104 .release = tracing_release_generic_tr, 8105 }; 8106 #endif 8107 8108 static const struct file_operations set_tracer_fops = { 8109 .open = tracing_open_generic_tr, 8110 .read = tracing_set_trace_read, 8111 .write = tracing_set_trace_write, 8112 .llseek = generic_file_llseek, 8113 .release = tracing_release_generic_tr, 8114 }; 8115 8116 static const struct file_operations tracing_pipe_fops = { 8117 .open = tracing_open_pipe, 8118 .poll = tracing_poll_pipe, 8119 .read = tracing_read_pipe, 8120 .splice_read = tracing_splice_read_pipe, 8121 .release = tracing_release_pipe, 8122 }; 8123 8124 static const struct file_operations tracing_entries_fops = { 8125 .open = tracing_open_generic_tr, 8126 .read = tracing_entries_read, 8127 .write = tracing_entries_write, 8128 .llseek = generic_file_llseek, 8129 .release = tracing_release_generic_tr, 8130 }; 8131 8132 static const struct file_operations tracing_syscall_buf_fops = { 8133 .open = tracing_open_generic_tr, 8134 .read = tracing_syscall_buf_read, 8135 .write = tracing_syscall_buf_write, 8136 .llseek = generic_file_llseek, 8137 .release = tracing_release_generic_tr, 8138 }; 8139 8140 static const struct file_operations tracing_buffer_meta_fops = { 8141 .open = tracing_buffer_meta_open, 8142 .read = seq_read, 8143 .llseek = seq_lseek, 8144 .release = tracing_seq_release, 8145 }; 8146 8147 static const struct file_operations tracing_total_entries_fops = 
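/*
 * Example sketch (not part of the original trace.c): the user-space view of
 * the semantics implemented by tracing_snapshot_write() above. Writing "1"
 * allocates the snapshot buffer if necessary and swaps it with the live
 * buffer, "0" frees an allocated snapshot, and any other value only clears
 * the snapshot contents. The tracefs mount point is the usual default and
 * is an assumption here.
 */
#include <fcntl.h>
#include <unistd.h>

static int take_snapshot(void)
{
	int fd = open("/sys/kernel/tracing/snapshot", O_WRONLY);
	int ret = -1;

	if (fd < 0)
		return -1;
	/* "1": swap the live ring buffer into the snapshot buffer */
	if (write(fd, "1", 1) == 1)
		ret = 0;
	close(fd);
	return ret;
}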
{ 8148 .open = tracing_open_generic_tr, 8149 .read = tracing_total_entries_read, 8150 .llseek = generic_file_llseek, 8151 .release = tracing_release_generic_tr, 8152 }; 8153 8154 static const struct file_operations tracing_free_buffer_fops = { 8155 .open = tracing_open_generic_tr, 8156 .write = tracing_free_buffer_write, 8157 .release = tracing_free_buffer_release, 8158 }; 8159 8160 static const struct file_operations tracing_mark_fops = { 8161 .open = tracing_mark_open, 8162 .write = tracing_mark_write, 8163 .release = tracing_mark_release, 8164 }; 8165 8166 static const struct file_operations tracing_mark_raw_fops = { 8167 .open = tracing_mark_open, 8168 .write = tracing_mark_raw_write, 8169 .release = tracing_mark_release, 8170 }; 8171 8172 static const struct file_operations trace_clock_fops = { 8173 .open = tracing_clock_open, 8174 .read = seq_read, 8175 .llseek = seq_lseek, 8176 .release = tracing_single_release_tr, 8177 .write = tracing_clock_write, 8178 }; 8179 8180 static const struct file_operations trace_time_stamp_mode_fops = { 8181 .open = tracing_time_stamp_mode_open, 8182 .read = seq_read, 8183 .llseek = seq_lseek, 8184 .release = tracing_single_release_tr, 8185 }; 8186 8187 static const struct file_operations last_boot_fops = { 8188 .open = tracing_last_boot_open, 8189 .read = seq_read, 8190 .llseek = seq_lseek, 8191 .release = tracing_seq_release, 8192 }; 8193 8194 #ifdef CONFIG_TRACER_SNAPSHOT 8195 static const struct file_operations snapshot_fops = { 8196 .open = tracing_snapshot_open, 8197 .read = seq_read, 8198 .write = tracing_snapshot_write, 8199 .llseek = tracing_lseek, 8200 .release = tracing_snapshot_release, 8201 }; 8202 8203 static const struct file_operations snapshot_raw_fops = { 8204 .open = snapshot_raw_open, 8205 .read = tracing_buffers_read, 8206 .release = tracing_buffers_release, 8207 .splice_read = tracing_buffers_splice_read, 8208 }; 8209 8210 #endif /* CONFIG_TRACER_SNAPSHOT */ 8211 8212 /* 8213 * trace_min_max_write - Write a u64 value to a trace_min_max_param struct 8214 * @filp: The active open file structure 8215 * @ubuf: The userspace provided buffer to read the value from 8216 * @cnt: The maximum number of bytes to read 8217 * @ppos: The current "file" position 8218 * 8219 * This function implements the write interface for a struct trace_min_max_param. 8220 * The filp->private_data must point to a trace_min_max_param structure that 8221 * defines where to write the value, the min and the max acceptable values, 8222 * and a lock to protect the write.
8223 */ 8224 static ssize_t 8225 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) 8226 { 8227 struct trace_min_max_param *param = filp->private_data; 8228 u64 val; 8229 int err; 8230 8231 if (!param) 8232 return -EFAULT; 8233 8234 err = kstrtoull_from_user(ubuf, cnt, 10, &val); 8235 if (err) 8236 return err; 8237 8238 if (param->lock) 8239 mutex_lock(param->lock); 8240 8241 if (param->min && val < *param->min) 8242 err = -EINVAL; 8243 8244 if (param->max && val > *param->max) 8245 err = -EINVAL; 8246 8247 if (!err) 8248 *param->val = val; 8249 8250 if (param->lock) 8251 mutex_unlock(param->lock); 8252 8253 if (err) 8254 return err; 8255 8256 return cnt; 8257 } 8258 8259 /* 8260 * trace_min_max_read - Read a u64 value from a trace_min_max_param struct 8261 * @filp: The active open file structure 8262 * @ubuf: The userspace provided buffer to read value into 8263 * @cnt: The maximum number of bytes to read 8264 * @ppos: The current "file" position 8265 * 8266 * This function implements the read interface for a struct trace_min_max_param. 8267 * The filp->private_data must point to a trace_min_max_param struct with valid 8268 * data. 8269 */ 8270 static ssize_t 8271 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) 8272 { 8273 struct trace_min_max_param *param = filp->private_data; 8274 char buf[U64_STR_SIZE]; 8275 int len; 8276 u64 val; 8277 8278 if (!param) 8279 return -EFAULT; 8280 8281 val = *param->val; 8282 8283 if (cnt > sizeof(buf)) 8284 cnt = sizeof(buf); 8285 8286 len = snprintf(buf, sizeof(buf), "%llu\n", val); 8287 8288 return simple_read_from_buffer(ubuf, cnt, ppos, buf, len); 8289 } 8290 8291 const struct file_operations trace_min_max_fops = { 8292 .open = tracing_open_generic, 8293 .read = trace_min_max_read, 8294 .write = trace_min_max_write, 8295 }; 8296 8297 #define TRACING_LOG_ERRS_MAX 8 8298 #define TRACING_LOG_LOC_MAX 128 8299 8300 #define CMD_PREFIX " Command: " 8301 8302 struct err_info { 8303 const char **errs; /* ptr to loc-specific array of err strings */ 8304 u8 type; /* index into errs -> specific err string */ 8305 u16 pos; /* caret position */ 8306 u64 ts; 8307 }; 8308 8309 struct tracing_log_err { 8310 struct list_head list; 8311 struct err_info info; 8312 char loc[TRACING_LOG_LOC_MAX]; /* err location */ 8313 char *cmd; /* what caused err */ 8314 }; 8315 8316 static DEFINE_MUTEX(tracing_err_log_lock); 8317 8318 static struct tracing_log_err *alloc_tracing_log_err(int len) 8319 { 8320 struct tracing_log_err *err; 8321 8322 err = kzalloc(sizeof(*err), GFP_KERNEL); 8323 if (!err) 8324 return ERR_PTR(-ENOMEM); 8325 8326 err->cmd = kzalloc(len, GFP_KERNEL); 8327 if (!err->cmd) { 8328 kfree(err); 8329 return ERR_PTR(-ENOMEM); 8330 } 8331 8332 return err; 8333 } 8334 8335 static void free_tracing_log_err(struct tracing_log_err *err) 8336 { 8337 kfree(err->cmd); 8338 kfree(err); 8339 } 8340 8341 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr, 8342 int len) 8343 { 8344 struct tracing_log_err *err; 8345 char *cmd; 8346 8347 if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) { 8348 err = alloc_tracing_log_err(len); 8349 if (PTR_ERR(err) != -ENOMEM) 8350 tr->n_err_log_entries++; 8351 8352 return err; 8353 } 8354 cmd = kzalloc(len, GFP_KERNEL); 8355 if (!cmd) 8356 return ERR_PTR(-ENOMEM); 8357 err = list_first_entry(&tr->err_log, struct tracing_log_err, list); 8358 kfree(err->cmd); 8359 err->cmd = cmd; 8360 list_del(&err->list); 8361 8362 return err; 8363 } 8364 8365 /** 8366 
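/*
 * Example sketch (not part of the original trace.c): wiring a u64 tunable
 * through trace_min_max_fops. The field layout of struct trace_min_max_param
 * is inferred from the accesses above (val/min/max pointers plus an optional
 * lock); the file name, variables and parent dentry are hypothetical.
 */
static u64 example_val = 50;
static u64 example_min = 1;
static u64 example_max = 100;
static DEFINE_MUTEX(example_lock);

static struct trace_min_max_param example_param = {
	.lock	= &example_lock,
	.val	= &example_val,
	.min	= &example_min,
	.max	= &example_max,
};

static void example_create_tunable(struct dentry *parent)
{
	/* Reads return example_val; writes outside [1, 100] get -EINVAL */
	trace_create_file("example_tunable", TRACE_MODE_WRITE, parent,
			  &example_param, &trace_min_max_fops);
}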
* err_pos - find the position of a string within a command for error careting 8367 * @cmd: The tracing command that caused the error 8368 * @str: The string to position the caret at within @cmd 8369 * 8370 * Finds the position of the first occurrence of @str within @cmd. The 8371 * return value can be passed to tracing_log_err() for caret placement 8372 * within @cmd. 8373 * 8374 * Returns the index within @cmd of the first occurrence of @str or 0 8375 * if @str was not found. 8376 */ 8377 unsigned int err_pos(char *cmd, const char *str) 8378 { 8379 char *found; 8380 8381 if (WARN_ON(!strlen(cmd))) 8382 return 0; 8383 8384 found = strstr(cmd, str); 8385 if (found) 8386 return found - cmd; 8387 8388 return 0; 8389 } 8390 8391 /** 8392 * tracing_log_err - write an error to the tracing error log 8393 * @tr: The associated trace array for the error (NULL for top level array) 8394 * @loc: A string describing where the error occurred 8395 * @cmd: The tracing command that caused the error 8396 * @errs: The array of loc-specific static error strings 8397 * @type: The index into errs[], which produces the specific static err string 8398 * @pos: The position the caret should be placed in the cmd 8399 * 8400 * Writes an error into tracing/error_log of the form: 8401 * 8402 * <loc>: error: <text> 8403 * Command: <cmd> 8404 * ^ 8405 * 8406 * tracing/error_log is a small log file containing the last 8407 * TRACING_LOG_ERRS_MAX errors (8). Memory for errors isn't allocated 8408 * unless there has been a tracing error, and the error log can be 8409 * cleared and have its memory freed by writing the empty string in 8410 * truncation mode to it i.e. echo > tracing/error_log. 8411 * 8412 * NOTE: the @errs array along with the @type param are used to 8413 * produce a static error string - this string is not copied and saved 8414 * when the error is logged - only a pointer to it is saved. See 8415 * existing callers for examples of how static strings are typically 8416 * defined for use with tracing_log_err(). 
8417 */ 8418 void tracing_log_err(struct trace_array *tr, 8419 const char *loc, const char *cmd, 8420 const char **errs, u8 type, u16 pos) 8421 { 8422 struct tracing_log_err *err; 8423 int len = 0; 8424 8425 if (!tr) 8426 tr = &global_trace; 8427 8428 len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1; 8429 8430 guard(mutex)(&tracing_err_log_lock); 8431 8432 err = get_tracing_log_err(tr, len); 8433 if (PTR_ERR(err) == -ENOMEM) 8434 return; 8435 8436 snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc); 8437 snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd); 8438 8439 err->info.errs = errs; 8440 err->info.type = type; 8441 err->info.pos = pos; 8442 err->info.ts = local_clock(); 8443 8444 list_add_tail(&err->list, &tr->err_log); 8445 } 8446 8447 static void clear_tracing_err_log(struct trace_array *tr) 8448 { 8449 struct tracing_log_err *err, *next; 8450 8451 guard(mutex)(&tracing_err_log_lock); 8452 8453 list_for_each_entry_safe(err, next, &tr->err_log, list) { 8454 list_del(&err->list); 8455 free_tracing_log_err(err); 8456 } 8457 8458 tr->n_err_log_entries = 0; 8459 } 8460 8461 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos) 8462 { 8463 struct trace_array *tr = m->private; 8464 8465 mutex_lock(&tracing_err_log_lock); 8466 8467 return seq_list_start(&tr->err_log, *pos); 8468 } 8469 8470 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos) 8471 { 8472 struct trace_array *tr = m->private; 8473 8474 return seq_list_next(v, &tr->err_log, pos); 8475 } 8476 8477 static void tracing_err_log_seq_stop(struct seq_file *m, void *v) 8478 { 8479 mutex_unlock(&tracing_err_log_lock); 8480 } 8481 8482 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos) 8483 { 8484 u16 i; 8485 8486 for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++) 8487 seq_putc(m, ' '); 8488 for (i = 0; i < pos; i++) 8489 seq_putc(m, ' '); 8490 seq_puts(m, "^\n"); 8491 } 8492 8493 static int tracing_err_log_seq_show(struct seq_file *m, void *v) 8494 { 8495 struct tracing_log_err *err = v; 8496 8497 if (err) { 8498 const char *err_text = err->info.errs[err->info.type]; 8499 u64 sec = err->info.ts; 8500 u32 nsec; 8501 8502 nsec = do_div(sec, NSEC_PER_SEC); 8503 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000, 8504 err->loc, err_text); 8505 seq_printf(m, "%s", err->cmd); 8506 tracing_err_log_show_pos(m, err->info.pos); 8507 } 8508 8509 return 0; 8510 } 8511 8512 static const struct seq_operations tracing_err_log_seq_ops = { 8513 .start = tracing_err_log_seq_start, 8514 .next = tracing_err_log_seq_next, 8515 .stop = tracing_err_log_seq_stop, 8516 .show = tracing_err_log_seq_show 8517 }; 8518 8519 static int tracing_err_log_open(struct inode *inode, struct file *file) 8520 { 8521 struct trace_array *tr = inode->i_private; 8522 int ret = 0; 8523 8524 ret = tracing_check_open_get_tr(tr); 8525 if (ret) 8526 return ret; 8527 8528 /* If this file was opened for write, then erase contents */ 8529 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) 8530 clear_tracing_err_log(tr); 8531 8532 if (file->f_mode & FMODE_READ) { 8533 ret = seq_open(file, &tracing_err_log_seq_ops); 8534 if (!ret) { 8535 struct seq_file *m = file->private_data; 8536 m->private = tr; 8537 } else { 8538 trace_array_put(tr); 8539 } 8540 } 8541 return ret; 8542 } 8543 8544 static ssize_t tracing_err_log_write(struct file *file, 8545 const char __user *buffer, 8546 size_t count, loff_t *ppos) 8547 { 8548 return count; 8549 } 8550 8551 static int tracing_err_log_release(struct inode 
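/*
 * Example sketch (not part of the original trace.c): the kind of static
 * error-string table a caller of tracing_log_err() typically defines, as the
 * comment above suggests. The "myparser" location, command text and error
 * strings are hypothetical.
 */
static const char *myparser_errs[] = {
	"Unknown keyword",
	"Missing argument",
};

enum { MYPARSER_ERR_KEYWORD, MYPARSER_ERR_MISSING };

static void myparser_report(struct trace_array *tr, char *cmd, const char *bad)
{
	/* The caret is placed under the first occurrence of @bad within @cmd */
	tracing_log_err(tr, "myparser", cmd, myparser_errs,
			MYPARSER_ERR_KEYWORD, err_pos(cmd, bad));
}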
*inode, struct file *file) 8552 { 8553 struct trace_array *tr = inode->i_private; 8554 8555 trace_array_put(tr); 8556 8557 if (file->f_mode & FMODE_READ) 8558 seq_release(inode, file); 8559 8560 return 0; 8561 } 8562 8563 static const struct file_operations tracing_err_log_fops = { 8564 .open = tracing_err_log_open, 8565 .write = tracing_err_log_write, 8566 .read = seq_read, 8567 .llseek = tracing_lseek, 8568 .release = tracing_err_log_release, 8569 }; 8570 8571 static int tracing_buffers_open(struct inode *inode, struct file *filp) 8572 { 8573 struct trace_array *tr = inode->i_private; 8574 struct ftrace_buffer_info *info; 8575 int ret; 8576 8577 ret = tracing_check_open_get_tr(tr); 8578 if (ret) 8579 return ret; 8580 8581 info = kvzalloc(sizeof(*info), GFP_KERNEL); 8582 if (!info) { 8583 trace_array_put(tr); 8584 return -ENOMEM; 8585 } 8586 8587 mutex_lock(&trace_types_lock); 8588 8589 info->iter.tr = tr; 8590 info->iter.cpu_file = tracing_get_cpu(inode); 8591 info->iter.trace = tr->current_trace; 8592 info->iter.array_buffer = &tr->array_buffer; 8593 info->spare = NULL; 8594 /* Force reading ring buffer for first read */ 8595 info->read = (unsigned int)-1; 8596 8597 filp->private_data = info; 8598 8599 tr->trace_ref++; 8600 8601 mutex_unlock(&trace_types_lock); 8602 8603 ret = nonseekable_open(inode, filp); 8604 if (ret < 0) 8605 trace_array_put(tr); 8606 8607 return ret; 8608 } 8609 8610 static __poll_t 8611 tracing_buffers_poll(struct file *filp, poll_table *poll_table) 8612 { 8613 struct ftrace_buffer_info *info = filp->private_data; 8614 struct trace_iterator *iter = &info->iter; 8615 8616 return trace_poll(iter, filp, poll_table); 8617 } 8618 8619 static ssize_t 8620 tracing_buffers_read(struct file *filp, char __user *ubuf, 8621 size_t count, loff_t *ppos) 8622 { 8623 struct ftrace_buffer_info *info = filp->private_data; 8624 struct trace_iterator *iter = &info->iter; 8625 void *trace_data; 8626 int page_size; 8627 ssize_t ret = 0; 8628 ssize_t size; 8629 8630 if (!count) 8631 return 0; 8632 8633 #ifdef CONFIG_TRACER_MAX_TRACE 8634 if (iter->snapshot && iter->tr->current_trace->use_max_tr) 8635 return -EBUSY; 8636 #endif 8637 8638 page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer); 8639 8640 /* Make sure the spare matches the current sub buffer size */ 8641 if (info->spare) { 8642 if (page_size != info->spare_size) { 8643 ring_buffer_free_read_page(iter->array_buffer->buffer, 8644 info->spare_cpu, info->spare); 8645 info->spare = NULL; 8646 } 8647 } 8648 8649 if (!info->spare) { 8650 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer, 8651 iter->cpu_file); 8652 if (IS_ERR(info->spare)) { 8653 ret = PTR_ERR(info->spare); 8654 info->spare = NULL; 8655 } else { 8656 info->spare_cpu = iter->cpu_file; 8657 info->spare_size = page_size; 8658 } 8659 } 8660 if (!info->spare) 8661 return ret; 8662 8663 /* Do we have previous read data to read? 
*/ 8664 if (info->read < page_size) 8665 goto read; 8666 8667 again: 8668 trace_access_lock(iter->cpu_file); 8669 ret = ring_buffer_read_page(iter->array_buffer->buffer, 8670 info->spare, 8671 count, 8672 iter->cpu_file, 0); 8673 trace_access_unlock(iter->cpu_file); 8674 8675 if (ret < 0) { 8676 if (trace_empty(iter) && !iter->closed) { 8677 if (update_last_data_if_empty(iter->tr)) 8678 return 0; 8679 8680 if ((filp->f_flags & O_NONBLOCK)) 8681 return -EAGAIN; 8682 8683 ret = wait_on_pipe(iter, 0); 8684 if (ret) 8685 return ret; 8686 8687 goto again; 8688 } 8689 return 0; 8690 } 8691 8692 info->read = 0; 8693 read: 8694 size = page_size - info->read; 8695 if (size > count) 8696 size = count; 8697 trace_data = ring_buffer_read_page_data(info->spare); 8698 ret = copy_to_user(ubuf, trace_data + info->read, size); 8699 if (ret == size) 8700 return -EFAULT; 8701 8702 size -= ret; 8703 8704 *ppos += size; 8705 info->read += size; 8706 8707 return size; 8708 } 8709 8710 static int tracing_buffers_flush(struct file *file, fl_owner_t id) 8711 { 8712 struct ftrace_buffer_info *info = file->private_data; 8713 struct trace_iterator *iter = &info->iter; 8714 8715 iter->closed = true; 8716 /* Make sure the waiters see the new wait_index */ 8717 (void)atomic_fetch_inc_release(&iter->wait_index); 8718 8719 ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file); 8720 8721 return 0; 8722 } 8723 8724 static int tracing_buffers_release(struct inode *inode, struct file *file) 8725 { 8726 struct ftrace_buffer_info *info = file->private_data; 8727 struct trace_iterator *iter = &info->iter; 8728 8729 guard(mutex)(&trace_types_lock); 8730 8731 iter->tr->trace_ref--; 8732 8733 __trace_array_put(iter->tr); 8734 8735 if (info->spare) 8736 ring_buffer_free_read_page(iter->array_buffer->buffer, 8737 info->spare_cpu, info->spare); 8738 kvfree(info); 8739 8740 return 0; 8741 } 8742 8743 struct buffer_ref { 8744 struct trace_buffer *buffer; 8745 void *page; 8746 int cpu; 8747 refcount_t refcount; 8748 }; 8749 8750 static void buffer_ref_release(struct buffer_ref *ref) 8751 { 8752 if (!refcount_dec_and_test(&ref->refcount)) 8753 return; 8754 ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page); 8755 kfree(ref); 8756 } 8757 8758 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe, 8759 struct pipe_buffer *buf) 8760 { 8761 struct buffer_ref *ref = (struct buffer_ref *)buf->private; 8762 8763 buffer_ref_release(ref); 8764 buf->private = 0; 8765 } 8766 8767 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe, 8768 struct pipe_buffer *buf) 8769 { 8770 struct buffer_ref *ref = (struct buffer_ref *)buf->private; 8771 8772 if (refcount_read(&ref->refcount) > INT_MAX/2) 8773 return false; 8774 8775 refcount_inc(&ref->refcount); 8776 return true; 8777 } 8778 8779 /* Pipe buffer operations for a buffer. */ 8780 static const struct pipe_buf_operations buffer_pipe_buf_ops = { 8781 .release = buffer_pipe_buf_release, 8782 .get = buffer_pipe_buf_get, 8783 }; 8784 8785 /* 8786 * Callback from splice_to_pipe(), if we need to release some pages 8787 * at the end of the spd in case we error'ed out in filling the pipe. 
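/*
 * Example sketch (not part of the original trace.c): reading binary
 * sub-buffers from per_cpu/cpuN/trace_pipe_raw, which is serviced by
 * tracing_buffers_read() above. Each successful read() returns data taken
 * from one ring-buffer sub-buffer; the tracefs path and the 64K scratch
 * buffer size are assumptions.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int dump_cpu0_raw(void)
{
	static char page[65536];
	ssize_t r;
	int fd;

	fd = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw", O_RDONLY);
	if (fd < 0)
		return -1;

	/* Blocks until data arrives unless the file was opened O_NONBLOCK */
	while ((r = read(fd, page, sizeof(page))) > 0)
		fwrite(page, 1, r, stdout);

	close(fd);
	return r < 0 ? -1 : 0;
}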
8788 */ 8789 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i) 8790 { 8791 struct buffer_ref *ref = 8792 (struct buffer_ref *)spd->partial[i].private; 8793 8794 buffer_ref_release(ref); 8795 spd->partial[i].private = 0; 8796 } 8797 8798 static ssize_t 8799 tracing_buffers_splice_read(struct file *file, loff_t *ppos, 8800 struct pipe_inode_info *pipe, size_t len, 8801 unsigned int flags) 8802 { 8803 struct ftrace_buffer_info *info = file->private_data; 8804 struct trace_iterator *iter = &info->iter; 8805 struct partial_page partial_def[PIPE_DEF_BUFFERS]; 8806 struct page *pages_def[PIPE_DEF_BUFFERS]; 8807 struct splice_pipe_desc spd = { 8808 .pages = pages_def, 8809 .partial = partial_def, 8810 .nr_pages_max = PIPE_DEF_BUFFERS, 8811 .ops = &buffer_pipe_buf_ops, 8812 .spd_release = buffer_spd_release, 8813 }; 8814 struct buffer_ref *ref; 8815 bool woken = false; 8816 int page_size; 8817 int entries, i; 8818 ssize_t ret = 0; 8819 8820 #ifdef CONFIG_TRACER_MAX_TRACE 8821 if (iter->snapshot && iter->tr->current_trace->use_max_tr) 8822 return -EBUSY; 8823 #endif 8824 8825 page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer); 8826 if (*ppos & (page_size - 1)) 8827 return -EINVAL; 8828 8829 if (len & (page_size - 1)) { 8830 if (len < page_size) 8831 return -EINVAL; 8832 len &= (~(page_size - 1)); 8833 } 8834 8835 if (splice_grow_spd(pipe, &spd)) 8836 return -ENOMEM; 8837 8838 again: 8839 trace_access_lock(iter->cpu_file); 8840 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file); 8841 8842 for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) { 8843 struct page *page; 8844 int r; 8845 8846 ref = kzalloc(sizeof(*ref), GFP_KERNEL); 8847 if (!ref) { 8848 ret = -ENOMEM; 8849 break; 8850 } 8851 8852 refcount_set(&ref->refcount, 1); 8853 ref->buffer = iter->array_buffer->buffer; 8854 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file); 8855 if (IS_ERR(ref->page)) { 8856 ret = PTR_ERR(ref->page); 8857 ref->page = NULL; 8858 kfree(ref); 8859 break; 8860 } 8861 ref->cpu = iter->cpu_file; 8862 8863 r = ring_buffer_read_page(ref->buffer, ref->page, 8864 len, iter->cpu_file, 1); 8865 if (r < 0) { 8866 ring_buffer_free_read_page(ref->buffer, ref->cpu, 8867 ref->page); 8868 kfree(ref); 8869 break; 8870 } 8871 8872 page = virt_to_page(ring_buffer_read_page_data(ref->page)); 8873 8874 spd.pages[i] = page; 8875 spd.partial[i].len = page_size; 8876 spd.partial[i].offset = 0; 8877 spd.partial[i].private = (unsigned long)ref; 8878 spd.nr_pages++; 8879 *ppos += page_size; 8880 8881 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file); 8882 } 8883 8884 trace_access_unlock(iter->cpu_file); 8885 spd.nr_pages = i; 8886 8887 /* did we read anything? */ 8888 if (!spd.nr_pages) { 8889 8890 if (ret) 8891 goto out; 8892 8893 if (woken) 8894 goto out; 8895 8896 ret = -EAGAIN; 8897 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK)) 8898 goto out; 8899 8900 ret = wait_on_pipe(iter, iter->snapshot ? 
0 : iter->tr->buffer_percent); 8901 if (ret) 8902 goto out; 8903 8904 /* No need to wait after waking up when tracing is off */ 8905 if (!tracer_tracing_is_on(iter->tr)) 8906 goto out; 8907 8908 /* Iterate one more time to collect any new data then exit */ 8909 woken = true; 8910 8911 goto again; 8912 } 8913 8914 ret = splice_to_pipe(pipe, &spd); 8915 out: 8916 splice_shrink_spd(&spd); 8917 8918 return ret; 8919 } 8920 8921 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 8922 { 8923 struct ftrace_buffer_info *info = file->private_data; 8924 struct trace_iterator *iter = &info->iter; 8925 int err; 8926 8927 if (cmd == TRACE_MMAP_IOCTL_GET_READER) { 8928 if (!(file->f_flags & O_NONBLOCK)) { 8929 err = ring_buffer_wait(iter->array_buffer->buffer, 8930 iter->cpu_file, 8931 iter->tr->buffer_percent, 8932 NULL, NULL); 8933 if (err) 8934 return err; 8935 } 8936 8937 return ring_buffer_map_get_reader(iter->array_buffer->buffer, 8938 iter->cpu_file); 8939 } else if (cmd) { 8940 return -ENOTTY; 8941 } 8942 8943 /* 8944 * An ioctl call with cmd 0 to the ring buffer file will wake up all 8945 * waiters 8946 */ 8947 guard(mutex)(&trace_types_lock); 8948 8949 /* Make sure the waiters see the new wait_index */ 8950 (void)atomic_fetch_inc_release(&iter->wait_index); 8951 8952 ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file); 8953 8954 return 0; 8955 } 8956 8957 #ifdef CONFIG_TRACER_MAX_TRACE 8958 static int get_snapshot_map(struct trace_array *tr) 8959 { 8960 int err = 0; 8961 8962 /* 8963 * Called with mmap_lock held. lockdep would be unhappy if we would now 8964 * take trace_types_lock. Instead use the specific 8965 * snapshot_trigger_lock. 8966 */ 8967 spin_lock(&tr->snapshot_trigger_lock); 8968 8969 if (tr->snapshot || tr->mapped == UINT_MAX) 8970 err = -EBUSY; 8971 else 8972 tr->mapped++; 8973 8974 spin_unlock(&tr->snapshot_trigger_lock); 8975 8976 /* Wait for update_max_tr() to observe iter->tr->mapped */ 8977 if (tr->mapped == 1) 8978 synchronize_rcu(); 8979 8980 return err; 8981 8982 } 8983 static void put_snapshot_map(struct trace_array *tr) 8984 { 8985 spin_lock(&tr->snapshot_trigger_lock); 8986 if (!WARN_ON(!tr->mapped)) 8987 tr->mapped--; 8988 spin_unlock(&tr->snapshot_trigger_lock); 8989 } 8990 #else 8991 static inline int get_snapshot_map(struct trace_array *tr) { return 0; } 8992 static inline void put_snapshot_map(struct trace_array *tr) { } 8993 #endif 8994 8995 static void tracing_buffers_mmap_close(struct vm_area_struct *vma) 8996 { 8997 struct ftrace_buffer_info *info = vma->vm_file->private_data; 8998 struct trace_iterator *iter = &info->iter; 8999 9000 WARN_ON(ring_buffer_unmap(iter->array_buffer->buffer, iter->cpu_file)); 9001 put_snapshot_map(iter->tr); 9002 } 9003 9004 static int tracing_buffers_may_split(struct vm_area_struct *vma, unsigned long addr) 9005 { 9006 /* 9007 * Trace buffer mappings require the complete buffer including 9008 * the meta page. Partial mappings are not supported. 
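/*
 * Example sketch (not part of the original trace.c): moving whole
 * sub-buffers out of trace_pipe_raw with splice(2), the path handled by
 * tracing_buffers_splice_read() above. Lengths smaller than one sub-buffer
 * are rejected and larger ones are rounded down, so a multiple of the
 * sub-buffer size is requested here. The path and sizes are assumptions.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <unistd.h>

int splice_cpu0_to_pipe(int pipe_wr_fd, size_t subbuf_size)
{
	ssize_t n;
	int fd;

	fd = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw", O_RDONLY);
	if (fd < 0)
		return -1;

	/* Ask for a handful of complete sub-buffers in one go */
	n = splice(fd, NULL, pipe_wr_fd, NULL, 8 * subbuf_size,
		   SPLICE_F_NONBLOCK);

	close(fd);
	return n < 0 ? -1 : (int)n;
}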
9009 */ 9010 return -EINVAL; 9011 } 9012 9013 static const struct vm_operations_struct tracing_buffers_vmops = { 9014 .close = tracing_buffers_mmap_close, 9015 .may_split = tracing_buffers_may_split, 9016 }; 9017 9018 static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma) 9019 { 9020 struct ftrace_buffer_info *info = filp->private_data; 9021 struct trace_iterator *iter = &info->iter; 9022 int ret = 0; 9023 9024 /* A memmap'ed and backup buffers are not supported for user space mmap */ 9025 if (iter->tr->flags & (TRACE_ARRAY_FL_MEMMAP | TRACE_ARRAY_FL_VMALLOC)) 9026 return -ENODEV; 9027 9028 ret = get_snapshot_map(iter->tr); 9029 if (ret) 9030 return ret; 9031 9032 ret = ring_buffer_map(iter->array_buffer->buffer, iter->cpu_file, vma); 9033 if (ret) 9034 put_snapshot_map(iter->tr); 9035 9036 vma->vm_ops = &tracing_buffers_vmops; 9037 9038 return ret; 9039 } 9040 9041 static const struct file_operations tracing_buffers_fops = { 9042 .open = tracing_buffers_open, 9043 .read = tracing_buffers_read, 9044 .poll = tracing_buffers_poll, 9045 .release = tracing_buffers_release, 9046 .flush = tracing_buffers_flush, 9047 .splice_read = tracing_buffers_splice_read, 9048 .unlocked_ioctl = tracing_buffers_ioctl, 9049 .mmap = tracing_buffers_mmap, 9050 }; 9051 9052 static ssize_t 9053 tracing_stats_read(struct file *filp, char __user *ubuf, 9054 size_t count, loff_t *ppos) 9055 { 9056 struct inode *inode = file_inode(filp); 9057 struct trace_array *tr = inode->i_private; 9058 struct array_buffer *trace_buf = &tr->array_buffer; 9059 int cpu = tracing_get_cpu(inode); 9060 struct trace_seq *s; 9061 unsigned long cnt; 9062 unsigned long long t; 9063 unsigned long usec_rem; 9064 9065 s = kmalloc(sizeof(*s), GFP_KERNEL); 9066 if (!s) 9067 return -ENOMEM; 9068 9069 trace_seq_init(s); 9070 9071 cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu); 9072 trace_seq_printf(s, "entries: %ld\n", cnt); 9073 9074 cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu); 9075 trace_seq_printf(s, "overrun: %ld\n", cnt); 9076 9077 cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu); 9078 trace_seq_printf(s, "commit overrun: %ld\n", cnt); 9079 9080 cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu); 9081 trace_seq_printf(s, "bytes: %ld\n", cnt); 9082 9083 if (trace_clocks[tr->clock_id].in_ns) { 9084 /* local or global for trace_clock */ 9085 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu)); 9086 usec_rem = do_div(t, USEC_PER_SEC); 9087 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n", 9088 t, usec_rem); 9089 9090 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer)); 9091 usec_rem = do_div(t, USEC_PER_SEC); 9092 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem); 9093 } else { 9094 /* counter or tsc mode for trace_clock */ 9095 trace_seq_printf(s, "oldest event ts: %llu\n", 9096 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu)); 9097 9098 trace_seq_printf(s, "now ts: %llu\n", 9099 ring_buffer_time_stamp(trace_buf->buffer)); 9100 } 9101 9102 cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu); 9103 trace_seq_printf(s, "dropped events: %ld\n", cnt); 9104 9105 cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu); 9106 trace_seq_printf(s, "read events: %ld\n", cnt); 9107 9108 count = simple_read_from_buffer(ubuf, count, ppos, 9109 s->buffer, trace_seq_used(s)); 9110 9111 kfree(s); 9112 9113 return count; 9114 } 9115 9116 static const struct file_operations tracing_stats_fops = { 9117 .open = tracing_open_generic_tr, 9118 .read = tracing_stats_read, 
9119 .llseek = generic_file_llseek, 9120 .release = tracing_release_generic_tr, 9121 }; 9122 9123 #ifdef CONFIG_DYNAMIC_FTRACE 9124 9125 static ssize_t 9126 tracing_read_dyn_info(struct file *filp, char __user *ubuf, 9127 size_t cnt, loff_t *ppos) 9128 { 9129 ssize_t ret; 9130 char *buf; 9131 int r; 9132 9133 /* 512 should be plenty to hold the amount needed */ 9134 #define DYN_INFO_BUF_SIZE 512 9135 9136 buf = kmalloc(DYN_INFO_BUF_SIZE, GFP_KERNEL); 9137 if (!buf) 9138 return -ENOMEM; 9139 9140 r = scnprintf(buf, DYN_INFO_BUF_SIZE, 9141 "%ld pages:%ld groups: %ld\n" 9142 "ftrace boot update time = %llu (ns)\n" 9143 "ftrace module total update time = %llu (ns)\n", 9144 ftrace_update_tot_cnt, 9145 ftrace_number_of_pages, 9146 ftrace_number_of_groups, 9147 ftrace_update_time, 9148 ftrace_total_mod_time); 9149 9150 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 9151 kfree(buf); 9152 return ret; 9153 } 9154 9155 static const struct file_operations tracing_dyn_info_fops = { 9156 .open = tracing_open_generic, 9157 .read = tracing_read_dyn_info, 9158 .llseek = generic_file_llseek, 9159 }; 9160 #endif /* CONFIG_DYNAMIC_FTRACE */ 9161 9162 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) 9163 static void 9164 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, 9165 struct trace_array *tr, struct ftrace_probe_ops *ops, 9166 void *data) 9167 { 9168 tracing_snapshot_instance(tr); 9169 } 9170 9171 static void 9172 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, 9173 struct trace_array *tr, struct ftrace_probe_ops *ops, 9174 void *data) 9175 { 9176 struct ftrace_func_mapper *mapper = data; 9177 long *count = NULL; 9178 9179 if (mapper) 9180 count = (long *)ftrace_func_mapper_find_ip(mapper, ip); 9181 9182 if (count) { 9183 9184 if (*count <= 0) 9185 return; 9186 9187 (*count)--; 9188 } 9189 9190 tracing_snapshot_instance(tr); 9191 } 9192 9193 static int 9194 ftrace_snapshot_print(struct seq_file *m, unsigned long ip, 9195 struct ftrace_probe_ops *ops, void *data) 9196 { 9197 struct ftrace_func_mapper *mapper = data; 9198 long *count = NULL; 9199 9200 seq_printf(m, "%ps:", (void *)ip); 9201 9202 seq_puts(m, "snapshot"); 9203 9204 if (mapper) 9205 count = (long *)ftrace_func_mapper_find_ip(mapper, ip); 9206 9207 if (count) 9208 seq_printf(m, ":count=%ld\n", *count); 9209 else 9210 seq_puts(m, ":unlimited\n"); 9211 9212 return 0; 9213 } 9214 9215 static int 9216 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr, 9217 unsigned long ip, void *init_data, void **data) 9218 { 9219 struct ftrace_func_mapper *mapper = *data; 9220 9221 if (!mapper) { 9222 mapper = allocate_ftrace_func_mapper(); 9223 if (!mapper) 9224 return -ENOMEM; 9225 *data = mapper; 9226 } 9227 9228 return ftrace_func_mapper_add_ip(mapper, ip, init_data); 9229 } 9230 9231 static void 9232 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr, 9233 unsigned long ip, void *data) 9234 { 9235 struct ftrace_func_mapper *mapper = data; 9236 9237 if (!ip) { 9238 if (!mapper) 9239 return; 9240 free_ftrace_func_mapper(mapper, NULL); 9241 return; 9242 } 9243 9244 ftrace_func_mapper_remove_ip(mapper, ip); 9245 } 9246 9247 static struct ftrace_probe_ops snapshot_probe_ops = { 9248 .func = ftrace_snapshot, 9249 .print = ftrace_snapshot_print, 9250 }; 9251 9252 static struct ftrace_probe_ops snapshot_count_probe_ops = { 9253 .func = ftrace_count_snapshot, 9254 .print = ftrace_snapshot_print, 9255 .init = ftrace_snapshot_init, 9256 .free = ftrace_snapshot_free, 
9257 }; 9258 9259 static int 9260 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash, 9261 char *glob, char *cmd, char *param, int enable) 9262 { 9263 struct ftrace_probe_ops *ops; 9264 void *count = (void *)-1; 9265 char *number; 9266 int ret; 9267 9268 if (!tr) 9269 return -ENODEV; 9270 9271 /* hash funcs only work with set_ftrace_filter */ 9272 if (!enable) 9273 return -EINVAL; 9274 9275 ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops; 9276 9277 if (glob[0] == '!') { 9278 ret = unregister_ftrace_function_probe_func(glob+1, tr, ops); 9279 if (!ret) 9280 tracing_disarm_snapshot(tr); 9281 9282 return ret; 9283 } 9284 9285 if (!param) 9286 goto out_reg; 9287 9288 number = strsep(¶m, ":"); 9289 9290 if (!strlen(number)) 9291 goto out_reg; 9292 9293 /* 9294 * We use the callback data field (which is a pointer) 9295 * as our counter. 9296 */ 9297 ret = kstrtoul(number, 0, (unsigned long *)&count); 9298 if (ret) 9299 return ret; 9300 9301 out_reg: 9302 ret = tracing_arm_snapshot(tr); 9303 if (ret < 0) 9304 return ret; 9305 9306 ret = register_ftrace_function_probe(glob, tr, ops, count); 9307 if (ret < 0) 9308 tracing_disarm_snapshot(tr); 9309 9310 return ret < 0 ? ret : 0; 9311 } 9312 9313 static struct ftrace_func_command ftrace_snapshot_cmd = { 9314 .name = "snapshot", 9315 .func = ftrace_trace_snapshot_callback, 9316 }; 9317 9318 static __init int register_snapshot_cmd(void) 9319 { 9320 return register_ftrace_command(&ftrace_snapshot_cmd); 9321 } 9322 #else 9323 static inline __init int register_snapshot_cmd(void) { return 0; } 9324 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */ 9325 9326 static struct dentry *tracing_get_dentry(struct trace_array *tr) 9327 { 9328 /* Top directory uses NULL as the parent */ 9329 if (tr->flags & TRACE_ARRAY_FL_GLOBAL) 9330 return NULL; 9331 9332 if (WARN_ON(!tr->dir)) 9333 return ERR_PTR(-ENODEV); 9334 9335 /* All sub buffers have a descriptor */ 9336 return tr->dir; 9337 } 9338 9339 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu) 9340 { 9341 struct dentry *d_tracer; 9342 9343 if (tr->percpu_dir) 9344 return tr->percpu_dir; 9345 9346 d_tracer = tracing_get_dentry(tr); 9347 if (IS_ERR(d_tracer)) 9348 return NULL; 9349 9350 tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer); 9351 9352 MEM_FAIL(!tr->percpu_dir, 9353 "Could not create tracefs directory 'per_cpu/%d'\n", cpu); 9354 9355 return tr->percpu_dir; 9356 } 9357 9358 static struct dentry * 9359 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent, 9360 void *data, long cpu, const struct file_operations *fops) 9361 { 9362 struct dentry *ret = trace_create_file(name, mode, parent, data, fops); 9363 9364 if (ret) /* See tracing_get_cpu() */ 9365 d_inode(ret)->i_cdev = (void *)(cpu + 1); 9366 return ret; 9367 } 9368 9369 static void 9370 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu) 9371 { 9372 struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu); 9373 struct dentry *d_cpu; 9374 char cpu_dir[30]; /* 30 characters should be more than enough */ 9375 9376 if (!d_percpu) 9377 return; 9378 9379 snprintf(cpu_dir, 30, "cpu%ld", cpu); 9380 d_cpu = tracefs_create_dir(cpu_dir, d_percpu); 9381 if (!d_cpu) { 9382 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir); 9383 return; 9384 } 9385 9386 /* per cpu trace_pipe */ 9387 trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu, 9388 tr, cpu, &tracing_pipe_fops); 9389 9390 /* per cpu trace */ 9391 
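/*
 * Example sketch (not part of the original trace.c): the user-space side of
 * the "snapshot" function command registered above. Writing
 * "<function>:snapshot:<count>" to set_ftrace_filter arms a probe that
 * snapshots the trace buffer the first <count> times the function is hit;
 * prefixing the line with '!' removes the probe again. The tracefs path and
 * the target function are assumptions.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int arm_snapshot_on(const char *func)
{
	char cmd[128];
	int fd, ret = 0;

	fd = open("/sys/kernel/tracing/set_ftrace_filter", O_WRONLY);
	if (fd < 0)
		return -1;

	/* e.g. "schedule:snapshot:1" -> one snapshot, then the probe idles */
	snprintf(cmd, sizeof(cmd), "%s:snapshot:1\n", func);
	if (write(fd, cmd, strlen(cmd)) < 0)
		ret = -1;

	close(fd);
	return ret;
}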
trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu, 9392 tr, cpu, &tracing_fops); 9393 9394 trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu, 9395 tr, cpu, &tracing_buffers_fops); 9396 9397 trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu, 9398 tr, cpu, &tracing_stats_fops); 9399 9400 trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu, 9401 tr, cpu, &tracing_entries_fops); 9402 9403 if (tr->range_addr_start) 9404 trace_create_cpu_file("buffer_meta", TRACE_MODE_READ, d_cpu, 9405 tr, cpu, &tracing_buffer_meta_fops); 9406 #ifdef CONFIG_TRACER_SNAPSHOT 9407 if (!tr->range_addr_start) { 9408 trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu, 9409 tr, cpu, &snapshot_fops); 9410 9411 trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu, 9412 tr, cpu, &snapshot_raw_fops); 9413 } 9414 #endif 9415 } 9416 9417 #ifdef CONFIG_FTRACE_SELFTEST 9418 /* Let selftest have access to static functions in this file */ 9419 #include "trace_selftest.c" 9420 #endif 9421 9422 static ssize_t 9423 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt, 9424 loff_t *ppos) 9425 { 9426 struct trace_option_dentry *topt = filp->private_data; 9427 char *buf; 9428 9429 if (topt->flags->val & topt->opt->bit) 9430 buf = "1\n"; 9431 else 9432 buf = "0\n"; 9433 9434 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2); 9435 } 9436 9437 static ssize_t 9438 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt, 9439 loff_t *ppos) 9440 { 9441 struct trace_option_dentry *topt = filp->private_data; 9442 unsigned long val; 9443 int ret; 9444 9445 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 9446 if (ret) 9447 return ret; 9448 9449 if (val != 0 && val != 1) 9450 return -EINVAL; 9451 9452 if (!!(topt->flags->val & topt->opt->bit) != val) { 9453 guard(mutex)(&trace_types_lock); 9454 ret = __set_tracer_option(topt->tr, topt->flags, 9455 topt->opt, !val); 9456 if (ret) 9457 return ret; 9458 } 9459 9460 *ppos += cnt; 9461 9462 return cnt; 9463 } 9464 9465 static int tracing_open_options(struct inode *inode, struct file *filp) 9466 { 9467 struct trace_option_dentry *topt = inode->i_private; 9468 int ret; 9469 9470 ret = tracing_check_open_get_tr(topt->tr); 9471 if (ret) 9472 return ret; 9473 9474 filp->private_data = inode->i_private; 9475 return 0; 9476 } 9477 9478 static int tracing_release_options(struct inode *inode, struct file *file) 9479 { 9480 struct trace_option_dentry *topt = file->private_data; 9481 9482 trace_array_put(topt->tr); 9483 return 0; 9484 } 9485 9486 static const struct file_operations trace_options_fops = { 9487 .open = tracing_open_options, 9488 .read = trace_options_read, 9489 .write = trace_options_write, 9490 .llseek = generic_file_llseek, 9491 .release = tracing_release_options, 9492 }; 9493 9494 /* 9495 * In order to pass in both the trace_array descriptor as well as the index 9496 * to the flag that the trace option file represents, the trace_array 9497 * has a character array of trace_flags_index[], which holds the index 9498 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc. 9499 * The address of this character array is passed to the flag option file 9500 * read/write callbacks. 9501 * 9502 * In order to extract both the index and the trace_array descriptor, 9503 * get_tr_index() uses the following algorithm. 9504 * 9505 * idx = *ptr; 9506 * 9507 * As the pointer itself contains the address of the index (remember 9508 * index[1] == 1). 
9509 * 9510 * Then to get the trace_array descriptor, by subtracting that index 9511 * from the ptr, we get to the start of the index itself. 9512 * 9513 * ptr - idx == &index[0] 9514 * 9515 * Then a simple container_of() from that pointer gets us to the 9516 * trace_array descriptor. 9517 */ 9518 static void get_tr_index(void *data, struct trace_array **ptr, 9519 unsigned int *pindex) 9520 { 9521 *pindex = *(unsigned char *)data; 9522 9523 *ptr = container_of(data - *pindex, struct trace_array, 9524 trace_flags_index); 9525 } 9526 9527 static ssize_t 9528 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt, 9529 loff_t *ppos) 9530 { 9531 void *tr_index = filp->private_data; 9532 struct trace_array *tr; 9533 unsigned int index; 9534 char *buf; 9535 9536 get_tr_index(tr_index, &tr, &index); 9537 9538 if (tr->trace_flags & (1ULL << index)) 9539 buf = "1\n"; 9540 else 9541 buf = "0\n"; 9542 9543 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2); 9544 } 9545 9546 static ssize_t 9547 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt, 9548 loff_t *ppos) 9549 { 9550 void *tr_index = filp->private_data; 9551 struct trace_array *tr; 9552 unsigned int index; 9553 unsigned long val; 9554 int ret; 9555 9556 get_tr_index(tr_index, &tr, &index); 9557 9558 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 9559 if (ret) 9560 return ret; 9561 9562 if (val != 0 && val != 1) 9563 return -EINVAL; 9564 9565 mutex_lock(&event_mutex); 9566 mutex_lock(&trace_types_lock); 9567 ret = set_tracer_flag(tr, 1ULL << index, val); 9568 mutex_unlock(&trace_types_lock); 9569 mutex_unlock(&event_mutex); 9570 9571 if (ret < 0) 9572 return ret; 9573 9574 *ppos += cnt; 9575 9576 return cnt; 9577 } 9578 9579 static const struct file_operations trace_options_core_fops = { 9580 .open = tracing_open_generic, 9581 .read = trace_options_core_read, 9582 .write = trace_options_core_write, 9583 .llseek = generic_file_llseek, 9584 }; 9585 9586 struct dentry *trace_create_file(const char *name, 9587 umode_t mode, 9588 struct dentry *parent, 9589 void *data, 9590 const struct file_operations *fops) 9591 { 9592 struct dentry *ret; 9593 9594 ret = tracefs_create_file(name, mode, parent, data, fops); 9595 if (!ret) 9596 pr_warn("Could not create tracefs '%s' entry\n", name); 9597 9598 return ret; 9599 } 9600 9601 9602 static struct dentry *trace_options_init_dentry(struct trace_array *tr) 9603 { 9604 struct dentry *d_tracer; 9605 9606 if (tr->options) 9607 return tr->options; 9608 9609 d_tracer = tracing_get_dentry(tr); 9610 if (IS_ERR(d_tracer)) 9611 return NULL; 9612 9613 tr->options = tracefs_create_dir("options", d_tracer); 9614 if (!tr->options) { 9615 pr_warn("Could not create tracefs directory 'options'\n"); 9616 return NULL; 9617 } 9618 9619 return tr->options; 9620 } 9621 9622 static void 9623 create_trace_option_file(struct trace_array *tr, 9624 struct trace_option_dentry *topt, 9625 struct tracer_flags *flags, 9626 struct tracer_opt *opt) 9627 { 9628 struct dentry *t_options; 9629 9630 t_options = trace_options_init_dentry(tr); 9631 if (!t_options) 9632 return; 9633 9634 topt->flags = flags; 9635 topt->opt = opt; 9636 topt->tr = tr; 9637 9638 topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE, 9639 t_options, topt, &trace_options_fops); 9640 } 9641 9642 static int 9643 create_trace_option_files(struct trace_array *tr, struct tracer *tracer, 9644 struct tracer_flags *flags) 9645 { 9646 struct trace_option_dentry *topts; 9647 struct trace_options *tr_topts; 9648 struct 
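/*
 * Worked example (not part of the original trace.c) of the trace_flags_index
 * encoding described above. The helper below is illustrative only; it simply
 * retraces the pointer arithmetic that get_tr_index() performs: with
 * index[i] == i, data == &tr->trace_flags_index[5] yields idx == 5, and
 * data - idx points back at &tr->trace_flags_index[0].
 */
static struct trace_array *example_decode_flag_ptr(void *data,
						   unsigned int *bit)
{
	/* data points at trace_flags_index[bit], and index[bit] == bit */
	*bit = *(unsigned char *)data;

	/* Stepping back 'bit' bytes lands on &tr->trace_flags_index[0] */
	return container_of(data - *bit, struct trace_array,
			    trace_flags_index);
}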
tracer_opt *opts; 9649 int cnt; 9650 9651 if (!flags || !flags->opts) 9652 return 0; 9653 9654 opts = flags->opts; 9655 9656 for (cnt = 0; opts[cnt].name; cnt++) 9657 ; 9658 9659 topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL); 9660 if (!topts) 9661 return 0; 9662 9663 tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1), 9664 GFP_KERNEL); 9665 if (!tr_topts) { 9666 kfree(topts); 9667 return -ENOMEM; 9668 } 9669 9670 tr->topts = tr_topts; 9671 tr->topts[tr->nr_topts].tracer = tracer; 9672 tr->topts[tr->nr_topts].topts = topts; 9673 tr->nr_topts++; 9674 9675 for (cnt = 0; opts[cnt].name; cnt++) { 9676 create_trace_option_file(tr, &topts[cnt], flags, 9677 &opts[cnt]); 9678 MEM_FAIL(topts[cnt].entry == NULL, 9679 "Failed to create trace option: %s", 9680 opts[cnt].name); 9681 } 9682 return 0; 9683 } 9684 9685 static int get_global_flags_val(struct tracer *tracer) 9686 { 9687 struct tracers *t; 9688 9689 list_for_each_entry(t, &global_trace.tracers, list) { 9690 if (t->tracer != tracer) 9691 continue; 9692 if (!t->flags) 9693 return -1; 9694 return t->flags->val; 9695 } 9696 return -1; 9697 } 9698 9699 static int add_tracer_options(struct trace_array *tr, struct tracers *t) 9700 { 9701 struct tracer *tracer = t->tracer; 9702 struct tracer_flags *flags = t->flags ?: tracer->flags; 9703 9704 if (!flags) 9705 return 0; 9706 9707 /* Only add tracer options after update_tracer_options finish */ 9708 if (!tracer_options_updated) 9709 return 0; 9710 9711 return create_trace_option_files(tr, tracer, flags); 9712 } 9713 9714 static int add_tracer(struct trace_array *tr, struct tracer *tracer) 9715 { 9716 struct tracer_flags *flags; 9717 struct tracers *t; 9718 int ret; 9719 9720 /* Only enable if the directory has been created already. */ 9721 if (!tr->dir && !(tr->flags & TRACE_ARRAY_FL_GLOBAL)) 9722 return 0; 9723 9724 /* 9725 * If this is an instance, only create flags for tracers 9726 * the instance may have. 9727 */ 9728 if (!trace_ok_for_array(tracer, tr)) 9729 return 0; 9730 9731 t = kmalloc(sizeof(*t), GFP_KERNEL); 9732 if (!t) 9733 return -ENOMEM; 9734 9735 t->tracer = tracer; 9736 t->flags = NULL; 9737 list_add(&t->list, &tr->tracers); 9738 9739 flags = tracer->flags; 9740 if (!flags) { 9741 if (!tracer->default_flags) 9742 return 0; 9743 9744 /* 9745 * If the tracer defines default flags, it means the flags are 9746 * per trace instance. 
9747 */ 9748 flags = kmalloc(sizeof(*flags), GFP_KERNEL); 9749 if (!flags) 9750 return -ENOMEM; 9751 9752 *flags = *tracer->default_flags; 9753 flags->trace = tracer; 9754 9755 t->flags = flags; 9756 9757 /* If this is an instance, inherit the global_trace flags */ 9758 if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL)) { 9759 int val = get_global_flags_val(tracer); 9760 if (!WARN_ON_ONCE(val < 0)) 9761 flags->val = val; 9762 } 9763 } 9764 9765 ret = add_tracer_options(tr, t); 9766 if (ret < 0) { 9767 list_del(&t->list); 9768 kfree(t->flags); 9769 kfree(t); 9770 } 9771 9772 return ret; 9773 } 9774 9775 static struct dentry * 9776 create_trace_option_core_file(struct trace_array *tr, 9777 const char *option, long index) 9778 { 9779 struct dentry *t_options; 9780 9781 t_options = trace_options_init_dentry(tr); 9782 if (!t_options) 9783 return NULL; 9784 9785 return trace_create_file(option, TRACE_MODE_WRITE, t_options, 9786 (void *)&tr->trace_flags_index[index], 9787 &trace_options_core_fops); 9788 } 9789 9790 static void create_trace_options_dir(struct trace_array *tr) 9791 { 9792 struct dentry *t_options; 9793 bool top_level = tr == &global_trace; 9794 int i; 9795 9796 t_options = trace_options_init_dentry(tr); 9797 if (!t_options) 9798 return; 9799 9800 for (i = 0; trace_options[i]; i++) { 9801 if (top_level || 9802 !((1ULL << i) & TOP_LEVEL_TRACE_FLAGS)) { 9803 create_trace_option_core_file(tr, trace_options[i], i); 9804 } 9805 } 9806 } 9807 9808 static ssize_t 9809 rb_simple_read(struct file *filp, char __user *ubuf, 9810 size_t cnt, loff_t *ppos) 9811 { 9812 struct trace_array *tr = filp->private_data; 9813 char buf[64]; 9814 int r; 9815 9816 r = tracer_tracing_is_on(tr); 9817 r = sprintf(buf, "%d\n", r); 9818 9819 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 9820 } 9821 9822 static ssize_t 9823 rb_simple_write(struct file *filp, const char __user *ubuf, 9824 size_t cnt, loff_t *ppos) 9825 { 9826 struct trace_array *tr = filp->private_data; 9827 struct trace_buffer *buffer = tr->array_buffer.buffer; 9828 unsigned long val; 9829 int ret; 9830 9831 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 9832 if (ret) 9833 return ret; 9834 9835 if (buffer) { 9836 guard(mutex)(&trace_types_lock); 9837 if (!!val == tracer_tracing_is_on(tr)) { 9838 val = 0; /* do nothing */ 9839 } else if (val) { 9840 tracer_tracing_on(tr); 9841 if (tr->current_trace->start) 9842 tr->current_trace->start(tr); 9843 } else { 9844 tracer_tracing_off(tr); 9845 if (tr->current_trace->stop) 9846 tr->current_trace->stop(tr); 9847 /* Wake up any waiters */ 9848 ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS); 9849 } 9850 } 9851 9852 (*ppos)++; 9853 9854 return cnt; 9855 } 9856 9857 static const struct file_operations rb_simple_fops = { 9858 .open = tracing_open_generic_tr, 9859 .read = rb_simple_read, 9860 .write = rb_simple_write, 9861 .release = tracing_release_generic_tr, 9862 .llseek = default_llseek, 9863 }; 9864 9865 static ssize_t 9866 buffer_percent_read(struct file *filp, char __user *ubuf, 9867 size_t cnt, loff_t *ppos) 9868 { 9869 struct trace_array *tr = filp->private_data; 9870 char buf[64]; 9871 int r; 9872 9873 r = tr->buffer_percent; 9874 r = sprintf(buf, "%d\n", r); 9875 9876 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 9877 } 9878 9879 static ssize_t 9880 buffer_percent_write(struct file *filp, const char __user *ubuf, 9881 size_t cnt, loff_t *ppos) 9882 { 9883 struct trace_array *tr = filp->private_data; 9884 unsigned long val; 9885 int ret; 9886 9887 ret = kstrtoul_from_user(ubuf, 
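/*
 * Example sketch (not part of the original trace.c): the user-space view of
 * the tracing_on file handled by rb_simple_write() above. Writing "0" stops
 * recording and calls the current tracer's ->stop() hook, writing "1"
 * restarts it. The tracefs path is an assumption.
 */
#include <fcntl.h>
#include <unistd.h>

static int set_tracing_on(int on)
{
	int fd = open("/sys/kernel/tracing/tracing_on", O_WRONLY);
	int ret = -1;

	if (fd < 0)
		return -1;
	if (write(fd, on ? "1" : "0", 1) == 1)
		ret = 0;
	close(fd);
	return ret;
}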
cnt, 10, &val); 9888 if (ret) 9889 return ret; 9890 9891 if (val > 100) 9892 return -EINVAL; 9893 9894 tr->buffer_percent = val; 9895 9896 (*ppos)++; 9897 9898 return cnt; 9899 } 9900 9901 static const struct file_operations buffer_percent_fops = { 9902 .open = tracing_open_generic_tr, 9903 .read = buffer_percent_read, 9904 .write = buffer_percent_write, 9905 .release = tracing_release_generic_tr, 9906 .llseek = default_llseek, 9907 }; 9908 9909 static ssize_t 9910 buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) 9911 { 9912 struct trace_array *tr = filp->private_data; 9913 size_t size; 9914 char buf[64]; 9915 int order; 9916 int r; 9917 9918 order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer); 9919 size = (PAGE_SIZE << order) / 1024; 9920 9921 r = sprintf(buf, "%zd\n", size); 9922 9923 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 9924 } 9925 9926 static ssize_t 9927 buffer_subbuf_size_write(struct file *filp, const char __user *ubuf, 9928 size_t cnt, loff_t *ppos) 9929 { 9930 struct trace_array *tr = filp->private_data; 9931 unsigned long val; 9932 int old_order; 9933 int order; 9934 int pages; 9935 int ret; 9936 9937 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 9938 if (ret) 9939 return ret; 9940 9941 val *= 1024; /* value passed in is in KB */ 9942 9943 pages = DIV_ROUND_UP(val, PAGE_SIZE); 9944 order = fls(pages - 1); 9945 9946 /* limit between 1 and 128 system pages */ 9947 if (order < 0 || order > 7) 9948 return -EINVAL; 9949 9950 /* Do not allow tracing while changing the order of the ring buffer */ 9951 tracing_stop_tr(tr); 9952 9953 old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer); 9954 if (old_order == order) 9955 goto out; 9956 9957 ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order); 9958 if (ret) 9959 goto out; 9960 9961 #ifdef CONFIG_TRACER_MAX_TRACE 9962 9963 if (!tr->allocated_snapshot) 9964 goto out_max; 9965 9966 ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order); 9967 if (ret) { 9968 /* Put back the old order */ 9969 cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order); 9970 if (WARN_ON_ONCE(cnt)) { 9971 /* 9972 * AARGH! We are left with different orders! 9973 * The max buffer is our "snapshot" buffer. 9974 * When a tracer needs a snapshot (one of the 9975 * latency tracers), it swaps the max buffer 9976 * with the saved snap shot. We succeeded to 9977 * update the order of the main buffer, but failed to 9978 * update the order of the max buffer. But when we tried 9979 * to reset the main buffer to the original size, we 9980 * failed there too. This is very unlikely to 9981 * happen, but if it does, warn and kill all 9982 * tracing. 
9983 */ 9984 tracing_disabled = 1; 9985 } 9986 goto out; 9987 } 9988 out_max: 9989 #endif 9990 (*ppos)++; 9991 out: 9992 if (ret) 9993 cnt = ret; 9994 tracing_start_tr(tr); 9995 return cnt; 9996 } 9997 9998 static const struct file_operations buffer_subbuf_size_fops = { 9999 .open = tracing_open_generic_tr, 10000 .read = buffer_subbuf_size_read, 10001 .write = buffer_subbuf_size_write, 10002 .release = tracing_release_generic_tr, 10003 .llseek = default_llseek, 10004 }; 10005 10006 static struct dentry *trace_instance_dir; 10007 10008 static void 10009 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer); 10010 10011 #ifdef CONFIG_MODULES 10012 static int make_mod_delta(struct module *mod, void *data) 10013 { 10014 struct trace_module_delta *module_delta; 10015 struct trace_scratch *tscratch; 10016 struct trace_mod_entry *entry; 10017 struct trace_array *tr = data; 10018 int i; 10019 10020 tscratch = tr->scratch; 10021 module_delta = READ_ONCE(tr->module_delta); 10022 for (i = 0; i < tscratch->nr_entries; i++) { 10023 entry = &tscratch->entries[i]; 10024 if (strcmp(mod->name, entry->mod_name)) 10025 continue; 10026 if (mod->state == MODULE_STATE_GOING) 10027 module_delta->delta[i] = 0; 10028 else 10029 module_delta->delta[i] = (unsigned long)mod->mem[MOD_TEXT].base 10030 - entry->mod_addr; 10031 break; 10032 } 10033 return 0; 10034 } 10035 #else 10036 static int make_mod_delta(struct module *mod, void *data) 10037 { 10038 return 0; 10039 } 10040 #endif 10041 10042 static int mod_addr_comp(const void *a, const void *b, const void *data) 10043 { 10044 const struct trace_mod_entry *e1 = a; 10045 const struct trace_mod_entry *e2 = b; 10046 10047 return e1->mod_addr > e2->mod_addr ? 1 : -1; 10048 } 10049 10050 static void setup_trace_scratch(struct trace_array *tr, 10051 struct trace_scratch *tscratch, unsigned int size) 10052 { 10053 struct trace_module_delta *module_delta; 10054 struct trace_mod_entry *entry; 10055 int i, nr_entries; 10056 10057 if (!tscratch) 10058 return; 10059 10060 tr->scratch = tscratch; 10061 tr->scratch_size = size; 10062 10063 if (tscratch->text_addr) 10064 tr->text_delta = (unsigned long)_text - tscratch->text_addr; 10065 10066 if (struct_size(tscratch, entries, tscratch->nr_entries) > size) 10067 goto reset; 10068 10069 /* Check if each module name is a valid string */ 10070 for (i = 0; i < tscratch->nr_entries; i++) { 10071 int n; 10072 10073 entry = &tscratch->entries[i]; 10074 10075 for (n = 0; n < MODULE_NAME_LEN; n++) { 10076 if (entry->mod_name[n] == '\0') 10077 break; 10078 if (!isprint(entry->mod_name[n])) 10079 goto reset; 10080 } 10081 if (n == MODULE_NAME_LEN) 10082 goto reset; 10083 } 10084 10085 /* Sort the entries so that we can find appropriate module from address. */ 10086 nr_entries = tscratch->nr_entries; 10087 sort_r(tscratch->entries, nr_entries, sizeof(struct trace_mod_entry), 10088 mod_addr_comp, NULL, NULL); 10089 10090 if (IS_ENABLED(CONFIG_MODULES)) { 10091 module_delta = kzalloc(struct_size(module_delta, delta, nr_entries), GFP_KERNEL); 10092 if (!module_delta) { 10093 pr_info("module_delta allocation failed. Not able to decode module address."); 10094 goto reset; 10095 } 10096 init_rcu_head(&module_delta->rcu); 10097 } else 10098 module_delta = NULL; 10099 WRITE_ONCE(tr->module_delta, module_delta); 10100 10101 /* Scan modules to make text delta for modules. */ 10102 module_for_each_mod(make_mod_delta, tr); 10103 10104 /* Set trace_clock as the same of the previous boot. 
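/*
 * Worked example (not part of the original trace.c) of the size-to-order
 * conversion in buffer_subbuf_size_write() above, assuming 4K pages:
 *
 *   write "6"    -> 6 KiB  -> pages = DIV_ROUND_UP(6144, 4096)  = 2
 *                          -> order = fls(1)  = 1 -> 8 KiB sub-buffers
 *   write "64"   -> 64 KiB -> pages = DIV_ROUND_UP(65536, 4096) = 16
 *                          -> order = fls(15) = 4 -> 64 KiB sub-buffers
 *   write "1024" -> 1 MiB  -> pages = 256 -> order = fls(255) = 8
 *                          -> rejected with -EINVAL, since only orders
 *                             0..7 (1..128 pages) are accepted.
 */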
*/ 10105 if (tscratch->clock_id != tr->clock_id) { 10106 if (tscratch->clock_id >= ARRAY_SIZE(trace_clocks) || 10107 tracing_set_clock(tr, trace_clocks[tscratch->clock_id].name) < 0) { 10108 pr_info("the previous trace_clock info is not valid."); 10109 goto reset; 10110 } 10111 } 10112 return; 10113 reset: 10114 /* Invalid trace modules */ 10115 memset(tscratch, 0, size); 10116 } 10117 10118 static int 10119 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size) 10120 { 10121 enum ring_buffer_flags rb_flags; 10122 struct trace_scratch *tscratch; 10123 unsigned int scratch_size = 0; 10124 10125 rb_flags = tr->trace_flags & TRACE_ITER(OVERWRITE) ? RB_FL_OVERWRITE : 0; 10126 10127 buf->tr = tr; 10128 10129 if (tr->range_addr_start && tr->range_addr_size) { 10130 /* Add scratch buffer to handle 128 modules */ 10131 buf->buffer = ring_buffer_alloc_range(size, rb_flags, 0, 10132 tr->range_addr_start, 10133 tr->range_addr_size, 10134 struct_size(tscratch, entries, 128)); 10135 10136 tscratch = ring_buffer_meta_scratch(buf->buffer, &scratch_size); 10137 setup_trace_scratch(tr, tscratch, scratch_size); 10138 10139 /* 10140 * This is basically the same as a mapped buffer, 10141 * with the same restrictions. 10142 */ 10143 tr->mapped++; 10144 } else { 10145 buf->buffer = ring_buffer_alloc(size, rb_flags); 10146 } 10147 if (!buf->buffer) 10148 return -ENOMEM; 10149 10150 buf->data = alloc_percpu(struct trace_array_cpu); 10151 if (!buf->data) { 10152 ring_buffer_free(buf->buffer); 10153 buf->buffer = NULL; 10154 return -ENOMEM; 10155 } 10156 10157 /* Allocate the first page for all buffers */ 10158 set_buffer_entries(&tr->array_buffer, 10159 ring_buffer_size(tr->array_buffer.buffer, 0)); 10160 10161 return 0; 10162 } 10163 10164 static void free_trace_buffer(struct array_buffer *buf) 10165 { 10166 if (buf->buffer) { 10167 ring_buffer_free(buf->buffer); 10168 buf->buffer = NULL; 10169 free_percpu(buf->data); 10170 buf->data = NULL; 10171 } 10172 } 10173 10174 static int allocate_trace_buffers(struct trace_array *tr, int size) 10175 { 10176 int ret; 10177 10178 ret = allocate_trace_buffer(tr, &tr->array_buffer, size); 10179 if (ret) 10180 return ret; 10181 10182 #ifdef CONFIG_TRACER_MAX_TRACE 10183 /* Fix mapped buffer trace arrays do not have snapshot buffers */ 10184 if (tr->range_addr_start) 10185 return 0; 10186 10187 ret = allocate_trace_buffer(tr, &tr->max_buffer, 10188 allocate_snapshot ? 
size : 1); 10189 if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) { 10190 free_trace_buffer(&tr->array_buffer); 10191 return -ENOMEM; 10192 } 10193 tr->allocated_snapshot = allocate_snapshot; 10194 10195 allocate_snapshot = false; 10196 #endif 10197 10198 return 0; 10199 } 10200 10201 static void free_trace_buffers(struct trace_array *tr) 10202 { 10203 if (!tr) 10204 return; 10205 10206 free_trace_buffer(&tr->array_buffer); 10207 kfree(tr->module_delta); 10208 10209 #ifdef CONFIG_TRACER_MAX_TRACE 10210 free_trace_buffer(&tr->max_buffer); 10211 #endif 10212 } 10213 10214 static void init_trace_flags_index(struct trace_array *tr) 10215 { 10216 int i; 10217 10218 /* Used by the trace options files */ 10219 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) 10220 tr->trace_flags_index[i] = i; 10221 } 10222 10223 static int __update_tracer(struct trace_array *tr) 10224 { 10225 struct tracer *t; 10226 int ret = 0; 10227 10228 for (t = trace_types; t && !ret; t = t->next) 10229 ret = add_tracer(tr, t); 10230 10231 return ret; 10232 } 10233 10234 static __init int __update_tracer_options(struct trace_array *tr) 10235 { 10236 struct tracers *t; 10237 int ret = 0; 10238 10239 list_for_each_entry(t, &tr->tracers, list) { 10240 ret = add_tracer_options(tr, t); 10241 if (ret < 0) 10242 break; 10243 } 10244 10245 return ret; 10246 } 10247 10248 static __init void update_tracer_options(void) 10249 { 10250 struct trace_array *tr; 10251 10252 guard(mutex)(&trace_types_lock); 10253 tracer_options_updated = true; 10254 list_for_each_entry(tr, &ftrace_trace_arrays, list) 10255 __update_tracer_options(tr); 10256 } 10257 10258 /* Must have trace_types_lock held */ 10259 struct trace_array *trace_array_find(const char *instance) 10260 { 10261 struct trace_array *tr, *found = NULL; 10262 10263 list_for_each_entry(tr, &ftrace_trace_arrays, list) { 10264 if (tr->name && strcmp(tr->name, instance) == 0) { 10265 found = tr; 10266 break; 10267 } 10268 } 10269 10270 return found; 10271 } 10272 10273 struct trace_array *trace_array_find_get(const char *instance) 10274 { 10275 struct trace_array *tr; 10276 10277 guard(mutex)(&trace_types_lock); 10278 tr = trace_array_find(instance); 10279 if (tr) 10280 tr->ref++; 10281 10282 return tr; 10283 } 10284 10285 static int trace_array_create_dir(struct trace_array *tr) 10286 { 10287 int ret; 10288 10289 tr->dir = tracefs_create_dir(tr->name, trace_instance_dir); 10290 if (!tr->dir) 10291 return -EINVAL; 10292 10293 ret = event_trace_add_tracer(tr->dir, tr); 10294 if (ret) { 10295 tracefs_remove(tr->dir); 10296 return ret; 10297 } 10298 10299 init_tracer_tracefs(tr, tr->dir); 10300 ret = __update_tracer(tr); 10301 if (ret) { 10302 event_trace_del_tracer(tr); 10303 tracefs_remove(tr->dir); 10304 return ret; 10305 } 10306 return 0; 10307 } 10308 10309 static struct trace_array * 10310 trace_array_create_systems(const char *name, const char *systems, 10311 unsigned long range_addr_start, 10312 unsigned long range_addr_size) 10313 { 10314 struct trace_array *tr; 10315 int ret; 10316 10317 ret = -ENOMEM; 10318 tr = kzalloc(sizeof(*tr), GFP_KERNEL); 10319 if (!tr) 10320 return ERR_PTR(ret); 10321 10322 tr->name = kstrdup(name, GFP_KERNEL); 10323 if (!tr->name) 10324 goto out_free_tr; 10325 10326 if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL)) 10327 goto out_free_tr; 10328 10329 if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL)) 10330 goto out_free_tr; 10331 10332 if (systems) { 10333 tr->system_names = kstrdup_const(systems, GFP_KERNEL); 10334 if (!tr->system_names) 
10335 goto out_free_tr; 10336 } 10337 10338 /* Only for boot up memory mapped ring buffers */ 10339 tr->range_addr_start = range_addr_start; 10340 tr->range_addr_size = range_addr_size; 10341 10342 tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS; 10343 10344 cpumask_copy(tr->tracing_cpumask, cpu_all_mask); 10345 10346 raw_spin_lock_init(&tr->start_lock); 10347 10348 tr->syscall_buf_sz = global_trace.syscall_buf_sz; 10349 10350 tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; 10351 #ifdef CONFIG_TRACER_MAX_TRACE 10352 spin_lock_init(&tr->snapshot_trigger_lock); 10353 #endif 10354 tr->current_trace = &nop_trace; 10355 tr->current_trace_flags = nop_trace.flags; 10356 10357 INIT_LIST_HEAD(&tr->systems); 10358 INIT_LIST_HEAD(&tr->events); 10359 INIT_LIST_HEAD(&tr->hist_vars); 10360 INIT_LIST_HEAD(&tr->err_log); 10361 INIT_LIST_HEAD(&tr->tracers); 10362 INIT_LIST_HEAD(&tr->marker_list); 10363 10364 #ifdef CONFIG_MODULES 10365 INIT_LIST_HEAD(&tr->mod_events); 10366 #endif 10367 10368 if (allocate_trace_buffers(tr, trace_buf_size) < 0) 10369 goto out_free_tr; 10370 10371 /* The ring buffer is defaultly expanded */ 10372 trace_set_ring_buffer_expanded(tr); 10373 10374 if (ftrace_allocate_ftrace_ops(tr) < 0) 10375 goto out_free_tr; 10376 10377 ftrace_init_trace_array(tr); 10378 10379 init_trace_flags_index(tr); 10380 10381 if (trace_instance_dir) { 10382 ret = trace_array_create_dir(tr); 10383 if (ret) 10384 goto out_free_tr; 10385 } else 10386 __trace_early_add_events(tr); 10387 10388 list_add(&tr->list, &ftrace_trace_arrays); 10389 10390 tr->ref++; 10391 10392 return tr; 10393 10394 out_free_tr: 10395 ftrace_free_ftrace_ops(tr); 10396 free_trace_buffers(tr); 10397 free_cpumask_var(tr->pipe_cpumask); 10398 free_cpumask_var(tr->tracing_cpumask); 10399 kfree_const(tr->system_names); 10400 kfree(tr->range_name); 10401 kfree(tr->name); 10402 kfree(tr); 10403 10404 return ERR_PTR(ret); 10405 } 10406 10407 static struct trace_array *trace_array_create(const char *name) 10408 { 10409 return trace_array_create_systems(name, NULL, 0, 0); 10410 } 10411 10412 static int instance_mkdir(const char *name) 10413 { 10414 struct trace_array *tr; 10415 int ret; 10416 10417 guard(mutex)(&event_mutex); 10418 guard(mutex)(&trace_types_lock); 10419 10420 ret = -EEXIST; 10421 if (trace_array_find(name)) 10422 return -EEXIST; 10423 10424 tr = trace_array_create(name); 10425 10426 ret = PTR_ERR_OR_ZERO(tr); 10427 10428 return ret; 10429 } 10430 10431 #ifdef CONFIG_MMU 10432 static u64 map_pages(unsigned long start, unsigned long size) 10433 { 10434 unsigned long vmap_start, vmap_end; 10435 struct vm_struct *area; 10436 int ret; 10437 10438 area = get_vm_area(size, VM_IOREMAP); 10439 if (!area) 10440 return 0; 10441 10442 vmap_start = (unsigned long) area->addr; 10443 vmap_end = vmap_start + size; 10444 10445 ret = vmap_page_range(vmap_start, vmap_end, 10446 start, pgprot_nx(PAGE_KERNEL)); 10447 if (ret < 0) { 10448 free_vm_area(area); 10449 return 0; 10450 } 10451 10452 return (u64)vmap_start; 10453 } 10454 #else 10455 static inline u64 map_pages(unsigned long start, unsigned long size) 10456 { 10457 return 0; 10458 } 10459 #endif 10460 10461 /** 10462 * trace_array_get_by_name - Create/Lookup a trace array, given its name. 10463 * @name: The name of the trace array to be looked up/created. 10464 * @systems: A list of systems to create event directories for (NULL for all) 10465 * 10466 * Returns pointer to trace array with given name. 10467 * NULL, if it cannot be created. 
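 *
 * A minimal usage sketch from a kernel module (illustrative only; the
 * instance name "my_instance" is just an example and error handling is
 * abbreviated):
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_instance", NULL);
 *	if (!tr)
 *		return -ENOMEM;
 *	...
 *	trace_array_put(tr);
 *	trace_array_destroy(tr);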
10468 * 10469 * NOTE: This function increments the reference counter associated with the 10470 * trace array returned. This makes sure it cannot be freed while in use. 10471 * Use trace_array_put() once the trace array is no longer needed. 10472 * If the trace_array is to be freed, trace_array_destroy() needs to 10473 * be called after the trace_array_put(), or simply let user space delete 10474 * it from the tracefs instances directory. But until the 10475 * trace_array_put() is called, user space can not delete it. 10476 * 10477 */ 10478 struct trace_array *trace_array_get_by_name(const char *name, const char *systems) 10479 { 10480 struct trace_array *tr; 10481 10482 guard(mutex)(&event_mutex); 10483 guard(mutex)(&trace_types_lock); 10484 10485 list_for_each_entry(tr, &ftrace_trace_arrays, list) { 10486 if (tr->name && strcmp(tr->name, name) == 0) { 10487 tr->ref++; 10488 return tr; 10489 } 10490 } 10491 10492 tr = trace_array_create_systems(name, systems, 0, 0); 10493 10494 if (IS_ERR(tr)) 10495 tr = NULL; 10496 else 10497 tr->ref++; 10498 10499 return tr; 10500 } 10501 EXPORT_SYMBOL_GPL(trace_array_get_by_name); 10502 10503 static int __remove_instance(struct trace_array *tr) 10504 { 10505 int i; 10506 10507 /* Reference counter for a newly created trace array = 1. */ 10508 if (tr->ref > 1 || (tr->current_trace && tr->trace_ref)) 10509 return -EBUSY; 10510 10511 list_del(&tr->list); 10512 10513 /* Disable all the flags that were enabled coming in */ 10514 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) { 10515 if ((1ULL << i) & ZEROED_TRACE_FLAGS) 10516 set_tracer_flag(tr, 1ULL << i, 0); 10517 } 10518 10519 if (printk_trace == tr) 10520 update_printk_trace(&global_trace); 10521 10522 if (update_marker_trace(tr, 0)) 10523 synchronize_rcu(); 10524 10525 tracing_set_nop(tr); 10526 clear_ftrace_function_probes(tr); 10527 event_trace_del_tracer(tr); 10528 ftrace_clear_pids(tr); 10529 ftrace_destroy_function_files(tr); 10530 tracefs_remove(tr->dir); 10531 free_percpu(tr->last_func_repeats); 10532 free_trace_buffers(tr); 10533 clear_tracing_err_log(tr); 10534 free_tracers(tr); 10535 10536 if (tr->range_name) { 10537 reserve_mem_release_by_name(tr->range_name); 10538 kfree(tr->range_name); 10539 } 10540 if (tr->flags & TRACE_ARRAY_FL_VMALLOC) 10541 vfree((void *)tr->range_addr_start); 10542 10543 for (i = 0; i < tr->nr_topts; i++) { 10544 kfree(tr->topts[i].topts); 10545 } 10546 kfree(tr->topts); 10547 10548 free_cpumask_var(tr->pipe_cpumask); 10549 free_cpumask_var(tr->tracing_cpumask); 10550 kfree_const(tr->system_names); 10551 kfree(tr->name); 10552 kfree(tr); 10553 10554 return 0; 10555 } 10556 10557 int trace_array_destroy(struct trace_array *this_tr) 10558 { 10559 struct trace_array *tr; 10560 10561 if (!this_tr) 10562 return -EINVAL; 10563 10564 guard(mutex)(&event_mutex); 10565 guard(mutex)(&trace_types_lock); 10566 10567 10568 /* Making sure trace array exists before destroying it. 
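	 * A caller could otherwise hand in a stale pointer for an instance
	 * that user space has already removed through the tracefs
	 * instances directory.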
*/ 10569 list_for_each_entry(tr, &ftrace_trace_arrays, list) { 10570 if (tr == this_tr) 10571 return __remove_instance(tr); 10572 } 10573 10574 return -ENODEV; 10575 } 10576 EXPORT_SYMBOL_GPL(trace_array_destroy); 10577 10578 static int instance_rmdir(const char *name) 10579 { 10580 struct trace_array *tr; 10581 10582 guard(mutex)(&event_mutex); 10583 guard(mutex)(&trace_types_lock); 10584 10585 tr = trace_array_find(name); 10586 if (!tr) 10587 return -ENODEV; 10588 10589 return __remove_instance(tr); 10590 } 10591 10592 static __init void create_trace_instances(struct dentry *d_tracer) 10593 { 10594 struct trace_array *tr; 10595 10596 trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer, 10597 instance_mkdir, 10598 instance_rmdir); 10599 if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n")) 10600 return; 10601 10602 guard(mutex)(&event_mutex); 10603 guard(mutex)(&trace_types_lock); 10604 10605 list_for_each_entry(tr, &ftrace_trace_arrays, list) { 10606 if (!tr->name) 10607 continue; 10608 if (MEM_FAIL(trace_array_create_dir(tr) < 0, 10609 "Failed to create instance directory\n")) 10610 return; 10611 } 10612 } 10613 10614 static void 10615 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer) 10616 { 10617 int cpu; 10618 10619 trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer, 10620 tr, &show_traces_fops); 10621 10622 trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer, 10623 tr, &set_tracer_fops); 10624 10625 trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer, 10626 tr, &tracing_cpumask_fops); 10627 10628 trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer, 10629 tr, &tracing_iter_fops); 10630 10631 trace_create_file("trace", TRACE_MODE_WRITE, d_tracer, 10632 tr, &tracing_fops); 10633 10634 trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer, 10635 tr, &tracing_pipe_fops); 10636 10637 trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer, 10638 tr, &tracing_entries_fops); 10639 10640 trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer, 10641 tr, &tracing_total_entries_fops); 10642 10643 trace_create_file("free_buffer", 0200, d_tracer, 10644 tr, &tracing_free_buffer_fops); 10645 10646 trace_create_file("trace_marker", 0220, d_tracer, 10647 tr, &tracing_mark_fops); 10648 10649 tr->trace_marker_file = __find_event_file(tr, "ftrace", "print"); 10650 10651 trace_create_file("trace_marker_raw", 0220, d_tracer, 10652 tr, &tracing_mark_raw_fops); 10653 10654 trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr, 10655 &trace_clock_fops); 10656 10657 trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer, 10658 tr, &rb_simple_fops); 10659 10660 trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr, 10661 &trace_time_stamp_mode_fops); 10662 10663 tr->buffer_percent = 50; 10664 10665 trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer, 10666 tr, &buffer_percent_fops); 10667 10668 trace_create_file("buffer_subbuf_size_kb", TRACE_MODE_WRITE, d_tracer, 10669 tr, &buffer_subbuf_size_fops); 10670 10671 trace_create_file("syscall_user_buf_size", TRACE_MODE_WRITE, d_tracer, 10672 tr, &tracing_syscall_buf_fops); 10673 10674 create_trace_options_dir(tr); 10675 10676 #ifdef CONFIG_TRACER_MAX_TRACE 10677 trace_create_maxlat_file(tr, d_tracer); 10678 #endif 10679 10680 if (ftrace_create_function_files(tr, d_tracer)) 10681 MEM_FAIL(1, "Could not allocate function filter files"); 10682 10683 if (tr->range_addr_start) { 10684 
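		/*
		 * Boot-mapped (persistent) instances carry data over from the
		 * previous boot, so expose it through "last_boot_info".  They
		 * never get a snapshot buffer (see allocate_trace_buffers()),
		 * which is why no "snapshot" file is created for them.
		 */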
trace_create_file("last_boot_info", TRACE_MODE_READ, d_tracer, 10685 tr, &last_boot_fops); 10686 #ifdef CONFIG_TRACER_SNAPSHOT 10687 } else { 10688 trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer, 10689 tr, &snapshot_fops); 10690 #endif 10691 } 10692 10693 trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer, 10694 tr, &tracing_err_log_fops); 10695 10696 for_each_tracing_cpu(cpu) 10697 tracing_init_tracefs_percpu(tr, cpu); 10698 10699 ftrace_init_tracefs(tr, d_tracer); 10700 } 10701 10702 #ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED 10703 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore) 10704 { 10705 struct vfsmount *mnt; 10706 struct file_system_type *type; 10707 struct fs_context *fc; 10708 int ret; 10709 10710 /* 10711 * To maintain backward compatibility for tools that mount 10712 * debugfs to get to the tracing facility, tracefs is automatically 10713 * mounted to the debugfs/tracing directory. 10714 */ 10715 type = get_fs_type("tracefs"); 10716 if (!type) 10717 return NULL; 10718 10719 fc = fs_context_for_submount(type, mntpt); 10720 put_filesystem(type); 10721 if (IS_ERR(fc)) 10722 return ERR_CAST(fc); 10723 10724 pr_warn("NOTICE: Automounting of tracing to debugfs is deprecated and will be removed in 2030\n"); 10725 10726 ret = vfs_parse_fs_string(fc, "source", "tracefs"); 10727 if (!ret) 10728 mnt = fc_mount(fc); 10729 else 10730 mnt = ERR_PTR(ret); 10731 10732 put_fs_context(fc); 10733 return mnt; 10734 } 10735 #endif 10736 10737 /** 10738 * tracing_init_dentry - initialize top level trace array 10739 * 10740 * This is called when creating files or directories in the tracing 10741 * directory. It is called via fs_initcall() by any of the boot up code 10742 * and expects to return the dentry of the top level tracing directory. 10743 */ 10744 int tracing_init_dentry(void) 10745 { 10746 struct trace_array *tr = &global_trace; 10747 10748 if (security_locked_down(LOCKDOWN_TRACEFS)) { 10749 pr_warn("Tracing disabled due to lockdown\n"); 10750 return -EPERM; 10751 } 10752 10753 /* The top level trace array uses NULL as parent */ 10754 if (tr->dir) 10755 return 0; 10756 10757 if (WARN_ON(!tracefs_initialized())) 10758 return -ENODEV; 10759 10760 #ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED 10761 /* 10762 * As there may still be users that expect the tracing 10763 * files to exist in debugfs/tracing, we must automount 10764 * the tracefs file system there, so older tools still 10765 * work with the newer kernel. 
10766 */ 10767 tr->dir = debugfs_create_automount("tracing", NULL, 10768 trace_automount, NULL); 10769 #endif 10770 10771 return 0; 10772 } 10773 10774 extern struct trace_eval_map *__start_ftrace_eval_maps[]; 10775 extern struct trace_eval_map *__stop_ftrace_eval_maps[]; 10776 10777 static struct workqueue_struct *eval_map_wq __initdata; 10778 static struct work_struct eval_map_work __initdata; 10779 static struct work_struct tracerfs_init_work __initdata; 10780 10781 static void __init eval_map_work_func(struct work_struct *work) 10782 { 10783 int len; 10784 10785 len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps; 10786 trace_event_update_with_eval_map(NULL, __start_ftrace_eval_maps, len); 10787 } 10788 10789 static int __init trace_eval_init(void) 10790 { 10791 INIT_WORK(&eval_map_work, eval_map_work_func); 10792 10793 eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0); 10794 if (!eval_map_wq) { 10795 pr_err("Unable to allocate eval_map_wq\n"); 10796 /* Do work here */ 10797 eval_map_work_func(&eval_map_work); 10798 return -ENOMEM; 10799 } 10800 10801 queue_work(eval_map_wq, &eval_map_work); 10802 return 0; 10803 } 10804 10805 subsys_initcall(trace_eval_init); 10806 10807 static int __init trace_eval_sync(void) 10808 { 10809 /* Make sure the eval map updates are finished */ 10810 if (eval_map_wq) 10811 destroy_workqueue(eval_map_wq); 10812 return 0; 10813 } 10814 10815 late_initcall_sync(trace_eval_sync); 10816 10817 10818 #ifdef CONFIG_MODULES 10819 10820 bool module_exists(const char *module) 10821 { 10822 /* All modules have the symbol __this_module */ 10823 static const char this_mod[] = "__this_module"; 10824 char modname[MODULE_NAME_LEN + sizeof(this_mod) + 2]; 10825 unsigned long val; 10826 int n; 10827 10828 n = snprintf(modname, sizeof(modname), "%s:%s", module, this_mod); 10829 10830 if (n > sizeof(modname) - 1) 10831 return false; 10832 10833 val = module_kallsyms_lookup_name(modname); 10834 return val != 0; 10835 } 10836 10837 static void trace_module_add_evals(struct module *mod) 10838 { 10839 /* 10840 * Modules with bad taint do not have events created, do 10841 * not bother with enums either. 10842 */ 10843 if (trace_module_has_bad_taint(mod)) 10844 return; 10845 10846 /* Even if no trace_evals, this need to sanitize field types. 
*/ 10847 trace_event_update_with_eval_map(mod, mod->trace_evals, mod->num_trace_evals); 10848 } 10849 10850 #ifdef CONFIG_TRACE_EVAL_MAP_FILE 10851 static void trace_module_remove_evals(struct module *mod) 10852 { 10853 union trace_eval_map_item *map; 10854 union trace_eval_map_item **last = &trace_eval_maps; 10855 10856 if (!mod->num_trace_evals) 10857 return; 10858 10859 guard(mutex)(&trace_eval_mutex); 10860 10861 map = trace_eval_maps; 10862 10863 while (map) { 10864 if (map->head.mod == mod) 10865 break; 10866 map = trace_eval_jmp_to_tail(map); 10867 last = &map->tail.next; 10868 map = map->tail.next; 10869 } 10870 if (!map) 10871 return; 10872 10873 *last = trace_eval_jmp_to_tail(map)->tail.next; 10874 kfree(map); 10875 } 10876 #else 10877 static inline void trace_module_remove_evals(struct module *mod) { } 10878 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */ 10879 10880 static void trace_module_record(struct module *mod, bool add) 10881 { 10882 struct trace_array *tr; 10883 unsigned long flags; 10884 10885 list_for_each_entry(tr, &ftrace_trace_arrays, list) { 10886 flags = tr->flags & (TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT); 10887 /* Update any persistent trace array that has already been started */ 10888 if (flags == TRACE_ARRAY_FL_BOOT && add) { 10889 guard(mutex)(&scratch_mutex); 10890 save_mod(mod, tr); 10891 } else if (flags & TRACE_ARRAY_FL_LAST_BOOT) { 10892 /* Update delta if the module loaded in previous boot */ 10893 make_mod_delta(mod, tr); 10894 } 10895 } 10896 } 10897 10898 static int trace_module_notify(struct notifier_block *self, 10899 unsigned long val, void *data) 10900 { 10901 struct module *mod = data; 10902 10903 switch (val) { 10904 case MODULE_STATE_COMING: 10905 trace_module_add_evals(mod); 10906 trace_module_record(mod, true); 10907 break; 10908 case MODULE_STATE_GOING: 10909 trace_module_remove_evals(mod); 10910 trace_module_record(mod, false); 10911 break; 10912 } 10913 10914 return NOTIFY_OK; 10915 } 10916 10917 static struct notifier_block trace_module_nb = { 10918 .notifier_call = trace_module_notify, 10919 .priority = 0, 10920 }; 10921 #endif /* CONFIG_MODULES */ 10922 10923 static __init void tracer_init_tracefs_work_func(struct work_struct *work) 10924 { 10925 10926 event_trace_init(); 10927 10928 init_tracer_tracefs(&global_trace, NULL); 10929 ftrace_init_tracefs_toplevel(&global_trace, NULL); 10930 10931 trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL, 10932 &global_trace, &tracing_thresh_fops); 10933 10934 trace_create_file("README", TRACE_MODE_READ, NULL, 10935 NULL, &tracing_readme_fops); 10936 10937 trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL, 10938 NULL, &tracing_saved_cmdlines_fops); 10939 10940 trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL, 10941 NULL, &tracing_saved_cmdlines_size_fops); 10942 10943 trace_create_file("saved_tgids", TRACE_MODE_READ, NULL, 10944 NULL, &tracing_saved_tgids_fops); 10945 10946 trace_create_eval_file(NULL); 10947 10948 #ifdef CONFIG_MODULES 10949 register_module_notifier(&trace_module_nb); 10950 #endif 10951 10952 #ifdef CONFIG_DYNAMIC_FTRACE 10953 trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL, 10954 NULL, &tracing_dyn_info_fops); 10955 #endif 10956 10957 create_trace_instances(NULL); 10958 10959 update_tracer_options(); 10960 } 10961 10962 static __init int tracer_init_tracefs(void) 10963 { 10964 int ret; 10965 10966 trace_access_lock_init(); 10967 10968 ret = tracing_init_dentry(); 10969 if (ret) 10970 return 0; 10971 10972 if (eval_map_wq) { 10973 
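		/*
		 * Creating all of the tracefs files can take a while, so do
		 * it from the eval_map workqueue when one is available rather
		 * than holding up boot here; otherwise fall back to doing the
		 * work inline.
		 */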
INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func); 10974 queue_work(eval_map_wq, &tracerfs_init_work); 10975 } else { 10976 tracer_init_tracefs_work_func(NULL); 10977 } 10978 10979 if (rv_init_interface()) 10980 pr_err("RV: Error while creating the RV interface\n"); 10981 10982 return 0; 10983 } 10984 10985 fs_initcall(tracer_init_tracefs); 10986 10987 static int trace_die_panic_handler(struct notifier_block *self, 10988 unsigned long ev, void *unused); 10989 10990 static struct notifier_block trace_panic_notifier = { 10991 .notifier_call = trace_die_panic_handler, 10992 .priority = INT_MAX - 1, 10993 }; 10994 10995 static struct notifier_block trace_die_notifier = { 10996 .notifier_call = trace_die_panic_handler, 10997 .priority = INT_MAX - 1, 10998 }; 10999 11000 /* 11001 * The idea is to execute the following die/panic callback early, in order 11002 * to avoid showing irrelevant information in the trace (like other panic 11003 * notifier functions); we are the 2nd to run, after hung_task/rcu_stall 11004 * warnings get disabled (to prevent potential log flooding). 11005 */ 11006 static int trace_die_panic_handler(struct notifier_block *self, 11007 unsigned long ev, void *unused) 11008 { 11009 if (!ftrace_dump_on_oops_enabled()) 11010 return NOTIFY_DONE; 11011 11012 /* The die notifier requires DIE_OOPS to trigger */ 11013 if (self == &trace_die_notifier && ev != DIE_OOPS) 11014 return NOTIFY_DONE; 11015 11016 ftrace_dump(DUMP_PARAM); 11017 11018 return NOTIFY_DONE; 11019 } 11020 11021 /* 11022 * printk is set to max of 1024, we really don't need it that big. 11023 * Nothing should be printing 1000 characters anyway. 11024 */ 11025 #define TRACE_MAX_PRINT 1000 11026 11027 /* 11028 * Define here KERN_TRACE so that we have one place to modify 11029 * it if we decide to change what log level the ftrace dump 11030 * should be at. 11031 */ 11032 #define KERN_TRACE KERN_EMERG 11033 11034 void 11035 trace_printk_seq(struct trace_seq *s) 11036 { 11037 /* Probably should print a warning here. */ 11038 if (s->seq.len >= TRACE_MAX_PRINT) 11039 s->seq.len = TRACE_MAX_PRINT; 11040 11041 /* 11042 * More paranoid code. Although the buffer size is set to 11043 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just 11044 * an extra layer of protection. 11045 */ 11046 if (WARN_ON_ONCE(s->seq.len >= s->seq.size)) 11047 s->seq.len = s->seq.size - 1; 11048 11049 /* should be zero ended, but we are paranoid. */ 11050 s->buffer[s->seq.len] = 0; 11051 11052 printk(KERN_TRACE "%s", s->buffer); 11053 11054 trace_seq_init(s); 11055 } 11056 11057 static void trace_init_iter(struct trace_iterator *iter, struct trace_array *tr) 11058 { 11059 iter->tr = tr; 11060 iter->trace = iter->tr->current_trace; 11061 iter->cpu_file = RING_BUFFER_ALL_CPUS; 11062 iter->array_buffer = &tr->array_buffer; 11063 11064 if (iter->trace && iter->trace->open) 11065 iter->trace->open(iter); 11066 11067 /* Annotate start of buffers if we had overruns */ 11068 if (ring_buffer_overruns(iter->array_buffer->buffer)) 11069 iter->iter_flags |= TRACE_FILE_ANNOTATE; 11070 11071 /* Output in nanoseconds only if we are using a clock in nanoseconds. 
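	 * (For example, the "local" and "global" clocks count in
	 * nanoseconds, while counter style clocks such as "counter" or
	 * "x86-tsc" do not, so their raw values are printed instead.)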
*/ 11072 if (trace_clocks[iter->tr->clock_id].in_ns) 11073 iter->iter_flags |= TRACE_FILE_TIME_IN_NS; 11074 11075 /* Cannot use kmalloc for iter.temp and iter.fmt */ 11076 iter->temp = static_temp_buf; 11077 iter->temp_size = STATIC_TEMP_BUF_SIZE; 11078 iter->fmt = static_fmt_buf; 11079 iter->fmt_size = STATIC_FMT_BUF_SIZE; 11080 } 11081 11082 void trace_init_global_iter(struct trace_iterator *iter) 11083 { 11084 trace_init_iter(iter, &global_trace); 11085 } 11086 11087 static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_mode) 11088 { 11089 /* use static because iter can be a bit big for the stack */ 11090 static struct trace_iterator iter; 11091 unsigned int old_userobj; 11092 unsigned long flags; 11093 int cnt = 0; 11094 11095 /* 11096 * Always turn off tracing when we dump. 11097 * We don't need to show trace output of what happens 11098 * between multiple crashes. 11099 * 11100 * If the user does a sysrq-z, then they can re-enable 11101 * tracing with echo 1 > tracing_on. 11102 */ 11103 tracer_tracing_off(tr); 11104 11105 local_irq_save(flags); 11106 11107 /* Simulate the iterator */ 11108 trace_init_iter(&iter, tr); 11109 11110 /* While dumping, do not allow the buffer to be enabled */ 11111 tracer_tracing_disable(tr); 11112 11113 old_userobj = tr->trace_flags & TRACE_ITER(SYM_USEROBJ); 11114 11115 /* don't look at user memory in panic mode */ 11116 tr->trace_flags &= ~TRACE_ITER(SYM_USEROBJ); 11117 11118 if (dump_mode == DUMP_ORIG) 11119 iter.cpu_file = raw_smp_processor_id(); 11120 else 11121 iter.cpu_file = RING_BUFFER_ALL_CPUS; 11122 11123 if (tr == &global_trace) 11124 printk(KERN_TRACE "Dumping ftrace buffer:\n"); 11125 else 11126 printk(KERN_TRACE "Dumping ftrace instance %s buffer:\n", tr->name); 11127 11128 /* Did function tracer already get disabled? */ 11129 if (ftrace_is_dead()) { 11130 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n"); 11131 printk("# MAY BE MISSING FUNCTION EVENTS\n"); 11132 } 11133 11134 /* 11135 * We need to stop all tracing on all CPUs to read 11136 * the next buffer. This is a bit expensive, but is 11137 * not done often. We fill in all that we can read, 11138 * and then release the locks again. 
11139 */ 11140 11141 while (!trace_empty(&iter)) { 11142 11143 if (!cnt) 11144 printk(KERN_TRACE "---------------------------------\n"); 11145 11146 cnt++; 11147 11148 trace_iterator_reset(&iter); 11149 iter.iter_flags |= TRACE_FILE_LAT_FMT; 11150 11151 if (trace_find_next_entry_inc(&iter) != NULL) { 11152 int ret; 11153 11154 ret = print_trace_line(&iter); 11155 if (ret != TRACE_TYPE_NO_CONSUME) 11156 trace_consume(&iter); 11157 11158 trace_printk_seq(&iter.seq); 11159 } 11160 touch_nmi_watchdog(); 11161 } 11162 11163 if (!cnt) 11164 printk(KERN_TRACE " (ftrace buffer empty)\n"); 11165 else 11166 printk(KERN_TRACE "---------------------------------\n"); 11167 11168 tr->trace_flags |= old_userobj; 11169 11170 tracer_tracing_enable(tr); 11171 local_irq_restore(flags); 11172 } 11173 11174 static void ftrace_dump_by_param(void) 11175 { 11176 bool first_param = true; 11177 char dump_param[MAX_TRACER_SIZE]; 11178 char *buf, *token, *inst_name; 11179 struct trace_array *tr; 11180 11181 strscpy(dump_param, ftrace_dump_on_oops, MAX_TRACER_SIZE); 11182 buf = dump_param; 11183 11184 while ((token = strsep(&buf, ",")) != NULL) { 11185 if (first_param) { 11186 first_param = false; 11187 if (!strcmp("0", token)) 11188 continue; 11189 else if (!strcmp("1", token)) { 11190 ftrace_dump_one(&global_trace, DUMP_ALL); 11191 continue; 11192 } 11193 else if (!strcmp("2", token) || 11194 !strcmp("orig_cpu", token)) { 11195 ftrace_dump_one(&global_trace, DUMP_ORIG); 11196 continue; 11197 } 11198 } 11199 11200 inst_name = strsep(&token, "="); 11201 tr = trace_array_find(inst_name); 11202 if (!tr) { 11203 printk(KERN_TRACE "Instance %s not found\n", inst_name); 11204 continue; 11205 } 11206 11207 if (token && (!strcmp("2", token) || 11208 !strcmp("orig_cpu", token))) 11209 ftrace_dump_one(tr, DUMP_ORIG); 11210 else 11211 ftrace_dump_one(tr, DUMP_ALL); 11212 } 11213 } 11214 11215 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) 11216 { 11217 static atomic_t dump_running; 11218 11219 /* Only allow one dump user at a time. 
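	 * The atomic_inc_return()/atomic_dec() pair below acts as a simple
	 * try-lock: a second dumper that races in just backs out instead of
	 * interleaving its output with ours.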
*/ 11220 if (atomic_inc_return(&dump_running) != 1) { 11221 atomic_dec(&dump_running); 11222 return; 11223 } 11224 11225 switch (oops_dump_mode) { 11226 case DUMP_ALL: 11227 ftrace_dump_one(&global_trace, DUMP_ALL); 11228 break; 11229 case DUMP_ORIG: 11230 ftrace_dump_one(&global_trace, DUMP_ORIG); 11231 break; 11232 case DUMP_PARAM: 11233 ftrace_dump_by_param(); 11234 break; 11235 case DUMP_NONE: 11236 break; 11237 default: 11238 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n"); 11239 ftrace_dump_one(&global_trace, DUMP_ALL); 11240 } 11241 11242 atomic_dec(&dump_running); 11243 } 11244 EXPORT_SYMBOL_GPL(ftrace_dump); 11245 11246 #define WRITE_BUFSIZE 4096 11247 11248 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer, 11249 size_t count, loff_t *ppos, 11250 int (*createfn)(const char *)) 11251 { 11252 char *kbuf __free(kfree) = NULL; 11253 char *buf, *tmp; 11254 int ret = 0; 11255 size_t done = 0; 11256 size_t size; 11257 11258 kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL); 11259 if (!kbuf) 11260 return -ENOMEM; 11261 11262 while (done < count) { 11263 size = count - done; 11264 11265 if (size >= WRITE_BUFSIZE) 11266 size = WRITE_BUFSIZE - 1; 11267 11268 if (copy_from_user(kbuf, buffer + done, size)) 11269 return -EFAULT; 11270 11271 kbuf[size] = '\0'; 11272 buf = kbuf; 11273 do { 11274 tmp = strchr(buf, '\n'); 11275 if (tmp) { 11276 *tmp = '\0'; 11277 size = tmp - buf + 1; 11278 } else { 11279 size = strlen(buf); 11280 if (done + size < count) { 11281 if (buf != kbuf) 11282 break; 11283 /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */ 11284 pr_warn("Line length is too long: Should be less than %d\n", 11285 WRITE_BUFSIZE - 2); 11286 return -EINVAL; 11287 } 11288 } 11289 done += size; 11290 11291 /* Remove comments */ 11292 tmp = strchr(buf, '#'); 11293 11294 if (tmp) 11295 *tmp = '\0'; 11296 11297 ret = createfn(buf); 11298 if (ret) 11299 return ret; 11300 buf += size; 11301 11302 } while (done < count); 11303 } 11304 return done; 11305 } 11306 11307 #ifdef CONFIG_TRACER_MAX_TRACE 11308 __init static bool tr_needs_alloc_snapshot(const char *name) 11309 { 11310 char *test; 11311 int len = strlen(name); 11312 bool ret; 11313 11314 if (!boot_snapshot_index) 11315 return false; 11316 11317 if (strncmp(name, boot_snapshot_info, len) == 0 && 11318 boot_snapshot_info[len] == '\t') 11319 return true; 11320 11321 test = kmalloc(strlen(name) + 3, GFP_KERNEL); 11322 if (!test) 11323 return false; 11324 11325 sprintf(test, "\t%s\t", name); 11326 ret = strstr(boot_snapshot_info, test) == NULL; 11327 kfree(test); 11328 return ret; 11329 } 11330 11331 __init static void do_allocate_snapshot(const char *name) 11332 { 11333 if (!tr_needs_alloc_snapshot(name)) 11334 return; 11335 11336 /* 11337 * When allocate_snapshot is set, the next call to 11338 * allocate_trace_buffers() (called by trace_array_get_by_name()) 11339 * will allocate the snapshot buffer. That will also clear 11340 * this flag. 
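	 * For example, booting with both "trace_instance=foo" and
	 * "ftrace_boot_snapshot=foo" lands here for "foo", so the instance
	 * created from the command line gets its snapshot buffer allocated
	 * up front.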
11341 */ 11342 allocate_snapshot = true; 11343 } 11344 #else 11345 static inline void do_allocate_snapshot(const char *name) { } 11346 #endif 11347 11348 __init static int backup_instance_area(const char *backup, 11349 unsigned long *addr, phys_addr_t *size) 11350 { 11351 struct trace_array *backup_tr; 11352 void *allocated_vaddr = NULL; 11353 11354 backup_tr = trace_array_get_by_name(backup, NULL); 11355 if (!backup_tr) { 11356 pr_warn("Tracing: Instance %s is not found.\n", backup); 11357 return -ENOENT; 11358 } 11359 11360 if (!(backup_tr->flags & TRACE_ARRAY_FL_BOOT)) { 11361 pr_warn("Tracing: Instance %s is not boot mapped.\n", backup); 11362 trace_array_put(backup_tr); 11363 return -EINVAL; 11364 } 11365 11366 *size = backup_tr->range_addr_size; 11367 11368 allocated_vaddr = vzalloc(*size); 11369 if (!allocated_vaddr) { 11370 pr_warn("Tracing: Failed to allocate memory for copying instance %s (size 0x%lx)\n", 11371 backup, (unsigned long)*size); 11372 trace_array_put(backup_tr); 11373 return -ENOMEM; 11374 } 11375 11376 memcpy(allocated_vaddr, 11377 (void *)backup_tr->range_addr_start, (size_t)*size); 11378 *addr = (unsigned long)allocated_vaddr; 11379 11380 trace_array_put(backup_tr); 11381 return 0; 11382 } 11383 11384 __init static void enable_instances(void) 11385 { 11386 struct trace_array *tr; 11387 bool memmap_area = false; 11388 char *curr_str; 11389 char *name; 11390 char *str; 11391 char *tok; 11392 11393 /* A tab is always appended */ 11394 boot_instance_info[boot_instance_index - 1] = '\0'; 11395 str = boot_instance_info; 11396 11397 while ((curr_str = strsep(&str, "\t"))) { 11398 phys_addr_t start = 0; 11399 phys_addr_t size = 0; 11400 unsigned long addr = 0; 11401 bool traceprintk = false; 11402 bool traceoff = false; 11403 char *flag_delim; 11404 char *addr_delim; 11405 char *rname __free(kfree) = NULL; 11406 char *backup; 11407 11408 tok = strsep(&curr_str, ","); 11409 11410 name = strsep(&tok, "="); 11411 backup = tok; 11412 11413 flag_delim = strchr(name, '^'); 11414 addr_delim = strchr(name, '@'); 11415 11416 if (addr_delim) 11417 *addr_delim++ = '\0'; 11418 11419 if (flag_delim) 11420 *flag_delim++ = '\0'; 11421 11422 if (backup) { 11423 if (backup_instance_area(backup, &addr, &size) < 0) 11424 continue; 11425 } 11426 11427 if (flag_delim) { 11428 char *flag; 11429 11430 while ((flag = strsep(&flag_delim, "^"))) { 11431 if (strcmp(flag, "traceoff") == 0) { 11432 traceoff = true; 11433 } else if ((strcmp(flag, "printk") == 0) || 11434 (strcmp(flag, "traceprintk") == 0) || 11435 (strcmp(flag, "trace_printk") == 0)) { 11436 traceprintk = true; 11437 } else { 11438 pr_info("Tracing: Invalid instance flag '%s' for %s\n", 11439 flag, name); 11440 } 11441 } 11442 } 11443 11444 tok = addr_delim; 11445 if (tok && isdigit(*tok)) { 11446 start = memparse(tok, &tok); 11447 if (!start) { 11448 pr_warn("Tracing: Invalid boot instance address for %s\n", 11449 name); 11450 continue; 11451 } 11452 if (*tok != ':') { 11453 pr_warn("Tracing: No size specified for instance %s\n", name); 11454 continue; 11455 } 11456 tok++; 11457 size = memparse(tok, &tok); 11458 if (!size) { 11459 pr_warn("Tracing: Invalid boot instance size for %s\n", 11460 name); 11461 continue; 11462 } 11463 memmap_area = true; 11464 } else if (tok) { 11465 if (!reserve_mem_find_by_name(tok, &start, &size)) { 11466 start = 0; 11467 pr_warn("Failed to map boot instance %s to %s\n", name, tok); 11468 continue; 11469 } 11470 rname = kstrdup(tok, GFP_KERNEL); 11471 } 11472 11473 if (start) { 11474 /* Start and size 
must be page aligned */ 11475 if (start & ~PAGE_MASK) { 11476 pr_warn("Tracing: mapping start addr %pa is not page aligned\n", &start); 11477 continue; 11478 } 11479 if (size & ~PAGE_MASK) { 11480 pr_warn("Tracing: mapping size %pa is not page aligned\n", &size); 11481 continue; 11482 } 11483 11484 if (memmap_area) 11485 addr = map_pages(start, size); 11486 else 11487 addr = (unsigned long)phys_to_virt(start); 11488 if (addr) { 11489 pr_info("Tracing: mapped boot instance %s at physical memory %pa of size 0x%lx\n", 11490 name, &start, (unsigned long)size); 11491 } else { 11492 pr_warn("Tracing: Failed to map boot instance %s\n", name); 11493 continue; 11494 } 11495 } else { 11496 /* Only non mapped buffers have snapshot buffers */ 11497 if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE)) 11498 do_allocate_snapshot(name); 11499 } 11500 11501 tr = trace_array_create_systems(name, NULL, addr, size); 11502 if (IS_ERR(tr)) { 11503 pr_warn("Tracing: Failed to create instance buffer %s\n", curr_str); 11504 continue; 11505 } 11506 11507 if (traceoff) 11508 tracer_tracing_off(tr); 11509 11510 if (traceprintk) 11511 update_printk_trace(tr); 11512 11513 /* 11514 * memmap'd buffers can not be freed. 11515 */ 11516 if (memmap_area) { 11517 tr->flags |= TRACE_ARRAY_FL_MEMMAP; 11518 tr->ref++; 11519 } 11520 11521 /* 11522 * Backup buffers can be freed but need vfree(). 11523 */ 11524 if (backup) 11525 tr->flags |= TRACE_ARRAY_FL_VMALLOC; 11526 11527 if (start || backup) { 11528 tr->flags |= TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT; 11529 tr->range_name = no_free_ptr(rname); 11530 } 11531 11532 while ((tok = strsep(&curr_str, ","))) { 11533 early_enable_events(tr, tok, true); 11534 } 11535 } 11536 } 11537 11538 __init static int tracer_alloc_buffers(void) 11539 { 11540 int ring_buf_size; 11541 int ret = -ENOMEM; 11542 11543 11544 if (security_locked_down(LOCKDOWN_TRACEFS)) { 11545 pr_warn("Tracing disabled due to lockdown\n"); 11546 return -EPERM; 11547 } 11548 11549 /* 11550 * Make sure we don't accidentally add more trace options 11551 * than we have bits for. 11552 */ 11553 BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE); 11554 11555 if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL)) 11556 return -ENOMEM; 11557 11558 if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL)) 11559 goto out_free_buffer_mask; 11560 11561 /* Only allocate trace_printk buffers if a trace_printk exists */ 11562 if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt) 11563 /* Must be called before global_trace.buffer is allocated */ 11564 trace_printk_init_buffers(); 11565 11566 /* To save memory, keep the ring buffer size to its minimum */ 11567 if (global_trace.ring_buffer_expanded) 11568 ring_buf_size = trace_buf_size; 11569 else 11570 ring_buf_size = 1; 11571 11572 cpumask_copy(tracing_buffer_mask, cpu_possible_mask); 11573 cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask); 11574 11575 raw_spin_lock_init(&global_trace.start_lock); 11576 11577 /* 11578 * The prepare callbacks allocates some memory for the ring buffer. We 11579 * don't free the buffer if the CPU goes down. If we were to free 11580 * the buffer, then the user would lose any trace that was in the 11581 * buffer. The memory will be removed once the "instance" is removed. 
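	 * (A CPU that goes offline and later comes back online therefore
	 * still has whatever trace data it recorded before going down.)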
11582 */ 11583 ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE, 11584 "trace/RB:prepare", trace_rb_cpu_prepare, 11585 NULL); 11586 if (ret < 0) 11587 goto out_free_cpumask; 11588 /* Used for event triggers */ 11589 ret = -ENOMEM; 11590 temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE); 11591 if (!temp_buffer) 11592 goto out_rm_hp_state; 11593 11594 if (trace_create_savedcmd() < 0) 11595 goto out_free_temp_buffer; 11596 11597 if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL)) 11598 goto out_free_savedcmd; 11599 11600 /* TODO: make the number of buffers hot pluggable with CPUS */ 11601 if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) { 11602 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n"); 11603 goto out_free_pipe_cpumask; 11604 } 11605 if (global_trace.buffer_disabled) 11606 tracing_off(); 11607 11608 if (trace_boot_clock) { 11609 ret = tracing_set_clock(&global_trace, trace_boot_clock); 11610 if (ret < 0) 11611 pr_warn("Trace clock %s not defined, going back to default\n", 11612 trace_boot_clock); 11613 } 11614 11615 /* 11616 * register_tracer() might reference current_trace, so it 11617 * needs to be set before we register anything. This is 11618 * just a bootstrap of current_trace anyway. 11619 */ 11620 global_trace.current_trace = &nop_trace; 11621 global_trace.current_trace_flags = nop_trace.flags; 11622 11623 global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; 11624 #ifdef CONFIG_TRACER_MAX_TRACE 11625 spin_lock_init(&global_trace.snapshot_trigger_lock); 11626 #endif 11627 ftrace_init_global_array_ops(&global_trace); 11628 11629 #ifdef CONFIG_MODULES 11630 INIT_LIST_HEAD(&global_trace.mod_events); 11631 #endif 11632 11633 init_trace_flags_index(&global_trace); 11634 11635 INIT_LIST_HEAD(&global_trace.tracers); 11636 11637 /* All seems OK, enable tracing */ 11638 tracing_disabled = 0; 11639 11640 atomic_notifier_chain_register(&panic_notifier_list, 11641 &trace_panic_notifier); 11642 11643 register_die_notifier(&trace_die_notifier); 11644 11645 global_trace.flags = TRACE_ARRAY_FL_GLOBAL; 11646 11647 global_trace.syscall_buf_sz = syscall_buf_size; 11648 11649 INIT_LIST_HEAD(&global_trace.systems); 11650 INIT_LIST_HEAD(&global_trace.events); 11651 INIT_LIST_HEAD(&global_trace.hist_vars); 11652 INIT_LIST_HEAD(&global_trace.err_log); 11653 list_add(&global_trace.marker_list, &marker_copies); 11654 list_add(&global_trace.list, &ftrace_trace_arrays); 11655 11656 register_tracer(&nop_trace); 11657 11658 /* Function tracing may start here (via kernel command line) */ 11659 init_function_trace(); 11660 11661 apply_trace_boot_options(); 11662 11663 register_snapshot_cmd(); 11664 11665 return 0; 11666 11667 out_free_pipe_cpumask: 11668 free_cpumask_var(global_trace.pipe_cpumask); 11669 out_free_savedcmd: 11670 trace_free_saved_cmdlines_buffer(); 11671 out_free_temp_buffer: 11672 ring_buffer_free(temp_buffer); 11673 out_rm_hp_state: 11674 cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE); 11675 out_free_cpumask: 11676 free_cpumask_var(global_trace.tracing_cpumask); 11677 out_free_buffer_mask: 11678 free_cpumask_var(tracing_buffer_mask); 11679 return ret; 11680 } 11681 11682 #ifdef CONFIG_FUNCTION_TRACER 11683 /* Used to set module cached ftrace filtering at boot up */ 11684 struct trace_array *trace_get_global_array(void) 11685 { 11686 return &global_trace; 11687 } 11688 #endif 11689 11690 void __init ftrace_boot_snapshot(void) 11691 { 11692 #ifdef CONFIG_TRACER_MAX_TRACE 11693 struct trace_array *tr; 11694 11695 if 
(!snapshot_at_boot) 11696 return; 11697 11698 list_for_each_entry(tr, &ftrace_trace_arrays, list) { 11699 if (!tr->allocated_snapshot) 11700 continue; 11701 11702 tracing_snapshot_instance(tr); 11703 trace_array_puts(tr, "** Boot snapshot taken **\n"); 11704 } 11705 #endif 11706 } 11707 11708 void __init early_trace_init(void) 11709 { 11710 if (tracepoint_printk) { 11711 tracepoint_print_iter = 11712 kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL); 11713 if (MEM_FAIL(!tracepoint_print_iter, 11714 "Failed to allocate trace iterator\n")) 11715 tracepoint_printk = 0; 11716 else 11717 static_key_enable(&tracepoint_printk_key.key); 11718 } 11719 tracer_alloc_buffers(); 11720 11721 init_events(); 11722 } 11723 11724 void __init trace_init(void) 11725 { 11726 trace_event_init(); 11727 11728 if (boot_instance_index) 11729 enable_instances(); 11730 } 11731 11732 __init static void clear_boot_tracer(void) 11733 { 11734 /* 11735 * The default tracer at boot buffer is an init section. 11736 * This function is called in lateinit. If we did not 11737 * find the boot tracer, then clear it out, to prevent 11738 * later registration from accessing the buffer that is 11739 * about to be freed. 11740 */ 11741 if (!default_bootup_tracer) 11742 return; 11743 11744 printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n", 11745 default_bootup_tracer); 11746 default_bootup_tracer = NULL; 11747 } 11748 11749 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK 11750 __init static void tracing_set_default_clock(void) 11751 { 11752 /* sched_clock_stable() is determined in late_initcall */ 11753 if (!trace_boot_clock && !sched_clock_stable()) { 11754 if (security_locked_down(LOCKDOWN_TRACEFS)) { 11755 pr_warn("Can not set tracing clock due to lockdown\n"); 11756 return; 11757 } 11758 11759 printk(KERN_WARNING 11760 "Unstable clock detected, switching default tracing clock to \"global\"\n" 11761 "If you want to keep using the local clock, then add:\n" 11762 " \"trace_clock=local\"\n" 11763 "on the kernel command line\n"); 11764 tracing_set_clock(&global_trace, "global"); 11765 } 11766 } 11767 #else 11768 static inline void tracing_set_default_clock(void) { } 11769 #endif 11770 11771 __init static int late_trace_init(void) 11772 { 11773 if (tracepoint_printk && tracepoint_printk_stop_on_boot) { 11774 static_key_disable(&tracepoint_printk_key.key); 11775 tracepoint_printk = 0; 11776 } 11777 11778 if (traceoff_after_boot) 11779 tracing_off(); 11780 11781 tracing_set_default_clock(); 11782 clear_boot_tracer(); 11783 return 0; 11784 } 11785 11786 late_initcall_sync(late_trace_init); 11787