// SPDX-License-Identifier: GPL-2.0
/*
 * ring buffer based function tracer
 *
 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
 *
 * Originally taken from the RT patch by:
 *    Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Based on code from the latency_tracer, that is:
 *  Copyright (C) 2004-2006 Ingo Molnar
 *  Copyright (C) 2004 Nadia Yvette Chambers
 */
#include <linux/ring_buffer.h>
#include <linux/utsname.h>
#include <linux/stacktrace.h>
#include <linux/writeback.h>
#include <linux/kallsyms.h>
#include <linux/security.h>
#include <linux/seq_file.h>
#include <linux/irqflags.h>
#include <linux/syscalls.h>
#include <linux/debugfs.h>
#include <linux/tracefs.h>
#include <linux/pagemap.h>
#include <linux/hardirq.h>
#include <linux/linkage.h>
#include <linux/uaccess.h>
#include <linux/cleanup.h>
#include <linux/vmalloc.h>
#include <linux/ftrace.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/splice.h>
#include <linux/kdebug.h>
#include <linux/string.h>
#include <linux/mount.h>
#include <linux/rwsem.h>
#include <linux/slab.h>
#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/panic_notifier.h>
#include <linux/poll.h>
#include <linux/nmi.h>
#include <linux/fs.h>
#include <linux/trace.h>
#include <linux/sched/clock.h>
#include <linux/sched/rt.h>
#include <linux/fsnotify.h>
#include <linux/irq_work.h>
#include <linux/workqueue.h>
#include <linux/sort.h>
#include <linux/io.h> /* vmap_page_range() */
#include <linux/fs_context.h>

#include <asm/setup.h> /* COMMAND_LINE_SIZE */

#include "trace.h"
#include "trace_output.h"

#ifdef CONFIG_FTRACE_STARTUP_TEST
/*
 * We need to change this state when a selftest is running.
 * A selftest will look into the ring buffer to count the entries
 * inserted during the selftest, although concurrent insertions into
 * the ring buffer, such as trace_printk(), could occur at the same
 * time, giving false positive or negative results.
 */
bool __read_mostly tracing_selftest_running;

/*
 * If boot-time tracing (including tracers/events set up via the kernel
 * command line) is running, we do not want to run the selftests.
 */
bool __read_mostly tracing_selftest_disabled;

void __init disable_tracing_selftest(const char *reason)
{
	if (!tracing_selftest_disabled) {
		tracing_selftest_disabled = true;
		pr_info("Ftrace startup test is disabled due to %s\n", reason);
	}
}
#else
#define tracing_selftest_disabled 0
#endif

/* Pipe tracepoints to printk */
static struct trace_iterator *tracepoint_print_iter;
int tracepoint_printk;
static bool tracepoint_printk_stop_on_boot __initdata;
static bool traceoff_after_boot __initdata;
static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);

/* Store tracers and their flags per instance */
struct tracers {
	struct list_head	list;
	struct tracer		*tracer;
	struct tracer_flags	*flags;
};

/*
 * To prevent the comm cache from being overwritten when no
 * tracing is active, only save the comm when a trace event
 * occurred.
 */
DEFINE_PER_CPU(bool, trace_taskinfo_save);

/*
 * Kill all tracing for good (never come back).
 * It is initialized to 1 but will turn to zero if the initialization
 * of the tracer is successful.
But that is the only place that sets 114 * this back to zero. 115 */ 116 int tracing_disabled = 1; 117 118 cpumask_var_t __read_mostly tracing_buffer_mask; 119 120 #define MAX_TRACER_SIZE 100 121 /* 122 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops 123 * 124 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops 125 * is set, then ftrace_dump is called. This will output the contents 126 * of the ftrace buffers to the console. This is very useful for 127 * capturing traces that lead to crashes and outputting it to a 128 * serial console. 129 * 130 * It is default off, but you can enable it with either specifying 131 * "ftrace_dump_on_oops" in the kernel command line, or setting 132 * /proc/sys/kernel/ftrace_dump_on_oops 133 * Set 1 if you want to dump buffers of all CPUs 134 * Set 2 if you want to dump the buffer of the CPU that triggered oops 135 * Set instance name if you want to dump the specific trace instance 136 * Multiple instance dump is also supported, and instances are separated 137 * by commas. 138 */ 139 /* Set to string format zero to disable by default */ 140 static char ftrace_dump_on_oops[MAX_TRACER_SIZE] = "0"; 141 142 /* When set, tracing will stop when a WARN*() is hit */ 143 static int __disable_trace_on_warning; 144 145 int tracepoint_printk_sysctl(const struct ctl_table *table, int write, 146 void *buffer, size_t *lenp, loff_t *ppos); 147 static const struct ctl_table trace_sysctl_table[] = { 148 { 149 .procname = "ftrace_dump_on_oops", 150 .data = &ftrace_dump_on_oops, 151 .maxlen = MAX_TRACER_SIZE, 152 .mode = 0644, 153 .proc_handler = proc_dostring, 154 }, 155 { 156 .procname = "traceoff_on_warning", 157 .data = &__disable_trace_on_warning, 158 .maxlen = sizeof(__disable_trace_on_warning), 159 .mode = 0644, 160 .proc_handler = proc_dointvec, 161 }, 162 { 163 .procname = "tracepoint_printk", 164 .data = &tracepoint_printk, 165 .maxlen = sizeof(tracepoint_printk), 166 .mode = 0644, 167 .proc_handler = tracepoint_printk_sysctl, 168 }, 169 }; 170 171 static int __init init_trace_sysctls(void) 172 { 173 register_sysctl_init("kernel", trace_sysctl_table); 174 return 0; 175 } 176 subsys_initcall(init_trace_sysctls); 177 178 #ifdef CONFIG_TRACE_EVAL_MAP_FILE 179 /* Map of enums to their values, for "eval_map" file */ 180 struct trace_eval_map_head { 181 struct module *mod; 182 unsigned long length; 183 }; 184 185 union trace_eval_map_item; 186 187 struct trace_eval_map_tail { 188 /* 189 * "end" is first and points to NULL as it must be different 190 * than "mod" or "eval_string" 191 */ 192 union trace_eval_map_item *next; 193 const char *end; /* points to NULL */ 194 }; 195 196 static DEFINE_MUTEX(trace_eval_mutex); 197 198 /* 199 * The trace_eval_maps are saved in an array with two extra elements, 200 * one at the beginning, and one at the end. The beginning item contains 201 * the count of the saved maps (head.length), and the module they 202 * belong to if not built in (head.mod). The ending item contains a 203 * pointer to the next array of saved eval_map items. 
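 *
 * A layout sketch of one such saved array, assuming N maps were
 * recorded (an illustration of the description above, not a
 * definition):
 *
 *	item[0]      head  (head.mod, head.length = N)
 *	item[1..N]   the N saved trace_eval_map entries themselves
 *	item[N+1]    tail  (tail.next -> next saved array, tail.end = NULL)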
204 */ 205 union trace_eval_map_item { 206 struct trace_eval_map map; 207 struct trace_eval_map_head head; 208 struct trace_eval_map_tail tail; 209 }; 210 211 static union trace_eval_map_item *trace_eval_maps; 212 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */ 213 214 int tracing_set_tracer(struct trace_array *tr, const char *buf); 215 static void ftrace_trace_userstack(struct trace_array *tr, 216 struct trace_buffer *buffer, 217 unsigned int trace_ctx); 218 219 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata; 220 static char *default_bootup_tracer; 221 222 static bool allocate_snapshot; 223 static bool snapshot_at_boot; 224 225 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata; 226 static int boot_instance_index; 227 228 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata; 229 static int boot_snapshot_index; 230 231 static int __init set_cmdline_ftrace(char *str) 232 { 233 strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE); 234 default_bootup_tracer = bootup_tracer_buf; 235 /* We are using ftrace early, expand it */ 236 trace_set_ring_buffer_expanded(NULL); 237 return 1; 238 } 239 __setup("ftrace=", set_cmdline_ftrace); 240 241 int ftrace_dump_on_oops_enabled(void) 242 { 243 if (!strcmp("0", ftrace_dump_on_oops)) 244 return 0; 245 else 246 return 1; 247 } 248 249 static int __init set_ftrace_dump_on_oops(char *str) 250 { 251 if (!*str) { 252 strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE); 253 return 1; 254 } 255 256 if (*str == ',') { 257 strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE); 258 strscpy(ftrace_dump_on_oops + 1, str, MAX_TRACER_SIZE - 1); 259 return 1; 260 } 261 262 if (*str++ == '=') { 263 strscpy(ftrace_dump_on_oops, str, MAX_TRACER_SIZE); 264 return 1; 265 } 266 267 return 0; 268 } 269 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops); 270 271 static int __init stop_trace_on_warning(char *str) 272 { 273 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0)) 274 __disable_trace_on_warning = 1; 275 return 1; 276 } 277 __setup("traceoff_on_warning", stop_trace_on_warning); 278 279 static int __init boot_alloc_snapshot(char *str) 280 { 281 char *slot = boot_snapshot_info + boot_snapshot_index; 282 int left = sizeof(boot_snapshot_info) - boot_snapshot_index; 283 int ret; 284 285 if (str[0] == '=') { 286 str++; 287 if (strlen(str) >= left) 288 return -1; 289 290 ret = snprintf(slot, left, "%s\t", str); 291 boot_snapshot_index += ret; 292 } else { 293 allocate_snapshot = true; 294 /* We also need the main ring buffer expanded */ 295 trace_set_ring_buffer_expanded(NULL); 296 } 297 return 1; 298 } 299 __setup("alloc_snapshot", boot_alloc_snapshot); 300 301 302 static int __init boot_snapshot(char *str) 303 { 304 snapshot_at_boot = true; 305 boot_alloc_snapshot(str); 306 return 1; 307 } 308 __setup("ftrace_boot_snapshot", boot_snapshot); 309 310 311 static int __init boot_instance(char *str) 312 { 313 char *slot = boot_instance_info + boot_instance_index; 314 int left = sizeof(boot_instance_info) - boot_instance_index; 315 int ret; 316 317 if (strlen(str) >= left) 318 return -1; 319 320 ret = snprintf(slot, left, "%s\t", str); 321 boot_instance_index += ret; 322 323 return 1; 324 } 325 __setup("trace_instance=", boot_instance); 326 327 328 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata; 329 330 static int __init set_trace_boot_options(char *str) 331 { 332 strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE); 333 return 1; 334 } 335 __setup("trace_options=", set_trace_boot_options); 336 337 static char 
trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata; 338 static char *trace_boot_clock __initdata; 339 340 static int __init set_trace_boot_clock(char *str) 341 { 342 strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE); 343 trace_boot_clock = trace_boot_clock_buf; 344 return 1; 345 } 346 __setup("trace_clock=", set_trace_boot_clock); 347 348 static int __init set_tracepoint_printk(char *str) 349 { 350 /* Ignore the "tp_printk_stop_on_boot" param */ 351 if (*str == '_') 352 return 0; 353 354 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0)) 355 tracepoint_printk = 1; 356 return 1; 357 } 358 __setup("tp_printk", set_tracepoint_printk); 359 360 static int __init set_tracepoint_printk_stop(char *str) 361 { 362 tracepoint_printk_stop_on_boot = true; 363 return 1; 364 } 365 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop); 366 367 static int __init set_traceoff_after_boot(char *str) 368 { 369 traceoff_after_boot = true; 370 return 1; 371 } 372 __setup("traceoff_after_boot", set_traceoff_after_boot); 373 374 unsigned long long ns2usecs(u64 nsec) 375 { 376 nsec += 500; 377 do_div(nsec, 1000); 378 return nsec; 379 } 380 381 static void 382 trace_process_export(struct trace_export *export, 383 struct ring_buffer_event *event, int flag) 384 { 385 struct trace_entry *entry; 386 unsigned int size = 0; 387 388 if (export->flags & flag) { 389 entry = ring_buffer_event_data(event); 390 size = ring_buffer_event_length(event); 391 export->write(export, entry, size); 392 } 393 } 394 395 static DEFINE_MUTEX(ftrace_export_lock); 396 397 static struct trace_export __rcu *ftrace_exports_list __read_mostly; 398 399 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled); 400 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled); 401 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled); 402 403 static inline void ftrace_exports_enable(struct trace_export *export) 404 { 405 if (export->flags & TRACE_EXPORT_FUNCTION) 406 static_branch_inc(&trace_function_exports_enabled); 407 408 if (export->flags & TRACE_EXPORT_EVENT) 409 static_branch_inc(&trace_event_exports_enabled); 410 411 if (export->flags & TRACE_EXPORT_MARKER) 412 static_branch_inc(&trace_marker_exports_enabled); 413 } 414 415 static inline void ftrace_exports_disable(struct trace_export *export) 416 { 417 if (export->flags & TRACE_EXPORT_FUNCTION) 418 static_branch_dec(&trace_function_exports_enabled); 419 420 if (export->flags & TRACE_EXPORT_EVENT) 421 static_branch_dec(&trace_event_exports_enabled); 422 423 if (export->flags & TRACE_EXPORT_MARKER) 424 static_branch_dec(&trace_marker_exports_enabled); 425 } 426 427 static void ftrace_exports(struct ring_buffer_event *event, int flag) 428 { 429 struct trace_export *export; 430 431 guard(preempt_notrace)(); 432 433 export = rcu_dereference_raw_check(ftrace_exports_list); 434 while (export) { 435 trace_process_export(export, event, flag); 436 export = rcu_dereference_raw_check(export->next); 437 } 438 } 439 440 static inline void 441 add_trace_export(struct trace_export **list, struct trace_export *export) 442 { 443 rcu_assign_pointer(export->next, *list); 444 /* 445 * We are entering export into the list but another 446 * CPU might be walking that list. We need to make sure 447 * the export->next pointer is valid before another CPU sees 448 * the export pointer included into the list. 
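	 *
	 * (rcu_assign_pointer() below provides the release ordering that
	 * publishes the export->next store above before *list is made to
	 * point at the new export.)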
449 */ 450 rcu_assign_pointer(*list, export); 451 } 452 453 static inline int 454 rm_trace_export(struct trace_export **list, struct trace_export *export) 455 { 456 struct trace_export **p; 457 458 for (p = list; *p != NULL; p = &(*p)->next) 459 if (*p == export) 460 break; 461 462 if (*p != export) 463 return -1; 464 465 rcu_assign_pointer(*p, (*p)->next); 466 467 return 0; 468 } 469 470 static inline void 471 add_ftrace_export(struct trace_export **list, struct trace_export *export) 472 { 473 ftrace_exports_enable(export); 474 475 add_trace_export(list, export); 476 } 477 478 static inline int 479 rm_ftrace_export(struct trace_export **list, struct trace_export *export) 480 { 481 int ret; 482 483 ret = rm_trace_export(list, export); 484 ftrace_exports_disable(export); 485 486 return ret; 487 } 488 489 int register_ftrace_export(struct trace_export *export) 490 { 491 if (WARN_ON_ONCE(!export->write)) 492 return -1; 493 494 guard(mutex)(&ftrace_export_lock); 495 496 add_ftrace_export(&ftrace_exports_list, export); 497 498 return 0; 499 } 500 EXPORT_SYMBOL_GPL(register_ftrace_export); 501 502 int unregister_ftrace_export(struct trace_export *export) 503 { 504 guard(mutex)(&ftrace_export_lock); 505 return rm_ftrace_export(&ftrace_exports_list, export); 506 } 507 EXPORT_SYMBOL_GPL(unregister_ftrace_export); 508 509 /* trace_flags holds trace_options default values */ 510 #define TRACE_DEFAULT_FLAGS \ 511 (FUNCTION_DEFAULT_FLAGS | FPROFILE_DEFAULT_FLAGS | \ 512 TRACE_ITER(PRINT_PARENT) | TRACE_ITER(PRINTK) | \ 513 TRACE_ITER(ANNOTATE) | TRACE_ITER(CONTEXT_INFO) | \ 514 TRACE_ITER(RECORD_CMD) | TRACE_ITER(OVERWRITE) | \ 515 TRACE_ITER(IRQ_INFO) | TRACE_ITER(MARKERS) | \ 516 TRACE_ITER(HASH_PTR) | TRACE_ITER(TRACE_PRINTK) | \ 517 TRACE_ITER(COPY_MARKER)) 518 519 /* trace_options that are only supported by global_trace */ 520 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER(PRINTK) | \ 521 TRACE_ITER(PRINTK_MSGONLY) | TRACE_ITER(RECORD_CMD) | \ 522 TRACE_ITER(PROF_TEXT_OFFSET) | FPROFILE_DEFAULT_FLAGS) 523 524 /* trace_flags that are default zero for instances */ 525 #define ZEROED_TRACE_FLAGS \ 526 (TRACE_ITER(EVENT_FORK) | TRACE_ITER(FUNC_FORK) | TRACE_ITER(TRACE_PRINTK) | \ 527 TRACE_ITER(COPY_MARKER)) 528 529 /* 530 * The global_trace is the descriptor that holds the top-level tracing 531 * buffers for the live tracing. 
532 */ 533 static struct trace_array global_trace = { 534 .trace_flags = TRACE_DEFAULT_FLAGS, 535 }; 536 537 struct trace_array *printk_trace = &global_trace; 538 539 /* List of trace_arrays interested in the top level trace_marker */ 540 static LIST_HEAD(marker_copies); 541 542 static void update_printk_trace(struct trace_array *tr) 543 { 544 if (printk_trace == tr) 545 return; 546 547 printk_trace->trace_flags &= ~TRACE_ITER(TRACE_PRINTK); 548 printk_trace = tr; 549 tr->trace_flags |= TRACE_ITER(TRACE_PRINTK); 550 } 551 552 /* Returns true if the status of tr changed */ 553 static bool update_marker_trace(struct trace_array *tr, int enabled) 554 { 555 lockdep_assert_held(&event_mutex); 556 557 if (enabled) { 558 if (!list_empty(&tr->marker_list)) 559 return false; 560 561 list_add_rcu(&tr->marker_list, &marker_copies); 562 tr->trace_flags |= TRACE_ITER(COPY_MARKER); 563 return true; 564 } 565 566 if (list_empty(&tr->marker_list)) 567 return false; 568 569 list_del_init(&tr->marker_list); 570 tr->trace_flags &= ~TRACE_ITER(COPY_MARKER); 571 return true; 572 } 573 574 void trace_set_ring_buffer_expanded(struct trace_array *tr) 575 { 576 if (!tr) 577 tr = &global_trace; 578 tr->ring_buffer_expanded = true; 579 } 580 581 LIST_HEAD(ftrace_trace_arrays); 582 583 int trace_array_get(struct trace_array *this_tr) 584 { 585 struct trace_array *tr; 586 587 guard(mutex)(&trace_types_lock); 588 list_for_each_entry(tr, &ftrace_trace_arrays, list) { 589 if (tr == this_tr) { 590 tr->ref++; 591 return 0; 592 } 593 } 594 595 return -ENODEV; 596 } 597 598 static void __trace_array_put(struct trace_array *this_tr) 599 { 600 WARN_ON(!this_tr->ref); 601 this_tr->ref--; 602 } 603 604 /** 605 * trace_array_put - Decrement the reference counter for this trace array. 606 * @this_tr : pointer to the trace array 607 * 608 * NOTE: Use this when we no longer need the trace array returned by 609 * trace_array_get_by_name(). This ensures the trace array can be later 610 * destroyed. 611 * 612 */ 613 void trace_array_put(struct trace_array *this_tr) 614 { 615 if (!this_tr) 616 return; 617 618 guard(mutex)(&trace_types_lock); 619 __trace_array_put(this_tr); 620 } 621 EXPORT_SYMBOL_GPL(trace_array_put); 622 623 int tracing_check_open_get_tr(struct trace_array *tr) 624 { 625 int ret; 626 627 ret = security_locked_down(LOCKDOWN_TRACEFS); 628 if (ret) 629 return ret; 630 631 if (tracing_disabled) 632 return -ENODEV; 633 634 if (tr && trace_array_get(tr) < 0) 635 return -ENODEV; 636 637 return 0; 638 } 639 640 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu) 641 { 642 u64 ts; 643 644 /* Early boot up does not have a buffer yet */ 645 if (!buf->buffer) 646 return trace_clock_local(); 647 648 ts = ring_buffer_time_stamp(buf->buffer); 649 ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts); 650 651 return ts; 652 } 653 654 u64 ftrace_now(int cpu) 655 { 656 return buffer_ftrace_now(&global_trace.array_buffer, cpu); 657 } 658 659 /** 660 * tracing_is_enabled - Show if global_trace has been enabled 661 * 662 * Shows if the global trace has been enabled or not. It uses the 663 * mirror flag "buffer_disabled" to be used in fast paths such as for 664 * the irqsoff tracer. But it may be inaccurate due to races. If you 665 * need to know the accurate state, use tracing_is_on() which is a little 666 * slower, but accurate. 667 */ 668 int tracing_is_enabled(void) 669 { 670 /* 671 * For quick access (irqsoff uses this in fast path), just 672 * return the mirror variable of the state of the ring buffer. 
	 * It's a little racy, but we don't really care.
	 */
	return !global_trace.buffer_disabled;
}

/*
 * trace_buf_size is the size in bytes that is allocated
 * for a buffer. Note, the number of bytes is always rounded
 * to page size.
 *
 * This number is purposely set to a low value of 16384 entries:
 * if a dump on oops happens, it is much appreciated not to have
 * to wait for all that output. It is configurable at both boot
 * time and run time anyway.
 */
#define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */

static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;

/* trace_types holds a linked list of available tracers. */
static struct tracer		*trace_types __read_mostly;

/*
 * trace_types_lock is used to protect the trace_types list.
 */
DEFINE_MUTEX(trace_types_lock);

/*
 * Serialize access to the ring buffer.
 *
 * The ring buffer serializes readers, but that is only low level
 * protection. The validity of the events (returned by
 * ring_buffer_peek() etc.) is not protected by the ring buffer.
 *
 * The content of events may become garbage if we allow other processes
 * to consume these events concurrently:
 *   A) the page of the consumed events may become a normal page
 *      (not a reader page) in the ring buffer, and this page will be
 *      rewritten by the events producer.
 *   B) the page of the consumed events may become a page for
 *      splice_read, and this page will be returned to the system.
 *
 * These primitives allow multiple processes to access different
 * per-CPU ring buffers concurrently.
 *
 * These primitives don't distinguish read-only and read-consume access.
 * Multiple read-only accesses are also serialized.
 */

#ifdef CONFIG_SMP
static DECLARE_RWSEM(all_cpu_access_lock);
static DEFINE_PER_CPU(struct mutex, cpu_access_lock);

static inline void trace_access_lock(int cpu)
{
	if (cpu == RING_BUFFER_ALL_CPUS) {
		/* gain it for accessing the whole ring buffer. */
		down_write(&all_cpu_access_lock);
	} else {
		/* gain it for accessing a cpu ring buffer. */

		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
		down_read(&all_cpu_access_lock);

		/* Secondly block other access to this @cpu ring buffer.
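		 *
		 * (Lock ordering is always all_cpu_access_lock first and
		 * the per-CPU mutex second; the RING_BUFFER_ALL_CPUS path
		 * only takes the rwsem, so the two paths cannot deadlock.)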
*/ 738 mutex_lock(&per_cpu(cpu_access_lock, cpu)); 739 } 740 } 741 742 static inline void trace_access_unlock(int cpu) 743 { 744 if (cpu == RING_BUFFER_ALL_CPUS) { 745 up_write(&all_cpu_access_lock); 746 } else { 747 mutex_unlock(&per_cpu(cpu_access_lock, cpu)); 748 up_read(&all_cpu_access_lock); 749 } 750 } 751 752 static inline void trace_access_lock_init(void) 753 { 754 int cpu; 755 756 for_each_possible_cpu(cpu) 757 mutex_init(&per_cpu(cpu_access_lock, cpu)); 758 } 759 760 #else 761 762 static DEFINE_MUTEX(access_lock); 763 764 static inline void trace_access_lock(int cpu) 765 { 766 (void)cpu; 767 mutex_lock(&access_lock); 768 } 769 770 static inline void trace_access_unlock(int cpu) 771 { 772 (void)cpu; 773 mutex_unlock(&access_lock); 774 } 775 776 static inline void trace_access_lock_init(void) 777 { 778 } 779 780 #endif 781 782 void tracer_tracing_on(struct trace_array *tr) 783 { 784 if (tr->array_buffer.buffer) 785 ring_buffer_record_on(tr->array_buffer.buffer); 786 /* 787 * This flag is looked at when buffers haven't been allocated 788 * yet, or by some tracers (like irqsoff), that just want to 789 * know if the ring buffer has been disabled, but it can handle 790 * races of where it gets disabled but we still do a record. 791 * As the check is in the fast path of the tracers, it is more 792 * important to be fast than accurate. 793 */ 794 tr->buffer_disabled = 0; 795 } 796 797 /** 798 * tracing_on - enable tracing buffers 799 * 800 * This function enables tracing buffers that may have been 801 * disabled with tracing_off. 802 */ 803 void tracing_on(void) 804 { 805 tracer_tracing_on(&global_trace); 806 } 807 EXPORT_SYMBOL_GPL(tracing_on); 808 809 #ifdef CONFIG_TRACER_SNAPSHOT 810 static void tracing_snapshot_instance_cond(struct trace_array *tr, 811 void *cond_data) 812 { 813 unsigned long flags; 814 815 if (in_nmi()) { 816 trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n"); 817 trace_array_puts(tr, "*** snapshot is being ignored ***\n"); 818 return; 819 } 820 821 if (!tr->allocated_snapshot) { 822 trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n"); 823 trace_array_puts(tr, "*** stopping trace here! ***\n"); 824 tracer_tracing_off(tr); 825 return; 826 } 827 828 if (tr->mapped) { 829 trace_array_puts(tr, "*** BUFFER MEMORY MAPPED ***\n"); 830 trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n"); 831 return; 832 } 833 834 /* Note, snapshot can not be used when the tracer uses it */ 835 if (tracer_uses_snapshot(tr->current_trace)) { 836 trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n"); 837 trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n"); 838 return; 839 } 840 841 local_irq_save(flags); 842 update_max_tr(tr, current, smp_processor_id(), cond_data); 843 local_irq_restore(flags); 844 } 845 846 void tracing_snapshot_instance(struct trace_array *tr) 847 { 848 tracing_snapshot_instance_cond(tr, NULL); 849 } 850 851 /** 852 * tracing_snapshot - take a snapshot of the current buffer. 853 * 854 * This causes a swap between the snapshot buffer and the current live 855 * tracing buffer. You can use this to take snapshots of the live 856 * trace when some condition is triggered, but continue to trace. 857 * 858 * Note, make sure to allocate the snapshot with either 859 * a tracing_snapshot_alloc(), or by doing it manually 860 * with: echo 1 > /sys/kernel/tracing/snapshot 861 * 862 * If the snapshot buffer is not allocated, it will stop tracing. 863 * Basically making a permanent snapshot. 
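 *
 * A usage sketch (hypothetical caller, CONFIG_TRACER_SNAPSHOT=y):
 *
 *	tracing_alloc_snapshot();	// or: echo 1 > /sys/kernel/tracing/snapshot
 *	...
 *	if (rare_condition())		// hypothetical predicate
 *		tracing_snapshot();	// swap the live buffer into the snapshot buffer
 *
 * Use tracing_snapshot_alloc() instead when the caller may sleep and
 * wants the allocation and the first snapshot in one call.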
 */
void tracing_snapshot(void)
{
	struct trace_array *tr = &global_trace;

	tracing_snapshot_instance(tr);
}
EXPORT_SYMBOL_GPL(tracing_snapshot);

/**
 * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
 * @tr:		The tracing instance to snapshot
 * @cond_data:	The data to be tested conditionally, and possibly saved
 *
 * This is the same as tracing_snapshot() except that the snapshot is
 * conditional - the snapshot will only happen if the
 * cond_snapshot.update() implementation receiving the cond_data
 * returns true, which means that the trace array's cond_snapshot
 * update() operation used the cond_data to determine whether the
 * snapshot should be taken, and if it was, presumably saved it along
 * with the snapshot.
 */
void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
{
	tracing_snapshot_instance_cond(tr, cond_data);
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond);

/**
 * tracing_cond_snapshot_data - get the user data associated with a snapshot
 * @tr:		The tracing instance
 *
 * When the user enables a conditional snapshot using
 * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
 * with the snapshot. This accessor is used to retrieve it.
 *
 * Should not be called from cond_snapshot.update(), since it takes
 * the tr->max_lock lock, which the code calling
 * cond_snapshot.update() has already done.
 *
 * Returns the cond_data associated with the trace array's snapshot.
 */
void *tracing_cond_snapshot_data(struct trace_array *tr)
{
	void *cond_data = NULL;

	local_irq_disable();
	arch_spin_lock(&tr->max_lock);

	if (tr->cond_snapshot)
		cond_data = tr->cond_snapshot->cond_data;

	arch_spin_unlock(&tr->max_lock);
	local_irq_enable();

	return cond_data;
}
EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);

static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
					struct array_buffer *size_buf, int cpu_id);
static void set_buffer_entries(struct array_buffer *buf, unsigned long val);

int tracing_alloc_snapshot_instance(struct trace_array *tr)
{
	int order;
	int ret;

	if (!tr->allocated_snapshot) {

		/* Make the snapshot buffer have the same order as main buffer */
		order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
		ret = ring_buffer_subbuf_order_set(tr->snapshot_buffer.buffer, order);
		if (ret < 0)
			return ret;

		/* allocate spare buffer */
		ret = resize_buffer_duplicate_size(&tr->snapshot_buffer,
				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
		if (ret < 0)
			return ret;

		tr->allocated_snapshot = true;
	}

	return 0;
}

static void free_snapshot(struct trace_array *tr)
{
	/*
	 * We don't free the ring buffer. Instead, we resize it, because
	 * the max_tr ring buffer has some state (e.g. ring->clock) and
	 * we want to preserve it.
958 */ 959 ring_buffer_subbuf_order_set(tr->snapshot_buffer.buffer, 0); 960 ring_buffer_resize(tr->snapshot_buffer.buffer, 1, RING_BUFFER_ALL_CPUS); 961 set_buffer_entries(&tr->snapshot_buffer, 1); 962 tracing_reset_online_cpus(&tr->snapshot_buffer); 963 tr->allocated_snapshot = false; 964 } 965 966 static int tracing_arm_snapshot_locked(struct trace_array *tr) 967 { 968 int ret; 969 970 lockdep_assert_held(&trace_types_lock); 971 972 spin_lock(&tr->snapshot_trigger_lock); 973 if (tr->snapshot == UINT_MAX || tr->mapped) { 974 spin_unlock(&tr->snapshot_trigger_lock); 975 return -EBUSY; 976 } 977 978 tr->snapshot++; 979 spin_unlock(&tr->snapshot_trigger_lock); 980 981 ret = tracing_alloc_snapshot_instance(tr); 982 if (ret) { 983 spin_lock(&tr->snapshot_trigger_lock); 984 tr->snapshot--; 985 spin_unlock(&tr->snapshot_trigger_lock); 986 } 987 988 return ret; 989 } 990 991 int tracing_arm_snapshot(struct trace_array *tr) 992 { 993 guard(mutex)(&trace_types_lock); 994 return tracing_arm_snapshot_locked(tr); 995 } 996 997 void tracing_disarm_snapshot(struct trace_array *tr) 998 { 999 spin_lock(&tr->snapshot_trigger_lock); 1000 if (!WARN_ON(!tr->snapshot)) 1001 tr->snapshot--; 1002 spin_unlock(&tr->snapshot_trigger_lock); 1003 } 1004 1005 /** 1006 * tracing_alloc_snapshot - allocate snapshot buffer. 1007 * 1008 * This only allocates the snapshot buffer if it isn't already 1009 * allocated - it doesn't also take a snapshot. 1010 * 1011 * This is meant to be used in cases where the snapshot buffer needs 1012 * to be set up for events that can't sleep but need to be able to 1013 * trigger a snapshot. 1014 */ 1015 int tracing_alloc_snapshot(void) 1016 { 1017 struct trace_array *tr = &global_trace; 1018 int ret; 1019 1020 ret = tracing_alloc_snapshot_instance(tr); 1021 WARN_ON(ret < 0); 1022 1023 return ret; 1024 } 1025 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot); 1026 1027 /** 1028 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer. 1029 * 1030 * This is similar to tracing_snapshot(), but it will allocate the 1031 * snapshot buffer if it isn't already allocated. Use this only 1032 * where it is safe to sleep, as the allocation may sleep. 1033 * 1034 * This causes a swap between the snapshot buffer and the current live 1035 * tracing buffer. You can use this to take snapshots of the live 1036 * trace when some condition is triggered, but continue to trace. 1037 */ 1038 void tracing_snapshot_alloc(void) 1039 { 1040 int ret; 1041 1042 ret = tracing_alloc_snapshot(); 1043 if (ret < 0) 1044 return; 1045 1046 tracing_snapshot(); 1047 } 1048 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc); 1049 1050 /** 1051 * tracing_snapshot_cond_enable - enable conditional snapshot for an instance 1052 * @tr: The tracing instance 1053 * @cond_data: User data to associate with the snapshot 1054 * @update: Implementation of the cond_snapshot update function 1055 * 1056 * Check whether the conditional snapshot for the given instance has 1057 * already been enabled, or if the current tracer is already using a 1058 * snapshot; if so, return -EBUSY, else create a cond_snapshot and 1059 * save the cond_data and update function inside. 1060 * 1061 * Returns 0 if successful, error otherwise. 
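 *
 * A usage sketch (hypothetical callback, data and caller; assumes
 * cond_update_fn_t takes the trace array and the cond_data and returns
 * a boolean):
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		unsigned long *limit = cond_data;
 *
 *		return measured_latency() > *limit;	// hypothetical helper
 *	}
 *
 *	ret = tracing_snapshot_cond_enable(tr, &my_limit, my_update);
 *	...
 *	tracing_snapshot_cond(tr, &my_limit);	// snapshots only if my_update() returns true
 *	...
 *	tracing_snapshot_cond_disable(tr);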
1062 */ 1063 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, 1064 cond_update_fn_t update) 1065 { 1066 struct cond_snapshot *cond_snapshot __free(kfree) = 1067 kzalloc(sizeof(*cond_snapshot), GFP_KERNEL); 1068 int ret; 1069 1070 if (!cond_snapshot) 1071 return -ENOMEM; 1072 1073 cond_snapshot->cond_data = cond_data; 1074 cond_snapshot->update = update; 1075 1076 guard(mutex)(&trace_types_lock); 1077 1078 if (tracer_uses_snapshot(tr->current_trace)) 1079 return -EBUSY; 1080 1081 /* 1082 * The cond_snapshot can only change to NULL without the 1083 * trace_types_lock. We don't care if we race with it going 1084 * to NULL, but we want to make sure that it's not set to 1085 * something other than NULL when we get here, which we can 1086 * do safely with only holding the trace_types_lock and not 1087 * having to take the max_lock. 1088 */ 1089 if (tr->cond_snapshot) 1090 return -EBUSY; 1091 1092 ret = tracing_arm_snapshot_locked(tr); 1093 if (ret) 1094 return ret; 1095 1096 local_irq_disable(); 1097 arch_spin_lock(&tr->max_lock); 1098 tr->cond_snapshot = no_free_ptr(cond_snapshot); 1099 arch_spin_unlock(&tr->max_lock); 1100 local_irq_enable(); 1101 1102 return 0; 1103 } 1104 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable); 1105 1106 /** 1107 * tracing_snapshot_cond_disable - disable conditional snapshot for an instance 1108 * @tr: The tracing instance 1109 * 1110 * Check whether the conditional snapshot for the given instance is 1111 * enabled; if so, free the cond_snapshot associated with it, 1112 * otherwise return -EINVAL. 1113 * 1114 * Returns 0 if successful, error otherwise. 1115 */ 1116 int tracing_snapshot_cond_disable(struct trace_array *tr) 1117 { 1118 int ret = 0; 1119 1120 local_irq_disable(); 1121 arch_spin_lock(&tr->max_lock); 1122 1123 if (!tr->cond_snapshot) 1124 ret = -EINVAL; 1125 else { 1126 kfree(tr->cond_snapshot); 1127 tr->cond_snapshot = NULL; 1128 } 1129 1130 arch_spin_unlock(&tr->max_lock); 1131 local_irq_enable(); 1132 1133 tracing_disarm_snapshot(tr); 1134 1135 return ret; 1136 } 1137 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable); 1138 #else 1139 void tracing_snapshot(void) 1140 { 1141 WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used"); 1142 } 1143 EXPORT_SYMBOL_GPL(tracing_snapshot); 1144 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data) 1145 { 1146 WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used"); 1147 } 1148 EXPORT_SYMBOL_GPL(tracing_snapshot_cond); 1149 int tracing_alloc_snapshot(void) 1150 { 1151 WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used"); 1152 return -ENODEV; 1153 } 1154 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot); 1155 void tracing_snapshot_alloc(void) 1156 { 1157 /* Give warning */ 1158 tracing_snapshot(); 1159 } 1160 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc); 1161 void *tracing_cond_snapshot_data(struct trace_array *tr) 1162 { 1163 return NULL; 1164 } 1165 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data); 1166 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update) 1167 { 1168 return -ENODEV; 1169 } 1170 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable); 1171 int tracing_snapshot_cond_disable(struct trace_array *tr) 1172 { 1173 return false; 1174 } 1175 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable); 1176 #define free_snapshot(tr) do { } while (0) 1177 #define tracing_arm_snapshot_locked(tr) ({ -EBUSY; }) 1178 #endif /* CONFIG_TRACER_SNAPSHOT */ 1179 1180 void tracer_tracing_off(struct 
trace_array *tr)
{
	if (tr->array_buffer.buffer)
		ring_buffer_record_off(tr->array_buffer.buffer);
	/*
	 * This flag is looked at when buffers haven't been allocated
	 * yet, or by some tracers (like irqsoff), that just want to
	 * know if the ring buffer has been disabled, but it can handle
	 * races of where it gets disabled but we still do a record.
	 * As the check is in the fast path of the tracers, it is more
	 * important to be fast than accurate.
	 */
	tr->buffer_disabled = 1;
}

/**
 * tracer_tracing_disable() - temporarily disable the buffer from write
 * @tr: The trace array to disable its buffer for
 *
 * Expects tracer_tracing_enable() to re-enable tracing.
 * The difference between this and tracer_tracing_off() is that this
 * is a counter and can nest, whereas tracer_tracing_off() can
 * be called multiple times and a single tracer_tracing_on() will
 * enable it.
 */
void tracer_tracing_disable(struct trace_array *tr)
{
	if (WARN_ON_ONCE(!tr->array_buffer.buffer))
		return;

	ring_buffer_record_disable(tr->array_buffer.buffer);
}

/**
 * tracer_tracing_enable() - counterpart of tracer_tracing_disable()
 * @tr: The trace array that had tracer_tracing_disable() called on it
 *
 * This is called after tracer_tracing_disable() has been called on @tr,
 * when it's safe to re-enable tracing.
 */
void tracer_tracing_enable(struct trace_array *tr)
{
	if (WARN_ON_ONCE(!tr->array_buffer.buffer))
		return;

	ring_buffer_record_enable(tr->array_buffer.buffer);
}

/**
 * tracing_off - turn off tracing buffers
 *
 * This function stops the tracing buffers from recording data.
 * It does not disable any overhead the tracers themselves may
 * be causing. This function simply causes all recording to
 * the ring buffers to fail.
 */
void tracing_off(void)
{
	tracer_tracing_off(&global_trace);
}
EXPORT_SYMBOL_GPL(tracing_off);

void disable_trace_on_warning(void)
{
	if (__disable_trace_on_warning) {
		struct trace_array *tr = READ_ONCE(printk_trace);

		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
				       "Disabling tracing due to warning\n");
		tracing_off();

		/* Disable trace_printk() buffer too */
		if (tr != &global_trace) {
			trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_,
					       "Disabling tracing due to warning\n");
			tracer_tracing_off(tr);
		}
	}
}

/**
 * tracer_tracing_is_on - show real state of ring buffer enabled
 * @tr: the trace array to know if ring buffer is enabled
 *
 * Shows real state of the ring buffer if it is enabled or not.
 */
bool tracer_tracing_is_on(struct trace_array *tr)
{
	if (tr->array_buffer.buffer)
		return ring_buffer_record_is_set_on(tr->array_buffer.buffer);
	return !tr->buffer_disabled;
}

/**
 * tracing_is_on - show state of ring buffers enabled
 */
int tracing_is_on(void)
{
	return tracer_tracing_is_on(&global_trace);
}
EXPORT_SYMBOL_GPL(tracing_is_on);

static int __init set_buf_size(char *str)
{
	unsigned long buf_size;

	if (!str)
		return 0;
	buf_size = memparse(str, &str);
	/*
	 * nr_entries can not be zero and the startup
	 * tests require some buffer space.
Therefore 1292 * ensure we have at least 4096 bytes of buffer. 1293 */ 1294 trace_buf_size = max(4096UL, buf_size); 1295 return 1; 1296 } 1297 __setup("trace_buf_size=", set_buf_size); 1298 1299 static int __init set_tracing_thresh(char *str) 1300 { 1301 unsigned long threshold; 1302 int ret; 1303 1304 if (!str) 1305 return 0; 1306 ret = kstrtoul(str, 0, &threshold); 1307 if (ret < 0) 1308 return 0; 1309 tracing_thresh = threshold * 1000; 1310 return 1; 1311 } 1312 __setup("tracing_thresh=", set_tracing_thresh); 1313 1314 unsigned long nsecs_to_usecs(unsigned long nsecs) 1315 { 1316 return nsecs / 1000; 1317 } 1318 1319 /* 1320 * TRACE_FLAGS is defined as a tuple matching bit masks with strings. 1321 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that 1322 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list 1323 * of strings in the order that the evals (enum) were defined. 1324 */ 1325 #undef C 1326 #define C(a, b) b 1327 1328 /* These must match the bit positions in trace_iterator_flags */ 1329 static const char *trace_options[] = { 1330 TRACE_FLAGS 1331 NULL 1332 }; 1333 1334 static struct { 1335 u64 (*func)(void); 1336 const char *name; 1337 int in_ns; /* is this clock in nanoseconds? */ 1338 } trace_clocks[] = { 1339 { trace_clock_local, "local", 1 }, 1340 { trace_clock_global, "global", 1 }, 1341 { trace_clock_counter, "counter", 0 }, 1342 { trace_clock_jiffies, "uptime", 0 }, 1343 { trace_clock, "perf", 1 }, 1344 { ktime_get_mono_fast_ns, "mono", 1 }, 1345 { ktime_get_raw_fast_ns, "mono_raw", 1 }, 1346 { ktime_get_boot_fast_ns, "boot", 1 }, 1347 { ktime_get_tai_fast_ns, "tai", 1 }, 1348 ARCH_TRACE_CLOCKS 1349 }; 1350 1351 bool trace_clock_in_ns(struct trace_array *tr) 1352 { 1353 if (trace_clocks[tr->clock_id].in_ns) 1354 return true; 1355 1356 return false; 1357 } 1358 1359 /* 1360 * trace_parser_get_init - gets the buffer for trace parser 1361 */ 1362 int trace_parser_get_init(struct trace_parser *parser, int size) 1363 { 1364 memset(parser, 0, sizeof(*parser)); 1365 1366 parser->buffer = kmalloc(size, GFP_KERNEL); 1367 if (!parser->buffer) 1368 return 1; 1369 1370 parser->size = size; 1371 return 0; 1372 } 1373 1374 /* 1375 * trace_parser_put - frees the buffer for trace parser 1376 */ 1377 void trace_parser_put(struct trace_parser *parser) 1378 { 1379 kfree(parser->buffer); 1380 parser->buffer = NULL; 1381 } 1382 1383 /* 1384 * trace_get_user - reads the user input string separated by space 1385 * (matched by isspace(ch)) 1386 * 1387 * For each string found the 'struct trace_parser' is updated, 1388 * and the function returns. 1389 * 1390 * Returns number of bytes read. 1391 * 1392 * See kernel/trace/trace.h for 'struct trace_parser' details. 1393 */ 1394 int trace_get_user(struct trace_parser *parser, const char __user *ubuf, 1395 size_t cnt, loff_t *ppos) 1396 { 1397 char ch; 1398 size_t read = 0; 1399 ssize_t ret; 1400 1401 if (!*ppos) 1402 trace_parser_clear(parser); 1403 1404 ret = get_user(ch, ubuf++); 1405 if (ret) 1406 goto fail; 1407 1408 read++; 1409 cnt--; 1410 1411 /* 1412 * The parser is not finished with the last write, 1413 * continue reading the user input without skipping spaces. 
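	 *
	 * For example, if user space writes "sched" and then "_switch\n"
	 * with two separate write() calls, the first pass stores "sched"
	 * and sets parser->cont, and the second pass appends "_switch"
	 * and terminates the string when the trailing newline is seen
	 * (a behavioral sketch of the code below).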
1414 */ 1415 if (!parser->cont) { 1416 /* skip white space */ 1417 while (cnt && isspace(ch)) { 1418 ret = get_user(ch, ubuf++); 1419 if (ret) 1420 goto fail; 1421 read++; 1422 cnt--; 1423 } 1424 1425 parser->idx = 0; 1426 1427 /* only spaces were written */ 1428 if (isspace(ch) || !ch) { 1429 *ppos += read; 1430 return read; 1431 } 1432 } 1433 1434 /* read the non-space input */ 1435 while (cnt && !isspace(ch) && ch) { 1436 if (parser->idx < parser->size - 1) 1437 parser->buffer[parser->idx++] = ch; 1438 else { 1439 ret = -EINVAL; 1440 goto fail; 1441 } 1442 1443 ret = get_user(ch, ubuf++); 1444 if (ret) 1445 goto fail; 1446 read++; 1447 cnt--; 1448 } 1449 1450 /* We either got finished input or we have to wait for another call. */ 1451 if (isspace(ch) || !ch) { 1452 parser->buffer[parser->idx] = 0; 1453 parser->cont = false; 1454 } else if (parser->idx < parser->size - 1) { 1455 parser->cont = true; 1456 parser->buffer[parser->idx++] = ch; 1457 /* Make sure the parsed string always terminates with '\0'. */ 1458 parser->buffer[parser->idx] = 0; 1459 } else { 1460 ret = -EINVAL; 1461 goto fail; 1462 } 1463 1464 *ppos += read; 1465 return read; 1466 fail: 1467 trace_parser_fail(parser); 1468 return ret; 1469 } 1470 1471 /* TODO add a seq_buf_to_buffer() */ 1472 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt) 1473 { 1474 int len; 1475 1476 if (trace_seq_used(s) <= s->readpos) 1477 return -EBUSY; 1478 1479 len = trace_seq_used(s) - s->readpos; 1480 if (cnt > len) 1481 cnt = len; 1482 memcpy(buf, s->buffer + s->readpos, cnt); 1483 1484 s->readpos += cnt; 1485 return cnt; 1486 } 1487 1488 unsigned long __read_mostly tracing_thresh; 1489 1490 #ifdef CONFIG_TRACER_MAX_TRACE 1491 #ifdef LATENCY_FS_NOTIFY 1492 static struct workqueue_struct *fsnotify_wq; 1493 1494 static void latency_fsnotify_workfn(struct work_struct *work) 1495 { 1496 struct trace_array *tr = container_of(work, struct trace_array, 1497 fsnotify_work); 1498 fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY); 1499 } 1500 1501 static void latency_fsnotify_workfn_irq(struct irq_work *iwork) 1502 { 1503 struct trace_array *tr = container_of(iwork, struct trace_array, 1504 fsnotify_irqwork); 1505 queue_work(fsnotify_wq, &tr->fsnotify_work); 1506 } 1507 1508 __init static int latency_fsnotify_init(void) 1509 { 1510 fsnotify_wq = alloc_workqueue("tr_max_lat_wq", 1511 WQ_UNBOUND | WQ_HIGHPRI, 0); 1512 if (!fsnotify_wq) { 1513 pr_err("Unable to allocate tr_max_lat_wq\n"); 1514 return -ENOMEM; 1515 } 1516 return 0; 1517 } 1518 1519 late_initcall_sync(latency_fsnotify_init); 1520 1521 void latency_fsnotify(struct trace_array *tr) 1522 { 1523 if (!fsnotify_wq) 1524 return; 1525 /* 1526 * We cannot call queue_work(&tr->fsnotify_work) from here because it's 1527 * possible that we are called from __schedule() or do_idle(), which 1528 * could cause a deadlock. 
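	 *
	 * The update is therefore bounced through irq_work and a
	 * workqueue instead:
	 *
	 *	latency_fsnotify()
	 *	  -> irq_work_queue(&tr->fsnotify_irqwork)
	 *	    -> latency_fsnotify_workfn_irq()
	 *	      -> queue_work(fsnotify_wq, &tr->fsnotify_work)
	 *	        -> latency_fsnotify_workfn()   (process context)
	 *	          -> fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY)
	 *
	 * (a flow sketch of the functions defined above).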
1529 */ 1530 irq_work_queue(&tr->fsnotify_irqwork); 1531 } 1532 #endif /* !LATENCY_FS_NOTIFY */ 1533 1534 static const struct file_operations tracing_max_lat_fops; 1535 1536 static void trace_create_maxlat_file(struct trace_array *tr, 1537 struct dentry *d_tracer) 1538 { 1539 #ifdef LATENCY_FS_NOTIFY 1540 INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn); 1541 init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq); 1542 #endif 1543 tr->d_max_latency = trace_create_file("tracing_max_latency", 1544 TRACE_MODE_WRITE, 1545 d_tracer, tr, 1546 &tracing_max_lat_fops); 1547 } 1548 1549 /* 1550 * Copy the new maximum trace into the separate maximum-trace 1551 * structure. (this way the maximum trace is permanently saved, 1552 * for later retrieval via /sys/kernel/tracing/tracing_max_latency) 1553 */ 1554 static void 1555 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) 1556 { 1557 struct array_buffer *trace_buf = &tr->array_buffer; 1558 struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu); 1559 struct array_buffer *max_buf = &tr->snapshot_buffer; 1560 struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu); 1561 1562 max_buf->cpu = cpu; 1563 max_buf->time_start = data->preempt_timestamp; 1564 1565 max_data->saved_latency = tr->max_latency; 1566 max_data->critical_start = data->critical_start; 1567 max_data->critical_end = data->critical_end; 1568 1569 strscpy(max_data->comm, tsk->comm); 1570 max_data->pid = tsk->pid; 1571 /* 1572 * If tsk == current, then use current_uid(), as that does not use 1573 * RCU. The irq tracer can be called out of RCU scope. 1574 */ 1575 if (tsk == current) 1576 max_data->uid = current_uid(); 1577 else 1578 max_data->uid = task_uid(tsk); 1579 1580 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO; 1581 max_data->policy = tsk->policy; 1582 max_data->rt_priority = tsk->rt_priority; 1583 1584 /* record this tasks comm */ 1585 tracing_record_cmdline(tsk); 1586 latency_fsnotify(tr); 1587 } 1588 #else 1589 static inline void trace_create_maxlat_file(struct trace_array *tr, 1590 struct dentry *d_tracer) { } 1591 static inline void __update_max_tr(struct trace_array *tr, 1592 struct task_struct *tsk, int cpu) { } 1593 #endif /* CONFIG_TRACER_MAX_TRACE */ 1594 1595 #ifdef CONFIG_TRACER_SNAPSHOT 1596 /** 1597 * update_max_tr - snapshot all trace buffers from global_trace to max_tr 1598 * @tr: tracer 1599 * @tsk: the task with the latency 1600 * @cpu: The cpu that initiated the trace. 1601 * @cond_data: User data associated with a conditional snapshot 1602 * 1603 * Flip the buffers between the @tr and the max_tr and record information 1604 * about which task was the cause of this latency. 
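 *
 * A call-pattern sketch (this mirrors tracing_snapshot_instance_cond()
 * above; callers must have interrupts disabled):
 *
 *	local_irq_save(flags);
 *	update_max_tr(tr, current, smp_processor_id(), cond_data);
 *	local_irq_restore(flags);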
1605 */ 1606 void 1607 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu, 1608 void *cond_data) 1609 { 1610 if (tr->stop_count) 1611 return; 1612 1613 WARN_ON_ONCE(!irqs_disabled()); 1614 1615 if (!tr->allocated_snapshot) { 1616 /* Only the nop tracer should hit this when disabling */ 1617 WARN_ON_ONCE(tr->current_trace != &nop_trace); 1618 return; 1619 } 1620 1621 arch_spin_lock(&tr->max_lock); 1622 1623 /* Inherit the recordable setting from array_buffer */ 1624 if (ring_buffer_record_is_set_on(tr->array_buffer.buffer)) 1625 ring_buffer_record_on(tr->snapshot_buffer.buffer); 1626 else 1627 ring_buffer_record_off(tr->snapshot_buffer.buffer); 1628 1629 if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) { 1630 arch_spin_unlock(&tr->max_lock); 1631 return; 1632 } 1633 1634 swap(tr->array_buffer.buffer, tr->snapshot_buffer.buffer); 1635 1636 __update_max_tr(tr, tsk, cpu); 1637 1638 arch_spin_unlock(&tr->max_lock); 1639 1640 /* Any waiters on the old snapshot buffer need to wake up */ 1641 ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS); 1642 } 1643 1644 /** 1645 * update_max_tr_single - only copy one trace over, and reset the rest 1646 * @tr: tracer 1647 * @tsk: task with the latency 1648 * @cpu: the cpu of the buffer to copy. 1649 * 1650 * Flip the trace of a single CPU buffer between the @tr and the max_tr. 1651 */ 1652 void 1653 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) 1654 { 1655 int ret; 1656 1657 if (tr->stop_count) 1658 return; 1659 1660 WARN_ON_ONCE(!irqs_disabled()); 1661 if (!tr->allocated_snapshot) { 1662 /* Only the nop tracer should hit this when disabling */ 1663 WARN_ON_ONCE(tr->current_trace != &nop_trace); 1664 return; 1665 } 1666 1667 arch_spin_lock(&tr->max_lock); 1668 1669 ret = ring_buffer_swap_cpu(tr->snapshot_buffer.buffer, tr->array_buffer.buffer, cpu); 1670 1671 if (ret == -EBUSY) { 1672 /* 1673 * We failed to swap the buffer due to a commit taking 1674 * place on this CPU. We fail to record, but we reset 1675 * the max trace buffer (no one writes directly to it) 1676 * and flag that it failed. 1677 * Another reason is resize is in progress. 1678 */ 1679 trace_array_printk_buf(tr->snapshot_buffer.buffer, _THIS_IP_, 1680 "Failed to swap buffers due to commit or resize in progress\n"); 1681 } 1682 1683 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY); 1684 1685 __update_max_tr(tr, tsk, cpu); 1686 arch_spin_unlock(&tr->max_lock); 1687 } 1688 #endif /* CONFIG_TRACER_SNAPSHOT */ 1689 1690 struct pipe_wait { 1691 struct trace_iterator *iter; 1692 int wait_index; 1693 }; 1694 1695 static bool wait_pipe_cond(void *data) 1696 { 1697 struct pipe_wait *pwait = data; 1698 struct trace_iterator *iter = pwait->iter; 1699 1700 if (atomic_read_acquire(&iter->wait_index) != pwait->wait_index) 1701 return true; 1702 1703 return iter->closed; 1704 } 1705 1706 static int wait_on_pipe(struct trace_iterator *iter, int full) 1707 { 1708 struct pipe_wait pwait; 1709 int ret; 1710 1711 /* Iterators are static, they should be filled or empty */ 1712 if (trace_buffer_iter(iter, iter->cpu_file)) 1713 return 0; 1714 1715 pwait.wait_index = atomic_read_acquire(&iter->wait_index); 1716 pwait.iter = iter; 1717 1718 ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full, 1719 wait_pipe_cond, &pwait); 1720 1721 #ifdef CONFIG_TRACER_SNAPSHOT 1722 /* 1723 * Make sure this is still the snapshot buffer, as if a snapshot were 1724 * to happen, this would now be the main buffer. 
1725 */ 1726 if (iter->snapshot) 1727 iter->array_buffer = &iter->tr->snapshot_buffer; 1728 #endif 1729 return ret; 1730 } 1731 1732 #ifdef CONFIG_FTRACE_STARTUP_TEST 1733 static bool selftests_can_run; 1734 1735 struct trace_selftests { 1736 struct list_head list; 1737 struct tracer *type; 1738 }; 1739 1740 static LIST_HEAD(postponed_selftests); 1741 1742 static int save_selftest(struct tracer *type) 1743 { 1744 struct trace_selftests *selftest; 1745 1746 selftest = kmalloc(sizeof(*selftest), GFP_KERNEL); 1747 if (!selftest) 1748 return -ENOMEM; 1749 1750 selftest->type = type; 1751 list_add(&selftest->list, &postponed_selftests); 1752 return 0; 1753 } 1754 1755 static int run_tracer_selftest(struct tracer *type) 1756 { 1757 struct trace_array *tr = &global_trace; 1758 struct tracer_flags *saved_flags = tr->current_trace_flags; 1759 struct tracer *saved_tracer = tr->current_trace; 1760 int ret; 1761 1762 if (!type->selftest || tracing_selftest_disabled) 1763 return 0; 1764 1765 /* 1766 * If a tracer registers early in boot up (before scheduling is 1767 * initialized and such), then do not run its selftests yet. 1768 * Instead, run it a little later in the boot process. 1769 */ 1770 if (!selftests_can_run) 1771 return save_selftest(type); 1772 1773 if (!tracing_is_on()) { 1774 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n", 1775 type->name); 1776 return 0; 1777 } 1778 1779 /* 1780 * Run a selftest on this tracer. 1781 * Here we reset the trace buffer, and set the current 1782 * tracer to be this tracer. The tracer can then run some 1783 * internal tracing to verify that everything is in order. 1784 * If we fail, we do not register this tracer. 1785 */ 1786 tracing_reset_online_cpus(&tr->array_buffer); 1787 1788 tr->current_trace = type; 1789 tr->current_trace_flags = type->flags ? : type->default_flags; 1790 1791 #ifdef CONFIG_TRACER_MAX_TRACE 1792 if (tracer_uses_snapshot(type)) { 1793 /* If we expanded the buffers, make sure the max is expanded too */ 1794 if (tr->ring_buffer_expanded) 1795 ring_buffer_resize(tr->snapshot_buffer.buffer, trace_buf_size, 1796 RING_BUFFER_ALL_CPUS); 1797 tr->allocated_snapshot = true; 1798 } 1799 #endif 1800 1801 /* the test is responsible for initializing and enabling */ 1802 pr_info("Testing tracer %s: ", type->name); 1803 ret = type->selftest(type, tr); 1804 /* the test is responsible for resetting too */ 1805 tr->current_trace = saved_tracer; 1806 tr->current_trace_flags = saved_flags; 1807 if (ret) { 1808 printk(KERN_CONT "FAILED!\n"); 1809 /* Add the warning after printing 'FAILED' */ 1810 WARN_ON(1); 1811 return -1; 1812 } 1813 /* Only reset on passing, to avoid touching corrupted buffers */ 1814 tracing_reset_online_cpus(&tr->array_buffer); 1815 1816 #ifdef CONFIG_TRACER_MAX_TRACE 1817 if (tracer_uses_snapshot(type)) { 1818 tr->allocated_snapshot = false; 1819 1820 /* Shrink the max buffer again */ 1821 if (tr->ring_buffer_expanded) 1822 ring_buffer_resize(tr->snapshot_buffer.buffer, 1, 1823 RING_BUFFER_ALL_CPUS); 1824 } 1825 #endif 1826 1827 printk(KERN_CONT "PASSED\n"); 1828 return 0; 1829 } 1830 1831 static int do_run_tracer_selftest(struct tracer *type) 1832 { 1833 int ret; 1834 1835 /* 1836 * Tests can take a long time, especially if they are run one after the 1837 * other, as does happen during bootup when all the tracers are 1838 * registered. This could cause the soft lockup watchdog to trigger. 
1839 */ 1840 cond_resched(); 1841 1842 tracing_selftest_running = true; 1843 ret = run_tracer_selftest(type); 1844 tracing_selftest_running = false; 1845 1846 return ret; 1847 } 1848 1849 static __init int init_trace_selftests(void) 1850 { 1851 struct trace_selftests *p, *n; 1852 struct tracer *t, **last; 1853 int ret; 1854 1855 selftests_can_run = true; 1856 1857 guard(mutex)(&trace_types_lock); 1858 1859 if (list_empty(&postponed_selftests)) 1860 return 0; 1861 1862 pr_info("Running postponed tracer tests:\n"); 1863 1864 tracing_selftest_running = true; 1865 list_for_each_entry_safe(p, n, &postponed_selftests, list) { 1866 /* This loop can take minutes when sanitizers are enabled, so 1867 * lets make sure we allow RCU processing. 1868 */ 1869 cond_resched(); 1870 ret = run_tracer_selftest(p->type); 1871 /* If the test fails, then warn and remove from available_tracers */ 1872 if (ret < 0) { 1873 WARN(1, "tracer: %s failed selftest, disabling\n", 1874 p->type->name); 1875 last = &trace_types; 1876 for (t = trace_types; t; t = t->next) { 1877 if (t == p->type) { 1878 *last = t->next; 1879 break; 1880 } 1881 last = &t->next; 1882 } 1883 } 1884 list_del(&p->list); 1885 kfree(p); 1886 } 1887 tracing_selftest_running = false; 1888 1889 return 0; 1890 } 1891 core_initcall(init_trace_selftests); 1892 #else 1893 static inline int do_run_tracer_selftest(struct tracer *type) 1894 { 1895 return 0; 1896 } 1897 #endif /* CONFIG_FTRACE_STARTUP_TEST */ 1898 1899 static int add_tracer(struct trace_array *tr, struct tracer *t); 1900 1901 static void __init apply_trace_boot_options(void); 1902 1903 static void free_tracers(struct trace_array *tr) 1904 { 1905 struct tracers *t, *n; 1906 1907 lockdep_assert_held(&trace_types_lock); 1908 1909 list_for_each_entry_safe(t, n, &tr->tracers, list) { 1910 list_del(&t->list); 1911 kfree(t->flags); 1912 kfree(t); 1913 } 1914 } 1915 1916 /** 1917 * register_tracer - register a tracer with the ftrace system. 1918 * @type: the plugin for the tracer 1919 * 1920 * Register a new plugin tracer. 
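 *
 * A minimal registration sketch (hypothetical tracer; real tracers also
 * fill in callbacks such as init/reset, which are elided here):
 *
 *	static struct tracer example_trace __read_mostly = {
 *		.name	= "example",
 *	};
 *
 *	static int __init example_trace_init(void)
 *	{
 *		return register_tracer(&example_trace);
 *	}
 *	core_initcall(example_trace_init);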
1921 */ 1922 int __init register_tracer(struct tracer *type) 1923 { 1924 struct trace_array *tr; 1925 struct tracer *t; 1926 int ret = 0; 1927 1928 if (!type->name) { 1929 pr_info("Tracer must have a name\n"); 1930 return -1; 1931 } 1932 1933 if (strlen(type->name) >= MAX_TRACER_SIZE) { 1934 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE); 1935 return -1; 1936 } 1937 1938 if (security_locked_down(LOCKDOWN_TRACEFS)) { 1939 pr_warn("Can not register tracer %s due to lockdown\n", 1940 type->name); 1941 return -EPERM; 1942 } 1943 1944 mutex_lock(&trace_types_lock); 1945 1946 for (t = trace_types; t; t = t->next) { 1947 if (strcmp(type->name, t->name) == 0) { 1948 /* already found */ 1949 pr_info("Tracer %s already registered\n", 1950 type->name); 1951 ret = -1; 1952 goto out; 1953 } 1954 } 1955 1956 /* store the tracer for __set_tracer_option */ 1957 if (type->flags) 1958 type->flags->trace = type; 1959 1960 ret = do_run_tracer_selftest(type); 1961 if (ret < 0) 1962 goto out; 1963 1964 list_for_each_entry(tr, &ftrace_trace_arrays, list) { 1965 ret = add_tracer(tr, type); 1966 if (ret < 0) { 1967 /* The tracer will still exist but without options */ 1968 pr_warn("Failed to create tracer options for %s\n", type->name); 1969 break; 1970 } 1971 } 1972 1973 type->next = trace_types; 1974 trace_types = type; 1975 1976 out: 1977 mutex_unlock(&trace_types_lock); 1978 1979 if (ret || !default_bootup_tracer) 1980 return ret; 1981 1982 if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE)) 1983 return 0; 1984 1985 printk(KERN_INFO "Starting tracer '%s'\n", type->name); 1986 /* Do we want this tracer to start on bootup? */ 1987 WARN_ON(tracing_set_tracer(&global_trace, type->name) < 0); 1988 default_bootup_tracer = NULL; 1989 1990 apply_trace_boot_options(); 1991 1992 /* disable other selftests, since this will break it. 
*/ 1993 disable_tracing_selftest("running a tracer"); 1994 1995 return 0; 1996 } 1997 1998 static void tracing_reset_cpu(struct array_buffer *buf, int cpu) 1999 { 2000 struct trace_buffer *buffer = buf->buffer; 2001 2002 if (!buffer) 2003 return; 2004 2005 ring_buffer_record_disable(buffer); 2006 2007 /* Make sure all commits have finished */ 2008 synchronize_rcu(); 2009 ring_buffer_reset_cpu(buffer, cpu); 2010 2011 ring_buffer_record_enable(buffer); 2012 } 2013 2014 void tracing_reset_online_cpus(struct array_buffer *buf) 2015 { 2016 struct trace_buffer *buffer = buf->buffer; 2017 2018 if (!buffer) 2019 return; 2020 2021 ring_buffer_record_disable(buffer); 2022 2023 /* Make sure all commits have finished */ 2024 synchronize_rcu(); 2025 2026 buf->time_start = buffer_ftrace_now(buf, buf->cpu); 2027 2028 ring_buffer_reset_online_cpus(buffer); 2029 2030 ring_buffer_record_enable(buffer); 2031 } 2032 2033 static void tracing_reset_all_cpus(struct array_buffer *buf) 2034 { 2035 struct trace_buffer *buffer = buf->buffer; 2036 2037 if (!buffer) 2038 return; 2039 2040 ring_buffer_record_disable(buffer); 2041 2042 /* Make sure all commits have finished */ 2043 synchronize_rcu(); 2044 2045 buf->time_start = buffer_ftrace_now(buf, buf->cpu); 2046 2047 ring_buffer_reset(buffer); 2048 2049 ring_buffer_record_enable(buffer); 2050 } 2051 2052 /* Must have trace_types_lock held */ 2053 void tracing_reset_all_online_cpus_unlocked(void) 2054 { 2055 struct trace_array *tr; 2056 2057 lockdep_assert_held(&trace_types_lock); 2058 2059 list_for_each_entry(tr, &ftrace_trace_arrays, list) { 2060 if (!tr->clear_trace) 2061 continue; 2062 tr->clear_trace = false; 2063 tracing_reset_online_cpus(&tr->array_buffer); 2064 #ifdef CONFIG_TRACER_SNAPSHOT 2065 tracing_reset_online_cpus(&tr->snapshot_buffer); 2066 #endif 2067 } 2068 } 2069 2070 void tracing_reset_all_online_cpus(void) 2071 { 2072 guard(mutex)(&trace_types_lock); 2073 tracing_reset_all_online_cpus_unlocked(); 2074 } 2075 2076 int is_tracing_stopped(void) 2077 { 2078 return global_trace.stop_count; 2079 } 2080 2081 static void tracing_start_tr(struct trace_array *tr) 2082 { 2083 struct trace_buffer *buffer; 2084 2085 if (tracing_disabled) 2086 return; 2087 2088 guard(raw_spinlock_irqsave)(&tr->start_lock); 2089 if (--tr->stop_count) { 2090 if (WARN_ON_ONCE(tr->stop_count < 0)) { 2091 /* Someone screwed up their debugging */ 2092 tr->stop_count = 0; 2093 } 2094 return; 2095 } 2096 2097 /* Prevent the buffers from switching */ 2098 arch_spin_lock(&tr->max_lock); 2099 2100 buffer = tr->array_buffer.buffer; 2101 if (buffer) 2102 ring_buffer_record_enable(buffer); 2103 2104 #ifdef CONFIG_TRACER_SNAPSHOT 2105 buffer = tr->snapshot_buffer.buffer; 2106 if (buffer) 2107 ring_buffer_record_enable(buffer); 2108 #endif 2109 2110 arch_spin_unlock(&tr->max_lock); 2111 } 2112 2113 /** 2114 * tracing_start - quick start of the tracer 2115 * 2116 * If tracing is enabled but was stopped by tracing_stop, 2117 * this will start the tracer back up. 
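 *
 * Illustrative sketch only (the helper called in between is
 * hypothetical, not from this file):
 *
 *	tracing_stop();
 *	do_work_that_should_not_be_traced();
 *	tracing_start();
 *
 * The pair nests through tr->stop_count, so stop/start calls can be
 * stacked; recording resumes only when the count drops back to zero.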
2118 */ 2119 void tracing_start(void) 2120 2121 { 2122 return tracing_start_tr(&global_trace); 2123 } 2124 2125 static void tracing_stop_tr(struct trace_array *tr) 2126 { 2127 struct trace_buffer *buffer; 2128 2129 guard(raw_spinlock_irqsave)(&tr->start_lock); 2130 if (tr->stop_count++) 2131 return; 2132 2133 /* Prevent the buffers from switching */ 2134 arch_spin_lock(&tr->max_lock); 2135 2136 buffer = tr->array_buffer.buffer; 2137 if (buffer) 2138 ring_buffer_record_disable(buffer); 2139 2140 #ifdef CONFIG_TRACER_SNAPSHOT 2141 buffer = tr->snapshot_buffer.buffer; 2142 if (buffer) 2143 ring_buffer_record_disable(buffer); 2144 #endif 2145 2146 arch_spin_unlock(&tr->max_lock); 2147 } 2148 2149 /** 2150 * tracing_stop - quick stop of the tracer 2151 * 2152 * Light weight way to stop tracing. Use in conjunction with 2153 * tracing_start. 2154 */ 2155 void tracing_stop(void) 2156 { 2157 return tracing_stop_tr(&global_trace); 2158 } 2159 2160 /* 2161 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq 2162 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function 2163 * simplifies those functions and keeps them in sync. 2164 */ 2165 enum print_line_t trace_handle_return(struct trace_seq *s) 2166 { 2167 return trace_seq_has_overflowed(s) ? 2168 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED; 2169 } 2170 EXPORT_SYMBOL_GPL(trace_handle_return); 2171 2172 static unsigned short migration_disable_value(void) 2173 { 2174 #if defined(CONFIG_SMP) 2175 return current->migration_disabled; 2176 #else 2177 return 0; 2178 #endif 2179 } 2180 2181 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status) 2182 { 2183 unsigned int trace_flags = irqs_status; 2184 unsigned int pc; 2185 2186 pc = preempt_count(); 2187 2188 if (pc & NMI_MASK) 2189 trace_flags |= TRACE_FLAG_NMI; 2190 if (pc & HARDIRQ_MASK) 2191 trace_flags |= TRACE_FLAG_HARDIRQ; 2192 if (in_serving_softirq()) 2193 trace_flags |= TRACE_FLAG_SOFTIRQ; 2194 if (softirq_count() >> (SOFTIRQ_SHIFT + 1)) 2195 trace_flags |= TRACE_FLAG_BH_OFF; 2196 2197 if (tif_need_resched()) 2198 trace_flags |= TRACE_FLAG_NEED_RESCHED; 2199 if (test_preempt_need_resched()) 2200 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED; 2201 if (IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY) && tif_test_bit(TIF_NEED_RESCHED_LAZY)) 2202 trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY; 2203 return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) | 2204 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4; 2205 } 2206 2207 struct ring_buffer_event * 2208 trace_buffer_lock_reserve(struct trace_buffer *buffer, 2209 int type, 2210 unsigned long len, 2211 unsigned int trace_ctx) 2212 { 2213 return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx); 2214 } 2215 2216 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event); 2217 DEFINE_PER_CPU(int, trace_buffered_event_cnt); 2218 static int trace_buffered_event_ref; 2219 2220 /** 2221 * trace_buffered_event_enable - enable buffering events 2222 * 2223 * When events are being filtered, it is quicker to use a temporary 2224 * buffer to write the event data into if there's a likely chance 2225 * that it will not be committed. The discard of the ring buffer 2226 * is not as fast as committing, and is much slower than copying 2227 * a commit. 2228 * 2229 * When an event is to be filtered, allocate per cpu buffers to 2230 * write the event data into, and if the event is filtered and discarded 2231 * it is simply dropped, otherwise, the entire data is to be committed 2232 * in one shot. 
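 *
 * Illustrative sketch only (simplified, hypothetical caller): the
 * enable/disable calls are reference counted and expect event_mutex
 * to be held, roughly
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();
 *	... install the event filter ...
 *	mutex_unlock(&event_mutex);
 *
 * and, once the last filter goes away,
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_disable();
 *	mutex_unlock(&event_mutex);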
2233 */ 2234 void trace_buffered_event_enable(void) 2235 { 2236 struct ring_buffer_event *event; 2237 struct page *page; 2238 int cpu; 2239 2240 WARN_ON_ONCE(!mutex_is_locked(&event_mutex)); 2241 2242 if (trace_buffered_event_ref++) 2243 return; 2244 2245 for_each_tracing_cpu(cpu) { 2246 page = alloc_pages_node(cpu_to_node(cpu), 2247 GFP_KERNEL | __GFP_NORETRY, 0); 2248 /* This is just an optimization and can handle failures */ 2249 if (!page) { 2250 pr_err("Failed to allocate event buffer\n"); 2251 break; 2252 } 2253 2254 event = page_address(page); 2255 memset(event, 0, sizeof(*event)); 2256 2257 per_cpu(trace_buffered_event, cpu) = event; 2258 2259 scoped_guard(preempt,) { 2260 if (cpu == smp_processor_id() && 2261 __this_cpu_read(trace_buffered_event) != 2262 per_cpu(trace_buffered_event, cpu)) 2263 WARN_ON_ONCE(1); 2264 } 2265 } 2266 } 2267 2268 static void enable_trace_buffered_event(void *data) 2269 { 2270 this_cpu_dec(trace_buffered_event_cnt); 2271 } 2272 2273 static void disable_trace_buffered_event(void *data) 2274 { 2275 this_cpu_inc(trace_buffered_event_cnt); 2276 } 2277 2278 /** 2279 * trace_buffered_event_disable - disable buffering events 2280 * 2281 * When a filter is removed, it is faster to not use the buffered 2282 * events, and to commit directly into the ring buffer. Free up 2283 * the temp buffers when there are no more users. This requires 2284 * special synchronization with current events. 2285 */ 2286 void trace_buffered_event_disable(void) 2287 { 2288 int cpu; 2289 2290 WARN_ON_ONCE(!mutex_is_locked(&event_mutex)); 2291 2292 if (WARN_ON_ONCE(!trace_buffered_event_ref)) 2293 return; 2294 2295 if (--trace_buffered_event_ref) 2296 return; 2297 2298 /* For each CPU, set the buffer as used. */ 2299 on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event, 2300 NULL, true); 2301 2302 /* Wait for all current users to finish */ 2303 synchronize_rcu(); 2304 2305 for_each_tracing_cpu(cpu) { 2306 free_page((unsigned long)per_cpu(trace_buffered_event, cpu)); 2307 per_cpu(trace_buffered_event, cpu) = NULL; 2308 } 2309 2310 /* 2311 * Wait for all CPUs that potentially started checking if they can use 2312 * their event buffer only after the previous synchronize_rcu() call and 2313 * they still read a valid pointer from trace_buffered_event. It must be 2314 * ensured they don't see cleared trace_buffered_event_cnt else they 2315 * could wrongly decide to use the pointed-to buffer which is now freed. 2316 */ 2317 synchronize_rcu(); 2318 2319 /* For each CPU, relinquish the buffer */ 2320 on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL, 2321 true); 2322 } 2323 2324 static struct trace_buffer *temp_buffer; 2325 2326 struct ring_buffer_event * 2327 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb, 2328 struct trace_event_file *trace_file, 2329 int type, unsigned long len, 2330 unsigned int trace_ctx) 2331 { 2332 struct ring_buffer_event *entry; 2333 struct trace_array *tr = trace_file->tr; 2334 int val; 2335 2336 *current_rb = tr->array_buffer.buffer; 2337 2338 if (!tr->no_filter_buffering_ref && 2339 (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) { 2340 preempt_disable_notrace(); 2341 /* 2342 * Filtering is on, so try to use the per cpu buffer first. 2343 * This buffer will simulate a ring_buffer_event, 2344 * where the type_len is zero and the array[0] will 2345 * hold the full length. 2346 * (see include/linux/ring-buffer.h for details on 2347 * how the ring_buffer_event is structured). 
2348 * 2349 * Using a temp buffer during filtering and copying it 2350 * on a matched filter is quicker than writing directly 2351 * into the ring buffer and then discarding it when 2352 * it doesn't match. That is because the discard 2353 * requires several atomic operations to get right. 2354 * Copying on match and doing nothing on a failed match 2355 * is still quicker than no copy on match, but having 2356 * to discard out of the ring buffer on a failed match. 2357 */ 2358 if ((entry = __this_cpu_read(trace_buffered_event))) { 2359 int max_len = PAGE_SIZE - struct_size(entry, array, 1); 2360 2361 val = this_cpu_inc_return(trace_buffered_event_cnt); 2362 2363 /* 2364 * Preemption is disabled, but interrupts and NMIs 2365 * can still come in now. If that happens after 2366 * the above increment, then it will have to go 2367 * back to the old method of allocating the event 2368 * on the ring buffer, and if the filter fails, it 2369 * will have to call ring_buffer_discard_commit() 2370 * to remove it. 2371 * 2372 * Need to also check the unlikely case that the 2373 * length is bigger than the temp buffer size. 2374 * If that happens, then the reserve is pretty much 2375 * guaranteed to fail, as the ring buffer currently 2376 * only allows events less than a page. But that may 2377 * change in the future, so let the ring buffer reserve 2378 * handle the failure in that case. 2379 */ 2380 if (val == 1 && likely(len <= max_len)) { 2381 trace_event_setup(entry, type, trace_ctx); 2382 entry->array[0] = len; 2383 /* Return with preemption disabled */ 2384 return entry; 2385 } 2386 this_cpu_dec(trace_buffered_event_cnt); 2387 } 2388 /* __trace_buffer_lock_reserve() disables preemption */ 2389 preempt_enable_notrace(); 2390 } 2391 2392 entry = __trace_buffer_lock_reserve(*current_rb, type, len, 2393 trace_ctx); 2394 /* 2395 * If tracing is off, but we have triggers enabled 2396 * we still need to look at the event data. Use the temp_buffer 2397 * to store the trace event for the trigger to use. It's recursive 2398 * safe and will not be recorded anywhere. 
2399 */ 2400 if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) { 2401 *current_rb = temp_buffer; 2402 entry = __trace_buffer_lock_reserve(*current_rb, type, len, 2403 trace_ctx); 2404 } 2405 return entry; 2406 } 2407 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve); 2408 2409 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock); 2410 static DEFINE_MUTEX(tracepoint_printk_mutex); 2411 2412 static void output_printk(struct trace_event_buffer *fbuffer) 2413 { 2414 struct trace_event_call *event_call; 2415 struct trace_event_file *file; 2416 struct trace_event *event; 2417 unsigned long flags; 2418 struct trace_iterator *iter = tracepoint_print_iter; 2419 2420 /* We should never get here if iter is NULL */ 2421 if (WARN_ON_ONCE(!iter)) 2422 return; 2423 2424 event_call = fbuffer->trace_file->event_call; 2425 if (!event_call || !event_call->event.funcs || 2426 !event_call->event.funcs->trace) 2427 return; 2428 2429 file = fbuffer->trace_file; 2430 if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) || 2431 (unlikely(file->flags & EVENT_FILE_FL_FILTERED) && 2432 !filter_match_preds(file->filter, fbuffer->entry))) 2433 return; 2434 2435 event = &fbuffer->trace_file->event_call->event; 2436 2437 raw_spin_lock_irqsave(&tracepoint_iter_lock, flags); 2438 trace_seq_init(&iter->seq); 2439 iter->ent = fbuffer->entry; 2440 event_call->event.funcs->trace(iter, 0, event); 2441 trace_seq_putc(&iter->seq, 0); 2442 printk("%s", iter->seq.buffer); 2443 2444 raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags); 2445 } 2446 2447 int tracepoint_printk_sysctl(const struct ctl_table *table, int write, 2448 void *buffer, size_t *lenp, 2449 loff_t *ppos) 2450 { 2451 int save_tracepoint_printk; 2452 int ret; 2453 2454 guard(mutex)(&tracepoint_printk_mutex); 2455 save_tracepoint_printk = tracepoint_printk; 2456 2457 ret = proc_dointvec(table, write, buffer, lenp, ppos); 2458 2459 /* 2460 * This will force exiting early, as tracepoint_printk 2461 * is always zero when tracepoint_printk_iter is not allocated 2462 */ 2463 if (!tracepoint_print_iter) 2464 tracepoint_printk = 0; 2465 2466 if (save_tracepoint_printk == tracepoint_printk) 2467 return ret; 2468 2469 if (tracepoint_printk) 2470 static_key_enable(&tracepoint_printk_key.key); 2471 else 2472 static_key_disable(&tracepoint_printk_key.key); 2473 2474 return ret; 2475 } 2476 2477 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer) 2478 { 2479 enum event_trigger_type tt = ETT_NONE; 2480 struct trace_event_file *file = fbuffer->trace_file; 2481 2482 if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event, 2483 fbuffer->entry, &tt)) 2484 goto discard; 2485 2486 if (static_key_false(&tracepoint_printk_key.key)) 2487 output_printk(fbuffer); 2488 2489 if (static_branch_unlikely(&trace_event_exports_enabled)) 2490 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT); 2491 2492 trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer, 2493 fbuffer->event, fbuffer->trace_ctx, fbuffer->regs); 2494 2495 discard: 2496 if (tt) 2497 event_triggers_post_call(file, tt); 2498 2499 } 2500 EXPORT_SYMBOL_GPL(trace_event_buffer_commit); 2501 2502 /* 2503 * Skip 3: 2504 * 2505 * trace_buffer_unlock_commit_regs() 2506 * trace_event_buffer_commit() 2507 * trace_event_raw_event_xxx() 2508 */ 2509 # define STACK_SKIP 3 2510 2511 void trace_buffer_unlock_commit_regs(struct trace_array *tr, 2512 struct trace_buffer *buffer, 2513 struct ring_buffer_event *event, 2514 unsigned int trace_ctx, 2515 struct pt_regs *regs) 2516 { 2517 
__buffer_unlock_commit(buffer, event); 2518 2519 /* 2520 * If regs is not set, then skip the necessary functions. 2521 * Note, we can still get here via blktrace, wakeup tracer 2522 * and mmiotrace, but that's ok if they lose a function or 2523 * two. They are not that meaningful. 2524 */ 2525 ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs); 2526 ftrace_trace_userstack(tr, buffer, trace_ctx); 2527 } 2528 2529 /* 2530 * Similar to trace_buffer_unlock_commit_regs() but do not dump stack. 2531 */ 2532 void 2533 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer, 2534 struct ring_buffer_event *event) 2535 { 2536 __buffer_unlock_commit(buffer, event); 2537 } 2538 2539 void 2540 trace_function(struct trace_array *tr, unsigned long ip, unsigned long 2541 parent_ip, unsigned int trace_ctx, struct ftrace_regs *fregs) 2542 { 2543 struct trace_buffer *buffer = tr->array_buffer.buffer; 2544 struct ring_buffer_event *event; 2545 struct ftrace_entry *entry; 2546 int size = sizeof(*entry); 2547 2548 size += FTRACE_REGS_MAX_ARGS * !!fregs * sizeof(long); 2549 2550 event = __trace_buffer_lock_reserve(buffer, TRACE_FN, size, 2551 trace_ctx); 2552 if (!event) 2553 return; 2554 entry = ring_buffer_event_data(event); 2555 entry->ip = ip; 2556 entry->parent_ip = parent_ip; 2557 2558 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API 2559 if (fregs) { 2560 for (int i = 0; i < FTRACE_REGS_MAX_ARGS; i++) 2561 entry->args[i] = ftrace_regs_get_argument(fregs, i); 2562 } 2563 #endif 2564 2565 if (static_branch_unlikely(&trace_function_exports_enabled)) 2566 ftrace_exports(event, TRACE_EXPORT_FUNCTION); 2567 __buffer_unlock_commit(buffer, event); 2568 } 2569 2570 #ifdef CONFIG_STACKTRACE 2571 2572 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */ 2573 #define FTRACE_KSTACK_NESTING 4 2574 2575 #define FTRACE_KSTACK_ENTRIES (SZ_4K / FTRACE_KSTACK_NESTING) 2576 2577 struct ftrace_stack { 2578 unsigned long calls[FTRACE_KSTACK_ENTRIES]; 2579 }; 2580 2581 2582 struct ftrace_stacks { 2583 struct ftrace_stack stacks[FTRACE_KSTACK_NESTING]; 2584 }; 2585 2586 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks); 2587 static DEFINE_PER_CPU(int, ftrace_stack_reserve); 2588 2589 void __ftrace_trace_stack(struct trace_array *tr, 2590 struct trace_buffer *buffer, 2591 unsigned int trace_ctx, 2592 int skip, struct pt_regs *regs) 2593 { 2594 struct ring_buffer_event *event; 2595 unsigned int size, nr_entries; 2596 struct ftrace_stack *fstack; 2597 struct stack_entry *entry; 2598 int stackidx; 2599 int bit; 2600 2601 bit = trace_test_and_set_recursion(_THIS_IP_, _RET_IP_, TRACE_EVENT_START); 2602 if (bit < 0) 2603 return; 2604 2605 /* 2606 * Add one, for this function and the call to save_stack_trace() 2607 * If regs is set, then these functions will not be in the way. 2608 */ 2609 #ifndef CONFIG_UNWINDER_ORC 2610 if (!regs) 2611 skip++; 2612 #endif 2613 2614 guard(preempt_notrace)(); 2615 2616 stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1; 2617 2618 /* This should never happen. If it does, yell once and skip */ 2619 if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING)) 2620 goto out; 2621 2622 /* 2623 * The above __this_cpu_inc_return() is 'atomic' cpu local. An 2624 * interrupt will either see the value pre increment or post 2625 * increment. If the interrupt happens pre increment it will have 2626 * restored the counter when it returns. We just need a barrier to 2627 * keep gcc from moving things around. 
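 *
 * Worked example (illustrative only): a task saving a stack trace
 * uses stackidx 0; if an IRQ arrives while that is in progress and
 * also saves a stack it gets stackidx 1, and an NMI on top of that
 * gets stackidx 2. Each context therefore writes into its own
 * ftrace_stack slot, out of the FTRACE_KSTACK_NESTING (4) available,
 * and drops the reservation again on its way out below.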
2628 */ 2629 barrier(); 2630 2631 fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx; 2632 size = ARRAY_SIZE(fstack->calls); 2633 2634 if (regs) { 2635 nr_entries = stack_trace_save_regs(regs, fstack->calls, 2636 size, skip); 2637 } else { 2638 nr_entries = stack_trace_save(fstack->calls, size, skip); 2639 } 2640 2641 #ifdef CONFIG_DYNAMIC_FTRACE 2642 /* Mark entry of stack trace as trampoline code */ 2643 if (tr->ops && tr->ops->trampoline) { 2644 unsigned long tramp_start = tr->ops->trampoline; 2645 unsigned long tramp_end = tramp_start + tr->ops->trampoline_size; 2646 unsigned long *calls = fstack->calls; 2647 2648 for (int i = 0; i < nr_entries; i++) { 2649 if (calls[i] >= tramp_start && calls[i] < tramp_end) 2650 calls[i] = FTRACE_TRAMPOLINE_MARKER; 2651 } 2652 } 2653 #endif 2654 2655 event = __trace_buffer_lock_reserve(buffer, TRACE_STACK, 2656 struct_size(entry, caller, nr_entries), 2657 trace_ctx); 2658 if (!event) 2659 goto out; 2660 entry = ring_buffer_event_data(event); 2661 2662 entry->size = nr_entries; 2663 memcpy(&entry->caller, fstack->calls, 2664 flex_array_size(entry, caller, nr_entries)); 2665 2666 __buffer_unlock_commit(buffer, event); 2667 2668 out: 2669 /* Again, don't let gcc optimize things here */ 2670 barrier(); 2671 __this_cpu_dec(ftrace_stack_reserve); 2672 trace_clear_recursion(bit); 2673 } 2674 2675 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx, 2676 int skip) 2677 { 2678 struct trace_buffer *buffer = tr->array_buffer.buffer; 2679 2680 if (rcu_is_watching()) { 2681 __ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL); 2682 return; 2683 } 2684 2685 if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY))) 2686 return; 2687 2688 /* 2689 * When an NMI triggers, RCU is enabled via ct_nmi_enter(), 2690 * but if the above rcu_is_watching() failed, then the NMI 2691 * triggered someplace critical, and ct_irq_enter() should 2692 * not be called from NMI. 2693 */ 2694 if (unlikely(in_nmi())) 2695 return; 2696 2697 ct_irq_enter_irqson(); 2698 __ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL); 2699 ct_irq_exit_irqson(); 2700 } 2701 2702 /** 2703 * trace_dump_stack - record a stack back trace in the trace buffer 2704 * @skip: Number of functions to skip (helper handlers) 2705 */ 2706 void trace_dump_stack(int skip) 2707 { 2708 if (tracing_disabled || tracing_selftest_running) 2709 return; 2710 2711 #ifndef CONFIG_UNWINDER_ORC 2712 /* Skip 1 to skip this function. */ 2713 skip++; 2714 #endif 2715 __ftrace_trace_stack(printk_trace, printk_trace->array_buffer.buffer, 2716 tracing_gen_ctx(), skip, NULL); 2717 } 2718 EXPORT_SYMBOL_GPL(trace_dump_stack); 2719 2720 #ifdef CONFIG_USER_STACKTRACE_SUPPORT 2721 static DEFINE_PER_CPU(int, user_stack_count); 2722 2723 static void 2724 ftrace_trace_userstack(struct trace_array *tr, 2725 struct trace_buffer *buffer, unsigned int trace_ctx) 2726 { 2727 struct ring_buffer_event *event; 2728 struct userstack_entry *entry; 2729 2730 if (!(tr->trace_flags & TRACE_ITER(USERSTACKTRACE))) 2731 return; 2732 2733 /* 2734 * NMIs can not handle page faults, even with fix ups. 2735 * The save user stack can (and often does) fault. 2736 */ 2737 if (unlikely(in_nmi())) 2738 return; 2739 2740 /* 2741 * prevent recursion, since the user stack tracing may 2742 * trigger other kernel events. 
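 *
 * For example (illustrative only): stack_trace_save_user() below can
 * fault, the fault handling can itself hit a traced event, and that
 * event would bring us right back here. The per-CPU user_stack_count
 * check below breaks that cycle by only recording the outermost
 * attempt on each CPU.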
2743 */ 2744 guard(preempt)(); 2745 if (__this_cpu_read(user_stack_count)) 2746 return; 2747 2748 __this_cpu_inc(user_stack_count); 2749 2750 event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK, 2751 sizeof(*entry), trace_ctx); 2752 if (!event) 2753 goto out_drop_count; 2754 entry = ring_buffer_event_data(event); 2755 2756 entry->tgid = current->tgid; 2757 memset(&entry->caller, 0, sizeof(entry->caller)); 2758 2759 stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES); 2760 __buffer_unlock_commit(buffer, event); 2761 2762 out_drop_count: 2763 __this_cpu_dec(user_stack_count); 2764 } 2765 #else /* CONFIG_USER_STACKTRACE_SUPPORT */ 2766 static void ftrace_trace_userstack(struct trace_array *tr, 2767 struct trace_buffer *buffer, 2768 unsigned int trace_ctx) 2769 { 2770 } 2771 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */ 2772 2773 #endif /* CONFIG_STACKTRACE */ 2774 2775 static inline void 2776 func_repeats_set_delta_ts(struct func_repeats_entry *entry, 2777 unsigned long long delta) 2778 { 2779 entry->bottom_delta_ts = delta & U32_MAX; 2780 entry->top_delta_ts = (delta >> 32); 2781 } 2782 2783 void trace_last_func_repeats(struct trace_array *tr, 2784 struct trace_func_repeats *last_info, 2785 unsigned int trace_ctx) 2786 { 2787 struct trace_buffer *buffer = tr->array_buffer.buffer; 2788 struct func_repeats_entry *entry; 2789 struct ring_buffer_event *event; 2790 u64 delta; 2791 2792 event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS, 2793 sizeof(*entry), trace_ctx); 2794 if (!event) 2795 return; 2796 2797 delta = ring_buffer_event_time_stamp(buffer, event) - 2798 last_info->ts_last_call; 2799 2800 entry = ring_buffer_event_data(event); 2801 entry->ip = last_info->ip; 2802 entry->parent_ip = last_info->parent_ip; 2803 entry->count = last_info->count; 2804 func_repeats_set_delta_ts(entry, delta); 2805 2806 __buffer_unlock_commit(buffer, event); 2807 } 2808 2809 static void trace_iterator_increment(struct trace_iterator *iter) 2810 { 2811 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu); 2812 2813 iter->idx++; 2814 if (buf_iter) 2815 ring_buffer_iter_advance(buf_iter); 2816 } 2817 2818 static struct trace_entry * 2819 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts, 2820 unsigned long *lost_events) 2821 { 2822 struct ring_buffer_event *event; 2823 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu); 2824 2825 if (buf_iter) { 2826 event = ring_buffer_iter_peek(buf_iter, ts); 2827 if (lost_events) 2828 *lost_events = ring_buffer_iter_dropped(buf_iter) ? 2829 (unsigned long)-1 : 0; 2830 } else { 2831 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts, 2832 lost_events); 2833 } 2834 2835 if (event) { 2836 iter->ent_size = ring_buffer_event_length(event); 2837 return ring_buffer_event_data(event); 2838 } 2839 iter->ent_size = 0; 2840 return NULL; 2841 } 2842 2843 static struct trace_entry * 2844 __find_next_entry(struct trace_iterator *iter, int *ent_cpu, 2845 unsigned long *missing_events, u64 *ent_ts) 2846 { 2847 struct trace_buffer *buffer = iter->array_buffer->buffer; 2848 struct trace_entry *ent, *next = NULL; 2849 unsigned long lost_events = 0, next_lost = 0; 2850 int cpu_file = iter->cpu_file; 2851 u64 next_ts = 0, ts; 2852 int next_cpu = -1; 2853 int next_size = 0; 2854 int cpu; 2855 2856 /* 2857 * If we are in a per_cpu trace file, don't bother by iterating over 2858 * all cpu and peek directly. 
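 *
 * Worked example (illustrative only): for the top level trace file
 * cpu_file is RING_BUFFER_ALL_CPUS, so if CPU0's next entry has
 * ts=100, CPU1's has ts=90 and CPU2 is empty, the loop below picks
 * CPU1's entry (next_cpu = 1, next_ts = 90). For a per_cpu/cpuN/trace
 * file, only that CPU's buffer is peeked.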
2859 */ 2860 if (cpu_file > RING_BUFFER_ALL_CPUS) { 2861 if (ring_buffer_empty_cpu(buffer, cpu_file)) 2862 return NULL; 2863 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events); 2864 if (ent_cpu) 2865 *ent_cpu = cpu_file; 2866 2867 return ent; 2868 } 2869 2870 for_each_tracing_cpu(cpu) { 2871 2872 if (ring_buffer_empty_cpu(buffer, cpu)) 2873 continue; 2874 2875 ent = peek_next_entry(iter, cpu, &ts, &lost_events); 2876 2877 /* 2878 * Pick the entry with the smallest timestamp: 2879 */ 2880 if (ent && (!next || ts < next_ts)) { 2881 next = ent; 2882 next_cpu = cpu; 2883 next_ts = ts; 2884 next_lost = lost_events; 2885 next_size = iter->ent_size; 2886 } 2887 } 2888 2889 iter->ent_size = next_size; 2890 2891 if (ent_cpu) 2892 *ent_cpu = next_cpu; 2893 2894 if (ent_ts) 2895 *ent_ts = next_ts; 2896 2897 if (missing_events) 2898 *missing_events = next_lost; 2899 2900 return next; 2901 } 2902 2903 #define STATIC_FMT_BUF_SIZE 128 2904 static char static_fmt_buf[STATIC_FMT_BUF_SIZE]; 2905 2906 char *trace_iter_expand_format(struct trace_iterator *iter) 2907 { 2908 char *tmp; 2909 2910 /* 2911 * iter->tr is NULL when used with tp_printk, which makes 2912 * this get called where it is not safe to call krealloc(). 2913 */ 2914 if (!iter->tr || iter->fmt == static_fmt_buf) 2915 return NULL; 2916 2917 tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE, 2918 GFP_KERNEL); 2919 if (tmp) { 2920 iter->fmt_size += STATIC_FMT_BUF_SIZE; 2921 iter->fmt = tmp; 2922 } 2923 2924 return tmp; 2925 } 2926 2927 /* Returns true if the string is safe to dereference from an event */ 2928 static bool trace_safe_str(struct trace_iterator *iter, const char *str) 2929 { 2930 unsigned long addr = (unsigned long)str; 2931 struct trace_event *trace_event; 2932 struct trace_event_call *event; 2933 2934 /* OK if part of the event data */ 2935 if ((addr >= (unsigned long)iter->ent) && 2936 (addr < (unsigned long)iter->ent + iter->ent_size)) 2937 return true; 2938 2939 /* OK if part of the temp seq buffer */ 2940 if ((addr >= (unsigned long)iter->tmp_seq.buffer) && 2941 (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE)) 2942 return true; 2943 2944 /* Core rodata can not be freed */ 2945 if (is_kernel_rodata(addr)) 2946 return true; 2947 2948 if (trace_is_tracepoint_string(str)) 2949 return true; 2950 2951 /* 2952 * Now this could be a module event, referencing core module 2953 * data, which is OK. 2954 */ 2955 if (!iter->ent) 2956 return false; 2957 2958 trace_event = ftrace_find_event(iter->ent->type); 2959 if (!trace_event) 2960 return false; 2961 2962 event = container_of(trace_event, struct trace_event_call, event); 2963 if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module) 2964 return false; 2965 2966 /* Would rather have rodata, but this will suffice */ 2967 if (within_module_core(addr, event->module)) 2968 return true; 2969 2970 return false; 2971 } 2972 2973 /** 2974 * ignore_event - Check dereferenced fields while writing to the seq buffer 2975 * @iter: The iterator that holds the seq buffer and the event being printed 2976 * 2977 * At boot up, test_event_printk() will flag any event that dereferences 2978 * a string with "%s" that does not exist in the ring buffer. It may still 2979 * be valid, as the string may point to a static string in the kernel 2980 * rodata that never gets freed. But if the string pointer is pointing 2981 * to something that was allocated, there's a chance that it can be freed 2982 * by the time the user reads the trace.
This would cause a bad memory 2983 * access by the kernel and possibly crash the system. 2984 * 2985 * This function will check if the event has any fields flagged as needing 2986 * to be checked at runtime and perform those checks. 2987 * 2988 * If it is found that a field is unsafe, it will write into the @iter->seq 2989 * a message stating what was found to be unsafe. 2990 * 2991 * @return: true if the event is unsafe and should be ignored, 2992 * false otherwise. 2993 */ 2994 bool ignore_event(struct trace_iterator *iter) 2995 { 2996 struct ftrace_event_field *field; 2997 struct trace_event *trace_event; 2998 struct trace_event_call *event; 2999 struct list_head *head; 3000 struct trace_seq *seq; 3001 const void *ptr; 3002 3003 trace_event = ftrace_find_event(iter->ent->type); 3004 3005 seq = &iter->seq; 3006 3007 if (!trace_event) { 3008 trace_seq_printf(seq, "EVENT ID %d NOT FOUND?\n", iter->ent->type); 3009 return true; 3010 } 3011 3012 event = container_of(trace_event, struct trace_event_call, event); 3013 if (!(event->flags & TRACE_EVENT_FL_TEST_STR)) 3014 return false; 3015 3016 head = trace_get_fields(event); 3017 if (!head) { 3018 trace_seq_printf(seq, "FIELDS FOR EVENT '%s' NOT FOUND?\n", 3019 trace_event_name(event)); 3020 return true; 3021 } 3022 3023 /* Offsets are from the iter->ent that points to the raw event */ 3024 ptr = iter->ent; 3025 3026 list_for_each_entry(field, head, link) { 3027 const char *str; 3028 bool good; 3029 3030 if (!field->needs_test) 3031 continue; 3032 3033 str = *(const char **)(ptr + field->offset); 3034 3035 good = trace_safe_str(iter, str); 3036 3037 /* 3038 * If you hit this warning, it is likely that the 3039 * trace event in question used %s on a string that 3040 * was saved at the time of the event, but may not be 3041 * around when the trace is read. Use __string(), 3042 * __assign_str() and __get_str() helpers in the TRACE_EVENT() 3043 * instead. See samples/trace_events/trace-events-sample.h 3044 * for reference. 
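 *
 * Hypothetical sketch of the safe pattern (the event and field names
 * are made up; see the sample file above for the authoritative
 * version):
 *
 *	TRACE_EVENT(example_event,
 *		TP_PROTO(const char *name),
 *		TP_ARGS(name),
 *		TP_STRUCT__entry(
 *			__string(name, name)
 *		),
 *		TP_fast_assign(
 *			__assign_str(name);
 *		),
 *		TP_printk("name=%s", __get_str(name))
 *	);
 *
 * This copies the string into the event itself, so the "%s" pointer
 * always refers to data that lives in the ring buffer.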
3045 */ 3046 if (WARN_ONCE(!good, "event '%s' has unsafe pointer field '%s'", 3047 trace_event_name(event), field->name)) { 3048 trace_seq_printf(seq, "EVENT %s: HAS UNSAFE POINTER FIELD '%s'\n", 3049 trace_event_name(event), field->name); 3050 return true; 3051 } 3052 } 3053 return false; 3054 } 3055 3056 const char *trace_event_format(struct trace_iterator *iter, const char *fmt) 3057 { 3058 const char *p, *new_fmt; 3059 char *q; 3060 3061 if (WARN_ON_ONCE(!fmt)) 3062 return fmt; 3063 3064 if (!iter->tr || iter->tr->trace_flags & TRACE_ITER(HASH_PTR)) 3065 return fmt; 3066 3067 p = fmt; 3068 new_fmt = q = iter->fmt; 3069 while (*p) { 3070 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) { 3071 if (!trace_iter_expand_format(iter)) 3072 return fmt; 3073 3074 q += iter->fmt - new_fmt; 3075 new_fmt = iter->fmt; 3076 } 3077 3078 *q++ = *p++; 3079 3080 /* Replace %p with %px */ 3081 if (p[-1] == '%') { 3082 if (p[0] == '%') { 3083 *q++ = *p++; 3084 } else if (p[0] == 'p' && !isalnum(p[1])) { 3085 *q++ = *p++; 3086 *q++ = 'x'; 3087 } 3088 } 3089 } 3090 *q = '\0'; 3091 3092 return new_fmt; 3093 } 3094 3095 #define STATIC_TEMP_BUF_SIZE 128 3096 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4); 3097 3098 /* Find the next real entry, without updating the iterator itself */ 3099 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter, 3100 int *ent_cpu, u64 *ent_ts) 3101 { 3102 /* __find_next_entry will reset ent_size */ 3103 int ent_size = iter->ent_size; 3104 struct trace_entry *entry; 3105 3106 /* 3107 * If called from ftrace_dump(), then the iter->temp buffer 3108 * will be the static_temp_buf and not created from kmalloc. 3109 * If the entry size is greater than the buffer, we can 3110 * not save it. Just return NULL in that case. This is only 3111 * used to add markers when two consecutive events' time 3112 * stamps have a large delta. See trace_print_lat_context() 3113 */ 3114 if (iter->temp == static_temp_buf && 3115 STATIC_TEMP_BUF_SIZE < ent_size) 3116 return NULL; 3117 3118 /* 3119 * The __find_next_entry() may call peek_next_entry(), which may 3120 * call ring_buffer_peek() that may make the contents of iter->ent 3121 * undefined. Need to copy iter->ent now. 3122 */ 3123 if (iter->ent && iter->ent != iter->temp) { 3124 if ((!iter->temp || iter->temp_size < iter->ent_size) && 3125 !WARN_ON_ONCE(iter->temp == static_temp_buf)) { 3126 void *temp; 3127 temp = kmalloc(iter->ent_size, GFP_KERNEL); 3128 if (!temp) 3129 return NULL; 3130 kfree(iter->temp); 3131 iter->temp = temp; 3132 iter->temp_size = iter->ent_size; 3133 } 3134 memcpy(iter->temp, iter->ent, iter->ent_size); 3135 iter->ent = iter->temp; 3136 } 3137 entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts); 3138 /* Put back the original ent_size */ 3139 iter->ent_size = ent_size; 3140 3141 return entry; 3142 } 3143 3144 /* Find the next real entry, and increment the iterator to the next entry */ 3145 void *trace_find_next_entry_inc(struct trace_iterator *iter) 3146 { 3147 iter->ent = __find_next_entry(iter, &iter->cpu, 3148 &iter->lost_events, &iter->ts); 3149 3150 if (iter->ent) 3151 trace_iterator_increment(iter); 3152 3153 return iter->ent ? 
iter : NULL; 3154 } 3155 3156 static void trace_consume(struct trace_iterator *iter) 3157 { 3158 ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts, 3159 &iter->lost_events); 3160 } 3161 3162 static void *s_next(struct seq_file *m, void *v, loff_t *pos) 3163 { 3164 struct trace_iterator *iter = m->private; 3165 int i = (int)*pos; 3166 void *ent; 3167 3168 WARN_ON_ONCE(iter->leftover); 3169 3170 (*pos)++; 3171 3172 /* can't go backwards */ 3173 if (iter->idx > i) 3174 return NULL; 3175 3176 if (iter->idx < 0) 3177 ent = trace_find_next_entry_inc(iter); 3178 else 3179 ent = iter; 3180 3181 while (ent && iter->idx < i) 3182 ent = trace_find_next_entry_inc(iter); 3183 3184 iter->pos = *pos; 3185 3186 return ent; 3187 } 3188 3189 void tracing_iter_reset(struct trace_iterator *iter, int cpu) 3190 { 3191 struct ring_buffer_iter *buf_iter; 3192 unsigned long entries = 0; 3193 u64 ts; 3194 3195 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0; 3196 3197 buf_iter = trace_buffer_iter(iter, cpu); 3198 if (!buf_iter) 3199 return; 3200 3201 ring_buffer_iter_reset(buf_iter); 3202 3203 /* 3204 * We could have the case with the max latency tracers 3205 * that a reset never took place on a cpu. This is evident 3206 * by the timestamp being before the start of the buffer. 3207 */ 3208 while (ring_buffer_iter_peek(buf_iter, &ts)) { 3209 if (ts >= iter->array_buffer->time_start) 3210 break; 3211 entries++; 3212 ring_buffer_iter_advance(buf_iter); 3213 /* This could be a big loop */ 3214 cond_resched(); 3215 } 3216 3217 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries; 3218 } 3219 3220 /* 3221 * The current tracer is copied to avoid a global locking 3222 * all around. 3223 */ 3224 static void *s_start(struct seq_file *m, loff_t *pos) 3225 { 3226 struct trace_iterator *iter = m->private; 3227 struct trace_array *tr = iter->tr; 3228 int cpu_file = iter->cpu_file; 3229 void *p = NULL; 3230 loff_t l = 0; 3231 int cpu; 3232 3233 mutex_lock(&trace_types_lock); 3234 if (unlikely(tr->current_trace != iter->trace)) { 3235 /* Close iter->trace before switching to the new current tracer */ 3236 if (iter->trace->close) 3237 iter->trace->close(iter); 3238 iter->trace = tr->current_trace; 3239 /* Reopen the new current tracer */ 3240 if (iter->trace->open) 3241 iter->trace->open(iter); 3242 } 3243 mutex_unlock(&trace_types_lock); 3244 3245 if (iter->snapshot && tracer_uses_snapshot(iter->trace)) 3246 return ERR_PTR(-EBUSY); 3247 3248 if (*pos != iter->pos) { 3249 iter->ent = NULL; 3250 iter->cpu = 0; 3251 iter->idx = -1; 3252 3253 if (cpu_file == RING_BUFFER_ALL_CPUS) { 3254 for_each_tracing_cpu(cpu) 3255 tracing_iter_reset(iter, cpu); 3256 } else 3257 tracing_iter_reset(iter, cpu_file); 3258 3259 iter->leftover = 0; 3260 for (p = iter; p && l < *pos; p = s_next(m, p, &l)) 3261 ; 3262 3263 } else { 3264 /* 3265 * If we overflowed the seq_file before, then we want 3266 * to just reuse the trace_seq buffer again. 
3267 */ 3268 if (iter->leftover) 3269 p = iter; 3270 else { 3271 l = *pos - 1; 3272 p = s_next(m, p, &l); 3273 } 3274 } 3275 3276 trace_event_read_lock(); 3277 trace_access_lock(cpu_file); 3278 return p; 3279 } 3280 3281 static void s_stop(struct seq_file *m, void *p) 3282 { 3283 struct trace_iterator *iter = m->private; 3284 3285 if (iter->snapshot && tracer_uses_snapshot(iter->trace)) 3286 return; 3287 3288 trace_access_unlock(iter->cpu_file); 3289 trace_event_read_unlock(); 3290 } 3291 3292 static void 3293 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total, 3294 unsigned long *entries, int cpu) 3295 { 3296 unsigned long count; 3297 3298 count = ring_buffer_entries_cpu(buf->buffer, cpu); 3299 /* 3300 * If this buffer has skipped entries, then we hold all 3301 * entries for the trace and we need to ignore the 3302 * ones before the time stamp. 3303 */ 3304 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) { 3305 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries; 3306 /* total is the same as the entries */ 3307 *total = count; 3308 } else 3309 *total = count + 3310 ring_buffer_overrun_cpu(buf->buffer, cpu); 3311 *entries = count; 3312 } 3313 3314 static void 3315 get_total_entries(struct array_buffer *buf, 3316 unsigned long *total, unsigned long *entries) 3317 { 3318 unsigned long t, e; 3319 int cpu; 3320 3321 *total = 0; 3322 *entries = 0; 3323 3324 for_each_tracing_cpu(cpu) { 3325 get_total_entries_cpu(buf, &t, &e, cpu); 3326 *total += t; 3327 *entries += e; 3328 } 3329 } 3330 3331 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu) 3332 { 3333 unsigned long total, entries; 3334 3335 if (!tr) 3336 tr = &global_trace; 3337 3338 get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu); 3339 3340 return entries; 3341 } 3342 3343 unsigned long trace_total_entries(struct trace_array *tr) 3344 { 3345 unsigned long total, entries; 3346 3347 if (!tr) 3348 tr = &global_trace; 3349 3350 get_total_entries(&tr->array_buffer, &total, &entries); 3351 3352 return entries; 3353 } 3354 3355 static void print_lat_help_header(struct seq_file *m) 3356 { 3357 seq_puts(m, "# _------=> CPU# \n" 3358 "# / _-----=> irqs-off/BH-disabled\n" 3359 "# | / _----=> need-resched \n" 3360 "# || / _---=> hardirq/softirq \n" 3361 "# ||| / _--=> preempt-depth \n" 3362 "# |||| / _-=> migrate-disable \n" 3363 "# ||||| / delay \n" 3364 "# cmd pid |||||| time | caller \n" 3365 "# \\ / |||||| \\ | / \n"); 3366 } 3367 3368 static void print_event_info(struct array_buffer *buf, struct seq_file *m) 3369 { 3370 unsigned long total; 3371 unsigned long entries; 3372 3373 get_total_entries(buf, &total, &entries); 3374 seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n", 3375 entries, total, num_online_cpus()); 3376 seq_puts(m, "#\n"); 3377 } 3378 3379 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m, 3380 unsigned int flags) 3381 { 3382 bool tgid = flags & TRACE_ITER(RECORD_TGID); 3383 3384 print_event_info(buf, m); 3385 3386 seq_printf(m, "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? " TGID " : ""); 3387 seq_printf(m, "# | | %s | | |\n", tgid ? " | " : ""); 3388 } 3389 3390 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m, 3391 unsigned int flags) 3392 { 3393 bool tgid = flags & TRACE_ITER(RECORD_TGID); 3394 static const char space[] = " "; 3395 int prec = tgid ? 
12 : 2; 3396 3397 print_event_info(buf, m); 3398 3399 seq_printf(m, "# %.*s _-----=> irqs-off/BH-disabled\n", prec, space); 3400 seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space); 3401 seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space); 3402 seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space); 3403 seq_printf(m, "# %.*s||| / _-=> migrate-disable\n", prec, space); 3404 seq_printf(m, "# %.*s|||| / delay\n", prec, space); 3405 seq_printf(m, "# TASK-PID %.*s CPU# ||||| TIMESTAMP FUNCTION\n", prec, " TGID "); 3406 seq_printf(m, "# | | %.*s | ||||| | |\n", prec, " | "); 3407 } 3408 3409 void 3410 print_trace_header(struct seq_file *m, struct trace_iterator *iter) 3411 { 3412 unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK); 3413 struct array_buffer *buf = iter->array_buffer; 3414 struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu); 3415 struct tracer *type = iter->trace; 3416 unsigned long entries; 3417 unsigned long total; 3418 const char *name = type->name; 3419 3420 get_total_entries(buf, &total, &entries); 3421 3422 seq_printf(m, "# %s latency trace v1.1.5 on %s\n", 3423 name, init_utsname()->release); 3424 seq_puts(m, "# -----------------------------------" 3425 "---------------------------------\n"); 3426 seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |" 3427 " (M:%s VP:%d, KP:%d, SP:%d HP:%d", 3428 nsecs_to_usecs(data->saved_latency), 3429 entries, 3430 total, 3431 buf->cpu, 3432 preempt_model_str(), 3433 /* These are reserved for later use */ 3434 0, 0, 0, 0); 3435 #ifdef CONFIG_SMP 3436 seq_printf(m, " #P:%d)\n", num_online_cpus()); 3437 #else 3438 seq_puts(m, ")\n"); 3439 #endif 3440 seq_puts(m, "# -----------------\n"); 3441 seq_printf(m, "# | task: %.16s-%d " 3442 "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n", 3443 data->comm, data->pid, 3444 from_kuid_munged(seq_user_ns(m), data->uid), data->nice, 3445 data->policy, data->rt_priority); 3446 seq_puts(m, "# -----------------\n"); 3447 3448 if (data->critical_start) { 3449 seq_puts(m, "# => started at: "); 3450 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags); 3451 trace_print_seq(m, &iter->seq); 3452 seq_puts(m, "\n# => ended at: "); 3453 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags); 3454 trace_print_seq(m, &iter->seq); 3455 seq_puts(m, "\n#\n"); 3456 } 3457 3458 seq_puts(m, "#\n"); 3459 } 3460 3461 static void test_cpu_buff_start(struct trace_iterator *iter) 3462 { 3463 struct trace_seq *s = &iter->seq; 3464 struct trace_array *tr = iter->tr; 3465 3466 if (!(tr->trace_flags & TRACE_ITER(ANNOTATE))) 3467 return; 3468 3469 if (!(iter->iter_flags & TRACE_FILE_ANNOTATE)) 3470 return; 3471 3472 if (cpumask_available(iter->started) && 3473 cpumask_test_cpu(iter->cpu, iter->started)) 3474 return; 3475 3476 if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries) 3477 return; 3478 3479 if (cpumask_available(iter->started)) 3480 cpumask_set_cpu(iter->cpu, iter->started); 3481 3482 /* Don't print started cpu buffer for the first entry of the trace */ 3483 if (iter->idx > 1) 3484 trace_seq_printf(s, "##### CPU %u buffer started ####\n", 3485 iter->cpu); 3486 } 3487 3488 #ifdef CONFIG_FTRACE_SYSCALLS 3489 static bool is_syscall_event(struct trace_event *event) 3490 { 3491 return (event->funcs == &enter_syscall_print_funcs) || 3492 (event->funcs == &exit_syscall_print_funcs); 3493 3494 } 3495 #define syscall_buf_size CONFIG_TRACE_SYSCALL_BUF_SIZE_DEFAULT 3496 #else 3497 static inline bool is_syscall_event(struct trace_event 
*event) 3498 { 3499 return false; 3500 } 3501 #define syscall_buf_size 0 3502 #endif /* CONFIG_FTRACE_SYSCALLS */ 3503 3504 static enum print_line_t print_trace_fmt(struct trace_iterator *iter) 3505 { 3506 struct trace_array *tr = iter->tr; 3507 struct trace_seq *s = &iter->seq; 3508 unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK); 3509 struct trace_entry *entry; 3510 struct trace_event *event; 3511 3512 entry = iter->ent; 3513 3514 test_cpu_buff_start(iter); 3515 3516 event = ftrace_find_event(entry->type); 3517 3518 if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) { 3519 if (iter->iter_flags & TRACE_FILE_LAT_FMT) 3520 trace_print_lat_context(iter); 3521 else 3522 trace_print_context(iter); 3523 } 3524 3525 if (trace_seq_has_overflowed(s)) 3526 return TRACE_TYPE_PARTIAL_LINE; 3527 3528 if (event) { 3529 if (tr->trace_flags & TRACE_ITER(FIELDS)) 3530 return print_event_fields(iter, event); 3531 /* 3532 * For TRACE_EVENT() events, the print_fmt is not 3533 * safe to use if the array has delta offsets 3534 * Force printing via the fields. 3535 */ 3536 if ((tr->text_delta)) { 3537 /* ftrace and system call events are still OK */ 3538 if ((event->type > __TRACE_LAST_TYPE) && 3539 !is_syscall_event(event)) 3540 return print_event_fields(iter, event); 3541 } 3542 return event->funcs->trace(iter, sym_flags, event); 3543 } 3544 3545 trace_seq_printf(s, "Unknown type %d\n", entry->type); 3546 3547 return trace_handle_return(s); 3548 } 3549 3550 static enum print_line_t print_raw_fmt(struct trace_iterator *iter) 3551 { 3552 struct trace_array *tr = iter->tr; 3553 struct trace_seq *s = &iter->seq; 3554 struct trace_entry *entry; 3555 struct trace_event *event; 3556 3557 entry = iter->ent; 3558 3559 if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) 3560 trace_seq_printf(s, "%d %d %llu ", 3561 entry->pid, iter->cpu, iter->ts); 3562 3563 if (trace_seq_has_overflowed(s)) 3564 return TRACE_TYPE_PARTIAL_LINE; 3565 3566 event = ftrace_find_event(entry->type); 3567 if (event) 3568 return event->funcs->raw(iter, 0, event); 3569 3570 trace_seq_printf(s, "%d ?\n", entry->type); 3571 3572 return trace_handle_return(s); 3573 } 3574 3575 static enum print_line_t print_hex_fmt(struct trace_iterator *iter) 3576 { 3577 struct trace_array *tr = iter->tr; 3578 struct trace_seq *s = &iter->seq; 3579 unsigned char newline = '\n'; 3580 struct trace_entry *entry; 3581 struct trace_event *event; 3582 3583 entry = iter->ent; 3584 3585 if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) { 3586 SEQ_PUT_HEX_FIELD(s, entry->pid); 3587 SEQ_PUT_HEX_FIELD(s, iter->cpu); 3588 SEQ_PUT_HEX_FIELD(s, iter->ts); 3589 if (trace_seq_has_overflowed(s)) 3590 return TRACE_TYPE_PARTIAL_LINE; 3591 } 3592 3593 event = ftrace_find_event(entry->type); 3594 if (event) { 3595 enum print_line_t ret = event->funcs->hex(iter, 0, event); 3596 if (ret != TRACE_TYPE_HANDLED) 3597 return ret; 3598 } 3599 3600 SEQ_PUT_FIELD(s, newline); 3601 3602 return trace_handle_return(s); 3603 } 3604 3605 static enum print_line_t print_bin_fmt(struct trace_iterator *iter) 3606 { 3607 struct trace_array *tr = iter->tr; 3608 struct trace_seq *s = &iter->seq; 3609 struct trace_entry *entry; 3610 struct trace_event *event; 3611 3612 entry = iter->ent; 3613 3614 if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) { 3615 SEQ_PUT_FIELD(s, entry->pid); 3616 SEQ_PUT_FIELD(s, iter->cpu); 3617 SEQ_PUT_FIELD(s, iter->ts); 3618 if (trace_seq_has_overflowed(s)) 3619 return TRACE_TYPE_PARTIAL_LINE; 3620 } 3621 3622 event = ftrace_find_event(entry->type); 3623 return event ? 
event->funcs->binary(iter, 0, event) : 3624 TRACE_TYPE_HANDLED; 3625 } 3626 3627 int trace_empty(struct trace_iterator *iter) 3628 { 3629 struct ring_buffer_iter *buf_iter; 3630 int cpu; 3631 3632 /* If we are looking at one CPU buffer, only check that one */ 3633 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) { 3634 cpu = iter->cpu_file; 3635 buf_iter = trace_buffer_iter(iter, cpu); 3636 if (buf_iter) { 3637 if (!ring_buffer_iter_empty(buf_iter)) 3638 return 0; 3639 } else { 3640 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu)) 3641 return 0; 3642 } 3643 return 1; 3644 } 3645 3646 for_each_tracing_cpu(cpu) { 3647 buf_iter = trace_buffer_iter(iter, cpu); 3648 if (buf_iter) { 3649 if (!ring_buffer_iter_empty(buf_iter)) 3650 return 0; 3651 } else { 3652 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu)) 3653 return 0; 3654 } 3655 } 3656 3657 return 1; 3658 } 3659 3660 /* Called with trace_event_read_lock() held. */ 3661 enum print_line_t print_trace_line(struct trace_iterator *iter) 3662 { 3663 struct trace_array *tr = iter->tr; 3664 unsigned long trace_flags = tr->trace_flags; 3665 enum print_line_t ret; 3666 3667 if (iter->lost_events) { 3668 if (iter->lost_events == (unsigned long)-1) 3669 trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n", 3670 iter->cpu); 3671 else 3672 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n", 3673 iter->cpu, iter->lost_events); 3674 if (trace_seq_has_overflowed(&iter->seq)) 3675 return TRACE_TYPE_PARTIAL_LINE; 3676 } 3677 3678 if (iter->trace && iter->trace->print_line) { 3679 ret = iter->trace->print_line(iter); 3680 if (ret != TRACE_TYPE_UNHANDLED) 3681 return ret; 3682 } 3683 3684 if (iter->ent->type == TRACE_BPUTS && 3685 trace_flags & TRACE_ITER(PRINTK) && 3686 trace_flags & TRACE_ITER(PRINTK_MSGONLY)) 3687 return trace_print_bputs_msg_only(iter); 3688 3689 if (iter->ent->type == TRACE_BPRINT && 3690 trace_flags & TRACE_ITER(PRINTK) && 3691 trace_flags & TRACE_ITER(PRINTK_MSGONLY)) 3692 return trace_print_bprintk_msg_only(iter); 3693 3694 if (iter->ent->type == TRACE_PRINT && 3695 trace_flags & TRACE_ITER(PRINTK) && 3696 trace_flags & TRACE_ITER(PRINTK_MSGONLY)) 3697 return trace_print_printk_msg_only(iter); 3698 3699 if (trace_flags & TRACE_ITER(BIN)) 3700 return print_bin_fmt(iter); 3701 3702 if (trace_flags & TRACE_ITER(HEX)) 3703 return print_hex_fmt(iter); 3704 3705 if (trace_flags & TRACE_ITER(RAW)) 3706 return print_raw_fmt(iter); 3707 3708 return print_trace_fmt(iter); 3709 } 3710 3711 void trace_latency_header(struct seq_file *m) 3712 { 3713 struct trace_iterator *iter = m->private; 3714 struct trace_array *tr = iter->tr; 3715 3716 /* print nothing if the buffers are empty */ 3717 if (trace_empty(iter)) 3718 return; 3719 3720 if (iter->iter_flags & TRACE_FILE_LAT_FMT) 3721 print_trace_header(m, iter); 3722 3723 if (!(tr->trace_flags & TRACE_ITER(VERBOSE))) 3724 print_lat_help_header(m); 3725 } 3726 3727 void trace_default_header(struct seq_file *m) 3728 { 3729 struct trace_iterator *iter = m->private; 3730 struct trace_array *tr = iter->tr; 3731 unsigned long trace_flags = tr->trace_flags; 3732 3733 if (!(trace_flags & TRACE_ITER(CONTEXT_INFO))) 3734 return; 3735 3736 if (iter->iter_flags & TRACE_FILE_LAT_FMT) { 3737 /* print nothing if the buffers are empty */ 3738 if (trace_empty(iter)) 3739 return; 3740 print_trace_header(m, iter); 3741 if (!(trace_flags & TRACE_ITER(VERBOSE))) 3742 print_lat_help_header(m); 3743 } else { 3744 if (!(trace_flags & TRACE_ITER(VERBOSE))) { 3745 if (trace_flags & 
TRACE_ITER(IRQ_INFO)) 3746 print_func_help_header_irq(iter->array_buffer, 3747 m, trace_flags); 3748 else 3749 print_func_help_header(iter->array_buffer, m, 3750 trace_flags); 3751 } 3752 } 3753 } 3754 3755 static void test_ftrace_alive(struct seq_file *m) 3756 { 3757 if (!ftrace_is_dead()) 3758 return; 3759 seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n" 3760 "# MAY BE MISSING FUNCTION EVENTS\n"); 3761 } 3762 3763 #ifdef CONFIG_TRACER_SNAPSHOT 3764 static void show_snapshot_main_help(struct seq_file *m) 3765 { 3766 seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n" 3767 "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n" 3768 "# Takes a snapshot of the main buffer.\n" 3769 "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n" 3770 "# (Doesn't have to be '2' works with any number that\n" 3771 "# is not a '0' or '1')\n"); 3772 } 3773 3774 static void show_snapshot_percpu_help(struct seq_file *m) 3775 { 3776 seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n"); 3777 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP 3778 seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n" 3779 "# Takes a snapshot of the main buffer for this cpu.\n"); 3780 #else 3781 seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n" 3782 "# Must use main snapshot file to allocate.\n"); 3783 #endif 3784 seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n" 3785 "# (Doesn't have to be '2' works with any number that\n" 3786 "# is not a '0' or '1')\n"); 3787 } 3788 3789 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) 3790 { 3791 if (iter->tr->allocated_snapshot) 3792 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n"); 3793 else 3794 seq_puts(m, "#\n# * Snapshot is freed *\n#\n"); 3795 3796 seq_puts(m, "# Snapshot commands:\n"); 3797 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) 3798 show_snapshot_main_help(m); 3799 else 3800 show_snapshot_percpu_help(m); 3801 } 3802 #else 3803 /* Should never be called */ 3804 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { } 3805 #endif 3806 3807 static int s_show(struct seq_file *m, void *v) 3808 { 3809 struct trace_iterator *iter = v; 3810 int ret; 3811 3812 if (iter->ent == NULL) { 3813 if (iter->tr) { 3814 seq_printf(m, "# tracer: %s\n", iter->trace->name); 3815 seq_puts(m, "#\n"); 3816 test_ftrace_alive(m); 3817 } 3818 if (iter->snapshot && trace_empty(iter)) 3819 print_snapshot_help(m, iter); 3820 else if (iter->trace && iter->trace->print_header) 3821 iter->trace->print_header(m); 3822 else 3823 trace_default_header(m); 3824 3825 } else if (iter->leftover) { 3826 /* 3827 * If we filled the seq_file buffer earlier, we 3828 * want to just show it now. 3829 */ 3830 ret = trace_print_seq(m, &iter->seq); 3831 3832 /* ret should this time be zero, but you never know */ 3833 iter->leftover = ret; 3834 3835 } else { 3836 ret = print_trace_line(iter); 3837 if (ret == TRACE_TYPE_PARTIAL_LINE) { 3838 iter->seq.full = 0; 3839 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n"); 3840 } 3841 ret = trace_print_seq(m, &iter->seq); 3842 /* 3843 * If we overflow the seq_file buffer, then it will 3844 * ask us for this data again at start up. 3845 * Use that instead. 3846 * ret is 0 if seq_file write succeeded. 3847 * -1 otherwise. 
3848 */ 3849 iter->leftover = ret; 3850 } 3851 3852 return 0; 3853 } 3854 3855 /* 3856 * Should be used after trace_array_get(), trace_types_lock 3857 * ensures that i_cdev was already initialized. 3858 */ 3859 static inline int tracing_get_cpu(struct inode *inode) 3860 { 3861 if (inode->i_cdev) /* See trace_create_cpu_file() */ 3862 return (long)inode->i_cdev - 1; 3863 return RING_BUFFER_ALL_CPUS; 3864 } 3865 3866 static const struct seq_operations tracer_seq_ops = { 3867 .start = s_start, 3868 .next = s_next, 3869 .stop = s_stop, 3870 .show = s_show, 3871 }; 3872 3873 /* 3874 * Note, as iter itself can be allocated and freed in different 3875 * ways, this function is only used to free its content, and not 3876 * the iterator itself. The only requirement for all the allocations 3877 * is that they zero all fields (kzalloc), as freeing works with 3878 * either allocated content or NULL. 3879 */ 3880 static void free_trace_iter_content(struct trace_iterator *iter) 3881 { 3882 /* The fmt is either NULL, allocated or points to static_fmt_buf */ 3883 if (iter->fmt != static_fmt_buf) 3884 kfree(iter->fmt); 3885 3886 kfree(iter->temp); 3887 kfree(iter->buffer_iter); 3888 mutex_destroy(&iter->mutex); 3889 free_cpumask_var(iter->started); 3890 } 3891 3892 static struct trace_iterator * 3893 __tracing_open(struct inode *inode, struct file *file, bool snapshot) 3894 { 3895 struct trace_array *tr = inode->i_private; 3896 struct trace_iterator *iter; 3897 int cpu; 3898 3899 if (tracing_disabled) 3900 return ERR_PTR(-ENODEV); 3901 3902 iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter)); 3903 if (!iter) 3904 return ERR_PTR(-ENOMEM); 3905 3906 iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter), 3907 GFP_KERNEL); 3908 if (!iter->buffer_iter) 3909 goto release; 3910 3911 /* 3912 * trace_find_next_entry() may need to save off iter->ent. 3913 * It will place it into the iter->temp buffer. As most 3914 * events are less than 128, allocate a buffer of that size. 3915 * If one is greater, then trace_find_next_entry() will 3916 * allocate a new buffer to adjust for the bigger iter->ent. 3917 * It's not critical if it fails to get allocated here. 3918 */ 3919 iter->temp = kmalloc(128, GFP_KERNEL); 3920 if (iter->temp) 3921 iter->temp_size = 128; 3922 3923 /* 3924 * trace_event_printf() may need to modify the given format 3925 * string to replace %p with %px so that it shows the real address 3926 * instead of a hash value. However, that is only needed for event 3927 * tracing; other tracers may not need it. Defer the allocation 3928 * until it is needed. 3929 */ 3930 iter->fmt = NULL; 3931 iter->fmt_size = 0; 3932 3933 mutex_lock(&trace_types_lock); 3934 iter->trace = tr->current_trace; 3935 3936 if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL)) 3937 goto fail; 3938 3939 iter->tr = tr; 3940 3941 #ifdef CONFIG_TRACER_SNAPSHOT 3942 /* Currently only the top directory has a snapshot */ 3943 if (tr->current_trace->print_max || snapshot) 3944 iter->array_buffer = &tr->snapshot_buffer; 3945 else 3946 #endif 3947 iter->array_buffer = &tr->array_buffer; 3948 iter->snapshot = snapshot; 3949 iter->pos = -1; 3950 iter->cpu_file = tracing_get_cpu(inode); 3951 mutex_init(&iter->mutex); 3952 3953 /* Notify the tracer early; before we stop tracing.
*/ 3954 if (iter->trace->open) 3955 iter->trace->open(iter); 3956 3957 /* Annotate start of buffers if we had overruns */ 3958 if (ring_buffer_overruns(iter->array_buffer->buffer)) 3959 iter->iter_flags |= TRACE_FILE_ANNOTATE; 3960 3961 /* Output in nanoseconds only if we are using a clock in nanoseconds. */ 3962 if (trace_clocks[tr->clock_id].in_ns) 3963 iter->iter_flags |= TRACE_FILE_TIME_IN_NS; 3964 3965 /* 3966 * If pause-on-trace is enabled, then stop the trace while 3967 * dumping, unless this is the "snapshot" file 3968 */ 3969 if (!iter->snapshot && (tr->trace_flags & TRACE_ITER(PAUSE_ON_TRACE))) { 3970 iter->iter_flags |= TRACE_FILE_PAUSE; 3971 tracing_stop_tr(tr); 3972 } 3973 3974 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) { 3975 for_each_tracing_cpu(cpu) { 3976 iter->buffer_iter[cpu] = 3977 ring_buffer_read_start(iter->array_buffer->buffer, 3978 cpu, GFP_KERNEL); 3979 tracing_iter_reset(iter, cpu); 3980 } 3981 } else { 3982 cpu = iter->cpu_file; 3983 iter->buffer_iter[cpu] = 3984 ring_buffer_read_start(iter->array_buffer->buffer, 3985 cpu, GFP_KERNEL); 3986 tracing_iter_reset(iter, cpu); 3987 } 3988 3989 mutex_unlock(&trace_types_lock); 3990 3991 return iter; 3992 3993 fail: 3994 mutex_unlock(&trace_types_lock); 3995 free_trace_iter_content(iter); 3996 release: 3997 seq_release_private(inode, file); 3998 return ERR_PTR(-ENOMEM); 3999 } 4000 4001 int tracing_open_generic(struct inode *inode, struct file *filp) 4002 { 4003 int ret; 4004 4005 ret = tracing_check_open_get_tr(NULL); 4006 if (ret) 4007 return ret; 4008 4009 filp->private_data = inode->i_private; 4010 return 0; 4011 } 4012 4013 /* 4014 * Open and update trace_array ref count. 4015 * Must have the current trace_array passed to it. 4016 */ 4017 int tracing_open_generic_tr(struct inode *inode, struct file *filp) 4018 { 4019 struct trace_array *tr = inode->i_private; 4020 int ret; 4021 4022 ret = tracing_check_open_get_tr(tr); 4023 if (ret) 4024 return ret; 4025 4026 filp->private_data = inode->i_private; 4027 4028 return 0; 4029 } 4030 4031 /* 4032 * The private pointer of the inode is the trace_event_file. 4033 * Update the tr ref count associated to it. 
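 *
 * Illustrative sketch (hypothetical fops and reader, not defined in this
 * file): a per-event control file pairs the open below with
 * tracing_release_file_tr() so the references taken here are always
 * dropped on the final close:
 *
 *	static const struct file_operations example_event_fops = {
 *		.open    = tracing_open_file_tr,
 *		.read    = example_event_read,
 *		.release = tracing_release_file_tr,
 *		.llseek  = default_llseek,
 *	};
 *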
4034 */ 4035 int tracing_open_file_tr(struct inode *inode, struct file *filp) 4036 { 4037 struct trace_event_file *file = inode->i_private; 4038 int ret; 4039 4040 ret = tracing_check_open_get_tr(file->tr); 4041 if (ret) 4042 return ret; 4043 4044 guard(mutex)(&event_mutex); 4045 4046 /* Fail if the file is marked for removal */ 4047 if (file->flags & EVENT_FILE_FL_FREED) { 4048 trace_array_put(file->tr); 4049 return -ENODEV; 4050 } else { 4051 event_file_get(file); 4052 } 4053 4054 filp->private_data = inode->i_private; 4055 4056 return 0; 4057 } 4058 4059 int tracing_release_file_tr(struct inode *inode, struct file *filp) 4060 { 4061 struct trace_event_file *file = inode->i_private; 4062 4063 trace_array_put(file->tr); 4064 event_file_put(file); 4065 4066 return 0; 4067 } 4068 4069 int tracing_single_release_file_tr(struct inode *inode, struct file *filp) 4070 { 4071 tracing_release_file_tr(inode, filp); 4072 return single_release(inode, filp); 4073 } 4074 4075 static int tracing_release(struct inode *inode, struct file *file) 4076 { 4077 struct trace_array *tr = inode->i_private; 4078 struct seq_file *m = file->private_data; 4079 struct trace_iterator *iter; 4080 int cpu; 4081 4082 if (!(file->f_mode & FMODE_READ)) { 4083 trace_array_put(tr); 4084 return 0; 4085 } 4086 4087 /* Writes do not use seq_file */ 4088 iter = m->private; 4089 mutex_lock(&trace_types_lock); 4090 4091 for_each_tracing_cpu(cpu) { 4092 if (iter->buffer_iter[cpu]) 4093 ring_buffer_read_finish(iter->buffer_iter[cpu]); 4094 } 4095 4096 if (iter->trace && iter->trace->close) 4097 iter->trace->close(iter); 4098 4099 if (iter->iter_flags & TRACE_FILE_PAUSE) 4100 /* reenable tracing if it was previously enabled */ 4101 tracing_start_tr(tr); 4102 4103 __trace_array_put(tr); 4104 4105 mutex_unlock(&trace_types_lock); 4106 4107 free_trace_iter_content(iter); 4108 seq_release_private(inode, file); 4109 4110 return 0; 4111 } 4112 4113 int tracing_release_generic_tr(struct inode *inode, struct file *file) 4114 { 4115 struct trace_array *tr = inode->i_private; 4116 4117 trace_array_put(tr); 4118 return 0; 4119 } 4120 4121 static int tracing_single_release_tr(struct inode *inode, struct file *file) 4122 { 4123 struct trace_array *tr = inode->i_private; 4124 4125 trace_array_put(tr); 4126 4127 return single_release(inode, file); 4128 } 4129 4130 static bool update_last_data_if_empty(struct trace_array *tr); 4131 4132 static int tracing_open(struct inode *inode, struct file *file) 4133 { 4134 struct trace_array *tr = inode->i_private; 4135 struct trace_iterator *iter; 4136 int ret; 4137 4138 ret = tracing_check_open_get_tr(tr); 4139 if (ret) 4140 return ret; 4141 4142 /* If this file was open for write, then erase contents */ 4143 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) { 4144 int cpu = tracing_get_cpu(inode); 4145 struct array_buffer *trace_buf = &tr->array_buffer; 4146 4147 #ifdef CONFIG_TRACER_MAX_TRACE 4148 if (tr->current_trace->print_max) 4149 trace_buf = &tr->snapshot_buffer; 4150 #endif 4151 4152 if (cpu == RING_BUFFER_ALL_CPUS) 4153 tracing_reset_online_cpus(trace_buf); 4154 else 4155 tracing_reset_cpu(trace_buf, cpu); 4156 4157 update_last_data_if_empty(tr); 4158 } 4159 4160 if (file->f_mode & FMODE_READ) { 4161 iter = __tracing_open(inode, file, false); 4162 if (IS_ERR(iter)) 4163 ret = PTR_ERR(iter); 4164 else if (tr->trace_flags & TRACE_ITER(LATENCY_FMT)) 4165 iter->iter_flags |= TRACE_FILE_LAT_FMT; 4166 } 4167 4168 if (ret < 0) 4169 trace_array_put(tr); 4170 4171 return ret; 4172 } 4173 4174 /* 
4175 * Some tracers are not suitable for instance buffers. 4176 * A tracer is always available for the global array (toplevel) 4177 * or if it explicitly states that it is. 4178 */ 4179 static bool 4180 trace_ok_for_array(struct tracer *t, struct trace_array *tr) 4181 { 4182 /* arrays with mapped buffer range do not have snapshots */ 4183 if (tr->range_addr_start && tracer_uses_snapshot(t)) 4184 return false; 4185 return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances; 4186 } 4187 4188 /* Find the next tracer that this trace array may use */ 4189 static struct tracer * 4190 get_tracer_for_array(struct trace_array *tr, struct tracer *t) 4191 { 4192 while (t && !trace_ok_for_array(t, tr)) 4193 t = t->next; 4194 4195 return t; 4196 } 4197 4198 static void * 4199 t_next(struct seq_file *m, void *v, loff_t *pos) 4200 { 4201 struct trace_array *tr = m->private; 4202 struct tracer *t = v; 4203 4204 (*pos)++; 4205 4206 if (t) 4207 t = get_tracer_for_array(tr, t->next); 4208 4209 return t; 4210 } 4211 4212 static void *t_start(struct seq_file *m, loff_t *pos) 4213 { 4214 struct trace_array *tr = m->private; 4215 struct tracer *t; 4216 loff_t l = 0; 4217 4218 mutex_lock(&trace_types_lock); 4219 4220 t = get_tracer_for_array(tr, trace_types); 4221 for (; t && l < *pos; t = t_next(m, t, &l)) 4222 ; 4223 4224 return t; 4225 } 4226 4227 static void t_stop(struct seq_file *m, void *p) 4228 { 4229 mutex_unlock(&trace_types_lock); 4230 } 4231 4232 static int t_show(struct seq_file *m, void *v) 4233 { 4234 struct tracer *t = v; 4235 4236 if (!t) 4237 return 0; 4238 4239 seq_puts(m, t->name); 4240 if (t->next) 4241 seq_putc(m, ' '); 4242 else 4243 seq_putc(m, '\n'); 4244 4245 return 0; 4246 } 4247 4248 static const struct seq_operations show_traces_seq_ops = { 4249 .start = t_start, 4250 .next = t_next, 4251 .stop = t_stop, 4252 .show = t_show, 4253 }; 4254 4255 static int show_traces_open(struct inode *inode, struct file *file) 4256 { 4257 struct trace_array *tr = inode->i_private; 4258 struct seq_file *m; 4259 int ret; 4260 4261 ret = tracing_check_open_get_tr(tr); 4262 if (ret) 4263 return ret; 4264 4265 ret = seq_open(file, &show_traces_seq_ops); 4266 if (ret) { 4267 trace_array_put(tr); 4268 return ret; 4269 } 4270 4271 m = file->private_data; 4272 m->private = tr; 4273 4274 return 0; 4275 } 4276 4277 static int tracing_seq_release(struct inode *inode, struct file *file) 4278 { 4279 struct trace_array *tr = inode->i_private; 4280 4281 trace_array_put(tr); 4282 return seq_release(inode, file); 4283 } 4284 4285 static ssize_t 4286 tracing_write_stub(struct file *filp, const char __user *ubuf, 4287 size_t count, loff_t *ppos) 4288 { 4289 return count; 4290 } 4291 4292 loff_t tracing_lseek(struct file *file, loff_t offset, int whence) 4293 { 4294 int ret; 4295 4296 if (file->f_mode & FMODE_READ) 4297 ret = seq_lseek(file, offset, whence); 4298 else 4299 file->f_pos = ret = 0; 4300 4301 return ret; 4302 } 4303 4304 static const struct file_operations tracing_fops = { 4305 .open = tracing_open, 4306 .read = seq_read, 4307 .read_iter = seq_read_iter, 4308 .splice_read = copy_splice_read, 4309 .write = tracing_write_stub, 4310 .llseek = tracing_lseek, 4311 .release = tracing_release, 4312 }; 4313 4314 static const struct file_operations show_traces_fops = { 4315 .open = show_traces_open, 4316 .read = seq_read, 4317 .llseek = seq_lseek, 4318 .release = tracing_seq_release, 4319 }; 4320 4321 static ssize_t 4322 tracing_cpumask_read(struct file *filp, char __user *ubuf, 4323 size_t count, loff_t *ppos) 
4324 { 4325 struct trace_array *tr = file_inode(filp)->i_private; 4326 char *mask_str __free(kfree) = NULL; 4327 int len; 4328 4329 len = snprintf(NULL, 0, "%*pb\n", 4330 cpumask_pr_args(tr->tracing_cpumask)) + 1; 4331 mask_str = kmalloc(len, GFP_KERNEL); 4332 if (!mask_str) 4333 return -ENOMEM; 4334 4335 len = snprintf(mask_str, len, "%*pb\n", 4336 cpumask_pr_args(tr->tracing_cpumask)); 4337 if (len >= count) 4338 return -EINVAL; 4339 4340 return simple_read_from_buffer(ubuf, count, ppos, mask_str, len); 4341 } 4342 4343 int tracing_set_cpumask(struct trace_array *tr, 4344 cpumask_var_t tracing_cpumask_new) 4345 { 4346 int cpu; 4347 4348 if (!tr) 4349 return -EINVAL; 4350 4351 local_irq_disable(); 4352 arch_spin_lock(&tr->max_lock); 4353 for_each_tracing_cpu(cpu) { 4354 /* 4355 * Increase/decrease the disabled counter if we are 4356 * about to flip a bit in the cpumask: 4357 */ 4358 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) && 4359 !cpumask_test_cpu(cpu, tracing_cpumask_new)) { 4360 ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu); 4361 #ifdef CONFIG_TRACER_SNAPSHOT 4362 ring_buffer_record_disable_cpu(tr->snapshot_buffer.buffer, cpu); 4363 #endif 4364 } 4365 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) && 4366 cpumask_test_cpu(cpu, tracing_cpumask_new)) { 4367 ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu); 4368 #ifdef CONFIG_TRACER_SNAPSHOT 4369 ring_buffer_record_enable_cpu(tr->snapshot_buffer.buffer, cpu); 4370 #endif 4371 } 4372 } 4373 arch_spin_unlock(&tr->max_lock); 4374 local_irq_enable(); 4375 4376 cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new); 4377 4378 return 0; 4379 } 4380 4381 static ssize_t 4382 tracing_cpumask_write(struct file *filp, const char __user *ubuf, 4383 size_t count, loff_t *ppos) 4384 { 4385 struct trace_array *tr = file_inode(filp)->i_private; 4386 cpumask_var_t tracing_cpumask_new; 4387 int err; 4388 4389 if (count == 0 || count > KMALLOC_MAX_SIZE) 4390 return -EINVAL; 4391 4392 if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL)) 4393 return -ENOMEM; 4394 4395 err = cpumask_parse_user(ubuf, count, tracing_cpumask_new); 4396 if (err) 4397 goto err_free; 4398 4399 err = tracing_set_cpumask(tr, tracing_cpumask_new); 4400 if (err) 4401 goto err_free; 4402 4403 free_cpumask_var(tracing_cpumask_new); 4404 4405 return count; 4406 4407 err_free: 4408 free_cpumask_var(tracing_cpumask_new); 4409 4410 return err; 4411 } 4412 4413 static const struct file_operations tracing_cpumask_fops = { 4414 .open = tracing_open_generic_tr, 4415 .read = tracing_cpumask_read, 4416 .write = tracing_cpumask_write, 4417 .release = tracing_release_generic_tr, 4418 .llseek = generic_file_llseek, 4419 }; 4420 4421 static int tracing_trace_options_show(struct seq_file *m, void *v) 4422 { 4423 struct tracer_opt *trace_opts; 4424 struct trace_array *tr = m->private; 4425 struct tracer_flags *flags; 4426 u32 tracer_flags; 4427 int i; 4428 4429 guard(mutex)(&trace_types_lock); 4430 4431 for (i = 0; trace_options[i]; i++) { 4432 if (tr->trace_flags & (1ULL << i)) 4433 seq_printf(m, "%s\n", trace_options[i]); 4434 else 4435 seq_printf(m, "no%s\n", trace_options[i]); 4436 } 4437 4438 flags = tr->current_trace_flags; 4439 if (!flags || !flags->opts) 4440 return 0; 4441 4442 tracer_flags = flags->val; 4443 trace_opts = flags->opts; 4444 4445 for (i = 0; trace_opts[i].name; i++) { 4446 if (tracer_flags & trace_opts[i].bit) 4447 seq_printf(m, "%s\n", trace_opts[i].name); 4448 else 4449 seq_printf(m, "no%s\n", trace_opts[i].name); 4450 } 4451 4452 return 0; 
4453 } 4454 4455 static int __set_tracer_option(struct trace_array *tr, 4456 struct tracer_flags *tracer_flags, 4457 struct tracer_opt *opts, int neg) 4458 { 4459 struct tracer *trace = tracer_flags->trace; 4460 int ret = 0; 4461 4462 if (trace->set_flag) 4463 ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg); 4464 if (ret) 4465 return ret; 4466 4467 if (neg) 4468 tracer_flags->val &= ~opts->bit; 4469 else 4470 tracer_flags->val |= opts->bit; 4471 return 0; 4472 } 4473 4474 /* Try to assign a tracer specific option */ 4475 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg) 4476 { 4477 struct tracer_flags *tracer_flags = tr->current_trace_flags; 4478 struct tracer_opt *opts = NULL; 4479 int i; 4480 4481 if (!tracer_flags || !tracer_flags->opts) 4482 return 0; 4483 4484 for (i = 0; tracer_flags->opts[i].name; i++) { 4485 opts = &tracer_flags->opts[i]; 4486 4487 if (strcmp(cmp, opts->name) == 0) 4488 return __set_tracer_option(tr, tracer_flags, opts, neg); 4489 } 4490 4491 return -EINVAL; 4492 } 4493 4494 /* Some tracers require overwrite to stay enabled */ 4495 int trace_keep_overwrite(struct tracer *tracer, u64 mask, int set) 4496 { 4497 if (tracer->enabled && (mask & TRACE_ITER(OVERWRITE)) && !set) 4498 return -1; 4499 4500 return 0; 4501 } 4502 4503 int set_tracer_flag(struct trace_array *tr, u64 mask, int enabled) 4504 { 4505 switch (mask) { 4506 case TRACE_ITER(RECORD_TGID): 4507 case TRACE_ITER(RECORD_CMD): 4508 case TRACE_ITER(TRACE_PRINTK): 4509 case TRACE_ITER(COPY_MARKER): 4510 lockdep_assert_held(&event_mutex); 4511 } 4512 4513 /* do nothing if flag is already set */ 4514 if (!!(tr->trace_flags & mask) == !!enabled) 4515 return 0; 4516 4517 /* Give the tracer a chance to approve the change */ 4518 if (tr->current_trace->flag_changed) 4519 if (tr->current_trace->flag_changed(tr, mask, !!enabled)) 4520 return -EINVAL; 4521 4522 switch (mask) { 4523 case TRACE_ITER(TRACE_PRINTK): 4524 if (enabled) { 4525 update_printk_trace(tr); 4526 } else { 4527 /* 4528 * The global_trace cannot clear this. 4529 * It's flag only gets cleared if another instance sets it. 4530 */ 4531 if (printk_trace == &global_trace) 4532 return -EINVAL; 4533 /* 4534 * An instance must always have it set. 4535 * by default, that's the global_trace instance. 
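 *
 * Illustrative usage (assuming the option file is named
 * "trace_printk_dest" and an instance "foo" already exists):
 *
 *	# echo 1 > /sys/kernel/tracing/instances/foo/options/trace_printk_dest
 *
 * trace_printk() output now goes to "foo".  Writing 0 to the top-level
 * option is rejected here; the destination only moves back to the
 * top-level buffer when "foo" clears the option or another instance
 * claims it.
 *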
4536 */ 4537 if (printk_trace == tr) 4538 update_printk_trace(&global_trace); 4539 } 4540 break; 4541 4542 case TRACE_ITER(COPY_MARKER): 4543 update_marker_trace(tr, enabled); 4544 /* update_marker_trace updates the tr->trace_flags */ 4545 return 0; 4546 } 4547 4548 if (enabled) 4549 tr->trace_flags |= mask; 4550 else 4551 tr->trace_flags &= ~mask; 4552 4553 switch (mask) { 4554 case TRACE_ITER(RECORD_CMD): 4555 trace_event_enable_cmd_record(enabled); 4556 break; 4557 4558 case TRACE_ITER(RECORD_TGID): 4559 4560 if (trace_alloc_tgid_map() < 0) { 4561 tr->trace_flags &= ~TRACE_ITER(RECORD_TGID); 4562 return -ENOMEM; 4563 } 4564 4565 trace_event_enable_tgid_record(enabled); 4566 break; 4567 4568 case TRACE_ITER(EVENT_FORK): 4569 trace_event_follow_fork(tr, enabled); 4570 break; 4571 4572 case TRACE_ITER(FUNC_FORK): 4573 ftrace_pid_follow_fork(tr, enabled); 4574 break; 4575 4576 case TRACE_ITER(OVERWRITE): 4577 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled); 4578 #ifdef CONFIG_TRACER_SNAPSHOT 4579 ring_buffer_change_overwrite(tr->snapshot_buffer.buffer, enabled); 4580 #endif 4581 break; 4582 4583 case TRACE_ITER(PRINTK): 4584 trace_printk_start_stop_comm(enabled); 4585 trace_printk_control(enabled); 4586 break; 4587 4588 #if defined(CONFIG_FUNCTION_PROFILER) && defined(CONFIG_FUNCTION_GRAPH_TRACER) 4589 case TRACE_GRAPH_GRAPH_TIME: 4590 ftrace_graph_graph_time_control(enabled); 4591 break; 4592 #endif 4593 } 4594 4595 return 0; 4596 } 4597 4598 int trace_set_options(struct trace_array *tr, char *option) 4599 { 4600 char *cmp; 4601 int neg = 0; 4602 int ret; 4603 size_t orig_len = strlen(option); 4604 int len; 4605 4606 cmp = strstrip(option); 4607 4608 len = str_has_prefix(cmp, "no"); 4609 if (len) 4610 neg = 1; 4611 4612 cmp += len; 4613 4614 mutex_lock(&event_mutex); 4615 mutex_lock(&trace_types_lock); 4616 4617 ret = match_string(trace_options, -1, cmp); 4618 /* If no option could be set, test the specific tracer options */ 4619 if (ret < 0) 4620 ret = set_tracer_option(tr, cmp, neg); 4621 else 4622 ret = set_tracer_flag(tr, 1ULL << ret, !neg); 4623 4624 mutex_unlock(&trace_types_lock); 4625 mutex_unlock(&event_mutex); 4626 4627 /* 4628 * If the first trailing whitespace is replaced with '\0' by strstrip, 4629 * turn it back into a space. 
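 *
 * (apply_trace_boot_options() below does the same kind of restoration
 * for the commas it consumes with strsep().)  A standalone user-space
 * sketch of that walk, assuming glibc's strsep() and made-up option
 * names from the boot string:
 *
 *	#include <stdio.h>
 *	#include <string.h>
 *
 *	int main(void)
 *	{
 *		char buf[] = "sym-addr,trace_printk,noirq-info";
 *		char *p = buf, *opt;
 *
 *		while ((opt = strsep(&p, ","))) {
 *			printf("option: %s\n", opt);
 *			if (p)
 *				*(p - 1) = ',';    <- put the ',' back, as below
 *		}
 *		return 0;
 *	}
 *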
4630 */ 4631 if (orig_len > strlen(option)) 4632 option[strlen(option)] = ' '; 4633 4634 return ret; 4635 } 4636 4637 static void __init apply_trace_boot_options(void) 4638 { 4639 char *buf = trace_boot_options_buf; 4640 char *option; 4641 4642 while (true) { 4643 option = strsep(&buf, ","); 4644 4645 if (!option) 4646 break; 4647 4648 if (*option) 4649 trace_set_options(&global_trace, option); 4650 4651 /* Put back the comma to allow this to be called again */ 4652 if (buf) 4653 *(buf - 1) = ','; 4654 } 4655 } 4656 4657 static ssize_t 4658 tracing_trace_options_write(struct file *filp, const char __user *ubuf, 4659 size_t cnt, loff_t *ppos) 4660 { 4661 struct seq_file *m = filp->private_data; 4662 struct trace_array *tr = m->private; 4663 char buf[64]; 4664 int ret; 4665 4666 if (cnt >= sizeof(buf)) 4667 return -EINVAL; 4668 4669 if (copy_from_user(buf, ubuf, cnt)) 4670 return -EFAULT; 4671 4672 buf[cnt] = 0; 4673 4674 ret = trace_set_options(tr, buf); 4675 if (ret < 0) 4676 return ret; 4677 4678 *ppos += cnt; 4679 4680 return cnt; 4681 } 4682 4683 static int tracing_trace_options_open(struct inode *inode, struct file *file) 4684 { 4685 struct trace_array *tr = inode->i_private; 4686 int ret; 4687 4688 ret = tracing_check_open_get_tr(tr); 4689 if (ret) 4690 return ret; 4691 4692 ret = single_open(file, tracing_trace_options_show, inode->i_private); 4693 if (ret < 0) 4694 trace_array_put(tr); 4695 4696 return ret; 4697 } 4698 4699 static const struct file_operations tracing_iter_fops = { 4700 .open = tracing_trace_options_open, 4701 .read = seq_read, 4702 .llseek = seq_lseek, 4703 .release = tracing_single_release_tr, 4704 .write = tracing_trace_options_write, 4705 }; 4706 4707 static const char readme_msg[] = 4708 "tracing mini-HOWTO:\n\n" 4709 "By default tracefs removes all OTH file permission bits.\n" 4710 "When mounting tracefs an optional group id can be specified\n" 4711 "which adds the group to every directory and file in tracefs:\n\n" 4712 "\t e.g. 
mount -t tracefs [-o [gid=<gid>]] nodev /sys/kernel/tracing\n\n" 4713 "# echo 0 > tracing_on : quick way to disable tracing\n" 4714 "# echo 1 > tracing_on : quick way to re-enable tracing\n\n" 4715 " Important files:\n" 4716 " trace\t\t\t- The static contents of the buffer\n" 4717 "\t\t\t To clear the buffer write into this file: echo > trace\n" 4718 " trace_pipe\t\t- A consuming read to see the contents of the buffer\n" 4719 " current_tracer\t- function and latency tracers\n" 4720 " available_tracers\t- list of configured tracers for current_tracer\n" 4721 " error_log\t- error log for failed commands (that support it)\n" 4722 " buffer_size_kb\t- view and modify size of per cpu buffer\n" 4723 " buffer_total_size_kb - view total size of all cpu buffers\n\n" 4724 " trace_clock\t\t- change the clock used to order events\n" 4725 " local: Per cpu clock but may not be synced across CPUs\n" 4726 " global: Synced across CPUs but slows tracing down.\n" 4727 " counter: Not a clock, but just an increment\n" 4728 " uptime: Jiffy counter from time of boot\n" 4729 " perf: Same clock that perf events use\n" 4730 #ifdef CONFIG_X86_64 4731 " x86-tsc: TSC cycle counter\n" 4732 #endif 4733 "\n timestamp_mode\t- view the mode used to timestamp events\n" 4734 " delta: Delta difference against a buffer-wide timestamp\n" 4735 " absolute: Absolute (standalone) timestamp\n" 4736 "\n trace_marker\t\t- Writes into this file writes into the kernel buffer\n" 4737 "\n trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n" 4738 " tracing_cpumask\t- Limit which CPUs to trace\n" 4739 " instances\t\t- Make sub-buffers with: mkdir instances/foo\n" 4740 "\t\t\t Remove sub-buffer with rmdir\n" 4741 " trace_options\t\t- Set format or modify how tracing happens\n" 4742 "\t\t\t Disable an option by prefixing 'no' to the\n" 4743 "\t\t\t option name\n" 4744 " saved_cmdlines_size\t- echo command number in here to store comm-pid list\n" 4745 #ifdef CONFIG_DYNAMIC_FTRACE 4746 "\n available_filter_functions - list of functions that can be filtered on\n" 4747 " set_ftrace_filter\t- echo function name in here to only trace these\n" 4748 "\t\t\t functions\n" 4749 "\t accepts: func_full_name or glob-matching-pattern\n" 4750 "\t modules: Can select a group via module\n" 4751 "\t Format: :mod:<module-name>\n" 4752 "\t example: echo :mod:ext3 > set_ftrace_filter\n" 4753 "\t triggers: a command to perform when function is hit\n" 4754 "\t Format: <function>:<trigger>[:count]\n" 4755 "\t trigger: traceon, traceoff\n" 4756 "\t\t enable_event:<system>:<event>\n" 4757 "\t\t disable_event:<system>:<event>\n" 4758 #ifdef CONFIG_STACKTRACE 4759 "\t\t stacktrace\n" 4760 #endif 4761 #ifdef CONFIG_TRACER_SNAPSHOT 4762 "\t\t snapshot\n" 4763 #endif 4764 "\t\t dump\n" 4765 "\t\t cpudump\n" 4766 "\t example: echo do_fault:traceoff > set_ftrace_filter\n" 4767 "\t echo do_trap:traceoff:3 > set_ftrace_filter\n" 4768 "\t The first one will disable tracing every time do_fault is hit\n" 4769 "\t The second will disable tracing at most 3 times when do_trap is hit\n" 4770 "\t The first time do trap is hit and it disables tracing, the\n" 4771 "\t counter will decrement to 2. If tracing is already disabled,\n" 4772 "\t the counter will not decrement. 
It only decrements when the\n" 4773 "\t trigger did work\n" 4774 "\t To remove trigger without count:\n" 4775 "\t echo '!<function>:<trigger> > set_ftrace_filter\n" 4776 "\t To remove trigger with a count:\n" 4777 "\t echo '!<function>:<trigger>:0 > set_ftrace_filter\n" 4778 " set_ftrace_notrace\t- echo function name in here to never trace.\n" 4779 "\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n" 4780 "\t modules: Can select a group via module command :mod:\n" 4781 "\t Does not accept triggers\n" 4782 #endif /* CONFIG_DYNAMIC_FTRACE */ 4783 #ifdef CONFIG_FUNCTION_TRACER 4784 " set_ftrace_pid\t- Write pid(s) to only function trace those pids\n" 4785 "\t\t (function)\n" 4786 " set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n" 4787 "\t\t (function)\n" 4788 #endif 4789 #ifdef CONFIG_FUNCTION_GRAPH_TRACER 4790 " set_graph_function\t- Trace the nested calls of a function (function_graph)\n" 4791 " set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n" 4792 " max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n" 4793 #endif 4794 #ifdef CONFIG_TRACER_SNAPSHOT 4795 "\n snapshot\t\t- Like 'trace' but shows the content of the static\n" 4796 "\t\t\t snapshot buffer. Read the contents for more\n" 4797 "\t\t\t information\n" 4798 #endif 4799 #ifdef CONFIG_STACK_TRACER 4800 " stack_trace\t\t- Shows the max stack trace when active\n" 4801 " stack_max_size\t- Shows current max stack size that was traced\n" 4802 "\t\t\t Write into this file to reset the max size (trigger a\n" 4803 "\t\t\t new trace)\n" 4804 #ifdef CONFIG_DYNAMIC_FTRACE 4805 " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n" 4806 "\t\t\t traces\n" 4807 #endif 4808 #endif /* CONFIG_STACK_TRACER */ 4809 #ifdef CONFIG_DYNAMIC_EVENTS 4810 " dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n" 4811 "\t\t\t Write into this file to define/undefine new trace events.\n" 4812 #endif 4813 #ifdef CONFIG_KPROBE_EVENTS 4814 " kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n" 4815 "\t\t\t Write into this file to define/undefine new trace events.\n" 4816 #endif 4817 #ifdef CONFIG_UPROBE_EVENTS 4818 " uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n" 4819 "\t\t\t Write into this file to define/undefine new trace events.\n" 4820 #endif 4821 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \ 4822 defined(CONFIG_FPROBE_EVENTS) 4823 "\t accepts: event-definitions (one definition per line)\n" 4824 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) 4825 "\t Format: p[:[<group>/][<event>]] <place> [<args>]\n" 4826 "\t r[maxactive][:[<group>/][<event>]] <place> [<args>]\n" 4827 #endif 4828 #ifdef CONFIG_FPROBE_EVENTS 4829 "\t f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n" 4830 "\t t[:[<group>/][<event>]] <tracepoint> [<args>]\n" 4831 #endif 4832 #ifdef CONFIG_HIST_TRIGGERS 4833 "\t s:[synthetic/]<event> <field> [<field>]\n" 4834 #endif 4835 "\t e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n" 4836 "\t -:[<group>/][<event>]\n" 4837 #ifdef CONFIG_KPROBE_EVENTS 4838 "\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n" 4839 "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n" 4840 #endif 4841 #ifdef CONFIG_UPROBE_EVENTS 4842 " place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n" 4843 #endif 4844 "\t args: <name>=fetcharg[:type]\n" 4845 "\t fetcharg: 
(%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n" 4846 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API 4847 "\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n" 4848 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS 4849 "\t <argname>[->field[->field|.field...]],\n" 4850 #endif 4851 #else 4852 "\t $stack<index>, $stack, $retval, $comm,\n" 4853 #endif 4854 "\t +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n" 4855 "\t kernel return probes support: $retval, $arg<N>, $comm\n" 4856 "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n" 4857 "\t b<bit-width>@<bit-offset>/<container-size>, ustring,\n" 4858 "\t symstr, %pd/%pD, <type>\\[<array-size>\\]\n" 4859 #ifdef CONFIG_HIST_TRIGGERS 4860 "\t field: <stype> <name>;\n" 4861 "\t stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n" 4862 "\t [unsigned] char/int/long\n" 4863 #endif 4864 "\t efield: For event probes ('e' types), the field is on of the fields\n" 4865 "\t of the <attached-group>/<attached-event>.\n" 4866 #endif 4867 " set_event\t\t- Enables events by name written into it\n" 4868 "\t\t\t Can enable module events via: :mod:<module>\n" 4869 " events/\t\t- Directory containing all trace event subsystems:\n" 4870 " enable\t\t- Write 0/1 to enable/disable tracing of all events\n" 4871 " events/<system>/\t- Directory containing all trace events for <system>:\n" 4872 " enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n" 4873 "\t\t\t events\n" 4874 " filter\t\t- If set, only events passing filter are traced\n" 4875 " events/<system>/<event>/\t- Directory containing control files for\n" 4876 "\t\t\t <event>:\n" 4877 " enable\t\t- Write 0/1 to enable/disable tracing of <event>\n" 4878 " filter\t\t- If set, only events passing filter are traced\n" 4879 " trigger\t\t- If set, a command to perform when event is hit\n" 4880 "\t Format: <trigger>[:count][if <filter>]\n" 4881 "\t trigger: traceon, traceoff\n" 4882 "\t enable_event:<system>:<event>\n" 4883 "\t disable_event:<system>:<event>\n" 4884 #ifdef CONFIG_HIST_TRIGGERS 4885 "\t enable_hist:<system>:<event>\n" 4886 "\t disable_hist:<system>:<event>\n" 4887 #endif 4888 #ifdef CONFIG_STACKTRACE 4889 "\t\t stacktrace\n" 4890 #endif 4891 #ifdef CONFIG_TRACER_SNAPSHOT 4892 "\t\t snapshot\n" 4893 #endif 4894 #ifdef CONFIG_HIST_TRIGGERS 4895 "\t\t hist (see below)\n" 4896 #endif 4897 "\t example: echo traceoff > events/block/block_unplug/trigger\n" 4898 "\t echo traceoff:3 > events/block/block_unplug/trigger\n" 4899 "\t echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n" 4900 "\t events/block/block_unplug/trigger\n" 4901 "\t The first disables tracing every time block_unplug is hit.\n" 4902 "\t The second disables tracing the first 3 times block_unplug is hit.\n" 4903 "\t The third enables the kmalloc event the first 3 times block_unplug\n" 4904 "\t is hit and has value of greater than 1 for the 'nr_rq' event field.\n" 4905 "\t Like function triggers, the counter is only decremented if it\n" 4906 "\t enabled or disabled tracing.\n" 4907 "\t To remove a trigger without a count:\n" 4908 "\t echo '!<trigger> > <system>/<event>/trigger\n" 4909 "\t To remove a trigger with a count:\n" 4910 "\t echo '!<trigger>:0 > <system>/<event>/trigger\n" 4911 "\t Filters can be ignored when removing a trigger.\n" 4912 #ifdef CONFIG_HIST_TRIGGERS 4913 " hist trigger\t- If set, event hits are aggregated into a hash table\n" 4914 "\t Format: hist:keys=<field1[,field2,...]>\n" 4915 "\t [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n" 4916 "\t 
[:values=<field1[,field2,...]>]\n" 4917 "\t [:sort=<field1[,field2,...]>]\n" 4918 "\t [:size=#entries]\n" 4919 "\t [:pause][:continue][:clear]\n" 4920 "\t [:name=histname1]\n" 4921 "\t [:nohitcount]\n" 4922 "\t [:<handler>.<action>]\n" 4923 "\t [if <filter>]\n\n" 4924 "\t Note, special fields can be used as well:\n" 4925 "\t common_timestamp - to record current timestamp\n" 4926 "\t common_cpu - to record the CPU the event happened on\n" 4927 "\n" 4928 "\t A hist trigger variable can be:\n" 4929 "\t - a reference to a field e.g. x=current_timestamp,\n" 4930 "\t - a reference to another variable e.g. y=$x,\n" 4931 "\t - a numeric literal: e.g. ms_per_sec=1000,\n" 4932 "\t - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n" 4933 "\n" 4934 "\t hist trigger arithmetic expressions support addition(+), subtraction(-),\n" 4935 "\t multiplication(*) and division(/) operators. An operand can be either a\n" 4936 "\t variable reference, field or numeric literal.\n" 4937 "\n" 4938 "\t When a matching event is hit, an entry is added to a hash\n" 4939 "\t table using the key(s) and value(s) named, and the value of a\n" 4940 "\t sum called 'hitcount' is incremented. Keys and values\n" 4941 "\t correspond to fields in the event's format description. Keys\n" 4942 "\t can be any field, or the special string 'common_stacktrace'.\n" 4943 "\t Compound keys consisting of up to two fields can be specified\n" 4944 "\t by the 'keys' keyword. Values must correspond to numeric\n" 4945 "\t fields. Sort keys consisting of up to two fields can be\n" 4946 "\t specified using the 'sort' keyword. The sort direction can\n" 4947 "\t be modified by appending '.descending' or '.ascending' to a\n" 4948 "\t sort field. The 'size' parameter can be used to specify more\n" 4949 "\t or fewer than the default 2048 entries for the hashtable size.\n" 4950 "\t If a hist trigger is given a name using the 'name' parameter,\n" 4951 "\t its histogram data will be shared with other triggers of the\n" 4952 "\t same name, and trigger hits will update this common data.\n\n" 4953 "\t Reading the 'hist' file for the event will dump the hash\n" 4954 "\t table in its entirety to stdout. If there are multiple hist\n" 4955 "\t triggers attached to an event, there will be a table for each\n" 4956 "\t trigger in the output. The table displayed for a named\n" 4957 "\t trigger will be the same as any other instance having the\n" 4958 "\t same name. The default format used to display a given field\n" 4959 "\t can be modified by appending any of the following modifiers\n" 4960 "\t to the field name, as applicable:\n\n" 4961 "\t .hex display a number as a hex value\n" 4962 "\t .sym display an address as a symbol\n" 4963 "\t .sym-offset display an address as a symbol and offset\n" 4964 "\t .execname display a common_pid as a program name\n" 4965 "\t .syscall display a syscall id as a syscall name\n" 4966 "\t .log2 display log2 value rather than raw number\n" 4967 "\t .buckets=size display values in groups of size rather than raw number\n" 4968 "\t .usecs display a common_timestamp in microseconds\n" 4969 "\t .percent display a number as a percentage value\n" 4970 "\t .graph display a bar-graph of a value\n\n" 4971 "\t The 'pause' parameter can be used to pause an existing hist\n" 4972 "\t trigger or to start a hist trigger but not log any events\n" 4973 "\t until told to do so.
'continue' can be used to start or\n" 4974 "\t restart a paused hist trigger.\n\n" 4975 "\t The 'clear' parameter will clear the contents of a running\n" 4976 "\t hist trigger and leave its current paused/active state\n" 4977 "\t unchanged.\n\n" 4978 "\t The 'nohitcount' (or NOHC) parameter will suppress display of\n" 4979 "\t raw hitcount in the histogram.\n\n" 4980 "\t The enable_hist and disable_hist triggers can be used to\n" 4981 "\t have one event conditionally start and stop another event's\n" 4982 "\t already-attached hist trigger. The syntax is analogous to\n" 4983 "\t the enable_event and disable_event triggers.\n\n" 4984 "\t Hist trigger handlers and actions are executed whenever a\n" 4985 "\t a histogram entry is added or updated. They take the form:\n\n" 4986 "\t <handler>.<action>\n\n" 4987 "\t The available handlers are:\n\n" 4988 "\t onmatch(matching.event) - invoke on addition or update\n" 4989 "\t onmax(var) - invoke if var exceeds current max\n" 4990 "\t onchange(var) - invoke action if var changes\n\n" 4991 "\t The available actions are:\n\n" 4992 "\t trace(<synthetic_event>,param list) - generate synthetic event\n" 4993 "\t save(field,...) - save current event fields\n" 4994 #ifdef CONFIG_TRACER_SNAPSHOT 4995 "\t snapshot() - snapshot the trace buffer\n\n" 4996 #endif 4997 #ifdef CONFIG_SYNTH_EVENTS 4998 " events/synthetic_events\t- Create/append/remove/show synthetic events\n" 4999 "\t Write into this file to define/undefine new synthetic events.\n" 5000 "\t example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n" 5001 #endif 5002 #endif 5003 ; 5004 5005 static ssize_t 5006 tracing_readme_read(struct file *filp, char __user *ubuf, 5007 size_t cnt, loff_t *ppos) 5008 { 5009 return simple_read_from_buffer(ubuf, cnt, ppos, 5010 readme_msg, strlen(readme_msg)); 5011 } 5012 5013 static const struct file_operations tracing_readme_fops = { 5014 .open = tracing_open_generic, 5015 .read = tracing_readme_read, 5016 .llseek = generic_file_llseek, 5017 }; 5018 5019 #ifdef CONFIG_TRACE_EVAL_MAP_FILE 5020 static union trace_eval_map_item * 5021 update_eval_map(union trace_eval_map_item *ptr) 5022 { 5023 if (!ptr->map.eval_string) { 5024 if (ptr->tail.next) { 5025 ptr = ptr->tail.next; 5026 /* Set ptr to the next real item (skip head) */ 5027 ptr++; 5028 } else 5029 return NULL; 5030 } 5031 return ptr; 5032 } 5033 5034 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos) 5035 { 5036 union trace_eval_map_item *ptr = v; 5037 5038 /* 5039 * Paranoid! If ptr points to end, we don't want to increment past it. 5040 * This really should never happen. 
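 *
 * (Worked example for the hist trigger syntax documented in readme_msg
 * above; the kmem:kmalloc event and its bytes_req field are only used
 * for illustration.)
 *
 *	# echo 'hist:keys=common_pid.execname:vals=bytes_req:sort=bytes_req.descending' \
 *		> /sys/kernel/tracing/events/kmem/kmalloc/trigger
 *	# cat /sys/kernel/tracing/events/kmem/kmalloc/hist
 *
 * aggregates kmalloc request sizes per task and lists the biggest
 * allocators first.
 *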
5041 */ 5042 (*pos)++; 5043 ptr = update_eval_map(ptr); 5044 if (WARN_ON_ONCE(!ptr)) 5045 return NULL; 5046 5047 ptr++; 5048 ptr = update_eval_map(ptr); 5049 5050 return ptr; 5051 } 5052 5053 static void *eval_map_start(struct seq_file *m, loff_t *pos) 5054 { 5055 union trace_eval_map_item *v; 5056 loff_t l = 0; 5057 5058 mutex_lock(&trace_eval_mutex); 5059 5060 v = trace_eval_maps; 5061 if (v) 5062 v++; 5063 5064 while (v && l < *pos) { 5065 v = eval_map_next(m, v, &l); 5066 } 5067 5068 return v; 5069 } 5070 5071 static void eval_map_stop(struct seq_file *m, void *v) 5072 { 5073 mutex_unlock(&trace_eval_mutex); 5074 } 5075 5076 static int eval_map_show(struct seq_file *m, void *v) 5077 { 5078 union trace_eval_map_item *ptr = v; 5079 5080 seq_printf(m, "%s %ld (%s)\n", 5081 ptr->map.eval_string, ptr->map.eval_value, 5082 ptr->map.system); 5083 5084 return 0; 5085 } 5086 5087 static const struct seq_operations tracing_eval_map_seq_ops = { 5088 .start = eval_map_start, 5089 .next = eval_map_next, 5090 .stop = eval_map_stop, 5091 .show = eval_map_show, 5092 }; 5093 5094 static int tracing_eval_map_open(struct inode *inode, struct file *filp) 5095 { 5096 int ret; 5097 5098 ret = tracing_check_open_get_tr(NULL); 5099 if (ret) 5100 return ret; 5101 5102 return seq_open(filp, &tracing_eval_map_seq_ops); 5103 } 5104 5105 static const struct file_operations tracing_eval_map_fops = { 5106 .open = tracing_eval_map_open, 5107 .read = seq_read, 5108 .llseek = seq_lseek, 5109 .release = seq_release, 5110 }; 5111 5112 static inline union trace_eval_map_item * 5113 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr) 5114 { 5115 /* Return tail of array given the head */ 5116 return ptr + ptr->head.length + 1; 5117 } 5118 5119 static void 5120 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start, 5121 int len) 5122 { 5123 struct trace_eval_map **stop; 5124 struct trace_eval_map **map; 5125 union trace_eval_map_item *map_array; 5126 union trace_eval_map_item *ptr; 5127 5128 stop = start + len; 5129 5130 /* 5131 * The trace_eval_maps contains the map plus a head and tail item, 5132 * where the head holds the module and length of array, and the 5133 * tail holds a pointer to the next list. 
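 *
 * Illustrative layout of one saved array (N == head.length):
 *
 *	map_array[0]	.head	{ .mod, .length = N }
 *	map_array[1]	.map	{ eval_string, eval_value, system }
 *	  ...
 *	map_array[N]	.map	{ ... }
 *	map_array[N+1]	.tail	{ .next -> next saved array, or NULL }
 *
 * which is why trace_eval_jmp_to_tail() above returns
 * ptr + ptr->head.length + 1.
 *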
5134 */ 5135 map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL); 5136 if (!map_array) { 5137 pr_warn("Unable to allocate trace eval mapping\n"); 5138 return; 5139 } 5140 5141 guard(mutex)(&trace_eval_mutex); 5142 5143 if (!trace_eval_maps) 5144 trace_eval_maps = map_array; 5145 else { 5146 ptr = trace_eval_maps; 5147 for (;;) { 5148 ptr = trace_eval_jmp_to_tail(ptr); 5149 if (!ptr->tail.next) 5150 break; 5151 ptr = ptr->tail.next; 5152 5153 } 5154 ptr->tail.next = map_array; 5155 } 5156 map_array->head.mod = mod; 5157 map_array->head.length = len; 5158 map_array++; 5159 5160 for (map = start; (unsigned long)map < (unsigned long)stop; map++) { 5161 map_array->map = **map; 5162 map_array++; 5163 } 5164 memset(map_array, 0, sizeof(*map_array)); 5165 } 5166 5167 static void trace_create_eval_file(struct dentry *d_tracer) 5168 { 5169 trace_create_file("eval_map", TRACE_MODE_READ, d_tracer, 5170 NULL, &tracing_eval_map_fops); 5171 } 5172 5173 #else /* CONFIG_TRACE_EVAL_MAP_FILE */ 5174 static inline void trace_create_eval_file(struct dentry *d_tracer) { } 5175 static inline void trace_insert_eval_map_file(struct module *mod, 5176 struct trace_eval_map **start, int len) { } 5177 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */ 5178 5179 static void 5180 trace_event_update_with_eval_map(struct module *mod, 5181 struct trace_eval_map **start, 5182 int len) 5183 { 5184 struct trace_eval_map **map; 5185 5186 /* Always run sanitizer only if btf_type_tag attr exists. */ 5187 if (len <= 0) { 5188 if (!(IS_ENABLED(CONFIG_DEBUG_INFO_BTF) && 5189 IS_ENABLED(CONFIG_PAHOLE_HAS_BTF_TAG) && 5190 __has_attribute(btf_type_tag))) 5191 return; 5192 } 5193 5194 map = start; 5195 5196 trace_event_update_all(map, len); 5197 5198 if (len <= 0) 5199 return; 5200 5201 trace_insert_eval_map_file(mod, start, len); 5202 } 5203 5204 static ssize_t 5205 tracing_set_trace_read(struct file *filp, char __user *ubuf, 5206 size_t cnt, loff_t *ppos) 5207 { 5208 struct trace_array *tr = filp->private_data; 5209 char buf[MAX_TRACER_SIZE+2]; 5210 int r; 5211 5212 scoped_guard(mutex, &trace_types_lock) { 5213 r = sprintf(buf, "%s\n", tr->current_trace->name); 5214 } 5215 5216 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 5217 } 5218 5219 int tracer_init(struct tracer *t, struct trace_array *tr) 5220 { 5221 tracing_reset_online_cpus(&tr->array_buffer); 5222 update_last_data_if_empty(tr); 5223 return t->init(tr); 5224 } 5225 5226 static void set_buffer_entries(struct array_buffer *buf, unsigned long val) 5227 { 5228 int cpu; 5229 5230 for_each_tracing_cpu(cpu) 5231 per_cpu_ptr(buf->data, cpu)->entries = val; 5232 } 5233 5234 static void update_buffer_entries(struct array_buffer *buf, int cpu) 5235 { 5236 if (cpu == RING_BUFFER_ALL_CPUS) { 5237 set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0)); 5238 } else { 5239 per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu); 5240 } 5241 } 5242 5243 #ifdef CONFIG_TRACER_SNAPSHOT 5244 /* resize @tr's buffer to the size of @size_tr's entries */ 5245 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf, 5246 struct array_buffer *size_buf, int cpu_id) 5247 { 5248 int cpu, ret = 0; 5249 5250 if (cpu_id == RING_BUFFER_ALL_CPUS) { 5251 for_each_tracing_cpu(cpu) { 5252 ret = ring_buffer_resize(trace_buf->buffer, 5253 per_cpu_ptr(size_buf->data, cpu)->entries, cpu); 5254 if (ret < 0) 5255 break; 5256 per_cpu_ptr(trace_buf->data, cpu)->entries = 5257 per_cpu_ptr(size_buf->data, cpu)->entries; 5258 } 5259 } else { 5260 ret = 
ring_buffer_resize(trace_buf->buffer, 5261 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id); 5262 if (ret == 0) 5263 per_cpu_ptr(trace_buf->data, cpu_id)->entries = 5264 per_cpu_ptr(size_buf->data, cpu_id)->entries; 5265 } 5266 5267 return ret; 5268 } 5269 #endif /* CONFIG_TRACER_SNAPSHOT */ 5270 5271 static int __tracing_resize_ring_buffer(struct trace_array *tr, 5272 unsigned long size, int cpu) 5273 { 5274 int ret; 5275 5276 /* 5277 * If kernel or user changes the size of the ring buffer 5278 * we use the size that was given, and we can forget about 5279 * expanding it later. 5280 */ 5281 trace_set_ring_buffer_expanded(tr); 5282 5283 /* May be called before buffers are initialized */ 5284 if (!tr->array_buffer.buffer) 5285 return 0; 5286 5287 /* Do not allow tracing while resizing ring buffer */ 5288 tracing_stop_tr(tr); 5289 5290 ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu); 5291 if (ret < 0) 5292 goto out_start; 5293 5294 #ifdef CONFIG_TRACER_SNAPSHOT 5295 if (!tr->allocated_snapshot) 5296 goto out; 5297 5298 ret = ring_buffer_resize(tr->snapshot_buffer.buffer, size, cpu); 5299 if (ret < 0) { 5300 int r = resize_buffer_duplicate_size(&tr->array_buffer, 5301 &tr->array_buffer, cpu); 5302 if (r < 0) { 5303 /* 5304 * AARGH! We are left with different 5305 * size max buffer!!!! 5306 * The max buffer is our "snapshot" buffer. 5307 * When a tracer needs a snapshot (one of the 5308 * latency tracers), it swaps the max buffer 5309 * with the saved snap shot. We succeeded to 5310 * update the size of the main buffer, but failed to 5311 * update the size of the max buffer. But when we tried 5312 * to reset the main buffer to the original size, we 5313 * failed there too. This is very unlikely to 5314 * happen, but if it does, warn and kill all 5315 * tracing. 5316 */ 5317 WARN_ON(1); 5318 tracing_disabled = 1; 5319 } 5320 goto out_start; 5321 } 5322 5323 update_buffer_entries(&tr->snapshot_buffer, cpu); 5324 5325 out: 5326 #endif /* CONFIG_TRACER_SNAPSHOT */ 5327 5328 update_buffer_entries(&tr->array_buffer, cpu); 5329 out_start: 5330 tracing_start_tr(tr); 5331 return ret; 5332 } 5333 5334 ssize_t tracing_resize_ring_buffer(struct trace_array *tr, 5335 unsigned long size, int cpu_id) 5336 { 5337 guard(mutex)(&trace_types_lock); 5338 5339 if (cpu_id != RING_BUFFER_ALL_CPUS) { 5340 /* make sure, this cpu is enabled in the mask */ 5341 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) 5342 return -EINVAL; 5343 } 5344 5345 return __tracing_resize_ring_buffer(tr, size, cpu_id); 5346 } 5347 5348 struct trace_mod_entry { 5349 unsigned long mod_addr; 5350 char mod_name[MODULE_NAME_LEN]; 5351 }; 5352 5353 struct trace_scratch { 5354 unsigned int clock_id; 5355 unsigned long text_addr; 5356 unsigned long nr_entries; 5357 struct trace_mod_entry entries[]; 5358 }; 5359 5360 static DEFINE_MUTEX(scratch_mutex); 5361 5362 static int cmp_mod_entry(const void *key, const void *pivot) 5363 { 5364 unsigned long addr = (unsigned long)key; 5365 const struct trace_mod_entry *ent = pivot; 5366 5367 if (addr < ent[0].mod_addr) 5368 return -1; 5369 5370 return addr >= ent[1].mod_addr; 5371 } 5372 5373 /** 5374 * trace_adjust_address() - Adjust prev boot address to current address. 5375 * @tr: Persistent ring buffer's trace_array. 5376 * @addr: Address in @tr which is adjusted. 
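 *
 * Translates an address recorded during a previous boot (as found in a
 * persistent ring buffer) into the matching address for the current
 * boot, using either the kernel text delta or the per-module deltas
 * saved in the scratch area.  A hedged sketch with made-up numbers:
 *
 *	prev boot:  module text at 0xffffffffc0200000, ip = text + 0x1234
 *	this boot:  the same module loaded at 0xffffffffc0480000
 *	delta[idx]  = 0xffffffffc0480000 - 0xffffffffc0200000
 *	adjusted ip = ip + module_delta->delta[idx]
 *
 * Returns: the adjusted address, or @addr unchanged when there is no
 * last-boot data to adjust against.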
5377 */ 5378 unsigned long trace_adjust_address(struct trace_array *tr, unsigned long addr) 5379 { 5380 struct trace_module_delta *module_delta; 5381 struct trace_scratch *tscratch; 5382 struct trace_mod_entry *entry; 5383 unsigned long raddr; 5384 int idx = 0, nr_entries; 5385 5386 /* If we don't have last boot delta, return the address */ 5387 if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) 5388 return addr; 5389 5390 /* tr->module_delta must be protected by rcu. */ 5391 guard(rcu)(); 5392 tscratch = tr->scratch; 5393 /* if there is no tscrach, module_delta must be NULL. */ 5394 module_delta = READ_ONCE(tr->module_delta); 5395 if (!module_delta || !tscratch->nr_entries || 5396 tscratch->entries[0].mod_addr > addr) { 5397 raddr = addr + tr->text_delta; 5398 return __is_kernel(raddr) || is_kernel_core_data(raddr) || 5399 is_kernel_rodata(raddr) ? raddr : addr; 5400 } 5401 5402 /* Note that entries must be sorted. */ 5403 nr_entries = tscratch->nr_entries; 5404 if (nr_entries == 1 || 5405 tscratch->entries[nr_entries - 1].mod_addr < addr) 5406 idx = nr_entries - 1; 5407 else { 5408 entry = __inline_bsearch((void *)addr, 5409 tscratch->entries, 5410 nr_entries - 1, 5411 sizeof(tscratch->entries[0]), 5412 cmp_mod_entry); 5413 if (entry) 5414 idx = entry - tscratch->entries; 5415 } 5416 5417 return addr + module_delta->delta[idx]; 5418 } 5419 5420 #ifdef CONFIG_MODULES 5421 static int save_mod(struct module *mod, void *data) 5422 { 5423 struct trace_array *tr = data; 5424 struct trace_scratch *tscratch; 5425 struct trace_mod_entry *entry; 5426 unsigned int size; 5427 5428 tscratch = tr->scratch; 5429 if (!tscratch) 5430 return -1; 5431 size = tr->scratch_size; 5432 5433 if (struct_size(tscratch, entries, tscratch->nr_entries + 1) > size) 5434 return -1; 5435 5436 entry = &tscratch->entries[tscratch->nr_entries]; 5437 5438 tscratch->nr_entries++; 5439 5440 entry->mod_addr = (unsigned long)mod->mem[MOD_TEXT].base; 5441 strscpy(entry->mod_name, mod->name); 5442 5443 return 0; 5444 } 5445 #else 5446 static int save_mod(struct module *mod, void *data) 5447 { 5448 return 0; 5449 } 5450 #endif 5451 5452 static void update_last_data(struct trace_array *tr) 5453 { 5454 struct trace_module_delta *module_delta; 5455 struct trace_scratch *tscratch; 5456 5457 if (!(tr->flags & TRACE_ARRAY_FL_BOOT)) 5458 return; 5459 5460 if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) 5461 return; 5462 5463 /* Only if the buffer has previous boot data clear and update it. */ 5464 tr->flags &= ~TRACE_ARRAY_FL_LAST_BOOT; 5465 5466 /* Reset the module list and reload them */ 5467 if (tr->scratch) { 5468 struct trace_scratch *tscratch = tr->scratch; 5469 5470 tscratch->clock_id = tr->clock_id; 5471 memset(tscratch->entries, 0, 5472 flex_array_size(tscratch, entries, tscratch->nr_entries)); 5473 tscratch->nr_entries = 0; 5474 5475 guard(mutex)(&scratch_mutex); 5476 module_for_each_mod(save_mod, tr); 5477 } 5478 5479 /* 5480 * Need to clear all CPU buffers as there cannot be events 5481 * from the previous boot mixed with events with this boot 5482 * as that will cause a confusing trace. Need to clear all 5483 * CPU buffers, even for those that may currently be offline. 
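 *
 * From user space this is the point where the previous boot's view
 * disappears: a persistent instance (assumed here to be named
 * "boot_mapped") keeps showing the old events only until tracing in it
 * is set up again, e.g.:
 *
 *	# cat /sys/kernel/tracing/instances/boot_mapped/trace
 *		... previous boot's events ...
 *	# echo 1 > /sys/kernel/tracing/instances/boot_mapped/events/sched/sched_switch/enable
 *	# cat /sys/kernel/tracing/instances/boot_mapped/trace
 *		... only this boot's events from here on ...
 *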
5484 */ 5485 tracing_reset_all_cpus(&tr->array_buffer); 5486 5487 /* Using current data now */ 5488 tr->text_delta = 0; 5489 5490 if (!tr->scratch) 5491 return; 5492 5493 tscratch = tr->scratch; 5494 module_delta = READ_ONCE(tr->module_delta); 5495 WRITE_ONCE(tr->module_delta, NULL); 5496 kfree_rcu(module_delta, rcu); 5497 5498 /* Set the persistent ring buffer meta data to this address */ 5499 tscratch->text_addr = (unsigned long)_text; 5500 } 5501 5502 /** 5503 * tracing_update_buffers - used by tracing facility to expand ring buffers 5504 * @tr: The tracing instance 5505 * 5506 * To save on memory when the tracing is never used on a system with it 5507 * configured in. The ring buffers are set to a minimum size. But once 5508 * a user starts to use the tracing facility, then they need to grow 5509 * to their default size. 5510 * 5511 * This function is to be called when a tracer is about to be used. 5512 */ 5513 int tracing_update_buffers(struct trace_array *tr) 5514 { 5515 int ret = 0; 5516 5517 if (!tr) 5518 tr = &global_trace; 5519 5520 guard(mutex)(&trace_types_lock); 5521 5522 update_last_data(tr); 5523 5524 if (!tr->ring_buffer_expanded) 5525 ret = __tracing_resize_ring_buffer(tr, trace_buf_size, 5526 RING_BUFFER_ALL_CPUS); 5527 return ret; 5528 } 5529 5530 /* 5531 * Used to clear out the tracer before deletion of an instance. 5532 * Must have trace_types_lock held. 5533 */ 5534 static void tracing_set_nop(struct trace_array *tr) 5535 { 5536 if (tr->current_trace == &nop_trace) 5537 return; 5538 5539 tr->current_trace->enabled--; 5540 5541 if (tr->current_trace->reset) 5542 tr->current_trace->reset(tr); 5543 5544 tr->current_trace = &nop_trace; 5545 tr->current_trace_flags = nop_trace.flags; 5546 } 5547 5548 static bool tracer_options_updated; 5549 5550 int tracing_set_tracer(struct trace_array *tr, const char *buf) 5551 { 5552 struct tracer *trace = NULL; 5553 struct tracers *t; 5554 bool had_max_tr; 5555 int ret; 5556 5557 guard(mutex)(&trace_types_lock); 5558 5559 update_last_data(tr); 5560 5561 if (!tr->ring_buffer_expanded) { 5562 ret = __tracing_resize_ring_buffer(tr, trace_buf_size, 5563 RING_BUFFER_ALL_CPUS); 5564 if (ret < 0) 5565 return ret; 5566 ret = 0; 5567 } 5568 5569 list_for_each_entry(t, &tr->tracers, list) { 5570 if (strcmp(t->tracer->name, buf) == 0) { 5571 trace = t->tracer; 5572 break; 5573 } 5574 } 5575 if (!trace) 5576 return -EINVAL; 5577 5578 if (trace == tr->current_trace) 5579 return 0; 5580 5581 #ifdef CONFIG_TRACER_SNAPSHOT 5582 if (tracer_uses_snapshot(trace)) { 5583 local_irq_disable(); 5584 arch_spin_lock(&tr->max_lock); 5585 ret = tr->cond_snapshot ? 
-EBUSY : 0; 5586 arch_spin_unlock(&tr->max_lock); 5587 local_irq_enable(); 5588 if (ret) 5589 return ret; 5590 } 5591 #endif 5592 /* Some tracers won't work on kernel command line */ 5593 if (system_state < SYSTEM_RUNNING && trace->noboot) { 5594 pr_warn("Tracer '%s' is not allowed on command line, ignored\n", 5595 trace->name); 5596 return -EINVAL; 5597 } 5598 5599 /* Some tracers are only allowed for the top level buffer */ 5600 if (!trace_ok_for_array(trace, tr)) 5601 return -EINVAL; 5602 5603 /* If trace pipe files are being read, we can't change the tracer */ 5604 if (tr->trace_ref) 5605 return -EBUSY; 5606 5607 trace_branch_disable(); 5608 5609 tr->current_trace->enabled--; 5610 5611 if (tr->current_trace->reset) 5612 tr->current_trace->reset(tr); 5613 5614 had_max_tr = tracer_uses_snapshot(tr->current_trace); 5615 5616 /* Current trace needs to be nop_trace before synchronize_rcu */ 5617 tr->current_trace = &nop_trace; 5618 tr->current_trace_flags = nop_trace.flags; 5619 5620 if (had_max_tr && !tracer_uses_snapshot(trace)) { 5621 /* 5622 * We need to make sure that the update_max_tr sees that 5623 * current_trace changed to nop_trace to keep it from 5624 * swapping the buffers after we resize it. 5625 * The update_max_tr is called from interrupts disabled 5626 * so a synchronized_sched() is sufficient. 5627 */ 5628 synchronize_rcu(); 5629 free_snapshot(tr); 5630 tracing_disarm_snapshot(tr); 5631 } 5632 5633 if (!had_max_tr && tracer_uses_snapshot(trace)) { 5634 ret = tracing_arm_snapshot_locked(tr); 5635 if (ret) 5636 return ret; 5637 } 5638 5639 tr->current_trace_flags = t->flags ? : t->tracer->flags; 5640 5641 if (trace->init) { 5642 ret = tracer_init(trace, tr); 5643 if (ret) { 5644 if (tracer_uses_snapshot(trace)) 5645 tracing_disarm_snapshot(tr); 5646 tr->current_trace_flags = nop_trace.flags; 5647 return ret; 5648 } 5649 } 5650 5651 tr->current_trace = trace; 5652 tr->current_trace->enabled++; 5653 trace_branch_enable(tr); 5654 5655 return 0; 5656 } 5657 5658 static ssize_t 5659 tracing_set_trace_write(struct file *filp, const char __user *ubuf, 5660 size_t cnt, loff_t *ppos) 5661 { 5662 struct trace_array *tr = filp->private_data; 5663 char buf[MAX_TRACER_SIZE+1]; 5664 char *name; 5665 size_t ret; 5666 int err; 5667 5668 ret = cnt; 5669 5670 if (cnt > MAX_TRACER_SIZE) 5671 cnt = MAX_TRACER_SIZE; 5672 5673 if (copy_from_user(buf, ubuf, cnt)) 5674 return -EFAULT; 5675 5676 buf[cnt] = 0; 5677 5678 name = strim(buf); 5679 5680 err = tracing_set_tracer(tr, name); 5681 if (err) 5682 return err; 5683 5684 *ppos += ret; 5685 5686 return ret; 5687 } 5688 5689 static ssize_t 5690 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf, 5691 size_t cnt, loff_t *ppos) 5692 { 5693 char buf[64]; 5694 int r; 5695 5696 r = snprintf(buf, sizeof(buf), "%ld\n", 5697 *ptr == (unsigned long)-1 ? 
-1 : nsecs_to_usecs(*ptr)); 5698 if (r > sizeof(buf)) 5699 r = sizeof(buf); 5700 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 5701 } 5702 5703 static ssize_t 5704 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf, 5705 size_t cnt, loff_t *ppos) 5706 { 5707 unsigned long val; 5708 int ret; 5709 5710 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 5711 if (ret) 5712 return ret; 5713 5714 *ptr = val * 1000; 5715 5716 return cnt; 5717 } 5718 5719 static ssize_t 5720 tracing_thresh_read(struct file *filp, char __user *ubuf, 5721 size_t cnt, loff_t *ppos) 5722 { 5723 return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos); 5724 } 5725 5726 static ssize_t 5727 tracing_thresh_write(struct file *filp, const char __user *ubuf, 5728 size_t cnt, loff_t *ppos) 5729 { 5730 struct trace_array *tr = filp->private_data; 5731 int ret; 5732 5733 guard(mutex)(&trace_types_lock); 5734 ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos); 5735 if (ret < 0) 5736 return ret; 5737 5738 if (tr->current_trace->update_thresh) { 5739 ret = tr->current_trace->update_thresh(tr); 5740 if (ret < 0) 5741 return ret; 5742 } 5743 5744 return cnt; 5745 } 5746 5747 #ifdef CONFIG_TRACER_MAX_TRACE 5748 5749 static ssize_t 5750 tracing_max_lat_read(struct file *filp, char __user *ubuf, 5751 size_t cnt, loff_t *ppos) 5752 { 5753 struct trace_array *tr = filp->private_data; 5754 5755 return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos); 5756 } 5757 5758 static ssize_t 5759 tracing_max_lat_write(struct file *filp, const char __user *ubuf, 5760 size_t cnt, loff_t *ppos) 5761 { 5762 struct trace_array *tr = filp->private_data; 5763 5764 return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos); 5765 } 5766 5767 #endif 5768 5769 static int open_pipe_on_cpu(struct trace_array *tr, int cpu) 5770 { 5771 if (cpu == RING_BUFFER_ALL_CPUS) { 5772 if (cpumask_empty(tr->pipe_cpumask)) { 5773 cpumask_setall(tr->pipe_cpumask); 5774 return 0; 5775 } 5776 } else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) { 5777 cpumask_set_cpu(cpu, tr->pipe_cpumask); 5778 return 0; 5779 } 5780 return -EBUSY; 5781 } 5782 5783 static void close_pipe_on_cpu(struct trace_array *tr, int cpu) 5784 { 5785 if (cpu == RING_BUFFER_ALL_CPUS) { 5786 WARN_ON(!cpumask_full(tr->pipe_cpumask)); 5787 cpumask_clear(tr->pipe_cpumask); 5788 } else { 5789 WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask)); 5790 cpumask_clear_cpu(cpu, tr->pipe_cpumask); 5791 } 5792 } 5793 5794 static int tracing_open_pipe(struct inode *inode, struct file *filp) 5795 { 5796 struct trace_array *tr = inode->i_private; 5797 struct trace_iterator *iter; 5798 int cpu; 5799 int ret; 5800 5801 ret = tracing_check_open_get_tr(tr); 5802 if (ret) 5803 return ret; 5804 5805 guard(mutex)(&trace_types_lock); 5806 cpu = tracing_get_cpu(inode); 5807 ret = open_pipe_on_cpu(tr, cpu); 5808 if (ret) 5809 goto fail_pipe_on_cpu; 5810 5811 /* create a buffer to store the information to pass to userspace */ 5812 iter = kzalloc(sizeof(*iter), GFP_KERNEL); 5813 if (!iter) { 5814 ret = -ENOMEM; 5815 goto fail_alloc_iter; 5816 } 5817 5818 trace_seq_init(&iter->seq); 5819 iter->trace = tr->current_trace; 5820 5821 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) { 5822 ret = -ENOMEM; 5823 goto fail; 5824 } 5825 5826 /* trace pipe does not show start of buffer */ 5827 cpumask_setall(iter->started); 5828 5829 if (tr->trace_flags & TRACE_ITER(LATENCY_FMT)) 5830 iter->iter_flags |= TRACE_FILE_LAT_FMT; 5831 5832 /* Output in nanoseconds only if we are using a clock in nanoseconds. 
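 *
 * Illustrative sketch (not part of this file): the pipe_cpumask
 * handling above makes trace_pipe exclusive, so a second concurrent
 * reader gets -EBUSY.  Assuming tracefs at /sys/kernel/tracing:
 *
 *	int a = open("/sys/kernel/tracing/trace_pipe", O_RDONLY);              <- ok
 *	int b = open("/sys/kernel/tracing/trace_pipe", O_RDONLY);              <- fails, errno == EBUSY
 *	int c = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe", O_RDONLY); <- also busy while 'a' is open
 *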
*/ 5833 if (trace_clocks[tr->clock_id].in_ns) 5834 iter->iter_flags |= TRACE_FILE_TIME_IN_NS; 5835 5836 iter->tr = tr; 5837 iter->array_buffer = &tr->array_buffer; 5838 iter->cpu_file = cpu; 5839 mutex_init(&iter->mutex); 5840 filp->private_data = iter; 5841 5842 if (iter->trace->pipe_open) 5843 iter->trace->pipe_open(iter); 5844 5845 nonseekable_open(inode, filp); 5846 5847 tr->trace_ref++; 5848 5849 return ret; 5850 5851 fail: 5852 kfree(iter); 5853 fail_alloc_iter: 5854 close_pipe_on_cpu(tr, cpu); 5855 fail_pipe_on_cpu: 5856 __trace_array_put(tr); 5857 return ret; 5858 } 5859 5860 static int tracing_release_pipe(struct inode *inode, struct file *file) 5861 { 5862 struct trace_iterator *iter = file->private_data; 5863 struct trace_array *tr = inode->i_private; 5864 5865 scoped_guard(mutex, &trace_types_lock) { 5866 tr->trace_ref--; 5867 5868 if (iter->trace->pipe_close) 5869 iter->trace->pipe_close(iter); 5870 close_pipe_on_cpu(tr, iter->cpu_file); 5871 } 5872 5873 free_trace_iter_content(iter); 5874 kfree(iter); 5875 5876 trace_array_put(tr); 5877 5878 return 0; 5879 } 5880 5881 static __poll_t 5882 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table) 5883 { 5884 struct trace_array *tr = iter->tr; 5885 5886 /* Iterators are static, they should be filled or empty */ 5887 if (trace_buffer_iter(iter, iter->cpu_file)) 5888 return EPOLLIN | EPOLLRDNORM; 5889 5890 if (tr->trace_flags & TRACE_ITER(BLOCK)) 5891 /* 5892 * Always select as readable when in blocking mode 5893 */ 5894 return EPOLLIN | EPOLLRDNORM; 5895 else 5896 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file, 5897 filp, poll_table, iter->tr->buffer_percent); 5898 } 5899 5900 static __poll_t 5901 tracing_poll_pipe(struct file *filp, poll_table *poll_table) 5902 { 5903 struct trace_iterator *iter = filp->private_data; 5904 5905 return trace_poll(iter, filp, poll_table); 5906 } 5907 5908 /* Must be called with iter->mutex held. */ 5909 static int tracing_wait_pipe(struct file *filp) 5910 { 5911 struct trace_iterator *iter = filp->private_data; 5912 int ret; 5913 5914 while (trace_empty(iter)) { 5915 5916 if ((filp->f_flags & O_NONBLOCK)) { 5917 return -EAGAIN; 5918 } 5919 5920 /* 5921 * We block until we read something and tracing is disabled. 5922 * We still block if tracing is disabled, but we have never 5923 * read anything. This allows a user to cat this file, and 5924 * then enable tracing. But after we have read something, 5925 * we give an EOF when tracing is again disabled. 5926 * 5927 * iter->pos will be 0 if we haven't read anything. 5928 */ 5929 if (!tracer_tracing_is_on(iter->tr) && iter->pos) 5930 break; 5931 5932 mutex_unlock(&iter->mutex); 5933 5934 ret = wait_on_pipe(iter, 0); 5935 5936 mutex_lock(&iter->mutex); 5937 5938 if (ret) 5939 return ret; 5940 } 5941 5942 return 1; 5943 } 5944 5945 static bool update_last_data_if_empty(struct trace_array *tr) 5946 { 5947 if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) 5948 return false; 5949 5950 if (!ring_buffer_empty(tr->array_buffer.buffer)) 5951 return false; 5952 5953 /* 5954 * If the buffer contains the last boot data and all per-cpu 5955 * buffers are empty, reset it from the kernel side. 5956 */ 5957 update_last_data(tr); 5958 return true; 5959 } 5960 5961 /* 5962 * Consumer reader. 
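 *
 * Unlike the "trace" file, a read here consumes the entries it returns,
 * so a second reader will not see the same events again. A minimal usage
 * sketch from user space (the path assumes the usual tracefs mount point):
 *
 *	cat /sys/kernel/tracing/trace_pipe
 *
 * blocks until data arrives unless the file was opened O_NONBLOCK.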
5963 */ 5964 static ssize_t 5965 tracing_read_pipe(struct file *filp, char __user *ubuf, 5966 size_t cnt, loff_t *ppos) 5967 { 5968 struct trace_iterator *iter = filp->private_data; 5969 ssize_t sret; 5970 5971 /* 5972 * Avoid more than one consumer on a single file descriptor. 5973 * This is just a matter of trace coherency; the ring buffer itself 5974 * is protected. 5975 */ 5976 guard(mutex)(&iter->mutex); 5977 5978 /* return any leftover data */ 5979 sret = trace_seq_to_user(&iter->seq, ubuf, cnt); 5980 if (sret != -EBUSY) 5981 return sret; 5982 5983 trace_seq_init(&iter->seq); 5984 5985 if (iter->trace->read) { 5986 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos); 5987 if (sret) 5988 return sret; 5989 } 5990 5991 waitagain: 5992 if (update_last_data_if_empty(iter->tr)) 5993 return 0; 5994 5995 sret = tracing_wait_pipe(filp); 5996 if (sret <= 0) 5997 return sret; 5998 5999 /* stop when tracing is finished */ 6000 if (trace_empty(iter)) 6001 return 0; 6002 6003 if (cnt >= TRACE_SEQ_BUFFER_SIZE) 6004 cnt = TRACE_SEQ_BUFFER_SIZE - 1; 6005 6006 /* reset all but tr, trace, and overruns */ 6007 trace_iterator_reset(iter); 6008 cpumask_clear(iter->started); 6009 trace_seq_init(&iter->seq); 6010 6011 trace_event_read_lock(); 6012 trace_access_lock(iter->cpu_file); 6013 while (trace_find_next_entry_inc(iter) != NULL) { 6014 enum print_line_t ret; 6015 int save_len = iter->seq.seq.len; 6016 6017 ret = print_trace_line(iter); 6018 if (ret == TRACE_TYPE_PARTIAL_LINE) { 6019 /* 6020 * If one print_trace_line() fills the entire trace_seq in one shot, 6021 * trace_seq_to_user() will return -EBUSY because save_len == 0. 6022 * In this case, we need to consume it, otherwise the loop will peek 6023 * this event next time, resulting in an infinite loop. 6024 */ 6025 if (save_len == 0) { 6026 iter->seq.full = 0; 6027 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n"); 6028 trace_consume(iter); 6029 break; 6030 } 6031 6032 /* In other cases, don't print partial lines */ 6033 iter->seq.seq.len = save_len; 6034 break; 6035 } 6036 if (ret != TRACE_TYPE_NO_CONSUME) 6037 trace_consume(iter); 6038 6039 if (trace_seq_used(&iter->seq) >= cnt) 6040 break; 6041 6042 /* 6043 * Setting the full flag means we reached the trace_seq buffer 6044 * size and we should leave by the partial output condition above. 6045 * One of the trace_seq_* functions is not used properly. 6046 */ 6047 WARN_ONCE(iter->seq.full, "full flag set for trace type %d", 6048 iter->ent->type); 6049 } 6050 trace_access_unlock(iter->cpu_file); 6051 trace_event_read_unlock(); 6052 6053 /* Now copy what we have to the user */ 6054 sret = trace_seq_to_user(&iter->seq, ubuf, cnt); 6055 if (iter->seq.readpos >= trace_seq_used(&iter->seq)) 6056 trace_seq_init(&iter->seq); 6057 6058 /* 6059 * If there was nothing to send to the user, in spite of consuming trace 6060 * entries, go back to wait for more entries. 6061 */ 6062 if (sret == -EBUSY) 6063 goto waitagain; 6064 6065 return sret; 6066 } 6067 6068 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd, 6069 unsigned int idx) 6070 { 6071 __free_page(spd->pages[idx]); 6072 } 6073 6074 static size_t 6075 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter) 6076 { 6077 size_t count; 6078 int save_len; 6079 int ret; 6080 6081 /* Seq buffer is page-sized, exactly what we need.
*/ 6082 for (;;) { 6083 save_len = iter->seq.seq.len; 6084 ret = print_trace_line(iter); 6085 6086 if (trace_seq_has_overflowed(&iter->seq)) { 6087 iter->seq.seq.len = save_len; 6088 break; 6089 } 6090 6091 /* 6092 * This should not be hit, because it should only 6093 * be set if the iter->seq overflowed. But check it 6094 * anyway to be safe. 6095 */ 6096 if (ret == TRACE_TYPE_PARTIAL_LINE) { 6097 iter->seq.seq.len = save_len; 6098 break; 6099 } 6100 6101 count = trace_seq_used(&iter->seq) - save_len; 6102 if (rem < count) { 6103 rem = 0; 6104 iter->seq.seq.len = save_len; 6105 break; 6106 } 6107 6108 if (ret != TRACE_TYPE_NO_CONSUME) 6109 trace_consume(iter); 6110 rem -= count; 6111 if (!trace_find_next_entry_inc(iter)) { 6112 rem = 0; 6113 iter->ent = NULL; 6114 break; 6115 } 6116 } 6117 6118 return rem; 6119 } 6120 6121 static ssize_t tracing_splice_read_pipe(struct file *filp, 6122 loff_t *ppos, 6123 struct pipe_inode_info *pipe, 6124 size_t len, 6125 unsigned int flags) 6126 { 6127 struct page *pages_def[PIPE_DEF_BUFFERS]; 6128 struct partial_page partial_def[PIPE_DEF_BUFFERS]; 6129 struct trace_iterator *iter = filp->private_data; 6130 struct splice_pipe_desc spd = { 6131 .pages = pages_def, 6132 .partial = partial_def, 6133 .nr_pages = 0, /* This gets updated below. */ 6134 .nr_pages_max = PIPE_DEF_BUFFERS, 6135 .ops = &default_pipe_buf_ops, 6136 .spd_release = tracing_spd_release_pipe, 6137 }; 6138 ssize_t ret; 6139 size_t rem; 6140 unsigned int i; 6141 6142 if (splice_grow_spd(pipe, &spd)) 6143 return -ENOMEM; 6144 6145 mutex_lock(&iter->mutex); 6146 6147 if (iter->trace->splice_read) { 6148 ret = iter->trace->splice_read(iter, filp, 6149 ppos, pipe, len, flags); 6150 if (ret) 6151 goto out_err; 6152 } 6153 6154 ret = tracing_wait_pipe(filp); 6155 if (ret <= 0) 6156 goto out_err; 6157 6158 if (!iter->ent && !trace_find_next_entry_inc(iter)) { 6159 ret = -EFAULT; 6160 goto out_err; 6161 } 6162 6163 trace_event_read_lock(); 6164 trace_access_lock(iter->cpu_file); 6165 6166 /* Fill as many pages as possible. */ 6167 for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) { 6168 spd.pages[i] = alloc_page(GFP_KERNEL); 6169 if (!spd.pages[i]) 6170 break; 6171 6172 rem = tracing_fill_pipe_page(rem, iter); 6173 6174 /* Copy the data into the page, so we can start over. 
*/ 6175 ret = trace_seq_to_buffer(&iter->seq, 6176 page_address(spd.pages[i]), 6177 min((size_t)trace_seq_used(&iter->seq), 6178 (size_t)PAGE_SIZE)); 6179 if (ret < 0) { 6180 __free_page(spd.pages[i]); 6181 break; 6182 } 6183 spd.partial[i].offset = 0; 6184 spd.partial[i].len = ret; 6185 6186 trace_seq_init(&iter->seq); 6187 } 6188 6189 trace_access_unlock(iter->cpu_file); 6190 trace_event_read_unlock(); 6191 mutex_unlock(&iter->mutex); 6192 6193 spd.nr_pages = i; 6194 6195 if (i) 6196 ret = splice_to_pipe(pipe, &spd); 6197 else 6198 ret = 0; 6199 out: 6200 splice_shrink_spd(&spd); 6201 return ret; 6202 6203 out_err: 6204 mutex_unlock(&iter->mutex); 6205 goto out; 6206 } 6207 6208 static ssize_t 6209 tracing_syscall_buf_read(struct file *filp, char __user *ubuf, 6210 size_t cnt, loff_t *ppos) 6211 { 6212 struct inode *inode = file_inode(filp); 6213 struct trace_array *tr = inode->i_private; 6214 char buf[64]; 6215 int r; 6216 6217 r = snprintf(buf, 64, "%d\n", tr->syscall_buf_sz); 6218 6219 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 6220 } 6221 6222 static ssize_t 6223 tracing_syscall_buf_write(struct file *filp, const char __user *ubuf, 6224 size_t cnt, loff_t *ppos) 6225 { 6226 struct inode *inode = file_inode(filp); 6227 struct trace_array *tr = inode->i_private; 6228 unsigned long val; 6229 int ret; 6230 6231 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 6232 if (ret) 6233 return ret; 6234 6235 if (val > SYSCALL_FAULT_USER_MAX) 6236 val = SYSCALL_FAULT_USER_MAX; 6237 6238 tr->syscall_buf_sz = val; 6239 6240 *ppos += cnt; 6241 6242 return cnt; 6243 } 6244 6245 static ssize_t 6246 tracing_entries_read(struct file *filp, char __user *ubuf, 6247 size_t cnt, loff_t *ppos) 6248 { 6249 struct inode *inode = file_inode(filp); 6250 struct trace_array *tr = inode->i_private; 6251 int cpu = tracing_get_cpu(inode); 6252 char buf[64]; 6253 int r = 0; 6254 ssize_t ret; 6255 6256 mutex_lock(&trace_types_lock); 6257 6258 if (cpu == RING_BUFFER_ALL_CPUS) { 6259 int cpu, buf_size_same; 6260 unsigned long size; 6261 6262 size = 0; 6263 buf_size_same = 1; 6264 /* check if all cpu sizes are same */ 6265 for_each_tracing_cpu(cpu) { 6266 /* fill in the size from first enabled cpu */ 6267 if (size == 0) 6268 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries; 6269 if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) { 6270 buf_size_same = 0; 6271 break; 6272 } 6273 } 6274 6275 if (buf_size_same) { 6276 if (!tr->ring_buffer_expanded) 6277 r = sprintf(buf, "%lu (expanded: %lu)\n", 6278 size >> 10, 6279 trace_buf_size >> 10); 6280 else 6281 r = sprintf(buf, "%lu\n", size >> 10); 6282 } else 6283 r = sprintf(buf, "X\n"); 6284 } else 6285 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10); 6286 6287 mutex_unlock(&trace_types_lock); 6288 6289 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 6290 return ret; 6291 } 6292 6293 static ssize_t 6294 tracing_entries_write(struct file *filp, const char __user *ubuf, 6295 size_t cnt, loff_t *ppos) 6296 { 6297 struct inode *inode = file_inode(filp); 6298 struct trace_array *tr = inode->i_private; 6299 unsigned long val; 6300 int ret; 6301 6302 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 6303 if (ret) 6304 return ret; 6305 6306 /* must have at least 1 entry */ 6307 if (!val) 6308 return -EINVAL; 6309 6310 /* value is in KB */ 6311 val <<= 10; 6312 ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode)); 6313 if (ret < 0) 6314 return ret; 6315 6316 *ppos += cnt; 6317 6318 return cnt; 6319 } 6320 
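/*
 * Usage sketch (not part of the original source): in the stock kernel the
 * two handlers above back the per-instance "buffer_size_kb" file, so
 * resizing the ring buffer from user space is simply:
 *
 *	echo 8192 > /sys/kernel/tracing/buffer_size_kb
 *
 * The value is taken in KiB; the top-level file resizes every CPU's buffer,
 * while the per_cpu/cpu<N>/buffer_size_kb variant resizes a single CPU.
 */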
6321 static ssize_t 6322 tracing_total_entries_read(struct file *filp, char __user *ubuf, 6323 size_t cnt, loff_t *ppos) 6324 { 6325 struct trace_array *tr = filp->private_data; 6326 char buf[64]; 6327 int r, cpu; 6328 unsigned long size = 0, expanded_size = 0; 6329 6330 mutex_lock(&trace_types_lock); 6331 for_each_tracing_cpu(cpu) { 6332 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10; 6333 if (!tr->ring_buffer_expanded) 6334 expanded_size += trace_buf_size >> 10; 6335 } 6336 if (tr->ring_buffer_expanded) 6337 r = sprintf(buf, "%lu\n", size); 6338 else 6339 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size); 6340 mutex_unlock(&trace_types_lock); 6341 6342 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 6343 } 6344 6345 #define LAST_BOOT_HEADER ((void *)1) 6346 6347 static void *l_next(struct seq_file *m, void *v, loff_t *pos) 6348 { 6349 struct trace_array *tr = m->private; 6350 struct trace_scratch *tscratch = tr->scratch; 6351 unsigned int index = *pos; 6352 6353 (*pos)++; 6354 6355 if (*pos == 1) 6356 return LAST_BOOT_HEADER; 6357 6358 /* Only show offsets of the last boot data */ 6359 if (!tscratch || !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) 6360 return NULL; 6361 6362 /* *pos 0 is for the header, 1 is for the first module */ 6363 index--; 6364 6365 if (index >= tscratch->nr_entries) 6366 return NULL; 6367 6368 return &tscratch->entries[index]; 6369 } 6370 6371 static void *l_start(struct seq_file *m, loff_t *pos) 6372 { 6373 mutex_lock(&scratch_mutex); 6374 6375 return l_next(m, NULL, pos); 6376 } 6377 6378 static void l_stop(struct seq_file *m, void *p) 6379 { 6380 mutex_unlock(&scratch_mutex); 6381 } 6382 6383 static void show_last_boot_header(struct seq_file *m, struct trace_array *tr) 6384 { 6385 struct trace_scratch *tscratch = tr->scratch; 6386 6387 /* 6388 * Do not leak KASLR address. This only shows the KASLR address of 6389 * the last boot. When the ring buffer is started, the LAST_BOOT 6390 * flag gets cleared, and this should only report "current". 6391 * Otherwise it shows the KASLR address from the previous boot which 6392 * should not be the same as the current boot. 
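 *
 * When the last-boot data is present, the header line below uses the same
 * "<address>\t<name>" layout as the per-module lines emitted by l_show(),
 * with "[kernel]" as the name.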
6393 */ 6394 if (tscratch && (tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) 6395 seq_printf(m, "%lx\t[kernel]\n", tscratch->text_addr); 6396 else 6397 seq_puts(m, "# Current\n"); 6398 } 6399 6400 static int l_show(struct seq_file *m, void *v) 6401 { 6402 struct trace_array *tr = m->private; 6403 struct trace_mod_entry *entry = v; 6404 6405 if (v == LAST_BOOT_HEADER) { 6406 show_last_boot_header(m, tr); 6407 return 0; 6408 } 6409 6410 seq_printf(m, "%lx\t%s\n", entry->mod_addr, entry->mod_name); 6411 return 0; 6412 } 6413 6414 static const struct seq_operations last_boot_seq_ops = { 6415 .start = l_start, 6416 .next = l_next, 6417 .stop = l_stop, 6418 .show = l_show, 6419 }; 6420 6421 static int tracing_last_boot_open(struct inode *inode, struct file *file) 6422 { 6423 struct trace_array *tr = inode->i_private; 6424 struct seq_file *m; 6425 int ret; 6426 6427 ret = tracing_check_open_get_tr(tr); 6428 if (ret) 6429 return ret; 6430 6431 ret = seq_open(file, &last_boot_seq_ops); 6432 if (ret) { 6433 trace_array_put(tr); 6434 return ret; 6435 } 6436 6437 m = file->private_data; 6438 m->private = tr; 6439 6440 return 0; 6441 } 6442 6443 static int tracing_buffer_meta_open(struct inode *inode, struct file *filp) 6444 { 6445 struct trace_array *tr = inode->i_private; 6446 int cpu = tracing_get_cpu(inode); 6447 int ret; 6448 6449 ret = tracing_check_open_get_tr(tr); 6450 if (ret) 6451 return ret; 6452 6453 ret = ring_buffer_meta_seq_init(filp, tr->array_buffer.buffer, cpu); 6454 if (ret < 0) 6455 __trace_array_put(tr); 6456 return ret; 6457 } 6458 6459 static ssize_t 6460 tracing_free_buffer_write(struct file *filp, const char __user *ubuf, 6461 size_t cnt, loff_t *ppos) 6462 { 6463 /* 6464 * There is no need to read what the user has written, this function 6465 * is just to make sure that there is no error when "echo" is used 6466 */ 6467 6468 *ppos += cnt; 6469 6470 return cnt; 6471 } 6472 6473 static int 6474 tracing_free_buffer_release(struct inode *inode, struct file *filp) 6475 { 6476 struct trace_array *tr = inode->i_private; 6477 6478 /* disable tracing ? */ 6479 if (tr->trace_flags & TRACE_ITER(STOP_ON_FREE)) 6480 tracer_tracing_off(tr); 6481 /* resize the ring buffer to 0 */ 6482 tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS); 6483 6484 trace_array_put(tr); 6485 6486 return 0; 6487 } 6488 6489 #define TRACE_MARKER_MAX_SIZE 4096 6490 6491 static ssize_t write_marker_to_buffer(struct trace_array *tr, const char *buf, 6492 size_t cnt, unsigned long ip) 6493 { 6494 struct ring_buffer_event *event; 6495 enum event_trigger_type tt = ETT_NONE; 6496 struct trace_buffer *buffer; 6497 struct print_entry *entry; 6498 int meta_size; 6499 ssize_t written; 6500 size_t size; 6501 6502 meta_size = sizeof(*entry) + 2; /* add '\0' and possible '\n' */ 6503 again: 6504 size = cnt + meta_size; 6505 6506 buffer = tr->array_buffer.buffer; 6507 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, 6508 tracing_gen_ctx()); 6509 if (unlikely(!event)) { 6510 /* 6511 * If the size was greater than what was allowed, then 6512 * make it smaller and try again. 
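 * Only one retry is expected, since the clamped count is derived from
 * the ring buffer's maximum event size; the WARN_ON_ONCE() below catches
 * the case where clamping made no progress.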
6513 */ 6514 if (size > ring_buffer_max_event_size(buffer)) { 6515 cnt = ring_buffer_max_event_size(buffer) - meta_size; 6516 /* The above should only happen once */ 6517 if (WARN_ON_ONCE(cnt + meta_size == size)) 6518 return -EBADF; 6519 goto again; 6520 } 6521 6522 /* Ring buffer disabled, return as if not open for write */ 6523 return -EBADF; 6524 } 6525 6526 entry = ring_buffer_event_data(event); 6527 entry->ip = ip; 6528 memcpy(&entry->buf, buf, cnt); 6529 written = cnt; 6530 6531 if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) { 6532 /* do not add \n before testing triggers, but add \0 */ 6533 entry->buf[cnt] = '\0'; 6534 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event); 6535 } 6536 6537 if (entry->buf[cnt - 1] != '\n') { 6538 entry->buf[cnt] = '\n'; 6539 entry->buf[cnt + 1] = '\0'; 6540 } else 6541 entry->buf[cnt] = '\0'; 6542 6543 if (static_branch_unlikely(&trace_marker_exports_enabled)) 6544 ftrace_exports(event, TRACE_EXPORT_MARKER); 6545 __buffer_unlock_commit(buffer, event); 6546 6547 if (tt) 6548 event_triggers_post_call(tr->trace_marker_file, tt); 6549 6550 return written; 6551 } 6552 6553 struct trace_user_buf { 6554 char *buf; 6555 }; 6556 6557 static DEFINE_MUTEX(trace_user_buffer_mutex); 6558 static struct trace_user_buf_info *trace_user_buffer; 6559 6560 /** 6561 * trace_user_fault_destroy - free up allocated memory of a trace user buffer 6562 * @tinfo: The descriptor to free up 6563 * 6564 * Frees any data allocated in the trace info descriptor. 6565 */ 6566 void trace_user_fault_destroy(struct trace_user_buf_info *tinfo) 6567 { 6568 char *buf; 6569 int cpu; 6570 6571 if (!tinfo || !tinfo->tbuf) 6572 return; 6573 6574 for_each_possible_cpu(cpu) { 6575 buf = per_cpu_ptr(tinfo->tbuf, cpu)->buf; 6576 kfree(buf); 6577 } 6578 free_percpu(tinfo->tbuf); 6579 } 6580 6581 static int user_fault_buffer_enable(struct trace_user_buf_info *tinfo, size_t size) 6582 { 6583 char *buf; 6584 int cpu; 6585 6586 lockdep_assert_held(&trace_user_buffer_mutex); 6587 6588 tinfo->tbuf = alloc_percpu(struct trace_user_buf); 6589 if (!tinfo->tbuf) 6590 return -ENOMEM; 6591 6592 tinfo->ref = 1; 6593 tinfo->size = size; 6594 6595 /* Clear each buffer in case of error */ 6596 for_each_possible_cpu(cpu) { 6597 per_cpu_ptr(tinfo->tbuf, cpu)->buf = NULL; 6598 } 6599 6600 for_each_possible_cpu(cpu) { 6601 buf = kmalloc_node(size, GFP_KERNEL, 6602 cpu_to_node(cpu)); 6603 if (!buf) 6604 return -ENOMEM; 6605 per_cpu_ptr(tinfo->tbuf, cpu)->buf = buf; 6606 } 6607 6608 return 0; 6609 } 6610 6611 /* For internal use. Free and reinitialize */ 6612 static void user_buffer_free(struct trace_user_buf_info **tinfo) 6613 { 6614 lockdep_assert_held(&trace_user_buffer_mutex); 6615 6616 trace_user_fault_destroy(*tinfo); 6617 kfree(*tinfo); 6618 *tinfo = NULL; 6619 } 6620 6621 /* For internal use.
Initialize and allocate */ 6622 static int user_buffer_init(struct trace_user_buf_info **tinfo, size_t size) 6623 { 6624 bool alloc = false; 6625 int ret; 6626 6627 lockdep_assert_held(&trace_user_buffer_mutex); 6628 6629 if (!*tinfo) { 6630 alloc = true; 6631 *tinfo = kzalloc(sizeof(**tinfo), GFP_KERNEL); 6632 if (!*tinfo) 6633 return -ENOMEM; 6634 } 6635 6636 ret = user_fault_buffer_enable(*tinfo, size); 6637 if (ret < 0 && alloc) 6638 user_buffer_free(tinfo); 6639 6640 return ret; 6641 } 6642 6643 /* For internal use, dereference and free if necessary */ 6644 static void user_buffer_put(struct trace_user_buf_info **tinfo) 6645 { 6646 guard(mutex)(&trace_user_buffer_mutex); 6647 6648 if (WARN_ON_ONCE(!*tinfo || !(*tinfo)->ref)) 6649 return; 6650 6651 if (--(*tinfo)->ref) 6652 return; 6653 6654 user_buffer_free(tinfo); 6655 } 6656 6657 /** 6658 * trace_user_fault_init - Allocate or reference a per CPU buffer 6659 * @tinfo: A pointer to the trace buffer descriptor 6660 * @size: The size to allocate each per CPU buffer 6661 * 6662 * Create a per CPU buffer that can be used to copy from user space 6663 * in a task context. When calling trace_user_fault_read(), preemption 6664 * must be disabled, and it will enable preemption and copy user 6665 * space data to the buffer. If any context switches occur, it will 6666 * retry until it succeeds without a context switch, knowing the buffer 6667 * is still valid. 6668 * 6669 * Returns 0 on success, negative on failure. 6670 */ 6671 int trace_user_fault_init(struct trace_user_buf_info *tinfo, size_t size) 6672 { 6673 int ret; 6674 6675 if (!tinfo) 6676 return -EINVAL; 6677 6678 guard(mutex)(&trace_user_buffer_mutex); 6679 6680 ret = user_buffer_init(&tinfo, size); 6681 if (ret < 0) 6682 trace_user_fault_destroy(tinfo); 6683 6684 return ret; 6685 } 6686 6687 /** 6688 * trace_user_fault_get - up the ref count for the user buffer 6689 * @tinfo: A pointer to the trace buffer descriptor 6690 * 6691 * Ups the ref count of the trace buffer. 6692 * 6693 * Returns the new ref count. 6694 */ 6695 int trace_user_fault_get(struct trace_user_buf_info *tinfo) 6696 { 6697 if (!tinfo) 6698 return -1; 6699 6700 guard(mutex)(&trace_user_buffer_mutex); 6701 6702 tinfo->ref++; 6703 return tinfo->ref; 6704 } 6705 6706 /** 6707 * trace_user_fault_put - dereference a per cpu trace buffer 6708 * @tinfo: The @tinfo that was passed to trace_user_fault_get() 6709 * 6710 * Decrement the ref count of @tinfo. 6711 * 6712 * Returns the new refcount (negative on error). 6713 */ 6714 int trace_user_fault_put(struct trace_user_buf_info *tinfo) 6715 { 6716 guard(mutex)(&trace_user_buffer_mutex); 6717 6718 if (WARN_ON_ONCE(!tinfo || !tinfo->ref)) 6719 return -1; 6720 6721 --tinfo->ref; 6722 return tinfo->ref; 6723 } 6724 6725 /** 6726 * trace_user_fault_read - Read user space into a per CPU buffer 6727 * @tinfo: The @tinfo initialized by trace_user_fault_init() 6728 * @ptr: The user space pointer to read 6729 * @size: The size of user space to read. 6730 * @copy_func: Optional function to use to copy from user space 6731 * @data: Data to pass to copy_func if it was supplied 6732 * 6733 * Preemption must be disabled when this is called, and must not 6734 * be enabled while using the returned buffer. 6735 * This does the copying from user space into a per CPU buffer. 6736 * 6737 * The @size must not be greater than the size passed in to 6738 * trace_user_fault_init().
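 *
 * A minimal calling sketch, mirroring how tracing_mark_write() below uses
 * this helper (the buffer descriptor is assumed to have been set up with
 * trace_user_fault_init() and kept alive with trace_user_fault_get()):
 *
 *	guard(preempt_notrace)();
 *	buf = trace_user_fault_read(tinfo, ptr, size, NULL, NULL);
 *	if (!buf)
 *		return -EFAULT;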
6739 * 6740 * If @copy_func is NULL, trace_user_fault_read() will use copy_from_user(), 6741 * otherwise it will call @copy_func. It will call @copy_func with: 6742 * 6743 * buffer: the per CPU buffer of the @tinfo. 6744 * ptr: The pointer @ptr to user space to read 6745 * size: The @size of the ptr to read 6746 * data: The @data parameter 6747 * 6748 * It is expected that @copy_func will return 0 on success and non zero 6749 * if there was a fault. 6750 * 6751 * Returns a pointer to the buffer with the content read from @ptr. 6752 * Preemption must remain disabled while the caller accesses the 6753 * buffer returned by this function. 6754 * Returns NULL if there was a fault, or the size passed in is 6755 * greater than the size passed to trace_user_fault_init(). 6756 */ 6757 char *trace_user_fault_read(struct trace_user_buf_info *tinfo, 6758 const char __user *ptr, size_t size, 6759 trace_user_buf_copy copy_func, void *data) 6760 { 6761 int cpu = smp_processor_id(); 6762 char *buffer = per_cpu_ptr(tinfo->tbuf, cpu)->buf; 6763 unsigned int cnt; 6764 int trys = 0; 6765 int ret; 6766 6767 lockdep_assert_preemption_disabled(); 6768 6769 /* 6770 * It's up to the caller to not try to copy more than it said 6771 * it would. 6772 */ 6773 if (size > tinfo->size) 6774 return NULL; 6775 6776 /* 6777 * This acts similar to a seqcount. The per CPU context switches are 6778 * recorded, migration is disabled and preemption is enabled. The 6779 * read of the user space memory is copied into the per CPU buffer. 6780 * Preemption is disabled again, and if the per CPU context switches count 6781 * is still the same, it means the buffer has not been corrupted. 6782 * If the count is different, it is assumed the buffer is corrupted 6783 * and reading must be tried again. 6784 */ 6785 6786 do { 6787 /* 6788 * If for some reason, copy_from_user() always causes a context 6789 * switch, this would then cause an infinite loop. 6790 * If this task is preempted by another user space task, it 6791 * will cause this task to try again. But just in case something 6792 * changes where the copying from user space causes another task 6793 * to run, prevent this from going into an infinite loop. 6794 * 100 tries should be plenty. 6795 */ 6796 if (WARN_ONCE(trys++ > 100, "Error: Too many tries to read user space")) 6797 return NULL; 6798 6799 /* Read the current CPU context switch counter */ 6800 cnt = nr_context_switches_cpu(cpu); 6801 6802 /* 6803 * Preemption is going to be enabled, but this task must 6804 * remain on this CPU. 6805 */ 6806 migrate_disable(); 6807 6808 /* 6809 * Now preemption is being enabled and another task can come in 6810 * and use the same buffer and corrupt our data. 6811 */ 6812 preempt_enable_notrace(); 6813 6814 /* Make sure preemption is enabled here */ 6815 lockdep_assert_preemption_enabled(); 6816 6817 if (copy_func) { 6818 ret = copy_func(buffer, ptr, size, data); 6819 } else { 6820 ret = __copy_from_user(buffer, ptr, size); 6821 } 6822 6823 preempt_disable_notrace(); 6824 migrate_enable(); 6825 6826 /* if it faulted, no need to test if the buffer was corrupted */ 6827 if (ret) 6828 return NULL; 6829 6830 /* 6831 * Preemption is disabled again, now check the per CPU context 6832 * switch counter. If it doesn't match, then another user space 6833 * process may have schedule in and corrupted our buffer. In that 6834 * case the copying must be retried. 
6835 */ 6836 } while (nr_context_switches_cpu(cpu) != cnt); 6837 6838 return buffer; 6839 } 6840 6841 static ssize_t 6842 tracing_mark_write(struct file *filp, const char __user *ubuf, 6843 size_t cnt, loff_t *fpos) 6844 { 6845 struct trace_array *tr = filp->private_data; 6846 ssize_t written = -ENODEV; 6847 unsigned long ip; 6848 char *buf; 6849 6850 if (unlikely(tracing_disabled)) 6851 return -EINVAL; 6852 6853 if (!(tr->trace_flags & TRACE_ITER(MARKERS))) 6854 return -EINVAL; 6855 6856 if ((ssize_t)cnt < 0) 6857 return -EINVAL; 6858 6859 if (cnt > TRACE_MARKER_MAX_SIZE) 6860 cnt = TRACE_MARKER_MAX_SIZE; 6861 6862 /* Must have preemption disabled while having access to the buffer */ 6863 guard(preempt_notrace)(); 6864 6865 buf = trace_user_fault_read(trace_user_buffer, ubuf, cnt, NULL, NULL); 6866 if (!buf) 6867 return -EFAULT; 6868 6869 /* The selftests expect this function to be the IP address */ 6870 ip = _THIS_IP_; 6871 6872 /* The global trace_marker can go to multiple instances */ 6873 if (tr == &global_trace) { 6874 guard(rcu)(); 6875 list_for_each_entry_rcu(tr, &marker_copies, marker_list) { 6876 written = write_marker_to_buffer(tr, buf, cnt, ip); 6877 if (written < 0) 6878 break; 6879 } 6880 } else { 6881 written = write_marker_to_buffer(tr, buf, cnt, ip); 6882 } 6883 6884 return written; 6885 } 6886 6887 static ssize_t write_raw_marker_to_buffer(struct trace_array *tr, 6888 const char *buf, size_t cnt) 6889 { 6890 struct ring_buffer_event *event; 6891 struct trace_buffer *buffer; 6892 struct raw_data_entry *entry; 6893 ssize_t written; 6894 size_t size; 6895 6896 /* cnt includes both the entry->id and the data behind it. */ 6897 size = struct_offset(entry, id) + cnt; 6898 6899 buffer = tr->array_buffer.buffer; 6900 6901 if (size > ring_buffer_max_event_size(buffer)) 6902 return -EINVAL; 6903 6904 event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size, 6905 tracing_gen_ctx()); 6906 if (!event) 6907 /* Ring buffer disabled, return as if not open for write */ 6908 return -EBADF; 6909 6910 entry = ring_buffer_event_data(event); 6911 unsafe_memcpy(&entry->id, buf, cnt, 6912 "id and content already reserved on ring buffer" 6913 "'buf' includes the 'id' and the data." 
6914 "'entry' was allocated with cnt from 'id'."); 6915 written = cnt; 6916 6917 __buffer_unlock_commit(buffer, event); 6918 6919 return written; 6920 } 6921 6922 static ssize_t 6923 tracing_mark_raw_write(struct file *filp, const char __user *ubuf, 6924 size_t cnt, loff_t *fpos) 6925 { 6926 struct trace_array *tr = filp->private_data; 6927 ssize_t written = -ENODEV; 6928 char *buf; 6929 6930 if (unlikely(tracing_disabled)) 6931 return -EINVAL; 6932 6933 if (!(tr->trace_flags & TRACE_ITER(MARKERS))) 6934 return -EINVAL; 6935 6936 /* The marker must at least have a tag id */ 6937 if (cnt < sizeof(unsigned int)) 6938 return -EINVAL; 6939 6940 /* raw write is all or nothing */ 6941 if (cnt > TRACE_MARKER_MAX_SIZE) 6942 return -EINVAL; 6943 6944 /* Must have preemption disabled while having access to the buffer */ 6945 guard(preempt_notrace)(); 6946 6947 buf = trace_user_fault_read(trace_user_buffer, ubuf, cnt, NULL, NULL); 6948 if (!buf) 6949 return -EFAULT; 6950 6951 /* The global trace_marker_raw can go to multiple instances */ 6952 if (tr == &global_trace) { 6953 guard(rcu)(); 6954 list_for_each_entry_rcu(tr, &marker_copies, marker_list) { 6955 written = write_raw_marker_to_buffer(tr, buf, cnt); 6956 if (written < 0) 6957 break; 6958 } 6959 } else { 6960 written = write_raw_marker_to_buffer(tr, buf, cnt); 6961 } 6962 6963 return written; 6964 } 6965 6966 static int tracing_mark_open(struct inode *inode, struct file *filp) 6967 { 6968 int ret; 6969 6970 scoped_guard(mutex, &trace_user_buffer_mutex) { 6971 if (!trace_user_buffer) { 6972 ret = user_buffer_init(&trace_user_buffer, TRACE_MARKER_MAX_SIZE); 6973 if (ret < 0) 6974 return ret; 6975 } else { 6976 trace_user_buffer->ref++; 6977 } 6978 } 6979 6980 stream_open(inode, filp); 6981 ret = tracing_open_generic_tr(inode, filp); 6982 if (ret < 0) 6983 user_buffer_put(&trace_user_buffer); 6984 return ret; 6985 } 6986 6987 static int tracing_mark_release(struct inode *inode, struct file *file) 6988 { 6989 user_buffer_put(&trace_user_buffer); 6990 return tracing_release_generic_tr(inode, file); 6991 } 6992 6993 static int tracing_clock_show(struct seq_file *m, void *v) 6994 { 6995 struct trace_array *tr = m->private; 6996 int i; 6997 6998 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) 6999 seq_printf(m, 7000 "%s%s%s%s", i ? " " : "", 7001 i == tr->clock_id ? "[" : "", trace_clocks[i].name, 7002 i == tr->clock_id ? "]" : ""); 7003 seq_putc(m, '\n'); 7004 7005 return 0; 7006 } 7007 7008 int tracing_set_clock(struct trace_array *tr, const char *clockstr) 7009 { 7010 int i; 7011 7012 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) { 7013 if (strcmp(trace_clocks[i].name, clockstr) == 0) 7014 break; 7015 } 7016 if (i == ARRAY_SIZE(trace_clocks)) 7017 return -EINVAL; 7018 7019 guard(mutex)(&trace_types_lock); 7020 7021 tr->clock_id = i; 7022 7023 ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func); 7024 7025 /* 7026 * New clock may not be consistent with the previous clock. 7027 * Reset the buffer so that it doesn't have incomparable timestamps. 
7028 */ 7029 tracing_reset_online_cpus(&tr->array_buffer); 7030 7031 #ifdef CONFIG_TRACER_SNAPSHOT 7032 if (tr->snapshot_buffer.buffer) 7033 ring_buffer_set_clock(tr->snapshot_buffer.buffer, trace_clocks[i].func); 7034 tracing_reset_online_cpus(&tr->snapshot_buffer); 7035 #endif 7036 update_last_data_if_empty(tr); 7037 7038 if (tr->scratch && !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) { 7039 struct trace_scratch *tscratch = tr->scratch; 7040 7041 tscratch->clock_id = i; 7042 } 7043 7044 return 0; 7045 } 7046 7047 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf, 7048 size_t cnt, loff_t *fpos) 7049 { 7050 struct seq_file *m = filp->private_data; 7051 struct trace_array *tr = m->private; 7052 char buf[64]; 7053 const char *clockstr; 7054 int ret; 7055 7056 if (cnt >= sizeof(buf)) 7057 return -EINVAL; 7058 7059 if (copy_from_user(buf, ubuf, cnt)) 7060 return -EFAULT; 7061 7062 buf[cnt] = 0; 7063 7064 clockstr = strstrip(buf); 7065 7066 ret = tracing_set_clock(tr, clockstr); 7067 if (ret) 7068 return ret; 7069 7070 *fpos += cnt; 7071 7072 return cnt; 7073 } 7074 7075 static int tracing_clock_open(struct inode *inode, struct file *file) 7076 { 7077 struct trace_array *tr = inode->i_private; 7078 int ret; 7079 7080 ret = tracing_check_open_get_tr(tr); 7081 if (ret) 7082 return ret; 7083 7084 ret = single_open(file, tracing_clock_show, inode->i_private); 7085 if (ret < 0) 7086 trace_array_put(tr); 7087 7088 return ret; 7089 } 7090 7091 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v) 7092 { 7093 struct trace_array *tr = m->private; 7094 7095 guard(mutex)(&trace_types_lock); 7096 7097 if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer)) 7098 seq_puts(m, "delta [absolute]\n"); 7099 else 7100 seq_puts(m, "[delta] absolute\n"); 7101 7102 return 0; 7103 } 7104 7105 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file) 7106 { 7107 struct trace_array *tr = inode->i_private; 7108 int ret; 7109 7110 ret = tracing_check_open_get_tr(tr); 7111 if (ret) 7112 return ret; 7113 7114 ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private); 7115 if (ret < 0) 7116 trace_array_put(tr); 7117 7118 return ret; 7119 } 7120 7121 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe) 7122 { 7123 if (rbe == this_cpu_read(trace_buffered_event)) 7124 return ring_buffer_time_stamp(buffer); 7125 7126 return ring_buffer_event_time_stamp(buffer, rbe); 7127 } 7128 7129 struct ftrace_buffer_info { 7130 struct trace_iterator iter; 7131 void *spare; 7132 unsigned int spare_cpu; 7133 unsigned int spare_size; 7134 unsigned int read; 7135 }; 7136 7137 #ifdef CONFIG_TRACER_SNAPSHOT 7138 static int tracing_snapshot_open(struct inode *inode, struct file *file) 7139 { 7140 struct trace_array *tr = inode->i_private; 7141 struct trace_iterator *iter; 7142 struct seq_file *m; 7143 int ret; 7144 7145 ret = tracing_check_open_get_tr(tr); 7146 if (ret) 7147 return ret; 7148 7149 if (file->f_mode & FMODE_READ) { 7150 iter = __tracing_open(inode, file, true); 7151 if (IS_ERR(iter)) 7152 ret = PTR_ERR(iter); 7153 } else { 7154 /* Writes still need the seq_file to hold the private data */ 7155 ret = -ENOMEM; 7156 m = kzalloc(sizeof(*m), GFP_KERNEL); 7157 if (!m) 7158 goto out; 7159 iter = kzalloc(sizeof(*iter), GFP_KERNEL); 7160 if (!iter) { 7161 kfree(m); 7162 goto out; 7163 } 7164 ret = 0; 7165 7166 iter->tr = tr; 7167 iter->array_buffer = &tr->snapshot_buffer; 7168 iter->cpu_file = tracing_get_cpu(inode); 7169 m->private 
= iter; 7170 file->private_data = m; 7171 } 7172 out: 7173 if (ret < 0) 7174 trace_array_put(tr); 7175 7176 return ret; 7177 } 7178 7179 static void tracing_swap_cpu_buffer(void *tr) 7180 { 7181 update_max_tr_single((struct trace_array *)tr, current, smp_processor_id()); 7182 } 7183 7184 static ssize_t 7185 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt, 7186 loff_t *ppos) 7187 { 7188 struct seq_file *m = filp->private_data; 7189 struct trace_iterator *iter = m->private; 7190 struct trace_array *tr = iter->tr; 7191 unsigned long val; 7192 int ret; 7193 7194 ret = tracing_update_buffers(tr); 7195 if (ret < 0) 7196 return ret; 7197 7198 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 7199 if (ret) 7200 return ret; 7201 7202 guard(mutex)(&trace_types_lock); 7203 7204 if (tracer_uses_snapshot(tr->current_trace)) 7205 return -EBUSY; 7206 7207 local_irq_disable(); 7208 arch_spin_lock(&tr->max_lock); 7209 if (tr->cond_snapshot) 7210 ret = -EBUSY; 7211 arch_spin_unlock(&tr->max_lock); 7212 local_irq_enable(); 7213 if (ret) 7214 return ret; 7215 7216 switch (val) { 7217 case 0: 7218 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) 7219 return -EINVAL; 7220 if (tr->allocated_snapshot) 7221 free_snapshot(tr); 7222 break; 7223 case 1: 7224 /* Only allow per-cpu swap if the ring buffer supports it */ 7225 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP 7226 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) 7227 return -EINVAL; 7228 #endif 7229 if (tr->allocated_snapshot) 7230 ret = resize_buffer_duplicate_size(&tr->snapshot_buffer, 7231 &tr->array_buffer, iter->cpu_file); 7232 7233 ret = tracing_arm_snapshot_locked(tr); 7234 if (ret) 7235 return ret; 7236 7237 /* Now, we're going to swap */ 7238 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) { 7239 local_irq_disable(); 7240 update_max_tr(tr, current, smp_processor_id(), NULL); 7241 local_irq_enable(); 7242 } else { 7243 smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer, 7244 (void *)tr, 1); 7245 } 7246 tracing_disarm_snapshot(tr); 7247 break; 7248 default: 7249 if (tr->allocated_snapshot) { 7250 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) 7251 tracing_reset_online_cpus(&tr->snapshot_buffer); 7252 else 7253 tracing_reset_cpu(&tr->snapshot_buffer, iter->cpu_file); 7254 } 7255 break; 7256 } 7257 7258 if (ret >= 0) { 7259 *ppos += cnt; 7260 ret = cnt; 7261 } 7262 7263 return ret; 7264 } 7265 7266 static int tracing_snapshot_release(struct inode *inode, struct file *file) 7267 { 7268 struct seq_file *m = file->private_data; 7269 int ret; 7270 7271 ret = tracing_release(inode, file); 7272 7273 if (file->f_mode & FMODE_READ) 7274 return ret; 7275 7276 /* If write only, the seq_file is just a stub */ 7277 if (m) 7278 kfree(m->private); 7279 kfree(m); 7280 7281 return 0; 7282 } 7283 7284 static int tracing_buffers_open(struct inode *inode, struct file *filp); 7285 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf, 7286 size_t count, loff_t *ppos); 7287 static int tracing_buffers_release(struct inode *inode, struct file *file); 7288 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos, 7289 struct pipe_inode_info *pipe, size_t len, unsigned int flags); 7290 7291 static int snapshot_raw_open(struct inode *inode, struct file *filp) 7292 { 7293 struct ftrace_buffer_info *info; 7294 int ret; 7295 7296 /* The following checks for tracefs lockdown */ 7297 ret = tracing_buffers_open(inode, filp); 7298 if (ret < 0) 7299 return ret; 7300 7301 info = filp->private_data; 7302 7303 if 
(tracer_uses_snapshot(info->iter.trace)) { 7304 tracing_buffers_release(inode, filp); 7305 return -EBUSY; 7306 } 7307 7308 info->iter.snapshot = true; 7309 info->iter.array_buffer = &info->iter.tr->snapshot_buffer; 7310 7311 return ret; 7312 } 7313 7314 #endif /* CONFIG_TRACER_SNAPSHOT */ 7315 7316 7317 static const struct file_operations tracing_thresh_fops = { 7318 .open = tracing_open_generic, 7319 .read = tracing_thresh_read, 7320 .write = tracing_thresh_write, 7321 .llseek = generic_file_llseek, 7322 }; 7323 7324 #ifdef CONFIG_TRACER_MAX_TRACE 7325 static const struct file_operations tracing_max_lat_fops = { 7326 .open = tracing_open_generic_tr, 7327 .read = tracing_max_lat_read, 7328 .write = tracing_max_lat_write, 7329 .llseek = generic_file_llseek, 7330 .release = tracing_release_generic_tr, 7331 }; 7332 #endif 7333 7334 static const struct file_operations set_tracer_fops = { 7335 .open = tracing_open_generic_tr, 7336 .read = tracing_set_trace_read, 7337 .write = tracing_set_trace_write, 7338 .llseek = generic_file_llseek, 7339 .release = tracing_release_generic_tr, 7340 }; 7341 7342 static const struct file_operations tracing_pipe_fops = { 7343 .open = tracing_open_pipe, 7344 .poll = tracing_poll_pipe, 7345 .read = tracing_read_pipe, 7346 .splice_read = tracing_splice_read_pipe, 7347 .release = tracing_release_pipe, 7348 }; 7349 7350 static const struct file_operations tracing_entries_fops = { 7351 .open = tracing_open_generic_tr, 7352 .read = tracing_entries_read, 7353 .write = tracing_entries_write, 7354 .llseek = generic_file_llseek, 7355 .release = tracing_release_generic_tr, 7356 }; 7357 7358 static const struct file_operations tracing_syscall_buf_fops = { 7359 .open = tracing_open_generic_tr, 7360 .read = tracing_syscall_buf_read, 7361 .write = tracing_syscall_buf_write, 7362 .llseek = generic_file_llseek, 7363 .release = tracing_release_generic_tr, 7364 }; 7365 7366 static const struct file_operations tracing_buffer_meta_fops = { 7367 .open = tracing_buffer_meta_open, 7368 .read = seq_read, 7369 .llseek = seq_lseek, 7370 .release = tracing_seq_release, 7371 }; 7372 7373 static const struct file_operations tracing_total_entries_fops = { 7374 .open = tracing_open_generic_tr, 7375 .read = tracing_total_entries_read, 7376 .llseek = generic_file_llseek, 7377 .release = tracing_release_generic_tr, 7378 }; 7379 7380 static const struct file_operations tracing_free_buffer_fops = { 7381 .open = tracing_open_generic_tr, 7382 .write = tracing_free_buffer_write, 7383 .release = tracing_free_buffer_release, 7384 }; 7385 7386 static const struct file_operations tracing_mark_fops = { 7387 .open = tracing_mark_open, 7388 .write = tracing_mark_write, 7389 .release = tracing_mark_release, 7390 }; 7391 7392 static const struct file_operations tracing_mark_raw_fops = { 7393 .open = tracing_mark_open, 7394 .write = tracing_mark_raw_write, 7395 .release = tracing_mark_release, 7396 }; 7397 7398 static const struct file_operations trace_clock_fops = { 7399 .open = tracing_clock_open, 7400 .read = seq_read, 7401 .llseek = seq_lseek, 7402 .release = tracing_single_release_tr, 7403 .write = tracing_clock_write, 7404 }; 7405 7406 static const struct file_operations trace_time_stamp_mode_fops = { 7407 .open = tracing_time_stamp_mode_open, 7408 .read = seq_read, 7409 .llseek = seq_lseek, 7410 .release = tracing_single_release_tr, 7411 }; 7412 7413 static const struct file_operations last_boot_fops = { 7414 .open = tracing_last_boot_open, 7415 .read = seq_read, 7416 .llseek = seq_lseek, 7417 .release = 
tracing_seq_release, 7418 }; 7419 7420 #ifdef CONFIG_TRACER_SNAPSHOT 7421 static const struct file_operations snapshot_fops = { 7422 .open = tracing_snapshot_open, 7423 .read = seq_read, 7424 .write = tracing_snapshot_write, 7425 .llseek = tracing_lseek, 7426 .release = tracing_snapshot_release, 7427 }; 7428 7429 static const struct file_operations snapshot_raw_fops = { 7430 .open = snapshot_raw_open, 7431 .read = tracing_buffers_read, 7432 .release = tracing_buffers_release, 7433 .splice_read = tracing_buffers_splice_read, 7434 }; 7435 7436 #endif /* CONFIG_TRACER_SNAPSHOT */ 7437 7438 /* 7439 * trace_min_max_write - Write a u64 value to a trace_min_max_param struct 7440 * @filp: The active open file structure 7441 * @ubuf: The userspace provided buffer to read value into 7442 * @cnt: The maximum number of bytes to read 7443 * @ppos: The current "file" position 7444 * 7445 * This function implements the write interface for a struct trace_min_max_param. 7446 * The filp->private_data must point to a trace_min_max_param structure that 7447 * defines where to write the value, the min and the max acceptable values, 7448 * and a lock to protect the write. 7449 */ 7450 static ssize_t 7451 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) 7452 { 7453 struct trace_min_max_param *param = filp->private_data; 7454 u64 val; 7455 int err; 7456 7457 if (!param) 7458 return -EFAULT; 7459 7460 err = kstrtoull_from_user(ubuf, cnt, 10, &val); 7461 if (err) 7462 return err; 7463 7464 if (param->lock) 7465 mutex_lock(param->lock); 7466 7467 if (param->min && val < *param->min) 7468 err = -EINVAL; 7469 7470 if (param->max && val > *param->max) 7471 err = -EINVAL; 7472 7473 if (!err) 7474 *param->val = val; 7475 7476 if (param->lock) 7477 mutex_unlock(param->lock); 7478 7479 if (err) 7480 return err; 7481 7482 return cnt; 7483 } 7484 7485 /* 7486 * trace_min_max_read - Read a u64 value from a trace_min_max_param struct 7487 * @filp: The active open file structure 7488 * @ubuf: The userspace provided buffer to read value into 7489 * @cnt: The maximum number of bytes to read 7490 * @ppos: The current "file" position 7491 * 7492 * This function implements the read interface for a struct trace_min_max_param. 7493 * The filp->private_data must point to a trace_min_max_param struct with valid 7494 * data. 
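 *
 * A rough setup sketch (the names here are hypothetical, not from this
 * file):
 *
 *	static u64 my_val;
 *	static u64 my_max = 100;
 *
 *	static struct trace_min_max_param my_param = {
 *		.val	= &my_val,
 *		.min	= NULL,
 *		.max	= &my_max,
 *		.lock	= NULL,
 *	};
 *
 * with the corresponding tracefs file wired up to &trace_min_max_fops.
 * Leaving min, max or lock NULL simply disables the corresponding bound
 * or locking, as both handlers check these members before using them.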
7495 */ 7496 static ssize_t 7497 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) 7498 { 7499 struct trace_min_max_param *param = filp->private_data; 7500 char buf[U64_STR_SIZE]; 7501 int len; 7502 u64 val; 7503 7504 if (!param) 7505 return -EFAULT; 7506 7507 val = *param->val; 7508 7509 if (cnt > sizeof(buf)) 7510 cnt = sizeof(buf); 7511 7512 len = snprintf(buf, sizeof(buf), "%llu\n", val); 7513 7514 return simple_read_from_buffer(ubuf, cnt, ppos, buf, len); 7515 } 7516 7517 const struct file_operations trace_min_max_fops = { 7518 .open = tracing_open_generic, 7519 .read = trace_min_max_read, 7520 .write = trace_min_max_write, 7521 }; 7522 7523 #define TRACING_LOG_ERRS_MAX 8 7524 #define TRACING_LOG_LOC_MAX 128 7525 7526 #define CMD_PREFIX " Command: " 7527 7528 struct err_info { 7529 const char **errs; /* ptr to loc-specific array of err strings */ 7530 u8 type; /* index into errs -> specific err string */ 7531 u16 pos; /* caret position */ 7532 u64 ts; 7533 }; 7534 7535 struct tracing_log_err { 7536 struct list_head list; 7537 struct err_info info; 7538 char loc[TRACING_LOG_LOC_MAX]; /* err location */ 7539 char *cmd; /* what caused err */ 7540 }; 7541 7542 static DEFINE_MUTEX(tracing_err_log_lock); 7543 7544 static struct tracing_log_err *alloc_tracing_log_err(int len) 7545 { 7546 struct tracing_log_err *err; 7547 7548 err = kzalloc(sizeof(*err), GFP_KERNEL); 7549 if (!err) 7550 return ERR_PTR(-ENOMEM); 7551 7552 err->cmd = kzalloc(len, GFP_KERNEL); 7553 if (!err->cmd) { 7554 kfree(err); 7555 return ERR_PTR(-ENOMEM); 7556 } 7557 7558 return err; 7559 } 7560 7561 static void free_tracing_log_err(struct tracing_log_err *err) 7562 { 7563 kfree(err->cmd); 7564 kfree(err); 7565 } 7566 7567 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr, 7568 int len) 7569 { 7570 struct tracing_log_err *err; 7571 char *cmd; 7572 7573 if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) { 7574 err = alloc_tracing_log_err(len); 7575 if (PTR_ERR(err) != -ENOMEM) 7576 tr->n_err_log_entries++; 7577 7578 return err; 7579 } 7580 cmd = kzalloc(len, GFP_KERNEL); 7581 if (!cmd) 7582 return ERR_PTR(-ENOMEM); 7583 err = list_first_entry(&tr->err_log, struct tracing_log_err, list); 7584 kfree(err->cmd); 7585 err->cmd = cmd; 7586 list_del(&err->list); 7587 7588 return err; 7589 } 7590 7591 /** 7592 * err_pos - find the position of a string within a command for error careting 7593 * @cmd: The tracing command that caused the error 7594 * @str: The string to position the caret at within @cmd 7595 * 7596 * Finds the position of the first occurrence of @str within @cmd. The 7597 * return value can be passed to tracing_log_err() for caret placement 7598 * within @cmd. 7599 * 7600 * Returns the index within @cmd of the first occurrence of @str or 0 7601 * if @str was not found. 
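 *
 * For example (hypothetical input), err_pos("event/filter", "filter")
 * returns 6, the offset of "filter" within the command string.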
7602 */ 7603 unsigned int err_pos(char *cmd, const char *str) 7604 { 7605 char *found; 7606 7607 if (WARN_ON(!strlen(cmd))) 7608 return 0; 7609 7610 found = strstr(cmd, str); 7611 if (found) 7612 return found - cmd; 7613 7614 return 0; 7615 } 7616 7617 /** 7618 * tracing_log_err - write an error to the tracing error log 7619 * @tr: The associated trace array for the error (NULL for top level array) 7620 * @loc: A string describing where the error occurred 7621 * @cmd: The tracing command that caused the error 7622 * @errs: The array of loc-specific static error strings 7623 * @type: The index into errs[], which produces the specific static err string 7624 * @pos: The position the caret should be placed in the cmd 7625 * 7626 * Writes an error into tracing/error_log of the form: 7627 * 7628 * <loc>: error: <text> 7629 * Command: <cmd> 7630 * ^ 7631 * 7632 * tracing/error_log is a small log file containing the last 7633 * TRACING_LOG_ERRS_MAX errors (8). Memory for errors isn't allocated 7634 * unless there has been a tracing error, and the error log can be 7635 * cleared and have its memory freed by writing the empty string in 7636 * truncation mode to it i.e. echo > tracing/error_log. 7637 * 7638 * NOTE: the @errs array along with the @type param are used to 7639 * produce a static error string - this string is not copied and saved 7640 * when the error is logged - only a pointer to it is saved. See 7641 * existing callers for examples of how static strings are typically 7642 * defined for use with tracing_log_err(). 7643 */ 7644 void tracing_log_err(struct trace_array *tr, 7645 const char *loc, const char *cmd, 7646 const char **errs, u8 type, u16 pos) 7647 { 7648 struct tracing_log_err *err; 7649 int len = 0; 7650 7651 if (!tr) 7652 tr = &global_trace; 7653 7654 len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1; 7655 7656 guard(mutex)(&tracing_err_log_lock); 7657 7658 err = get_tracing_log_err(tr, len); 7659 if (PTR_ERR(err) == -ENOMEM) 7660 return; 7661 7662 snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc); 7663 snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd); 7664 7665 err->info.errs = errs; 7666 err->info.type = type; 7667 err->info.pos = pos; 7668 err->info.ts = local_clock(); 7669 7670 list_add_tail(&err->list, &tr->err_log); 7671 } 7672 7673 static void clear_tracing_err_log(struct trace_array *tr) 7674 { 7675 struct tracing_log_err *err, *next; 7676 7677 guard(mutex)(&tracing_err_log_lock); 7678 7679 list_for_each_entry_safe(err, next, &tr->err_log, list) { 7680 list_del(&err->list); 7681 free_tracing_log_err(err); 7682 } 7683 7684 tr->n_err_log_entries = 0; 7685 } 7686 7687 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos) 7688 { 7689 struct trace_array *tr = m->private; 7690 7691 mutex_lock(&tracing_err_log_lock); 7692 7693 return seq_list_start(&tr->err_log, *pos); 7694 } 7695 7696 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos) 7697 { 7698 struct trace_array *tr = m->private; 7699 7700 return seq_list_next(v, &tr->err_log, pos); 7701 } 7702 7703 static void tracing_err_log_seq_stop(struct seq_file *m, void *v) 7704 { 7705 mutex_unlock(&tracing_err_log_lock); 7706 } 7707 7708 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos) 7709 { 7710 u16 i; 7711 7712 for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++) 7713 seq_putc(m, ' '); 7714 for (i = 0; i < pos; i++) 7715 seq_putc(m, ' '); 7716 seq_puts(m, "^\n"); 7717 } 7718 7719 static int tracing_err_log_seq_show(struct seq_file *m, void *v) 
7720 { 7721 struct tracing_log_err *err = v; 7722 7723 if (err) { 7724 const char *err_text = err->info.errs[err->info.type]; 7725 u64 sec = err->info.ts; 7726 u32 nsec; 7727 7728 nsec = do_div(sec, NSEC_PER_SEC); 7729 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000, 7730 err->loc, err_text); 7731 seq_printf(m, "%s", err->cmd); 7732 tracing_err_log_show_pos(m, err->info.pos); 7733 } 7734 7735 return 0; 7736 } 7737 7738 static const struct seq_operations tracing_err_log_seq_ops = { 7739 .start = tracing_err_log_seq_start, 7740 .next = tracing_err_log_seq_next, 7741 .stop = tracing_err_log_seq_stop, 7742 .show = tracing_err_log_seq_show 7743 }; 7744 7745 static int tracing_err_log_open(struct inode *inode, struct file *file) 7746 { 7747 struct trace_array *tr = inode->i_private; 7748 int ret = 0; 7749 7750 ret = tracing_check_open_get_tr(tr); 7751 if (ret) 7752 return ret; 7753 7754 /* If this file was opened for write, then erase contents */ 7755 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) 7756 clear_tracing_err_log(tr); 7757 7758 if (file->f_mode & FMODE_READ) { 7759 ret = seq_open(file, &tracing_err_log_seq_ops); 7760 if (!ret) { 7761 struct seq_file *m = file->private_data; 7762 m->private = tr; 7763 } else { 7764 trace_array_put(tr); 7765 } 7766 } 7767 return ret; 7768 } 7769 7770 static ssize_t tracing_err_log_write(struct file *file, 7771 const char __user *buffer, 7772 size_t count, loff_t *ppos) 7773 { 7774 return count; 7775 } 7776 7777 static int tracing_err_log_release(struct inode *inode, struct file *file) 7778 { 7779 struct trace_array *tr = inode->i_private; 7780 7781 trace_array_put(tr); 7782 7783 if (file->f_mode & FMODE_READ) 7784 seq_release(inode, file); 7785 7786 return 0; 7787 } 7788 7789 static const struct file_operations tracing_err_log_fops = { 7790 .open = tracing_err_log_open, 7791 .write = tracing_err_log_write, 7792 .read = seq_read, 7793 .llseek = tracing_lseek, 7794 .release = tracing_err_log_release, 7795 }; 7796 7797 static int tracing_buffers_open(struct inode *inode, struct file *filp) 7798 { 7799 struct trace_array *tr = inode->i_private; 7800 struct ftrace_buffer_info *info; 7801 int ret; 7802 7803 ret = tracing_check_open_get_tr(tr); 7804 if (ret) 7805 return ret; 7806 7807 info = kvzalloc(sizeof(*info), GFP_KERNEL); 7808 if (!info) { 7809 trace_array_put(tr); 7810 return -ENOMEM; 7811 } 7812 7813 mutex_lock(&trace_types_lock); 7814 7815 info->iter.tr = tr; 7816 info->iter.cpu_file = tracing_get_cpu(inode); 7817 info->iter.trace = tr->current_trace; 7818 info->iter.array_buffer = &tr->array_buffer; 7819 info->spare = NULL; 7820 /* Force reading ring buffer for first read */ 7821 info->read = (unsigned int)-1; 7822 7823 filp->private_data = info; 7824 7825 tr->trace_ref++; 7826 7827 mutex_unlock(&trace_types_lock); 7828 7829 ret = nonseekable_open(inode, filp); 7830 if (ret < 0) 7831 trace_array_put(tr); 7832 7833 return ret; 7834 } 7835 7836 static __poll_t 7837 tracing_buffers_poll(struct file *filp, poll_table *poll_table) 7838 { 7839 struct ftrace_buffer_info *info = filp->private_data; 7840 struct trace_iterator *iter = &info->iter; 7841 7842 return trace_poll(iter, filp, poll_table); 7843 } 7844 7845 static ssize_t 7846 tracing_buffers_read(struct file *filp, char __user *ubuf, 7847 size_t count, loff_t *ppos) 7848 { 7849 struct ftrace_buffer_info *info = filp->private_data; 7850 struct trace_iterator *iter = &info->iter; 7851 void *trace_data; 7852 int page_size; 7853 ssize_t ret = 0; 7854 ssize_t size; 7855 7856 if 
(!count) 7857 return 0; 7858 7859 if (iter->snapshot && tracer_uses_snapshot(iter->tr->current_trace)) 7860 return -EBUSY; 7861 7862 page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer); 7863 7864 /* Make sure the spare matches the current sub buffer size */ 7865 if (info->spare) { 7866 if (page_size != info->spare_size) { 7867 ring_buffer_free_read_page(iter->array_buffer->buffer, 7868 info->spare_cpu, info->spare); 7869 info->spare = NULL; 7870 } 7871 } 7872 7873 if (!info->spare) { 7874 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer, 7875 iter->cpu_file); 7876 if (IS_ERR(info->spare)) { 7877 ret = PTR_ERR(info->spare); 7878 info->spare = NULL; 7879 } else { 7880 info->spare_cpu = iter->cpu_file; 7881 info->spare_size = page_size; 7882 } 7883 } 7884 if (!info->spare) 7885 return ret; 7886 7887 /* Do we have previous read data to read? */ 7888 if (info->read < page_size) 7889 goto read; 7890 7891 again: 7892 trace_access_lock(iter->cpu_file); 7893 ret = ring_buffer_read_page(iter->array_buffer->buffer, 7894 info->spare, 7895 count, 7896 iter->cpu_file, 0); 7897 trace_access_unlock(iter->cpu_file); 7898 7899 if (ret < 0) { 7900 if (trace_empty(iter) && !iter->closed) { 7901 if (update_last_data_if_empty(iter->tr)) 7902 return 0; 7903 7904 if ((filp->f_flags & O_NONBLOCK)) 7905 return -EAGAIN; 7906 7907 ret = wait_on_pipe(iter, 0); 7908 if (ret) 7909 return ret; 7910 7911 goto again; 7912 } 7913 return 0; 7914 } 7915 7916 info->read = 0; 7917 read: 7918 size = page_size - info->read; 7919 if (size > count) 7920 size = count; 7921 trace_data = ring_buffer_read_page_data(info->spare); 7922 ret = copy_to_user(ubuf, trace_data + info->read, size); 7923 if (ret == size) 7924 return -EFAULT; 7925 7926 size -= ret; 7927 7928 *ppos += size; 7929 info->read += size; 7930 7931 return size; 7932 } 7933 7934 static int tracing_buffers_flush(struct file *file, fl_owner_t id) 7935 { 7936 struct ftrace_buffer_info *info = file->private_data; 7937 struct trace_iterator *iter = &info->iter; 7938 7939 iter->closed = true; 7940 /* Make sure the waiters see the new wait_index */ 7941 (void)atomic_fetch_inc_release(&iter->wait_index); 7942 7943 ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file); 7944 7945 return 0; 7946 } 7947 7948 static int tracing_buffers_release(struct inode *inode, struct file *file) 7949 { 7950 struct ftrace_buffer_info *info = file->private_data; 7951 struct trace_iterator *iter = &info->iter; 7952 7953 guard(mutex)(&trace_types_lock); 7954 7955 iter->tr->trace_ref--; 7956 7957 __trace_array_put(iter->tr); 7958 7959 if (info->spare) 7960 ring_buffer_free_read_page(iter->array_buffer->buffer, 7961 info->spare_cpu, info->spare); 7962 kvfree(info); 7963 7964 return 0; 7965 } 7966 7967 struct buffer_ref { 7968 struct trace_buffer *buffer; 7969 void *page; 7970 int cpu; 7971 refcount_t refcount; 7972 }; 7973 7974 static void buffer_ref_release(struct buffer_ref *ref) 7975 { 7976 if (!refcount_dec_and_test(&ref->refcount)) 7977 return; 7978 ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page); 7979 kfree(ref); 7980 } 7981 7982 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe, 7983 struct pipe_buffer *buf) 7984 { 7985 struct buffer_ref *ref = (struct buffer_ref *)buf->private; 7986 7987 buffer_ref_release(ref); 7988 buf->private = 0; 7989 } 7990 7991 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe, 7992 struct pipe_buffer *buf) 7993 { 7994 struct buffer_ref *ref = (struct buffer_ref *)buf->private; 7995 
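	/*
	 * Refuse to take another reference once the count gets anywhere
	 * near overflowing; returning false tells the pipe code that this
	 * buffer could not be grabbed.
	 */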
7996 if (refcount_read(&ref->refcount) > INT_MAX/2) 7997 return false; 7998 7999 refcount_inc(&ref->refcount); 8000 return true; 8001 } 8002 8003 /* Pipe buffer operations for a buffer. */ 8004 static const struct pipe_buf_operations buffer_pipe_buf_ops = { 8005 .release = buffer_pipe_buf_release, 8006 .get = buffer_pipe_buf_get, 8007 }; 8008 8009 /* 8010 * Callback from splice_to_pipe(), if we need to release some pages 8011 * at the end of the spd in case we error'ed out in filling the pipe. 8012 */ 8013 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i) 8014 { 8015 struct buffer_ref *ref = 8016 (struct buffer_ref *)spd->partial[i].private; 8017 8018 buffer_ref_release(ref); 8019 spd->partial[i].private = 0; 8020 } 8021 8022 static ssize_t 8023 tracing_buffers_splice_read(struct file *file, loff_t *ppos, 8024 struct pipe_inode_info *pipe, size_t len, 8025 unsigned int flags) 8026 { 8027 struct ftrace_buffer_info *info = file->private_data; 8028 struct trace_iterator *iter = &info->iter; 8029 struct partial_page partial_def[PIPE_DEF_BUFFERS]; 8030 struct page *pages_def[PIPE_DEF_BUFFERS]; 8031 struct splice_pipe_desc spd = { 8032 .pages = pages_def, 8033 .partial = partial_def, 8034 .nr_pages_max = PIPE_DEF_BUFFERS, 8035 .ops = &buffer_pipe_buf_ops, 8036 .spd_release = buffer_spd_release, 8037 }; 8038 struct buffer_ref *ref; 8039 bool woken = false; 8040 int page_size; 8041 int entries, i; 8042 ssize_t ret = 0; 8043 8044 if (iter->snapshot && tracer_uses_snapshot(iter->tr->current_trace)) 8045 return -EBUSY; 8046 8047 page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer); 8048 if (*ppos & (page_size - 1)) 8049 return -EINVAL; 8050 8051 if (len & (page_size - 1)) { 8052 if (len < page_size) 8053 return -EINVAL; 8054 len &= (~(page_size - 1)); 8055 } 8056 8057 if (splice_grow_spd(pipe, &spd)) 8058 return -ENOMEM; 8059 8060 again: 8061 trace_access_lock(iter->cpu_file); 8062 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file); 8063 8064 for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) { 8065 struct page *page; 8066 int r; 8067 8068 ref = kzalloc(sizeof(*ref), GFP_KERNEL); 8069 if (!ref) { 8070 ret = -ENOMEM; 8071 break; 8072 } 8073 8074 refcount_set(&ref->refcount, 1); 8075 ref->buffer = iter->array_buffer->buffer; 8076 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file); 8077 if (IS_ERR(ref->page)) { 8078 ret = PTR_ERR(ref->page); 8079 ref->page = NULL; 8080 kfree(ref); 8081 break; 8082 } 8083 ref->cpu = iter->cpu_file; 8084 8085 r = ring_buffer_read_page(ref->buffer, ref->page, 8086 len, iter->cpu_file, 1); 8087 if (r < 0) { 8088 ring_buffer_free_read_page(ref->buffer, ref->cpu, 8089 ref->page); 8090 kfree(ref); 8091 break; 8092 } 8093 8094 page = virt_to_page(ring_buffer_read_page_data(ref->page)); 8095 8096 spd.pages[i] = page; 8097 spd.partial[i].len = page_size; 8098 spd.partial[i].offset = 0; 8099 spd.partial[i].private = (unsigned long)ref; 8100 spd.nr_pages++; 8101 *ppos += page_size; 8102 8103 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file); 8104 } 8105 8106 trace_access_unlock(iter->cpu_file); 8107 spd.nr_pages = i; 8108 8109 /* did we read anything? */ 8110 if (!spd.nr_pages) { 8111 8112 if (ret) 8113 goto out; 8114 8115 if (woken) 8116 goto out; 8117 8118 ret = -EAGAIN; 8119 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK)) 8120 goto out; 8121 8122 ret = wait_on_pipe(iter, iter->snapshot ? 
0 : iter->tr->buffer_percent); 8123 if (ret) 8124 goto out; 8125 8126 /* No need to wait after waking up when tracing is off */ 8127 if (!tracer_tracing_is_on(iter->tr)) 8128 goto out; 8129 8130 /* Iterate one more time to collect any new data then exit */ 8131 woken = true; 8132 8133 goto again; 8134 } 8135 8136 ret = splice_to_pipe(pipe, &spd); 8137 out: 8138 splice_shrink_spd(&spd); 8139 8140 return ret; 8141 } 8142 8143 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 8144 { 8145 struct ftrace_buffer_info *info = file->private_data; 8146 struct trace_iterator *iter = &info->iter; 8147 int err; 8148 8149 if (cmd == TRACE_MMAP_IOCTL_GET_READER) { 8150 if (!(file->f_flags & O_NONBLOCK)) { 8151 err = ring_buffer_wait(iter->array_buffer->buffer, 8152 iter->cpu_file, 8153 iter->tr->buffer_percent, 8154 NULL, NULL); 8155 if (err) 8156 return err; 8157 } 8158 8159 return ring_buffer_map_get_reader(iter->array_buffer->buffer, 8160 iter->cpu_file); 8161 } else if (cmd) { 8162 return -ENOTTY; 8163 } 8164 8165 /* 8166 * An ioctl call with cmd 0 to the ring buffer file will wake up all 8167 * waiters 8168 */ 8169 guard(mutex)(&trace_types_lock); 8170 8171 /* Make sure the waiters see the new wait_index */ 8172 (void)atomic_fetch_inc_release(&iter->wait_index); 8173 8174 ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file); 8175 8176 return 0; 8177 } 8178 8179 #ifdef CONFIG_TRACER_SNAPSHOT 8180 static int get_snapshot_map(struct trace_array *tr) 8181 { 8182 int err = 0; 8183 8184 /* 8185 * Called with mmap_lock held. lockdep would be unhappy if we would now 8186 * take trace_types_lock. Instead use the specific 8187 * snapshot_trigger_lock. 8188 */ 8189 spin_lock(&tr->snapshot_trigger_lock); 8190 8191 if (tr->snapshot || tr->mapped == UINT_MAX) 8192 err = -EBUSY; 8193 else 8194 tr->mapped++; 8195 8196 spin_unlock(&tr->snapshot_trigger_lock); 8197 8198 /* Wait for update_max_tr() to observe iter->tr->mapped */ 8199 if (tr->mapped == 1) 8200 synchronize_rcu(); 8201 8202 return err; 8203 8204 } 8205 static void put_snapshot_map(struct trace_array *tr) 8206 { 8207 spin_lock(&tr->snapshot_trigger_lock); 8208 if (!WARN_ON(!tr->mapped)) 8209 tr->mapped--; 8210 spin_unlock(&tr->snapshot_trigger_lock); 8211 } 8212 #else 8213 static inline int get_snapshot_map(struct trace_array *tr) { return 0; } 8214 static inline void put_snapshot_map(struct trace_array *tr) { } 8215 #endif 8216 8217 static void tracing_buffers_mmap_close(struct vm_area_struct *vma) 8218 { 8219 struct ftrace_buffer_info *info = vma->vm_file->private_data; 8220 struct trace_iterator *iter = &info->iter; 8221 8222 WARN_ON(ring_buffer_unmap(iter->array_buffer->buffer, iter->cpu_file)); 8223 put_snapshot_map(iter->tr); 8224 } 8225 8226 static int tracing_buffers_may_split(struct vm_area_struct *vma, unsigned long addr) 8227 { 8228 /* 8229 * Trace buffer mappings require the complete buffer including 8230 * the meta page. Partial mappings are not supported. 
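	 * Returning -EINVAL from ->may_split keeps the VMA from being split,
	 * so a partial munmap() or mprotect() of the mapping fails rather
	 * than leaving an incomplete mapping behind.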
8231 */ 8232 return -EINVAL; 8233 } 8234 8235 static const struct vm_operations_struct tracing_buffers_vmops = { 8236 .close = tracing_buffers_mmap_close, 8237 .may_split = tracing_buffers_may_split, 8238 }; 8239 8240 static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma) 8241 { 8242 struct ftrace_buffer_info *info = filp->private_data; 8243 struct trace_iterator *iter = &info->iter; 8244 int ret = 0; 8245 8246 /* A memmap'ed and backup buffers are not supported for user space mmap */ 8247 if (iter->tr->flags & (TRACE_ARRAY_FL_MEMMAP | TRACE_ARRAY_FL_VMALLOC)) 8248 return -ENODEV; 8249 8250 ret = get_snapshot_map(iter->tr); 8251 if (ret) 8252 return ret; 8253 8254 ret = ring_buffer_map(iter->array_buffer->buffer, iter->cpu_file, vma); 8255 if (ret) 8256 put_snapshot_map(iter->tr); 8257 8258 vma->vm_ops = &tracing_buffers_vmops; 8259 8260 return ret; 8261 } 8262 8263 static const struct file_operations tracing_buffers_fops = { 8264 .open = tracing_buffers_open, 8265 .read = tracing_buffers_read, 8266 .poll = tracing_buffers_poll, 8267 .release = tracing_buffers_release, 8268 .flush = tracing_buffers_flush, 8269 .splice_read = tracing_buffers_splice_read, 8270 .unlocked_ioctl = tracing_buffers_ioctl, 8271 .mmap = tracing_buffers_mmap, 8272 }; 8273 8274 static ssize_t 8275 tracing_stats_read(struct file *filp, char __user *ubuf, 8276 size_t count, loff_t *ppos) 8277 { 8278 struct inode *inode = file_inode(filp); 8279 struct trace_array *tr = inode->i_private; 8280 struct array_buffer *trace_buf = &tr->array_buffer; 8281 int cpu = tracing_get_cpu(inode); 8282 struct trace_seq *s; 8283 unsigned long cnt; 8284 unsigned long long t; 8285 unsigned long usec_rem; 8286 8287 s = kmalloc(sizeof(*s), GFP_KERNEL); 8288 if (!s) 8289 return -ENOMEM; 8290 8291 trace_seq_init(s); 8292 8293 cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu); 8294 trace_seq_printf(s, "entries: %ld\n", cnt); 8295 8296 cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu); 8297 trace_seq_printf(s, "overrun: %ld\n", cnt); 8298 8299 cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu); 8300 trace_seq_printf(s, "commit overrun: %ld\n", cnt); 8301 8302 cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu); 8303 trace_seq_printf(s, "bytes: %ld\n", cnt); 8304 8305 if (trace_clocks[tr->clock_id].in_ns) { 8306 /* local or global for trace_clock */ 8307 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu)); 8308 usec_rem = do_div(t, USEC_PER_SEC); 8309 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n", 8310 t, usec_rem); 8311 8312 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer)); 8313 usec_rem = do_div(t, USEC_PER_SEC); 8314 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem); 8315 } else { 8316 /* counter or tsc mode for trace_clock */ 8317 trace_seq_printf(s, "oldest event ts: %llu\n", 8318 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu)); 8319 8320 trace_seq_printf(s, "now ts: %llu\n", 8321 ring_buffer_time_stamp(trace_buf->buffer)); 8322 } 8323 8324 cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu); 8325 trace_seq_printf(s, "dropped events: %ld\n", cnt); 8326 8327 cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu); 8328 trace_seq_printf(s, "read events: %ld\n", cnt); 8329 8330 count = simple_read_from_buffer(ubuf, count, ppos, 8331 s->buffer, trace_seq_used(s)); 8332 8333 kfree(s); 8334 8335 return count; 8336 } 8337 8338 static const struct file_operations tracing_stats_fops = { 8339 .open = tracing_open_generic_tr, 8340 .read = tracing_stats_read, 
8341 .llseek = generic_file_llseek, 8342 .release = tracing_release_generic_tr, 8343 }; 8344 8345 #ifdef CONFIG_DYNAMIC_FTRACE 8346 8347 static ssize_t 8348 tracing_read_dyn_info(struct file *filp, char __user *ubuf, 8349 size_t cnt, loff_t *ppos) 8350 { 8351 ssize_t ret; 8352 char *buf; 8353 int r; 8354 8355 /* 512 should be plenty to hold the amount needed */ 8356 #define DYN_INFO_BUF_SIZE 512 8357 8358 buf = kmalloc(DYN_INFO_BUF_SIZE, GFP_KERNEL); 8359 if (!buf) 8360 return -ENOMEM; 8361 8362 r = scnprintf(buf, DYN_INFO_BUF_SIZE, 8363 "%ld pages:%ld groups: %ld\n" 8364 "ftrace boot update time = %llu (ns)\n" 8365 "ftrace module total update time = %llu (ns)\n", 8366 ftrace_update_tot_cnt, 8367 ftrace_number_of_pages, 8368 ftrace_number_of_groups, 8369 ftrace_update_time, 8370 ftrace_total_mod_time); 8371 8372 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 8373 kfree(buf); 8374 return ret; 8375 } 8376 8377 static const struct file_operations tracing_dyn_info_fops = { 8378 .open = tracing_open_generic, 8379 .read = tracing_read_dyn_info, 8380 .llseek = generic_file_llseek, 8381 }; 8382 #endif /* CONFIG_DYNAMIC_FTRACE */ 8383 8384 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) 8385 static void 8386 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, 8387 struct trace_array *tr, struct ftrace_probe_ops *ops, 8388 void *data) 8389 { 8390 tracing_snapshot_instance(tr); 8391 } 8392 8393 static void 8394 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, 8395 struct trace_array *tr, struct ftrace_probe_ops *ops, 8396 void *data) 8397 { 8398 struct ftrace_func_mapper *mapper = data; 8399 long *count = NULL; 8400 8401 if (mapper) 8402 count = (long *)ftrace_func_mapper_find_ip(mapper, ip); 8403 8404 if (count) { 8405 8406 if (*count <= 0) 8407 return; 8408 8409 (*count)--; 8410 } 8411 8412 tracing_snapshot_instance(tr); 8413 } 8414 8415 static int 8416 ftrace_snapshot_print(struct seq_file *m, unsigned long ip, 8417 struct ftrace_probe_ops *ops, void *data) 8418 { 8419 struct ftrace_func_mapper *mapper = data; 8420 long *count = NULL; 8421 8422 seq_printf(m, "%ps:", (void *)ip); 8423 8424 seq_puts(m, "snapshot"); 8425 8426 if (mapper) 8427 count = (long *)ftrace_func_mapper_find_ip(mapper, ip); 8428 8429 if (count) 8430 seq_printf(m, ":count=%ld\n", *count); 8431 else 8432 seq_puts(m, ":unlimited\n"); 8433 8434 return 0; 8435 } 8436 8437 static int 8438 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr, 8439 unsigned long ip, void *init_data, void **data) 8440 { 8441 struct ftrace_func_mapper *mapper = *data; 8442 8443 if (!mapper) { 8444 mapper = allocate_ftrace_func_mapper(); 8445 if (!mapper) 8446 return -ENOMEM; 8447 *data = mapper; 8448 } 8449 8450 return ftrace_func_mapper_add_ip(mapper, ip, init_data); 8451 } 8452 8453 static void 8454 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr, 8455 unsigned long ip, void *data) 8456 { 8457 struct ftrace_func_mapper *mapper = data; 8458 8459 if (!ip) { 8460 if (!mapper) 8461 return; 8462 free_ftrace_func_mapper(mapper, NULL); 8463 return; 8464 } 8465 8466 ftrace_func_mapper_remove_ip(mapper, ip); 8467 } 8468 8469 static struct ftrace_probe_ops snapshot_probe_ops = { 8470 .func = ftrace_snapshot, 8471 .print = ftrace_snapshot_print, 8472 }; 8473 8474 static struct ftrace_probe_ops snapshot_count_probe_ops = { 8475 .func = ftrace_count_snapshot, 8476 .print = ftrace_snapshot_print, 8477 .init = ftrace_snapshot_init, 8478 .free = ftrace_snapshot_free, 
8479 }; 8480 8481 static int 8482 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash, 8483 char *glob, char *cmd, char *param, int enable) 8484 { 8485 struct ftrace_probe_ops *ops; 8486 void *count = (void *)-1; 8487 char *number; 8488 int ret; 8489 8490 if (!tr) 8491 return -ENODEV; 8492 8493 /* hash funcs only work with set_ftrace_filter */ 8494 if (!enable) 8495 return -EINVAL; 8496 8497 ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops; 8498 8499 if (glob[0] == '!') { 8500 ret = unregister_ftrace_function_probe_func(glob+1, tr, ops); 8501 if (!ret) 8502 tracing_disarm_snapshot(tr); 8503 8504 return ret; 8505 } 8506 8507 if (!param) 8508 goto out_reg; 8509 8510 number = strsep(¶m, ":"); 8511 8512 if (!strlen(number)) 8513 goto out_reg; 8514 8515 /* 8516 * We use the callback data field (which is a pointer) 8517 * as our counter. 8518 */ 8519 ret = kstrtoul(number, 0, (unsigned long *)&count); 8520 if (ret) 8521 return ret; 8522 8523 out_reg: 8524 ret = tracing_arm_snapshot(tr); 8525 if (ret < 0) 8526 return ret; 8527 8528 ret = register_ftrace_function_probe(glob, tr, ops, count); 8529 if (ret < 0) 8530 tracing_disarm_snapshot(tr); 8531 8532 return ret < 0 ? ret : 0; 8533 } 8534 8535 static struct ftrace_func_command ftrace_snapshot_cmd = { 8536 .name = "snapshot", 8537 .func = ftrace_trace_snapshot_callback, 8538 }; 8539 8540 static __init int register_snapshot_cmd(void) 8541 { 8542 return register_ftrace_command(&ftrace_snapshot_cmd); 8543 } 8544 #else 8545 static inline __init int register_snapshot_cmd(void) { return 0; } 8546 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */ 8547 8548 static struct dentry *tracing_get_dentry(struct trace_array *tr) 8549 { 8550 /* Top directory uses NULL as the parent */ 8551 if (tr->flags & TRACE_ARRAY_FL_GLOBAL) 8552 return NULL; 8553 8554 if (WARN_ON(!tr->dir)) 8555 return ERR_PTR(-ENODEV); 8556 8557 /* All sub buffers have a descriptor */ 8558 return tr->dir; 8559 } 8560 8561 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu) 8562 { 8563 struct dentry *d_tracer; 8564 8565 if (tr->percpu_dir) 8566 return tr->percpu_dir; 8567 8568 d_tracer = tracing_get_dentry(tr); 8569 if (IS_ERR(d_tracer)) 8570 return NULL; 8571 8572 tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer); 8573 8574 MEM_FAIL(!tr->percpu_dir, 8575 "Could not create tracefs directory 'per_cpu/%d'\n", cpu); 8576 8577 return tr->percpu_dir; 8578 } 8579 8580 static struct dentry * 8581 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent, 8582 void *data, long cpu, const struct file_operations *fops) 8583 { 8584 struct dentry *ret = trace_create_file(name, mode, parent, data, fops); 8585 8586 if (ret) /* See tracing_get_cpu() */ 8587 d_inode(ret)->i_cdev = (void *)(cpu + 1); 8588 return ret; 8589 } 8590 8591 static void 8592 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu) 8593 { 8594 struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu); 8595 struct dentry *d_cpu; 8596 char cpu_dir[30]; /* 30 characters should be more than enough */ 8597 8598 if (!d_percpu) 8599 return; 8600 8601 snprintf(cpu_dir, 30, "cpu%ld", cpu); 8602 d_cpu = tracefs_create_dir(cpu_dir, d_percpu); 8603 if (!d_cpu) { 8604 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir); 8605 return; 8606 } 8607 8608 /* per cpu trace_pipe */ 8609 trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu, 8610 tr, cpu, &tracing_pipe_fops); 8611 8612 /* per cpu trace */ 8613 
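	/*
	 * Unlike the top level "trace" file, the per-CPU file created below
	 * only iterates over this CPU's ring buffer.
	 */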
trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu, 8614 tr, cpu, &tracing_fops); 8615 8616 trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu, 8617 tr, cpu, &tracing_buffers_fops); 8618 8619 trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu, 8620 tr, cpu, &tracing_stats_fops); 8621 8622 trace_create_cpu_file("buffer_size_kb", TRACE_MODE_WRITE, d_cpu, 8623 tr, cpu, &tracing_entries_fops); 8624 8625 if (tr->range_addr_start) 8626 trace_create_cpu_file("buffer_meta", TRACE_MODE_READ, d_cpu, 8627 tr, cpu, &tracing_buffer_meta_fops); 8628 #ifdef CONFIG_TRACER_SNAPSHOT 8629 if (!tr->range_addr_start) { 8630 trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu, 8631 tr, cpu, &snapshot_fops); 8632 8633 trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu, 8634 tr, cpu, &snapshot_raw_fops); 8635 } 8636 #endif 8637 } 8638 8639 #ifdef CONFIG_FTRACE_SELFTEST 8640 /* Let selftest have access to static functions in this file */ 8641 #include "trace_selftest.c" 8642 #endif 8643 8644 static ssize_t 8645 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt, 8646 loff_t *ppos) 8647 { 8648 struct trace_option_dentry *topt = filp->private_data; 8649 char *buf; 8650 8651 if (topt->flags->val & topt->opt->bit) 8652 buf = "1\n"; 8653 else 8654 buf = "0\n"; 8655 8656 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2); 8657 } 8658 8659 static ssize_t 8660 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt, 8661 loff_t *ppos) 8662 { 8663 struct trace_option_dentry *topt = filp->private_data; 8664 unsigned long val; 8665 int ret; 8666 8667 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 8668 if (ret) 8669 return ret; 8670 8671 if (val != 0 && val != 1) 8672 return -EINVAL; 8673 8674 if (!!(topt->flags->val & topt->opt->bit) != val) { 8675 guard(mutex)(&trace_types_lock); 8676 ret = __set_tracer_option(topt->tr, topt->flags, 8677 topt->opt, !val); 8678 if (ret) 8679 return ret; 8680 } 8681 8682 *ppos += cnt; 8683 8684 return cnt; 8685 } 8686 8687 static int tracing_open_options(struct inode *inode, struct file *filp) 8688 { 8689 struct trace_option_dentry *topt = inode->i_private; 8690 int ret; 8691 8692 ret = tracing_check_open_get_tr(topt->tr); 8693 if (ret) 8694 return ret; 8695 8696 filp->private_data = inode->i_private; 8697 return 0; 8698 } 8699 8700 static int tracing_release_options(struct inode *inode, struct file *file) 8701 { 8702 struct trace_option_dentry *topt = file->private_data; 8703 8704 trace_array_put(topt->tr); 8705 return 0; 8706 } 8707 8708 static const struct file_operations trace_options_fops = { 8709 .open = tracing_open_options, 8710 .read = trace_options_read, 8711 .write = trace_options_write, 8712 .llseek = generic_file_llseek, 8713 .release = tracing_release_options, 8714 }; 8715 8716 /* 8717 * In order to pass in both the trace_array descriptor as well as the index 8718 * to the flag that the trace option file represents, the trace_array 8719 * has a character array of trace_flags_index[], which holds the index 8720 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc. 8721 * The address of this character array is passed to the flag option file 8722 * read/write callbacks. 8723 * 8724 * In order to extract both the index and the trace_array descriptor, 8725 * get_tr_index() uses the following algorithm. 8726 * 8727 * idx = *ptr; 8728 * 8729 * As the pointer itself contains the address of the index (remember 8730 * index[1] == 1). 
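 * For example, the option file for flag bit 5 is handed
 * &tr->trace_flags_index[5], which stores the value 5, so idx reads back
 * as 5.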
8731 * 8732 * Then to get the trace_array descriptor, by subtracting that index 8733 * from the ptr, we get to the start of the index itself. 8734 * 8735 * ptr - idx == &index[0] 8736 * 8737 * Then a simple container_of() from that pointer gets us to the 8738 * trace_array descriptor. 8739 */ 8740 static void get_tr_index(void *data, struct trace_array **ptr, 8741 unsigned int *pindex) 8742 { 8743 *pindex = *(unsigned char *)data; 8744 8745 *ptr = container_of(data - *pindex, struct trace_array, 8746 trace_flags_index); 8747 } 8748 8749 static ssize_t 8750 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt, 8751 loff_t *ppos) 8752 { 8753 void *tr_index = filp->private_data; 8754 struct trace_array *tr; 8755 unsigned int index; 8756 char *buf; 8757 8758 get_tr_index(tr_index, &tr, &index); 8759 8760 if (tr->trace_flags & (1ULL << index)) 8761 buf = "1\n"; 8762 else 8763 buf = "0\n"; 8764 8765 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2); 8766 } 8767 8768 static ssize_t 8769 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt, 8770 loff_t *ppos) 8771 { 8772 void *tr_index = filp->private_data; 8773 struct trace_array *tr; 8774 unsigned int index; 8775 unsigned long val; 8776 int ret; 8777 8778 get_tr_index(tr_index, &tr, &index); 8779 8780 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 8781 if (ret) 8782 return ret; 8783 8784 if (val != 0 && val != 1) 8785 return -EINVAL; 8786 8787 mutex_lock(&event_mutex); 8788 mutex_lock(&trace_types_lock); 8789 ret = set_tracer_flag(tr, 1ULL << index, val); 8790 mutex_unlock(&trace_types_lock); 8791 mutex_unlock(&event_mutex); 8792 8793 if (ret < 0) 8794 return ret; 8795 8796 *ppos += cnt; 8797 8798 return cnt; 8799 } 8800 8801 static const struct file_operations trace_options_core_fops = { 8802 .open = tracing_open_generic, 8803 .read = trace_options_core_read, 8804 .write = trace_options_core_write, 8805 .llseek = generic_file_llseek, 8806 }; 8807 8808 struct dentry *trace_create_file(const char *name, 8809 umode_t mode, 8810 struct dentry *parent, 8811 void *data, 8812 const struct file_operations *fops) 8813 { 8814 struct dentry *ret; 8815 8816 ret = tracefs_create_file(name, mode, parent, data, fops); 8817 if (!ret) 8818 pr_warn("Could not create tracefs '%s' entry\n", name); 8819 8820 return ret; 8821 } 8822 8823 8824 static struct dentry *trace_options_init_dentry(struct trace_array *tr) 8825 { 8826 struct dentry *d_tracer; 8827 8828 if (tr->options) 8829 return tr->options; 8830 8831 d_tracer = tracing_get_dentry(tr); 8832 if (IS_ERR(d_tracer)) 8833 return NULL; 8834 8835 tr->options = tracefs_create_dir("options", d_tracer); 8836 if (!tr->options) { 8837 pr_warn("Could not create tracefs directory 'options'\n"); 8838 return NULL; 8839 } 8840 8841 return tr->options; 8842 } 8843 8844 static void 8845 create_trace_option_file(struct trace_array *tr, 8846 struct trace_option_dentry *topt, 8847 struct tracer_flags *flags, 8848 struct tracer_opt *opt) 8849 { 8850 struct dentry *t_options; 8851 8852 t_options = trace_options_init_dentry(tr); 8853 if (!t_options) 8854 return; 8855 8856 topt->flags = flags; 8857 topt->opt = opt; 8858 topt->tr = tr; 8859 8860 topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE, 8861 t_options, topt, &trace_options_fops); 8862 } 8863 8864 static int 8865 create_trace_option_files(struct trace_array *tr, struct tracer *tracer, 8866 struct tracer_flags *flags) 8867 { 8868 struct trace_option_dentry *topts; 8869 struct trace_options *tr_topts; 8870 struct 
tracer_opt *opts; 8871 int cnt; 8872 8873 if (!flags || !flags->opts) 8874 return 0; 8875 8876 opts = flags->opts; 8877 8878 for (cnt = 0; opts[cnt].name; cnt++) 8879 ; 8880 8881 topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL); 8882 if (!topts) 8883 return 0; 8884 8885 tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1), 8886 GFP_KERNEL); 8887 if (!tr_topts) { 8888 kfree(topts); 8889 return -ENOMEM; 8890 } 8891 8892 tr->topts = tr_topts; 8893 tr->topts[tr->nr_topts].tracer = tracer; 8894 tr->topts[tr->nr_topts].topts = topts; 8895 tr->nr_topts++; 8896 8897 for (cnt = 0; opts[cnt].name; cnt++) { 8898 create_trace_option_file(tr, &topts[cnt], flags, 8899 &opts[cnt]); 8900 MEM_FAIL(topts[cnt].entry == NULL, 8901 "Failed to create trace option: %s", 8902 opts[cnt].name); 8903 } 8904 return 0; 8905 } 8906 8907 static int get_global_flags_val(struct tracer *tracer) 8908 { 8909 struct tracers *t; 8910 8911 list_for_each_entry(t, &global_trace.tracers, list) { 8912 if (t->tracer != tracer) 8913 continue; 8914 if (!t->flags) 8915 return -1; 8916 return t->flags->val; 8917 } 8918 return -1; 8919 } 8920 8921 static int add_tracer_options(struct trace_array *tr, struct tracers *t) 8922 { 8923 struct tracer *tracer = t->tracer; 8924 struct tracer_flags *flags = t->flags ?: tracer->flags; 8925 8926 if (!flags) 8927 return 0; 8928 8929 /* Only add tracer options after update_tracer_options finish */ 8930 if (!tracer_options_updated) 8931 return 0; 8932 8933 return create_trace_option_files(tr, tracer, flags); 8934 } 8935 8936 static int add_tracer(struct trace_array *tr, struct tracer *tracer) 8937 { 8938 struct tracer_flags *flags; 8939 struct tracers *t; 8940 int ret; 8941 8942 /* Only enable if the directory has been created already. */ 8943 if (!tr->dir && !(tr->flags & TRACE_ARRAY_FL_GLOBAL)) 8944 return 0; 8945 8946 /* 8947 * If this is an instance, only create flags for tracers 8948 * the instance may have. 8949 */ 8950 if (!trace_ok_for_array(tracer, tr)) 8951 return 0; 8952 8953 t = kmalloc(sizeof(*t), GFP_KERNEL); 8954 if (!t) 8955 return -ENOMEM; 8956 8957 t->tracer = tracer; 8958 t->flags = NULL; 8959 list_add(&t->list, &tr->tracers); 8960 8961 flags = tracer->flags; 8962 if (!flags) { 8963 if (!tracer->default_flags) 8964 return 0; 8965 8966 /* 8967 * If the tracer defines default flags, it means the flags are 8968 * per trace instance. 
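	 * Allocate a private copy here so this trace_array's option values
	 * can diverge from those of other instances.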
8969 */ 8970 flags = kmalloc(sizeof(*flags), GFP_KERNEL); 8971 if (!flags) 8972 return -ENOMEM; 8973 8974 *flags = *tracer->default_flags; 8975 flags->trace = tracer; 8976 8977 t->flags = flags; 8978 8979 /* If this is an instance, inherit the global_trace flags */ 8980 if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL)) { 8981 int val = get_global_flags_val(tracer); 8982 if (!WARN_ON_ONCE(val < 0)) 8983 flags->val = val; 8984 } 8985 } 8986 8987 ret = add_tracer_options(tr, t); 8988 if (ret < 0) { 8989 list_del(&t->list); 8990 kfree(t->flags); 8991 kfree(t); 8992 } 8993 8994 return ret; 8995 } 8996 8997 static struct dentry * 8998 create_trace_option_core_file(struct trace_array *tr, 8999 const char *option, long index) 9000 { 9001 struct dentry *t_options; 9002 9003 t_options = trace_options_init_dentry(tr); 9004 if (!t_options) 9005 return NULL; 9006 9007 return trace_create_file(option, TRACE_MODE_WRITE, t_options, 9008 (void *)&tr->trace_flags_index[index], 9009 &trace_options_core_fops); 9010 } 9011 9012 static void create_trace_options_dir(struct trace_array *tr) 9013 { 9014 struct dentry *t_options; 9015 bool top_level = tr == &global_trace; 9016 int i; 9017 9018 t_options = trace_options_init_dentry(tr); 9019 if (!t_options) 9020 return; 9021 9022 for (i = 0; trace_options[i]; i++) { 9023 if (top_level || 9024 !((1ULL << i) & TOP_LEVEL_TRACE_FLAGS)) { 9025 create_trace_option_core_file(tr, trace_options[i], i); 9026 } 9027 } 9028 } 9029 9030 static ssize_t 9031 rb_simple_read(struct file *filp, char __user *ubuf, 9032 size_t cnt, loff_t *ppos) 9033 { 9034 struct trace_array *tr = filp->private_data; 9035 char buf[64]; 9036 int r; 9037 9038 r = tracer_tracing_is_on(tr); 9039 r = sprintf(buf, "%d\n", r); 9040 9041 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 9042 } 9043 9044 static ssize_t 9045 rb_simple_write(struct file *filp, const char __user *ubuf, 9046 size_t cnt, loff_t *ppos) 9047 { 9048 struct trace_array *tr = filp->private_data; 9049 struct trace_buffer *buffer = tr->array_buffer.buffer; 9050 unsigned long val; 9051 int ret; 9052 9053 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 9054 if (ret) 9055 return ret; 9056 9057 if (buffer) { 9058 guard(mutex)(&trace_types_lock); 9059 if (!!val == tracer_tracing_is_on(tr)) { 9060 val = 0; /* do nothing */ 9061 } else if (val) { 9062 tracer_tracing_on(tr); 9063 if (tr->current_trace->start) 9064 tr->current_trace->start(tr); 9065 } else { 9066 tracer_tracing_off(tr); 9067 if (tr->current_trace->stop) 9068 tr->current_trace->stop(tr); 9069 /* Wake up any waiters */ 9070 ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS); 9071 } 9072 } 9073 9074 (*ppos)++; 9075 9076 return cnt; 9077 } 9078 9079 static const struct file_operations rb_simple_fops = { 9080 .open = tracing_open_generic_tr, 9081 .read = rb_simple_read, 9082 .write = rb_simple_write, 9083 .release = tracing_release_generic_tr, 9084 .llseek = default_llseek, 9085 }; 9086 9087 static ssize_t 9088 buffer_percent_read(struct file *filp, char __user *ubuf, 9089 size_t cnt, loff_t *ppos) 9090 { 9091 struct trace_array *tr = filp->private_data; 9092 char buf[64]; 9093 int r; 9094 9095 r = tr->buffer_percent; 9096 r = sprintf(buf, "%d\n", r); 9097 9098 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 9099 } 9100 9101 static ssize_t 9102 buffer_percent_write(struct file *filp, const char __user *ubuf, 9103 size_t cnt, loff_t *ppos) 9104 { 9105 struct trace_array *tr = filp->private_data; 9106 unsigned long val; 9107 int ret; 9108 9109 ret = kstrtoul_from_user(ubuf, 
cnt, 10, &val); 9110 if (ret) 9111 return ret; 9112 9113 if (val > 100) 9114 return -EINVAL; 9115 9116 tr->buffer_percent = val; 9117 9118 (*ppos)++; 9119 9120 return cnt; 9121 } 9122 9123 static const struct file_operations buffer_percent_fops = { 9124 .open = tracing_open_generic_tr, 9125 .read = buffer_percent_read, 9126 .write = buffer_percent_write, 9127 .release = tracing_release_generic_tr, 9128 .llseek = default_llseek, 9129 }; 9130 9131 static ssize_t 9132 buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) 9133 { 9134 struct trace_array *tr = filp->private_data; 9135 size_t size; 9136 char buf[64]; 9137 int order; 9138 int r; 9139 9140 order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer); 9141 size = (PAGE_SIZE << order) / 1024; 9142 9143 r = sprintf(buf, "%zd\n", size); 9144 9145 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 9146 } 9147 9148 static ssize_t 9149 buffer_subbuf_size_write(struct file *filp, const char __user *ubuf, 9150 size_t cnt, loff_t *ppos) 9151 { 9152 struct trace_array *tr = filp->private_data; 9153 unsigned long val; 9154 int old_order; 9155 int order; 9156 int pages; 9157 int ret; 9158 9159 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 9160 if (ret) 9161 return ret; 9162 9163 val *= 1024; /* value passed in is in KB */ 9164 9165 pages = DIV_ROUND_UP(val, PAGE_SIZE); 9166 order = fls(pages - 1); 9167 9168 /* limit between 1 and 128 system pages */ 9169 if (order < 0 || order > 7) 9170 return -EINVAL; 9171 9172 /* Do not allow tracing while changing the order of the ring buffer */ 9173 tracing_stop_tr(tr); 9174 9175 old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer); 9176 if (old_order == order) 9177 goto out; 9178 9179 ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order); 9180 if (ret) 9181 goto out; 9182 9183 #ifdef CONFIG_TRACER_SNAPSHOT 9184 9185 if (!tr->allocated_snapshot) 9186 goto out_max; 9187 9188 ret = ring_buffer_subbuf_order_set(tr->snapshot_buffer.buffer, order); 9189 if (ret) { 9190 /* Put back the old order */ 9191 cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order); 9192 if (WARN_ON_ONCE(cnt)) { 9193 /* 9194 * AARGH! We are left with different orders! 9195 * The max buffer is our "snapshot" buffer. 9196 * When a tracer needs a snapshot (one of the 9197 * latency tracers), it swaps the max buffer 9198 * with the saved snap shot. We succeeded to 9199 * update the order of the main buffer, but failed to 9200 * update the order of the max buffer. But when we tried 9201 * to reset the main buffer to the original size, we 9202 * failed there too. This is very unlikely to 9203 * happen, but if it does, warn and kill all 9204 * tracing. 
9205 */ 9206 tracing_disabled = 1; 9207 } 9208 goto out; 9209 } 9210 out_max: 9211 #endif 9212 (*ppos)++; 9213 out: 9214 if (ret) 9215 cnt = ret; 9216 tracing_start_tr(tr); 9217 return cnt; 9218 } 9219 9220 static const struct file_operations buffer_subbuf_size_fops = { 9221 .open = tracing_open_generic_tr, 9222 .read = buffer_subbuf_size_read, 9223 .write = buffer_subbuf_size_write, 9224 .release = tracing_release_generic_tr, 9225 .llseek = default_llseek, 9226 }; 9227 9228 static struct dentry *trace_instance_dir; 9229 9230 static void 9231 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer); 9232 9233 #ifdef CONFIG_MODULES 9234 static int make_mod_delta(struct module *mod, void *data) 9235 { 9236 struct trace_module_delta *module_delta; 9237 struct trace_scratch *tscratch; 9238 struct trace_mod_entry *entry; 9239 struct trace_array *tr = data; 9240 int i; 9241 9242 tscratch = tr->scratch; 9243 module_delta = READ_ONCE(tr->module_delta); 9244 for (i = 0; i < tscratch->nr_entries; i++) { 9245 entry = &tscratch->entries[i]; 9246 if (strcmp(mod->name, entry->mod_name)) 9247 continue; 9248 if (mod->state == MODULE_STATE_GOING) 9249 module_delta->delta[i] = 0; 9250 else 9251 module_delta->delta[i] = (unsigned long)mod->mem[MOD_TEXT].base 9252 - entry->mod_addr; 9253 break; 9254 } 9255 return 0; 9256 } 9257 #else 9258 static int make_mod_delta(struct module *mod, void *data) 9259 { 9260 return 0; 9261 } 9262 #endif 9263 9264 static int mod_addr_comp(const void *a, const void *b, const void *data) 9265 { 9266 const struct trace_mod_entry *e1 = a; 9267 const struct trace_mod_entry *e2 = b; 9268 9269 return e1->mod_addr > e2->mod_addr ? 1 : -1; 9270 } 9271 9272 static void setup_trace_scratch(struct trace_array *tr, 9273 struct trace_scratch *tscratch, unsigned int size) 9274 { 9275 struct trace_module_delta *module_delta; 9276 struct trace_mod_entry *entry; 9277 int i, nr_entries; 9278 9279 if (!tscratch) 9280 return; 9281 9282 tr->scratch = tscratch; 9283 tr->scratch_size = size; 9284 9285 if (tscratch->text_addr) 9286 tr->text_delta = (unsigned long)_text - tscratch->text_addr; 9287 9288 if (struct_size(tscratch, entries, tscratch->nr_entries) > size) 9289 goto reset; 9290 9291 /* Check if each module name is a valid string */ 9292 for (i = 0; i < tscratch->nr_entries; i++) { 9293 int n; 9294 9295 entry = &tscratch->entries[i]; 9296 9297 for (n = 0; n < MODULE_NAME_LEN; n++) { 9298 if (entry->mod_name[n] == '\0') 9299 break; 9300 if (!isprint(entry->mod_name[n])) 9301 goto reset; 9302 } 9303 if (n == MODULE_NAME_LEN) 9304 goto reset; 9305 } 9306 9307 /* Sort the entries so that we can find appropriate module from address. */ 9308 nr_entries = tscratch->nr_entries; 9309 sort_r(tscratch->entries, nr_entries, sizeof(struct trace_mod_entry), 9310 mod_addr_comp, NULL, NULL); 9311 9312 if (IS_ENABLED(CONFIG_MODULES)) { 9313 module_delta = kzalloc(struct_size(module_delta, delta, nr_entries), GFP_KERNEL); 9314 if (!module_delta) { 9315 pr_info("module_delta allocation failed. Not able to decode module address."); 9316 goto reset; 9317 } 9318 init_rcu_head(&module_delta->rcu); 9319 } else 9320 module_delta = NULL; 9321 WRITE_ONCE(tr->module_delta, module_delta); 9322 9323 /* Scan modules to make text delta for modules. */ 9324 module_for_each_mod(make_mod_delta, tr); 9325 9326 /* Set trace_clock as the same of the previous boot. 
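	 * The persistent buffer still holds events stamped with that clock,
	 * so keeping the clock unchanged keeps the old timestamps meaningful.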
*/ 9327 if (tscratch->clock_id != tr->clock_id) { 9328 if (tscratch->clock_id >= ARRAY_SIZE(trace_clocks) || 9329 tracing_set_clock(tr, trace_clocks[tscratch->clock_id].name) < 0) { 9330 pr_info("the previous trace_clock info is not valid."); 9331 goto reset; 9332 } 9333 } 9334 return; 9335 reset: 9336 /* Invalid trace modules */ 9337 memset(tscratch, 0, size); 9338 } 9339 9340 static int 9341 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size) 9342 { 9343 enum ring_buffer_flags rb_flags; 9344 struct trace_scratch *tscratch; 9345 unsigned int scratch_size = 0; 9346 9347 rb_flags = tr->trace_flags & TRACE_ITER(OVERWRITE) ? RB_FL_OVERWRITE : 0; 9348 9349 buf->tr = tr; 9350 9351 if (tr->range_addr_start && tr->range_addr_size) { 9352 /* Add scratch buffer to handle 128 modules */ 9353 buf->buffer = ring_buffer_alloc_range(size, rb_flags, 0, 9354 tr->range_addr_start, 9355 tr->range_addr_size, 9356 struct_size(tscratch, entries, 128)); 9357 9358 tscratch = ring_buffer_meta_scratch(buf->buffer, &scratch_size); 9359 setup_trace_scratch(tr, tscratch, scratch_size); 9360 9361 /* 9362 * This is basically the same as a mapped buffer, 9363 * with the same restrictions. 9364 */ 9365 tr->mapped++; 9366 } else { 9367 buf->buffer = ring_buffer_alloc(size, rb_flags); 9368 } 9369 if (!buf->buffer) 9370 return -ENOMEM; 9371 9372 buf->data = alloc_percpu(struct trace_array_cpu); 9373 if (!buf->data) { 9374 ring_buffer_free(buf->buffer); 9375 buf->buffer = NULL; 9376 return -ENOMEM; 9377 } 9378 9379 /* Allocate the first page for all buffers */ 9380 set_buffer_entries(&tr->array_buffer, 9381 ring_buffer_size(tr->array_buffer.buffer, 0)); 9382 9383 return 0; 9384 } 9385 9386 static void free_trace_buffer(struct array_buffer *buf) 9387 { 9388 if (buf->buffer) { 9389 ring_buffer_free(buf->buffer); 9390 buf->buffer = NULL; 9391 free_percpu(buf->data); 9392 buf->data = NULL; 9393 } 9394 } 9395 9396 static int allocate_trace_buffers(struct trace_array *tr, int size) 9397 { 9398 int ret; 9399 9400 ret = allocate_trace_buffer(tr, &tr->array_buffer, size); 9401 if (ret) 9402 return ret; 9403 9404 #ifdef CONFIG_TRACER_SNAPSHOT 9405 /* Fix mapped buffer trace arrays do not have snapshot buffers */ 9406 if (tr->range_addr_start) 9407 return 0; 9408 9409 ret = allocate_trace_buffer(tr, &tr->snapshot_buffer, 9410 allocate_snapshot ? 
size : 1); 9411 if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) { 9412 free_trace_buffer(&tr->array_buffer); 9413 return -ENOMEM; 9414 } 9415 tr->allocated_snapshot = allocate_snapshot; 9416 9417 allocate_snapshot = false; 9418 #endif 9419 9420 return 0; 9421 } 9422 9423 static void free_trace_buffers(struct trace_array *tr) 9424 { 9425 if (!tr) 9426 return; 9427 9428 free_trace_buffer(&tr->array_buffer); 9429 kfree(tr->module_delta); 9430 9431 #ifdef CONFIG_TRACER_SNAPSHOT 9432 free_trace_buffer(&tr->snapshot_buffer); 9433 #endif 9434 } 9435 9436 static void init_trace_flags_index(struct trace_array *tr) 9437 { 9438 int i; 9439 9440 /* Used by the trace options files */ 9441 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) 9442 tr->trace_flags_index[i] = i; 9443 } 9444 9445 static int __update_tracer(struct trace_array *tr) 9446 { 9447 struct tracer *t; 9448 int ret = 0; 9449 9450 for (t = trace_types; t && !ret; t = t->next) 9451 ret = add_tracer(tr, t); 9452 9453 return ret; 9454 } 9455 9456 static __init int __update_tracer_options(struct trace_array *tr) 9457 { 9458 struct tracers *t; 9459 int ret = 0; 9460 9461 list_for_each_entry(t, &tr->tracers, list) { 9462 ret = add_tracer_options(tr, t); 9463 if (ret < 0) 9464 break; 9465 } 9466 9467 return ret; 9468 } 9469 9470 static __init void update_tracer_options(void) 9471 { 9472 struct trace_array *tr; 9473 9474 guard(mutex)(&trace_types_lock); 9475 tracer_options_updated = true; 9476 list_for_each_entry(tr, &ftrace_trace_arrays, list) 9477 __update_tracer_options(tr); 9478 } 9479 9480 /* Must have trace_types_lock held */ 9481 struct trace_array *trace_array_find(const char *instance) 9482 { 9483 struct trace_array *tr, *found = NULL; 9484 9485 list_for_each_entry(tr, &ftrace_trace_arrays, list) { 9486 if (tr->name && strcmp(tr->name, instance) == 0) { 9487 found = tr; 9488 break; 9489 } 9490 } 9491 9492 return found; 9493 } 9494 9495 struct trace_array *trace_array_find_get(const char *instance) 9496 { 9497 struct trace_array *tr; 9498 9499 guard(mutex)(&trace_types_lock); 9500 tr = trace_array_find(instance); 9501 if (tr) 9502 tr->ref++; 9503 9504 return tr; 9505 } 9506 9507 static int trace_array_create_dir(struct trace_array *tr) 9508 { 9509 int ret; 9510 9511 tr->dir = tracefs_create_dir(tr->name, trace_instance_dir); 9512 if (!tr->dir) 9513 return -EINVAL; 9514 9515 ret = event_trace_add_tracer(tr->dir, tr); 9516 if (ret) { 9517 tracefs_remove(tr->dir); 9518 return ret; 9519 } 9520 9521 init_tracer_tracefs(tr, tr->dir); 9522 ret = __update_tracer(tr); 9523 if (ret) { 9524 event_trace_del_tracer(tr); 9525 tracefs_remove(tr->dir); 9526 return ret; 9527 } 9528 return 0; 9529 } 9530 9531 static struct trace_array * 9532 trace_array_create_systems(const char *name, const char *systems, 9533 unsigned long range_addr_start, 9534 unsigned long range_addr_size) 9535 { 9536 struct trace_array *tr; 9537 int ret; 9538 9539 ret = -ENOMEM; 9540 tr = kzalloc(sizeof(*tr), GFP_KERNEL); 9541 if (!tr) 9542 return ERR_PTR(ret); 9543 9544 tr->name = kstrdup(name, GFP_KERNEL); 9545 if (!tr->name) 9546 goto out_free_tr; 9547 9548 if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL)) 9549 goto out_free_tr; 9550 9551 if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL)) 9552 goto out_free_tr; 9553 9554 if (systems) { 9555 tr->system_names = kstrdup_const(systems, GFP_KERNEL); 9556 if (!tr->system_names) 9557 goto out_free_tr; 9558 } 9559 9560 /* Only for boot up memory mapped ring buffers */ 9561 tr->range_addr_start = range_addr_start; 9562 
tr->range_addr_size = range_addr_size; 9563 9564 tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS; 9565 9566 cpumask_copy(tr->tracing_cpumask, cpu_all_mask); 9567 9568 raw_spin_lock_init(&tr->start_lock); 9569 9570 tr->syscall_buf_sz = global_trace.syscall_buf_sz; 9571 9572 tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; 9573 #ifdef CONFIG_TRACER_SNAPSHOT 9574 spin_lock_init(&tr->snapshot_trigger_lock); 9575 #endif 9576 tr->current_trace = &nop_trace; 9577 tr->current_trace_flags = nop_trace.flags; 9578 9579 INIT_LIST_HEAD(&tr->systems); 9580 INIT_LIST_HEAD(&tr->events); 9581 INIT_LIST_HEAD(&tr->hist_vars); 9582 INIT_LIST_HEAD(&tr->err_log); 9583 INIT_LIST_HEAD(&tr->tracers); 9584 INIT_LIST_HEAD(&tr->marker_list); 9585 9586 #ifdef CONFIG_MODULES 9587 INIT_LIST_HEAD(&tr->mod_events); 9588 #endif 9589 9590 if (allocate_trace_buffers(tr, trace_buf_size) < 0) 9591 goto out_free_tr; 9592 9593 /* The ring buffer is defaultly expanded */ 9594 trace_set_ring_buffer_expanded(tr); 9595 9596 if (ftrace_allocate_ftrace_ops(tr) < 0) 9597 goto out_free_tr; 9598 9599 ftrace_init_trace_array(tr); 9600 9601 init_trace_flags_index(tr); 9602 9603 if (trace_instance_dir) { 9604 ret = trace_array_create_dir(tr); 9605 if (ret) 9606 goto out_free_tr; 9607 } else 9608 __trace_early_add_events(tr); 9609 9610 list_add(&tr->list, &ftrace_trace_arrays); 9611 9612 tr->ref++; 9613 9614 return tr; 9615 9616 out_free_tr: 9617 ftrace_free_ftrace_ops(tr); 9618 free_trace_buffers(tr); 9619 free_cpumask_var(tr->pipe_cpumask); 9620 free_cpumask_var(tr->tracing_cpumask); 9621 kfree_const(tr->system_names); 9622 kfree(tr->range_name); 9623 kfree(tr->name); 9624 kfree(tr); 9625 9626 return ERR_PTR(ret); 9627 } 9628 9629 static struct trace_array *trace_array_create(const char *name) 9630 { 9631 return trace_array_create_systems(name, NULL, 0, 0); 9632 } 9633 9634 static int instance_mkdir(const char *name) 9635 { 9636 struct trace_array *tr; 9637 int ret; 9638 9639 guard(mutex)(&event_mutex); 9640 guard(mutex)(&trace_types_lock); 9641 9642 ret = -EEXIST; 9643 if (trace_array_find(name)) 9644 return -EEXIST; 9645 9646 tr = trace_array_create(name); 9647 9648 ret = PTR_ERR_OR_ZERO(tr); 9649 9650 return ret; 9651 } 9652 9653 #ifdef CONFIG_MMU 9654 static u64 map_pages(unsigned long start, unsigned long size) 9655 { 9656 unsigned long vmap_start, vmap_end; 9657 struct vm_struct *area; 9658 int ret; 9659 9660 area = get_vm_area(size, VM_IOREMAP); 9661 if (!area) 9662 return 0; 9663 9664 vmap_start = (unsigned long) area->addr; 9665 vmap_end = vmap_start + size; 9666 9667 ret = vmap_page_range(vmap_start, vmap_end, 9668 start, pgprot_nx(PAGE_KERNEL)); 9669 if (ret < 0) { 9670 free_vm_area(area); 9671 return 0; 9672 } 9673 9674 return (u64)vmap_start; 9675 } 9676 #else 9677 static inline u64 map_pages(unsigned long start, unsigned long size) 9678 { 9679 return 0; 9680 } 9681 #endif 9682 9683 /** 9684 * trace_array_get_by_name - Create/Lookup a trace array, given its name. 9685 * @name: The name of the trace array to be looked up/created. 9686 * @systems: A list of systems to create event directories for (NULL for all) 9687 * 9688 * Returns pointer to trace array with given name. 9689 * NULL, if it cannot be created. 9690 * 9691 * NOTE: This function increments the reference counter associated with the 9692 * trace array returned. This makes sure it cannot be freed while in use. 9693 * Use trace_array_put() once the trace array is no longer needed. 
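 *
 * A minimal usage sketch (the instance name and error handling below are
 * only illustrative):
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_instance", NULL);
 *	if (!tr)
 *		return -ENOMEM;
 *	... enable events or write into the instance ...
 *	trace_array_put(tr);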
9694 * If the trace_array is to be freed, trace_array_destroy() needs to 9695 * be called after the trace_array_put(), or simply let user space delete 9696 * it from the tracefs instances directory. But until the 9697 * trace_array_put() is called, user space can not delete it. 9698 * 9699 */ 9700 struct trace_array *trace_array_get_by_name(const char *name, const char *systems) 9701 { 9702 struct trace_array *tr; 9703 9704 guard(mutex)(&event_mutex); 9705 guard(mutex)(&trace_types_lock); 9706 9707 list_for_each_entry(tr, &ftrace_trace_arrays, list) { 9708 if (tr->name && strcmp(tr->name, name) == 0) { 9709 tr->ref++; 9710 return tr; 9711 } 9712 } 9713 9714 tr = trace_array_create_systems(name, systems, 0, 0); 9715 9716 if (IS_ERR(tr)) 9717 tr = NULL; 9718 else 9719 tr->ref++; 9720 9721 return tr; 9722 } 9723 EXPORT_SYMBOL_GPL(trace_array_get_by_name); 9724 9725 static int __remove_instance(struct trace_array *tr) 9726 { 9727 int i; 9728 9729 /* Reference counter for a newly created trace array = 1. */ 9730 if (tr->ref > 1 || (tr->current_trace && tr->trace_ref)) 9731 return -EBUSY; 9732 9733 list_del(&tr->list); 9734 9735 /* Disable all the flags that were enabled coming in */ 9736 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) { 9737 if ((1ULL << i) & ZEROED_TRACE_FLAGS) 9738 set_tracer_flag(tr, 1ULL << i, 0); 9739 } 9740 9741 if (printk_trace == tr) 9742 update_printk_trace(&global_trace); 9743 9744 if (update_marker_trace(tr, 0)) 9745 synchronize_rcu(); 9746 9747 tracing_set_nop(tr); 9748 clear_ftrace_function_probes(tr); 9749 event_trace_del_tracer(tr); 9750 ftrace_clear_pids(tr); 9751 ftrace_destroy_function_files(tr); 9752 tracefs_remove(tr->dir); 9753 free_percpu(tr->last_func_repeats); 9754 free_trace_buffers(tr); 9755 clear_tracing_err_log(tr); 9756 free_tracers(tr); 9757 9758 if (tr->range_name) { 9759 reserve_mem_release_by_name(tr->range_name); 9760 kfree(tr->range_name); 9761 } 9762 if (tr->flags & TRACE_ARRAY_FL_VMALLOC) 9763 vfree((void *)tr->range_addr_start); 9764 9765 for (i = 0; i < tr->nr_topts; i++) { 9766 kfree(tr->topts[i].topts); 9767 } 9768 kfree(tr->topts); 9769 9770 free_cpumask_var(tr->pipe_cpumask); 9771 free_cpumask_var(tr->tracing_cpumask); 9772 kfree_const(tr->system_names); 9773 kfree(tr->name); 9774 kfree(tr); 9775 9776 return 0; 9777 } 9778 9779 int trace_array_destroy(struct trace_array *this_tr) 9780 { 9781 struct trace_array *tr; 9782 9783 if (!this_tr) 9784 return -EINVAL; 9785 9786 guard(mutex)(&event_mutex); 9787 guard(mutex)(&trace_types_lock); 9788 9789 9790 /* Making sure trace array exists before destroying it. 
*/ 9791 list_for_each_entry(tr, &ftrace_trace_arrays, list) { 9792 if (tr == this_tr) 9793 return __remove_instance(tr); 9794 } 9795 9796 return -ENODEV; 9797 } 9798 EXPORT_SYMBOL_GPL(trace_array_destroy); 9799 9800 static int instance_rmdir(const char *name) 9801 { 9802 struct trace_array *tr; 9803 9804 guard(mutex)(&event_mutex); 9805 guard(mutex)(&trace_types_lock); 9806 9807 tr = trace_array_find(name); 9808 if (!tr) 9809 return -ENODEV; 9810 9811 return __remove_instance(tr); 9812 } 9813 9814 static __init void create_trace_instances(struct dentry *d_tracer) 9815 { 9816 struct trace_array *tr; 9817 9818 trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer, 9819 instance_mkdir, 9820 instance_rmdir); 9821 if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n")) 9822 return; 9823 9824 guard(mutex)(&event_mutex); 9825 guard(mutex)(&trace_types_lock); 9826 9827 list_for_each_entry(tr, &ftrace_trace_arrays, list) { 9828 if (!tr->name) 9829 continue; 9830 if (MEM_FAIL(trace_array_create_dir(tr) < 0, 9831 "Failed to create instance directory\n")) 9832 return; 9833 } 9834 } 9835 9836 static void 9837 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer) 9838 { 9839 int cpu; 9840 9841 trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer, 9842 tr, &show_traces_fops); 9843 9844 trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer, 9845 tr, &set_tracer_fops); 9846 9847 trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer, 9848 tr, &tracing_cpumask_fops); 9849 9850 trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer, 9851 tr, &tracing_iter_fops); 9852 9853 trace_create_file("trace", TRACE_MODE_WRITE, d_tracer, 9854 tr, &tracing_fops); 9855 9856 trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer, 9857 tr, &tracing_pipe_fops); 9858 9859 trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer, 9860 tr, &tracing_entries_fops); 9861 9862 trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer, 9863 tr, &tracing_total_entries_fops); 9864 9865 trace_create_file("free_buffer", 0200, d_tracer, 9866 tr, &tracing_free_buffer_fops); 9867 9868 trace_create_file("trace_marker", 0220, d_tracer, 9869 tr, &tracing_mark_fops); 9870 9871 tr->trace_marker_file = __find_event_file(tr, "ftrace", "print"); 9872 9873 trace_create_file("trace_marker_raw", 0220, d_tracer, 9874 tr, &tracing_mark_raw_fops); 9875 9876 trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr, 9877 &trace_clock_fops); 9878 9879 trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer, 9880 tr, &rb_simple_fops); 9881 9882 trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr, 9883 &trace_time_stamp_mode_fops); 9884 9885 tr->buffer_percent = 50; 9886 9887 trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer, 9888 tr, &buffer_percent_fops); 9889 9890 trace_create_file("buffer_subbuf_size_kb", TRACE_MODE_WRITE, d_tracer, 9891 tr, &buffer_subbuf_size_fops); 9892 9893 trace_create_file("syscall_user_buf_size", TRACE_MODE_WRITE, d_tracer, 9894 tr, &tracing_syscall_buf_fops); 9895 9896 create_trace_options_dir(tr); 9897 9898 trace_create_maxlat_file(tr, d_tracer); 9899 9900 if (ftrace_create_function_files(tr, d_tracer)) 9901 MEM_FAIL(1, "Could not allocate function filter files"); 9902 9903 if (tr->range_addr_start) { 9904 trace_create_file("last_boot_info", TRACE_MODE_READ, d_tracer, 9905 tr, &last_boot_fops); 9906 #ifdef CONFIG_TRACER_SNAPSHOT 9907 } else { 9908 trace_create_file("snapshot", 
TRACE_MODE_WRITE, d_tracer, 9909 tr, &snapshot_fops); 9910 #endif 9911 } 9912 9913 trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer, 9914 tr, &tracing_err_log_fops); 9915 9916 for_each_tracing_cpu(cpu) 9917 tracing_init_tracefs_percpu(tr, cpu); 9918 9919 ftrace_init_tracefs(tr, d_tracer); 9920 } 9921 9922 #ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED 9923 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore) 9924 { 9925 struct vfsmount *mnt; 9926 struct file_system_type *type; 9927 struct fs_context *fc; 9928 int ret; 9929 9930 /* 9931 * To maintain backward compatibility for tools that mount 9932 * debugfs to get to the tracing facility, tracefs is automatically 9933 * mounted to the debugfs/tracing directory. 9934 */ 9935 type = get_fs_type("tracefs"); 9936 if (!type) 9937 return NULL; 9938 9939 fc = fs_context_for_submount(type, mntpt); 9940 put_filesystem(type); 9941 if (IS_ERR(fc)) 9942 return ERR_CAST(fc); 9943 9944 pr_warn("NOTICE: Automounting of tracing to debugfs is deprecated and will be removed in 2030\n"); 9945 9946 ret = vfs_parse_fs_string(fc, "source", "tracefs"); 9947 if (!ret) 9948 mnt = fc_mount(fc); 9949 else 9950 mnt = ERR_PTR(ret); 9951 9952 put_fs_context(fc); 9953 return mnt; 9954 } 9955 #endif 9956 9957 /** 9958 * tracing_init_dentry - initialize top level trace array 9959 * 9960 * This is called when creating files or directories in the tracing 9961 * directory. It is called via fs_initcall() by any of the boot up code 9962 * and expects to return the dentry of the top level tracing directory. 9963 */ 9964 int tracing_init_dentry(void) 9965 { 9966 struct trace_array *tr = &global_trace; 9967 9968 if (security_locked_down(LOCKDOWN_TRACEFS)) { 9969 pr_warn("Tracing disabled due to lockdown\n"); 9970 return -EPERM; 9971 } 9972 9973 /* The top level trace array uses NULL as parent */ 9974 if (tr->dir) 9975 return 0; 9976 9977 if (WARN_ON(!tracefs_initialized())) 9978 return -ENODEV; 9979 9980 #ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED 9981 /* 9982 * As there may still be users that expect the tracing 9983 * files to exist in debugfs/tracing, we must automount 9984 * the tracefs file system there, so older tools still 9985 * work with the newer kernel. 
9986 */ 9987 tr->dir = debugfs_create_automount("tracing", NULL, 9988 trace_automount, NULL); 9989 #endif 9990 9991 return 0; 9992 } 9993 9994 extern struct trace_eval_map *__start_ftrace_eval_maps[]; 9995 extern struct trace_eval_map *__stop_ftrace_eval_maps[]; 9996 9997 struct workqueue_struct *trace_init_wq __initdata; 9998 static struct work_struct eval_map_work __initdata; 9999 static struct work_struct tracerfs_init_work __initdata; 10000 10001 static void __init eval_map_work_func(struct work_struct *work) 10002 { 10003 int len; 10004 10005 len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps; 10006 trace_event_update_with_eval_map(NULL, __start_ftrace_eval_maps, len); 10007 } 10008 10009 static int __init trace_eval_init(void) 10010 { 10011 INIT_WORK(&eval_map_work, eval_map_work_func); 10012 10013 trace_init_wq = alloc_workqueue("trace_init_wq", WQ_UNBOUND, 0); 10014 if (!trace_init_wq) { 10015 pr_err("Unable to allocate trace_init_wq\n"); 10016 /* Do work here */ 10017 eval_map_work_func(&eval_map_work); 10018 return -ENOMEM; 10019 } 10020 10021 queue_work(trace_init_wq, &eval_map_work); 10022 return 0; 10023 } 10024 10025 subsys_initcall(trace_eval_init); 10026 10027 static int __init trace_eval_sync(void) 10028 { 10029 /* Make sure the eval map updates are finished */ 10030 if (trace_init_wq) 10031 destroy_workqueue(trace_init_wq); 10032 return 0; 10033 } 10034 10035 late_initcall_sync(trace_eval_sync); 10036 10037 10038 #ifdef CONFIG_MODULES 10039 10040 bool module_exists(const char *module) 10041 { 10042 /* All modules have the symbol __this_module */ 10043 static const char this_mod[] = "__this_module"; 10044 char modname[MODULE_NAME_LEN + sizeof(this_mod) + 2]; 10045 unsigned long val; 10046 int n; 10047 10048 n = snprintf(modname, sizeof(modname), "%s:%s", module, this_mod); 10049 10050 if (n > sizeof(modname) - 1) 10051 return false; 10052 10053 val = module_kallsyms_lookup_name(modname); 10054 return val != 0; 10055 } 10056 10057 static void trace_module_add_evals(struct module *mod) 10058 { 10059 /* 10060 * Modules with bad taint do not have events created, do 10061 * not bother with enums either. 10062 */ 10063 if (trace_module_has_bad_taint(mod)) 10064 return; 10065 10066 /* Even if no trace_evals, this need to sanitize field types. 
*/ 10067 trace_event_update_with_eval_map(mod, mod->trace_evals, mod->num_trace_evals); 10068 } 10069 10070 #ifdef CONFIG_TRACE_EVAL_MAP_FILE 10071 static void trace_module_remove_evals(struct module *mod) 10072 { 10073 union trace_eval_map_item *map; 10074 union trace_eval_map_item **last = &trace_eval_maps; 10075 10076 if (!mod->num_trace_evals) 10077 return; 10078 10079 guard(mutex)(&trace_eval_mutex); 10080 10081 map = trace_eval_maps; 10082 10083 while (map) { 10084 if (map->head.mod == mod) 10085 break; 10086 map = trace_eval_jmp_to_tail(map); 10087 last = &map->tail.next; 10088 map = map->tail.next; 10089 } 10090 if (!map) 10091 return; 10092 10093 *last = trace_eval_jmp_to_tail(map)->tail.next; 10094 kfree(map); 10095 } 10096 #else 10097 static inline void trace_module_remove_evals(struct module *mod) { } 10098 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */ 10099 10100 static void trace_module_record(struct module *mod, bool add) 10101 { 10102 struct trace_array *tr; 10103 unsigned long flags; 10104 10105 list_for_each_entry(tr, &ftrace_trace_arrays, list) { 10106 flags = tr->flags & (TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT); 10107 /* Update any persistent trace array that has already been started */ 10108 if (flags == TRACE_ARRAY_FL_BOOT && add) { 10109 guard(mutex)(&scratch_mutex); 10110 save_mod(mod, tr); 10111 } else if (flags & TRACE_ARRAY_FL_LAST_BOOT) { 10112 /* Update delta if the module loaded in previous boot */ 10113 make_mod_delta(mod, tr); 10114 } 10115 } 10116 } 10117 10118 static int trace_module_notify(struct notifier_block *self, 10119 unsigned long val, void *data) 10120 { 10121 struct module *mod = data; 10122 10123 switch (val) { 10124 case MODULE_STATE_COMING: 10125 trace_module_add_evals(mod); 10126 trace_module_record(mod, true); 10127 break; 10128 case MODULE_STATE_GOING: 10129 trace_module_remove_evals(mod); 10130 trace_module_record(mod, false); 10131 break; 10132 } 10133 10134 return NOTIFY_OK; 10135 } 10136 10137 static struct notifier_block trace_module_nb = { 10138 .notifier_call = trace_module_notify, 10139 .priority = 0, 10140 }; 10141 #endif /* CONFIG_MODULES */ 10142 10143 static __init void tracer_init_tracefs_work_func(struct work_struct *work) 10144 { 10145 10146 event_trace_init(); 10147 10148 init_tracer_tracefs(&global_trace, NULL); 10149 ftrace_init_tracefs_toplevel(&global_trace, NULL); 10150 10151 trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL, 10152 &global_trace, &tracing_thresh_fops); 10153 10154 trace_create_file("README", TRACE_MODE_READ, NULL, 10155 NULL, &tracing_readme_fops); 10156 10157 trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL, 10158 NULL, &tracing_saved_cmdlines_fops); 10159 10160 trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL, 10161 NULL, &tracing_saved_cmdlines_size_fops); 10162 10163 trace_create_file("saved_tgids", TRACE_MODE_READ, NULL, 10164 NULL, &tracing_saved_tgids_fops); 10165 10166 trace_create_eval_file(NULL); 10167 10168 #ifdef CONFIG_MODULES 10169 register_module_notifier(&trace_module_nb); 10170 #endif 10171 10172 #ifdef CONFIG_DYNAMIC_FTRACE 10173 trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL, 10174 NULL, &tracing_dyn_info_fops); 10175 #endif 10176 10177 create_trace_instances(NULL); 10178 10179 update_tracer_options(); 10180 } 10181 10182 static __init int tracer_init_tracefs(void) 10183 { 10184 int ret; 10185 10186 trace_access_lock_init(); 10187 10188 ret = tracing_init_dentry(); 10189 if (ret) 10190 return 0; 10191 10192 if (trace_init_wq) { 
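		/*
		 * trace_eval_init() created this workqueue for the eval map
		 * update; queue the tracefs setup on it as well so boot does
		 * not stall here. trace_eval_sync() drains both works when it
		 * destroys the workqueue.
		 */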
static __init void tracer_init_tracefs_work_func(struct work_struct *work)
{

	event_trace_init();

	init_tracer_tracefs(&global_trace, NULL);
	ftrace_init_tracefs_toplevel(&global_trace, NULL);

	trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
			&global_trace, &tracing_thresh_fops);

	trace_create_file("README", TRACE_MODE_READ, NULL,
			NULL, &tracing_readme_fops);

	trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
			NULL, &tracing_saved_cmdlines_fops);

	trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
			NULL, &tracing_saved_cmdlines_size_fops);

	trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
			NULL, &tracing_saved_tgids_fops);

	trace_create_eval_file(NULL);

#ifdef CONFIG_MODULES
	register_module_notifier(&trace_module_nb);
#endif

#ifdef CONFIG_DYNAMIC_FTRACE
	trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
			NULL, &tracing_dyn_info_fops);
#endif

	create_trace_instances(NULL);

	update_tracer_options();
}

static __init int tracer_init_tracefs(void)
{
	int ret;

	trace_access_lock_init();

	ret = tracing_init_dentry();
	if (ret)
		return 0;

	if (trace_init_wq) {
		INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
		queue_work(trace_init_wq, &tracerfs_init_work);
	} else {
		tracer_init_tracefs_work_func(NULL);
	}

	if (rv_init_interface())
		pr_err("RV: Error while creating the RV interface\n");

	return 0;
}

fs_initcall(tracer_init_tracefs);

static int trace_die_panic_handler(struct notifier_block *self,
				unsigned long ev, void *unused);

static struct notifier_block trace_panic_notifier = {
	.notifier_call = trace_die_panic_handler,
	.priority = INT_MAX - 1,
};

static struct notifier_block trace_die_notifier = {
	.notifier_call = trace_die_panic_handler,
	.priority = INT_MAX - 1,
};

/*
 * The idea is to execute the following die/panic callback early, in order
 * to avoid showing irrelevant information in the trace (like other panic
 * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
 * warnings get disabled (to prevent potential log flooding).
 */
static int trace_die_panic_handler(struct notifier_block *self,
				unsigned long ev, void *unused)
{
	if (!ftrace_dump_on_oops_enabled())
		return NOTIFY_DONE;

	/* The die notifier requires DIE_OOPS to trigger */
	if (self == &trace_die_notifier && ev != DIE_OOPS)
		return NOTIFY_DONE;

	ftrace_dump(DUMP_PARAM);

	return NOTIFY_DONE;
}

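/*
 * Note: the two notifier blocks above are not registered here; the
 * registration happens in tracer_alloc_buffers() further down, via
 * atomic_notifier_chain_register(&panic_notifier_list, ...) and
 * register_die_notifier().  The handler runs in die/panic context, so it
 * only checks whether dumping is enabled and then dumps the buffers.
 */
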
/*
 * printk is set to a max of 1024, we really don't need it that big.
 * Nothing should be printing 1000 characters anyway.
 */
#define TRACE_MAX_PRINT 1000

/*
 * Define here KERN_TRACE so that we have one place to modify
 * it if we decide to change what log level the ftrace dump
 * should be at.
 */
#define KERN_TRACE KERN_EMERG

void
trace_printk_seq(struct trace_seq *s)
{
	/* Probably should print a warning here. */
	if (s->seq.len >= TRACE_MAX_PRINT)
		s->seq.len = TRACE_MAX_PRINT;

	/*
	 * More paranoid code. Although the buffer size is set to
	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
	 * an extra layer of protection.
	 */
	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
		s->seq.len = s->seq.size - 1;

	/* should be NUL terminated, but we are paranoid. */
	s->buffer[s->seq.len] = 0;

	printk(KERN_TRACE "%s", s->buffer);

	trace_seq_init(s);
}

static void trace_init_iter(struct trace_iterator *iter, struct trace_array *tr)
{
	iter->tr = tr;
	iter->trace = iter->tr->current_trace;
	iter->cpu_file = RING_BUFFER_ALL_CPUS;
	iter->array_buffer = &tr->array_buffer;

	if (iter->trace && iter->trace->open)
		iter->trace->open(iter);

	/* Annotate start of buffers if we had overruns */
	if (ring_buffer_overruns(iter->array_buffer->buffer))
		iter->iter_flags |= TRACE_FILE_ANNOTATE;

	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
	if (trace_clocks[iter->tr->clock_id].in_ns)
		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;

	/* Cannot use kmalloc for iter.temp and iter.fmt */
	iter->temp = static_temp_buf;
	iter->temp_size = STATIC_TEMP_BUF_SIZE;
	iter->fmt = static_fmt_buf;
	iter->fmt_size = STATIC_FMT_BUF_SIZE;
}

void trace_init_global_iter(struct trace_iterator *iter)
{
	trace_init_iter(iter, &global_trace);
}

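/*
 * A minimal sketch (not built) of how an in-kernel dumper can drive the
 * iterator set up by trace_init_global_iter(); it is modeled on
 * ftrace_dump_one() below, which is the authoritative user in this file
 * (and which additionally disables tracing and IRQs around the loop).
 */
#if 0
static void example_dump_global_buffer(void)
{
	/* static: a trace_iterator is too big for the stack */
	static struct trace_iterator iter;

	trace_init_global_iter(&iter);

	while (!trace_empty(&iter)) {
		trace_iterator_reset(&iter);
		if (trace_find_next_entry_inc(&iter)) {
			if (print_trace_line(&iter) != TRACE_TYPE_NO_CONSUME)
				trace_consume(&iter);
			trace_printk_seq(&iter.seq);
		}
	}
}
#endif
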
static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_mode)
{
	/* use static because iter can be a bit big for the stack */
	static struct trace_iterator iter;
	unsigned int old_userobj;
	unsigned long flags;
	int cnt = 0;

	/*
	 * Always turn off tracing when we dump.
	 * We don't need to show trace output of what happens
	 * between multiple crashes.
	 *
	 * If the user does a sysrq-z, then they can re-enable
	 * tracing with echo 1 > tracing_on.
	 */
	tracer_tracing_off(tr);

	local_irq_save(flags);

	/* Simulate the iterator */
	trace_init_iter(&iter, tr);

	/* While dumping, do not allow the buffer to be enabled */
	tracer_tracing_disable(tr);

	old_userobj = tr->trace_flags & TRACE_ITER(SYM_USEROBJ);

	/* don't look at user memory in panic mode */
	tr->trace_flags &= ~TRACE_ITER(SYM_USEROBJ);

	if (dump_mode == DUMP_ORIG)
		iter.cpu_file = raw_smp_processor_id();
	else
		iter.cpu_file = RING_BUFFER_ALL_CPUS;

	if (tr == &global_trace)
		printk(KERN_TRACE "Dumping ftrace buffer:\n");
	else
		printk(KERN_TRACE "Dumping ftrace instance %s buffer:\n", tr->name);

	/* Did function tracer already get disabled? */
	if (ftrace_is_dead()) {
		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
		printk("# MAY BE MISSING FUNCTION EVENTS\n");
	}

	/*
	 * We need to stop all tracing on all CPUs to read
	 * the next buffer. This is a bit expensive, but is
	 * not done often. We fill all that we can read,
	 * and then release the locks again.
	 */

	while (!trace_empty(&iter)) {

		if (!cnt)
			printk(KERN_TRACE "---------------------------------\n");

		cnt++;

		trace_iterator_reset(&iter);
		iter.iter_flags |= TRACE_FILE_LAT_FMT;

		if (trace_find_next_entry_inc(&iter) != NULL) {
			int ret;

			ret = print_trace_line(&iter);
			if (ret != TRACE_TYPE_NO_CONSUME)
				trace_consume(&iter);

			trace_printk_seq(&iter.seq);
		}
		touch_nmi_watchdog();
	}

	if (!cnt)
		printk(KERN_TRACE " (ftrace buffer empty)\n");
	else
		printk(KERN_TRACE "---------------------------------\n");

	tr->trace_flags |= old_userobj;

	tracer_tracing_enable(tr);
	local_irq_restore(flags);
}

static void ftrace_dump_by_param(void)
{
	bool first_param = true;
	char dump_param[MAX_TRACER_SIZE];
	char *buf, *token, *inst_name;
	struct trace_array *tr;

	strscpy(dump_param, ftrace_dump_on_oops, MAX_TRACER_SIZE);
	buf = dump_param;

	while ((token = strsep(&buf, ",")) != NULL) {
		if (first_param) {
			first_param = false;
			if (!strcmp("0", token))
				continue;
			else if (!strcmp("1", token)) {
				ftrace_dump_one(&global_trace, DUMP_ALL);
				continue;
			}
			else if (!strcmp("2", token) ||
				 !strcmp("orig_cpu", token)) {
				ftrace_dump_one(&global_trace, DUMP_ORIG);
				continue;
			}
		}

		inst_name = strsep(&token, "=");
		tr = trace_array_find(inst_name);
		if (!tr) {
			printk(KERN_TRACE "Instance %s not found\n", inst_name);
			continue;
		}

		if (token && (!strcmp("2", token) ||
			      !strcmp("orig_cpu", token)))
			ftrace_dump_one(tr, DUMP_ORIG);
		else
			ftrace_dump_one(tr, DUMP_ALL);
	}
}

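/*
 * For reference, the strings accepted in ftrace_dump_on_oops by
 * ftrace_dump_by_param() above (illustrative; "foo" and "bar" are
 * made-up instance names):
 *
 *	"0"			- dumping disabled
 *	"1"			- dump the global buffer, all CPUs
 *	"2" / "orig_cpu"	- dump the global buffer, only the CPU that
 *				  triggered the dump
 *	"foo,bar=orig_cpu"	- dump instance foo (all CPUs) and instance
 *				  bar (triggering CPU only)
 */
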
void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
{
	static atomic_t dump_running;

	/* Only allow one dump user at a time. */
	if (atomic_inc_return(&dump_running) != 1) {
		atomic_dec(&dump_running);
		return;
	}

	switch (oops_dump_mode) {
	case DUMP_ALL:
		ftrace_dump_one(&global_trace, DUMP_ALL);
		break;
	case DUMP_ORIG:
		ftrace_dump_one(&global_trace, DUMP_ORIG);
		break;
	case DUMP_PARAM:
		ftrace_dump_by_param();
		break;
	case DUMP_NONE:
		break;
	default:
		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
		ftrace_dump_one(&global_trace, DUMP_ALL);
	}

	atomic_dec(&dump_running);
}
EXPORT_SYMBOL_GPL(ftrace_dump);

#define WRITE_BUFSIZE 4096

ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
				size_t count, loff_t *ppos,
				int (*createfn)(const char *))
{
	char *kbuf __free(kfree) = NULL;
	char *buf, *tmp;
	int ret = 0;
	size_t done = 0;
	size_t size;

	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
	if (!kbuf)
		return -ENOMEM;

	while (done < count) {
		size = count - done;

		if (size >= WRITE_BUFSIZE)
			size = WRITE_BUFSIZE - 1;

		if (copy_from_user(kbuf, buffer + done, size))
			return -EFAULT;

		kbuf[size] = '\0';
		buf = kbuf;
		do {
			tmp = strchr(buf, '\n');
			if (tmp) {
				*tmp = '\0';
				size = tmp - buf + 1;
			} else {
				size = strlen(buf);
				if (done + size < count) {
					if (buf != kbuf)
						break;
					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
					pr_warn("Line length is too long: Should be less than %d\n",
						WRITE_BUFSIZE - 2);
					return -EINVAL;
				}
			}
			done += size;

			/* Remove comments */
			tmp = strchr(buf, '#');

			if (tmp)
				*tmp = '\0';

			ret = createfn(buf);
			if (ret)
				return ret;
			buf += size;

		} while (done < count);
	}
	return done;
}

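/*
 * A minimal sketch (not built, made-up example_* names) of how a tracefs
 * command file typically uses trace_parse_run_command() above: the write
 * handler hands the user buffer over, and createfn() is invoked once per
 * newline-terminated, comment-stripped line.
 */
#if 0
static int example_create_cmd(const char *raw_command)
{
	pr_info("got command: %s\n", raw_command);
	return 0;
}

static ssize_t example_cmd_write(struct file *file, const char __user *buffer,
				 size_t count, loff_t *ppos)
{
	return trace_parse_run_command(file, buffer, count, ppos,
				       example_create_cmd);
}

static const struct file_operations example_cmd_fops = {
	.open	= tracing_open_generic,
	.write	= example_cmd_write,
};
#endif
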
#ifdef CONFIG_TRACER_SNAPSHOT
__init static bool tr_needs_alloc_snapshot(const char *name)
{
	char *test;
	int len = strlen(name);
	bool ret;

	if (!boot_snapshot_index)
		return false;

	if (strncmp(name, boot_snapshot_info, len) == 0 &&
	    boot_snapshot_info[len] == '\t')
		return true;

	test = kmalloc(strlen(name) + 3, GFP_KERNEL);
	if (!test)
		return false;

	sprintf(test, "\t%s\t", name);
	ret = strstr(boot_snapshot_info, test) == NULL;
	kfree(test);
	return ret;
}

__init static void do_allocate_snapshot(const char *name)
{
	if (!tr_needs_alloc_snapshot(name))
		return;

	/*
	 * When allocate_snapshot is set, the next call to
	 * allocate_trace_buffers() (called by trace_array_get_by_name())
	 * will allocate the snapshot buffer. That will also clear
	 * this flag.
	 */
	allocate_snapshot = true;
}
#else
static inline void do_allocate_snapshot(const char *name) { }
#endif

__init static int backup_instance_area(const char *backup,
			unsigned long *addr, phys_addr_t *size)
{
	struct trace_array *backup_tr;
	void *allocated_vaddr = NULL;

	backup_tr = trace_array_get_by_name(backup, NULL);
	if (!backup_tr) {
		pr_warn("Tracing: Instance %s is not found.\n", backup);
		return -ENOENT;
	}

	if (!(backup_tr->flags & TRACE_ARRAY_FL_BOOT)) {
		pr_warn("Tracing: Instance %s is not boot mapped.\n", backup);
		trace_array_put(backup_tr);
		return -EINVAL;
	}

	*size = backup_tr->range_addr_size;

	allocated_vaddr = vzalloc(*size);
	if (!allocated_vaddr) {
		pr_warn("Tracing: Failed to allocate memory for copying instance %s (size 0x%lx)\n",
			backup, (unsigned long)*size);
		trace_array_put(backup_tr);
		return -ENOMEM;
	}

	memcpy(allocated_vaddr,
	       (void *)backup_tr->range_addr_start, (size_t)*size);
	*addr = (unsigned long)allocated_vaddr;

	trace_array_put(backup_tr);
	return 0;
}

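/*
 * backup_instance_area() is only used by enable_instances() below: when a
 * boot-time instance description has the form "new=old" (made-up names),
 * the memory of the boot-mapped instance "old" is copied into a vmalloc'd
 * area so that "new" owns a private copy.  Such instances are flagged
 * TRACE_ARRAY_FL_VMALLOC below so the copy is later released with vfree().
 */
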
__init static void enable_instances(void)
{
	struct trace_array *tr;
	bool memmap_area = false;
	char *curr_str;
	char *name;
	char *str;
	char *tok;

	/* A tab is always appended */
	boot_instance_info[boot_instance_index - 1] = '\0';
	str = boot_instance_info;

	while ((curr_str = strsep(&str, "\t"))) {
		phys_addr_t start = 0;
		phys_addr_t size = 0;
		unsigned long addr = 0;
		bool traceprintk = false;
		bool traceoff = false;
		char *flag_delim;
		char *addr_delim;
		char *rname __free(kfree) = NULL;
		char *backup;

		tok = strsep(&curr_str, ",");

		name = strsep(&tok, "=");
		backup = tok;

		flag_delim = strchr(name, '^');
		addr_delim = strchr(name, '@');

		if (addr_delim)
			*addr_delim++ = '\0';

		if (flag_delim)
			*flag_delim++ = '\0';

		if (backup) {
			if (backup_instance_area(backup, &addr, &size) < 0)
				continue;
		}

		if (flag_delim) {
			char *flag;

			while ((flag = strsep(&flag_delim, "^"))) {
				if (strcmp(flag, "traceoff") == 0) {
					traceoff = true;
				} else if ((strcmp(flag, "printk") == 0) ||
					   (strcmp(flag, "traceprintk") == 0) ||
					   (strcmp(flag, "trace_printk") == 0)) {
					traceprintk = true;
				} else {
					pr_info("Tracing: Invalid instance flag '%s' for %s\n",
						flag, name);
				}
			}
		}

		tok = addr_delim;
		if (tok && isdigit(*tok)) {
			start = memparse(tok, &tok);
			if (!start) {
				pr_warn("Tracing: Invalid boot instance address for %s\n",
					name);
				continue;
			}
			if (*tok != ':') {
				pr_warn("Tracing: No size specified for instance %s\n", name);
				continue;
			}
			tok++;
			size = memparse(tok, &tok);
			if (!size) {
				pr_warn("Tracing: Invalid boot instance size for %s\n",
					name);
				continue;
			}
			memmap_area = true;
		} else if (tok) {
			if (!reserve_mem_find_by_name(tok, &start, &size)) {
				start = 0;
				pr_warn("Failed to map boot instance %s to %s\n", name, tok);
				continue;
			}
			rname = kstrdup(tok, GFP_KERNEL);
		}

		if (start) {
			/* Start and size must be page aligned */
			if (start & ~PAGE_MASK) {
				pr_warn("Tracing: mapping start addr %pa is not page aligned\n", &start);
				continue;
			}
			if (size & ~PAGE_MASK) {
				pr_warn("Tracing: mapping size %pa is not page aligned\n", &size);
				continue;
			}

			if (memmap_area)
				addr = map_pages(start, size);
			else
				addr = (unsigned long)phys_to_virt(start);
			if (addr) {
				pr_info("Tracing: mapped boot instance %s at physical memory %pa of size 0x%lx\n",
					name, &start, (unsigned long)size);
			} else {
				pr_warn("Tracing: Failed to map boot instance %s\n", name);
				continue;
			}
		} else {
			/* Only non-mapped buffers have snapshot buffers */
			if (IS_ENABLED(CONFIG_TRACER_SNAPSHOT))
				do_allocate_snapshot(name);
		}

		tr = trace_array_create_systems(name, NULL, addr, size);
		if (IS_ERR(tr)) {
			pr_warn("Tracing: Failed to create instance buffer %s\n", curr_str);
			continue;
		}

		if (traceoff)
			tracer_tracing_off(tr);

		if (traceprintk)
			update_printk_trace(tr);

		/*
		 * memmap'd buffers can not be freed.
		 */
		if (memmap_area) {
			tr->flags |= TRACE_ARRAY_FL_MEMMAP;
			tr->ref++;
		}

		/*
		 * Backup buffers can be freed but need vfree().
		 */
		if (backup)
			tr->flags |= TRACE_ARRAY_FL_VMALLOC;

		if (start || backup) {
			tr->flags |= TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT;
			tr->range_name = no_free_ptr(rname);
		}

		while ((tok = strsep(&curr_str, ","))) {
			early_enable_events(tr, tok, true);
		}
	}
}

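/*
 * For reference, the shape of one boot-time instance description as parsed
 * by enable_instances() above (illustrative, made-up names; the strings
 * arrive via the trace_instance= boot parameter whose __setup handler is
 * earlier in this file):
 *
 *	foo				- plain instance "foo"
 *	foo^traceoff^traceprintk	- created with tracing off and
 *					  trace_printk() redirected to it
 *	foo@0x1000000:2M		- mapped to a fixed physical range
 *	foo@myresv			- mapped to reserve_mem region "myresv"
 *	foo=bar				- private copy of boot-mapped instance "bar"
 *	foo,sched:sched_switch		- enable an event in the new instance
 */
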
__init static int tracer_alloc_buffers(void)
{
	int ring_buf_size;
	int ret = -ENOMEM;

	if (security_locked_down(LOCKDOWN_TRACEFS)) {
		pr_warn("Tracing disabled due to lockdown\n");
		return -EPERM;
	}

	/*
	 * Make sure we don't accidentally add more trace options
	 * than we have bits for.
	 */
	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);

	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
		return -ENOMEM;

	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
		goto out_free_buffer_mask;

	/* Only allocate trace_printk buffers if a trace_printk exists */
	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
		/* Must be called before global_trace.buffer is allocated */
		trace_printk_init_buffers();

	/* To save memory, keep the ring buffer size to its minimum */
	if (global_trace.ring_buffer_expanded)
		ring_buf_size = trace_buf_size;
	else
		ring_buf_size = 1;

	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);

	raw_spin_lock_init(&global_trace.start_lock);

	/*
	 * The prepare callbacks allocate some memory for the ring buffer. We
	 * don't free the buffer if the CPU goes down. If we were to free
	 * the buffer, then the user would lose any trace that was in the
	 * buffer. The memory will be removed once the "instance" is removed.
	 */
	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
				      "trace/RB:prepare", trace_rb_cpu_prepare,
				      NULL);
	if (ret < 0)
		goto out_free_cpumask;
	/* Used for event triggers */
	ret = -ENOMEM;
	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
	if (!temp_buffer)
		goto out_rm_hp_state;

	if (trace_create_savedcmd() < 0)
		goto out_free_temp_buffer;

	if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
		goto out_free_savedcmd;

	/* TODO: make the number of buffers hot pluggable with CPUs */
	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
		goto out_free_pipe_cpumask;
	}
	if (global_trace.buffer_disabled)
		tracing_off();

	if (trace_boot_clock) {
		ret = tracing_set_clock(&global_trace, trace_boot_clock);
		if (ret < 0)
			pr_warn("Trace clock %s not defined, going back to default\n",
				trace_boot_clock);
	}

	/*
	 * register_tracer() might reference current_trace, so it
	 * needs to be set before we register anything. This is
	 * just a bootstrap of current_trace anyway.
	 */
	global_trace.current_trace = &nop_trace;
	global_trace.current_trace_flags = nop_trace.flags;

	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
#ifdef CONFIG_TRACER_SNAPSHOT
	spin_lock_init(&global_trace.snapshot_trigger_lock);
#endif
	ftrace_init_global_array_ops(&global_trace);

#ifdef CONFIG_MODULES
	INIT_LIST_HEAD(&global_trace.mod_events);
#endif

	init_trace_flags_index(&global_trace);

	INIT_LIST_HEAD(&global_trace.tracers);

	/* All seems OK, enable tracing */
	tracing_disabled = 0;

	atomic_notifier_chain_register(&panic_notifier_list,
				       &trace_panic_notifier);

	register_die_notifier(&trace_die_notifier);

	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;

	global_trace.syscall_buf_sz = syscall_buf_size;

	INIT_LIST_HEAD(&global_trace.systems);
	INIT_LIST_HEAD(&global_trace.events);
	INIT_LIST_HEAD(&global_trace.hist_vars);
	INIT_LIST_HEAD(&global_trace.err_log);
	list_add(&global_trace.marker_list, &marker_copies);
	list_add(&global_trace.list, &ftrace_trace_arrays);

	register_tracer(&nop_trace);

	/* Function tracing may start here (via kernel command line) */
	init_function_trace();

	apply_trace_boot_options();

	register_snapshot_cmd();

	return 0;

out_free_pipe_cpumask:
	free_cpumask_var(global_trace.pipe_cpumask);
out_free_savedcmd:
	trace_free_saved_cmdlines_buffer();
out_free_temp_buffer:
	ring_buffer_free(temp_buffer);
out_rm_hp_state:
	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
out_free_cpumask:
	free_cpumask_var(global_trace.tracing_cpumask);
out_free_buffer_mask:
	free_cpumask_var(tracing_buffer_mask);
	return ret;
}

#ifdef CONFIG_FUNCTION_TRACER
/* Used to set module cached ftrace filtering at boot up */
struct trace_array *trace_get_global_array(void)
{
	return &global_trace;
}
#endif

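/*
 * Note on the out_* labels above: each one undoes exactly the allocations
 * made before the corresponding failure point, in reverse order of setup.
 * Any new allocation added to tracer_alloc_buffers() needs a matching
 * label inserted at the right position in that ladder.
 */
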
void __init ftrace_boot_snapshot(void)
{
#ifdef CONFIG_TRACER_SNAPSHOT
	struct trace_array *tr;

	if (!snapshot_at_boot)
		return;

	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
		if (!tr->allocated_snapshot)
			continue;

		tracing_snapshot_instance(tr);
		trace_array_puts(tr, "** Boot snapshot taken **\n");
	}
#endif
}

void __init early_trace_init(void)
{
	if (tracepoint_printk) {
		tracepoint_print_iter =
			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
		if (MEM_FAIL(!tracepoint_print_iter,
			     "Failed to allocate trace iterator\n"))
			tracepoint_printk = 0;
		else
			static_key_enable(&tracepoint_printk_key.key);
	}
	tracer_alloc_buffers();

	init_events();
}

void __init trace_init(void)
{
	trace_event_init();

	if (boot_instance_index)
		enable_instances();
}

__init static void clear_boot_tracer(void)
{
	/*
	 * The default tracer's name at boot is stored in an init section.
	 * This function is called in late init. If we did not find the
	 * boot tracer, then clear it out, to prevent later registration
	 * from accessing the buffer that is about to be freed.
	 */
	if (!default_bootup_tracer)
		return;

	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
	       default_bootup_tracer);
	default_bootup_tracer = NULL;
}

#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
__init static void tracing_set_default_clock(void)
{
	/* sched_clock_stable() is determined in late_initcall */
	if (!trace_boot_clock && !sched_clock_stable()) {
		if (security_locked_down(LOCKDOWN_TRACEFS)) {
			pr_warn("Can not set tracing clock due to lockdown\n");
			return;
		}

		printk(KERN_WARNING
		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
		       "If you want to keep using the local clock, then add:\n"
		       " \"trace_clock=local\"\n"
		       "on the kernel command line\n");
		tracing_set_clock(&global_trace, "global");
	}
}
#else
static inline void tracing_set_default_clock(void) { }
#endif

__init static int late_trace_init(void)
{
	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
		static_key_disable(&tracepoint_printk_key.key);
		tracepoint_printk = 0;
	}

	if (traceoff_after_boot)
		tracing_off();

	tracing_set_default_clock();
	clear_boot_tracer();
	return 0;
}

late_initcall_sync(late_trace_init);
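
/*
 * Rough boot-time ordering of the initialization entry points in this
 * file (early_trace_init() and trace_init() are called directly from
 * start_kernel(); the rest run as initcalls):
 *
 *	early_trace_init()	- tracer_alloc_buffers(), init_events()
 *	trace_init()		- trace_event_init(), enable_instances()
 *	trace_eval_init()	- subsys_initcall: kick off eval map updates
 *	tracer_init_tracefs()	- fs_initcall: create the tracefs files
 *	trace_eval_sync()	- late_initcall_sync: wait for eval map work
 *	late_trace_init()	- late_initcall_sync: default clock, cleanups
 */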