// SPDX-License-Identifier: GPL-2.0
/*
 * ring buffer based function tracer
 *
 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
 *
 * Originally taken from the RT patch by:
 *    Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Based on code from the latency_tracer, that is:
 *  Copyright (C) 2004-2006 Ingo Molnar
 *  Copyright (C) 2004 Nadia Yvette Chambers
 */
#include <linux/ring_buffer.h>
#include <linux/utsname.h>
#include <linux/stacktrace.h>
#include <linux/writeback.h>
#include <linux/kallsyms.h>
#include <linux/security.h>
#include <linux/seq_file.h>
#include <linux/irqflags.h>
#include <linux/syscalls.h>
#include <linux/debugfs.h>
#include <linux/tracefs.h>
#include <linux/pagemap.h>
#include <linux/hardirq.h>
#include <linux/linkage.h>
#include <linux/uaccess.h>
#include <linux/cleanup.h>
#include <linux/vmalloc.h>
#include <linux/ftrace.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/splice.h>
#include <linux/kdebug.h>
#include <linux/string.h>
#include <linux/mount.h>
#include <linux/rwsem.h>
#include <linux/slab.h>
#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/panic_notifier.h>
#include <linux/poll.h>
#include <linux/nmi.h>
#include <linux/fs.h>
#include <linux/trace.h>
#include <linux/sched/clock.h>
#include <linux/sched/rt.h>
#include <linux/fsnotify.h>
#include <linux/irq_work.h>
#include <linux/workqueue.h>
#include <linux/sort.h>
#include <linux/io.h> /* vmap_page_range() */
#include <linux/fs_context.h>

#include <asm/setup.h> /* COMMAND_LINE_SIZE */

#include "trace.h"
#include "trace_output.h"

#ifdef CONFIG_FTRACE_STARTUP_TEST
/*
 * We need to change this state when a selftest is running.
 * A selftest looks into the ring buffer to count the entries
 * inserted during the selftest, but concurrent insertions into
 * the ring buffer, such as trace_printk(), could occur at the
 * same time, giving false positive or negative results.
 */
bool __read_mostly tracing_selftest_running;

/*
 * If boot-time tracing (including tracers/events via the kernel cmdline)
 * is running, we do not want to run the selftests.
 */
bool __read_mostly tracing_selftest_disabled;

void __init disable_tracing_selftest(const char *reason)
{
	if (!tracing_selftest_disabled) {
		tracing_selftest_disabled = true;
		pr_info("Ftrace startup test is disabled due to %s\n", reason);
	}
}
#else
#define tracing_selftest_disabled	0
#endif

/* Pipe tracepoints to printk */
static struct trace_iterator *tracepoint_print_iter;
int tracepoint_printk;
static bool tracepoint_printk_stop_on_boot __initdata;
static bool traceoff_after_boot __initdata;
static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);

/* Store tracers and their flags per instance */
struct tracers {
	struct list_head	list;
	struct tracer		*tracer;
	struct tracer_flags	*flags;
};

/*
 * To prevent the comm cache from being overwritten when no
 * tracing is active, only save the comm when a trace event
 * occurred.
 */
DEFINE_PER_CPU(bool, trace_taskinfo_save);

/*
 * Kill all tracing for good (never come back).
 * It is initialized to 1 but will turn to zero if the initialization
 * of the tracer is successful. But that is the only place that sets
 * this back to zero.
 */
int tracing_disabled = 1;

cpumask_var_t __read_mostly	tracing_buffer_mask;

#define MAX_TRACER_SIZE		100
/*
 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
 *
 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
 * is set, then ftrace_dump is called. This will output the contents
 * of the ftrace buffers to the console. This is very useful for
 * capturing traces that lead to crashes and outputting them to a
 * serial console.
 *
 * It is off by default; you can enable it either by specifying
 * "ftrace_dump_on_oops" on the kernel command line, or by setting
 * /proc/sys/kernel/ftrace_dump_on_oops.
 * Set 1 if you want to dump buffers of all CPUs
 * Set 2 if you want to dump the buffer of the CPU that triggered oops
 * Set instance name if you want to dump the specific trace instance
 * Multiple instance dump is also supported, and instances are separated
 * by commas.
 */
/* Defaults to the string "0", i.e. disabled */
static char ftrace_dump_on_oops[MAX_TRACER_SIZE] = "0";

/* When set, tracing will stop when a WARN*() is hit */
static int __disable_trace_on_warning;

int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
			     void *buffer, size_t *lenp, loff_t *ppos);
static const struct ctl_table trace_sysctl_table[] = {
	{
		.procname	= "ftrace_dump_on_oops",
		.data		= &ftrace_dump_on_oops,
		.maxlen		= MAX_TRACER_SIZE,
		.mode		= 0644,
		.proc_handler	= proc_dostring,
	},
	{
		.procname	= "traceoff_on_warning",
		.data		= &__disable_trace_on_warning,
		.maxlen		= sizeof(__disable_trace_on_warning),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "tracepoint_printk",
		.data		= &tracepoint_printk,
		.maxlen		= sizeof(tracepoint_printk),
		.mode		= 0644,
		.proc_handler	= tracepoint_printk_sysctl,
	},
};

static int __init init_trace_sysctls(void)
{
	register_sysctl_init("kernel", trace_sysctl_table);
	return 0;
}
subsys_initcall(init_trace_sysctls);
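/*
 * Illustrative user-space usage (not part of this file): the table above
 * registers the knobs under "kernel", so they appear in /proc/sys/kernel/,
 * e.g.:
 *
 *	echo 1 > /proc/sys/kernel/traceoff_on_warning
 *	echo 1 > /proc/sys/kernel/ftrace_dump_on_oops
 */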
#ifdef CONFIG_TRACE_EVAL_MAP_FILE
/* Map of enums to their values, for "eval_map" file */
struct trace_eval_map_head {
	struct module			*mod;
	unsigned long			length;
};

union trace_eval_map_item;

struct trace_eval_map_tail {
	/*
	 * "end" is first and points to NULL as it must be different
	 * than "mod" or "eval_string"
	 */
	union trace_eval_map_item	*next;
	const char			*end;	/* points to NULL */
};

static DEFINE_MUTEX(trace_eval_mutex);

/*
 * The trace_eval_maps are saved in an array with two extra elements,
 * one at the beginning, and one at the end. The beginning item contains
 * the count of the saved maps (head.length), and the module they
 * belong to if not built in (head.mod). The ending item contains a
 * pointer to the next array of saved eval_map items.
 */
union trace_eval_map_item {
	struct trace_eval_map		map;
	struct trace_eval_map_head	head;
	struct trace_eval_map_tail	tail;
};

static union trace_eval_map_item *trace_eval_maps;
#endif /* CONFIG_TRACE_EVAL_MAP_FILE */

int tracing_set_tracer(struct trace_array *tr, const char *buf);
static void ftrace_trace_userstack(struct trace_array *tr,
				   struct trace_buffer *buffer,
				   unsigned int trace_ctx);

static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
static char *default_bootup_tracer;

static bool allocate_snapshot;
static bool snapshot_at_boot;

static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
static int boot_instance_index;

static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
static int boot_snapshot_index;

static int __init set_cmdline_ftrace(char *str)
{
	strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
	default_bootup_tracer = bootup_tracer_buf;
	/* We are using ftrace early, expand it */
	trace_set_ring_buffer_expanded(NULL);
	return 1;
}
__setup("ftrace=", set_cmdline_ftrace);

int ftrace_dump_on_oops_enabled(void)
{
	if (!strcmp("0", ftrace_dump_on_oops))
		return 0;
	else
		return 1;
}

static int __init set_ftrace_dump_on_oops(char *str)
{
	if (!*str) {
		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
		return 1;
	}

	if (*str == ',') {
		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
		strscpy(ftrace_dump_on_oops + 1, str, MAX_TRACER_SIZE - 1);
		return 1;
	}

	if (*str++ == '=') {
		strscpy(ftrace_dump_on_oops, str, MAX_TRACER_SIZE);
		return 1;
	}

	return 0;
}
__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);

static int __init stop_trace_on_warning(char *str)
{
	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
		__disable_trace_on_warning = 1;
	return 1;
}
__setup("traceoff_on_warning", stop_trace_on_warning);

static int __init boot_alloc_snapshot(char *str)
{
	char *slot = boot_snapshot_info + boot_snapshot_index;
	int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
	int ret;

	if (str[0] == '=') {
		str++;
		if (strlen(str) >= left)
			return -1;

		ret = snprintf(slot, left, "%s\t", str);
		boot_snapshot_index += ret;
	} else {
		allocate_snapshot = true;
		/* We also need the main ring buffer expanded */
		trace_set_ring_buffer_expanded(NULL);
	}
	return 1;
}
__setup("alloc_snapshot", boot_alloc_snapshot);

static int __init boot_snapshot(char *str)
{
	snapshot_at_boot = true;
	boot_alloc_snapshot(str);
	return 1;
}
__setup("ftrace_boot_snapshot", boot_snapshot);

static int __init boot_instance(char *str)
{
	char *slot = boot_instance_info + boot_instance_index;
	int left = sizeof(boot_instance_info) - boot_instance_index;
	int ret;

	if (strlen(str) >= left)
		return -1;

	ret = snprintf(slot, left, "%s\t", str);
	boot_instance_index += ret;

	return 1;
}
__setup("trace_instance=", boot_instance);

static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;

static int __init set_trace_boot_options(char *str)
{
	strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
	return 1;
}
__setup("trace_options=", set_trace_boot_options);
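/*
 * Worked examples for the parsers above (illustrative only, derived from
 * the code): each __setup() handler receives whatever follows the
 * parameter name on the command line, so:
 *
 *	ftrace_dump_on_oops		-> ftrace_dump_on_oops = "1"
 *	ftrace_dump_on_oops=2		-> ftrace_dump_on_oops = "2"
 *	ftrace_dump_on_oops=foo,bar	-> ftrace_dump_on_oops = "foo,bar"
 *	ftrace_dump_on_oops,foo		-> ftrace_dump_on_oops = "1,foo"
 *
 * Similarly, repeated trace_instance= parameters accumulate in
 * boot_instance_info as tab-separated entries.
 */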
static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
static char *trace_boot_clock __initdata;

static int __init set_trace_boot_clock(char *str)
{
	strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
	trace_boot_clock = trace_boot_clock_buf;
	return 1;
}
__setup("trace_clock=", set_trace_boot_clock);

static int __init set_tracepoint_printk(char *str)
{
	/* Ignore the "tp_printk_stop_on_boot" param */
	if (*str == '_')
		return 0;

	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
		tracepoint_printk = 1;
	return 1;
}
__setup("tp_printk", set_tracepoint_printk);

static int __init set_tracepoint_printk_stop(char *str)
{
	tracepoint_printk_stop_on_boot = true;
	return 1;
}
__setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);

static int __init set_traceoff_after_boot(char *str)
{
	traceoff_after_boot = true;
	return 1;
}
__setup("traceoff_after_boot", set_traceoff_after_boot);

unsigned long long ns2usecs(u64 nsec)
{
	nsec += 500;
	do_div(nsec, 1000);
	return nsec;
}

static void
trace_process_export(struct trace_export *export,
		     struct ring_buffer_event *event, int flag)
{
	struct trace_entry *entry;
	unsigned int size = 0;

	if (export->flags & flag) {
		entry = ring_buffer_event_data(event);
		size = ring_buffer_event_length(event);
		export->write(export, entry, size);
	}
}

static DEFINE_MUTEX(ftrace_export_lock);

static struct trace_export __rcu *ftrace_exports_list __read_mostly;

static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);

static inline void ftrace_exports_enable(struct trace_export *export)
{
	if (export->flags & TRACE_EXPORT_FUNCTION)
		static_branch_inc(&trace_function_exports_enabled);

	if (export->flags & TRACE_EXPORT_EVENT)
		static_branch_inc(&trace_event_exports_enabled);

	if (export->flags & TRACE_EXPORT_MARKER)
		static_branch_inc(&trace_marker_exports_enabled);
}

static inline void ftrace_exports_disable(struct trace_export *export)
{
	if (export->flags & TRACE_EXPORT_FUNCTION)
		static_branch_dec(&trace_function_exports_enabled);

	if (export->flags & TRACE_EXPORT_EVENT)
		static_branch_dec(&trace_event_exports_enabled);

	if (export->flags & TRACE_EXPORT_MARKER)
		static_branch_dec(&trace_marker_exports_enabled);
}

static void ftrace_exports(struct ring_buffer_event *event, int flag)
{
	struct trace_export *export;

	guard(preempt_notrace)();

	export = rcu_dereference_raw_check(ftrace_exports_list);
	while (export) {
		trace_process_export(export, event, flag);
		export = rcu_dereference_raw_check(export->next);
	}
}

static inline void
add_trace_export(struct trace_export **list, struct trace_export *export)
{
	rcu_assign_pointer(export->next, *list);
	/*
	 * We are entering export into the list but another
	 * CPU might be walking that list. We need to make sure
	 * the export->next pointer is valid before another CPU sees
	 * the export pointer included in the list.
	 */
	rcu_assign_pointer(*list, export);
}
static inline int
rm_trace_export(struct trace_export **list, struct trace_export *export)
{
	struct trace_export **p;

	for (p = list; *p != NULL; p = &(*p)->next)
		if (*p == export)
			break;

	if (*p != export)
		return -1;

	rcu_assign_pointer(*p, (*p)->next);

	return 0;
}

static inline void
add_ftrace_export(struct trace_export **list, struct trace_export *export)
{
	ftrace_exports_enable(export);

	add_trace_export(list, export);
}

static inline int
rm_ftrace_export(struct trace_export **list, struct trace_export *export)
{
	int ret;

	ret = rm_trace_export(list, export);
	ftrace_exports_disable(export);

	return ret;
}

int register_ftrace_export(struct trace_export *export)
{
	if (WARN_ON_ONCE(!export->write))
		return -1;

	guard(mutex)(&ftrace_export_lock);

	add_ftrace_export(&ftrace_exports_list, export);

	return 0;
}
EXPORT_SYMBOL_GPL(register_ftrace_export);

int unregister_ftrace_export(struct trace_export *export)
{
	guard(mutex)(&ftrace_export_lock);
	return rm_ftrace_export(&ftrace_exports_list, export);
}
EXPORT_SYMBOL_GPL(unregister_ftrace_export);
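/*
 * Minimal usage sketch for the export API above (illustrative; the
 * "my_export_write" callback and its destination are made up, and the
 * exact ->write() prototype lives in <linux/trace.h>):
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int size)
 *	{
 *		// forward the raw trace entry to some other channel
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write = my_export_write,
 *		.flags = TRACE_EXPORT_EVENT,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */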
/* trace_flags holds trace_options default values */
#define TRACE_DEFAULT_FLAGS						\
	(FUNCTION_DEFAULT_FLAGS | FPROFILE_DEFAULT_FLAGS |		\
	 TRACE_ITER(PRINT_PARENT) | TRACE_ITER(PRINTK) |		\
	 TRACE_ITER(ANNOTATE) | TRACE_ITER(CONTEXT_INFO) |		\
	 TRACE_ITER(RECORD_CMD) | TRACE_ITER(OVERWRITE) |		\
	 TRACE_ITER(IRQ_INFO) | TRACE_ITER(MARKERS) |			\
	 TRACE_ITER(HASH_PTR) | TRACE_ITER(TRACE_PRINTK) |		\
	 TRACE_ITER(COPY_MARKER))

/* trace_options that are only supported by global_trace */
#define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER(PRINTK) |			\
	TRACE_ITER(PRINTK_MSGONLY) | TRACE_ITER(RECORD_CMD) |		\
	TRACE_ITER(PROF_TEXT_OFFSET) | FPROFILE_DEFAULT_FLAGS)

/* trace_flags that are default zero for instances */
#define ZEROED_TRACE_FLAGS						\
	(TRACE_ITER(EVENT_FORK) | TRACE_ITER(FUNC_FORK) |		\
	 TRACE_ITER(TRACE_PRINTK) | TRACE_ITER(COPY_MARKER))

/*
 * The global_trace is the descriptor that holds the top-level tracing
 * buffers for the live tracing.
 */
static struct trace_array global_trace = {
	.trace_flags = TRACE_DEFAULT_FLAGS,
};

struct trace_array *printk_trace = &global_trace;

/* List of trace_arrays interested in the top level trace_marker */
static LIST_HEAD(marker_copies);

static void update_printk_trace(struct trace_array *tr)
{
	if (printk_trace == tr)
		return;

	printk_trace->trace_flags &= ~TRACE_ITER(TRACE_PRINTK);
	printk_trace = tr;
	tr->trace_flags |= TRACE_ITER(TRACE_PRINTK);
}

/* Returns true if the status of tr changed */
static bool update_marker_trace(struct trace_array *tr, int enabled)
{
	lockdep_assert_held(&event_mutex);

	if (enabled) {
		if (!list_empty(&tr->marker_list))
			return false;

		list_add_rcu(&tr->marker_list, &marker_copies);
		tr->trace_flags |= TRACE_ITER(COPY_MARKER);
		return true;
	}

	if (list_empty(&tr->marker_list))
		return false;

	list_del_init(&tr->marker_list);
	tr->trace_flags &= ~TRACE_ITER(COPY_MARKER);
	return true;
}

void trace_set_ring_buffer_expanded(struct trace_array *tr)
{
	if (!tr)
		tr = &global_trace;
	tr->ring_buffer_expanded = true;
}

LIST_HEAD(ftrace_trace_arrays);

int trace_array_get(struct trace_array *this_tr)
{
	struct trace_array *tr;

	guard(mutex)(&trace_types_lock);
	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
		if (tr == this_tr) {
			tr->ref++;
			return 0;
		}
	}

	return -ENODEV;
}

static void __trace_array_put(struct trace_array *this_tr)
{
	WARN_ON(!this_tr->ref);
	this_tr->ref--;
}

/**
 * trace_array_put - Decrement the reference counter for this trace array.
 * @this_tr : pointer to the trace array
 *
 * NOTE: Use this when we no longer need the trace array returned by
 * trace_array_get_by_name(). This ensures the trace array can be later
 * destroyed.
 */
void trace_array_put(struct trace_array *this_tr)
{
	if (!this_tr)
		return;

	guard(mutex)(&trace_types_lock);
	__trace_array_put(this_tr);
}
EXPORT_SYMBOL_GPL(trace_array_put);
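/*
 * Typical get/put pairing (illustrative sketch, derived from the
 * functions above):
 *
 *	if (trace_array_get(tr) < 0)
 *		return -ENODEV;	// tr is not (or no longer) registered
 *	...use tr safely...
 *	trace_array_put(tr);	// drop the reference so tr can be destroyed
 */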
int tracing_check_open_get_tr(struct trace_array *tr)
{
	int ret;

	ret = security_locked_down(LOCKDOWN_TRACEFS);
	if (ret)
		return ret;

	if (tracing_disabled)
		return -ENODEV;

	if (tr && trace_array_get(tr) < 0)
		return -ENODEV;

	return 0;
}

static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
{
	u64 ts;

	/* Early boot up does not have a buffer yet */
	if (!buf->buffer)
		return trace_clock_local();

	ts = ring_buffer_time_stamp(buf->buffer);
	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);

	return ts;
}

u64 ftrace_now(int cpu)
{
	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
}

/**
 * tracing_is_enabled - Show if global_trace has been enabled
 *
 * Shows if the global trace has been enabled or not. It uses the
 * mirror flag "buffer_disabled" so it can be read in fast paths such
 * as the irqsoff tracer, but it may be inaccurate due to races. If you
 * need to know the accurate state, use tracing_is_on() which is a little
 * slower, but accurate.
 */
int tracing_is_enabled(void)
{
	/*
	 * For quick access (irqsoff uses this in fast path), just
	 * return the mirror variable of the state of the ring buffer.
	 * It's a little racy, but we don't really care.
	 */
	return !global_trace.buffer_disabled;
}

/*
 * trace_buf_size is the size in bytes that is allocated
 * for a buffer. Note, the number of bytes is always rounded
 * to page size.
 *
 * This number is purposely set to a low value (16384 entries),
 * so that if a dump on oops happens we do not have to wait for
 * an excessive amount of output. It is configurable both at boot
 * time and at run time.
 */
#define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */

static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;

/* trace_types holds a link list of available tracers. */
static struct tracer		*trace_types __read_mostly;

/*
 * trace_types_lock is used to protect the trace_types list.
 */
DEFINE_MUTEX(trace_types_lock);

/*
 * Serialize access to the ring buffer.
 *
 * The ring buffer serializes readers, but that is only low level
 * protection. The validity of the events (returned by ring_buffer_peek()
 * etc.) is not protected by the ring buffer.
 *
 * The content of events may become garbage if we allow other processes
 * to consume these events concurrently:
 *   A) the page of the consumed events may become a normal page
 *	(not a reader page) in the ring buffer, and this page will be
 *	rewritten by the events producer.
 *   B) the page of the consumed events may become a page for splice_read,
 *	and this page will be returned to the system.
 *
 * These primitives allow multiple processes to access different cpu
 * ring buffers concurrently.
 *
 * These primitives don't distinguish read-only and read-consume access.
 * Multiple read-only accesses are also serialized.
 */

#ifdef CONFIG_SMP
static DECLARE_RWSEM(all_cpu_access_lock);
static DEFINE_PER_CPU(struct mutex, cpu_access_lock);

static inline void trace_access_lock(int cpu)
{
	if (cpu == RING_BUFFER_ALL_CPUS) {
		/* gain it for accessing the whole ring buffer. */
		down_write(&all_cpu_access_lock);
	} else {
		/* gain it for accessing a cpu ring buffer. */

		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
		down_read(&all_cpu_access_lock);

		/* Secondly block other access to this @cpu ring buffer. */
		mutex_lock(&per_cpu(cpu_access_lock, cpu));
	}
}

static inline void trace_access_unlock(int cpu)
{
	if (cpu == RING_BUFFER_ALL_CPUS) {
		up_write(&all_cpu_access_lock);
	} else {
		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
		up_read(&all_cpu_access_lock);
	}
}

static inline void trace_access_lock_init(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		mutex_init(&per_cpu(cpu_access_lock, cpu));
}

#else

static DEFINE_MUTEX(access_lock);

static inline void trace_access_lock(int cpu)
{
	(void)cpu;
	mutex_lock(&access_lock);
}

static inline void trace_access_unlock(int cpu)
{
	(void)cpu;
	mutex_unlock(&access_lock);
}

static inline void trace_access_lock_init(void)
{
}

#endif
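/*
 * Reader-side pattern for the primitives above (illustrative):
 *
 *	trace_access_lock(cpu);		// cpu or RING_BUFFER_ALL_CPUS
 *	...peek at or consume events of @cpu...
 *	trace_access_unlock(cpu);
 *
 * A per-cpu reader takes all_cpu_access_lock for read plus the per-cpu
 * mutex, so readers of different CPUs do not block each other, while an
 * ALL_CPUS reader takes the rwsem for write and excludes them all.
 */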
void tracer_tracing_on(struct trace_array *tr)
{
	if (tr->array_buffer.buffer)
		ring_buffer_record_on(tr->array_buffer.buffer);
	/*
	 * This flag is looked at when buffers haven't been allocated
	 * yet, or by some tracers (like irqsoff), that just want to
	 * know if the ring buffer has been disabled, but it can handle
	 * races where it gets disabled while we still do a record.
	 * As the check is in the fast path of the tracers, it is more
	 * important to be fast than accurate.
	 */
	tr->buffer_disabled = 0;
}

/**
 * tracing_on - enable tracing buffers
 *
 * This function enables tracing buffers that may have been
 * disabled with tracing_off.
 */
void tracing_on(void)
{
	tracer_tracing_on(&global_trace);
}
EXPORT_SYMBOL_GPL(tracing_on);

#ifdef CONFIG_TRACER_SNAPSHOT
static void tracing_snapshot_instance_cond(struct trace_array *tr,
					   void *cond_data)
{
	unsigned long flags;

	if (in_nmi()) {
		trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
		trace_array_puts(tr, "*** snapshot is being ignored ***\n");
		return;
	}

	if (!tr->allocated_snapshot) {
		trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
		trace_array_puts(tr, "*** stopping trace here! ***\n");
		tracer_tracing_off(tr);
		return;
	}

	if (tr->mapped) {
		trace_array_puts(tr, "*** BUFFER MEMORY MAPPED ***\n");
		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
		return;
	}

	/* Note, the snapshot can not be used when the tracer uses it */
	if (tracer_uses_snapshot(tr->current_trace)) {
		trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
		return;
	}

	local_irq_save(flags);
	update_max_tr(tr, current, smp_processor_id(), cond_data);
	local_irq_restore(flags);
}

void tracing_snapshot_instance(struct trace_array *tr)
{
	tracing_snapshot_instance_cond(tr, NULL);
}

/**
 * tracing_snapshot - take a snapshot of the current buffer.
 *
 * This causes a swap between the snapshot buffer and the current live
 * tracing buffer. You can use this to take snapshots of the live
 * trace when some condition is triggered, but continue to trace.
 *
 * Note, make sure to allocate the snapshot with either
 * a tracing_snapshot_alloc(), or by doing it manually
 * with: echo 1 > /sys/kernel/tracing/snapshot
 *
 * If the snapshot buffer is not allocated, this stops tracing,
 * basically making a permanent snapshot.
 */
void tracing_snapshot(void)
{
	struct trace_array *tr = &global_trace;

	tracing_snapshot_instance(tr);
}
EXPORT_SYMBOL_GPL(tracing_snapshot);
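/*
 * Typical in-kernel usage (illustrative sketch): allocate the snapshot
 * buffer once from sleepable context, then trigger snapshots wherever
 * the interesting condition fires:
 *
 *	tracing_alloc_snapshot();	// may sleep, do this early
 *	...
 *	if (something_went_wrong())	// made-up condition
 *		tracing_snapshot();	// swap live buffer with snapshot
 */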
/**
 * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
 * @tr: The tracing instance to snapshot
 * @cond_data: The data to be tested conditionally, and possibly saved
 *
 * This is the same as tracing_snapshot() except that the snapshot is
 * conditional - the snapshot will only happen if the
 * cond_snapshot.update() implementation receiving the cond_data
 * returns true, which means that the trace array's cond_snapshot
 * update() operation used the cond_data to determine whether the
 * snapshot should be taken, and if it was, presumably saved it along
 * with the snapshot.
 */
void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
{
	tracing_snapshot_instance_cond(tr, cond_data);
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond);

/**
 * tracing_cond_snapshot_data - get the user data associated with a snapshot
 * @tr: The tracing instance
 *
 * When the user enables a conditional snapshot using
 * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
 * with the snapshot. This accessor is used to retrieve it.
 *
 * Should not be called from cond_snapshot.update(), since it takes
 * the tr->max_lock lock, which the code calling
 * cond_snapshot.update() has already done.
 *
 * Returns the cond_data associated with the trace array's snapshot.
 */
void *tracing_cond_snapshot_data(struct trace_array *tr)
{
	void *cond_data = NULL;

	local_irq_disable();
	arch_spin_lock(&tr->max_lock);

	if (tr->cond_snapshot)
		cond_data = tr->cond_snapshot->cond_data;

	arch_spin_unlock(&tr->max_lock);
	local_irq_enable();

	return cond_data;
}
EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);

static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
					struct array_buffer *size_buf, int cpu_id);
static void set_buffer_entries(struct array_buffer *buf, unsigned long val);

int tracing_alloc_snapshot_instance(struct trace_array *tr)
{
	int order;
	int ret;

	if (!tr->allocated_snapshot) {

		/* Make the snapshot buffer have the same order as main buffer */
		order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
		ret = ring_buffer_subbuf_order_set(tr->snapshot_buffer.buffer, order);
		if (ret < 0)
			return ret;

		/* allocate spare buffer */
		ret = resize_buffer_duplicate_size(&tr->snapshot_buffer,
				&tr->array_buffer, RING_BUFFER_ALL_CPUS);
		if (ret < 0)
			return ret;

		tr->allocated_snapshot = true;
	}

	return 0;
}

static void free_snapshot(struct trace_array *tr)
{
	/*
	 * We don't free the ring buffer; instead we resize it, because
	 * the snapshot ring buffer has state (e.g. ring->clock) that
	 * we want to preserve.
	 */
	ring_buffer_subbuf_order_set(tr->snapshot_buffer.buffer, 0);
	ring_buffer_resize(tr->snapshot_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
	set_buffer_entries(&tr->snapshot_buffer, 1);
	tracing_reset_online_cpus(&tr->snapshot_buffer);
	tr->allocated_snapshot = false;
}

static int tracing_arm_snapshot_locked(struct trace_array *tr)
{
	int ret;

	lockdep_assert_held(&trace_types_lock);

	spin_lock(&tr->snapshot_trigger_lock);
	if (tr->snapshot == UINT_MAX || tr->mapped) {
		spin_unlock(&tr->snapshot_trigger_lock);
		return -EBUSY;
	}

	tr->snapshot++;
	spin_unlock(&tr->snapshot_trigger_lock);

	ret = tracing_alloc_snapshot_instance(tr);
	if (ret) {
		spin_lock(&tr->snapshot_trigger_lock);
		tr->snapshot--;
		spin_unlock(&tr->snapshot_trigger_lock);
	}

	return ret;
}

int tracing_arm_snapshot(struct trace_array *tr)
{
	guard(mutex)(&trace_types_lock);
	return tracing_arm_snapshot_locked(tr);
}

void tracing_disarm_snapshot(struct trace_array *tr)
{
	spin_lock(&tr->snapshot_trigger_lock);
	if (!WARN_ON(!tr->snapshot))
		tr->snapshot--;
	spin_unlock(&tr->snapshot_trigger_lock);
}
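/*
 * tr->snapshot acts as a use counter: each user that may trigger a
 * snapshot arms it once and disarms it when done (illustrative pairing):
 *
 *	ret = tracing_arm_snapshot(tr);	// allocates the buffer if needed
 *	if (ret)
 *		return ret;		// -EBUSY if mapped or counter full
 *	...snapshot triggers may fire...
 *	tracing_disarm_snapshot(tr);
 */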
/**
 * tracing_alloc_snapshot - allocate snapshot buffer.
 *
 * This only allocates the snapshot buffer if it isn't already
 * allocated - it doesn't also take a snapshot.
 *
 * This is meant to be used in cases where the snapshot buffer needs
 * to be set up for events that can't sleep but need to be able to
 * trigger a snapshot.
 */
int tracing_alloc_snapshot(void)
{
	struct trace_array *tr = &global_trace;
	int ret;

	ret = tracing_alloc_snapshot_instance(tr);
	WARN_ON(ret < 0);

	return ret;
}
EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);

/**
 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
 *
 * This is similar to tracing_snapshot(), but it will allocate the
 * snapshot buffer if it isn't already allocated. Use this only
 * where it is safe to sleep, as the allocation may sleep.
 *
 * This causes a swap between the snapshot buffer and the current live
 * tracing buffer. You can use this to take snapshots of the live
 * trace when some condition is triggered, but continue to trace.
 */
void tracing_snapshot_alloc(void)
{
	int ret;

	ret = tracing_alloc_snapshot();
	if (ret < 0)
		return;

	tracing_snapshot();
}
EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);

/**
 * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
 * @tr: The tracing instance
 * @cond_data: User data to associate with the snapshot
 * @update: Implementation of the cond_snapshot update function
 *
 * Check whether the conditional snapshot for the given instance has
 * already been enabled, or if the current tracer is already using a
 * snapshot; if so, return -EBUSY, else create a cond_snapshot and
 * save the cond_data and update function inside.
 *
 * Returns 0 if successful, error otherwise.
 */
int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
				 cond_update_fn_t update)
{
	struct cond_snapshot *cond_snapshot __free(kfree) =
		kzalloc_obj(*cond_snapshot);
	int ret;

	if (!cond_snapshot)
		return -ENOMEM;

	cond_snapshot->cond_data = cond_data;
	cond_snapshot->update = update;

	guard(mutex)(&trace_types_lock);

	if (tracer_uses_snapshot(tr->current_trace))
		return -EBUSY;

	/*
	 * The cond_snapshot can only change to NULL without the
	 * trace_types_lock. We don't care if we race with it going
	 * to NULL, but we want to make sure that it's not set to
	 * something other than NULL when we get here, which we can
	 * do safely with only holding the trace_types_lock and not
	 * having to take the max_lock.
	 */
	if (tr->cond_snapshot)
		return -EBUSY;

	ret = tracing_arm_snapshot_locked(tr);
	if (ret)
		return ret;

	local_irq_disable();
	arch_spin_lock(&tr->max_lock);
	tr->cond_snapshot = no_free_ptr(cond_snapshot);
	arch_spin_unlock(&tr->max_lock);
	local_irq_enable();

	return 0;
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);

/**
 * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
 * @tr: The tracing instance
 *
 * Check whether the conditional snapshot for the given instance is
 * enabled; if so, free the cond_snapshot associated with it,
 * otherwise return -EINVAL.
 *
 * Returns 0 if successful, error otherwise.
 */
int tracing_snapshot_cond_disable(struct trace_array *tr)
{
	int ret = 0;

	local_irq_disable();
	arch_spin_lock(&tr->max_lock);

	if (!tr->cond_snapshot)
		ret = -EINVAL;
	else {
		kfree(tr->cond_snapshot);
		tr->cond_snapshot = NULL;
	}

	arch_spin_unlock(&tr->max_lock);
	local_irq_enable();

	tracing_disarm_snapshot(tr);

	return ret;
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
#else
void tracing_snapshot(void)
{
	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
}
EXPORT_SYMBOL_GPL(tracing_snapshot);
void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
{
	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
int tracing_alloc_snapshot(void)
{
	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
	return -ENODEV;
}
EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
void tracing_snapshot_alloc(void)
{
	/* Give warning */
	tracing_snapshot();
}
EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
void *tracing_cond_snapshot_data(struct trace_array *tr)
{
	return NULL;
}
EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
{
	return -ENODEV;
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
int tracing_snapshot_cond_disable(struct trace_array *tr)
{
	return false;
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
#define free_snapshot(tr)	do { } while (0)
#define tracing_arm_snapshot_locked(tr) ({ -EBUSY; })
#endif /* CONFIG_TRACER_SNAPSHOT */
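/*
 * Illustrative conditional-snapshot setup (the callback name and its
 * test are made up; the callback signature follows cond_update_fn_t):
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		// return true to let the snapshot proceed
 *		return *(int *)cond_data > 0;
 *	}
 *
 *	tracing_snapshot_cond_enable(tr, &my_data, my_update);
 *	...
 *	tracing_snapshot_cond(tr, &my_data);
 *	...
 *	tracing_snapshot_cond_disable(tr);
 */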
void tracer_tracing_off(struct trace_array *tr)
{
	if (tr->array_buffer.buffer)
		ring_buffer_record_off(tr->array_buffer.buffer);
	/*
	 * This flag is looked at when buffers haven't been allocated
	 * yet, or by some tracers (like irqsoff), that just want to
	 * know if the ring buffer has been disabled, but it can handle
	 * races where it gets disabled while we still do a record.
	 * As the check is in the fast path of the tracers, it is more
	 * important to be fast than accurate.
	 */
	tr->buffer_disabled = 1;
}

/**
 * tracer_tracing_disable() - temporarily disable writes to the buffer
 * @tr: The trace array to disable its buffer for
 *
 * Expects tracer_tracing_enable() to re-enable tracing.
 * The difference between this and tracer_tracing_off() is that this
 * is a counter and can nest, whereas tracer_tracing_off() can
 * be called multiple times and a single tracer_tracing_on() will
 * enable it.
 */
void tracer_tracing_disable(struct trace_array *tr)
{
	if (WARN_ON_ONCE(!tr->array_buffer.buffer))
		return;

	ring_buffer_record_disable(tr->array_buffer.buffer);
}

/**
 * tracer_tracing_enable() - counterpart of tracer_tracing_disable()
 * @tr: The trace array that had tracer_tracing_disable() called on it
 *
 * This is called after tracer_tracing_disable() has been called on @tr,
 * when it's safe to re-enable tracing.
 */
void tracer_tracing_enable(struct trace_array *tr)
{
	if (WARN_ON_ONCE(!tr->array_buffer.buffer))
		return;

	ring_buffer_record_enable(tr->array_buffer.buffer);
}

/**
 * tracing_off - turn off tracing buffers
 *
 * This function stops the tracing buffers from recording data.
 * It does not disable any overhead the tracers themselves may
 * be causing. This function simply causes all recording to
 * the ring buffers to fail.
 */
void tracing_off(void)
{
	tracer_tracing_off(&global_trace);
}
EXPORT_SYMBOL_GPL(tracing_off);

void disable_trace_on_warning(void)
{
	if (__disable_trace_on_warning) {
		struct trace_array *tr = READ_ONCE(printk_trace);

		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
				       "Disabling tracing due to warning\n");
		tracing_off();

		/* Disable trace_printk() buffer too */
		if (tr != &global_trace) {
			trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_,
					       "Disabling tracing due to warning\n");
			tracer_tracing_off(tr);
		}
	}
}

/**
 * tracer_tracing_is_on - show real state of ring buffer enabled
 * @tr : the trace array to know if ring buffer is enabled
 *
 * Shows real state of the ring buffer if it is enabled or not.
 */
bool tracer_tracing_is_on(struct trace_array *tr)
{
	if (tr->array_buffer.buffer)
		return ring_buffer_record_is_set_on(tr->array_buffer.buffer);
	return !tr->buffer_disabled;
}

/**
 * tracing_is_on - show state of ring buffers enabled
 */
int tracing_is_on(void)
{
	return tracer_tracing_is_on(&global_trace);
}
EXPORT_SYMBOL_GPL(tracing_is_on);

static int __init set_buf_size(char *str)
{
	unsigned long buf_size;

	if (!str)
		return 0;
	buf_size = memparse(str, &str);
	/*
	 * nr_entries can not be zero and the startup
	 * tests require some buffer space. Therefore
	 * ensure we have at least 4096 bytes of buffer.
	 */
	trace_buf_size = max(4096UL, buf_size);
	return 1;
}
__setup("trace_buf_size=", set_buf_size);
static int __init set_tracing_thresh(char *str)
{
	unsigned long threshold;
	int ret;

	if (!str)
		return 0;
	ret = kstrtoul(str, 0, &threshold);
	if (ret < 0)
		return 0;
	tracing_thresh = threshold * 1000;
	return 1;
}
__setup("tracing_thresh=", set_tracing_thresh);

unsigned long nsecs_to_usecs(unsigned long nsecs)
{
	return nsecs / 1000;
}

/*
 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
 * of strings in the order that the evals (enum) were defined.
 */
#undef C
#define C(a, b) b

/* These must match the bit positions in trace_iterator_flags */
static const char *trace_options[] = {
	TRACE_FLAGS
	NULL
};

static struct {
	u64 (*func)(void);
	const char *name;
	int in_ns;		/* is this clock in nanoseconds? */
} trace_clocks[] = {
	{ trace_clock_local,		"local",	1 },
	{ trace_clock_global,		"global",	1 },
	{ trace_clock_counter,		"counter",	0 },
	{ trace_clock_jiffies,		"uptime",	0 },
	{ trace_clock,			"perf",		1 },
	{ ktime_get_mono_fast_ns,	"mono",		1 },
	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
	{ ktime_get_boot_fast_ns,	"boot",		1 },
	{ ktime_get_tai_fast_ns,	"tai",		1 },
	ARCH_TRACE_CLOCKS
};

bool trace_clock_in_ns(struct trace_array *tr)
{
	if (trace_clocks[tr->clock_id].in_ns)
		return true;

	return false;
}

/*
 * trace_parser_get_init - gets the buffer for trace parser
 */
int trace_parser_get_init(struct trace_parser *parser, int size)
{
	memset(parser, 0, sizeof(*parser));

	parser->buffer = kmalloc(size, GFP_KERNEL);
	if (!parser->buffer)
		return 1;

	parser->size = size;
	return 0;
}

/*
 * trace_parser_put - frees the buffer for trace parser
 */
void trace_parser_put(struct trace_parser *parser)
{
	kfree(parser->buffer);
	parser->buffer = NULL;
}
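/*
 * Typical pattern for the parser helpers above, e.g. in a tracefs
 * ->write() handler (illustrative sketch; the buffer size is arbitrary):
 *
 *	struct trace_parser parser;
 *
 *	if (trace_parser_get_init(&parser, PAGE_SIZE))
 *		return -ENOMEM;
 *	read = trace_get_user(&parser, ubuf, cnt, ppos);
 *	// parser.buffer now holds one NUL-terminated, space-separated word
 *	...
 *	trace_parser_put(&parser);
 */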
/*
 * trace_get_user - reads the user input string separated by space
 * (matched by isspace(ch))
 *
 * For each string found the 'struct trace_parser' is updated,
 * and the function returns.
 *
 * Returns number of bytes read.
 *
 * See kernel/trace/trace.h for 'struct trace_parser' details.
 */
int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
		   size_t cnt, loff_t *ppos)
{
	char ch;
	size_t read = 0;
	ssize_t ret;

	if (!*ppos)
		trace_parser_clear(parser);

	ret = get_user(ch, ubuf++);
	if (ret)
		goto fail;

	read++;
	cnt--;

	/*
	 * The parser is not finished with the last write,
	 * continue reading the user input without skipping spaces.
	 */
	if (!parser->cont) {
		/* skip white space */
		while (cnt && isspace(ch)) {
			ret = get_user(ch, ubuf++);
			if (ret)
				goto fail;
			read++;
			cnt--;
		}

		parser->idx = 0;

		/* only spaces were written */
		if (isspace(ch) || !ch) {
			*ppos += read;
			return read;
		}
	}

	/* read the non-space input */
	while (cnt && !isspace(ch) && ch) {
		if (parser->idx < parser->size - 1)
			parser->buffer[parser->idx++] = ch;
		else {
			ret = -EINVAL;
			goto fail;
		}

		ret = get_user(ch, ubuf++);
		if (ret)
			goto fail;
		read++;
		cnt--;
	}

	/* We either got finished input or we have to wait for another call. */
	if (isspace(ch) || !ch) {
		parser->buffer[parser->idx] = 0;
		parser->cont = false;
	} else if (parser->idx < parser->size - 1) {
		parser->cont = true;
		parser->buffer[parser->idx++] = ch;
		/* Make sure the parsed string always terminates with '\0'. */
		parser->buffer[parser->idx] = 0;
	} else {
		ret = -EINVAL;
		goto fail;
	}

	*ppos += read;
	return read;
fail:
	trace_parser_fail(parser);
	return ret;
}

/* TODO add a seq_buf_to_buffer() */
static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
{
	int len;

	if (trace_seq_used(s) <= s->readpos)
		return -EBUSY;

	len = trace_seq_used(s) - s->readpos;
	if (cnt > len)
		cnt = len;
	memcpy(buf, s->buffer + s->readpos, cnt);

	s->readpos += cnt;
	return cnt;
}

unsigned long __read_mostly	tracing_thresh;

#ifdef CONFIG_TRACER_MAX_TRACE
#ifdef LATENCY_FS_NOTIFY
static struct workqueue_struct *fsnotify_wq;

static void latency_fsnotify_workfn(struct work_struct *work)
{
	struct trace_array *tr = container_of(work, struct trace_array,
					      fsnotify_work);
	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
}

static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
{
	struct trace_array *tr = container_of(iwork, struct trace_array,
					      fsnotify_irqwork);
	queue_work(fsnotify_wq, &tr->fsnotify_work);
}

__init static int latency_fsnotify_init(void)
{
	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
				      WQ_UNBOUND | WQ_HIGHPRI, 0);
	if (!fsnotify_wq) {
		pr_err("Unable to allocate tr_max_lat_wq\n");
		return -ENOMEM;
	}
	return 0;
}

late_initcall_sync(latency_fsnotify_init);

void latency_fsnotify(struct trace_array *tr)
{
	if (!fsnotify_wq)
		return;
	/*
	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
	 * possible that we are called from __schedule() or do_idle(), which
	 * could cause a deadlock.
	 */
	irq_work_queue(&tr->fsnotify_irqwork);
}
#endif /* !LATENCY_FS_NOTIFY */

static const struct file_operations tracing_max_lat_fops;

static void trace_create_maxlat_file(struct trace_array *tr,
				     struct dentry *d_tracer)
{
#ifdef LATENCY_FS_NOTIFY
	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
#endif
	tr->d_max_latency = trace_create_file("tracing_max_latency",
					      TRACE_MODE_WRITE,
					      d_tracer, tr,
					      &tracing_max_lat_fops);
}

/*
 * Copy the new maximum trace into the separate maximum-trace
 * structure. (this way the maximum trace is permanently saved,
 * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
 */
static void
__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
{
	struct array_buffer *trace_buf = &tr->array_buffer;
	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
	struct array_buffer *max_buf = &tr->snapshot_buffer;
	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);

	max_buf->cpu = cpu;
	max_buf->time_start = data->preempt_timestamp;

	max_data->saved_latency = tr->max_latency;
	max_data->critical_start = data->critical_start;
	max_data->critical_end = data->critical_end;

	strscpy(max_data->comm, tsk->comm);
	max_data->pid = tsk->pid;
	/*
	 * If tsk == current, then use current_uid(), as that does not use
	 * RCU. The irq tracer can be called out of RCU scope.
	 */
	if (tsk == current)
		max_data->uid = current_uid();
	else
		max_data->uid = task_uid(tsk);

	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
	max_data->policy = tsk->policy;
	max_data->rt_priority = tsk->rt_priority;

	/* record this task's comm */
	tracing_record_cmdline(tsk);
	latency_fsnotify(tr);
}
#else
static inline void trace_create_maxlat_file(struct trace_array *tr,
					    struct dentry *d_tracer) { }
static inline void __update_max_tr(struct trace_array *tr,
				   struct task_struct *tsk, int cpu) { }
#endif /* CONFIG_TRACER_MAX_TRACE */

#ifdef CONFIG_TRACER_SNAPSHOT
/**
 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
 * @tr: tracer
 * @tsk: the task with the latency
 * @cpu: The cpu that initiated the trace.
 * @cond_data: User data associated with a conditional snapshot
 *
 * Flip the buffers between the @tr and the max_tr and record information
 * about which task was the cause of this latency.
 */
void
update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
	      void *cond_data)
{
	if (tr->stop_count)
		return;

	WARN_ON_ONCE(!irqs_disabled());

	if (!tr->allocated_snapshot) {
		/* Only the nop tracer should hit this when disabling */
		WARN_ON_ONCE(tr->current_trace != &nop_trace);
		return;
	}

	arch_spin_lock(&tr->max_lock);

	/* Inherit the recordable setting from array_buffer */
	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
		ring_buffer_record_on(tr->snapshot_buffer.buffer);
	else
		ring_buffer_record_off(tr->snapshot_buffer.buffer);

	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
		arch_spin_unlock(&tr->max_lock);
		return;
	}

	swap(tr->array_buffer.buffer, tr->snapshot_buffer.buffer);

	__update_max_tr(tr, tsk, cpu);

	arch_spin_unlock(&tr->max_lock);

	/* Any waiters on the old snapshot buffer need to wake up */
	ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
}

/**
 * update_max_tr_single - only copy one trace over, and reset the rest
 * @tr: tracer
 * @tsk: task with the latency
 * @cpu: the cpu of the buffer to copy.
 *
 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
 */
void
update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
{
	int ret;

	if (tr->stop_count)
		return;

	WARN_ON_ONCE(!irqs_disabled());
	if (!tr->allocated_snapshot) {
		/* Only the nop tracer should hit this when disabling */
		WARN_ON_ONCE(tr->current_trace != &nop_trace);
		return;
	}

	arch_spin_lock(&tr->max_lock);

	ret = ring_buffer_swap_cpu(tr->snapshot_buffer.buffer, tr->array_buffer.buffer, cpu);

	if (ret == -EBUSY) {
		/*
		 * We failed to swap the buffer due to a commit taking
		 * place on this CPU. We fail to record, but we reset
		 * the max trace buffer (no one writes directly to it)
		 * and flag that it failed. Another reason is that a
		 * resize is in progress.
		 */
		trace_array_printk_buf(tr->snapshot_buffer.buffer, _THIS_IP_,
				       "Failed to swap buffers due to commit or resize in progress\n");
	}

	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);

	__update_max_tr(tr, tsk, cpu);
	arch_spin_unlock(&tr->max_lock);
}
#endif /* CONFIG_TRACER_SNAPSHOT */

struct pipe_wait {
	struct trace_iterator	*iter;
	int			wait_index;
};

static bool wait_pipe_cond(void *data)
{
	struct pipe_wait *pwait = data;
	struct trace_iterator *iter = pwait->iter;

	if (atomic_read_acquire(&iter->wait_index) != pwait->wait_index)
		return true;

	return iter->closed;
}

static int wait_on_pipe(struct trace_iterator *iter, int full)
{
	struct pipe_wait pwait;
	int ret;

	/* Iterators are static, they should be filled or empty */
	if (trace_buffer_iter(iter, iter->cpu_file))
		return 0;

	pwait.wait_index = atomic_read_acquire(&iter->wait_index);
	pwait.iter = iter;

	ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full,
			       wait_pipe_cond, &pwait);

#ifdef CONFIG_TRACER_SNAPSHOT
	/*
	 * Make sure this is still the snapshot buffer, as if a snapshot were
	 * to happen, this would now be the main buffer.
	 */
	if (iter->snapshot)
		iter->array_buffer = &iter->tr->snapshot_buffer;
#endif
	return ret;
}

#ifdef CONFIG_FTRACE_STARTUP_TEST
static bool selftests_can_run;

struct trace_selftests {
	struct list_head		list;
	struct tracer			*type;
};

static LIST_HEAD(postponed_selftests);

static int save_selftest(struct tracer *type)
{
	struct trace_selftests *selftest;

	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
	if (!selftest)
		return -ENOMEM;

	selftest->type = type;
	list_add(&selftest->list, &postponed_selftests);
	return 0;
}

static int run_tracer_selftest(struct tracer *type)
{
	struct trace_array *tr = &global_trace;
	struct tracer_flags *saved_flags = tr->current_trace_flags;
	struct tracer *saved_tracer = tr->current_trace;
	int ret;

	if (!type->selftest || tracing_selftest_disabled)
		return 0;

	/*
	 * If a tracer registers early in boot up (before scheduling is
	 * initialized and such), then do not run its selftests yet.
	 * Instead, run it a little later in the boot process.
	 */
	if (!selftests_can_run)
		return save_selftest(type);

	if (!tracing_is_on()) {
		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
			type->name);
		return 0;
	}

	/*
	 * Run a selftest on this tracer.
	 * Here we reset the trace buffer, and set the current
	 * tracer to be this tracer. The tracer can then run some
	 * internal tracing to verify that everything is in order.
	 * If we fail, we do not register this tracer.
	 */
	tracing_reset_online_cpus(&tr->array_buffer);

	tr->current_trace = type;
	tr->current_trace_flags = type->flags ? : type->default_flags;

#ifdef CONFIG_TRACER_MAX_TRACE
	if (tracer_uses_snapshot(type)) {
		/* If we expanded the buffers, make sure the max is expanded too */
		if (tr->ring_buffer_expanded)
			ring_buffer_resize(tr->snapshot_buffer.buffer, trace_buf_size,
					   RING_BUFFER_ALL_CPUS);
		tr->allocated_snapshot = true;
	}
#endif

	/* the test is responsible for initializing and enabling */
	pr_info("Testing tracer %s: ", type->name);
	ret = type->selftest(type, tr);
	/* the test is responsible for resetting too */
	tr->current_trace = saved_tracer;
	tr->current_trace_flags = saved_flags;
	if (ret) {
		printk(KERN_CONT "FAILED!\n");
		/* Add the warning after printing 'FAILED' */
		WARN_ON(1);
		return -1;
	}
	/* Only reset on passing, to avoid touching corrupted buffers */
	tracing_reset_online_cpus(&tr->array_buffer);

#ifdef CONFIG_TRACER_MAX_TRACE
	if (tracer_uses_snapshot(type)) {
		tr->allocated_snapshot = false;

		/* Shrink the max buffer again */
		if (tr->ring_buffer_expanded)
			ring_buffer_resize(tr->snapshot_buffer.buffer, 1,
					   RING_BUFFER_ALL_CPUS);
	}
#endif

	printk(KERN_CONT "PASSED\n");
	return 0;
}

static int do_run_tracer_selftest(struct tracer *type)
{
	int ret;

	/*
	 * Tests can take a long time, especially if they are run one after the
	 * other, as does happen during bootup when all the tracers are
	 * registered. This could cause the soft lockup watchdog to trigger.
	 */
	cond_resched();

	tracing_selftest_running = true;
	ret = run_tracer_selftest(type);
	tracing_selftest_running = false;

	return ret;
}

static __init int init_trace_selftests(void)
{
	struct trace_selftests *p, *n;
	struct tracer *t, **last;
	int ret;

	selftests_can_run = true;

	guard(mutex)(&trace_types_lock);

	if (list_empty(&postponed_selftests))
		return 0;

	pr_info("Running postponed tracer tests:\n");

	tracing_selftest_running = true;
	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
		/*
		 * This loop can take minutes when sanitizers are enabled, so
		 * let's make sure we allow RCU processing.
		 */
		cond_resched();
		ret = run_tracer_selftest(p->type);
		/* If the test fails, then warn and remove from available_tracers */
		if (ret < 0) {
			WARN(1, "tracer: %s failed selftest, disabling\n",
			     p->type->name);
			last = &trace_types;
			for (t = trace_types; t; t = t->next) {
				if (t == p->type) {
					*last = t->next;
					break;
				}
				last = &t->next;
			}
		}
		list_del(&p->list);
		kfree(p);
	}
	tracing_selftest_running = false;

	return 0;
}
core_initcall(init_trace_selftests);
#else
static inline int do_run_tracer_selftest(struct tracer *type)
{
	return 0;
}
#endif /* CONFIG_FTRACE_STARTUP_TEST */

static int add_tracer(struct trace_array *tr, struct tracer *t);

static void __init apply_trace_boot_options(void);

static void free_tracers(struct trace_array *tr)
{
	struct tracers *t, *n;

	lockdep_assert_held(&trace_types_lock);

	list_for_each_entry_safe(t, n, &tr->tracers, list) {
		list_del(&t->list);
		kfree(t->flags);
		kfree(t);
	}
}
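/*
 * Registering a tracer plugin (illustrative; "mytracer" and its callbacks
 * are made up):
 *
 *	static struct tracer mytracer __read_mostly = {
 *		.name	= "mytracer",	// must be shorter than MAX_TRACER_SIZE
 *		.init	= mytracer_init,
 *		.reset	= mytracer_reset,
 *	};
 *
 *	// from an __init function:
 *	register_tracer(&mytracer);
 */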
/**
 * register_tracer - register a tracer with the ftrace system.
 * @type: the plugin for the tracer
 *
 * Register a new plugin tracer.
 */
int __init register_tracer(struct tracer *type)
{
	struct trace_array *tr;
	struct tracer *t;
	int ret = 0;

	if (!type->name) {
		pr_info("Tracer must have a name\n");
		return -1;
	}

	if (strlen(type->name) >= MAX_TRACER_SIZE) {
		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
		return -1;
	}

	if (security_locked_down(LOCKDOWN_TRACEFS)) {
		pr_warn("Can not register tracer %s due to lockdown\n",
			type->name);
		return -EPERM;
	}

	mutex_lock(&trace_types_lock);

	for (t = trace_types; t; t = t->next) {
		if (strcmp(type->name, t->name) == 0) {
			/* already found */
			pr_info("Tracer %s already registered\n",
				type->name);
			ret = -1;
			goto out;
		}
	}

	/* store the tracer for __set_tracer_option */
	if (type->flags)
		type->flags->trace = type;

	ret = do_run_tracer_selftest(type);
	if (ret < 0)
		goto out;

	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
		ret = add_tracer(tr, type);
		if (ret < 0) {
			/* The tracer will still exist but without options */
			pr_warn("Failed to create tracer options for %s\n", type->name);
			break;
		}
	}

	type->next = trace_types;
	trace_types = type;

 out:
	mutex_unlock(&trace_types_lock);

	if (ret || !default_bootup_tracer)
		return ret;

	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
		return 0;

	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
	/* Do we want this tracer to start on bootup? */
	WARN_ON(tracing_set_tracer(&global_trace, type->name) < 0);
	default_bootup_tracer = NULL;

	apply_trace_boot_options();

	/* Disable other selftests, since this will break them. */
*/ 1993 disable_tracing_selftest("running a tracer"); 1994 1995 return 0; 1996 } 1997 1998 static void tracing_reset_cpu(struct array_buffer *buf, int cpu) 1999 { 2000 struct trace_buffer *buffer = buf->buffer; 2001 2002 if (!buffer) 2003 return; 2004 2005 ring_buffer_record_disable(buffer); 2006 2007 /* Make sure all commits have finished */ 2008 synchronize_rcu(); 2009 ring_buffer_reset_cpu(buffer, cpu); 2010 2011 ring_buffer_record_enable(buffer); 2012 } 2013 2014 void tracing_reset_online_cpus(struct array_buffer *buf) 2015 { 2016 struct trace_buffer *buffer = buf->buffer; 2017 2018 if (!buffer) 2019 return; 2020 2021 ring_buffer_record_disable(buffer); 2022 2023 /* Make sure all commits have finished */ 2024 synchronize_rcu(); 2025 2026 buf->time_start = buffer_ftrace_now(buf, buf->cpu); 2027 2028 ring_buffer_reset_online_cpus(buffer); 2029 2030 ring_buffer_record_enable(buffer); 2031 } 2032 2033 static void tracing_reset_all_cpus(struct array_buffer *buf) 2034 { 2035 struct trace_buffer *buffer = buf->buffer; 2036 2037 if (!buffer) 2038 return; 2039 2040 ring_buffer_record_disable(buffer); 2041 2042 /* Make sure all commits have finished */ 2043 synchronize_rcu(); 2044 2045 buf->time_start = buffer_ftrace_now(buf, buf->cpu); 2046 2047 ring_buffer_reset(buffer); 2048 2049 ring_buffer_record_enable(buffer); 2050 } 2051 2052 /* Must have trace_types_lock held */ 2053 void tracing_reset_all_online_cpus_unlocked(void) 2054 { 2055 struct trace_array *tr; 2056 2057 lockdep_assert_held(&trace_types_lock); 2058 2059 list_for_each_entry(tr, &ftrace_trace_arrays, list) { 2060 if (!tr->clear_trace) 2061 continue; 2062 tr->clear_trace = false; 2063 tracing_reset_online_cpus(&tr->array_buffer); 2064 #ifdef CONFIG_TRACER_SNAPSHOT 2065 tracing_reset_online_cpus(&tr->snapshot_buffer); 2066 #endif 2067 } 2068 } 2069 2070 void tracing_reset_all_online_cpus(void) 2071 { 2072 guard(mutex)(&trace_types_lock); 2073 tracing_reset_all_online_cpus_unlocked(); 2074 } 2075 2076 int is_tracing_stopped(void) 2077 { 2078 return global_trace.stop_count; 2079 } 2080 2081 static void tracing_start_tr(struct trace_array *tr) 2082 { 2083 struct trace_buffer *buffer; 2084 2085 if (tracing_disabled) 2086 return; 2087 2088 guard(raw_spinlock_irqsave)(&tr->start_lock); 2089 if (--tr->stop_count) { 2090 if (WARN_ON_ONCE(tr->stop_count < 0)) { 2091 /* Someone screwed up their debugging */ 2092 tr->stop_count = 0; 2093 } 2094 return; 2095 } 2096 2097 /* Prevent the buffers from switching */ 2098 arch_spin_lock(&tr->max_lock); 2099 2100 buffer = tr->array_buffer.buffer; 2101 if (buffer) 2102 ring_buffer_record_enable(buffer); 2103 2104 #ifdef CONFIG_TRACER_SNAPSHOT 2105 buffer = tr->snapshot_buffer.buffer; 2106 if (buffer) 2107 ring_buffer_record_enable(buffer); 2108 #endif 2109 2110 arch_spin_unlock(&tr->max_lock); 2111 } 2112 2113 /** 2114 * tracing_start - quick start of the tracer 2115 * 2116 * If tracing is enabled but was stopped by tracing_stop, 2117 * this will start the tracer back up. 
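 *
 * Calls nest via stop_count, so a typical pairing (an illustrative
 * sketch, not a real caller) looks like:
 *
 *	tracing_stop();
 *	... inspect the buffers undisturbed ...
 *	tracing_start();
 *
 * Each tracing_start() undoes one tracing_stop(); recording resumes
 * only once stop_count drops back to zero.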
2118 */ 2119 void tracing_start(void) 2120 2121 { 2122 return tracing_start_tr(&global_trace); 2123 } 2124 2125 static void tracing_stop_tr(struct trace_array *tr) 2126 { 2127 struct trace_buffer *buffer; 2128 2129 guard(raw_spinlock_irqsave)(&tr->start_lock); 2130 if (tr->stop_count++) 2131 return; 2132 2133 /* Prevent the buffers from switching */ 2134 arch_spin_lock(&tr->max_lock); 2135 2136 buffer = tr->array_buffer.buffer; 2137 if (buffer) 2138 ring_buffer_record_disable(buffer); 2139 2140 #ifdef CONFIG_TRACER_SNAPSHOT 2141 buffer = tr->snapshot_buffer.buffer; 2142 if (buffer) 2143 ring_buffer_record_disable(buffer); 2144 #endif 2145 2146 arch_spin_unlock(&tr->max_lock); 2147 } 2148 2149 /** 2150 * tracing_stop - quick stop of the tracer 2151 * 2152 * Lightweight way to stop tracing. Use in conjunction with 2153 * tracing_start. 2154 */ 2155 void tracing_stop(void) 2156 { 2157 return tracing_stop_tr(&global_trace); 2158 } 2159 2160 /* 2161 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq 2162 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function 2163 * simplifies those functions and keeps them in sync. 2164 */ 2165 enum print_line_t trace_handle_return(struct trace_seq *s) 2166 { 2167 return trace_seq_has_overflowed(s) ? 2168 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED; 2169 } 2170 EXPORT_SYMBOL_GPL(trace_handle_return); 2171 2172 static unsigned short migration_disable_value(void) 2173 { 2174 #if defined(CONFIG_SMP) 2175 return current->migration_disabled; 2176 #else 2177 return 0; 2178 #endif 2179 } 2180 2181 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status) 2182 { 2183 unsigned int trace_flags = irqs_status; 2184 unsigned int pc; 2185 2186 pc = preempt_count(); 2187 2188 if (pc & NMI_MASK) 2189 trace_flags |= TRACE_FLAG_NMI; 2190 if (pc & HARDIRQ_MASK) 2191 trace_flags |= TRACE_FLAG_HARDIRQ; 2192 if (in_serving_softirq()) 2193 trace_flags |= TRACE_FLAG_SOFTIRQ; 2194 if (softirq_count() >> (SOFTIRQ_SHIFT + 1)) 2195 trace_flags |= TRACE_FLAG_BH_OFF; 2196 2197 if (tif_need_resched()) 2198 trace_flags |= TRACE_FLAG_NEED_RESCHED; 2199 if (test_preempt_need_resched()) 2200 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED; 2201 if (IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY) && tif_test_bit(TIF_NEED_RESCHED_LAZY)) 2202 trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY; 2203 return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) | 2204 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4; 2205 } 2206 2207 struct ring_buffer_event * 2208 trace_buffer_lock_reserve(struct trace_buffer *buffer, 2209 int type, 2210 unsigned long len, 2211 unsigned int trace_ctx) 2212 { 2213 return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx); 2214 } 2215 2216 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event); 2217 DEFINE_PER_CPU(int, trace_buffered_event_cnt); 2218 static int trace_buffered_event_ref; 2219 2220 /** 2221 * trace_buffered_event_enable - enable buffering events 2222 * 2223 * When events are being filtered, it is quicker to use a temporary 2224 * buffer to write the event data into if there's a likely chance 2225 * that it will not be committed. Discarding an event from the ring 2226 * buffer is not as fast as committing one, and is much slower than 2227 * copying the data and committing the copy. 2228 * 2229 * When an event is to be filtered, allocate per cpu buffers to 2230 * write the event data into, and if the event is filtered and discarded 2231 * it is simply dropped; otherwise, the entire data is committed 2232 * in one shot.
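 *
 * This call is reference counted and must be made under event_mutex;
 * a sketch of the expected pairing (illustrative only):
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();
 *	... install or update the filter ...
 *	mutex_unlock(&event_mutex);
 *
 * Only the first enable allocates the per CPU pages, and only the
 * last trace_buffered_event_disable() frees them (see
 * trace_buffered_event_ref).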
2233 */ 2234 void trace_buffered_event_enable(void) 2235 { 2236 struct ring_buffer_event *event; 2237 struct page *page; 2238 int cpu; 2239 2240 WARN_ON_ONCE(!mutex_is_locked(&event_mutex)); 2241 2242 if (trace_buffered_event_ref++) 2243 return; 2244 2245 for_each_tracing_cpu(cpu) { 2246 page = alloc_pages_node(cpu_to_node(cpu), 2247 GFP_KERNEL | __GFP_NORETRY, 0); 2248 /* This is just an optimization and can handle failures */ 2249 if (!page) { 2250 pr_err("Failed to allocate event buffer\n"); 2251 break; 2252 } 2253 2254 event = page_address(page); 2255 memset(event, 0, sizeof(*event)); 2256 2257 per_cpu(trace_buffered_event, cpu) = event; 2258 2259 scoped_guard(preempt,) { 2260 if (cpu == smp_processor_id() && 2261 __this_cpu_read(trace_buffered_event) != 2262 per_cpu(trace_buffered_event, cpu)) 2263 WARN_ON_ONCE(1); 2264 } 2265 } 2266 } 2267 2268 static void enable_trace_buffered_event(void *data) 2269 { 2270 this_cpu_dec(trace_buffered_event_cnt); 2271 } 2272 2273 static void disable_trace_buffered_event(void *data) 2274 { 2275 this_cpu_inc(trace_buffered_event_cnt); 2276 } 2277 2278 /** 2279 * trace_buffered_event_disable - disable buffering events 2280 * 2281 * When a filter is removed, it is faster to not use the buffered 2282 * events, and to commit directly into the ring buffer. Free up 2283 * the temp buffers when there are no more users. This requires 2284 * special synchronization with current events. 2285 */ 2286 void trace_buffered_event_disable(void) 2287 { 2288 int cpu; 2289 2290 WARN_ON_ONCE(!mutex_is_locked(&event_mutex)); 2291 2292 if (WARN_ON_ONCE(!trace_buffered_event_ref)) 2293 return; 2294 2295 if (--trace_buffered_event_ref) 2296 return; 2297 2298 /* For each CPU, set the buffer as used. */ 2299 on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event, 2300 NULL, true); 2301 2302 /* Wait for all current users to finish */ 2303 synchronize_rcu(); 2304 2305 for_each_tracing_cpu(cpu) { 2306 free_page((unsigned long)per_cpu(trace_buffered_event, cpu)); 2307 per_cpu(trace_buffered_event, cpu) = NULL; 2308 } 2309 2310 /* 2311 * Wait for all CPUs that potentially started checking if they can use 2312 * their event buffer only after the previous synchronize_rcu() call and 2313 * they still read a valid pointer from trace_buffered_event. It must be 2314 * ensured they don't see cleared trace_buffered_event_cnt else they 2315 * could wrongly decide to use the pointed-to buffer which is now freed. 2316 */ 2317 synchronize_rcu(); 2318 2319 /* For each CPU, relinquish the buffer */ 2320 on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL, 2321 true); 2322 } 2323 2324 static struct trace_buffer *temp_buffer; 2325 2326 struct ring_buffer_event * 2327 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb, 2328 struct trace_event_file *trace_file, 2329 int type, unsigned long len, 2330 unsigned int trace_ctx) 2331 { 2332 struct ring_buffer_event *entry; 2333 struct trace_array *tr = trace_file->tr; 2334 int val; 2335 2336 *current_rb = tr->array_buffer.buffer; 2337 2338 if (!tr->no_filter_buffering_ref && 2339 (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) { 2340 preempt_disable_notrace(); 2341 /* 2342 * Filtering is on, so try to use the per cpu buffer first. 2343 * This buffer will simulate a ring_buffer_event, 2344 * where the type_len is zero and the array[0] will 2345 * hold the full length. 2346 * (see include/linux/ring_buffer.h for details on 2347 * how the ring_buffer_event is structured).
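 *
 * A rough sketch of that layout (the header above is the
 * authoritative definition):
 *
 *	struct ring_buffer_event {
 *		u32	type_len:5, time_delta:27;
 *		u32	array[];
 *	};
 *
 * Here type_len is written as zero and array[0] carries the full
 * event length.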
2348 * 2349 * Using a temp buffer during filtering and copying it 2350 * on a matched filter is quicker than writing directly 2351 * into the ring buffer and then discarding it when 2352 * it doesn't match. That is because the discard 2353 * requires several atomic operations to get right. 2354 * Copying on match and doing nothing on a failed match 2355 * is still quicker than no copy on match, but having 2356 * to discard out of the ring buffer on a failed match. 2357 */ 2358 if ((entry = __this_cpu_read(trace_buffered_event))) { 2359 int max_len = PAGE_SIZE - struct_size(entry, array, 1); 2360 2361 val = this_cpu_inc_return(trace_buffered_event_cnt); 2362 2363 /* 2364 * Preemption is disabled, but interrupts and NMIs 2365 * can still come in now. If that happens after 2366 * the above increment, then it will have to go 2367 * back to the old method of allocating the event 2368 * on the ring buffer, and if the filter fails, it 2369 * will have to call ring_buffer_discard_commit() 2370 * to remove it. 2371 * 2372 * Need to also check the unlikely case that the 2373 * length is bigger than the temp buffer size. 2374 * If that happens, then the reserve is pretty much 2375 * guaranteed to fail, as the ring buffer currently 2376 * only allows events less than a page. But that may 2377 * change in the future, so let the ring buffer reserve 2378 * handle the failure in that case. 2379 */ 2380 if (val == 1 && likely(len <= max_len)) { 2381 trace_event_setup(entry, type, trace_ctx); 2382 entry->array[0] = len; 2383 /* Return with preemption disabled */ 2384 return entry; 2385 } 2386 this_cpu_dec(trace_buffered_event_cnt); 2387 } 2388 /* __trace_buffer_lock_reserve() disables preemption */ 2389 preempt_enable_notrace(); 2390 } 2391 2392 entry = __trace_buffer_lock_reserve(*current_rb, type, len, 2393 trace_ctx); 2394 /* 2395 * If tracing is off, but we have triggers enabled 2396 * we still need to look at the event data. Use the temp_buffer 2397 * to store the trace event for the trigger to use. It's recursive 2398 * safe and will not be recorded anywhere. 
2399 */ 2400 if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) { 2401 *current_rb = temp_buffer; 2402 entry = __trace_buffer_lock_reserve(*current_rb, type, len, 2403 trace_ctx); 2404 } 2405 return entry; 2406 } 2407 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve); 2408 2409 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock); 2410 static DEFINE_MUTEX(tracepoint_printk_mutex); 2411 2412 static void output_printk(struct trace_event_buffer *fbuffer) 2413 { 2414 struct trace_event_call *event_call; 2415 struct trace_event_file *file; 2416 struct trace_event *event; 2417 unsigned long flags; 2418 struct trace_iterator *iter = tracepoint_print_iter; 2419 2420 /* We should never get here if iter is NULL */ 2421 if (WARN_ON_ONCE(!iter)) 2422 return; 2423 2424 event_call = fbuffer->trace_file->event_call; 2425 if (!event_call || !event_call->event.funcs || 2426 !event_call->event.funcs->trace) 2427 return; 2428 2429 file = fbuffer->trace_file; 2430 if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) || 2431 (unlikely(file->flags & EVENT_FILE_FL_FILTERED) && 2432 !filter_match_preds(file->filter, fbuffer->entry))) 2433 return; 2434 2435 event = &fbuffer->trace_file->event_call->event; 2436 2437 raw_spin_lock_irqsave(&tracepoint_iter_lock, flags); 2438 trace_seq_init(&iter->seq); 2439 iter->ent = fbuffer->entry; 2440 event_call->event.funcs->trace(iter, 0, event); 2441 trace_seq_putc(&iter->seq, 0); 2442 printk("%s", iter->seq.buffer); 2443 2444 raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags); 2445 } 2446 2447 int tracepoint_printk_sysctl(const struct ctl_table *table, int write, 2448 void *buffer, size_t *lenp, 2449 loff_t *ppos) 2450 { 2451 int save_tracepoint_printk; 2452 int ret; 2453 2454 guard(mutex)(&tracepoint_printk_mutex); 2455 save_tracepoint_printk = tracepoint_printk; 2456 2457 ret = proc_dointvec(table, write, buffer, lenp, ppos); 2458 2459 /* 2460 * This will force exiting early, as tracepoint_printk 2461 * is always zero when tracepoint_print_iter is not allocated 2462 */ 2463 if (!tracepoint_print_iter) 2464 tracepoint_printk = 0; 2465 2466 if (save_tracepoint_printk == tracepoint_printk) 2467 return ret; 2468 2469 if (tracepoint_printk) 2470 static_key_enable(&tracepoint_printk_key.key); 2471 else 2472 static_key_disable(&tracepoint_printk_key.key); 2473 2474 return ret; 2475 } 2476 2477 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer) 2478 { 2479 enum event_trigger_type tt = ETT_NONE; 2480 struct trace_event_file *file = fbuffer->trace_file; 2481 2482 if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event, 2483 fbuffer->entry, &tt)) 2484 goto discard; 2485 2486 if (static_key_false(&tracepoint_printk_key.key)) 2487 output_printk(fbuffer); 2488 2489 if (static_branch_unlikely(&trace_event_exports_enabled)) 2490 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT); 2491 2492 trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer, 2493 fbuffer->event, fbuffer->trace_ctx, fbuffer->regs); 2494 2495 discard: 2496 if (tt) 2497 event_triggers_post_call(file, tt); 2498 2499 } 2500 EXPORT_SYMBOL_GPL(trace_event_buffer_commit); 2501 2502 /* 2503 * Skip 3: 2504 * 2505 * trace_buffer_unlock_commit_regs() 2506 * trace_event_buffer_commit() 2507 * trace_event_raw_event_xxx() 2508 */ 2509 # define STACK_SKIP 3 2510 2511 void trace_buffer_unlock_commit_regs(struct trace_array *tr, 2512 struct trace_buffer *buffer, 2513 struct ring_buffer_event *event, 2514 unsigned int trace_ctx, 2515 struct pt_regs *regs) 2516 { 2517
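	/*
	 * Commit the event itself first; the kernel stack and user
	 * stack traces below are recorded as separate entries after it.
	 */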
__buffer_unlock_commit(buffer, event); 2518 2519 /* 2520 * If regs is not set, then skip the necessary functions. 2521 * Note, we can still get here via blktrace, wakeup tracer 2522 * and mmiotrace, but that's ok if they lose a function or 2523 * two. They are not that meaningful. 2524 */ 2525 ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs); 2526 ftrace_trace_userstack(tr, buffer, trace_ctx); 2527 } 2528 2529 /* 2530 * Similar to trace_buffer_unlock_commit_regs() but do not dump stack. 2531 */ 2532 void 2533 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer, 2534 struct ring_buffer_event *event) 2535 { 2536 __buffer_unlock_commit(buffer, event); 2537 } 2538 2539 void 2540 trace_function(struct trace_array *tr, unsigned long ip, unsigned long 2541 parent_ip, unsigned int trace_ctx, struct ftrace_regs *fregs) 2542 { 2543 struct trace_buffer *buffer = tr->array_buffer.buffer; 2544 struct ring_buffer_event *event; 2545 struct ftrace_entry *entry; 2546 int size = sizeof(*entry); 2547 2548 size += FTRACE_REGS_MAX_ARGS * !!fregs * sizeof(long); 2549 2550 event = __trace_buffer_lock_reserve(buffer, TRACE_FN, size, 2551 trace_ctx); 2552 if (!event) 2553 return; 2554 entry = ring_buffer_event_data(event); 2555 entry->ip = ip; 2556 entry->parent_ip = parent_ip; 2557 2558 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API 2559 if (fregs) { 2560 for (int i = 0; i < FTRACE_REGS_MAX_ARGS; i++) 2561 entry->args[i] = ftrace_regs_get_argument(fregs, i); 2562 } 2563 #endif 2564 2565 if (static_branch_unlikely(&trace_function_exports_enabled)) 2566 ftrace_exports(event, TRACE_EXPORT_FUNCTION); 2567 __buffer_unlock_commit(buffer, event); 2568 } 2569 2570 #ifdef CONFIG_STACKTRACE 2571 2572 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */ 2573 #define FTRACE_KSTACK_NESTING 4 2574 2575 #define FTRACE_KSTACK_ENTRIES (SZ_4K / FTRACE_KSTACK_NESTING) 2576 2577 struct ftrace_stack { 2578 unsigned long calls[FTRACE_KSTACK_ENTRIES]; 2579 }; 2580 2581 2582 struct ftrace_stacks { 2583 struct ftrace_stack stacks[FTRACE_KSTACK_NESTING]; 2584 }; 2585 2586 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks); 2587 static DEFINE_PER_CPU(int, ftrace_stack_reserve); 2588 2589 void __ftrace_trace_stack(struct trace_array *tr, 2590 struct trace_buffer *buffer, 2591 unsigned int trace_ctx, 2592 int skip, struct pt_regs *regs) 2593 { 2594 struct ring_buffer_event *event; 2595 unsigned int size, nr_entries; 2596 struct ftrace_stack *fstack; 2597 struct stack_entry *entry; 2598 int stackidx; 2599 int bit; 2600 2601 bit = trace_test_and_set_recursion(_THIS_IP_, _RET_IP_, TRACE_EVENT_START); 2602 if (bit < 0) 2603 return; 2604 2605 /* 2606 * Add one for this function and the call to stack_trace_save(). 2607 * If regs is set, then these functions will not be in the way. 2608 */ 2609 #ifndef CONFIG_UNWINDER_ORC 2610 if (!regs) 2611 skip++; 2612 #endif 2613 2614 guard(preempt_notrace)(); 2615 2616 stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1; 2617 2618 /* This should never happen. If it does, yell once and skip */ 2619 if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING)) 2620 goto out; 2621 2622 /* 2623 * The above __this_cpu_inc_return() is 'atomic' cpu local. An 2624 * interrupt will either see the pre-increment or the post-increment 2625 * value. If the interrupt happens pre-increment, it will have 2626 * restored the counter when it returns. We just need a barrier to 2627 * keep gcc from moving things around.
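 *
 * The nesting levels index the per CPU stack array, i.e.
 * (illustrative):
 *
 *	stackidx 0: task context
 *	stackidx 1: softirq that interrupted it
 *	stackidx 2: hardirq that interrupted the softirq
 *	stackidx 3: NMI that interrupted the hardirq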
2628 */ 2629 barrier(); 2630 2631 fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx; 2632 size = ARRAY_SIZE(fstack->calls); 2633 2634 if (regs) { 2635 nr_entries = stack_trace_save_regs(regs, fstack->calls, 2636 size, skip); 2637 } else { 2638 nr_entries = stack_trace_save(fstack->calls, size, skip); 2639 } 2640 2641 #ifdef CONFIG_DYNAMIC_FTRACE 2642 /* Mark stack trace entries that are in the trampoline as trampoline code */ 2643 if (tr->ops && tr->ops->trampoline) { 2644 unsigned long tramp_start = tr->ops->trampoline; 2645 unsigned long tramp_end = tramp_start + tr->ops->trampoline_size; 2646 unsigned long *calls = fstack->calls; 2647 2648 for (int i = 0; i < nr_entries; i++) { 2649 if (calls[i] >= tramp_start && calls[i] < tramp_end) 2650 calls[i] = FTRACE_TRAMPOLINE_MARKER; 2651 } 2652 } 2653 #endif 2654 2655 event = __trace_buffer_lock_reserve(buffer, TRACE_STACK, 2656 struct_size(entry, caller, nr_entries), 2657 trace_ctx); 2658 if (!event) 2659 goto out; 2660 entry = ring_buffer_event_data(event); 2661 2662 entry->size = nr_entries; 2663 memcpy(&entry->caller, fstack->calls, 2664 flex_array_size(entry, caller, nr_entries)); 2665 2666 __buffer_unlock_commit(buffer, event); 2667 2668 out: 2669 /* Again, don't let gcc optimize things here */ 2670 barrier(); 2671 __this_cpu_dec(ftrace_stack_reserve); 2672 trace_clear_recursion(bit); 2673 } 2674 2675 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx, 2676 int skip) 2677 { 2678 struct trace_buffer *buffer = tr->array_buffer.buffer; 2679 2680 if (rcu_is_watching()) { 2681 __ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL); 2682 return; 2683 } 2684 2685 if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY))) 2686 return; 2687 2688 /* 2689 * When an NMI triggers, RCU is enabled via ct_nmi_enter(), 2690 * but if the above rcu_is_watching() failed, then the NMI 2691 * triggered someplace critical, and ct_irq_enter() should 2692 * not be called from NMI. 2693 */ 2694 if (unlikely(in_nmi())) 2695 return; 2696 2697 ct_irq_enter_irqson(); 2698 __ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL); 2699 ct_irq_exit_irqson(); 2700 } 2701 2702 /** 2703 * trace_dump_stack - record a stack back trace in the trace buffer 2704 * @skip: Number of functions to skip (helper handlers) 2705 */ 2706 void trace_dump_stack(int skip) 2707 { 2708 if (tracing_disabled || tracing_selftest_running) 2709 return; 2710 2711 #ifndef CONFIG_UNWINDER_ORC 2712 /* Skip 1 to skip this function. */ 2713 skip++; 2714 #endif 2715 __ftrace_trace_stack(printk_trace, printk_trace->array_buffer.buffer, 2716 tracing_gen_ctx(), skip, NULL); 2717 } 2718 EXPORT_SYMBOL_GPL(trace_dump_stack); 2719 2720 #ifdef CONFIG_USER_STACKTRACE_SUPPORT 2721 static DEFINE_PER_CPU(int, user_stack_count); 2722 2723 static void 2724 ftrace_trace_userstack(struct trace_array *tr, 2725 struct trace_buffer *buffer, unsigned int trace_ctx) 2726 { 2727 struct ring_buffer_event *event; 2728 struct userstack_entry *entry; 2729 2730 if (!(tr->trace_flags & TRACE_ITER(USERSTACKTRACE))) 2731 return; 2732 2733 /* 2734 * NMIs cannot handle page faults, even with fixups. 2735 * Saving the user stack can (and often does) fault. 2736 */ 2737 if (unlikely(in_nmi())) 2738 return; 2739 2740 /* 2741 * Prevent recursion, since the user stack tracing may 2742 * trigger other kernel events.
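 *
 * For example, stack_trace_save_user() can fault, the fault path can
 * fire a tracepoint, and that tracepoint would land back here;
 * user_stack_count breaks the cycle.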
2743 */ 2744 guard(preempt)(); 2745 if (__this_cpu_read(user_stack_count)) 2746 return; 2747 2748 __this_cpu_inc(user_stack_count); 2749 2750 event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK, 2751 sizeof(*entry), trace_ctx); 2752 if (!event) 2753 goto out_drop_count; 2754 entry = ring_buffer_event_data(event); 2755 2756 entry->tgid = current->tgid; 2757 memset(&entry->caller, 0, sizeof(entry->caller)); 2758 2759 stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES); 2760 __buffer_unlock_commit(buffer, event); 2761 2762 out_drop_count: 2763 __this_cpu_dec(user_stack_count); 2764 } 2765 #else /* CONFIG_USER_STACKTRACE_SUPPORT */ 2766 static void ftrace_trace_userstack(struct trace_array *tr, 2767 struct trace_buffer *buffer, 2768 unsigned int trace_ctx) 2769 { 2770 } 2771 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */ 2772 2773 #endif /* CONFIG_STACKTRACE */ 2774 2775 static inline void 2776 func_repeats_set_delta_ts(struct func_repeats_entry *entry, 2777 unsigned long long delta) 2778 { 2779 entry->bottom_delta_ts = delta & U32_MAX; 2780 entry->top_delta_ts = (delta >> 32); 2781 } 2782 2783 void trace_last_func_repeats(struct trace_array *tr, 2784 struct trace_func_repeats *last_info, 2785 unsigned int trace_ctx) 2786 { 2787 struct trace_buffer *buffer = tr->array_buffer.buffer; 2788 struct func_repeats_entry *entry; 2789 struct ring_buffer_event *event; 2790 u64 delta; 2791 2792 event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS, 2793 sizeof(*entry), trace_ctx); 2794 if (!event) 2795 return; 2796 2797 delta = ring_buffer_event_time_stamp(buffer, event) - 2798 last_info->ts_last_call; 2799 2800 entry = ring_buffer_event_data(event); 2801 entry->ip = last_info->ip; 2802 entry->parent_ip = last_info->parent_ip; 2803 entry->count = last_info->count; 2804 func_repeats_set_delta_ts(entry, delta); 2805 2806 __buffer_unlock_commit(buffer, event); 2807 } 2808 2809 static void trace_iterator_increment(struct trace_iterator *iter) 2810 { 2811 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu); 2812 2813 iter->idx++; 2814 if (buf_iter) 2815 ring_buffer_iter_advance(buf_iter); 2816 } 2817 2818 static struct trace_entry * 2819 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts, 2820 unsigned long *lost_events) 2821 { 2822 struct ring_buffer_event *event; 2823 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu); 2824 2825 if (buf_iter) { 2826 event = ring_buffer_iter_peek(buf_iter, ts); 2827 if (lost_events) 2828 *lost_events = ring_buffer_iter_dropped(buf_iter) ? 2829 (unsigned long)-1 : 0; 2830 } else { 2831 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts, 2832 lost_events); 2833 } 2834 2835 if (event) { 2836 iter->ent_size = ring_buffer_event_length(event); 2837 return ring_buffer_event_data(event); 2838 } 2839 iter->ent_size = 0; 2840 return NULL; 2841 } 2842 2843 static struct trace_entry * 2844 __find_next_entry(struct trace_iterator *iter, int *ent_cpu, 2845 unsigned long *missing_events, u64 *ent_ts) 2846 { 2847 struct trace_buffer *buffer = iter->array_buffer->buffer; 2848 struct trace_entry *ent, *next = NULL; 2849 unsigned long lost_events = 0, next_lost = 0; 2850 int cpu_file = iter->cpu_file; 2851 u64 next_ts = 0, ts; 2852 int next_cpu = -1; 2853 int next_size = 0; 2854 int cpu; 2855 2856 /* 2857 * If we are in a per_cpu trace file, don't bother iterating over 2858 * all the CPUs; peek at that CPU directly.
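 *
 * Otherwise this is an N-way merge: peek at every CPU buffer and
 * return the entry with the smallest timestamp. E.g. (illustrative)
 * with cpu0 at ts=105, cpu1 at ts=103 and cpu2 at ts=110, the cpu1
 * entry is returned next.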
2859 */ 2860 if (cpu_file > RING_BUFFER_ALL_CPUS) { 2861 if (ring_buffer_empty_cpu(buffer, cpu_file)) 2862 return NULL; 2863 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events); 2864 if (ent_cpu) 2865 *ent_cpu = cpu_file; 2866 2867 return ent; 2868 } 2869 2870 for_each_tracing_cpu(cpu) { 2871 2872 if (ring_buffer_empty_cpu(buffer, cpu)) 2873 continue; 2874 2875 ent = peek_next_entry(iter, cpu, &ts, &lost_events); 2876 2877 /* 2878 * Pick the entry with the smallest timestamp: 2879 */ 2880 if (ent && (!next || ts < next_ts)) { 2881 next = ent; 2882 next_cpu = cpu; 2883 next_ts = ts; 2884 next_lost = lost_events; 2885 next_size = iter->ent_size; 2886 } 2887 } 2888 2889 iter->ent_size = next_size; 2890 2891 if (ent_cpu) 2892 *ent_cpu = next_cpu; 2893 2894 if (ent_ts) 2895 *ent_ts = next_ts; 2896 2897 if (missing_events) 2898 *missing_events = next_lost; 2899 2900 return next; 2901 } 2902 2903 #define STATIC_FMT_BUF_SIZE 128 2904 static char static_fmt_buf[STATIC_FMT_BUF_SIZE]; 2905 2906 char *trace_iter_expand_format(struct trace_iterator *iter) 2907 { 2908 char *tmp; 2909 2910 /* 2911 * iter->tr is NULL when used with tp_printk, which makes 2912 * this get called where it is not safe to call krealloc(). 2913 */ 2914 if (!iter->tr || iter->fmt == static_fmt_buf) 2915 return NULL; 2916 2917 tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE, 2918 GFP_KERNEL); 2919 if (tmp) { 2920 iter->fmt_size += STATIC_FMT_BUF_SIZE; 2921 iter->fmt = tmp; 2922 } 2923 2924 return tmp; 2925 } 2926 2927 /* Returns true if the string is safe to dereference from an event */ 2928 static bool trace_safe_str(struct trace_iterator *iter, const char *str) 2929 { 2930 unsigned long addr = (unsigned long)str; 2931 struct trace_event *trace_event; 2932 struct trace_event_call *event; 2933 2934 /* OK if part of the event data */ 2935 if ((addr >= (unsigned long)iter->ent) && 2936 (addr < (unsigned long)iter->ent + iter->ent_size)) 2937 return true; 2938 2939 /* OK if part of the temp seq buffer */ 2940 if ((addr >= (unsigned long)iter->tmp_seq.buffer) && 2941 (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE)) 2942 return true; 2943 2944 /* Core rodata can not be freed */ 2945 if (is_kernel_rodata(addr)) 2946 return true; 2947 2948 if (trace_is_tracepoint_string(str)) 2949 return true; 2950 2951 /* 2952 * Now this could be a module event, referencing core module 2953 * data, which is OK. 2954 */ 2955 if (!iter->ent) 2956 return false; 2957 2958 trace_event = ftrace_find_event(iter->ent->type); 2959 if (!trace_event) 2960 return false; 2961 2962 event = container_of(trace_event, struct trace_event_call, event); 2963 if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module) 2964 return false; 2965 2966 /* Would rather have rodata, but this will suffice */ 2967 if (within_module_core(addr, event->module)) 2968 return true; 2969 2970 return false; 2971 } 2972 2973 /** 2974 * ignore_event - Check dereferenced fields while writing to the seq buffer 2975 * @iter: The iterator that holds the seq buffer and the event being printed 2976 * 2977 * At boot up, test_event_printk() will flag any event that dereferences 2978 * a string with "%s" that does not exist in the ring buffer. It may 2979 * still be valid, as the string may point to a static string in the kernel 2980 * rodata that never gets freed. But if the string pointer is pointing 2981 * to something that was allocated, there's a chance that it can be freed 2982 * by the time the user reads the trace.
This would cause a bad memory 2983 * access by the kernel and possibly crash the system. 2984 * 2985 * This function will check if the event has any fields flagged as needing 2986 * to be checked at runtime and perform those checks. 2987 * 2988 * If it is found that a field is unsafe, it will write into the @iter->seq 2989 * a message stating what was found to be unsafe. 2990 * 2991 * @return: true if the event is unsafe and should be ignored, 2992 * false otherwise. 2993 */ 2994 bool ignore_event(struct trace_iterator *iter) 2995 { 2996 struct ftrace_event_field *field; 2997 struct trace_event *trace_event; 2998 struct trace_event_call *event; 2999 struct list_head *head; 3000 struct trace_seq *seq; 3001 const void *ptr; 3002 3003 trace_event = ftrace_find_event(iter->ent->type); 3004 3005 seq = &iter->seq; 3006 3007 if (!trace_event) { 3008 trace_seq_printf(seq, "EVENT ID %d NOT FOUND?\n", iter->ent->type); 3009 return true; 3010 } 3011 3012 event = container_of(trace_event, struct trace_event_call, event); 3013 if (!(event->flags & TRACE_EVENT_FL_TEST_STR)) 3014 return false; 3015 3016 head = trace_get_fields(event); 3017 if (!head) { 3018 trace_seq_printf(seq, "FIELDS FOR EVENT '%s' NOT FOUND?\n", 3019 trace_event_name(event)); 3020 return true; 3021 } 3022 3023 /* Offsets are from the iter->ent that points to the raw event */ 3024 ptr = iter->ent; 3025 3026 list_for_each_entry(field, head, link) { 3027 const char *str; 3028 bool good; 3029 3030 if (!field->needs_test) 3031 continue; 3032 3033 str = *(const char **)(ptr + field->offset); 3034 3035 good = trace_safe_str(iter, str); 3036 3037 /* 3038 * If you hit this warning, it is likely that the 3039 * trace event in question used %s on a string that 3040 * was saved at the time of the event, but may not be 3041 * around when the trace is read. Use __string(), 3042 * __assign_str() and __get_str() helpers in the TRACE_EVENT() 3043 * instead. See samples/trace_events/trace-events-sample.h 3044 * for reference. 
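 *
 * A sketch of the safe pattern (the event and field names are made
 * up, and __assign_str()'s exact form has varied across kernel
 * versions, so see the sample file above for the current syntax):
 *
 *	TRACE_EVENT(sample_event,
 *		TP_PROTO(const char *name),
 *		TP_ARGS(name),
 *		TP_STRUCT__entry(__string(name, name)),
 *		TP_fast_assign(__assign_str(name);),
 *		TP_printk("name=%s", __get_str(name))
 *	);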
3045 */ 3046 if (WARN_ONCE(!good, "event '%s' has unsafe pointer field '%s'", 3047 trace_event_name(event), field->name)) { 3048 trace_seq_printf(seq, "EVENT %s: HAS UNSAFE POINTER FIELD '%s'\n", 3049 trace_event_name(event), field->name); 3050 return true; 3051 } 3052 } 3053 return false; 3054 } 3055 3056 const char *trace_event_format(struct trace_iterator *iter, const char *fmt) 3057 { 3058 const char *p, *new_fmt; 3059 char *q; 3060 3061 if (WARN_ON_ONCE(!fmt)) 3062 return fmt; 3063 3064 if (!iter->tr || iter->tr->trace_flags & TRACE_ITER(HASH_PTR)) 3065 return fmt; 3066 3067 p = fmt; 3068 new_fmt = q = iter->fmt; 3069 while (*p) { 3070 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) { 3071 if (!trace_iter_expand_format(iter)) 3072 return fmt; 3073 3074 q += iter->fmt - new_fmt; 3075 new_fmt = iter->fmt; 3076 } 3077 3078 *q++ = *p++; 3079 3080 /* Replace %p with %px */ 3081 if (p[-1] == '%') { 3082 if (p[0] == '%') { 3083 *q++ = *p++; 3084 } else if (p[0] == 'p' && !isalnum(p[1])) { 3085 *q++ = *p++; 3086 *q++ = 'x'; 3087 } 3088 } 3089 } 3090 *q = '\0'; 3091 3092 return new_fmt; 3093 } 3094 3095 #define STATIC_TEMP_BUF_SIZE 128 3096 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4); 3097 3098 /* Find the next real entry, without updating the iterator itself */ 3099 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter, 3100 int *ent_cpu, u64 *ent_ts) 3101 { 3102 /* __find_next_entry will reset ent_size */ 3103 int ent_size = iter->ent_size; 3104 struct trace_entry *entry; 3105 3106 /* 3107 * If called from ftrace_dump(), then the iter->temp buffer 3108 * will be the static_temp_buf and not created from kmalloc. 3109 * If the entry size is greater than the buffer, we can 3110 * not save it. Just return NULL in that case. This is only 3111 * used to add markers when two consecutive events' time 3112 * stamps have a large delta. See trace_print_lat_context() 3113 */ 3114 if (iter->temp == static_temp_buf && 3115 STATIC_TEMP_BUF_SIZE < ent_size) 3116 return NULL; 3117 3118 /* 3119 * The __find_next_entry() may call peek_next_entry(), which may 3120 * call ring_buffer_peek() that may make the contents of iter->ent 3121 * undefined. Need to copy iter->ent now. 3122 */ 3123 if (iter->ent && iter->ent != iter->temp) { 3124 if ((!iter->temp || iter->temp_size < iter->ent_size) && 3125 !WARN_ON_ONCE(iter->temp == static_temp_buf)) { 3126 void *temp; 3127 temp = kmalloc(iter->ent_size, GFP_KERNEL); 3128 if (!temp) 3129 return NULL; 3130 kfree(iter->temp); 3131 iter->temp = temp; 3132 iter->temp_size = iter->ent_size; 3133 } 3134 memcpy(iter->temp, iter->ent, iter->ent_size); 3135 iter->ent = iter->temp; 3136 } 3137 entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts); 3138 /* Put back the original ent_size */ 3139 iter->ent_size = ent_size; 3140 3141 return entry; 3142 } 3143 3144 /* Find the next real entry, and increment the iterator to the next entry */ 3145 void *trace_find_next_entry_inc(struct trace_iterator *iter) 3146 { 3147 iter->ent = __find_next_entry(iter, &iter->cpu, 3148 &iter->lost_events, &iter->ts); 3149 3150 if (iter->ent) 3151 trace_iterator_increment(iter); 3152 3153 return iter->ent ? 
iter : NULL; 3154 } 3155 3156 static void trace_consume(struct trace_iterator *iter) 3157 { 3158 ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts, 3159 &iter->lost_events); 3160 } 3161 3162 static void *s_next(struct seq_file *m, void *v, loff_t *pos) 3163 { 3164 struct trace_iterator *iter = m->private; 3165 int i = (int)*pos; 3166 void *ent; 3167 3168 WARN_ON_ONCE(iter->leftover); 3169 3170 (*pos)++; 3171 3172 /* can't go backwards */ 3173 if (iter->idx > i) 3174 return NULL; 3175 3176 if (iter->idx < 0) 3177 ent = trace_find_next_entry_inc(iter); 3178 else 3179 ent = iter; 3180 3181 while (ent && iter->idx < i) 3182 ent = trace_find_next_entry_inc(iter); 3183 3184 iter->pos = *pos; 3185 3186 return ent; 3187 } 3188 3189 void tracing_iter_reset(struct trace_iterator *iter, int cpu) 3190 { 3191 struct ring_buffer_iter *buf_iter; 3192 unsigned long entries = 0; 3193 u64 ts; 3194 3195 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0; 3196 3197 buf_iter = trace_buffer_iter(iter, cpu); 3198 if (!buf_iter) 3199 return; 3200 3201 ring_buffer_iter_reset(buf_iter); 3202 3203 /* 3204 * We could have the case with the max latency tracers 3205 * that a reset never took place on a cpu. This is evident 3206 * by the timestamp being before the start of the buffer. 3207 */ 3208 while (ring_buffer_iter_peek(buf_iter, &ts)) { 3209 if (ts >= iter->array_buffer->time_start) 3210 break; 3211 entries++; 3212 ring_buffer_iter_advance(buf_iter); 3213 /* This could be a big loop */ 3214 cond_resched(); 3215 } 3216 3217 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries; 3218 } 3219 3220 /* 3221 * The current tracer is copied into the iterator to avoid taking 3222 * the global lock all around. 3223 */ 3224 static void *s_start(struct seq_file *m, loff_t *pos) 3225 { 3226 struct trace_iterator *iter = m->private; 3227 struct trace_array *tr = iter->tr; 3228 int cpu_file = iter->cpu_file; 3229 void *p = NULL; 3230 loff_t l = 0; 3231 int cpu; 3232 3233 mutex_lock(&trace_types_lock); 3234 if (unlikely(tr->current_trace != iter->trace)) { 3235 /* Close iter->trace before switching to the new current tracer */ 3236 if (iter->trace->close) 3237 iter->trace->close(iter); 3238 iter->trace = tr->current_trace; 3239 /* Reopen the new current tracer */ 3240 if (iter->trace->open) 3241 iter->trace->open(iter); 3242 } 3243 mutex_unlock(&trace_types_lock); 3244 3245 if (iter->snapshot && tracer_uses_snapshot(iter->trace)) 3246 return ERR_PTR(-EBUSY); 3247 3248 if (*pos != iter->pos) { 3249 iter->ent = NULL; 3250 iter->cpu = 0; 3251 iter->idx = -1; 3252 3253 if (cpu_file == RING_BUFFER_ALL_CPUS) { 3254 for_each_tracing_cpu(cpu) 3255 tracing_iter_reset(iter, cpu); 3256 } else 3257 tracing_iter_reset(iter, cpu_file); 3258 3259 iter->leftover = 0; 3260 for (p = iter; p && l < *pos; p = s_next(m, p, &l)) 3261 ; 3262 3263 } else { 3264 /* 3265 * If we overflowed the seq_file before, then we want 3266 * to just reuse the trace_seq buffer again.
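 * (iter->leftover is the non-zero return of trace_print_seq() left
 * behind by the previous s_show() call; see s_show() below.)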
3267 */ 3268 if (iter->leftover) 3269 p = iter; 3270 else { 3271 l = *pos - 1; 3272 p = s_next(m, p, &l); 3273 } 3274 } 3275 3276 trace_event_read_lock(); 3277 trace_access_lock(cpu_file); 3278 return p; 3279 } 3280 3281 static void s_stop(struct seq_file *m, void *p) 3282 { 3283 struct trace_iterator *iter = m->private; 3284 3285 if (iter->snapshot && tracer_uses_snapshot(iter->trace)) 3286 return; 3287 3288 trace_access_unlock(iter->cpu_file); 3289 trace_event_read_unlock(); 3290 } 3291 3292 static void 3293 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total, 3294 unsigned long *entries, int cpu) 3295 { 3296 unsigned long count; 3297 3298 count = ring_buffer_entries_cpu(buf->buffer, cpu); 3299 /* 3300 * If this buffer has skipped entries, then we hold all 3301 * entries for the trace and we need to ignore the 3302 * ones before the time stamp. 3303 */ 3304 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) { 3305 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries; 3306 /* total is the same as the entries */ 3307 *total = count; 3308 } else 3309 *total = count + 3310 ring_buffer_overrun_cpu(buf->buffer, cpu); 3311 *entries = count; 3312 } 3313 3314 static void 3315 get_total_entries(struct array_buffer *buf, 3316 unsigned long *total, unsigned long *entries) 3317 { 3318 unsigned long t, e; 3319 int cpu; 3320 3321 *total = 0; 3322 *entries = 0; 3323 3324 for_each_tracing_cpu(cpu) { 3325 get_total_entries_cpu(buf, &t, &e, cpu); 3326 *total += t; 3327 *entries += e; 3328 } 3329 } 3330 3331 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu) 3332 { 3333 unsigned long total, entries; 3334 3335 if (!tr) 3336 tr = &global_trace; 3337 3338 get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu); 3339 3340 return entries; 3341 } 3342 3343 unsigned long trace_total_entries(struct trace_array *tr) 3344 { 3345 unsigned long total, entries; 3346 3347 if (!tr) 3348 tr = &global_trace; 3349 3350 get_total_entries(&tr->array_buffer, &total, &entries); 3351 3352 return entries; 3353 } 3354 3355 static void print_lat_help_header(struct seq_file *m) 3356 { 3357 seq_puts(m, "# _------=> CPU# \n" 3358 "# / _-----=> irqs-off/BH-disabled\n" 3359 "# | / _----=> need-resched \n" 3360 "# || / _---=> hardirq/softirq \n" 3361 "# ||| / _--=> preempt-depth \n" 3362 "# |||| / _-=> migrate-disable \n" 3363 "# ||||| / delay \n" 3364 "# cmd pid |||||| time | caller \n" 3365 "# \\ / |||||| \\ | / \n"); 3366 } 3367 3368 static void print_event_info(struct array_buffer *buf, struct seq_file *m) 3369 { 3370 unsigned long total; 3371 unsigned long entries; 3372 3373 get_total_entries(buf, &total, &entries); 3374 seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n", 3375 entries, total, num_online_cpus()); 3376 seq_puts(m, "#\n"); 3377 } 3378 3379 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m, 3380 unsigned int flags) 3381 { 3382 bool tgid = flags & TRACE_ITER(RECORD_TGID); 3383 3384 print_event_info(buf, m); 3385 3386 seq_printf(m, "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? " TGID " : ""); 3387 seq_printf(m, "# | | %s | | |\n", tgid ? " | " : ""); 3388 } 3389 3390 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m, 3391 unsigned int flags) 3392 { 3393 bool tgid = flags & TRACE_ITER(RECORD_TGID); 3394 static const char space[] = " "; 3395 int prec = tgid ? 
12 : 2; 3396 3397 print_event_info(buf, m); 3398 3399 seq_printf(m, "# %.*s _-----=> irqs-off/BH-disabled\n", prec, space); 3400 seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space); 3401 seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space); 3402 seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space); 3403 seq_printf(m, "# %.*s||| / _-=> migrate-disable\n", prec, space); 3404 seq_printf(m, "# %.*s|||| / delay\n", prec, space); 3405 seq_printf(m, "# TASK-PID %.*s CPU# ||||| TIMESTAMP FUNCTION\n", prec, " TGID "); 3406 seq_printf(m, "# | | %.*s | ||||| | |\n", prec, " | "); 3407 } 3408 3409 void 3410 print_trace_header(struct seq_file *m, struct trace_iterator *iter) 3411 { 3412 unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK); 3413 struct array_buffer *buf = iter->array_buffer; 3414 struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu); 3415 struct tracer *type = iter->trace; 3416 unsigned long entries; 3417 unsigned long total; 3418 const char *name = type->name; 3419 3420 get_total_entries(buf, &total, &entries); 3421 3422 seq_printf(m, "# %s latency trace v1.1.5 on %s\n", 3423 name, init_utsname()->release); 3424 seq_puts(m, "# -----------------------------------" 3425 "---------------------------------\n"); 3426 seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |" 3427 " (M:%s VP:%d, KP:%d, SP:%d HP:%d", 3428 nsecs_to_usecs(data->saved_latency), 3429 entries, 3430 total, 3431 buf->cpu, 3432 preempt_model_str(), 3433 /* These are reserved for later use */ 3434 0, 0, 0, 0); 3435 #ifdef CONFIG_SMP 3436 seq_printf(m, " #P:%d)\n", num_online_cpus()); 3437 #else 3438 seq_puts(m, ")\n"); 3439 #endif 3440 seq_puts(m, "# -----------------\n"); 3441 seq_printf(m, "# | task: %.16s-%d " 3442 "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n", 3443 data->comm, data->pid, 3444 from_kuid_munged(seq_user_ns(m), data->uid), data->nice, 3445 data->policy, data->rt_priority); 3446 seq_puts(m, "# -----------------\n"); 3447 3448 if (data->critical_start) { 3449 seq_puts(m, "# => started at: "); 3450 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags); 3451 trace_print_seq(m, &iter->seq); 3452 seq_puts(m, "\n# => ended at: "); 3453 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags); 3454 trace_print_seq(m, &iter->seq); 3455 seq_puts(m, "\n#\n"); 3456 } 3457 3458 seq_puts(m, "#\n"); 3459 } 3460 3461 static void test_cpu_buff_start(struct trace_iterator *iter) 3462 { 3463 struct trace_seq *s = &iter->seq; 3464 struct trace_array *tr = iter->tr; 3465 3466 if (!(tr->trace_flags & TRACE_ITER(ANNOTATE))) 3467 return; 3468 3469 if (!(iter->iter_flags & TRACE_FILE_ANNOTATE)) 3470 return; 3471 3472 if (cpumask_available(iter->started) && 3473 cpumask_test_cpu(iter->cpu, iter->started)) 3474 return; 3475 3476 if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries) 3477 return; 3478 3479 if (cpumask_available(iter->started)) 3480 cpumask_set_cpu(iter->cpu, iter->started); 3481 3482 /* Don't print started cpu buffer for the first entry of the trace */ 3483 if (iter->idx > 1) 3484 trace_seq_printf(s, "##### CPU %u buffer started ####\n", 3485 iter->cpu); 3486 } 3487 3488 #ifdef CONFIG_FTRACE_SYSCALLS 3489 static bool is_syscall_event(struct trace_event *event) 3490 { 3491 return (event->funcs == &enter_syscall_print_funcs) || 3492 (event->funcs == &exit_syscall_print_funcs); 3493 3494 } 3495 #define syscall_buf_size CONFIG_TRACE_SYSCALL_BUF_SIZE_DEFAULT 3496 #else 3497 static inline bool is_syscall_event(struct trace_event 
*event) 3498 { 3499 return false; 3500 } 3501 #define syscall_buf_size 0 3502 #endif /* CONFIG_FTRACE_SYSCALLS */ 3503 3504 static enum print_line_t print_trace_fmt(struct trace_iterator *iter) 3505 { 3506 struct trace_array *tr = iter->tr; 3507 struct trace_seq *s = &iter->seq; 3508 unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK); 3509 struct trace_entry *entry; 3510 struct trace_event *event; 3511 3512 entry = iter->ent; 3513 3514 test_cpu_buff_start(iter); 3515 3516 event = ftrace_find_event(entry->type); 3517 3518 if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) { 3519 if (iter->iter_flags & TRACE_FILE_LAT_FMT) 3520 trace_print_lat_context(iter); 3521 else 3522 trace_print_context(iter); 3523 } 3524 3525 if (trace_seq_has_overflowed(s)) 3526 return TRACE_TYPE_PARTIAL_LINE; 3527 3528 if (event) { 3529 if (tr->trace_flags & TRACE_ITER(FIELDS)) 3530 return print_event_fields(iter, event); 3531 /* 3532 * For TRACE_EVENT() events, the print_fmt is not 3533 * safe to use if the array has delta offsets. 3534 * Force printing via the fields. 3535 */ 3536 if (tr->text_delta) { 3537 /* ftrace and system call events are still OK */ 3538 if ((event->type > __TRACE_LAST_TYPE) && 3539 !is_syscall_event(event)) 3540 return print_event_fields(iter, event); 3541 } 3542 return event->funcs->trace(iter, sym_flags, event); 3543 } 3544 3545 trace_seq_printf(s, "Unknown type %d\n", entry->type); 3546 3547 return trace_handle_return(s); 3548 } 3549 3550 static enum print_line_t print_raw_fmt(struct trace_iterator *iter) 3551 { 3552 struct trace_array *tr = iter->tr; 3553 struct trace_seq *s = &iter->seq; 3554 struct trace_entry *entry; 3555 struct trace_event *event; 3556 3557 entry = iter->ent; 3558 3559 if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) 3560 trace_seq_printf(s, "%d %d %llu ", 3561 entry->pid, iter->cpu, iter->ts); 3562 3563 if (trace_seq_has_overflowed(s)) 3564 return TRACE_TYPE_PARTIAL_LINE; 3565 3566 event = ftrace_find_event(entry->type); 3567 if (event) 3568 return event->funcs->raw(iter, 0, event); 3569 3570 trace_seq_printf(s, "%d ?\n", entry->type); 3571 3572 return trace_handle_return(s); 3573 } 3574 3575 static enum print_line_t print_hex_fmt(struct trace_iterator *iter) 3576 { 3577 struct trace_array *tr = iter->tr; 3578 struct trace_seq *s = &iter->seq; 3579 unsigned char newline = '\n'; 3580 struct trace_entry *entry; 3581 struct trace_event *event; 3582 3583 entry = iter->ent; 3584 3585 if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) { 3586 SEQ_PUT_HEX_FIELD(s, entry->pid); 3587 SEQ_PUT_HEX_FIELD(s, iter->cpu); 3588 SEQ_PUT_HEX_FIELD(s, iter->ts); 3589 if (trace_seq_has_overflowed(s)) 3590 return TRACE_TYPE_PARTIAL_LINE; 3591 } 3592 3593 event = ftrace_find_event(entry->type); 3594 if (event) { 3595 enum print_line_t ret = event->funcs->hex(iter, 0, event); 3596 if (ret != TRACE_TYPE_HANDLED) 3597 return ret; 3598 } 3599 3600 SEQ_PUT_FIELD(s, newline); 3601 3602 return trace_handle_return(s); 3603 } 3604 3605 static enum print_line_t print_bin_fmt(struct trace_iterator *iter) 3606 { 3607 struct trace_array *tr = iter->tr; 3608 struct trace_seq *s = &iter->seq; 3609 struct trace_entry *entry; 3610 struct trace_event *event; 3611 3612 entry = iter->ent; 3613 3614 if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) { 3615 SEQ_PUT_FIELD(s, entry->pid); 3616 SEQ_PUT_FIELD(s, iter->cpu); 3617 SEQ_PUT_FIELD(s, iter->ts); 3618 if (trace_seq_has_overflowed(s)) 3619 return TRACE_TYPE_PARTIAL_LINE; 3620 } 3621 3622 event = ftrace_find_event(entry->type); 3623 return event ?
event->funcs->binary(iter, 0, event) : 3624 TRACE_TYPE_HANDLED; 3625 } 3626 3627 int trace_empty(struct trace_iterator *iter) 3628 { 3629 struct ring_buffer_iter *buf_iter; 3630 int cpu; 3631 3632 /* If we are looking at one CPU buffer, only check that one */ 3633 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) { 3634 cpu = iter->cpu_file; 3635 buf_iter = trace_buffer_iter(iter, cpu); 3636 if (buf_iter) { 3637 if (!ring_buffer_iter_empty(buf_iter)) 3638 return 0; 3639 } else { 3640 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu)) 3641 return 0; 3642 } 3643 return 1; 3644 } 3645 3646 for_each_tracing_cpu(cpu) { 3647 buf_iter = trace_buffer_iter(iter, cpu); 3648 if (buf_iter) { 3649 if (!ring_buffer_iter_empty(buf_iter)) 3650 return 0; 3651 } else { 3652 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu)) 3653 return 0; 3654 } 3655 } 3656 3657 return 1; 3658 } 3659 3660 /* Called with trace_event_read_lock() held. */ 3661 enum print_line_t print_trace_line(struct trace_iterator *iter) 3662 { 3663 struct trace_array *tr = iter->tr; 3664 unsigned long trace_flags = tr->trace_flags; 3665 enum print_line_t ret; 3666 3667 if (iter->lost_events) { 3668 if (iter->lost_events == (unsigned long)-1) 3669 trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n", 3670 iter->cpu); 3671 else 3672 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n", 3673 iter->cpu, iter->lost_events); 3674 if (trace_seq_has_overflowed(&iter->seq)) 3675 return TRACE_TYPE_PARTIAL_LINE; 3676 } 3677 3678 if (iter->trace && iter->trace->print_line) { 3679 ret = iter->trace->print_line(iter); 3680 if (ret != TRACE_TYPE_UNHANDLED) 3681 return ret; 3682 } 3683 3684 if (iter->ent->type == TRACE_BPUTS && 3685 trace_flags & TRACE_ITER(PRINTK) && 3686 trace_flags & TRACE_ITER(PRINTK_MSGONLY)) 3687 return trace_print_bputs_msg_only(iter); 3688 3689 if (iter->ent->type == TRACE_BPRINT && 3690 trace_flags & TRACE_ITER(PRINTK) && 3691 trace_flags & TRACE_ITER(PRINTK_MSGONLY)) 3692 return trace_print_bprintk_msg_only(iter); 3693 3694 if (iter->ent->type == TRACE_PRINT && 3695 trace_flags & TRACE_ITER(PRINTK) && 3696 trace_flags & TRACE_ITER(PRINTK_MSGONLY)) 3697 return trace_print_printk_msg_only(iter); 3698 3699 if (trace_flags & TRACE_ITER(BIN)) 3700 return print_bin_fmt(iter); 3701 3702 if (trace_flags & TRACE_ITER(HEX)) 3703 return print_hex_fmt(iter); 3704 3705 if (trace_flags & TRACE_ITER(RAW)) 3706 return print_raw_fmt(iter); 3707 3708 return print_trace_fmt(iter); 3709 } 3710 3711 void trace_latency_header(struct seq_file *m) 3712 { 3713 struct trace_iterator *iter = m->private; 3714 struct trace_array *tr = iter->tr; 3715 3716 /* print nothing if the buffers are empty */ 3717 if (trace_empty(iter)) 3718 return; 3719 3720 if (iter->iter_flags & TRACE_FILE_LAT_FMT) 3721 print_trace_header(m, iter); 3722 3723 if (!(tr->trace_flags & TRACE_ITER(VERBOSE))) 3724 print_lat_help_header(m); 3725 } 3726 3727 void trace_default_header(struct seq_file *m) 3728 { 3729 struct trace_iterator *iter = m->private; 3730 struct trace_array *tr = iter->tr; 3731 unsigned long trace_flags = tr->trace_flags; 3732 3733 if (!(trace_flags & TRACE_ITER(CONTEXT_INFO))) 3734 return; 3735 3736 if (iter->iter_flags & TRACE_FILE_LAT_FMT) { 3737 /* print nothing if the buffers are empty */ 3738 if (trace_empty(iter)) 3739 return; 3740 print_trace_header(m, iter); 3741 if (!(trace_flags & TRACE_ITER(VERBOSE))) 3742 print_lat_help_header(m); 3743 } else { 3744 if (!(trace_flags & TRACE_ITER(VERBOSE))) { 3745 if (trace_flags & 
TRACE_ITER(IRQ_INFO)) 3746 print_func_help_header_irq(iter->array_buffer, 3747 m, trace_flags); 3748 else 3749 print_func_help_header(iter->array_buffer, m, 3750 trace_flags); 3751 } 3752 } 3753 } 3754 3755 static void test_ftrace_alive(struct seq_file *m) 3756 { 3757 if (!ftrace_is_dead()) 3758 return; 3759 seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n" 3760 "# MAY BE MISSING FUNCTION EVENTS\n"); 3761 } 3762 3763 #ifdef CONFIG_TRACER_SNAPSHOT 3764 static void show_snapshot_main_help(struct seq_file *m) 3765 { 3766 seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n" 3767 "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n" 3768 "# Takes a snapshot of the main buffer.\n" 3769 "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n" 3770 "# (Doesn't have to be '2' works with any number that\n" 3771 "# is not a '0' or '1')\n"); 3772 } 3773 3774 static void show_snapshot_percpu_help(struct seq_file *m) 3775 { 3776 seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n"); 3777 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP 3778 seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n" 3779 "# Takes a snapshot of the main buffer for this cpu.\n"); 3780 #else 3781 seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n" 3782 "# Must use main snapshot file to allocate.\n"); 3783 #endif 3784 seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n" 3785 "# (Doesn't have to be '2' works with any number that\n" 3786 "# is not a '0' or '1')\n"); 3787 } 3788 3789 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) 3790 { 3791 if (iter->tr->allocated_snapshot) 3792 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n"); 3793 else 3794 seq_puts(m, "#\n# * Snapshot is freed *\n#\n"); 3795 3796 seq_puts(m, "# Snapshot commands:\n"); 3797 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) 3798 show_snapshot_main_help(m); 3799 else 3800 show_snapshot_percpu_help(m); 3801 } 3802 #else 3803 /* Should never be called */ 3804 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { } 3805 #endif 3806 3807 static int s_show(struct seq_file *m, void *v) 3808 { 3809 struct trace_iterator *iter = v; 3810 int ret; 3811 3812 if (iter->ent == NULL) { 3813 if (iter->tr) { 3814 seq_printf(m, "# tracer: %s\n", iter->trace->name); 3815 seq_puts(m, "#\n"); 3816 test_ftrace_alive(m); 3817 } 3818 if (iter->snapshot && trace_empty(iter)) 3819 print_snapshot_help(m, iter); 3820 else if (iter->trace && iter->trace->print_header) 3821 iter->trace->print_header(m); 3822 else 3823 trace_default_header(m); 3824 3825 } else if (iter->leftover) { 3826 /* 3827 * If we filled the seq_file buffer earlier, we 3828 * want to just show it now. 3829 */ 3830 ret = trace_print_seq(m, &iter->seq); 3831 3832 /* ret should this time be zero, but you never know */ 3833 iter->leftover = ret; 3834 3835 } else { 3836 ret = print_trace_line(iter); 3837 if (ret == TRACE_TYPE_PARTIAL_LINE) { 3838 iter->seq.full = 0; 3839 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n"); 3840 } 3841 ret = trace_print_seq(m, &iter->seq); 3842 /* 3843 * If we overflow the seq_file buffer, then it will 3844 * ask us for this data again at start up. 3845 * Use that instead. 3846 * ret is 0 if seq_file write succeeded. 3847 * -1 otherwise. 
3848 */ 3849 iter->leftover = ret; 3850 } 3851 3852 return 0; 3853 } 3854 3855 /* 3856 * Should be used after trace_array_get(); trace_types_lock 3857 * ensures that i_cdev was already initialized. 3858 */ 3859 static inline int tracing_get_cpu(struct inode *inode) 3860 { 3861 if (inode->i_cdev) /* See trace_create_cpu_file() */ 3862 return (long)inode->i_cdev - 1; 3863 return RING_BUFFER_ALL_CPUS; 3864 } 3865 3866 static const struct seq_operations tracer_seq_ops = { 3867 .start = s_start, 3868 .next = s_next, 3869 .stop = s_stop, 3870 .show = s_show, 3871 }; 3872 3873 /* 3874 * Note, as iter itself can be allocated and freed in different 3875 * ways, this function is only used to free its content, and not 3876 * the iterator itself. The only requirement on all the allocations 3877 * is that they zero all fields (kzalloc), as freeing works with 3878 * either allocated content or NULL. 3879 */ 3880 static void free_trace_iter_content(struct trace_iterator *iter) 3881 { 3882 /* The fmt is either NULL, allocated or points to static_fmt_buf */ 3883 if (iter->fmt != static_fmt_buf) 3884 kfree(iter->fmt); 3885 3886 kfree(iter->temp); 3887 kfree(iter->buffer_iter); 3888 mutex_destroy(&iter->mutex); 3889 free_cpumask_var(iter->started); 3890 } 3891 3892 static struct trace_iterator * 3893 __tracing_open(struct inode *inode, struct file *file, bool snapshot) 3894 { 3895 struct trace_array *tr = inode->i_private; 3896 struct trace_iterator *iter; 3897 int cpu; 3898 3899 if (tracing_disabled) 3900 return ERR_PTR(-ENODEV); 3901 3902 iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter)); 3903 if (!iter) 3904 return ERR_PTR(-ENOMEM); 3905 3906 iter->buffer_iter = kzalloc_objs(*iter->buffer_iter, nr_cpu_ids); 3907 if (!iter->buffer_iter) 3908 goto release; 3909 3910 /* 3911 * trace_find_next_entry() may need to save off iter->ent. 3912 * It will place it into the iter->temp buffer. As most 3913 * events are less than 128 bytes, allocate a buffer of that size. 3914 * If one is greater, then trace_find_next_entry() will 3915 * allocate a new buffer to adjust for the bigger iter->ent. 3916 * It's not critical if it fails to get allocated here. 3917 */ 3918 iter->temp = kmalloc(128, GFP_KERNEL); 3919 if (iter->temp) 3920 iter->temp_size = 128; 3921 3922 /* 3923 * trace_event_printf() may need to modify the given format 3924 * string to replace %p with %px so that it shows the real address 3925 * instead of a hashed value. However, that is only needed for 3926 * event tracing; other tracers may not need it. Defer the 3927 * allocation until it is needed. 3928 */ 3929 iter->fmt = NULL; 3930 iter->fmt_size = 0; 3931 3932 mutex_lock(&trace_types_lock); 3933 iter->trace = tr->current_trace; 3934 3935 if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL)) 3936 goto fail; 3937 3938 iter->tr = tr; 3939 3940 #ifdef CONFIG_TRACER_SNAPSHOT 3941 /* Currently only the top directory has a snapshot */ 3942 if (tr->current_trace->print_max || snapshot) 3943 iter->array_buffer = &tr->snapshot_buffer; 3944 else 3945 #endif 3946 iter->array_buffer = &tr->array_buffer; 3947 iter->snapshot = snapshot; 3948 iter->pos = -1; 3949 iter->cpu_file = tracing_get_cpu(inode); 3950 mutex_init(&iter->mutex); 3951 3952 /* Notify the tracer early, before we stop tracing.
*/ 3953 if (iter->trace->open) 3954 iter->trace->open(iter); 3955 3956 /* Annotate start of buffers if we had overruns */ 3957 if (ring_buffer_overruns(iter->array_buffer->buffer)) 3958 iter->iter_flags |= TRACE_FILE_ANNOTATE; 3959 3960 /* Output in nanoseconds only if we are using a clock in nanoseconds. */ 3961 if (trace_clocks[tr->clock_id].in_ns) 3962 iter->iter_flags |= TRACE_FILE_TIME_IN_NS; 3963 3964 /* 3965 * If pause-on-trace is enabled, then stop the trace while 3966 * dumping, unless this is the "snapshot" file 3967 */ 3968 if (!iter->snapshot && (tr->trace_flags & TRACE_ITER(PAUSE_ON_TRACE))) { 3969 iter->iter_flags |= TRACE_FILE_PAUSE; 3970 tracing_stop_tr(tr); 3971 } 3972 3973 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) { 3974 for_each_tracing_cpu(cpu) { 3975 iter->buffer_iter[cpu] = 3976 ring_buffer_read_start(iter->array_buffer->buffer, 3977 cpu, GFP_KERNEL); 3978 tracing_iter_reset(iter, cpu); 3979 } 3980 } else { 3981 cpu = iter->cpu_file; 3982 iter->buffer_iter[cpu] = 3983 ring_buffer_read_start(iter->array_buffer->buffer, 3984 cpu, GFP_KERNEL); 3985 tracing_iter_reset(iter, cpu); 3986 } 3987 3988 mutex_unlock(&trace_types_lock); 3989 3990 return iter; 3991 3992 fail: 3993 mutex_unlock(&trace_types_lock); 3994 free_trace_iter_content(iter); 3995 release: 3996 seq_release_private(inode, file); 3997 return ERR_PTR(-ENOMEM); 3998 } 3999 4000 int tracing_open_generic(struct inode *inode, struct file *filp) 4001 { 4002 int ret; 4003 4004 ret = tracing_check_open_get_tr(NULL); 4005 if (ret) 4006 return ret; 4007 4008 filp->private_data = inode->i_private; 4009 return 0; 4010 } 4011 4012 /* 4013 * Open and update trace_array ref count. 4014 * Must have the current trace_array passed to it. 4015 */ 4016 int tracing_open_generic_tr(struct inode *inode, struct file *filp) 4017 { 4018 struct trace_array *tr = inode->i_private; 4019 int ret; 4020 4021 ret = tracing_check_open_get_tr(tr); 4022 if (ret) 4023 return ret; 4024 4025 filp->private_data = inode->i_private; 4026 4027 return 0; 4028 } 4029 4030 /* 4031 * The private pointer of the inode is the trace_event_file. 4032 * Update the tr ref count associated to it. 
4033 */ 4034 int tracing_open_file_tr(struct inode *inode, struct file *filp) 4035 { 4036 struct trace_event_file *file = inode->i_private; 4037 int ret; 4038 4039 ret = tracing_check_open_get_tr(file->tr); 4040 if (ret) 4041 return ret; 4042 4043 guard(mutex)(&event_mutex); 4044 4045 /* Fail if the file is marked for removal */ 4046 if (file->flags & EVENT_FILE_FL_FREED) { 4047 trace_array_put(file->tr); 4048 return -ENODEV; 4049 } else { 4050 event_file_get(file); 4051 } 4052 4053 filp->private_data = inode->i_private; 4054 4055 return 0; 4056 } 4057 4058 int tracing_release_file_tr(struct inode *inode, struct file *filp) 4059 { 4060 struct trace_event_file *file = inode->i_private; 4061 4062 trace_array_put(file->tr); 4063 event_file_put(file); 4064 4065 return 0; 4066 } 4067 4068 int tracing_single_release_file_tr(struct inode *inode, struct file *filp) 4069 { 4070 tracing_release_file_tr(inode, filp); 4071 return single_release(inode, filp); 4072 } 4073 4074 static int tracing_release(struct inode *inode, struct file *file) 4075 { 4076 struct trace_array *tr = inode->i_private; 4077 struct seq_file *m = file->private_data; 4078 struct trace_iterator *iter; 4079 int cpu; 4080 4081 if (!(file->f_mode & FMODE_READ)) { 4082 trace_array_put(tr); 4083 return 0; 4084 } 4085 4086 /* Writes do not use seq_file */ 4087 iter = m->private; 4088 mutex_lock(&trace_types_lock); 4089 4090 for_each_tracing_cpu(cpu) { 4091 if (iter->buffer_iter[cpu]) 4092 ring_buffer_read_finish(iter->buffer_iter[cpu]); 4093 } 4094 4095 if (iter->trace && iter->trace->close) 4096 iter->trace->close(iter); 4097 4098 if (iter->iter_flags & TRACE_FILE_PAUSE) 4099 /* reenable tracing if it was previously enabled */ 4100 tracing_start_tr(tr); 4101 4102 __trace_array_put(tr); 4103 4104 mutex_unlock(&trace_types_lock); 4105 4106 free_trace_iter_content(iter); 4107 seq_release_private(inode, file); 4108 4109 return 0; 4110 } 4111 4112 int tracing_release_generic_tr(struct inode *inode, struct file *file) 4113 { 4114 struct trace_array *tr = inode->i_private; 4115 4116 trace_array_put(tr); 4117 return 0; 4118 } 4119 4120 static int tracing_single_release_tr(struct inode *inode, struct file *file) 4121 { 4122 struct trace_array *tr = inode->i_private; 4123 4124 trace_array_put(tr); 4125 4126 return single_release(inode, file); 4127 } 4128 4129 static bool update_last_data_if_empty(struct trace_array *tr); 4130 4131 static int tracing_open(struct inode *inode, struct file *file) 4132 { 4133 struct trace_array *tr = inode->i_private; 4134 struct trace_iterator *iter; 4135 int ret; 4136 4137 ret = tracing_check_open_get_tr(tr); 4138 if (ret) 4139 return ret; 4140 4141 /* If this file was open for write, then erase contents */ 4142 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) { 4143 int cpu = tracing_get_cpu(inode); 4144 struct array_buffer *trace_buf = &tr->array_buffer; 4145 4146 #ifdef CONFIG_TRACER_MAX_TRACE 4147 if (tr->current_trace->print_max) 4148 trace_buf = &tr->snapshot_buffer; 4149 #endif 4150 4151 if (cpu == RING_BUFFER_ALL_CPUS) 4152 tracing_reset_online_cpus(trace_buf); 4153 else 4154 tracing_reset_cpu(trace_buf, cpu); 4155 4156 update_last_data_if_empty(tr); 4157 } 4158 4159 if (file->f_mode & FMODE_READ) { 4160 iter = __tracing_open(inode, file, false); 4161 if (IS_ERR(iter)) 4162 ret = PTR_ERR(iter); 4163 else if (tr->trace_flags & TRACE_ITER(LATENCY_FMT)) 4164 iter->iter_flags |= TRACE_FILE_LAT_FMT; 4165 } 4166 4167 if (ret < 0) 4168 trace_array_put(tr); 4169 4170 return ret; 4171 } 4172 4173 /* 
4174 * Some tracers are not suitable for instance buffers. 4175 * A tracer is always available for the global array (toplevel) 4176 * or if it explicitly states that it is. 4177 */ 4178 static bool 4179 trace_ok_for_array(struct tracer *t, struct trace_array *tr) 4180 { 4181 /* arrays with mapped buffer range do not have snapshots */ 4182 if (tr->range_addr_start && tracer_uses_snapshot(t)) 4183 return false; 4184 return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances; 4185 } 4186 4187 /* Find the next tracer that this trace array may use */ 4188 static struct tracer * 4189 get_tracer_for_array(struct trace_array *tr, struct tracer *t) 4190 { 4191 while (t && !trace_ok_for_array(t, tr)) 4192 t = t->next; 4193 4194 return t; 4195 } 4196 4197 static void * 4198 t_next(struct seq_file *m, void *v, loff_t *pos) 4199 { 4200 struct trace_array *tr = m->private; 4201 struct tracer *t = v; 4202 4203 (*pos)++; 4204 4205 if (t) 4206 t = get_tracer_for_array(tr, t->next); 4207 4208 return t; 4209 } 4210 4211 static void *t_start(struct seq_file *m, loff_t *pos) 4212 { 4213 struct trace_array *tr = m->private; 4214 struct tracer *t; 4215 loff_t l = 0; 4216 4217 mutex_lock(&trace_types_lock); 4218 4219 t = get_tracer_for_array(tr, trace_types); 4220 for (; t && l < *pos; t = t_next(m, t, &l)) 4221 ; 4222 4223 return t; 4224 } 4225 4226 static void t_stop(struct seq_file *m, void *p) 4227 { 4228 mutex_unlock(&trace_types_lock); 4229 } 4230 4231 static int t_show(struct seq_file *m, void *v) 4232 { 4233 struct tracer *t = v; 4234 4235 if (!t) 4236 return 0; 4237 4238 seq_puts(m, t->name); 4239 if (t->next) 4240 seq_putc(m, ' '); 4241 else 4242 seq_putc(m, '\n'); 4243 4244 return 0; 4245 } 4246 4247 static const struct seq_operations show_traces_seq_ops = { 4248 .start = t_start, 4249 .next = t_next, 4250 .stop = t_stop, 4251 .show = t_show, 4252 }; 4253 4254 static int show_traces_open(struct inode *inode, struct file *file) 4255 { 4256 struct trace_array *tr = inode->i_private; 4257 struct seq_file *m; 4258 int ret; 4259 4260 ret = tracing_check_open_get_tr(tr); 4261 if (ret) 4262 return ret; 4263 4264 ret = seq_open(file, &show_traces_seq_ops); 4265 if (ret) { 4266 trace_array_put(tr); 4267 return ret; 4268 } 4269 4270 m = file->private_data; 4271 m->private = tr; 4272 4273 return 0; 4274 } 4275 4276 static int tracing_seq_release(struct inode *inode, struct file *file) 4277 { 4278 struct trace_array *tr = inode->i_private; 4279 4280 trace_array_put(tr); 4281 return seq_release(inode, file); 4282 } 4283 4284 static ssize_t 4285 tracing_write_stub(struct file *filp, const char __user *ubuf, 4286 size_t count, loff_t *ppos) 4287 { 4288 return count; 4289 } 4290 4291 loff_t tracing_lseek(struct file *file, loff_t offset, int whence) 4292 { 4293 int ret; 4294 4295 if (file->f_mode & FMODE_READ) 4296 ret = seq_lseek(file, offset, whence); 4297 else 4298 file->f_pos = ret = 0; 4299 4300 return ret; 4301 } 4302 4303 static const struct file_operations tracing_fops = { 4304 .open = tracing_open, 4305 .read = seq_read, 4306 .read_iter = seq_read_iter, 4307 .splice_read = copy_splice_read, 4308 .write = tracing_write_stub, 4309 .llseek = tracing_lseek, 4310 .release = tracing_release, 4311 }; 4312 4313 static const struct file_operations show_traces_fops = { 4314 .open = show_traces_open, 4315 .read = seq_read, 4316 .llseek = seq_lseek, 4317 .release = tracing_seq_release, 4318 }; 4319 4320 static ssize_t 4321 tracing_cpumask_read(struct file *filp, char __user *ubuf, 4322 size_t count, loff_t *ppos) 
4323 { 4324 struct trace_array *tr = file_inode(filp)->i_private; 4325 char *mask_str __free(kfree) = NULL; 4326 int len; 4327 4328 len = snprintf(NULL, 0, "%*pb\n", 4329 cpumask_pr_args(tr->tracing_cpumask)) + 1; 4330 mask_str = kmalloc(len, GFP_KERNEL); 4331 if (!mask_str) 4332 return -ENOMEM; 4333 4334 len = snprintf(mask_str, len, "%*pb\n", 4335 cpumask_pr_args(tr->tracing_cpumask)); 4336 if (len >= count) 4337 return -EINVAL; 4338 4339 return simple_read_from_buffer(ubuf, count, ppos, mask_str, len); 4340 } 4341 4342 int tracing_set_cpumask(struct trace_array *tr, 4343 cpumask_var_t tracing_cpumask_new) 4344 { 4345 int cpu; 4346 4347 if (!tr) 4348 return -EINVAL; 4349 4350 local_irq_disable(); 4351 arch_spin_lock(&tr->max_lock); 4352 for_each_tracing_cpu(cpu) { 4353 /* 4354 * Increase/decrease the disabled counter if we are 4355 * about to flip a bit in the cpumask: 4356 */ 4357 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) && 4358 !cpumask_test_cpu(cpu, tracing_cpumask_new)) { 4359 ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu); 4360 #ifdef CONFIG_TRACER_SNAPSHOT 4361 ring_buffer_record_disable_cpu(tr->snapshot_buffer.buffer, cpu); 4362 #endif 4363 } 4364 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) && 4365 cpumask_test_cpu(cpu, tracing_cpumask_new)) { 4366 ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu); 4367 #ifdef CONFIG_TRACER_SNAPSHOT 4368 ring_buffer_record_enable_cpu(tr->snapshot_buffer.buffer, cpu); 4369 #endif 4370 } 4371 } 4372 arch_spin_unlock(&tr->max_lock); 4373 local_irq_enable(); 4374 4375 cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new); 4376 4377 return 0; 4378 } 4379 4380 static ssize_t 4381 tracing_cpumask_write(struct file *filp, const char __user *ubuf, 4382 size_t count, loff_t *ppos) 4383 { 4384 struct trace_array *tr = file_inode(filp)->i_private; 4385 cpumask_var_t tracing_cpumask_new; 4386 int err; 4387 4388 if (count == 0 || count > KMALLOC_MAX_SIZE) 4389 return -EINVAL; 4390 4391 if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL)) 4392 return -ENOMEM; 4393 4394 err = cpumask_parse_user(ubuf, count, tracing_cpumask_new); 4395 if (err) 4396 goto err_free; 4397 4398 err = tracing_set_cpumask(tr, tracing_cpumask_new); 4399 if (err) 4400 goto err_free; 4401 4402 free_cpumask_var(tracing_cpumask_new); 4403 4404 return count; 4405 4406 err_free: 4407 free_cpumask_var(tracing_cpumask_new); 4408 4409 return err; 4410 } 4411 4412 static const struct file_operations tracing_cpumask_fops = { 4413 .open = tracing_open_generic_tr, 4414 .read = tracing_cpumask_read, 4415 .write = tracing_cpumask_write, 4416 .release = tracing_release_generic_tr, 4417 .llseek = generic_file_llseek, 4418 }; 4419 4420 static int tracing_trace_options_show(struct seq_file *m, void *v) 4421 { 4422 struct tracer_opt *trace_opts; 4423 struct trace_array *tr = m->private; 4424 struct tracer_flags *flags; 4425 u32 tracer_flags; 4426 int i; 4427 4428 guard(mutex)(&trace_types_lock); 4429 4430 for (i = 0; trace_options[i]; i++) { 4431 if (tr->trace_flags & (1ULL << i)) 4432 seq_printf(m, "%s\n", trace_options[i]); 4433 else 4434 seq_printf(m, "no%s\n", trace_options[i]); 4435 } 4436 4437 flags = tr->current_trace_flags; 4438 if (!flags || !flags->opts) 4439 return 0; 4440 4441 tracer_flags = flags->val; 4442 trace_opts = flags->opts; 4443 4444 for (i = 0; trace_opts[i].name; i++) { 4445 if (tracer_flags & trace_opts[i].bit) 4446 seq_printf(m, "%s\n", trace_opts[i].name); 4447 else 4448 seq_printf(m, "no%s\n", trace_opts[i].name); 4449 } 4450 4451 return 0; 
}

static int __set_tracer_option(struct trace_array *tr,
			       struct tracer_flags *tracer_flags,
			       struct tracer_opt *opts, int neg)
{
	struct tracer *trace = tracer_flags->trace;
	int ret = 0;

	if (trace->set_flag)
		ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
	if (ret)
		return ret;

	if (neg)
		tracer_flags->val &= ~opts->bit;
	else
		tracer_flags->val |= opts->bit;
	return 0;
}

/* Try to assign a tracer specific option */
static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
{
	struct tracer_flags *tracer_flags = tr->current_trace_flags;
	struct tracer_opt *opts = NULL;
	int i;

	if (!tracer_flags || !tracer_flags->opts)
		return 0;

	for (i = 0; tracer_flags->opts[i].name; i++) {
		opts = &tracer_flags->opts[i];

		if (strcmp(cmp, opts->name) == 0)
			return __set_tracer_option(tr, tracer_flags, opts, neg);
	}

	return -EINVAL;
}

/* Some tracers require overwrite to stay enabled */
int trace_keep_overwrite(struct tracer *tracer, u64 mask, int set)
{
	if (tracer->enabled && (mask & TRACE_ITER(OVERWRITE)) && !set)
		return -1;

	return 0;
}

int set_tracer_flag(struct trace_array *tr, u64 mask, int enabled)
{
	switch (mask) {
	case TRACE_ITER(RECORD_TGID):
	case TRACE_ITER(RECORD_CMD):
	case TRACE_ITER(TRACE_PRINTK):
	case TRACE_ITER(COPY_MARKER):
		lockdep_assert_held(&event_mutex);
	}

	/* do nothing if flag is already set */
	if (!!(tr->trace_flags & mask) == !!enabled)
		return 0;

	/* Give the tracer a chance to approve the change */
	if (tr->current_trace->flag_changed)
		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
			return -EINVAL;

	switch (mask) {
	case TRACE_ITER(TRACE_PRINTK):
		if (enabled) {
			update_printk_trace(tr);
		} else {
			/*
			 * The global_trace cannot clear this.
			 * Its flag only gets cleared if another instance sets it.
			 */
			if (printk_trace == &global_trace)
				return -EINVAL;
			/*
			 * An instance must always have it set;
			 * by default, that's the global_trace instance.
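			 * (e.g. when the instance that currently owns
			 * trace_printk clears this option, the check below
			 * hands it back to the global_trace.)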
4535 */ 4536 if (printk_trace == tr) 4537 update_printk_trace(&global_trace); 4538 } 4539 break; 4540 4541 case TRACE_ITER(COPY_MARKER): 4542 update_marker_trace(tr, enabled); 4543 /* update_marker_trace updates the tr->trace_flags */ 4544 return 0; 4545 } 4546 4547 if (enabled) 4548 tr->trace_flags |= mask; 4549 else 4550 tr->trace_flags &= ~mask; 4551 4552 switch (mask) { 4553 case TRACE_ITER(RECORD_CMD): 4554 trace_event_enable_cmd_record(enabled); 4555 break; 4556 4557 case TRACE_ITER(RECORD_TGID): 4558 4559 if (trace_alloc_tgid_map() < 0) { 4560 tr->trace_flags &= ~TRACE_ITER(RECORD_TGID); 4561 return -ENOMEM; 4562 } 4563 4564 trace_event_enable_tgid_record(enabled); 4565 break; 4566 4567 case TRACE_ITER(EVENT_FORK): 4568 trace_event_follow_fork(tr, enabled); 4569 break; 4570 4571 case TRACE_ITER(FUNC_FORK): 4572 ftrace_pid_follow_fork(tr, enabled); 4573 break; 4574 4575 case TRACE_ITER(OVERWRITE): 4576 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled); 4577 #ifdef CONFIG_TRACER_SNAPSHOT 4578 ring_buffer_change_overwrite(tr->snapshot_buffer.buffer, enabled); 4579 #endif 4580 break; 4581 4582 case TRACE_ITER(PRINTK): 4583 trace_printk_start_stop_comm(enabled); 4584 trace_printk_control(enabled); 4585 break; 4586 4587 #if defined(CONFIG_FUNCTION_PROFILER) && defined(CONFIG_FUNCTION_GRAPH_TRACER) 4588 case TRACE_GRAPH_GRAPH_TIME: 4589 ftrace_graph_graph_time_control(enabled); 4590 break; 4591 #endif 4592 } 4593 4594 return 0; 4595 } 4596 4597 int trace_set_options(struct trace_array *tr, char *option) 4598 { 4599 char *cmp; 4600 int neg = 0; 4601 int ret; 4602 size_t orig_len = strlen(option); 4603 int len; 4604 4605 cmp = strstrip(option); 4606 4607 len = str_has_prefix(cmp, "no"); 4608 if (len) 4609 neg = 1; 4610 4611 cmp += len; 4612 4613 mutex_lock(&event_mutex); 4614 mutex_lock(&trace_types_lock); 4615 4616 ret = match_string(trace_options, -1, cmp); 4617 /* If no option could be set, test the specific tracer options */ 4618 if (ret < 0) 4619 ret = set_tracer_option(tr, cmp, neg); 4620 else 4621 ret = set_tracer_flag(tr, 1ULL << ret, !neg); 4622 4623 mutex_unlock(&trace_types_lock); 4624 mutex_unlock(&event_mutex); 4625 4626 /* 4627 * If the first trailing whitespace is replaced with '\0' by strstrip, 4628 * turn it back into a space. 
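	 * That way the caller's buffer is left exactly as it was passed in,
	 * which matters when the same buffer is parsed repeatedly; see
	 * apply_trace_boot_options() below, which likewise restores the
	 * comma separators that strsep() cuts.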
4629 */ 4630 if (orig_len > strlen(option)) 4631 option[strlen(option)] = ' '; 4632 4633 return ret; 4634 } 4635 4636 static void __init apply_trace_boot_options(void) 4637 { 4638 char *buf = trace_boot_options_buf; 4639 char *option; 4640 4641 while (true) { 4642 option = strsep(&buf, ","); 4643 4644 if (!option) 4645 break; 4646 4647 if (*option) 4648 trace_set_options(&global_trace, option); 4649 4650 /* Put back the comma to allow this to be called again */ 4651 if (buf) 4652 *(buf - 1) = ','; 4653 } 4654 } 4655 4656 static ssize_t 4657 tracing_trace_options_write(struct file *filp, const char __user *ubuf, 4658 size_t cnt, loff_t *ppos) 4659 { 4660 struct seq_file *m = filp->private_data; 4661 struct trace_array *tr = m->private; 4662 char buf[64]; 4663 int ret; 4664 4665 if (cnt >= sizeof(buf)) 4666 return -EINVAL; 4667 4668 if (copy_from_user(buf, ubuf, cnt)) 4669 return -EFAULT; 4670 4671 buf[cnt] = 0; 4672 4673 ret = trace_set_options(tr, buf); 4674 if (ret < 0) 4675 return ret; 4676 4677 *ppos += cnt; 4678 4679 return cnt; 4680 } 4681 4682 static int tracing_trace_options_open(struct inode *inode, struct file *file) 4683 { 4684 struct trace_array *tr = inode->i_private; 4685 int ret; 4686 4687 ret = tracing_check_open_get_tr(tr); 4688 if (ret) 4689 return ret; 4690 4691 ret = single_open(file, tracing_trace_options_show, inode->i_private); 4692 if (ret < 0) 4693 trace_array_put(tr); 4694 4695 return ret; 4696 } 4697 4698 static const struct file_operations tracing_iter_fops = { 4699 .open = tracing_trace_options_open, 4700 .read = seq_read, 4701 .llseek = seq_lseek, 4702 .release = tracing_single_release_tr, 4703 .write = tracing_trace_options_write, 4704 }; 4705 4706 static const char readme_msg[] = 4707 "tracing mini-HOWTO:\n\n" 4708 "By default tracefs removes all OTH file permission bits.\n" 4709 "When mounting tracefs an optional group id can be specified\n" 4710 "which adds the group to every directory and file in tracefs:\n\n" 4711 "\t e.g. 
mount -t tracefs [-o [gid=<gid>]] nodev /sys/kernel/tracing\n\n"
	"# echo 0 > tracing_on : quick way to disable tracing\n"
	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
	" Important files:\n"
	"  trace\t\t\t- The static contents of the buffer\n"
	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
	"  current_tracer\t- function and latency tracers\n"
	"  available_tracers\t- list of configured tracers for current_tracer\n"
	"  error_log\t- error log for failed commands (that support it)\n"
	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
	"  trace_clock\t\t- change the clock used to order events\n"
	"       local:   Per cpu clock but may not be synced across CPUs\n"
	"      global:   Synced across CPUs but slows tracing down.\n"
	"     counter:   Not a clock, but just an increment\n"
	"      uptime:   Jiffy counter from time of boot\n"
	"        perf:   Same clock that perf events use\n"
#ifdef CONFIG_X86_64
	"     x86-tsc:   TSC cycle counter\n"
#endif
	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
	"       delta:   Delta difference against a buffer-wide timestamp\n"
	"    absolute:   Absolute (standalone) timestamp\n"
	"\n  trace_marker\t\t- Writing into this file writes into the kernel buffer\n"
	"\n  trace_marker_raw\t\t- Writing into this file writes binary data into the kernel buffer\n"
	"  tracing_cpumask\t- Limit which CPUs to trace\n"
	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
	"\t\t\t  Remove sub-buffer with rmdir\n"
	"  trace_options\t\t- Set format or modify how tracing happens\n"
	"\t\t\t  Disable an option by prefixing 'no' to the\n"
	"\t\t\t  option name\n"
	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
#ifdef CONFIG_DYNAMIC_FTRACE
	"\n  available_filter_functions - list of functions that can be filtered on\n"
	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
	"\t\t\t  functions\n"
	"\t     accepts: func_full_name or glob-matching-pattern\n"
	"\t     modules: Can select a group via module\n"
	"\t      Format: :mod:<module-name>\n"
	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
	"\t    triggers: a command to perform when function is hit\n"
	"\t      Format: <function>:<trigger>[:count]\n"
	"\t     trigger: traceon, traceoff\n"
	"\t\t      enable_event:<system>:<event>\n"
	"\t\t      disable_event:<system>:<event>\n"
#ifdef CONFIG_STACKTRACE
	"\t\t      stacktrace\n"
#endif
#ifdef CONFIG_TRACER_SNAPSHOT
	"\t\t      snapshot\n"
#endif
	"\t\t      dump\n"
	"\t\t      cpudump\n"
	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
	"\t     The first one will disable tracing every time do_fault is hit\n"
	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
	"\t     The first time do_trap is hit and it disables tracing, the\n"
	"\t     counter will decrement to 2. If tracing is already disabled,\n"
	"\t     the counter will not decrement. It only decrements when the\n"
	"\t     trigger did work\n"
	"\t     To remove a trigger without a count:\n"
	"\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
	"\t     To remove a trigger with a count:\n"
	"\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
	"\t    modules: Can select a group via module command :mod:\n"
	"\t    Does not accept triggers\n"
#endif /* CONFIG_DYNAMIC_FTRACE */
#ifdef CONFIG_FUNCTION_TRACER
	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
	"\t\t    (function)\n"
	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
	"\t\t    (function)\n"
#endif
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
#endif
#ifdef CONFIG_TRACER_SNAPSHOT
	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
	"\t\t\t  snapshot buffer. Read the contents for more\n"
	"\t\t\t  information\n"
#endif
#ifdef CONFIG_STACK_TRACER
	"  stack_trace\t\t- Shows the max stack trace when active\n"
	"  stack_max_size\t- Shows current max stack size that was traced\n"
	"\t\t\t  Write into this file to reset the max size (trigger a\n"
	"\t\t\t  new trace)\n"
#ifdef CONFIG_DYNAMIC_FTRACE
	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
	"\t\t\t  traces\n"
#endif
#endif /* CONFIG_STACK_TRACER */
#ifdef CONFIG_DYNAMIC_EVENTS
	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
	"\t\t\t  Write into this file to define/undefine new trace events.\n"
#endif
#ifdef CONFIG_KPROBE_EVENTS
	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
	"\t\t\t  Write into this file to define/undefine new trace events.\n"
#endif
#ifdef CONFIG_UPROBE_EVENTS
	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
	"\t\t\t  Write into this file to define/undefine new trace events.\n"
#endif
#if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
    defined(CONFIG_FPROBE_EVENTS)
	"\t  accepts: event-definitions (one definition per line)\n"
#if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
	"\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
	"\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
#endif
#ifdef CONFIG_FPROBE_EVENTS
	"\t           f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
	"\t           t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
#endif
#ifdef CONFIG_HIST_TRIGGERS
	"\t           s:[synthetic/]<event> <field> [<field>]\n"
#endif
	"\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
	"\t           -:[<group>/][<event>]\n"
#ifdef CONFIG_KPROBE_EVENTS
	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
	"place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
#endif
#ifdef CONFIG_UPROBE_EVENTS
	"   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
#endif
	"\t     args: <name>=fetcharg[:type]\n"
	"\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
#ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
	"\t           <argname>[->field[->field|.field...]],\n"
#endif
#else
	"\t           $stack<index>, $stack, $retval, $comm,\n"
#endif
	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
	"\t     kernel return probes support: $retval, $arg<N>, $comm\n"
	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
	"\t           symstr, %pd/%pD, <type>\\[<array-size>\\]\n"
#ifdef CONFIG_HIST_TRIGGERS
	"\t    field: <stype> <name>;\n"
	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
	"\t           [unsigned] char/int/long\n"
#endif
	"\t   efield: For event probes ('e' types), the field is one of the fields\n"
	"\t           of the <attached-group>/<attached-event>.\n"
#endif
	"  set_event\t\t- Enables events by name written into it\n"
	"\t\t\t  Can enable module events via: :mod:<module>\n"
	"  events/\t\t- Directory containing all trace event subsystems:\n"
	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
	"\t\t\t  events\n"
	"      filter\t\t- If set, only events passing filter are traced\n"
	"  events/<system>/<event>/\t- Directory containing control files for\n"
	"\t\t\t  <event>:\n"
	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
	"      filter\t\t- If set, only events passing filter are traced\n"
	"      trigger\t\t- If set, a command to perform when event is hit\n"
	"\t    Format: <trigger>[:count][if <filter>]\n"
	"\t   trigger: traceon, traceoff\n"
	"\t            enable_event:<system>:<event>\n"
	"\t            disable_event:<system>:<event>\n"
#ifdef CONFIG_HIST_TRIGGERS
	"\t            enable_hist:<system>:<event>\n"
	"\t            disable_hist:<system>:<event>\n"
#endif
#ifdef CONFIG_STACKTRACE
	"\t\t    stacktrace\n"
#endif
#ifdef CONFIG_TRACER_SNAPSHOT
	"\t\t    snapshot\n"
#endif
#ifdef CONFIG_HIST_TRIGGERS
	"\t\t    hist (see below)\n"
#endif
	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
	"\t                  events/block/block_unplug/trigger\n"
	"\t   The first disables tracing every time block_unplug is hit.\n"
	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
	"\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
	"\t   Like function triggers, the counter is only decremented if it\n"
	"\t    enabled or disabled tracing.\n"
	"\t   To remove a trigger without a count:\n"
	"\t     echo '!<trigger>' > <system>/<event>/trigger\n"
	"\t   To remove a trigger with a count:\n"
	"\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
	"\t   Filters can be ignored when removing a trigger.\n"
#ifdef CONFIG_HIST_TRIGGERS
	"  hist trigger\t- If set, event hits are aggregated into a hash table\n"
	"\t    Format: hist:keys=<field1[,field2,...]>\n"
	"\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
	"\t            [:values=<field1[,field2,...]>]\n"
	"\t            [:sort=<field1[,field2,...]>]\n"
	"\t            [:size=#entries]\n"
	"\t            [:pause][:continue][:clear]\n"
	"\t            [:name=histname1]\n"
	"\t            [:nohitcount]\n"
	"\t            [:<handler>.<action>]\n"
	"\t            [if <filter>]\n\n"
	"\t    Note, special fields can be used as well:\n"
	"\t            common_timestamp - to record current timestamp\n"
	"\t            common_cpu - to record the CPU the event happened on\n"
	"\n"
	"\t    A hist trigger variable can be:\n"
	"\t        - a reference to a field e.g. x=current_timestamp,\n"
	"\t        - a reference to another variable e.g. y=$x,\n"
	"\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
	"\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
	"\n"
	"\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
	"\t    multiplication(*) and division(/) operators. An operand can be either a\n"
	"\t    variable reference, field or numeric literal.\n"
	"\n"
	"\t    When a matching event is hit, an entry is added to a hash\n"
	"\t    table using the key(s) and value(s) named, and the value of a\n"
	"\t    sum called 'hitcount' is incremented. Keys and values\n"
	"\t    correspond to fields in the event's format description. Keys\n"
	"\t    can be any field, or the special string 'common_stacktrace'.\n"
	"\t    Compound keys consisting of up to two fields can be specified\n"
	"\t    by the 'keys' keyword. Values must correspond to numeric\n"
	"\t    fields. Sort keys consisting of up to two fields can be\n"
	"\t    specified using the 'sort' keyword. The sort direction can\n"
	"\t    be modified by appending '.descending' or '.ascending' to a\n"
	"\t    sort field. The 'size' parameter can be used to specify more\n"
	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
	"\t    its histogram data will be shared with other triggers of the\n"
	"\t    same name, and trigger hits will update this common data.\n\n"
	"\t    Reading the 'hist' file for the event will dump the hash\n"
	"\t    table in its entirety to stdout. If there are multiple hist\n"
	"\t    triggers attached to an event, there will be a table for each\n"
	"\t    trigger in the output. The table displayed for a named\n"
	"\t    trigger will be the same as any other instance having the\n"
	"\t    same name. The default format used to display a given field\n"
	"\t    can be modified by appending any of the following modifiers\n"
	"\t    to the field name, as applicable:\n\n"
	"\t            .hex          display a number as a hex value\n"
	"\t            .sym          display an address as a symbol\n"
	"\t            .sym-offset   display an address as a symbol and offset\n"
	"\t            .execname     display a common_pid as a program name\n"
	"\t            .syscall      display a syscall id as a syscall name\n"
	"\t            .log2         display log2 value rather than raw number\n"
	"\t            .buckets=size display values in groups of size rather than raw number\n"
	"\t            .usecs        display a common_timestamp in microseconds\n"
	"\t            .percent      display a number as a percentage value\n"
	"\t            .graph        display a bar-graph of a value\n\n"
	"\t    The 'pause' parameter can be used to pause an existing hist\n"
	"\t    trigger or to start a hist trigger but not log any events\n"
	"\t    until told to do so. 'continue' can be used to start or\n"
	"\t    restart a paused hist trigger.\n\n"
	"\t    The 'clear' parameter will clear the contents of a running\n"
	"\t    hist trigger and leave its current paused/active state\n"
	"\t    unchanged.\n\n"
	"\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
	"\t    raw hitcount in the histogram.\n\n"
	"\t    The enable_hist and disable_hist triggers can be used to\n"
	"\t    have one event conditionally start and stop another event's\n"
	"\t    already-attached hist trigger. The syntax is analogous to\n"
	"\t    the enable_event and disable_event triggers.\n\n"
	"\t    Hist trigger handlers and actions are executed whenever a\n"
	"\t    histogram entry is added or updated. They take the form:\n\n"
	"\t        <handler>.<action>\n\n"
	"\t    The available handlers are:\n\n"
	"\t        onmatch(matching.event)  - invoke on addition or update\n"
	"\t        onmax(var)               - invoke if var exceeds current max\n"
	"\t        onchange(var)            - invoke action if var changes\n\n"
	"\t    The available actions are:\n\n"
	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
	"\t        save(field,...)                      - save current event fields\n"
#ifdef CONFIG_TRACER_SNAPSHOT
	"\t        snapshot()                           - snapshot the trace buffer\n\n"
#endif
#ifdef CONFIG_SYNTH_EVENTS
	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
	"\t  Write into this file to define/undefine new synthetic events.\n"
	"\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
#endif
#endif
;

static ssize_t
tracing_readme_read(struct file *filp, char __user *ubuf,
		    size_t cnt, loff_t *ppos)
{
	return simple_read_from_buffer(ubuf, cnt, ppos,
				       readme_msg, strlen(readme_msg));
}

static const struct file_operations tracing_readme_fops = {
	.open	= tracing_open_generic,
	.read	= tracing_readme_read,
	.llseek	= generic_file_llseek,
};

#ifdef CONFIG_TRACE_EVAL_MAP_FILE
static union trace_eval_map_item *
update_eval_map(union trace_eval_map_item *ptr)
{
	if (!ptr->map.eval_string) {
		if (ptr->tail.next) {
			ptr = ptr->tail.next;
			/* Set ptr to the next real item (skip head) */
			ptr++;
		} else
			return NULL;
	}
	return ptr;
}

static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
{
	union trace_eval_map_item *ptr = v;

	/*
	 * Paranoid! If ptr points to end, we don't want to increment past it.
	 * This really should never happen.
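	 * (update_eval_map() only returns NULL when ptr is already on a
	 * terminating tail item with nothing chained on via tail.next,
	 * which the WARN_ON_ONCE() below guards against.)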
5040 */ 5041 (*pos)++; 5042 ptr = update_eval_map(ptr); 5043 if (WARN_ON_ONCE(!ptr)) 5044 return NULL; 5045 5046 ptr++; 5047 ptr = update_eval_map(ptr); 5048 5049 return ptr; 5050 } 5051 5052 static void *eval_map_start(struct seq_file *m, loff_t *pos) 5053 { 5054 union trace_eval_map_item *v; 5055 loff_t l = 0; 5056 5057 mutex_lock(&trace_eval_mutex); 5058 5059 v = trace_eval_maps; 5060 if (v) 5061 v++; 5062 5063 while (v && l < *pos) { 5064 v = eval_map_next(m, v, &l); 5065 } 5066 5067 return v; 5068 } 5069 5070 static void eval_map_stop(struct seq_file *m, void *v) 5071 { 5072 mutex_unlock(&trace_eval_mutex); 5073 } 5074 5075 static int eval_map_show(struct seq_file *m, void *v) 5076 { 5077 union trace_eval_map_item *ptr = v; 5078 5079 seq_printf(m, "%s %ld (%s)\n", 5080 ptr->map.eval_string, ptr->map.eval_value, 5081 ptr->map.system); 5082 5083 return 0; 5084 } 5085 5086 static const struct seq_operations tracing_eval_map_seq_ops = { 5087 .start = eval_map_start, 5088 .next = eval_map_next, 5089 .stop = eval_map_stop, 5090 .show = eval_map_show, 5091 }; 5092 5093 static int tracing_eval_map_open(struct inode *inode, struct file *filp) 5094 { 5095 int ret; 5096 5097 ret = tracing_check_open_get_tr(NULL); 5098 if (ret) 5099 return ret; 5100 5101 return seq_open(filp, &tracing_eval_map_seq_ops); 5102 } 5103 5104 static const struct file_operations tracing_eval_map_fops = { 5105 .open = tracing_eval_map_open, 5106 .read = seq_read, 5107 .llseek = seq_lseek, 5108 .release = seq_release, 5109 }; 5110 5111 static inline union trace_eval_map_item * 5112 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr) 5113 { 5114 /* Return tail of array given the head */ 5115 return ptr + ptr->head.length + 1; 5116 } 5117 5118 static void 5119 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start, 5120 int len) 5121 { 5122 struct trace_eval_map **stop; 5123 struct trace_eval_map **map; 5124 union trace_eval_map_item *map_array; 5125 union trace_eval_map_item *ptr; 5126 5127 stop = start + len; 5128 5129 /* 5130 * The trace_eval_maps contains the map plus a head and tail item, 5131 * where the head holds the module and length of array, and the 5132 * tail holds a pointer to the next list. 
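	 *
	 * Roughly, an array holding "len" maps is laid out as:
	 *
	 *	map_array[0]		head { mod, length = len }
	 *	map_array[1..len]	the saved trace_eval_map copies
	 *	map_array[len + 1]	tail { next, end = NULL }
	 *
	 * which is what trace_eval_jmp_to_tail() relies on when it steps
	 * over head.length + 1 items.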
	 */
	map_array = kmalloc_objs(*map_array, len + 2);
	if (!map_array) {
		pr_warn("Unable to allocate trace eval mapping\n");
		return;
	}

	guard(mutex)(&trace_eval_mutex);

	if (!trace_eval_maps)
		trace_eval_maps = map_array;
	else {
		ptr = trace_eval_maps;
		for (;;) {
			ptr = trace_eval_jmp_to_tail(ptr);
			if (!ptr->tail.next)
				break;
			ptr = ptr->tail.next;

		}
		ptr->tail.next = map_array;
	}
	map_array->head.mod = mod;
	map_array->head.length = len;
	map_array++;

	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
		map_array->map = **map;
		map_array++;
	}
	memset(map_array, 0, sizeof(*map_array));
}

static void trace_create_eval_file(struct dentry *d_tracer)
{
	trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
			  NULL, &tracing_eval_map_fops);
}

#else /* CONFIG_TRACE_EVAL_MAP_FILE */
static inline void trace_create_eval_file(struct dentry *d_tracer) { }
static inline void trace_insert_eval_map_file(struct module *mod,
					      struct trace_eval_map **start, int len) { }
#endif /* !CONFIG_TRACE_EVAL_MAP_FILE */

static void
trace_event_update_with_eval_map(struct module *mod,
				 struct trace_eval_map **start,
				 int len)
{
	struct trace_eval_map **map;

	/* Always run sanitizer only if btf_type_tag attr exists. */
	if (len <= 0) {
		if (!(IS_ENABLED(CONFIG_DEBUG_INFO_BTF) &&
		      IS_ENABLED(CONFIG_PAHOLE_HAS_BTF_TAG) &&
		      __has_attribute(btf_type_tag)))
			return;
	}

	map = start;

	trace_event_update_all(map, len);

	if (len <= 0)
		return;

	trace_insert_eval_map_file(mod, start, len);
}

static ssize_t
tracing_set_trace_read(struct file *filp, char __user *ubuf,
		       size_t cnt, loff_t *ppos)
{
	struct trace_array *tr = filp->private_data;
	char buf[MAX_TRACER_SIZE+2];
	int r;

	scoped_guard(mutex, &trace_types_lock) {
		r = sprintf(buf, "%s\n", tr->current_trace->name);
	}

	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}

int tracer_init(struct tracer *t, struct trace_array *tr)
{
	tracing_reset_online_cpus(&tr->array_buffer);
	update_last_data_if_empty(tr);
	return t->init(tr);
}

static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
{
	int cpu;

	for_each_tracing_cpu(cpu)
		per_cpu_ptr(buf->data, cpu)->entries = val;
}

static void update_buffer_entries(struct array_buffer *buf, int cpu)
{
	if (cpu == RING_BUFFER_ALL_CPUS) {
		set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
	} else {
		per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
	}
}

#ifdef CONFIG_TRACER_SNAPSHOT
/* resize @trace_buf's buffer to the size of @size_buf's entries */
static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
					struct array_buffer *size_buf, int cpu_id)
{
	int cpu, ret = 0;

	if (cpu_id == RING_BUFFER_ALL_CPUS) {
		for_each_tracing_cpu(cpu) {
			ret = ring_buffer_resize(trace_buf->buffer,
				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
			if (ret < 0)
				break;
			per_cpu_ptr(trace_buf->data, cpu)->entries =
				per_cpu_ptr(size_buf->data, cpu)->entries;
		}
	} else {
		ret = ring_buffer_resize(trace_buf->buffer,
			 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
		if (ret == 0)
			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
				per_cpu_ptr(size_buf->data, cpu_id)->entries;
	}

	return ret;
}
#endif /* CONFIG_TRACER_SNAPSHOT */

static int __tracing_resize_ring_buffer(struct trace_array *tr,
					unsigned long size, int cpu)
{
	int ret;

	/*
	 * If kernel or user changes the size of the ring buffer
	 * we use the size that was given, and we can forget about
	 * expanding it later.
	 */
	trace_set_ring_buffer_expanded(tr);

	/* May be called before buffers are initialized */
	if (!tr->array_buffer.buffer)
		return 0;

	/* Do not allow tracing while resizing ring buffer */
	tracing_stop_tr(tr);

	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
	if (ret < 0)
		goto out_start;

#ifdef CONFIG_TRACER_SNAPSHOT
	if (!tr->allocated_snapshot)
		goto out;

	ret = ring_buffer_resize(tr->snapshot_buffer.buffer, size, cpu);
	if (ret < 0) {
		int r = resize_buffer_duplicate_size(&tr->array_buffer,
						     &tr->array_buffer, cpu);
		if (r < 0) {
			/*
			 * AARGH! We are left with different size max buffer!!!!
			 * The max buffer is our "snapshot" buffer.
			 * When a tracer needs a snapshot (one of the
			 * latency tracers), it swaps the max buffer with the
			 * saved snapshot. We succeeded in updating the size
			 * of the main buffer, but failed to update the size
			 * of the max buffer. But when we tried to reset the
			 * main buffer to the original size, we failed there
			 * too. This is very unlikely to happen, but if it
			 * does, warn and kill all tracing.
			 */
			WARN_ON(1);
			tracing_disabled = 1;
		}
		goto out_start;
	}

	update_buffer_entries(&tr->snapshot_buffer, cpu);

 out:
#endif /* CONFIG_TRACER_SNAPSHOT */

	update_buffer_entries(&tr->array_buffer, cpu);
 out_start:
	tracing_start_tr(tr);
	return ret;
}

ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
				   unsigned long size, int cpu_id)
{
	guard(mutex)(&trace_types_lock);

	if (cpu_id != RING_BUFFER_ALL_CPUS) {
		/* make sure, this cpu is enabled in the mask */
		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask))
			return -EINVAL;
	}

	return __tracing_resize_ring_buffer(tr, size, cpu_id);
}

struct trace_mod_entry {
	unsigned long	mod_addr;
	char		mod_name[MODULE_NAME_LEN];
};

struct trace_scratch {
	unsigned int		clock_id;
	unsigned long		text_addr;
	unsigned long		nr_entries;
	struct trace_mod_entry	entries[];
};

static DEFINE_MUTEX(scratch_mutex);

static int cmp_mod_entry(const void *key, const void *pivot)
{
	unsigned long addr = (unsigned long)key;
	const struct trace_mod_entry *ent = pivot;

	if (addr < ent[0].mod_addr)
		return -1;

	return addr >= ent[1].mod_addr;
}

/**
 * trace_adjust_address() - Adjust prev boot address to current address.
 * @tr:		Persistent ring buffer's trace_array.
 * @addr:	Address in @tr which is adjusted.
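 *
 * Return: @addr translated to its current-boot location when a kernel
 * text or module delta applies; otherwise @addr is returned unchanged.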
 */
unsigned long trace_adjust_address(struct trace_array *tr, unsigned long addr)
{
	struct trace_module_delta *module_delta;
	struct trace_scratch *tscratch;
	struct trace_mod_entry *entry;
	unsigned long raddr;
	int idx = 0, nr_entries;

	/* If we don't have last boot delta, return the address */
	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
		return addr;

	/* tr->module_delta must be protected by rcu. */
	guard(rcu)();
	tscratch = tr->scratch;
	/* If there is no tscratch, module_delta must be NULL. */
	module_delta = READ_ONCE(tr->module_delta);
	if (!module_delta || !tscratch->nr_entries ||
	    tscratch->entries[0].mod_addr > addr) {
		raddr = addr + tr->text_delta;
		return __is_kernel(raddr) || is_kernel_core_data(raddr) ||
			is_kernel_rodata(raddr) ? raddr : addr;
	}

	/* Note that entries must be sorted. */
	nr_entries = tscratch->nr_entries;
	if (nr_entries == 1 ||
	    tscratch->entries[nr_entries - 1].mod_addr < addr)
		idx = nr_entries - 1;
	else {
		entry = __inline_bsearch((void *)addr,
				tscratch->entries,
				nr_entries - 1,
				sizeof(tscratch->entries[0]),
				cmp_mod_entry);
		if (entry)
			idx = entry - tscratch->entries;
	}

	return addr + module_delta->delta[idx];
}

#ifdef CONFIG_MODULES
static int save_mod(struct module *mod, void *data)
{
	struct trace_array *tr = data;
	struct trace_scratch *tscratch;
	struct trace_mod_entry *entry;
	unsigned int size;

	tscratch = tr->scratch;
	if (!tscratch)
		return -1;
	size = tr->scratch_size;

	if (struct_size(tscratch, entries, tscratch->nr_entries + 1) > size)
		return -1;

	entry = &tscratch->entries[tscratch->nr_entries];

	tscratch->nr_entries++;

	entry->mod_addr = (unsigned long)mod->mem[MOD_TEXT].base;
	strscpy(entry->mod_name, mod->name);

	return 0;
}
#else
static int save_mod(struct module *mod, void *data)
{
	return 0;
}
#endif

static void update_last_data(struct trace_array *tr)
{
	struct trace_module_delta *module_delta;
	struct trace_scratch *tscratch;

	if (!(tr->flags & TRACE_ARRAY_FL_BOOT))
		return;

	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
		return;

	/* Only clear and update the buffer if it has previous boot data. */
	tr->flags &= ~TRACE_ARRAY_FL_LAST_BOOT;

	/* Reset the module list and reload them */
	if (tr->scratch) {
		struct trace_scratch *tscratch = tr->scratch;

		tscratch->clock_id = tr->clock_id;
		memset(tscratch->entries, 0,
		       flex_array_size(tscratch, entries, tscratch->nr_entries));
		tscratch->nr_entries = 0;

		guard(mutex)(&scratch_mutex);
		module_for_each_mod(save_mod, tr);
	}

	/*
	 * Need to clear all CPU buffers as there cannot be events
	 * from the previous boot mixed with events from this boot
	 * as that will cause a confusing trace. Need to clear all
	 * CPU buffers, even for those that may currently be offline.
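	 * (tracing_reset_online_cpus() would skip CPUs that happen to be
	 * offline right now, hence the _all_cpus variant below.)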
	 */
	tracing_reset_all_cpus(&tr->array_buffer);

	/* Using current data now */
	tr->text_delta = 0;

	if (!tr->scratch)
		return;

	tscratch = tr->scratch;
	module_delta = READ_ONCE(tr->module_delta);
	WRITE_ONCE(tr->module_delta, NULL);
	kfree_rcu(module_delta, rcu);

	/* Set the persistent ring buffer meta data to this address */
	tscratch->text_addr = (unsigned long)_text;
}

/**
 * tracing_update_buffers - used by tracing facility to expand ring buffers
 * @tr: The tracing instance
 *
 * To save memory on systems where tracing is configured in but never
 * used, the ring buffers start at a minimum size. Once a user starts
 * to use the tracing facility, the buffers need to grow to their
 * default size.
 *
 * This function is to be called when a tracer is about to be used.
 */
int tracing_update_buffers(struct trace_array *tr)
{
	int ret = 0;

	if (!tr)
		tr = &global_trace;

	guard(mutex)(&trace_types_lock);

	update_last_data(tr);

	if (!tr->ring_buffer_expanded)
		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
						RING_BUFFER_ALL_CPUS);
	return ret;
}

/*
 * Used to clear out the tracer before deletion of an instance.
 * Must have trace_types_lock held.
 */
static void tracing_set_nop(struct trace_array *tr)
{
	if (tr->current_trace == &nop_trace)
		return;

	tr->current_trace->enabled--;

	if (tr->current_trace->reset)
		tr->current_trace->reset(tr);

	tr->current_trace = &nop_trace;
	tr->current_trace_flags = nop_trace.flags;
}

static bool tracer_options_updated;

int tracing_set_tracer(struct trace_array *tr, const char *buf)
{
	struct tracer *trace = NULL;
	struct tracers *t;
	bool had_max_tr;
	int ret;

	guard(mutex)(&trace_types_lock);

	update_last_data(tr);

	if (!tr->ring_buffer_expanded) {
		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
						RING_BUFFER_ALL_CPUS);
		if (ret < 0)
			return ret;
		ret = 0;
	}

	list_for_each_entry(t, &tr->tracers, list) {
		if (strcmp(t->tracer->name, buf) == 0) {
			trace = t->tracer;
			break;
		}
	}
	if (!trace)
		return -EINVAL;

	if (trace == tr->current_trace)
		return 0;

#ifdef CONFIG_TRACER_SNAPSHOT
	if (tracer_uses_snapshot(trace)) {
		local_irq_disable();
		arch_spin_lock(&tr->max_lock);
		ret = tr->cond_snapshot ? -EBUSY : 0;
		arch_spin_unlock(&tr->max_lock);
		local_irq_enable();
		if (ret)
			return ret;
	}
#endif
	/* Some tracers won't work on kernel command line */
	if (system_state < SYSTEM_RUNNING && trace->noboot) {
		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
			trace->name);
		return -EINVAL;
	}

	/* Some tracers are only allowed for the top level buffer */
	if (!trace_ok_for_array(trace, tr))
		return -EINVAL;

	/* If trace pipe files are being read, we can't change the tracer */
	if (tr->trace_ref)
		return -EBUSY;

	trace_branch_disable();

	tr->current_trace->enabled--;

	if (tr->current_trace->reset)
		tr->current_trace->reset(tr);

	had_max_tr = tracer_uses_snapshot(tr->current_trace);

	/* Current trace needs to be nop_trace before synchronize_rcu */
	tr->current_trace = &nop_trace;
	tr->current_trace_flags = nop_trace.flags;

	if (had_max_tr && !tracer_uses_snapshot(trace)) {
		/*
		 * We need to make sure that the update_max_tr sees that
		 * current_trace changed to nop_trace to keep it from
		 * swapping the buffers after we resize it.
		 * update_max_tr() is called with interrupts disabled,
		 * so a synchronize_rcu() is sufficient.
		 */
		synchronize_rcu();
		free_snapshot(tr);
		tracing_disarm_snapshot(tr);
	}

	if (!had_max_tr && tracer_uses_snapshot(trace)) {
		ret = tracing_arm_snapshot_locked(tr);
		if (ret)
			return ret;
	}

	tr->current_trace_flags = t->flags ? : t->tracer->flags;

	if (trace->init) {
		ret = tracer_init(trace, tr);
		if (ret) {
			if (tracer_uses_snapshot(trace))
				tracing_disarm_snapshot(tr);
			tr->current_trace_flags = nop_trace.flags;
			return ret;
		}
	}

	tr->current_trace = trace;
	tr->current_trace->enabled++;
	trace_branch_enable(tr);

	return 0;
}

static ssize_t
tracing_set_trace_write(struct file *filp, const char __user *ubuf,
			size_t cnt, loff_t *ppos)
{
	struct trace_array *tr = filp->private_data;
	char buf[MAX_TRACER_SIZE+1];
	char *name;
	size_t ret;
	int err;

	ret = cnt;

	if (cnt > MAX_TRACER_SIZE)
		cnt = MAX_TRACER_SIZE;

	if (copy_from_user(buf, ubuf, cnt))
		return -EFAULT;

	buf[cnt] = 0;

	name = strim(buf);

	err = tracing_set_tracer(tr, name);
	if (err)
		return err;

	*ppos += ret;

	return ret;
}

static ssize_t
tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
		   size_t cnt, loff_t *ppos)
{
	char buf[64];
	int r;

	r = snprintf(buf, sizeof(buf), "%ld\n",
		     *ptr == (unsigned long)-1 ?
-1 : nsecs_to_usecs(*ptr)); 5697 if (r > sizeof(buf)) 5698 r = sizeof(buf); 5699 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 5700 } 5701 5702 static ssize_t 5703 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf, 5704 size_t cnt, loff_t *ppos) 5705 { 5706 unsigned long val; 5707 int ret; 5708 5709 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 5710 if (ret) 5711 return ret; 5712 5713 *ptr = val * 1000; 5714 5715 return cnt; 5716 } 5717 5718 static ssize_t 5719 tracing_thresh_read(struct file *filp, char __user *ubuf, 5720 size_t cnt, loff_t *ppos) 5721 { 5722 return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos); 5723 } 5724 5725 static ssize_t 5726 tracing_thresh_write(struct file *filp, const char __user *ubuf, 5727 size_t cnt, loff_t *ppos) 5728 { 5729 struct trace_array *tr = filp->private_data; 5730 int ret; 5731 5732 guard(mutex)(&trace_types_lock); 5733 ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos); 5734 if (ret < 0) 5735 return ret; 5736 5737 if (tr->current_trace->update_thresh) { 5738 ret = tr->current_trace->update_thresh(tr); 5739 if (ret < 0) 5740 return ret; 5741 } 5742 5743 return cnt; 5744 } 5745 5746 #ifdef CONFIG_TRACER_MAX_TRACE 5747 5748 static ssize_t 5749 tracing_max_lat_read(struct file *filp, char __user *ubuf, 5750 size_t cnt, loff_t *ppos) 5751 { 5752 struct trace_array *tr = filp->private_data; 5753 5754 return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos); 5755 } 5756 5757 static ssize_t 5758 tracing_max_lat_write(struct file *filp, const char __user *ubuf, 5759 size_t cnt, loff_t *ppos) 5760 { 5761 struct trace_array *tr = filp->private_data; 5762 5763 return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos); 5764 } 5765 5766 #endif 5767 5768 static int open_pipe_on_cpu(struct trace_array *tr, int cpu) 5769 { 5770 if (cpu == RING_BUFFER_ALL_CPUS) { 5771 if (cpumask_empty(tr->pipe_cpumask)) { 5772 cpumask_setall(tr->pipe_cpumask); 5773 return 0; 5774 } 5775 } else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) { 5776 cpumask_set_cpu(cpu, tr->pipe_cpumask); 5777 return 0; 5778 } 5779 return -EBUSY; 5780 } 5781 5782 static void close_pipe_on_cpu(struct trace_array *tr, int cpu) 5783 { 5784 if (cpu == RING_BUFFER_ALL_CPUS) { 5785 WARN_ON(!cpumask_full(tr->pipe_cpumask)); 5786 cpumask_clear(tr->pipe_cpumask); 5787 } else { 5788 WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask)); 5789 cpumask_clear_cpu(cpu, tr->pipe_cpumask); 5790 } 5791 } 5792 5793 static int tracing_open_pipe(struct inode *inode, struct file *filp) 5794 { 5795 struct trace_array *tr = inode->i_private; 5796 struct trace_iterator *iter; 5797 int cpu; 5798 int ret; 5799 5800 ret = tracing_check_open_get_tr(tr); 5801 if (ret) 5802 return ret; 5803 5804 guard(mutex)(&trace_types_lock); 5805 cpu = tracing_get_cpu(inode); 5806 ret = open_pipe_on_cpu(tr, cpu); 5807 if (ret) 5808 goto fail_pipe_on_cpu; 5809 5810 /* create a buffer to store the information to pass to userspace */ 5811 iter = kzalloc_obj(*iter); 5812 if (!iter) { 5813 ret = -ENOMEM; 5814 goto fail_alloc_iter; 5815 } 5816 5817 trace_seq_init(&iter->seq); 5818 iter->trace = tr->current_trace; 5819 5820 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) { 5821 ret = -ENOMEM; 5822 goto fail; 5823 } 5824 5825 /* trace pipe does not show start of buffer */ 5826 cpumask_setall(iter->started); 5827 5828 if (tr->trace_flags & TRACE_ITER(LATENCY_FMT)) 5829 iter->iter_flags |= TRACE_FILE_LAT_FMT; 5830 5831 /* Output in nanoseconds only if we are using a clock in nanoseconds. 
*/ 5832 if (trace_clocks[tr->clock_id].in_ns) 5833 iter->iter_flags |= TRACE_FILE_TIME_IN_NS; 5834 5835 iter->tr = tr; 5836 iter->array_buffer = &tr->array_buffer; 5837 iter->cpu_file = cpu; 5838 mutex_init(&iter->mutex); 5839 filp->private_data = iter; 5840 5841 if (iter->trace->pipe_open) 5842 iter->trace->pipe_open(iter); 5843 5844 nonseekable_open(inode, filp); 5845 5846 tr->trace_ref++; 5847 5848 return ret; 5849 5850 fail: 5851 kfree(iter); 5852 fail_alloc_iter: 5853 close_pipe_on_cpu(tr, cpu); 5854 fail_pipe_on_cpu: 5855 __trace_array_put(tr); 5856 return ret; 5857 } 5858 5859 static int tracing_release_pipe(struct inode *inode, struct file *file) 5860 { 5861 struct trace_iterator *iter = file->private_data; 5862 struct trace_array *tr = inode->i_private; 5863 5864 scoped_guard(mutex, &trace_types_lock) { 5865 tr->trace_ref--; 5866 5867 if (iter->trace->pipe_close) 5868 iter->trace->pipe_close(iter); 5869 close_pipe_on_cpu(tr, iter->cpu_file); 5870 } 5871 5872 free_trace_iter_content(iter); 5873 kfree(iter); 5874 5875 trace_array_put(tr); 5876 5877 return 0; 5878 } 5879 5880 static __poll_t 5881 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table) 5882 { 5883 struct trace_array *tr = iter->tr; 5884 5885 /* Iterators are static, they should be filled or empty */ 5886 if (trace_buffer_iter(iter, iter->cpu_file)) 5887 return EPOLLIN | EPOLLRDNORM; 5888 5889 if (tr->trace_flags & TRACE_ITER(BLOCK)) 5890 /* 5891 * Always select as readable when in blocking mode 5892 */ 5893 return EPOLLIN | EPOLLRDNORM; 5894 else 5895 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file, 5896 filp, poll_table, iter->tr->buffer_percent); 5897 } 5898 5899 static __poll_t 5900 tracing_poll_pipe(struct file *filp, poll_table *poll_table) 5901 { 5902 struct trace_iterator *iter = filp->private_data; 5903 5904 return trace_poll(iter, filp, poll_table); 5905 } 5906 5907 /* Must be called with iter->mutex held. */ 5908 static int tracing_wait_pipe(struct file *filp) 5909 { 5910 struct trace_iterator *iter = filp->private_data; 5911 int ret; 5912 5913 while (trace_empty(iter)) { 5914 5915 if ((filp->f_flags & O_NONBLOCK)) { 5916 return -EAGAIN; 5917 } 5918 5919 /* 5920 * We block until we read something and tracing is disabled. 5921 * We still block if tracing is disabled, but we have never 5922 * read anything. This allows a user to cat this file, and 5923 * then enable tracing. But after we have read something, 5924 * we give an EOF when tracing is again disabled. 5925 * 5926 * iter->pos will be 0 if we haven't read anything. 5927 */ 5928 if (!tracer_tracing_is_on(iter->tr) && iter->pos) 5929 break; 5930 5931 mutex_unlock(&iter->mutex); 5932 5933 ret = wait_on_pipe(iter, 0); 5934 5935 mutex_lock(&iter->mutex); 5936 5937 if (ret) 5938 return ret; 5939 } 5940 5941 return 1; 5942 } 5943 5944 static bool update_last_data_if_empty(struct trace_array *tr) 5945 { 5946 if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) 5947 return false; 5948 5949 if (!ring_buffer_empty(tr->array_buffer.buffer)) 5950 return false; 5951 5952 /* 5953 * If the buffer contains the last boot data and all per-cpu 5954 * buffers are empty, reset it from the kernel side. 5955 */ 5956 update_last_data(tr); 5957 return true; 5958 } 5959 5960 /* 5961 * Consumer reader. 
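 * Unlike iterating the "trace" file, a read here consumes the events it
 * returns: trace_consume() removes them from the ring buffer, so two
 * concurrent readers of trace_pipe would each see a disjoint subset of
 * the event stream.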
5962  */
5963 static ssize_t
5964 tracing_read_pipe(struct file *filp, char __user *ubuf,
5965 		  size_t cnt, loff_t *ppos)
5966 {
5967 	struct trace_iterator *iter = filp->private_data;
5968 	ssize_t sret;
5969 
5970 	/*
5971 	 * Avoid more than one consumer on a single file descriptor.
5972 	 * This is just a matter of trace coherency; the ring buffer itself
5973 	 * is protected.
5974 	 */
5975 	guard(mutex)(&iter->mutex);
5976 
5977 	/* return any leftover data */
5978 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5979 	if (sret != -EBUSY)
5980 		return sret;
5981 
5982 	trace_seq_init(&iter->seq);
5983 
5984 	if (iter->trace->read) {
5985 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5986 		if (sret)
5987 			return sret;
5988 	}
5989 
5990 waitagain:
5991 	if (update_last_data_if_empty(iter->tr))
5992 		return 0;
5993 
5994 	sret = tracing_wait_pipe(filp);
5995 	if (sret <= 0)
5996 		return sret;
5997 
5998 	/* stop when tracing is finished */
5999 	if (trace_empty(iter))
6000 		return 0;
6001 
6002 	if (cnt >= TRACE_SEQ_BUFFER_SIZE)
6003 		cnt = TRACE_SEQ_BUFFER_SIZE - 1;
6004 
6005 	/* reset all but tr, trace, and overruns */
6006 	trace_iterator_reset(iter);
6007 	cpumask_clear(iter->started);
6008 	trace_seq_init(&iter->seq);
6009 
6010 	trace_event_read_lock();
6011 	trace_access_lock(iter->cpu_file);
6012 	while (trace_find_next_entry_inc(iter) != NULL) {
6013 		enum print_line_t ret;
6014 		int save_len = iter->seq.seq.len;
6015 
6016 		ret = print_trace_line(iter);
6017 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6018 			/*
6019 			 * If one print_trace_line() fills the entire trace_seq in one shot,
6020 			 * trace_seq_to_user() will return -EBUSY because save_len == 0.
6021 			 * In this case, we need to consume it; otherwise, the loop will peek
6022 			 * at this event next time, resulting in an infinite loop.
6023 			 */
6024 			if (save_len == 0) {
6025 				iter->seq.full = 0;
6026 				trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6027 				trace_consume(iter);
6028 				break;
6029 			}
6030 
6031 			/* In other cases, don't print partial lines */
6032 			iter->seq.seq.len = save_len;
6033 			break;
6034 		}
6035 		if (ret != TRACE_TYPE_NO_CONSUME)
6036 			trace_consume(iter);
6037 
6038 		if (trace_seq_used(&iter->seq) >= cnt)
6039 			break;
6040 
6041 		/*
6042 		 * Setting the full flag means we reached the trace_seq buffer
6043 		 * size, so we should have left via the partial-output condition
6044 		 * above. One of the trace_seq_* functions was not used properly.
6045 		 */
6046 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6047 			  iter->ent->type);
6048 	}
6049 	trace_access_unlock(iter->cpu_file);
6050 	trace_event_read_unlock();
6051 
6052 	/* Now copy what we have to the user */
6053 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6054 	if (iter->seq.readpos >= trace_seq_used(&iter->seq))
6055 		trace_seq_init(&iter->seq);
6056 
6057 	/*
6058 	 * If there was nothing to send to the user, despite consuming trace
6059 	 * entries, go back to wait for more entries.
6060 	 */
6061 	if (sret == -EBUSY)
6062 		goto waitagain;
6063 
6064 	return sret;
6065 }
6066 
6067 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6068 				     unsigned int idx)
6069 {
6070 	__free_page(spd->pages[idx]);
6071 }
6072 
6073 static size_t
6074 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6075 {
6076 	size_t count;
6077 	int save_len;
6078 	int ret;
6079 
6080 	/* Seq buffer is page-sized, exactly what we need.
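	 * Each pass of the loop below formats one event into the seq buffer;
	 * once the page or the remaining splice budget ("rem") would be
	 * exceeded, the partial line is rolled back to save_len and kept
	 * for the next page.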
*/ 6081 for (;;) { 6082 save_len = iter->seq.seq.len; 6083 ret = print_trace_line(iter); 6084 6085 if (trace_seq_has_overflowed(&iter->seq)) { 6086 iter->seq.seq.len = save_len; 6087 break; 6088 } 6089 6090 /* 6091 * This should not be hit, because it should only 6092 * be set if the iter->seq overflowed. But check it 6093 * anyway to be safe. 6094 */ 6095 if (ret == TRACE_TYPE_PARTIAL_LINE) { 6096 iter->seq.seq.len = save_len; 6097 break; 6098 } 6099 6100 count = trace_seq_used(&iter->seq) - save_len; 6101 if (rem < count) { 6102 rem = 0; 6103 iter->seq.seq.len = save_len; 6104 break; 6105 } 6106 6107 if (ret != TRACE_TYPE_NO_CONSUME) 6108 trace_consume(iter); 6109 rem -= count; 6110 if (!trace_find_next_entry_inc(iter)) { 6111 rem = 0; 6112 iter->ent = NULL; 6113 break; 6114 } 6115 } 6116 6117 return rem; 6118 } 6119 6120 static ssize_t tracing_splice_read_pipe(struct file *filp, 6121 loff_t *ppos, 6122 struct pipe_inode_info *pipe, 6123 size_t len, 6124 unsigned int flags) 6125 { 6126 struct page *pages_def[PIPE_DEF_BUFFERS]; 6127 struct partial_page partial_def[PIPE_DEF_BUFFERS]; 6128 struct trace_iterator *iter = filp->private_data; 6129 struct splice_pipe_desc spd = { 6130 .pages = pages_def, 6131 .partial = partial_def, 6132 .nr_pages = 0, /* This gets updated below. */ 6133 .nr_pages_max = PIPE_DEF_BUFFERS, 6134 .ops = &default_pipe_buf_ops, 6135 .spd_release = tracing_spd_release_pipe, 6136 }; 6137 ssize_t ret; 6138 size_t rem; 6139 unsigned int i; 6140 6141 if (splice_grow_spd(pipe, &spd)) 6142 return -ENOMEM; 6143 6144 mutex_lock(&iter->mutex); 6145 6146 if (iter->trace->splice_read) { 6147 ret = iter->trace->splice_read(iter, filp, 6148 ppos, pipe, len, flags); 6149 if (ret) 6150 goto out_err; 6151 } 6152 6153 ret = tracing_wait_pipe(filp); 6154 if (ret <= 0) 6155 goto out_err; 6156 6157 if (!iter->ent && !trace_find_next_entry_inc(iter)) { 6158 ret = -EFAULT; 6159 goto out_err; 6160 } 6161 6162 trace_event_read_lock(); 6163 trace_access_lock(iter->cpu_file); 6164 6165 /* Fill as many pages as possible. */ 6166 for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) { 6167 spd.pages[i] = alloc_page(GFP_KERNEL); 6168 if (!spd.pages[i]) 6169 break; 6170 6171 rem = tracing_fill_pipe_page(rem, iter); 6172 6173 /* Copy the data into the page, so we can start over. 
*/ 6174 ret = trace_seq_to_buffer(&iter->seq, 6175 page_address(spd.pages[i]), 6176 min((size_t)trace_seq_used(&iter->seq), 6177 (size_t)PAGE_SIZE)); 6178 if (ret < 0) { 6179 __free_page(spd.pages[i]); 6180 break; 6181 } 6182 spd.partial[i].offset = 0; 6183 spd.partial[i].len = ret; 6184 6185 trace_seq_init(&iter->seq); 6186 } 6187 6188 trace_access_unlock(iter->cpu_file); 6189 trace_event_read_unlock(); 6190 mutex_unlock(&iter->mutex); 6191 6192 spd.nr_pages = i; 6193 6194 if (i) 6195 ret = splice_to_pipe(pipe, &spd); 6196 else 6197 ret = 0; 6198 out: 6199 splice_shrink_spd(&spd); 6200 return ret; 6201 6202 out_err: 6203 mutex_unlock(&iter->mutex); 6204 goto out; 6205 } 6206 6207 static ssize_t 6208 tracing_syscall_buf_read(struct file *filp, char __user *ubuf, 6209 size_t cnt, loff_t *ppos) 6210 { 6211 struct inode *inode = file_inode(filp); 6212 struct trace_array *tr = inode->i_private; 6213 char buf[64]; 6214 int r; 6215 6216 r = snprintf(buf, 64, "%d\n", tr->syscall_buf_sz); 6217 6218 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 6219 } 6220 6221 static ssize_t 6222 tracing_syscall_buf_write(struct file *filp, const char __user *ubuf, 6223 size_t cnt, loff_t *ppos) 6224 { 6225 struct inode *inode = file_inode(filp); 6226 struct trace_array *tr = inode->i_private; 6227 unsigned long val; 6228 int ret; 6229 6230 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 6231 if (ret) 6232 return ret; 6233 6234 if (val > SYSCALL_FAULT_USER_MAX) 6235 val = SYSCALL_FAULT_USER_MAX; 6236 6237 tr->syscall_buf_sz = val; 6238 6239 *ppos += cnt; 6240 6241 return cnt; 6242 } 6243 6244 static ssize_t 6245 tracing_entries_read(struct file *filp, char __user *ubuf, 6246 size_t cnt, loff_t *ppos) 6247 { 6248 struct inode *inode = file_inode(filp); 6249 struct trace_array *tr = inode->i_private; 6250 int cpu = tracing_get_cpu(inode); 6251 char buf[64]; 6252 int r = 0; 6253 ssize_t ret; 6254 6255 mutex_lock(&trace_types_lock); 6256 6257 if (cpu == RING_BUFFER_ALL_CPUS) { 6258 int cpu, buf_size_same; 6259 unsigned long size; 6260 6261 size = 0; 6262 buf_size_same = 1; 6263 /* check if all cpu sizes are same */ 6264 for_each_tracing_cpu(cpu) { 6265 /* fill in the size from first enabled cpu */ 6266 if (size == 0) 6267 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries; 6268 if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) { 6269 buf_size_same = 0; 6270 break; 6271 } 6272 } 6273 6274 if (buf_size_same) { 6275 if (!tr->ring_buffer_expanded) 6276 r = sprintf(buf, "%lu (expanded: %lu)\n", 6277 size >> 10, 6278 trace_buf_size >> 10); 6279 else 6280 r = sprintf(buf, "%lu\n", size >> 10); 6281 } else 6282 r = sprintf(buf, "X\n"); 6283 } else 6284 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10); 6285 6286 mutex_unlock(&trace_types_lock); 6287 6288 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 6289 return ret; 6290 } 6291 6292 static ssize_t 6293 tracing_entries_write(struct file *filp, const char __user *ubuf, 6294 size_t cnt, loff_t *ppos) 6295 { 6296 struct inode *inode = file_inode(filp); 6297 struct trace_array *tr = inode->i_private; 6298 unsigned long val; 6299 int ret; 6300 6301 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 6302 if (ret) 6303 return ret; 6304 6305 /* must have at least 1 entry */ 6306 if (!val) 6307 return -EINVAL; 6308 6309 /* value is in KB */ 6310 val <<= 10; 6311 ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode)); 6312 if (ret < 0) 6313 return ret; 6314 6315 *ppos += cnt; 6316 6317 return cnt; 6318 } 6319 
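/*
 * Illustrative shell usage for the entry-count files implemented above,
 * assuming the usual tracefs mount point (paths here are examples only):
 *
 *	# set every per-CPU buffer to 1408 KiB
 *	echo 1408 > /sys/kernel/tracing/buffer_size_kb
 *
 *	# resize only CPU 2's buffer
 *	echo 64 > /sys/kernel/tracing/per_cpu/cpu2/buffer_size_kb
 *
 * Once the per-CPU sizes differ, reading the top-level file prints "X".
 */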
6320 static ssize_t 6321 tracing_total_entries_read(struct file *filp, char __user *ubuf, 6322 size_t cnt, loff_t *ppos) 6323 { 6324 struct trace_array *tr = filp->private_data; 6325 char buf[64]; 6326 int r, cpu; 6327 unsigned long size = 0, expanded_size = 0; 6328 6329 mutex_lock(&trace_types_lock); 6330 for_each_tracing_cpu(cpu) { 6331 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10; 6332 if (!tr->ring_buffer_expanded) 6333 expanded_size += trace_buf_size >> 10; 6334 } 6335 if (tr->ring_buffer_expanded) 6336 r = sprintf(buf, "%lu\n", size); 6337 else 6338 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size); 6339 mutex_unlock(&trace_types_lock); 6340 6341 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 6342 } 6343 6344 #define LAST_BOOT_HEADER ((void *)1) 6345 6346 static void *l_next(struct seq_file *m, void *v, loff_t *pos) 6347 { 6348 struct trace_array *tr = m->private; 6349 struct trace_scratch *tscratch = tr->scratch; 6350 unsigned int index = *pos; 6351 6352 (*pos)++; 6353 6354 if (*pos == 1) 6355 return LAST_BOOT_HEADER; 6356 6357 /* Only show offsets of the last boot data */ 6358 if (!tscratch || !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) 6359 return NULL; 6360 6361 /* *pos 0 is for the header, 1 is for the first module */ 6362 index--; 6363 6364 if (index >= tscratch->nr_entries) 6365 return NULL; 6366 6367 return &tscratch->entries[index]; 6368 } 6369 6370 static void *l_start(struct seq_file *m, loff_t *pos) 6371 { 6372 mutex_lock(&scratch_mutex); 6373 6374 return l_next(m, NULL, pos); 6375 } 6376 6377 static void l_stop(struct seq_file *m, void *p) 6378 { 6379 mutex_unlock(&scratch_mutex); 6380 } 6381 6382 static void show_last_boot_header(struct seq_file *m, struct trace_array *tr) 6383 { 6384 struct trace_scratch *tscratch = tr->scratch; 6385 6386 /* 6387 * Do not leak KASLR address. This only shows the KASLR address of 6388 * the last boot. When the ring buffer is started, the LAST_BOOT 6389 * flag gets cleared, and this should only report "current". 6390 * Otherwise it shows the KASLR address from the previous boot which 6391 * should not be the same as the current boot. 
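	 *
	 * Illustrative output (addresses made up): when a previous-boot
	 * buffer is present, the file starts with a line such as
	 *
	 *	ffffffff9a000000	[kernel]
	 *
	 * followed by one "<addr>\t<module>" line per saved entry; in the
	 * normal case it is just "# Current".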
6392 */ 6393 if (tscratch && (tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) 6394 seq_printf(m, "%lx\t[kernel]\n", tscratch->text_addr); 6395 else 6396 seq_puts(m, "# Current\n"); 6397 } 6398 6399 static int l_show(struct seq_file *m, void *v) 6400 { 6401 struct trace_array *tr = m->private; 6402 struct trace_mod_entry *entry = v; 6403 6404 if (v == LAST_BOOT_HEADER) { 6405 show_last_boot_header(m, tr); 6406 return 0; 6407 } 6408 6409 seq_printf(m, "%lx\t%s\n", entry->mod_addr, entry->mod_name); 6410 return 0; 6411 } 6412 6413 static const struct seq_operations last_boot_seq_ops = { 6414 .start = l_start, 6415 .next = l_next, 6416 .stop = l_stop, 6417 .show = l_show, 6418 }; 6419 6420 static int tracing_last_boot_open(struct inode *inode, struct file *file) 6421 { 6422 struct trace_array *tr = inode->i_private; 6423 struct seq_file *m; 6424 int ret; 6425 6426 ret = tracing_check_open_get_tr(tr); 6427 if (ret) 6428 return ret; 6429 6430 ret = seq_open(file, &last_boot_seq_ops); 6431 if (ret) { 6432 trace_array_put(tr); 6433 return ret; 6434 } 6435 6436 m = file->private_data; 6437 m->private = tr; 6438 6439 return 0; 6440 } 6441 6442 static int tracing_buffer_meta_open(struct inode *inode, struct file *filp) 6443 { 6444 struct trace_array *tr = inode->i_private; 6445 int cpu = tracing_get_cpu(inode); 6446 int ret; 6447 6448 ret = tracing_check_open_get_tr(tr); 6449 if (ret) 6450 return ret; 6451 6452 ret = ring_buffer_meta_seq_init(filp, tr->array_buffer.buffer, cpu); 6453 if (ret < 0) 6454 __trace_array_put(tr); 6455 return ret; 6456 } 6457 6458 static ssize_t 6459 tracing_free_buffer_write(struct file *filp, const char __user *ubuf, 6460 size_t cnt, loff_t *ppos) 6461 { 6462 /* 6463 * There is no need to read what the user has written, this function 6464 * is just to make sure that there is no error when "echo" is used 6465 */ 6466 6467 *ppos += cnt; 6468 6469 return cnt; 6470 } 6471 6472 static int 6473 tracing_free_buffer_release(struct inode *inode, struct file *filp) 6474 { 6475 struct trace_array *tr = inode->i_private; 6476 6477 /* disable tracing ? */ 6478 if (tr->trace_flags & TRACE_ITER(STOP_ON_FREE)) 6479 tracer_tracing_off(tr); 6480 /* resize the ring buffer to 0 */ 6481 tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS); 6482 6483 trace_array_put(tr); 6484 6485 return 0; 6486 } 6487 6488 #define TRACE_MARKER_MAX_SIZE 4096 6489 6490 static ssize_t write_marker_to_buffer(struct trace_array *tr, const char *buf, 6491 size_t cnt, unsigned long ip) 6492 { 6493 struct ring_buffer_event *event; 6494 enum event_trigger_type tt = ETT_NONE; 6495 struct trace_buffer *buffer; 6496 struct print_entry *entry; 6497 int meta_size; 6498 ssize_t written; 6499 size_t size; 6500 6501 meta_size = sizeof(*entry) + 2; /* add '\0' and possible '\n' */ 6502 again: 6503 size = cnt + meta_size; 6504 6505 buffer = tr->array_buffer.buffer; 6506 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, 6507 tracing_gen_ctx()); 6508 if (unlikely(!event)) { 6509 /* 6510 * If the size was greater than what was allowed, then 6511 * make it smaller and try again. 
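		 * For example, with 4K sub-buffers a 5000-byte marker write
		 * fails to reserve an event, is trimmed to the largest payload
		 * that fits, and succeeds on the second attempt. The WARN
		 * below catches the case where trimming made no progress.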
6512 		 */
6513 		if (size > ring_buffer_max_event_size(buffer)) {
6514 			cnt = ring_buffer_max_event_size(buffer) - meta_size;
6515 			/* The above should only happen once */
6516 			if (WARN_ON_ONCE(cnt + meta_size == size))
6517 				return -EBADF;
6518 			goto again;
6519 		}
6520 
6521 		/* Ring buffer disabled, return as if not open for write */
6522 		return -EBADF;
6523 	}
6524 
6525 	entry = ring_buffer_event_data(event);
6526 	entry->ip = ip;
6527 	memcpy(&entry->buf, buf, cnt);
6528 	written = cnt;
6529 
6530 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6531 		/* do not add \n before testing triggers, but add \0 */
6532 		entry->buf[cnt] = '\0';
6533 		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
6534 	}
6535 
6536 	if (entry->buf[cnt - 1] != '\n') {
6537 		entry->buf[cnt] = '\n';
6538 		entry->buf[cnt + 1] = '\0';
6539 	} else
6540 		entry->buf[cnt] = '\0';
6541 
6542 	if (static_branch_unlikely(&trace_marker_exports_enabled))
6543 		ftrace_exports(event, TRACE_EXPORT_MARKER);
6544 	__buffer_unlock_commit(buffer, event);
6545 
6546 	if (tt)
6547 		event_triggers_post_call(tr->trace_marker_file, tt);
6548 
6549 	return written;
6550 }
6551 
6552 struct trace_user_buf {
6553 	char *buf;
6554 };
6555 
6556 static DEFINE_MUTEX(trace_user_buffer_mutex);
6557 static struct trace_user_buf_info *trace_user_buffer;
6558 
6559 /**
6560  * trace_user_fault_destroy - free up allocated memory of a trace user buffer
6561  * @tinfo: The descriptor to free up
6562  *
6563  * Frees any data allocated in the trace info descriptor.
6564  */
6565 void trace_user_fault_destroy(struct trace_user_buf_info *tinfo)
6566 {
6567 	char *buf;
6568 	int cpu;
6569 
6570 	if (!tinfo || !tinfo->tbuf)
6571 		return;
6572 
6573 	for_each_possible_cpu(cpu) {
6574 		buf = per_cpu_ptr(tinfo->tbuf, cpu)->buf;
6575 		kfree(buf);
6576 	}
6577 	free_percpu(tinfo->tbuf);
6578 }
6579 
6580 static int user_fault_buffer_enable(struct trace_user_buf_info *tinfo, size_t size)
6581 {
6582 	char *buf;
6583 	int cpu;
6584 
6585 	lockdep_assert_held(&trace_user_buffer_mutex);
6586 
6587 	tinfo->tbuf = alloc_percpu(struct trace_user_buf);
6588 	if (!tinfo->tbuf)
6589 		return -ENOMEM;
6590 
6591 	tinfo->ref = 1;
6592 	tinfo->size = size;
6593 
6594 	/* Clear each buffer in case of error */
6595 	for_each_possible_cpu(cpu) {
6596 		per_cpu_ptr(tinfo->tbuf, cpu)->buf = NULL;
6597 	}
6598 
6599 	for_each_possible_cpu(cpu) {
6600 		buf = kmalloc_node(size, GFP_KERNEL,
6601 				   cpu_to_node(cpu));
6602 		if (!buf)
6603 			return -ENOMEM;
6604 		per_cpu_ptr(tinfo->tbuf, cpu)->buf = buf;
6605 	}
6606 
6607 	return 0;
6608 }
6609 
6610 /* For internal use. Free and reinitialize */
6611 static void user_buffer_free(struct trace_user_buf_info **tinfo)
6612 {
6613 	lockdep_assert_held(&trace_user_buffer_mutex);
6614 
6615 	trace_user_fault_destroy(*tinfo);
6616 	kfree(*tinfo);
6617 	*tinfo = NULL;
6618 }
6619 
6620 /* For internal use.
Initialize and allocate */
6621 static int user_buffer_init(struct trace_user_buf_info **tinfo, size_t size)
6622 {
6623 	bool alloc = false;
6624 	int ret;
6625 
6626 	lockdep_assert_held(&trace_user_buffer_mutex);
6627 
6628 	if (!*tinfo) {
6629 		alloc = true;
6630 		*tinfo = kzalloc_obj(**tinfo);
6631 		if (!*tinfo)
6632 			return -ENOMEM;
6633 	}
6634 
6635 	ret = user_fault_buffer_enable(*tinfo, size);
6636 	if (ret < 0 && alloc)
6637 		user_buffer_free(tinfo);
6638 
6639 	return ret;
6640 }
6641 
6642 /* For internal use, dereference and free if necessary */
6643 static void user_buffer_put(struct trace_user_buf_info **tinfo)
6644 {
6645 	guard(mutex)(&trace_user_buffer_mutex);
6646 
6647 	if (WARN_ON_ONCE(!*tinfo || !(*tinfo)->ref))
6648 		return;
6649 
6650 	if (--(*tinfo)->ref)
6651 		return;
6652 
6653 	user_buffer_free(tinfo);
6654 }
6655 
6656 /**
6657  * trace_user_fault_init - Allocate or reference a per CPU buffer
6658  * @tinfo: A pointer to the trace buffer descriptor
6659  * @size: The size to allocate each per CPU buffer
6660  *
6661  * Create a per CPU buffer that can be used to copy from user space
6662  * in a task context. When calling trace_user_fault_read(), preemption
6663  * must be disabled; the read temporarily enables preemption while it
6664  * copies the user space data into the buffer. If any context switches
6665  * occur during the copy, it retries until the copy succeeds without
6666  * one, which guarantees the buffer contents are still valid.
6667  *
6668  * Returns 0 on success, negative on failure.
6669  */
6670 int trace_user_fault_init(struct trace_user_buf_info *tinfo, size_t size)
6671 {
6672 	int ret;
6673 
6674 	if (!tinfo)
6675 		return -EINVAL;
6676 
6677 	guard(mutex)(&trace_user_buffer_mutex);
6678 
6679 	ret = user_buffer_init(&tinfo, size);
6680 	if (ret < 0)
6681 		trace_user_fault_destroy(tinfo);
6682 
6683 	return ret;
6684 }
6685 
6686 /**
6687  * trace_user_fault_get - up the ref count for the user buffer
6688  * @tinfo: A pointer to the trace buffer descriptor
6689  *
6690  * Ups the ref count of the trace buffer.
6691  *
6692  * Returns the new ref count.
6693  */
6694 int trace_user_fault_get(struct trace_user_buf_info *tinfo)
6695 {
6696 	if (!tinfo)
6697 		return -1;
6698 
6699 	guard(mutex)(&trace_user_buffer_mutex);
6700 
6701 	tinfo->ref++;
6702 	return tinfo->ref;
6703 }
6704 
6705 /**
6706  * trace_user_fault_put - dereference a per cpu trace buffer
6707  * @tinfo: The @tinfo that was passed to trace_user_fault_get()
6708  *
6709  * Decrement the ref count of @tinfo.
6710  *
6711  * Returns the new refcount (negative on error).
6712  */
6713 int trace_user_fault_put(struct trace_user_buf_info *tinfo)
6714 {
6715 	guard(mutex)(&trace_user_buffer_mutex);
6716 
6717 	if (WARN_ON_ONCE(!tinfo || !tinfo->ref))
6718 		return -1;
6719 
6720 	--tinfo->ref;
6721 	return tinfo->ref;
6722 }
6723 
6724 /**
6725  * trace_user_fault_read - Read user space into a per CPU buffer
6726  * @tinfo: The @tinfo allocated by trace_user_fault_get()
6727  * @ptr: The user space pointer to read
6728  * @size: The size of user space to read.
6729  * @copy_func: Optional function to use to copy from user space
6730  * @data: Data to pass to copy_func if it was supplied
6731  *
6732  * Preemption must be disabled when this is called, and must not
6733  * be enabled while using the returned buffer.
6734  * This does the copying from user space into a per CPU buffer.
6735  *
6736  * The @size must not be greater than the size passed in to
6737  * trace_user_fault_init().
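 *
 * Illustrative call pattern (mirroring tracing_mark_write() below), where
 * the returned buffer stays valid only while preemption stays disabled:
 *
 *	guard(preempt_notrace)();
 *	buf = trace_user_fault_read(tinfo, ubuf, size, NULL, NULL);
 *	if (!buf)
 *		return -EFAULT;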
6738  *
6739  * If @copy_func is NULL, trace_user_fault_read() will use copy_from_user(),
6740  * otherwise it will call @copy_func. It will call @copy_func with:
6741  *
6742  *   buffer: the per CPU buffer of the @tinfo.
6743  *   ptr: The pointer @ptr to user space to read
6744  *   size: The @size of the ptr to read
6745  *   data: The @data parameter
6746  *
6747  * It is expected that @copy_func will return 0 on success and non-zero
6748  * if there was a fault.
6749  *
6750  * Returns a pointer to the buffer with the content read from @ptr.
6751  * Preemption must remain disabled while the caller accesses the
6752  * buffer returned by this function.
6753  * Returns NULL if there was a fault, or if the size passed in is
6754  * greater than the size passed to trace_user_fault_init().
6755  */
6756 char *trace_user_fault_read(struct trace_user_buf_info *tinfo,
6757 			    const char __user *ptr, size_t size,
6758 			    trace_user_buf_copy copy_func, void *data)
6759 {
6760 	int cpu = smp_processor_id();
6761 	char *buffer = per_cpu_ptr(tinfo->tbuf, cpu)->buf;
6762 	unsigned int cnt;
6763 	int trys = 0;
6764 	int ret;
6765 
6766 	lockdep_assert_preemption_disabled();
6767 
6768 	/*
6769 	 * It's up to the caller to not try to copy more than it said
6770 	 * it would.
6771 	 */
6772 	if (size > tinfo->size)
6773 		return NULL;
6774 
6775 	/*
6776 	 * This acts similarly to a seqcount. The per CPU context switches are
6777 	 * recorded, migration is disabled and preemption is enabled. The
6778 	 * read of the user space memory is copied into the per CPU buffer.
6779 	 * Preemption is disabled again, and if the per CPU context switch count
6780 	 * is still the same, it means the buffer has not been corrupted.
6781 	 * If the count is different, it is assumed the buffer is corrupted
6782 	 * and reading must be tried again.
6783 	 */
6784 
6785 	do {
6786 		/*
6787 		 * If, for some reason, copy_from_user() always causes a context
6788 		 * switch, this would then cause an infinite loop.
6789 		 * If this task is preempted by another user space task, it
6790 		 * will cause this task to try again. But just in case something
6791 		 * changes where the copying from user space causes another task
6792 		 * to run, prevent this from going into an infinite loop.
6793 		 * 100 tries should be plenty.
6794 		 */
6795 		if (WARN_ONCE(trys++ > 100, "Error: Too many tries to read user space"))
6796 			return NULL;
6797 
6798 		/* Read the current CPU context switch counter */
6799 		cnt = nr_context_switches_cpu(cpu);
6800 
6801 		/*
6802 		 * Preemption is going to be enabled, but this task must
6803 		 * remain on this CPU.
6804 		 */
6805 		migrate_disable();
6806 
6807 		/*
6808 		 * Now preemption is being enabled and another task can come in
6809 		 * and use the same buffer and corrupt our data.
6810 		 */
6811 		preempt_enable_notrace();
6812 
6813 		/* Make sure preemption is enabled here */
6814 		lockdep_assert_preemption_enabled();
6815 
6816 		if (copy_func) {
6817 			ret = copy_func(buffer, ptr, size, data);
6818 		} else {
6819 			ret = __copy_from_user(buffer, ptr, size);
6820 		}
6821 
6822 		preempt_disable_notrace();
6823 		migrate_enable();
6824 
6825 		/* if it faulted, no need to test if the buffer was corrupted */
6826 		if (ret)
6827 			return NULL;
6828 
6829 		/*
6830 		 * Preemption is disabled again, now check the per CPU context
6831 		 * switch counter. If it doesn't match, then another user space
6832 		 * process may have scheduled in and corrupted our buffer. In that
6833 		 * case the copying must be retried.
6834 */ 6835 } while (nr_context_switches_cpu(cpu) != cnt); 6836 6837 return buffer; 6838 } 6839 6840 static ssize_t 6841 tracing_mark_write(struct file *filp, const char __user *ubuf, 6842 size_t cnt, loff_t *fpos) 6843 { 6844 struct trace_array *tr = filp->private_data; 6845 ssize_t written = -ENODEV; 6846 unsigned long ip; 6847 char *buf; 6848 6849 if (unlikely(tracing_disabled)) 6850 return -EINVAL; 6851 6852 if (!(tr->trace_flags & TRACE_ITER(MARKERS))) 6853 return -EINVAL; 6854 6855 if ((ssize_t)cnt < 0) 6856 return -EINVAL; 6857 6858 if (cnt > TRACE_MARKER_MAX_SIZE) 6859 cnt = TRACE_MARKER_MAX_SIZE; 6860 6861 /* Must have preemption disabled while having access to the buffer */ 6862 guard(preempt_notrace)(); 6863 6864 buf = trace_user_fault_read(trace_user_buffer, ubuf, cnt, NULL, NULL); 6865 if (!buf) 6866 return -EFAULT; 6867 6868 /* The selftests expect this function to be the IP address */ 6869 ip = _THIS_IP_; 6870 6871 /* The global trace_marker can go to multiple instances */ 6872 if (tr == &global_trace) { 6873 guard(rcu)(); 6874 list_for_each_entry_rcu(tr, &marker_copies, marker_list) { 6875 written = write_marker_to_buffer(tr, buf, cnt, ip); 6876 if (written < 0) 6877 break; 6878 } 6879 } else { 6880 written = write_marker_to_buffer(tr, buf, cnt, ip); 6881 } 6882 6883 return written; 6884 } 6885 6886 static ssize_t write_raw_marker_to_buffer(struct trace_array *tr, 6887 const char *buf, size_t cnt) 6888 { 6889 struct ring_buffer_event *event; 6890 struct trace_buffer *buffer; 6891 struct raw_data_entry *entry; 6892 ssize_t written; 6893 size_t size; 6894 6895 /* cnt includes both the entry->id and the data behind it. */ 6896 size = struct_offset(entry, id) + cnt; 6897 6898 buffer = tr->array_buffer.buffer; 6899 6900 if (size > ring_buffer_max_event_size(buffer)) 6901 return -EINVAL; 6902 6903 event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size, 6904 tracing_gen_ctx()); 6905 if (!event) 6906 /* Ring buffer disabled, return as if not open for write */ 6907 return -EBADF; 6908 6909 entry = ring_buffer_event_data(event); 6910 unsafe_memcpy(&entry->id, buf, cnt, 6911 "id and content already reserved on ring buffer" 6912 "'buf' includes the 'id' and the data." 
6913 "'entry' was allocated with cnt from 'id'."); 6914 written = cnt; 6915 6916 __buffer_unlock_commit(buffer, event); 6917 6918 return written; 6919 } 6920 6921 static ssize_t 6922 tracing_mark_raw_write(struct file *filp, const char __user *ubuf, 6923 size_t cnt, loff_t *fpos) 6924 { 6925 struct trace_array *tr = filp->private_data; 6926 ssize_t written = -ENODEV; 6927 char *buf; 6928 6929 if (unlikely(tracing_disabled)) 6930 return -EINVAL; 6931 6932 if (!(tr->trace_flags & TRACE_ITER(MARKERS))) 6933 return -EINVAL; 6934 6935 /* The marker must at least have a tag id */ 6936 if (cnt < sizeof(unsigned int)) 6937 return -EINVAL; 6938 6939 /* raw write is all or nothing */ 6940 if (cnt > TRACE_MARKER_MAX_SIZE) 6941 return -EINVAL; 6942 6943 /* Must have preemption disabled while having access to the buffer */ 6944 guard(preempt_notrace)(); 6945 6946 buf = trace_user_fault_read(trace_user_buffer, ubuf, cnt, NULL, NULL); 6947 if (!buf) 6948 return -EFAULT; 6949 6950 /* The global trace_marker_raw can go to multiple instances */ 6951 if (tr == &global_trace) { 6952 guard(rcu)(); 6953 list_for_each_entry_rcu(tr, &marker_copies, marker_list) { 6954 written = write_raw_marker_to_buffer(tr, buf, cnt); 6955 if (written < 0) 6956 break; 6957 } 6958 } else { 6959 written = write_raw_marker_to_buffer(tr, buf, cnt); 6960 } 6961 6962 return written; 6963 } 6964 6965 static int tracing_mark_open(struct inode *inode, struct file *filp) 6966 { 6967 int ret; 6968 6969 scoped_guard(mutex, &trace_user_buffer_mutex) { 6970 if (!trace_user_buffer) { 6971 ret = user_buffer_init(&trace_user_buffer, TRACE_MARKER_MAX_SIZE); 6972 if (ret < 0) 6973 return ret; 6974 } else { 6975 trace_user_buffer->ref++; 6976 } 6977 } 6978 6979 stream_open(inode, filp); 6980 ret = tracing_open_generic_tr(inode, filp); 6981 if (ret < 0) 6982 user_buffer_put(&trace_user_buffer); 6983 return ret; 6984 } 6985 6986 static int tracing_mark_release(struct inode *inode, struct file *file) 6987 { 6988 user_buffer_put(&trace_user_buffer); 6989 return tracing_release_generic_tr(inode, file); 6990 } 6991 6992 static int tracing_clock_show(struct seq_file *m, void *v) 6993 { 6994 struct trace_array *tr = m->private; 6995 int i; 6996 6997 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) 6998 seq_printf(m, 6999 "%s%s%s%s", i ? " " : "", 7000 i == tr->clock_id ? "[" : "", trace_clocks[i].name, 7001 i == tr->clock_id ? "]" : ""); 7002 seq_putc(m, '\n'); 7003 7004 return 0; 7005 } 7006 7007 int tracing_set_clock(struct trace_array *tr, const char *clockstr) 7008 { 7009 int i; 7010 7011 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) { 7012 if (strcmp(trace_clocks[i].name, clockstr) == 0) 7013 break; 7014 } 7015 if (i == ARRAY_SIZE(trace_clocks)) 7016 return -EINVAL; 7017 7018 guard(mutex)(&trace_types_lock); 7019 7020 tr->clock_id = i; 7021 7022 ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func); 7023 7024 /* 7025 * New clock may not be consistent with the previous clock. 7026 * Reset the buffer so that it doesn't have incomparable timestamps. 
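	 * (e.g. after "echo global > trace_clock", events stamped with the
	 * old clock would not sort correctly against new ones, so the live
	 * buffer, and the snapshot buffer if present, are reset below)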
7027 */ 7028 tracing_reset_online_cpus(&tr->array_buffer); 7029 7030 #ifdef CONFIG_TRACER_SNAPSHOT 7031 if (tr->snapshot_buffer.buffer) 7032 ring_buffer_set_clock(tr->snapshot_buffer.buffer, trace_clocks[i].func); 7033 tracing_reset_online_cpus(&tr->snapshot_buffer); 7034 #endif 7035 update_last_data_if_empty(tr); 7036 7037 if (tr->scratch && !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) { 7038 struct trace_scratch *tscratch = tr->scratch; 7039 7040 tscratch->clock_id = i; 7041 } 7042 7043 return 0; 7044 } 7045 7046 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf, 7047 size_t cnt, loff_t *fpos) 7048 { 7049 struct seq_file *m = filp->private_data; 7050 struct trace_array *tr = m->private; 7051 char buf[64]; 7052 const char *clockstr; 7053 int ret; 7054 7055 if (cnt >= sizeof(buf)) 7056 return -EINVAL; 7057 7058 if (copy_from_user(buf, ubuf, cnt)) 7059 return -EFAULT; 7060 7061 buf[cnt] = 0; 7062 7063 clockstr = strstrip(buf); 7064 7065 ret = tracing_set_clock(tr, clockstr); 7066 if (ret) 7067 return ret; 7068 7069 *fpos += cnt; 7070 7071 return cnt; 7072 } 7073 7074 static int tracing_clock_open(struct inode *inode, struct file *file) 7075 { 7076 struct trace_array *tr = inode->i_private; 7077 int ret; 7078 7079 ret = tracing_check_open_get_tr(tr); 7080 if (ret) 7081 return ret; 7082 7083 ret = single_open(file, tracing_clock_show, inode->i_private); 7084 if (ret < 0) 7085 trace_array_put(tr); 7086 7087 return ret; 7088 } 7089 7090 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v) 7091 { 7092 struct trace_array *tr = m->private; 7093 7094 guard(mutex)(&trace_types_lock); 7095 7096 if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer)) 7097 seq_puts(m, "delta [absolute]\n"); 7098 else 7099 seq_puts(m, "[delta] absolute\n"); 7100 7101 return 0; 7102 } 7103 7104 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file) 7105 { 7106 struct trace_array *tr = inode->i_private; 7107 int ret; 7108 7109 ret = tracing_check_open_get_tr(tr); 7110 if (ret) 7111 return ret; 7112 7113 ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private); 7114 if (ret < 0) 7115 trace_array_put(tr); 7116 7117 return ret; 7118 } 7119 7120 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe) 7121 { 7122 if (rbe == this_cpu_read(trace_buffered_event)) 7123 return ring_buffer_time_stamp(buffer); 7124 7125 return ring_buffer_event_time_stamp(buffer, rbe); 7126 } 7127 7128 struct ftrace_buffer_info { 7129 struct trace_iterator iter; 7130 void *spare; 7131 unsigned int spare_cpu; 7132 unsigned int spare_size; 7133 unsigned int read; 7134 }; 7135 7136 #ifdef CONFIG_TRACER_SNAPSHOT 7137 static int tracing_snapshot_open(struct inode *inode, struct file *file) 7138 { 7139 struct trace_array *tr = inode->i_private; 7140 struct trace_iterator *iter; 7141 struct seq_file *m; 7142 int ret; 7143 7144 ret = tracing_check_open_get_tr(tr); 7145 if (ret) 7146 return ret; 7147 7148 if (file->f_mode & FMODE_READ) { 7149 iter = __tracing_open(inode, file, true); 7150 if (IS_ERR(iter)) 7151 ret = PTR_ERR(iter); 7152 } else { 7153 /* Writes still need the seq_file to hold the private data */ 7154 ret = -ENOMEM; 7155 m = kzalloc_obj(*m); 7156 if (!m) 7157 goto out; 7158 iter = kzalloc_obj(*iter); 7159 if (!iter) { 7160 kfree(m); 7161 goto out; 7162 } 7163 ret = 0; 7164 7165 iter->tr = tr; 7166 iter->array_buffer = &tr->snapshot_buffer; 7167 iter->cpu_file = tracing_get_cpu(inode); 7168 m->private = iter; 7169 file->private_data 
= m; 7170 } 7171 out: 7172 if (ret < 0) 7173 trace_array_put(tr); 7174 7175 return ret; 7176 } 7177 7178 static void tracing_swap_cpu_buffer(void *tr) 7179 { 7180 update_max_tr_single((struct trace_array *)tr, current, smp_processor_id()); 7181 } 7182 7183 static ssize_t 7184 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt, 7185 loff_t *ppos) 7186 { 7187 struct seq_file *m = filp->private_data; 7188 struct trace_iterator *iter = m->private; 7189 struct trace_array *tr = iter->tr; 7190 unsigned long val; 7191 int ret; 7192 7193 ret = tracing_update_buffers(tr); 7194 if (ret < 0) 7195 return ret; 7196 7197 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 7198 if (ret) 7199 return ret; 7200 7201 guard(mutex)(&trace_types_lock); 7202 7203 if (tracer_uses_snapshot(tr->current_trace)) 7204 return -EBUSY; 7205 7206 local_irq_disable(); 7207 arch_spin_lock(&tr->max_lock); 7208 if (tr->cond_snapshot) 7209 ret = -EBUSY; 7210 arch_spin_unlock(&tr->max_lock); 7211 local_irq_enable(); 7212 if (ret) 7213 return ret; 7214 7215 switch (val) { 7216 case 0: 7217 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) 7218 return -EINVAL; 7219 if (tr->allocated_snapshot) 7220 free_snapshot(tr); 7221 break; 7222 case 1: 7223 /* Only allow per-cpu swap if the ring buffer supports it */ 7224 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP 7225 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) 7226 return -EINVAL; 7227 #endif 7228 if (tr->allocated_snapshot) 7229 ret = resize_buffer_duplicate_size(&tr->snapshot_buffer, 7230 &tr->array_buffer, iter->cpu_file); 7231 7232 ret = tracing_arm_snapshot_locked(tr); 7233 if (ret) 7234 return ret; 7235 7236 /* Now, we're going to swap */ 7237 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) { 7238 local_irq_disable(); 7239 update_max_tr(tr, current, smp_processor_id(), NULL); 7240 local_irq_enable(); 7241 } else { 7242 smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer, 7243 (void *)tr, 1); 7244 } 7245 tracing_disarm_snapshot(tr); 7246 break; 7247 default: 7248 if (tr->allocated_snapshot) { 7249 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) 7250 tracing_reset_online_cpus(&tr->snapshot_buffer); 7251 else 7252 tracing_reset_cpu(&tr->snapshot_buffer, iter->cpu_file); 7253 } 7254 break; 7255 } 7256 7257 if (ret >= 0) { 7258 *ppos += cnt; 7259 ret = cnt; 7260 } 7261 7262 return ret; 7263 } 7264 7265 static int tracing_snapshot_release(struct inode *inode, struct file *file) 7266 { 7267 struct seq_file *m = file->private_data; 7268 int ret; 7269 7270 ret = tracing_release(inode, file); 7271 7272 if (file->f_mode & FMODE_READ) 7273 return ret; 7274 7275 /* If write only, the seq_file is just a stub */ 7276 if (m) 7277 kfree(m->private); 7278 kfree(m); 7279 7280 return 0; 7281 } 7282 7283 static int tracing_buffers_open(struct inode *inode, struct file *filp); 7284 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf, 7285 size_t count, loff_t *ppos); 7286 static int tracing_buffers_release(struct inode *inode, struct file *file); 7287 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos, 7288 struct pipe_inode_info *pipe, size_t len, unsigned int flags); 7289 7290 static int snapshot_raw_open(struct inode *inode, struct file *filp) 7291 { 7292 struct ftrace_buffer_info *info; 7293 int ret; 7294 7295 /* The following checks for tracefs lockdown */ 7296 ret = tracing_buffers_open(inode, filp); 7297 if (ret < 0) 7298 return ret; 7299 7300 info = filp->private_data; 7301 7302 if (tracer_uses_snapshot(info->iter.trace)) { 7303 
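		/*
		 * A tracer that uses the snapshot buffer may swap it with the
		 * live buffer at any time, so raw reads of the snapshot would
		 * race with those swaps; refuse with -EBUSY.
		 */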
tracing_buffers_release(inode, filp); 7304 return -EBUSY; 7305 } 7306 7307 info->iter.snapshot = true; 7308 info->iter.array_buffer = &info->iter.tr->snapshot_buffer; 7309 7310 return ret; 7311 } 7312 7313 #endif /* CONFIG_TRACER_SNAPSHOT */ 7314 7315 7316 static const struct file_operations tracing_thresh_fops = { 7317 .open = tracing_open_generic, 7318 .read = tracing_thresh_read, 7319 .write = tracing_thresh_write, 7320 .llseek = generic_file_llseek, 7321 }; 7322 7323 #ifdef CONFIG_TRACER_MAX_TRACE 7324 static const struct file_operations tracing_max_lat_fops = { 7325 .open = tracing_open_generic_tr, 7326 .read = tracing_max_lat_read, 7327 .write = tracing_max_lat_write, 7328 .llseek = generic_file_llseek, 7329 .release = tracing_release_generic_tr, 7330 }; 7331 #endif 7332 7333 static const struct file_operations set_tracer_fops = { 7334 .open = tracing_open_generic_tr, 7335 .read = tracing_set_trace_read, 7336 .write = tracing_set_trace_write, 7337 .llseek = generic_file_llseek, 7338 .release = tracing_release_generic_tr, 7339 }; 7340 7341 static const struct file_operations tracing_pipe_fops = { 7342 .open = tracing_open_pipe, 7343 .poll = tracing_poll_pipe, 7344 .read = tracing_read_pipe, 7345 .splice_read = tracing_splice_read_pipe, 7346 .release = tracing_release_pipe, 7347 }; 7348 7349 static const struct file_operations tracing_entries_fops = { 7350 .open = tracing_open_generic_tr, 7351 .read = tracing_entries_read, 7352 .write = tracing_entries_write, 7353 .llseek = generic_file_llseek, 7354 .release = tracing_release_generic_tr, 7355 }; 7356 7357 static const struct file_operations tracing_syscall_buf_fops = { 7358 .open = tracing_open_generic_tr, 7359 .read = tracing_syscall_buf_read, 7360 .write = tracing_syscall_buf_write, 7361 .llseek = generic_file_llseek, 7362 .release = tracing_release_generic_tr, 7363 }; 7364 7365 static const struct file_operations tracing_buffer_meta_fops = { 7366 .open = tracing_buffer_meta_open, 7367 .read = seq_read, 7368 .llseek = seq_lseek, 7369 .release = tracing_seq_release, 7370 }; 7371 7372 static const struct file_operations tracing_total_entries_fops = { 7373 .open = tracing_open_generic_tr, 7374 .read = tracing_total_entries_read, 7375 .llseek = generic_file_llseek, 7376 .release = tracing_release_generic_tr, 7377 }; 7378 7379 static const struct file_operations tracing_free_buffer_fops = { 7380 .open = tracing_open_generic_tr, 7381 .write = tracing_free_buffer_write, 7382 .release = tracing_free_buffer_release, 7383 }; 7384 7385 static const struct file_operations tracing_mark_fops = { 7386 .open = tracing_mark_open, 7387 .write = tracing_mark_write, 7388 .release = tracing_mark_release, 7389 }; 7390 7391 static const struct file_operations tracing_mark_raw_fops = { 7392 .open = tracing_mark_open, 7393 .write = tracing_mark_raw_write, 7394 .release = tracing_mark_release, 7395 }; 7396 7397 static const struct file_operations trace_clock_fops = { 7398 .open = tracing_clock_open, 7399 .read = seq_read, 7400 .llseek = seq_lseek, 7401 .release = tracing_single_release_tr, 7402 .write = tracing_clock_write, 7403 }; 7404 7405 static const struct file_operations trace_time_stamp_mode_fops = { 7406 .open = tracing_time_stamp_mode_open, 7407 .read = seq_read, 7408 .llseek = seq_lseek, 7409 .release = tracing_single_release_tr, 7410 }; 7411 7412 static const struct file_operations last_boot_fops = { 7413 .open = tracing_last_boot_open, 7414 .read = seq_read, 7415 .llseek = seq_lseek, 7416 .release = tracing_seq_release, 7417 }; 7418 7419 #ifdef 
CONFIG_TRACER_SNAPSHOT
7420 static const struct file_operations snapshot_fops = {
7421 	.open		= tracing_snapshot_open,
7422 	.read		= seq_read,
7423 	.write		= tracing_snapshot_write,
7424 	.llseek		= tracing_lseek,
7425 	.release	= tracing_snapshot_release,
7426 };
7427 
7428 static const struct file_operations snapshot_raw_fops = {
7429 	.open		= snapshot_raw_open,
7430 	.read		= tracing_buffers_read,
7431 	.release	= tracing_buffers_release,
7432 	.splice_read	= tracing_buffers_splice_read,
7433 };
7434 
7435 #endif /* CONFIG_TRACER_SNAPSHOT */
7436 
7437 /*
7438  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7439  * @filp: The active open file structure
7440  * @ubuf: The userspace provided buffer holding the value to write
7441  * @cnt: The number of bytes to read from @ubuf
7442  * @ppos: The current "file" position
7443  *
7444  * This function implements the write interface for a struct trace_min_max_param.
7445  * The filp->private_data must point to a trace_min_max_param structure that
7446  * defines where to write the value, the min and the max acceptable values,
7447  * and a lock to protect the write.
7448  */
7449 static ssize_t
7450 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7451 {
7452 	struct trace_min_max_param *param = filp->private_data;
7453 	u64 val;
7454 	int err;
7455 
7456 	if (!param)
7457 		return -EFAULT;
7458 
7459 	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7460 	if (err)
7461 		return err;
7462 
7463 	if (param->lock)
7464 		mutex_lock(param->lock);
7465 
7466 	if (param->min && val < *param->min)
7467 		err = -EINVAL;
7468 
7469 	if (param->max && val > *param->max)
7470 		err = -EINVAL;
7471 
7472 	if (!err)
7473 		*param->val = val;
7474 
7475 	if (param->lock)
7476 		mutex_unlock(param->lock);
7477 
7478 	if (err)
7479 		return err;
7480 
7481 	return cnt;
7482 }
7483 
7484 /*
7485  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7486  * @filp: The active open file structure
7487  * @ubuf: The userspace provided buffer to read value into
7488  * @cnt: The maximum number of bytes to read
7489  * @ppos: The current "file" position
7490  *
7491  * This function implements the read interface for a struct trace_min_max_param.
7492  * The filp->private_data must point to a trace_min_max_param struct with valid
7493  * data.
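 *
 * A file owner typically wires one of these up roughly as follows
 * (the names here are illustrative, not taken from this file):
 *
 *	static u64 my_val, my_min, my_max = 100;
 *	static struct trace_min_max_param my_param = {
 *		.lock	= &my_mutex,
 *		.val	= &my_val,
 *		.min	= &my_min,
 *		.max	= &my_max,
 *	};
 *
 * and creates the tracefs file with trace_min_max_fops and &my_param as
 * the inode private data.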
7494 */ 7495 static ssize_t 7496 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) 7497 { 7498 struct trace_min_max_param *param = filp->private_data; 7499 char buf[U64_STR_SIZE]; 7500 int len; 7501 u64 val; 7502 7503 if (!param) 7504 return -EFAULT; 7505 7506 val = *param->val; 7507 7508 if (cnt > sizeof(buf)) 7509 cnt = sizeof(buf); 7510 7511 len = snprintf(buf, sizeof(buf), "%llu\n", val); 7512 7513 return simple_read_from_buffer(ubuf, cnt, ppos, buf, len); 7514 } 7515 7516 const struct file_operations trace_min_max_fops = { 7517 .open = tracing_open_generic, 7518 .read = trace_min_max_read, 7519 .write = trace_min_max_write, 7520 }; 7521 7522 #define TRACING_LOG_ERRS_MAX 8 7523 #define TRACING_LOG_LOC_MAX 128 7524 7525 #define CMD_PREFIX " Command: " 7526 7527 struct err_info { 7528 const char **errs; /* ptr to loc-specific array of err strings */ 7529 u8 type; /* index into errs -> specific err string */ 7530 u16 pos; /* caret position */ 7531 u64 ts; 7532 }; 7533 7534 struct tracing_log_err { 7535 struct list_head list; 7536 struct err_info info; 7537 char loc[TRACING_LOG_LOC_MAX]; /* err location */ 7538 char *cmd; /* what caused err */ 7539 }; 7540 7541 static DEFINE_MUTEX(tracing_err_log_lock); 7542 7543 static struct tracing_log_err *alloc_tracing_log_err(int len) 7544 { 7545 struct tracing_log_err *err; 7546 7547 err = kzalloc_obj(*err); 7548 if (!err) 7549 return ERR_PTR(-ENOMEM); 7550 7551 err->cmd = kzalloc(len, GFP_KERNEL); 7552 if (!err->cmd) { 7553 kfree(err); 7554 return ERR_PTR(-ENOMEM); 7555 } 7556 7557 return err; 7558 } 7559 7560 static void free_tracing_log_err(struct tracing_log_err *err) 7561 { 7562 kfree(err->cmd); 7563 kfree(err); 7564 } 7565 7566 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr, 7567 int len) 7568 { 7569 struct tracing_log_err *err; 7570 char *cmd; 7571 7572 if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) { 7573 err = alloc_tracing_log_err(len); 7574 if (PTR_ERR(err) != -ENOMEM) 7575 tr->n_err_log_entries++; 7576 7577 return err; 7578 } 7579 cmd = kzalloc(len, GFP_KERNEL); 7580 if (!cmd) 7581 return ERR_PTR(-ENOMEM); 7582 err = list_first_entry(&tr->err_log, struct tracing_log_err, list); 7583 kfree(err->cmd); 7584 err->cmd = cmd; 7585 list_del(&err->list); 7586 7587 return err; 7588 } 7589 7590 /** 7591 * err_pos - find the position of a string within a command for error careting 7592 * @cmd: The tracing command that caused the error 7593 * @str: The string to position the caret at within @cmd 7594 * 7595 * Finds the position of the first occurrence of @str within @cmd. The 7596 * return value can be passed to tracing_log_err() for caret placement 7597 * within @cmd. 7598 * 7599 * Returns the index within @cmd of the first occurrence of @str or 0 7600 * if @str was not found. 
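 *
 * For example, err_pos("hist:keys=bad", "bad") returns 10, which places
 * the caret under the 'b' when the error is later displayed.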
7601 */ 7602 unsigned int err_pos(char *cmd, const char *str) 7603 { 7604 char *found; 7605 7606 if (WARN_ON(!strlen(cmd))) 7607 return 0; 7608 7609 found = strstr(cmd, str); 7610 if (found) 7611 return found - cmd; 7612 7613 return 0; 7614 } 7615 7616 /** 7617 * tracing_log_err - write an error to the tracing error log 7618 * @tr: The associated trace array for the error (NULL for top level array) 7619 * @loc: A string describing where the error occurred 7620 * @cmd: The tracing command that caused the error 7621 * @errs: The array of loc-specific static error strings 7622 * @type: The index into errs[], which produces the specific static err string 7623 * @pos: The position the caret should be placed in the cmd 7624 * 7625 * Writes an error into tracing/error_log of the form: 7626 * 7627 * <loc>: error: <text> 7628 * Command: <cmd> 7629 * ^ 7630 * 7631 * tracing/error_log is a small log file containing the last 7632 * TRACING_LOG_ERRS_MAX errors (8). Memory for errors isn't allocated 7633 * unless there has been a tracing error, and the error log can be 7634 * cleared and have its memory freed by writing the empty string in 7635 * truncation mode to it i.e. echo > tracing/error_log. 7636 * 7637 * NOTE: the @errs array along with the @type param are used to 7638 * produce a static error string - this string is not copied and saved 7639 * when the error is logged - only a pointer to it is saved. See 7640 * existing callers for examples of how static strings are typically 7641 * defined for use with tracing_log_err(). 7642 */ 7643 void tracing_log_err(struct trace_array *tr, 7644 const char *loc, const char *cmd, 7645 const char **errs, u8 type, u16 pos) 7646 { 7647 struct tracing_log_err *err; 7648 int len = 0; 7649 7650 if (!tr) 7651 tr = &global_trace; 7652 7653 len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1; 7654 7655 guard(mutex)(&tracing_err_log_lock); 7656 7657 err = get_tracing_log_err(tr, len); 7658 if (PTR_ERR(err) == -ENOMEM) 7659 return; 7660 7661 snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc); 7662 snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd); 7663 7664 err->info.errs = errs; 7665 err->info.type = type; 7666 err->info.pos = pos; 7667 err->info.ts = local_clock(); 7668 7669 list_add_tail(&err->list, &tr->err_log); 7670 } 7671 7672 static void clear_tracing_err_log(struct trace_array *tr) 7673 { 7674 struct tracing_log_err *err, *next; 7675 7676 guard(mutex)(&tracing_err_log_lock); 7677 7678 list_for_each_entry_safe(err, next, &tr->err_log, list) { 7679 list_del(&err->list); 7680 free_tracing_log_err(err); 7681 } 7682 7683 tr->n_err_log_entries = 0; 7684 } 7685 7686 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos) 7687 { 7688 struct trace_array *tr = m->private; 7689 7690 mutex_lock(&tracing_err_log_lock); 7691 7692 return seq_list_start(&tr->err_log, *pos); 7693 } 7694 7695 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos) 7696 { 7697 struct trace_array *tr = m->private; 7698 7699 return seq_list_next(v, &tr->err_log, pos); 7700 } 7701 7702 static void tracing_err_log_seq_stop(struct seq_file *m, void *v) 7703 { 7704 mutex_unlock(&tracing_err_log_lock); 7705 } 7706 7707 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos) 7708 { 7709 u16 i; 7710 7711 for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++) 7712 seq_putc(m, ' '); 7713 for (i = 0; i < pos; i++) 7714 seq_putc(m, ' '); 7715 seq_puts(m, "^\n"); 7716 } 7717 7718 static int tracing_err_log_seq_show(struct seq_file *m, void *v) 
7719 { 7720 struct tracing_log_err *err = v; 7721 7722 if (err) { 7723 const char *err_text = err->info.errs[err->info.type]; 7724 u64 sec = err->info.ts; 7725 u32 nsec; 7726 7727 nsec = do_div(sec, NSEC_PER_SEC); 7728 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000, 7729 err->loc, err_text); 7730 seq_printf(m, "%s", err->cmd); 7731 tracing_err_log_show_pos(m, err->info.pos); 7732 } 7733 7734 return 0; 7735 } 7736 7737 static const struct seq_operations tracing_err_log_seq_ops = { 7738 .start = tracing_err_log_seq_start, 7739 .next = tracing_err_log_seq_next, 7740 .stop = tracing_err_log_seq_stop, 7741 .show = tracing_err_log_seq_show 7742 }; 7743 7744 static int tracing_err_log_open(struct inode *inode, struct file *file) 7745 { 7746 struct trace_array *tr = inode->i_private; 7747 int ret = 0; 7748 7749 ret = tracing_check_open_get_tr(tr); 7750 if (ret) 7751 return ret; 7752 7753 /* If this file was opened for write, then erase contents */ 7754 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) 7755 clear_tracing_err_log(tr); 7756 7757 if (file->f_mode & FMODE_READ) { 7758 ret = seq_open(file, &tracing_err_log_seq_ops); 7759 if (!ret) { 7760 struct seq_file *m = file->private_data; 7761 m->private = tr; 7762 } else { 7763 trace_array_put(tr); 7764 } 7765 } 7766 return ret; 7767 } 7768 7769 static ssize_t tracing_err_log_write(struct file *file, 7770 const char __user *buffer, 7771 size_t count, loff_t *ppos) 7772 { 7773 return count; 7774 } 7775 7776 static int tracing_err_log_release(struct inode *inode, struct file *file) 7777 { 7778 struct trace_array *tr = inode->i_private; 7779 7780 trace_array_put(tr); 7781 7782 if (file->f_mode & FMODE_READ) 7783 seq_release(inode, file); 7784 7785 return 0; 7786 } 7787 7788 static const struct file_operations tracing_err_log_fops = { 7789 .open = tracing_err_log_open, 7790 .write = tracing_err_log_write, 7791 .read = seq_read, 7792 .llseek = tracing_lseek, 7793 .release = tracing_err_log_release, 7794 }; 7795 7796 static int tracing_buffers_open(struct inode *inode, struct file *filp) 7797 { 7798 struct trace_array *tr = inode->i_private; 7799 struct ftrace_buffer_info *info; 7800 int ret; 7801 7802 ret = tracing_check_open_get_tr(tr); 7803 if (ret) 7804 return ret; 7805 7806 info = kvzalloc_obj(*info); 7807 if (!info) { 7808 trace_array_put(tr); 7809 return -ENOMEM; 7810 } 7811 7812 mutex_lock(&trace_types_lock); 7813 7814 info->iter.tr = tr; 7815 info->iter.cpu_file = tracing_get_cpu(inode); 7816 info->iter.trace = tr->current_trace; 7817 info->iter.array_buffer = &tr->array_buffer; 7818 info->spare = NULL; 7819 /* Force reading ring buffer for first read */ 7820 info->read = (unsigned int)-1; 7821 7822 filp->private_data = info; 7823 7824 tr->trace_ref++; 7825 7826 mutex_unlock(&trace_types_lock); 7827 7828 ret = nonseekable_open(inode, filp); 7829 if (ret < 0) 7830 trace_array_put(tr); 7831 7832 return ret; 7833 } 7834 7835 static __poll_t 7836 tracing_buffers_poll(struct file *filp, poll_table *poll_table) 7837 { 7838 struct ftrace_buffer_info *info = filp->private_data; 7839 struct trace_iterator *iter = &info->iter; 7840 7841 return trace_poll(iter, filp, poll_table); 7842 } 7843 7844 static ssize_t 7845 tracing_buffers_read(struct file *filp, char __user *ubuf, 7846 size_t count, loff_t *ppos) 7847 { 7848 struct ftrace_buffer_info *info = filp->private_data; 7849 struct trace_iterator *iter = &info->iter; 7850 void *trace_data; 7851 int page_size; 7852 ssize_t ret = 0; 7853 ssize_t size; 7854 7855 if (!count) 7856 return 0; 
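	/*
	 * Reads go through a persistent "spare" sub-buffer page:
	 * ring_buffer_read_page() fills it (swapping a whole sub-buffer in
	 * when it can), and info->read tracks how much of it has already
	 * been handed to user space across read() calls.
	 */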
7857 7858 if (iter->snapshot && tracer_uses_snapshot(iter->tr->current_trace)) 7859 return -EBUSY; 7860 7861 page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer); 7862 7863 /* Make sure the spare matches the current sub buffer size */ 7864 if (info->spare) { 7865 if (page_size != info->spare_size) { 7866 ring_buffer_free_read_page(iter->array_buffer->buffer, 7867 info->spare_cpu, info->spare); 7868 info->spare = NULL; 7869 } 7870 } 7871 7872 if (!info->spare) { 7873 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer, 7874 iter->cpu_file); 7875 if (IS_ERR(info->spare)) { 7876 ret = PTR_ERR(info->spare); 7877 info->spare = NULL; 7878 } else { 7879 info->spare_cpu = iter->cpu_file; 7880 info->spare_size = page_size; 7881 } 7882 } 7883 if (!info->spare) 7884 return ret; 7885 7886 /* Do we have previous read data to read? */ 7887 if (info->read < page_size) 7888 goto read; 7889 7890 again: 7891 trace_access_lock(iter->cpu_file); 7892 ret = ring_buffer_read_page(iter->array_buffer->buffer, 7893 info->spare, 7894 count, 7895 iter->cpu_file, 0); 7896 trace_access_unlock(iter->cpu_file); 7897 7898 if (ret < 0) { 7899 if (trace_empty(iter) && !iter->closed) { 7900 if (update_last_data_if_empty(iter->tr)) 7901 return 0; 7902 7903 if ((filp->f_flags & O_NONBLOCK)) 7904 return -EAGAIN; 7905 7906 ret = wait_on_pipe(iter, 0); 7907 if (ret) 7908 return ret; 7909 7910 goto again; 7911 } 7912 return 0; 7913 } 7914 7915 info->read = 0; 7916 read: 7917 size = page_size - info->read; 7918 if (size > count) 7919 size = count; 7920 trace_data = ring_buffer_read_page_data(info->spare); 7921 ret = copy_to_user(ubuf, trace_data + info->read, size); 7922 if (ret == size) 7923 return -EFAULT; 7924 7925 size -= ret; 7926 7927 *ppos += size; 7928 info->read += size; 7929 7930 return size; 7931 } 7932 7933 static int tracing_buffers_flush(struct file *file, fl_owner_t id) 7934 { 7935 struct ftrace_buffer_info *info = file->private_data; 7936 struct trace_iterator *iter = &info->iter; 7937 7938 iter->closed = true; 7939 /* Make sure the waiters see the new wait_index */ 7940 (void)atomic_fetch_inc_release(&iter->wait_index); 7941 7942 ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file); 7943 7944 return 0; 7945 } 7946 7947 static int tracing_buffers_release(struct inode *inode, struct file *file) 7948 { 7949 struct ftrace_buffer_info *info = file->private_data; 7950 struct trace_iterator *iter = &info->iter; 7951 7952 guard(mutex)(&trace_types_lock); 7953 7954 iter->tr->trace_ref--; 7955 7956 __trace_array_put(iter->tr); 7957 7958 if (info->spare) 7959 ring_buffer_free_read_page(iter->array_buffer->buffer, 7960 info->spare_cpu, info->spare); 7961 kvfree(info); 7962 7963 return 0; 7964 } 7965 7966 struct buffer_ref { 7967 struct trace_buffer *buffer; 7968 void *page; 7969 int cpu; 7970 refcount_t refcount; 7971 }; 7972 7973 static void buffer_ref_release(struct buffer_ref *ref) 7974 { 7975 if (!refcount_dec_and_test(&ref->refcount)) 7976 return; 7977 ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page); 7978 kfree(ref); 7979 } 7980 7981 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe, 7982 struct pipe_buffer *buf) 7983 { 7984 struct buffer_ref *ref = (struct buffer_ref *)buf->private; 7985 7986 buffer_ref_release(ref); 7987 buf->private = 0; 7988 } 7989 7990 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe, 7991 struct pipe_buffer *buf) 7992 { 7993 struct buffer_ref *ref = (struct buffer_ref *)buf->private; 7994 7995 if 
(refcount_read(&ref->refcount) > INT_MAX/2) 7996 return false; 7997 7998 refcount_inc(&ref->refcount); 7999 return true; 8000 } 8001 8002 /* Pipe buffer operations for a buffer. */ 8003 static const struct pipe_buf_operations buffer_pipe_buf_ops = { 8004 .release = buffer_pipe_buf_release, 8005 .get = buffer_pipe_buf_get, 8006 }; 8007 8008 /* 8009 * Callback from splice_to_pipe(), if we need to release some pages 8010 * at the end of the spd in case we error'ed out in filling the pipe. 8011 */ 8012 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i) 8013 { 8014 struct buffer_ref *ref = 8015 (struct buffer_ref *)spd->partial[i].private; 8016 8017 buffer_ref_release(ref); 8018 spd->partial[i].private = 0; 8019 } 8020 8021 static ssize_t 8022 tracing_buffers_splice_read(struct file *file, loff_t *ppos, 8023 struct pipe_inode_info *pipe, size_t len, 8024 unsigned int flags) 8025 { 8026 struct ftrace_buffer_info *info = file->private_data; 8027 struct trace_iterator *iter = &info->iter; 8028 struct partial_page partial_def[PIPE_DEF_BUFFERS]; 8029 struct page *pages_def[PIPE_DEF_BUFFERS]; 8030 struct splice_pipe_desc spd = { 8031 .pages = pages_def, 8032 .partial = partial_def, 8033 .nr_pages_max = PIPE_DEF_BUFFERS, 8034 .ops = &buffer_pipe_buf_ops, 8035 .spd_release = buffer_spd_release, 8036 }; 8037 struct buffer_ref *ref; 8038 bool woken = false; 8039 int page_size; 8040 int entries, i; 8041 ssize_t ret = 0; 8042 8043 if (iter->snapshot && tracer_uses_snapshot(iter->tr->current_trace)) 8044 return -EBUSY; 8045 8046 page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer); 8047 if (*ppos & (page_size - 1)) 8048 return -EINVAL; 8049 8050 if (len & (page_size - 1)) { 8051 if (len < page_size) 8052 return -EINVAL; 8053 len &= (~(page_size - 1)); 8054 } 8055 8056 if (splice_grow_spd(pipe, &spd)) 8057 return -ENOMEM; 8058 8059 again: 8060 trace_access_lock(iter->cpu_file); 8061 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file); 8062 8063 for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) { 8064 struct page *page; 8065 int r; 8066 8067 ref = kzalloc_obj(*ref); 8068 if (!ref) { 8069 ret = -ENOMEM; 8070 break; 8071 } 8072 8073 refcount_set(&ref->refcount, 1); 8074 ref->buffer = iter->array_buffer->buffer; 8075 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file); 8076 if (IS_ERR(ref->page)) { 8077 ret = PTR_ERR(ref->page); 8078 ref->page = NULL; 8079 kfree(ref); 8080 break; 8081 } 8082 ref->cpu = iter->cpu_file; 8083 8084 r = ring_buffer_read_page(ref->buffer, ref->page, 8085 len, iter->cpu_file, 1); 8086 if (r < 0) { 8087 ring_buffer_free_read_page(ref->buffer, ref->cpu, 8088 ref->page); 8089 kfree(ref); 8090 break; 8091 } 8092 8093 page = virt_to_page(ring_buffer_read_page_data(ref->page)); 8094 8095 spd.pages[i] = page; 8096 spd.partial[i].len = page_size; 8097 spd.partial[i].offset = 0; 8098 spd.partial[i].private = (unsigned long)ref; 8099 spd.nr_pages++; 8100 *ppos += page_size; 8101 8102 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file); 8103 } 8104 8105 trace_access_unlock(iter->cpu_file); 8106 spd.nr_pages = i; 8107 8108 /* did we read anything? */ 8109 if (!spd.nr_pages) { 8110 8111 if (ret) 8112 goto out; 8113 8114 if (woken) 8115 goto out; 8116 8117 ret = -EAGAIN; 8118 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK)) 8119 goto out; 8120 8121 ret = wait_on_pipe(iter, iter->snapshot ? 
0 : iter->tr->buffer_percent); 8122 if (ret) 8123 goto out; 8124 8125 /* No need to wait after waking up when tracing is off */ 8126 if (!tracer_tracing_is_on(iter->tr)) 8127 goto out; 8128 8129 /* Iterate one more time to collect any new data then exit */ 8130 woken = true; 8131 8132 goto again; 8133 } 8134 8135 ret = splice_to_pipe(pipe, &spd); 8136 out: 8137 splice_shrink_spd(&spd); 8138 8139 return ret; 8140 } 8141 8142 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 8143 { 8144 struct ftrace_buffer_info *info = file->private_data; 8145 struct trace_iterator *iter = &info->iter; 8146 int err; 8147 8148 if (cmd == TRACE_MMAP_IOCTL_GET_READER) { 8149 if (!(file->f_flags & O_NONBLOCK)) { 8150 err = ring_buffer_wait(iter->array_buffer->buffer, 8151 iter->cpu_file, 8152 iter->tr->buffer_percent, 8153 NULL, NULL); 8154 if (err) 8155 return err; 8156 } 8157 8158 return ring_buffer_map_get_reader(iter->array_buffer->buffer, 8159 iter->cpu_file); 8160 } else if (cmd) { 8161 return -ENOTTY; 8162 } 8163 8164 /* 8165 * An ioctl call with cmd 0 to the ring buffer file will wake up all 8166 * waiters 8167 */ 8168 guard(mutex)(&trace_types_lock); 8169 8170 /* Make sure the waiters see the new wait_index */ 8171 (void)atomic_fetch_inc_release(&iter->wait_index); 8172 8173 ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file); 8174 8175 return 0; 8176 } 8177 8178 #ifdef CONFIG_TRACER_SNAPSHOT 8179 static int get_snapshot_map(struct trace_array *tr) 8180 { 8181 int err = 0; 8182 8183 /* 8184 * Called with mmap_lock held. lockdep would be unhappy if we would now 8185 * take trace_types_lock. Instead use the specific 8186 * snapshot_trigger_lock. 8187 */ 8188 spin_lock(&tr->snapshot_trigger_lock); 8189 8190 if (tr->snapshot || tr->mapped == UINT_MAX) 8191 err = -EBUSY; 8192 else 8193 tr->mapped++; 8194 8195 spin_unlock(&tr->snapshot_trigger_lock); 8196 8197 /* Wait for update_max_tr() to observe iter->tr->mapped */ 8198 if (tr->mapped == 1) 8199 synchronize_rcu(); 8200 8201 return err; 8202 8203 } 8204 static void put_snapshot_map(struct trace_array *tr) 8205 { 8206 spin_lock(&tr->snapshot_trigger_lock); 8207 if (!WARN_ON(!tr->mapped)) 8208 tr->mapped--; 8209 spin_unlock(&tr->snapshot_trigger_lock); 8210 } 8211 #else 8212 static inline int get_snapshot_map(struct trace_array *tr) { return 0; } 8213 static inline void put_snapshot_map(struct trace_array *tr) { } 8214 #endif 8215 8216 /* 8217 * This is called when a VMA is duplicated (e.g., on fork()) to increment 8218 * the user_mapped counter without remapping pages. 8219 */ 8220 static void tracing_buffers_mmap_open(struct vm_area_struct *vma) 8221 { 8222 struct ftrace_buffer_info *info = vma->vm_file->private_data; 8223 struct trace_iterator *iter = &info->iter; 8224 8225 ring_buffer_map_dup(iter->array_buffer->buffer, iter->cpu_file); 8226 } 8227 8228 static void tracing_buffers_mmap_close(struct vm_area_struct *vma) 8229 { 8230 struct ftrace_buffer_info *info = vma->vm_file->private_data; 8231 struct trace_iterator *iter = &info->iter; 8232 8233 WARN_ON(ring_buffer_unmap(iter->array_buffer->buffer, iter->cpu_file)); 8234 put_snapshot_map(iter->tr); 8235 } 8236 8237 static int tracing_buffers_may_split(struct vm_area_struct *vma, unsigned long addr) 8238 { 8239 /* 8240 * Trace buffer mappings require the complete buffer including 8241 * the meta page. Partial mappings are not supported. 
8242 */ 8243 return -EINVAL; 8244 } 8245 8246 static const struct vm_operations_struct tracing_buffers_vmops = { 8247 .open = tracing_buffers_mmap_open, 8248 .close = tracing_buffers_mmap_close, 8249 .may_split = tracing_buffers_may_split, 8250 }; 8251 8252 static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma) 8253 { 8254 struct ftrace_buffer_info *info = filp->private_data; 8255 struct trace_iterator *iter = &info->iter; 8256 int ret = 0; 8257 8258 /* Memmap'ed and backup buffers are not supported for user space mmap */ 8259 if (iter->tr->flags & (TRACE_ARRAY_FL_MEMMAP | TRACE_ARRAY_FL_VMALLOC)) 8260 return -ENODEV; 8261 8262 ret = get_snapshot_map(iter->tr); 8263 if (ret) 8264 return ret; 8265 8266 ret = ring_buffer_map(iter->array_buffer->buffer, iter->cpu_file, vma); 8267 if (ret) 8268 put_snapshot_map(iter->tr); 8269 8270 vma->vm_ops = &tracing_buffers_vmops; 8271 8272 return ret; 8273 } 8274 8275 static const struct file_operations tracing_buffers_fops = { 8276 .open = tracing_buffers_open, 8277 .read = tracing_buffers_read, 8278 .poll = tracing_buffers_poll, 8279 .release = tracing_buffers_release, 8280 .flush = tracing_buffers_flush, 8281 .splice_read = tracing_buffers_splice_read, 8282 .unlocked_ioctl = tracing_buffers_ioctl, 8283 .mmap = tracing_buffers_mmap, 8284 }; 8285 8286 static ssize_t 8287 tracing_stats_read(struct file *filp, char __user *ubuf, 8288 size_t count, loff_t *ppos) 8289 { 8290 struct inode *inode = file_inode(filp); 8291 struct trace_array *tr = inode->i_private; 8292 struct array_buffer *trace_buf = &tr->array_buffer; 8293 int cpu = tracing_get_cpu(inode); 8294 struct trace_seq *s; 8295 unsigned long cnt; 8296 unsigned long long t; 8297 unsigned long usec_rem; 8298 8299 s = kmalloc_obj(*s); 8300 if (!s) 8301 return -ENOMEM; 8302 8303 trace_seq_init(s); 8304 8305 cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu); 8306 trace_seq_printf(s, "entries: %ld\n", cnt); 8307 8308 cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu); 8309 trace_seq_printf(s, "overrun: %ld\n", cnt); 8310 8311 cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu); 8312 trace_seq_printf(s, "commit overrun: %ld\n", cnt); 8313 8314 cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu); 8315 trace_seq_printf(s, "bytes: %ld\n", cnt); 8316 8317 if (trace_clocks[tr->clock_id].in_ns) { 8318 /* local or global for trace_clock */ 8319 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu)); 8320 usec_rem = do_div(t, USEC_PER_SEC); 8321 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n", 8322 t, usec_rem); 8323 8324 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer)); 8325 usec_rem = do_div(t, USEC_PER_SEC); 8326 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem); 8327 } else { 8328 /* counter or tsc mode for trace_clock */ 8329 trace_seq_printf(s, "oldest event ts: %llu\n", 8330 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu)); 8331 8332 trace_seq_printf(s, "now ts: %llu\n", 8333 ring_buffer_time_stamp(trace_buf->buffer)); 8334 } 8335 8336 cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu); 8337 trace_seq_printf(s, "dropped events: %ld\n", cnt); 8338 8339 cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu); 8340 trace_seq_printf(s, "read events: %ld\n", cnt); 8341 8342 count = simple_read_from_buffer(ubuf, count, ppos, 8343 s->buffer, trace_seq_used(s)); 8344 8345 kfree(s); 8346 8347 return count; 8348 } 8349 8350 static const struct file_operations tracing_stats_fops = { 8351 .open = tracing_open_generic_tr, 8352 .read
= tracing_stats_read, 8353 .llseek = generic_file_llseek, 8354 .release = tracing_release_generic_tr, 8355 }; 8356 8357 #ifdef CONFIG_DYNAMIC_FTRACE 8358 8359 static ssize_t 8360 tracing_read_dyn_info(struct file *filp, char __user *ubuf, 8361 size_t cnt, loff_t *ppos) 8362 { 8363 ssize_t ret; 8364 char *buf; 8365 int r; 8366 8367 /* 512 should be plenty to hold the amount needed */ 8368 #define DYN_INFO_BUF_SIZE 512 8369 8370 buf = kmalloc(DYN_INFO_BUF_SIZE, GFP_KERNEL); 8371 if (!buf) 8372 return -ENOMEM; 8373 8374 r = scnprintf(buf, DYN_INFO_BUF_SIZE, 8375 "%ld pages:%ld groups: %ld\n" 8376 "ftrace boot update time = %llu (ns)\n" 8377 "ftrace module total update time = %llu (ns)\n", 8378 ftrace_update_tot_cnt, 8379 ftrace_number_of_pages, 8380 ftrace_number_of_groups, 8381 ftrace_update_time, 8382 ftrace_total_mod_time); 8383 8384 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 8385 kfree(buf); 8386 return ret; 8387 } 8388 8389 static const struct file_operations tracing_dyn_info_fops = { 8390 .open = tracing_open_generic, 8391 .read = tracing_read_dyn_info, 8392 .llseek = generic_file_llseek, 8393 }; 8394 #endif /* CONFIG_DYNAMIC_FTRACE */ 8395 8396 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) 8397 static void 8398 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, 8399 struct trace_array *tr, struct ftrace_probe_ops *ops, 8400 void *data) 8401 { 8402 tracing_snapshot_instance(tr); 8403 } 8404 8405 static void 8406 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, 8407 struct trace_array *tr, struct ftrace_probe_ops *ops, 8408 void *data) 8409 { 8410 struct ftrace_func_mapper *mapper = data; 8411 long *count = NULL; 8412 8413 if (mapper) 8414 count = (long *)ftrace_func_mapper_find_ip(mapper, ip); 8415 8416 if (count) { 8417 8418 if (*count <= 0) 8419 return; 8420 8421 (*count)--; 8422 } 8423 8424 tracing_snapshot_instance(tr); 8425 } 8426 8427 static int 8428 ftrace_snapshot_print(struct seq_file *m, unsigned long ip, 8429 struct ftrace_probe_ops *ops, void *data) 8430 { 8431 struct ftrace_func_mapper *mapper = data; 8432 long *count = NULL; 8433 8434 seq_printf(m, "%ps:", (void *)ip); 8435 8436 seq_puts(m, "snapshot"); 8437 8438 if (mapper) 8439 count = (long *)ftrace_func_mapper_find_ip(mapper, ip); 8440 8441 if (count) 8442 seq_printf(m, ":count=%ld\n", *count); 8443 else 8444 seq_puts(m, ":unlimited\n"); 8445 8446 return 0; 8447 } 8448 8449 static int 8450 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr, 8451 unsigned long ip, void *init_data, void **data) 8452 { 8453 struct ftrace_func_mapper *mapper = *data; 8454 8455 if (!mapper) { 8456 mapper = allocate_ftrace_func_mapper(); 8457 if (!mapper) 8458 return -ENOMEM; 8459 *data = mapper; 8460 } 8461 8462 return ftrace_func_mapper_add_ip(mapper, ip, init_data); 8463 } 8464 8465 static void 8466 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr, 8467 unsigned long ip, void *data) 8468 { 8469 struct ftrace_func_mapper *mapper = data; 8470 8471 if (!ip) { 8472 if (!mapper) 8473 return; 8474 free_ftrace_func_mapper(mapper, NULL); 8475 return; 8476 } 8477 8478 ftrace_func_mapper_remove_ip(mapper, ip); 8479 } 8480 8481 static struct ftrace_probe_ops snapshot_probe_ops = { 8482 .func = ftrace_snapshot, 8483 .print = ftrace_snapshot_print, 8484 }; 8485 8486 static struct ftrace_probe_ops snapshot_count_probe_ops = { 8487 .func = ftrace_count_snapshot, 8488 .print = ftrace_snapshot_print, 8489 .init = ftrace_snapshot_init, 8490 .free = 
ftrace_snapshot_free, 8491 }; 8492 8493 static int 8494 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash, 8495 char *glob, char *cmd, char *param, int enable) 8496 { 8497 struct ftrace_probe_ops *ops; 8498 void *count = (void *)-1; 8499 char *number; 8500 int ret; 8501 8502 if (!tr) 8503 return -ENODEV; 8504 8505 /* hash funcs only work with set_ftrace_filter */ 8506 if (!enable) 8507 return -EINVAL; 8508 8509 ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops; 8510 8511 if (glob[0] == '!') { 8512 ret = unregister_ftrace_function_probe_func(glob+1, tr, ops); 8513 if (!ret) 8514 tracing_disarm_snapshot(tr); 8515 8516 return ret; 8517 } 8518 8519 if (!param) 8520 goto out_reg; 8521 8522 number = strsep(&param, ":"); 8523 8524 if (!strlen(number)) 8525 goto out_reg; 8526 8527 /* 8528 * We use the callback data field (which is a pointer) 8529 * as our counter. 8530 */ 8531 ret = kstrtoul(number, 0, (unsigned long *)&count); 8532 if (ret) 8533 return ret; 8534 8535 out_reg: 8536 ret = tracing_arm_snapshot(tr); 8537 if (ret < 0) 8538 return ret; 8539 8540 ret = register_ftrace_function_probe(glob, tr, ops, count); 8541 if (ret < 0) 8542 tracing_disarm_snapshot(tr); 8543 8544 return ret < 0 ? ret : 0; 8545 } 8546 8547 static struct ftrace_func_command ftrace_snapshot_cmd = { 8548 .name = "snapshot", 8549 .func = ftrace_trace_snapshot_callback, 8550 }; 8551 8552 static __init int register_snapshot_cmd(void) 8553 { 8554 return register_ftrace_command(&ftrace_snapshot_cmd); 8555 } 8556 #else 8557 static inline __init int register_snapshot_cmd(void) { return 0; } 8558 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */ 8559 8560 static struct dentry *tracing_get_dentry(struct trace_array *tr) 8561 { 8562 /* Top directory uses NULL as the parent */ 8563 if (tr->flags & TRACE_ARRAY_FL_GLOBAL) 8564 return NULL; 8565 8566 if (WARN_ON(!tr->dir)) 8567 return ERR_PTR(-ENODEV); 8568 8569 /* All sub buffers have a descriptor */ 8570 return tr->dir; 8571 } 8572 8573 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu) 8574 { 8575 struct dentry *d_tracer; 8576 8577 if (tr->percpu_dir) 8578 return tr->percpu_dir; 8579 8580 d_tracer = tracing_get_dentry(tr); 8581 if (IS_ERR(d_tracer)) 8582 return NULL; 8583 8584 tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer); 8585 8586 MEM_FAIL(!tr->percpu_dir, 8587 "Could not create tracefs directory 'per_cpu/%d'\n", cpu); 8588 8589 return tr->percpu_dir; 8590 } 8591 8592 static struct dentry * 8593 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent, 8594 void *data, long cpu, const struct file_operations *fops) 8595 { 8596 struct dentry *ret = trace_create_file(name, mode, parent, data, fops); 8597 8598 if (ret) /* See tracing_get_cpu() */ 8599 d_inode(ret)->i_cdev = (void *)(cpu + 1); 8600 return ret; 8601 } 8602 8603 static void 8604 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu) 8605 { 8606 struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu); 8607 struct dentry *d_cpu; 8608 char cpu_dir[30]; /* 30 characters should be more than enough */ 8609 8610 if (!d_percpu) 8611 return; 8612 8613 snprintf(cpu_dir, 30, "cpu%ld", cpu); 8614 d_cpu = tracefs_create_dir(cpu_dir, d_percpu); 8615 if (!d_cpu) { 8616 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir); 8617 return; 8618 } 8619 8620 /* per cpu trace_pipe */ 8621 trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu, 8622 tr, cpu, &tracing_pipe_fops); 8623 8624 /* per cpu trace
*/ 8625 trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu, 8626 tr, cpu, &tracing_fops); 8627 8628 trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu, 8629 tr, cpu, &tracing_buffers_fops); 8630 8631 trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu, 8632 tr, cpu, &tracing_stats_fops); 8633 8634 trace_create_cpu_file("buffer_size_kb", TRACE_MODE_WRITE, d_cpu, 8635 tr, cpu, &tracing_entries_fops); 8636 8637 if (tr->range_addr_start) 8638 trace_create_cpu_file("buffer_meta", TRACE_MODE_READ, d_cpu, 8639 tr, cpu, &tracing_buffer_meta_fops); 8640 #ifdef CONFIG_TRACER_SNAPSHOT 8641 if (!tr->range_addr_start) { 8642 trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu, 8643 tr, cpu, &snapshot_fops); 8644 8645 trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu, 8646 tr, cpu, &snapshot_raw_fops); 8647 } 8648 #endif 8649 } 8650 8651 #ifdef CONFIG_FTRACE_SELFTEST 8652 /* Let selftest have access to static functions in this file */ 8653 #include "trace_selftest.c" 8654 #endif 8655 8656 static ssize_t 8657 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt, 8658 loff_t *ppos) 8659 { 8660 struct trace_option_dentry *topt = filp->private_data; 8661 char *buf; 8662 8663 if (topt->flags->val & topt->opt->bit) 8664 buf = "1\n"; 8665 else 8666 buf = "0\n"; 8667 8668 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2); 8669 } 8670 8671 static ssize_t 8672 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt, 8673 loff_t *ppos) 8674 { 8675 struct trace_option_dentry *topt = filp->private_data; 8676 unsigned long val; 8677 int ret; 8678 8679 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 8680 if (ret) 8681 return ret; 8682 8683 if (val != 0 && val != 1) 8684 return -EINVAL; 8685 8686 if (!!(topt->flags->val & topt->opt->bit) != val) { 8687 guard(mutex)(&trace_types_lock); 8688 ret = __set_tracer_option(topt->tr, topt->flags, 8689 topt->opt, !val); 8690 if (ret) 8691 return ret; 8692 } 8693 8694 *ppos += cnt; 8695 8696 return cnt; 8697 } 8698 8699 static int tracing_open_options(struct inode *inode, struct file *filp) 8700 { 8701 struct trace_option_dentry *topt = inode->i_private; 8702 int ret; 8703 8704 ret = tracing_check_open_get_tr(topt->tr); 8705 if (ret) 8706 return ret; 8707 8708 filp->private_data = inode->i_private; 8709 return 0; 8710 } 8711 8712 static int tracing_release_options(struct inode *inode, struct file *file) 8713 { 8714 struct trace_option_dentry *topt = file->private_data; 8715 8716 trace_array_put(topt->tr); 8717 return 0; 8718 } 8719 8720 static const struct file_operations trace_options_fops = { 8721 .open = tracing_open_options, 8722 .read = trace_options_read, 8723 .write = trace_options_write, 8724 .llseek = generic_file_llseek, 8725 .release = tracing_release_options, 8726 }; 8727 8728 /* 8729 * In order to pass in both the trace_array descriptor as well as the index 8730 * to the flag that the trace option file represents, the trace_array 8731 * has a character array of trace_flags_index[], which holds the index 8732 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc. 8733 * The address of this character array is passed to the flag option file 8734 * read/write callbacks. 8735 * 8736 * In order to extract both the index and the trace_array descriptor, 8737 * get_tr_index() uses the following algorithm. 8738 * 8739 * idx = *ptr; 8740 * 8741 * As the pointer itself contains the address of the index (remember 8742 * index[1] == 1). 
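 * For example, the file created for flag bit 3 is passed &index[3], so idx = *ptr reads back 3.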
8743 * 8744 * Then to get the trace_array descriptor, by subtracting that index 8745 * from the ptr, we get to the start of the index itself. 8746 * 8747 * ptr - idx == &index[0] 8748 * 8749 * Then a simple container_of() from that pointer gets us to the 8750 * trace_array descriptor. 8751 */ 8752 static void get_tr_index(void *data, struct trace_array **ptr, 8753 unsigned int *pindex) 8754 { 8755 *pindex = *(unsigned char *)data; 8756 8757 *ptr = container_of(data - *pindex, struct trace_array, 8758 trace_flags_index); 8759 } 8760 8761 static ssize_t 8762 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt, 8763 loff_t *ppos) 8764 { 8765 void *tr_index = filp->private_data; 8766 struct trace_array *tr; 8767 unsigned int index; 8768 char *buf; 8769 8770 get_tr_index(tr_index, &tr, &index); 8771 8772 if (tr->trace_flags & (1ULL << index)) 8773 buf = "1\n"; 8774 else 8775 buf = "0\n"; 8776 8777 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2); 8778 } 8779 8780 static ssize_t 8781 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt, 8782 loff_t *ppos) 8783 { 8784 void *tr_index = filp->private_data; 8785 struct trace_array *tr; 8786 unsigned int index; 8787 unsigned long val; 8788 int ret; 8789 8790 get_tr_index(tr_index, &tr, &index); 8791 8792 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 8793 if (ret) 8794 return ret; 8795 8796 if (val != 0 && val != 1) 8797 return -EINVAL; 8798 8799 mutex_lock(&event_mutex); 8800 mutex_lock(&trace_types_lock); 8801 ret = set_tracer_flag(tr, 1ULL << index, val); 8802 mutex_unlock(&trace_types_lock); 8803 mutex_unlock(&event_mutex); 8804 8805 if (ret < 0) 8806 return ret; 8807 8808 *ppos += cnt; 8809 8810 return cnt; 8811 } 8812 8813 static const struct file_operations trace_options_core_fops = { 8814 .open = tracing_open_generic, 8815 .read = trace_options_core_read, 8816 .write = trace_options_core_write, 8817 .llseek = generic_file_llseek, 8818 }; 8819 8820 struct dentry *trace_create_file(const char *name, 8821 umode_t mode, 8822 struct dentry *parent, 8823 void *data, 8824 const struct file_operations *fops) 8825 { 8826 struct dentry *ret; 8827 8828 ret = tracefs_create_file(name, mode, parent, data, fops); 8829 if (!ret) 8830 pr_warn("Could not create tracefs '%s' entry\n", name); 8831 8832 return ret; 8833 } 8834 8835 8836 static struct dentry *trace_options_init_dentry(struct trace_array *tr) 8837 { 8838 struct dentry *d_tracer; 8839 8840 if (tr->options) 8841 return tr->options; 8842 8843 d_tracer = tracing_get_dentry(tr); 8844 if (IS_ERR(d_tracer)) 8845 return NULL; 8846 8847 tr->options = tracefs_create_dir("options", d_tracer); 8848 if (!tr->options) { 8849 pr_warn("Could not create tracefs directory 'options'\n"); 8850 return NULL; 8851 } 8852 8853 return tr->options; 8854 } 8855 8856 static void 8857 create_trace_option_file(struct trace_array *tr, 8858 struct trace_option_dentry *topt, 8859 struct tracer_flags *flags, 8860 struct tracer_opt *opt) 8861 { 8862 struct dentry *t_options; 8863 8864 t_options = trace_options_init_dentry(tr); 8865 if (!t_options) 8866 return; 8867 8868 topt->flags = flags; 8869 topt->opt = opt; 8870 topt->tr = tr; 8871 8872 topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE, 8873 t_options, topt, &trace_options_fops); 8874 } 8875 8876 static int 8877 create_trace_option_files(struct trace_array *tr, struct tracer *tracer, 8878 struct tracer_flags *flags) 8879 { 8880 struct trace_option_dentry *topts; 8881 struct trace_options *tr_topts; 8882 struct 
tracer_opt *opts; 8883 int cnt; 8884 8885 if (!flags || !flags->opts) 8886 return 0; 8887 8888 opts = flags->opts; 8889 8890 for (cnt = 0; opts[cnt].name; cnt++) 8891 ; 8892 8893 topts = kzalloc_objs(*topts, cnt + 1); 8894 if (!topts) 8895 return 0; 8896 8897 tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1), 8898 GFP_KERNEL); 8899 if (!tr_topts) { 8900 kfree(topts); 8901 return -ENOMEM; 8902 } 8903 8904 tr->topts = tr_topts; 8905 tr->topts[tr->nr_topts].tracer = tracer; 8906 tr->topts[tr->nr_topts].topts = topts; 8907 tr->nr_topts++; 8908 8909 for (cnt = 0; opts[cnt].name; cnt++) { 8910 create_trace_option_file(tr, &topts[cnt], flags, 8911 &opts[cnt]); 8912 MEM_FAIL(topts[cnt].entry == NULL, 8913 "Failed to create trace option: %s", 8914 opts[cnt].name); 8915 } 8916 return 0; 8917 } 8918 8919 static int get_global_flags_val(struct tracer *tracer) 8920 { 8921 struct tracers *t; 8922 8923 list_for_each_entry(t, &global_trace.tracers, list) { 8924 if (t->tracer != tracer) 8925 continue; 8926 if (!t->flags) 8927 return -1; 8928 return t->flags->val; 8929 } 8930 return -1; 8931 } 8932 8933 static int add_tracer_options(struct trace_array *tr, struct tracers *t) 8934 { 8935 struct tracer *tracer = t->tracer; 8936 struct tracer_flags *flags = t->flags ?: tracer->flags; 8937 8938 if (!flags) 8939 return 0; 8940 8941 /* Only add tracer options after update_tracer_options() finishes */ 8942 if (!tracer_options_updated) 8943 return 0; 8944 8945 return create_trace_option_files(tr, tracer, flags); 8946 } 8947 8948 static int add_tracer(struct trace_array *tr, struct tracer *tracer) 8949 { 8950 struct tracer_flags *flags; 8951 struct tracers *t; 8952 int ret; 8953 8954 /* Only enable if the directory has been created already. */ 8955 if (!tr->dir && !(tr->flags & TRACE_ARRAY_FL_GLOBAL)) 8956 return 0; 8957 8958 /* 8959 * If this is an instance, only create flags for tracers 8960 * the instance may have. 8961 */ 8962 if (!trace_ok_for_array(tracer, tr)) 8963 return 0; 8964 8965 t = kmalloc_obj(*t); 8966 if (!t) 8967 return -ENOMEM; 8968 8969 t->tracer = tracer; 8970 t->flags = NULL; 8971 list_add(&t->list, &tr->tracers); 8972 8973 flags = tracer->flags; 8974 if (!flags) { 8975 if (!tracer->default_flags) 8976 return 0; 8977 8978 /* 8979 * If the tracer defines default flags, it means the flags are 8980 * per trace instance.
8981 */ 8982 flags = kmalloc_obj(*flags); 8983 if (!flags) 8984 return -ENOMEM; 8985 8986 *flags = *tracer->default_flags; 8987 flags->trace = tracer; 8988 8989 t->flags = flags; 8990 8991 /* If this is an instance, inherit the global_trace flags */ 8992 if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL)) { 8993 int val = get_global_flags_val(tracer); 8994 if (!WARN_ON_ONCE(val < 0)) 8995 flags->val = val; 8996 } 8997 } 8998 8999 ret = add_tracer_options(tr, t); 9000 if (ret < 0) { 9001 list_del(&t->list); 9002 kfree(t->flags); 9003 kfree(t); 9004 } 9005 9006 return ret; 9007 } 9008 9009 static struct dentry * 9010 create_trace_option_core_file(struct trace_array *tr, 9011 const char *option, long index) 9012 { 9013 struct dentry *t_options; 9014 9015 t_options = trace_options_init_dentry(tr); 9016 if (!t_options) 9017 return NULL; 9018 9019 return trace_create_file(option, TRACE_MODE_WRITE, t_options, 9020 (void *)&tr->trace_flags_index[index], 9021 &trace_options_core_fops); 9022 } 9023 9024 static void create_trace_options_dir(struct trace_array *tr) 9025 { 9026 struct dentry *t_options; 9027 bool top_level = tr == &global_trace; 9028 int i; 9029 9030 t_options = trace_options_init_dentry(tr); 9031 if (!t_options) 9032 return; 9033 9034 for (i = 0; trace_options[i]; i++) { 9035 if (top_level || 9036 !((1ULL << i) & TOP_LEVEL_TRACE_FLAGS)) { 9037 create_trace_option_core_file(tr, trace_options[i], i); 9038 } 9039 } 9040 } 9041 9042 static ssize_t 9043 rb_simple_read(struct file *filp, char __user *ubuf, 9044 size_t cnt, loff_t *ppos) 9045 { 9046 struct trace_array *tr = filp->private_data; 9047 char buf[64]; 9048 int r; 9049 9050 r = tracer_tracing_is_on(tr); 9051 r = sprintf(buf, "%d\n", r); 9052 9053 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 9054 } 9055 9056 static ssize_t 9057 rb_simple_write(struct file *filp, const char __user *ubuf, 9058 size_t cnt, loff_t *ppos) 9059 { 9060 struct trace_array *tr = filp->private_data; 9061 struct trace_buffer *buffer = tr->array_buffer.buffer; 9062 unsigned long val; 9063 int ret; 9064 9065 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 9066 if (ret) 9067 return ret; 9068 9069 if (buffer) { 9070 guard(mutex)(&trace_types_lock); 9071 if (!!val == tracer_tracing_is_on(tr)) { 9072 val = 0; /* do nothing */ 9073 } else if (val) { 9074 tracer_tracing_on(tr); 9075 if (tr->current_trace->start) 9076 tr->current_trace->start(tr); 9077 } else { 9078 tracer_tracing_off(tr); 9079 if (tr->current_trace->stop) 9080 tr->current_trace->stop(tr); 9081 /* Wake up any waiters */ 9082 ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS); 9083 } 9084 } 9085 9086 (*ppos)++; 9087 9088 return cnt; 9089 } 9090 9091 static const struct file_operations rb_simple_fops = { 9092 .open = tracing_open_generic_tr, 9093 .read = rb_simple_read, 9094 .write = rb_simple_write, 9095 .release = tracing_release_generic_tr, 9096 .llseek = default_llseek, 9097 }; 9098 9099 static ssize_t 9100 buffer_percent_read(struct file *filp, char __user *ubuf, 9101 size_t cnt, loff_t *ppos) 9102 { 9103 struct trace_array *tr = filp->private_data; 9104 char buf[64]; 9105 int r; 9106 9107 r = tr->buffer_percent; 9108 r = sprintf(buf, "%d\n", r); 9109 9110 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 9111 } 9112 9113 static ssize_t 9114 buffer_percent_write(struct file *filp, const char __user *ubuf, 9115 size_t cnt, loff_t *ppos) 9116 { 9117 struct trace_array *tr = filp->private_data; 9118 unsigned long val; 9119 int ret; 9120 9121 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 
9122 if (ret) 9123 return ret; 9124 9125 if (val > 100) 9126 return -EINVAL; 9127 9128 tr->buffer_percent = val; 9129 9130 (*ppos)++; 9131 9132 return cnt; 9133 } 9134 9135 static const struct file_operations buffer_percent_fops = { 9136 .open = tracing_open_generic_tr, 9137 .read = buffer_percent_read, 9138 .write = buffer_percent_write, 9139 .release = tracing_release_generic_tr, 9140 .llseek = default_llseek, 9141 }; 9142 9143 static ssize_t 9144 buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) 9145 { 9146 struct trace_array *tr = filp->private_data; 9147 size_t size; 9148 char buf[64]; 9149 int order; 9150 int r; 9151 9152 order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer); 9153 size = (PAGE_SIZE << order) / 1024; 9154 9155 r = sprintf(buf, "%zd\n", size); 9156 9157 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 9158 } 9159 9160 static ssize_t 9161 buffer_subbuf_size_write(struct file *filp, const char __user *ubuf, 9162 size_t cnt, loff_t *ppos) 9163 { 9164 struct trace_array *tr = filp->private_data; 9165 unsigned long val; 9166 int old_order; 9167 int order; 9168 int pages; 9169 int ret; 9170 9171 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 9172 if (ret) 9173 return ret; 9174 9175 val *= 1024; /* value passed in is in KB */ 9176 9177 pages = DIV_ROUND_UP(val, PAGE_SIZE); 9178 order = fls(pages - 1); 9179 9180 /* limit between 1 and 128 system pages */ 9181 if (order < 0 || order > 7) 9182 return -EINVAL; 9183 9184 /* Do not allow tracing while changing the order of the ring buffer */ 9185 tracing_stop_tr(tr); 9186 9187 old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer); 9188 if (old_order == order) 9189 goto out; 9190 9191 ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order); 9192 if (ret) 9193 goto out; 9194 9195 #ifdef CONFIG_TRACER_SNAPSHOT 9196 9197 if (!tr->allocated_snapshot) 9198 goto out_max; 9199 9200 ret = ring_buffer_subbuf_order_set(tr->snapshot_buffer.buffer, order); 9201 if (ret) { 9202 /* Put back the old order */ 9203 cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order); 9204 if (WARN_ON_ONCE(cnt)) { 9205 /* 9206 * AARGH! We are left with different orders! 9207 * The max buffer is our "snapshot" buffer. 9208 * When a tracer needs a snapshot (one of the 9209 * latency tracers), it swaps the max buffer 9210 * with the saved snapshot. We succeeded in 9211 * updating the order of the main buffer, but failed to 9212 * update the order of the max buffer. But when we tried 9213 * to reset the main buffer to the original size, we 9214 * failed there too. This is very unlikely to 9215 * happen, but if it does, warn and kill all 9216 * tracing.
9217 */ 9218 tracing_disabled = 1; 9219 } 9220 goto out; 9221 } 9222 out_max: 9223 #endif 9224 (*ppos)++; 9225 out: 9226 if (ret) 9227 cnt = ret; 9228 tracing_start_tr(tr); 9229 return cnt; 9230 } 9231 9232 static const struct file_operations buffer_subbuf_size_fops = { 9233 .open = tracing_open_generic_tr, 9234 .read = buffer_subbuf_size_read, 9235 .write = buffer_subbuf_size_write, 9236 .release = tracing_release_generic_tr, 9237 .llseek = default_llseek, 9238 }; 9239 9240 static struct dentry *trace_instance_dir; 9241 9242 static void 9243 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer); 9244 9245 #ifdef CONFIG_MODULES 9246 static int make_mod_delta(struct module *mod, void *data) 9247 { 9248 struct trace_module_delta *module_delta; 9249 struct trace_scratch *tscratch; 9250 struct trace_mod_entry *entry; 9251 struct trace_array *tr = data; 9252 int i; 9253 9254 tscratch = tr->scratch; 9255 module_delta = READ_ONCE(tr->module_delta); 9256 for (i = 0; i < tscratch->nr_entries; i++) { 9257 entry = &tscratch->entries[i]; 9258 if (strcmp(mod->name, entry->mod_name)) 9259 continue; 9260 if (mod->state == MODULE_STATE_GOING) 9261 module_delta->delta[i] = 0; 9262 else 9263 module_delta->delta[i] = (unsigned long)mod->mem[MOD_TEXT].base 9264 - entry->mod_addr; 9265 break; 9266 } 9267 return 0; 9268 } 9269 #else 9270 static int make_mod_delta(struct module *mod, void *data) 9271 { 9272 return 0; 9273 } 9274 #endif 9275 9276 static int mod_addr_comp(const void *a, const void *b, const void *data) 9277 { 9278 const struct trace_mod_entry *e1 = a; 9279 const struct trace_mod_entry *e2 = b; 9280 9281 return e1->mod_addr > e2->mod_addr ? 1 : -1; 9282 } 9283 9284 static void setup_trace_scratch(struct trace_array *tr, 9285 struct trace_scratch *tscratch, unsigned int size) 9286 { 9287 struct trace_module_delta *module_delta; 9288 struct trace_mod_entry *entry; 9289 int i, nr_entries; 9290 9291 if (!tscratch) 9292 return; 9293 9294 tr->scratch = tscratch; 9295 tr->scratch_size = size; 9296 9297 if (tscratch->text_addr) 9298 tr->text_delta = (unsigned long)_text - tscratch->text_addr; 9299 9300 if (struct_size(tscratch, entries, tscratch->nr_entries) > size) 9301 goto reset; 9302 9303 /* Check if each module name is a valid string */ 9304 for (i = 0; i < tscratch->nr_entries; i++) { 9305 int n; 9306 9307 entry = &tscratch->entries[i]; 9308 9309 for (n = 0; n < MODULE_NAME_LEN; n++) { 9310 if (entry->mod_name[n] == '\0') 9311 break; 9312 if (!isprint(entry->mod_name[n])) 9313 goto reset; 9314 } 9315 if (n == MODULE_NAME_LEN) 9316 goto reset; 9317 } 9318 9319 /* Sort the entries so that we can find the appropriate module from an address. */ 9320 nr_entries = tscratch->nr_entries; 9321 sort_r(tscratch->entries, nr_entries, sizeof(struct trace_mod_entry), 9322 mod_addr_comp, NULL, NULL); 9323 9324 if (IS_ENABLED(CONFIG_MODULES)) { 9325 module_delta = kzalloc_flex(*module_delta, delta, nr_entries); 9326 if (!module_delta) { 9327 pr_info("module_delta allocation failed. Not able to decode module address."); 9328 goto reset; 9329 } 9330 init_rcu_head(&module_delta->rcu); 9331 } else 9332 module_delta = NULL; 9333 WRITE_ONCE(tr->module_delta, module_delta); 9334 9335 /* Scan modules to make text delta for modules. */ 9336 module_for_each_mod(make_mod_delta, tr); 9337 9338 /* Set trace_clock to be the same as the previous boot.
*/ 9339 if (tscratch->clock_id != tr->clock_id) { 9340 if (tscratch->clock_id >= ARRAY_SIZE(trace_clocks) || 9341 tracing_set_clock(tr, trace_clocks[tscratch->clock_id].name) < 0) { 9342 pr_info("the previous trace_clock info is not valid."); 9343 goto reset; 9344 } 9345 } 9346 return; 9347 reset: 9348 /* Invalid trace modules */ 9349 memset(tscratch, 0, size); 9350 } 9351 9352 static int 9353 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size) 9354 { 9355 enum ring_buffer_flags rb_flags; 9356 struct trace_scratch *tscratch; 9357 unsigned int scratch_size = 0; 9358 9359 rb_flags = tr->trace_flags & TRACE_ITER(OVERWRITE) ? RB_FL_OVERWRITE : 0; 9360 9361 buf->tr = tr; 9362 9363 if (tr->range_addr_start && tr->range_addr_size) { 9364 /* Add scratch buffer to handle 128 modules */ 9365 buf->buffer = ring_buffer_alloc_range(size, rb_flags, 0, 9366 tr->range_addr_start, 9367 tr->range_addr_size, 9368 struct_size(tscratch, entries, 128)); 9369 9370 tscratch = ring_buffer_meta_scratch(buf->buffer, &scratch_size); 9371 setup_trace_scratch(tr, tscratch, scratch_size); 9372 9373 /* 9374 * This is basically the same as a mapped buffer, 9375 * with the same restrictions. 9376 */ 9377 tr->mapped++; 9378 } else { 9379 buf->buffer = ring_buffer_alloc(size, rb_flags); 9380 } 9381 if (!buf->buffer) 9382 return -ENOMEM; 9383 9384 buf->data = alloc_percpu(struct trace_array_cpu); 9385 if (!buf->data) { 9386 ring_buffer_free(buf->buffer); 9387 buf->buffer = NULL; 9388 return -ENOMEM; 9389 } 9390 9391 /* Allocate the first page for all buffers */ 9392 set_buffer_entries(&tr->array_buffer, 9393 ring_buffer_size(tr->array_buffer.buffer, 0)); 9394 9395 return 0; 9396 } 9397 9398 static void free_trace_buffer(struct array_buffer *buf) 9399 { 9400 if (buf->buffer) { 9401 ring_buffer_free(buf->buffer); 9402 buf->buffer = NULL; 9403 free_percpu(buf->data); 9404 buf->data = NULL; 9405 } 9406 } 9407 9408 static int allocate_trace_buffers(struct trace_array *tr, int size) 9409 { 9410 int ret; 9411 9412 ret = allocate_trace_buffer(tr, &tr->array_buffer, size); 9413 if (ret) 9414 return ret; 9415 9416 #ifdef CONFIG_TRACER_SNAPSHOT 9417 /* Fixed (memory mapped) buffer trace arrays do not have snapshot buffers */ 9418 if (tr->range_addr_start) 9419 return 0; 9420 9421 ret = allocate_trace_buffer(tr, &tr->snapshot_buffer, 9422 allocate_snapshot ?
size : 1); 9423 if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) { 9424 free_trace_buffer(&tr->array_buffer); 9425 return -ENOMEM; 9426 } 9427 tr->allocated_snapshot = allocate_snapshot; 9428 9429 allocate_snapshot = false; 9430 #endif 9431 9432 return 0; 9433 } 9434 9435 static void free_trace_buffers(struct trace_array *tr) 9436 { 9437 if (!tr) 9438 return; 9439 9440 free_trace_buffer(&tr->array_buffer); 9441 kfree(tr->module_delta); 9442 9443 #ifdef CONFIG_TRACER_SNAPSHOT 9444 free_trace_buffer(&tr->snapshot_buffer); 9445 #endif 9446 } 9447 9448 static void init_trace_flags_index(struct trace_array *tr) 9449 { 9450 int i; 9451 9452 /* Used by the trace options files */ 9453 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) 9454 tr->trace_flags_index[i] = i; 9455 } 9456 9457 static int __update_tracer(struct trace_array *tr) 9458 { 9459 struct tracer *t; 9460 int ret = 0; 9461 9462 for (t = trace_types; t && !ret; t = t->next) 9463 ret = add_tracer(tr, t); 9464 9465 return ret; 9466 } 9467 9468 static __init int __update_tracer_options(struct trace_array *tr) 9469 { 9470 struct tracers *t; 9471 int ret = 0; 9472 9473 list_for_each_entry(t, &tr->tracers, list) { 9474 ret = add_tracer_options(tr, t); 9475 if (ret < 0) 9476 break; 9477 } 9478 9479 return ret; 9480 } 9481 9482 static __init void update_tracer_options(void) 9483 { 9484 struct trace_array *tr; 9485 9486 guard(mutex)(&trace_types_lock); 9487 tracer_options_updated = true; 9488 list_for_each_entry(tr, &ftrace_trace_arrays, list) 9489 __update_tracer_options(tr); 9490 } 9491 9492 /* Must have trace_types_lock held */ 9493 struct trace_array *trace_array_find(const char *instance) 9494 { 9495 struct trace_array *tr, *found = NULL; 9496 9497 list_for_each_entry(tr, &ftrace_trace_arrays, list) { 9498 if (tr->name && strcmp(tr->name, instance) == 0) { 9499 found = tr; 9500 break; 9501 } 9502 } 9503 9504 return found; 9505 } 9506 9507 struct trace_array *trace_array_find_get(const char *instance) 9508 { 9509 struct trace_array *tr; 9510 9511 guard(mutex)(&trace_types_lock); 9512 tr = trace_array_find(instance); 9513 if (tr) 9514 tr->ref++; 9515 9516 return tr; 9517 } 9518 9519 static int trace_array_create_dir(struct trace_array *tr) 9520 { 9521 int ret; 9522 9523 tr->dir = tracefs_create_dir(tr->name, trace_instance_dir); 9524 if (!tr->dir) 9525 return -EINVAL; 9526 9527 ret = event_trace_add_tracer(tr->dir, tr); 9528 if (ret) { 9529 tracefs_remove(tr->dir); 9530 return ret; 9531 } 9532 9533 init_tracer_tracefs(tr, tr->dir); 9534 ret = __update_tracer(tr); 9535 if (ret) { 9536 event_trace_del_tracer(tr); 9537 tracefs_remove(tr->dir); 9538 return ret; 9539 } 9540 return 0; 9541 } 9542 9543 static struct trace_array * 9544 trace_array_create_systems(const char *name, const char *systems, 9545 unsigned long range_addr_start, 9546 unsigned long range_addr_size) 9547 { 9548 struct trace_array *tr; 9549 int ret; 9550 9551 ret = -ENOMEM; 9552 tr = kzalloc_obj(*tr); 9553 if (!tr) 9554 return ERR_PTR(ret); 9555 9556 tr->name = kstrdup(name, GFP_KERNEL); 9557 if (!tr->name) 9558 goto out_free_tr; 9559 9560 if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL)) 9561 goto out_free_tr; 9562 9563 if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL)) 9564 goto out_free_tr; 9565 9566 if (systems) { 9567 tr->system_names = kstrdup_const(systems, GFP_KERNEL); 9568 if (!tr->system_names) 9569 goto out_free_tr; 9570 } 9571 9572 /* Only for boot up memory mapped ring buffers */ 9573 tr->range_addr_start = range_addr_start; 9574 tr->range_addr_size = 
range_addr_size; 9575 9576 tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS; 9577 9578 cpumask_copy(tr->tracing_cpumask, cpu_all_mask); 9579 9580 raw_spin_lock_init(&tr->start_lock); 9581 9582 tr->syscall_buf_sz = global_trace.syscall_buf_sz; 9583 9584 tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; 9585 #ifdef CONFIG_TRACER_SNAPSHOT 9586 spin_lock_init(&tr->snapshot_trigger_lock); 9587 #endif 9588 tr->current_trace = &nop_trace; 9589 tr->current_trace_flags = nop_trace.flags; 9590 9591 INIT_LIST_HEAD(&tr->systems); 9592 INIT_LIST_HEAD(&tr->events); 9593 INIT_LIST_HEAD(&tr->hist_vars); 9594 INIT_LIST_HEAD(&tr->err_log); 9595 INIT_LIST_HEAD(&tr->tracers); 9596 INIT_LIST_HEAD(&tr->marker_list); 9597 9598 #ifdef CONFIG_MODULES 9599 INIT_LIST_HEAD(&tr->mod_events); 9600 #endif 9601 9602 if (allocate_trace_buffers(tr, trace_buf_size) < 0) 9603 goto out_free_tr; 9604 9605 /* The ring buffer is expanded by default */ 9606 trace_set_ring_buffer_expanded(tr); 9607 9608 if (ftrace_allocate_ftrace_ops(tr) < 0) 9609 goto out_free_tr; 9610 9611 ftrace_init_trace_array(tr); 9612 9613 init_trace_flags_index(tr); 9614 9615 if (trace_instance_dir) { 9616 ret = trace_array_create_dir(tr); 9617 if (ret) 9618 goto out_free_tr; 9619 } else 9620 __trace_early_add_events(tr); 9621 9622 list_add(&tr->list, &ftrace_trace_arrays); 9623 9624 tr->ref++; 9625 9626 return tr; 9627 9628 out_free_tr: 9629 ftrace_free_ftrace_ops(tr); 9630 free_trace_buffers(tr); 9631 free_cpumask_var(tr->pipe_cpumask); 9632 free_cpumask_var(tr->tracing_cpumask); 9633 kfree_const(tr->system_names); 9634 kfree(tr->range_name); 9635 kfree(tr->name); 9636 kfree(tr); 9637 9638 return ERR_PTR(ret); 9639 } 9640 9641 static struct trace_array *trace_array_create(const char *name) 9642 { 9643 return trace_array_create_systems(name, NULL, 0, 0); 9644 } 9645 9646 static int instance_mkdir(const char *name) 9647 { 9648 struct trace_array *tr; 9649 int ret; 9650 9651 guard(mutex)(&event_mutex); 9652 guard(mutex)(&trace_types_lock); 9653 9654 ret = -EEXIST; 9655 if (trace_array_find(name)) 9656 return -EEXIST; 9657 9658 tr = trace_array_create(name); 9659 9660 ret = PTR_ERR_OR_ZERO(tr); 9661 9662 return ret; 9663 } 9664 9665 #ifdef CONFIG_MMU 9666 static u64 map_pages(unsigned long start, unsigned long size) 9667 { 9668 unsigned long vmap_start, vmap_end; 9669 struct vm_struct *area; 9670 int ret; 9671 9672 area = get_vm_area(size, VM_IOREMAP); 9673 if (!area) 9674 return 0; 9675 9676 vmap_start = (unsigned long) area->addr; 9677 vmap_end = vmap_start + size; 9678 9679 ret = vmap_page_range(vmap_start, vmap_end, 9680 start, pgprot_nx(PAGE_KERNEL)); 9681 if (ret < 0) { 9682 free_vm_area(area); 9683 return 0; 9684 } 9685 9686 return (u64)vmap_start; 9687 } 9688 #else 9689 static inline u64 map_pages(unsigned long start, unsigned long size) 9690 { 9691 return 0; 9692 } 9693 #endif 9694 9695 /** 9696 * trace_array_get_by_name - Create/Lookup a trace array, given its name. 9697 * @name: The name of the trace array to be looked up/created. 9698 * @systems: A list of systems to create event directories for (NULL for all) 9699 * 9700 * Returns a pointer to the trace array with the given name, or 9701 * NULL if it cannot be created. 9702 * 9703 * NOTE: This function increments the reference counter associated with the 9704 * trace array returned. This makes sure it cannot be freed while in use. 9705 * Use trace_array_put() once the trace array is no longer needed.
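 * A minimal usage sketch (the instance name is illustrative): 
 * 
 *	tr = trace_array_get_by_name("my_instance", NULL); 
 *	if (tr) 
 *		trace_array_put(tr); 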
9706 * If the trace_array is to be freed, trace_array_destroy() needs to 9707 * be called after the trace_array_put(), or simply let user space delete 9708 * it from the tracefs instances directory. But until the 9709 * trace_array_put() is called, user space can not delete it. 9710 * 9711 */ 9712 struct trace_array *trace_array_get_by_name(const char *name, const char *systems) 9713 { 9714 struct trace_array *tr; 9715 9716 guard(mutex)(&event_mutex); 9717 guard(mutex)(&trace_types_lock); 9718 9719 list_for_each_entry(tr, &ftrace_trace_arrays, list) { 9720 if (tr->name && strcmp(tr->name, name) == 0) { 9721 tr->ref++; 9722 return tr; 9723 } 9724 } 9725 9726 tr = trace_array_create_systems(name, systems, 0, 0); 9727 9728 if (IS_ERR(tr)) 9729 tr = NULL; 9730 else 9731 tr->ref++; 9732 9733 return tr; 9734 } 9735 EXPORT_SYMBOL_GPL(trace_array_get_by_name); 9736 9737 static int __remove_instance(struct trace_array *tr) 9738 { 9739 int i; 9740 9741 /* Reference counter for a newly created trace array = 1. */ 9742 if (tr->ref > 1 || (tr->current_trace && tr->trace_ref)) 9743 return -EBUSY; 9744 9745 list_del(&tr->list); 9746 9747 /* Disable all the flags that were enabled coming in */ 9748 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) { 9749 if ((1ULL << i) & ZEROED_TRACE_FLAGS) 9750 set_tracer_flag(tr, 1ULL << i, 0); 9751 } 9752 9753 if (printk_trace == tr) 9754 update_printk_trace(&global_trace); 9755 9756 if (update_marker_trace(tr, 0)) 9757 synchronize_rcu(); 9758 9759 tracing_set_nop(tr); 9760 clear_ftrace_function_probes(tr); 9761 event_trace_del_tracer(tr); 9762 ftrace_clear_pids(tr); 9763 ftrace_destroy_function_files(tr); 9764 tracefs_remove(tr->dir); 9765 free_percpu(tr->last_func_repeats); 9766 free_trace_buffers(tr); 9767 clear_tracing_err_log(tr); 9768 free_tracers(tr); 9769 9770 if (tr->range_name) { 9771 reserve_mem_release_by_name(tr->range_name); 9772 kfree(tr->range_name); 9773 } 9774 if (tr->flags & TRACE_ARRAY_FL_VMALLOC) 9775 vfree((void *)tr->range_addr_start); 9776 9777 for (i = 0; i < tr->nr_topts; i++) { 9778 kfree(tr->topts[i].topts); 9779 } 9780 kfree(tr->topts); 9781 9782 free_cpumask_var(tr->pipe_cpumask); 9783 free_cpumask_var(tr->tracing_cpumask); 9784 kfree_const(tr->system_names); 9785 kfree(tr->name); 9786 kfree(tr); 9787 9788 return 0; 9789 } 9790 9791 int trace_array_destroy(struct trace_array *this_tr) 9792 { 9793 struct trace_array *tr; 9794 9795 if (!this_tr) 9796 return -EINVAL; 9797 9798 guard(mutex)(&event_mutex); 9799 guard(mutex)(&trace_types_lock); 9800 9801 9802 /* Making sure trace array exists before destroying it. 
*/ 9803 list_for_each_entry(tr, &ftrace_trace_arrays, list) { 9804 if (tr == this_tr) 9805 return __remove_instance(tr); 9806 } 9807 9808 return -ENODEV; 9809 } 9810 EXPORT_SYMBOL_GPL(trace_array_destroy); 9811 9812 static int instance_rmdir(const char *name) 9813 { 9814 struct trace_array *tr; 9815 9816 guard(mutex)(&event_mutex); 9817 guard(mutex)(&trace_types_lock); 9818 9819 tr = trace_array_find(name); 9820 if (!tr) 9821 return -ENODEV; 9822 9823 return __remove_instance(tr); 9824 } 9825 9826 static __init void create_trace_instances(struct dentry *d_tracer) 9827 { 9828 struct trace_array *tr; 9829 9830 trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer, 9831 instance_mkdir, 9832 instance_rmdir); 9833 if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n")) 9834 return; 9835 9836 guard(mutex)(&event_mutex); 9837 guard(mutex)(&trace_types_lock); 9838 9839 list_for_each_entry(tr, &ftrace_trace_arrays, list) { 9840 if (!tr->name) 9841 continue; 9842 if (MEM_FAIL(trace_array_create_dir(tr) < 0, 9843 "Failed to create instance directory\n")) 9844 return; 9845 } 9846 } 9847 9848 static void 9849 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer) 9850 { 9851 int cpu; 9852 9853 trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer, 9854 tr, &show_traces_fops); 9855 9856 trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer, 9857 tr, &set_tracer_fops); 9858 9859 trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer, 9860 tr, &tracing_cpumask_fops); 9861 9862 trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer, 9863 tr, &tracing_iter_fops); 9864 9865 trace_create_file("trace", TRACE_MODE_WRITE, d_tracer, 9866 tr, &tracing_fops); 9867 9868 trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer, 9869 tr, &tracing_pipe_fops); 9870 9871 trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer, 9872 tr, &tracing_entries_fops); 9873 9874 trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer, 9875 tr, &tracing_total_entries_fops); 9876 9877 trace_create_file("free_buffer", 0200, d_tracer, 9878 tr, &tracing_free_buffer_fops); 9879 9880 trace_create_file("trace_marker", 0220, d_tracer, 9881 tr, &tracing_mark_fops); 9882 9883 tr->trace_marker_file = __find_event_file(tr, "ftrace", "print"); 9884 9885 trace_create_file("trace_marker_raw", 0220, d_tracer, 9886 tr, &tracing_mark_raw_fops); 9887 9888 trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr, 9889 &trace_clock_fops); 9890 9891 trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer, 9892 tr, &rb_simple_fops); 9893 9894 trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr, 9895 &trace_time_stamp_mode_fops); 9896 9897 tr->buffer_percent = 50; 9898 9899 trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer, 9900 tr, &buffer_percent_fops); 9901 9902 trace_create_file("buffer_subbuf_size_kb", TRACE_MODE_WRITE, d_tracer, 9903 tr, &buffer_subbuf_size_fops); 9904 9905 trace_create_file("syscall_user_buf_size", TRACE_MODE_WRITE, d_tracer, 9906 tr, &tracing_syscall_buf_fops); 9907 9908 create_trace_options_dir(tr); 9909 9910 trace_create_maxlat_file(tr, d_tracer); 9911 9912 if (ftrace_create_function_files(tr, d_tracer)) 9913 MEM_FAIL(1, "Could not allocate function filter files"); 9914 9915 if (tr->range_addr_start) { 9916 trace_create_file("last_boot_info", TRACE_MODE_READ, d_tracer, 9917 tr, &last_boot_fops); 9918 #ifdef CONFIG_TRACER_SNAPSHOT 9919 } else { 9920 trace_create_file("snapshot", 
TRACE_MODE_WRITE, d_tracer, 9921 tr, &snapshot_fops); 9922 #endif 9923 } 9924 9925 trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer, 9926 tr, &tracing_err_log_fops); 9927 9928 for_each_tracing_cpu(cpu) 9929 tracing_init_tracefs_percpu(tr, cpu); 9930 9931 ftrace_init_tracefs(tr, d_tracer); 9932 } 9933 9934 #ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED 9935 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore) 9936 { 9937 struct vfsmount *mnt; 9938 struct file_system_type *type; 9939 struct fs_context *fc; 9940 int ret; 9941 9942 /* 9943 * To maintain backward compatibility for tools that mount 9944 * debugfs to get to the tracing facility, tracefs is automatically 9945 * mounted to the debugfs/tracing directory. 9946 */ 9947 type = get_fs_type("tracefs"); 9948 if (!type) 9949 return NULL; 9950 9951 fc = fs_context_for_submount(type, mntpt); 9952 put_filesystem(type); 9953 if (IS_ERR(fc)) 9954 return ERR_CAST(fc); 9955 9956 pr_warn("NOTICE: Automounting of tracing to debugfs is deprecated and will be removed in 2030\n"); 9957 9958 ret = vfs_parse_fs_string(fc, "source", "tracefs"); 9959 if (!ret) 9960 mnt = fc_mount(fc); 9961 else 9962 mnt = ERR_PTR(ret); 9963 9964 put_fs_context(fc); 9965 return mnt; 9966 } 9967 #endif 9968 9969 /** 9970 * tracing_init_dentry - initialize top level trace array 9971 * 9972 * This is called when creating files or directories in the tracing 9973 * directory. It is called via fs_initcall() by any of the boot up code 9974 * and returns zero once the top level tracing directory is initialized. 9975 */ 9976 int tracing_init_dentry(void) 9977 { 9978 struct trace_array *tr = &global_trace; 9979 9980 if (security_locked_down(LOCKDOWN_TRACEFS)) { 9981 pr_warn("Tracing disabled due to lockdown\n"); 9982 return -EPERM; 9983 } 9984 9985 /* The top level trace array uses NULL as parent */ 9986 if (tr->dir) 9987 return 0; 9988 9989 if (WARN_ON(!tracefs_initialized())) 9990 return -ENODEV; 9991 9992 #ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED 9993 /* 9994 * As there may still be users that expect the tracing 9995 * files to exist in debugfs/tracing, we must automount 9996 * the tracefs file system there, so older tools still 9997 * work with the newer kernel.
9998 */ 9999 tr->dir = debugfs_create_automount("tracing", NULL, 10000 trace_automount, NULL); 10001 #endif 10002 10003 return 0; 10004 } 10005 10006 extern struct trace_eval_map *__start_ftrace_eval_maps[]; 10007 extern struct trace_eval_map *__stop_ftrace_eval_maps[]; 10008 10009 struct workqueue_struct *trace_init_wq __initdata; 10010 static struct work_struct eval_map_work __initdata; 10011 static struct work_struct tracerfs_init_work __initdata; 10012 10013 static void __init eval_map_work_func(struct work_struct *work) 10014 { 10015 int len; 10016 10017 len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps; 10018 trace_event_update_with_eval_map(NULL, __start_ftrace_eval_maps, len); 10019 } 10020 10021 static int __init trace_eval_init(void) 10022 { 10023 INIT_WORK(&eval_map_work, eval_map_work_func); 10024 10025 trace_init_wq = alloc_workqueue("trace_init_wq", WQ_UNBOUND, 0); 10026 if (!trace_init_wq) { 10027 pr_err("Unable to allocate trace_init_wq\n"); 10028 /* Do work here */ 10029 eval_map_work_func(&eval_map_work); 10030 return -ENOMEM; 10031 } 10032 10033 queue_work(trace_init_wq, &eval_map_work); 10034 return 0; 10035 } 10036 10037 subsys_initcall(trace_eval_init); 10038 10039 static int __init trace_eval_sync(void) 10040 { 10041 /* Make sure the eval map updates are finished */ 10042 if (trace_init_wq) 10043 destroy_workqueue(trace_init_wq); 10044 return 0; 10045 } 10046 10047 late_initcall_sync(trace_eval_sync); 10048 10049 10050 #ifdef CONFIG_MODULES 10051 10052 bool module_exists(const char *module) 10053 { 10054 /* All modules have the symbol __this_module */ 10055 static const char this_mod[] = "__this_module"; 10056 char modname[MODULE_NAME_LEN + sizeof(this_mod) + 2]; 10057 unsigned long val; 10058 int n; 10059 10060 n = snprintf(modname, sizeof(modname), "%s:%s", module, this_mod); 10061 10062 if (n > sizeof(modname) - 1) 10063 return false; 10064 10065 val = module_kallsyms_lookup_name(modname); 10066 return val != 0; 10067 } 10068 10069 static void trace_module_add_evals(struct module *mod) 10070 { 10071 /* 10072 * Modules with bad taint do not have events created, do 10073 * not bother with enums either. 10074 */ 10075 if (trace_module_has_bad_taint(mod)) 10076 return; 10077 10078 /* Even if there are no trace_evals, this needs to sanitize field types.
*/ 10079 trace_event_update_with_eval_map(mod, mod->trace_evals, mod->num_trace_evals); 10080 } 10081 10082 #ifdef CONFIG_TRACE_EVAL_MAP_FILE 10083 static void trace_module_remove_evals(struct module *mod) 10084 { 10085 union trace_eval_map_item *map; 10086 union trace_eval_map_item **last = &trace_eval_maps; 10087 10088 if (!mod->num_trace_evals) 10089 return; 10090 10091 guard(mutex)(&trace_eval_mutex); 10092 10093 map = trace_eval_maps; 10094 10095 while (map) { 10096 if (map->head.mod == mod) 10097 break; 10098 map = trace_eval_jmp_to_tail(map); 10099 last = &map->tail.next; 10100 map = map->tail.next; 10101 } 10102 if (!map) 10103 return; 10104 10105 *last = trace_eval_jmp_to_tail(map)->tail.next; 10106 kfree(map); 10107 } 10108 #else 10109 static inline void trace_module_remove_evals(struct module *mod) { } 10110 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */ 10111 10112 static void trace_module_record(struct module *mod, bool add) 10113 { 10114 struct trace_array *tr; 10115 unsigned long flags; 10116 10117 list_for_each_entry(tr, &ftrace_trace_arrays, list) { 10118 flags = tr->flags & (TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT); 10119 /* Update any persistent trace array that has already been started */ 10120 if (flags == TRACE_ARRAY_FL_BOOT && add) { 10121 guard(mutex)(&scratch_mutex); 10122 save_mod(mod, tr); 10123 } else if (flags & TRACE_ARRAY_FL_LAST_BOOT) { 10124 /* Update delta if the module loaded in previous boot */ 10125 make_mod_delta(mod, tr); 10126 } 10127 } 10128 } 10129 10130 static int trace_module_notify(struct notifier_block *self, 10131 unsigned long val, void *data) 10132 { 10133 struct module *mod = data; 10134 10135 switch (val) { 10136 case MODULE_STATE_COMING: 10137 trace_module_add_evals(mod); 10138 trace_module_record(mod, true); 10139 break; 10140 case MODULE_STATE_GOING: 10141 trace_module_remove_evals(mod); 10142 trace_module_record(mod, false); 10143 break; 10144 } 10145 10146 return NOTIFY_OK; 10147 } 10148 10149 static struct notifier_block trace_module_nb = { 10150 .notifier_call = trace_module_notify, 10151 .priority = 0, 10152 }; 10153 #endif /* CONFIG_MODULES */ 10154 10155 static __init void tracer_init_tracefs_work_func(struct work_struct *work) 10156 { 10157 10158 event_trace_init(); 10159 10160 init_tracer_tracefs(&global_trace, NULL); 10161 ftrace_init_tracefs_toplevel(&global_trace, NULL); 10162 10163 trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL, 10164 &global_trace, &tracing_thresh_fops); 10165 10166 trace_create_file("README", TRACE_MODE_READ, NULL, 10167 NULL, &tracing_readme_fops); 10168 10169 trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL, 10170 NULL, &tracing_saved_cmdlines_fops); 10171 10172 trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL, 10173 NULL, &tracing_saved_cmdlines_size_fops); 10174 10175 trace_create_file("saved_tgids", TRACE_MODE_READ, NULL, 10176 NULL, &tracing_saved_tgids_fops); 10177 10178 trace_create_eval_file(NULL); 10179 10180 #ifdef CONFIG_MODULES 10181 register_module_notifier(&trace_module_nb); 10182 #endif 10183 10184 #ifdef CONFIG_DYNAMIC_FTRACE 10185 trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL, 10186 NULL, &tracing_dyn_info_fops); 10187 #endif 10188 10189 create_trace_instances(NULL); 10190 10191 update_tracer_options(); 10192 } 10193 10194 static __init int tracer_init_tracefs(void) 10195 { 10196 int ret; 10197 10198 trace_access_lock_init(); 10199 10200 ret = tracing_init_dentry(); 10201 if (ret) 10202 return 0; 10203 10204 if (trace_init_wq) { 
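/* trace_eval_init() created trace_init_wq; queueing the tracefs setup there lets boot continue while the files are created. */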

static __init int tracer_init_tracefs(void)
{
	int ret;

	trace_access_lock_init();

	ret = tracing_init_dentry();
	if (ret)
		return 0;

	if (trace_init_wq) {
		INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
		queue_work(trace_init_wq, &tracerfs_init_work);
	} else {
		tracer_init_tracefs_work_func(NULL);
	}

	if (rv_init_interface())
		pr_err("RV: Error while creating the RV interface\n");

	return 0;
}

fs_initcall(tracer_init_tracefs);

static int trace_die_panic_handler(struct notifier_block *self,
				   unsigned long ev, void *unused);

static struct notifier_block trace_panic_notifier = {
	.notifier_call = trace_die_panic_handler,
	.priority = INT_MAX - 1,
};

static struct notifier_block trace_die_notifier = {
	.notifier_call = trace_die_panic_handler,
	.priority = INT_MAX - 1,
};

/*
 * The idea is to execute the following die/panic callback early, in order
 * to avoid showing irrelevant information in the trace (like other panic
 * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
 * warnings get disabled (to prevent potential log flooding).
 */
static int trace_die_panic_handler(struct notifier_block *self,
				   unsigned long ev, void *unused)
{
	if (!ftrace_dump_on_oops_enabled())
		return NOTIFY_DONE;

	/* The die notifier requires DIE_OOPS to trigger */
	if (self == &trace_die_notifier && ev != DIE_OOPS)
		return NOTIFY_DONE;

	ftrace_dump(DUMP_PARAM);

	return NOTIFY_DONE;
}

/*
 * printk is set to a max of 1024; we really don't need it that big.
 * Nothing should be printing 1000 characters anyway.
 */
#define TRACE_MAX_PRINT		1000

/*
 * Define here KERN_TRACE so that we have one place to modify
 * it if we decide to change what log level the ftrace dump
 * should be at.
 */
#define KERN_TRACE		KERN_EMERG

void
trace_printk_seq(struct trace_seq *s)
{
	/* Probably should print a warning here. */
	if (s->seq.len >= TRACE_MAX_PRINT)
		s->seq.len = TRACE_MAX_PRINT;

	/*
	 * More paranoid code. Although the buffer size is set to
	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
	 * an extra layer of protection.
	 */
	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
		s->seq.len = s->seq.size - 1;

	/* The string should already be NUL terminated, but we are paranoid */
	s->buffer[s->seq.len] = 0;

	printk(KERN_TRACE "%s", s->buffer);

	trace_seq_init(s);
}

static void trace_init_iter(struct trace_iterator *iter, struct trace_array *tr)
{
	iter->tr = tr;
	iter->trace = iter->tr->current_trace;
	iter->cpu_file = RING_BUFFER_ALL_CPUS;
	iter->array_buffer = &tr->array_buffer;

	if (iter->trace && iter->trace->open)
		iter->trace->open(iter);

	/* Annotate start of buffers if we had overruns */
	if (ring_buffer_overruns(iter->array_buffer->buffer))
		iter->iter_flags |= TRACE_FILE_ANNOTATE;

	/* Output in nanoseconds only if we are using a clock in nanoseconds */
	if (trace_clocks[iter->tr->clock_id].in_ns)
		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;

	/* Cannot use kmalloc for iter.temp and iter.fmt */
	iter->temp = static_temp_buf;
	iter->temp_size = STATIC_TEMP_BUF_SIZE;
	iter->fmt = static_fmt_buf;
	iter->fmt_size = STATIC_FMT_BUF_SIZE;
}

void trace_init_global_iter(struct trace_iterator *iter)
{
	trace_init_iter(iter, &global_trace);
}

static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_mode)
{
	/* use static because iter can be a bit big for the stack */
	static struct trace_iterator iter;
	unsigned int old_userobj;
	unsigned long flags;
	int cnt = 0;

	/*
	 * Always turn off tracing when we dump.
	 * We don't need to show trace output of what happens
	 * between multiple crashes.
	 *
	 * If the user does a sysrq-z, then they can re-enable
	 * tracing with echo 1 > tracing_on.
	 */
	tracer_tracing_off(tr);

	local_irq_save(flags);

	/* Simulate the iterator */
	trace_init_iter(&iter, tr);

	/* While dumping, do not allow the buffer to be enabled */
	tracer_tracing_disable(tr);

	old_userobj = tr->trace_flags & TRACE_ITER(SYM_USEROBJ);

	/* don't look at user memory in panic mode */
	tr->trace_flags &= ~TRACE_ITER(SYM_USEROBJ);

	if (dump_mode == DUMP_ORIG)
		iter.cpu_file = raw_smp_processor_id();
	else
		iter.cpu_file = RING_BUFFER_ALL_CPUS;

	if (tr == &global_trace)
		printk(KERN_TRACE "Dumping ftrace buffer:\n");
	else
		printk(KERN_TRACE "Dumping ftrace instance %s buffer:\n", tr->name);

	/* Did function tracer already get disabled? */
	if (ftrace_is_dead()) {
		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
	}

	/*
	 * We need to stop all tracing on all CPUS to read
	 * the next buffer. This is a bit expensive, but is
	 * not done often. We read everything we can, and then
	 * release the locks again.
	 */

	while (!trace_empty(&iter)) {

		if (!cnt)
			printk(KERN_TRACE "---------------------------------\n");

		cnt++;

		trace_iterator_reset(&iter);
		iter.iter_flags |= TRACE_FILE_LAT_FMT;

		if (trace_find_next_entry_inc(&iter) != NULL) {
			int ret;

			ret = print_trace_line(&iter);
			if (ret != TRACE_TYPE_NO_CONSUME)
				trace_consume(&iter);

			trace_printk_seq(&iter.seq);
		}
		touch_nmi_watchdog();
	}

	if (!cnt)
		printk(KERN_TRACE "   (ftrace buffer empty)\n");
	else
		printk(KERN_TRACE "---------------------------------\n");

	tr->trace_flags |= old_userobj;

	tracer_tracing_enable(tr);
	local_irq_restore(flags);
}

static void ftrace_dump_by_param(void)
{
	bool first_param = true;
	char dump_param[MAX_TRACER_SIZE];
	char *buf, *token, *inst_name;
	struct trace_array *tr;

	strscpy(dump_param, ftrace_dump_on_oops, MAX_TRACER_SIZE);
	buf = dump_param;

	while ((token = strsep(&buf, ",")) != NULL) {
		if (first_param) {
			first_param = false;
			if (!strcmp("0", token))
				continue;
			else if (!strcmp("1", token)) {
				ftrace_dump_one(&global_trace, DUMP_ALL);
				continue;
			} else if (!strcmp("2", token) ||
				   !strcmp("orig_cpu", token)) {
				ftrace_dump_one(&global_trace, DUMP_ORIG);
				continue;
			}
		}

		inst_name = strsep(&token, "=");
		tr = trace_array_find(inst_name);
		if (!tr) {
			printk(KERN_TRACE "Instance %s not found\n", inst_name);
			continue;
		}

		if (token && (!strcmp("2", token) ||
			      !strcmp("orig_cpu", token)))
			ftrace_dump_one(tr, DUMP_ORIG);
		else
			ftrace_dump_one(tr, DUMP_ALL);
	}
}
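
/*
 * Examples of values the parser above accepts (set via the
 * "ftrace_dump_on_oops" command line option or sysctl); the instance
 * names are illustrative:
 *
 *	"1"			dump the global buffer, all CPUs
 *	"2" / "orig_cpu"	dump the global buffer, oops CPU only
 *	"foo"			dump instance "foo", all CPUs
 *	"foo=orig_cpu,bar"	dump instance "foo" (oops CPU) and "bar"
 */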

void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
{
	static atomic_t dump_running;

	/* Only allow one dump user at a time */
	if (atomic_inc_return(&dump_running) != 1) {
		atomic_dec(&dump_running);
		return;
	}

	switch (oops_dump_mode) {
	case DUMP_ALL:
		ftrace_dump_one(&global_trace, DUMP_ALL);
		break;
	case DUMP_ORIG:
		ftrace_dump_one(&global_trace, DUMP_ORIG);
		break;
	case DUMP_PARAM:
		ftrace_dump_by_param();
		break;
	case DUMP_NONE:
		break;
	default:
		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
		ftrace_dump_one(&global_trace, DUMP_ALL);
	}

	atomic_dec(&dump_running);
}
EXPORT_SYMBOL_GPL(ftrace_dump);

#define WRITE_BUFSIZE	4096

ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
				size_t count, loff_t *ppos,
				int (*createfn)(const char *))
{
	char *kbuf __free(kfree) = NULL;
	char *buf, *tmp;
	int ret = 0;
	size_t done = 0;
	size_t size;

	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
	if (!kbuf)
		return -ENOMEM;

	while (done < count) {
		size = count - done;

		if (size >= WRITE_BUFSIZE)
			size = WRITE_BUFSIZE - 1;

		if (copy_from_user(kbuf, buffer + done, size))
			return -EFAULT;

		kbuf[size] = '\0';
		buf = kbuf;
		do {
			tmp = strchr(buf, '\n');
			if (tmp) {
				*tmp = '\0';
				size = tmp - buf + 1;
			} else {
				size = strlen(buf);
				if (done + size < count) {
					if (buf != kbuf)
						break;
					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
					pr_warn("Line length is too long: Should be less than %d\n",
						WRITE_BUFSIZE - 2);
					return -EINVAL;
				}
			}
			done += size;

			/* Remove comments */
			tmp = strchr(buf, '#');

			if (tmp)
				*tmp = '\0';

			ret = createfn(buf);
			if (ret)
				return ret;
			buf += size;

		} while (done < count);
	}
	return done;
}
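
/*
 * Illustrative sketch (not used by this file): a minimal createfn as
 * trace_parse_run_command() expects. It is handed one newline-separated,
 * comment-stripped command at a time; returning non-zero aborts the
 * whole write. The function name is made up for the example.
 */
static __maybe_unused int example_create_cmd(const char *raw_command)
{
	/* Blank lines (or lines that were entirely a comment) still arrive */
	if (!raw_command[0])
		return 0;

	pr_debug("parsed command: %s\n", raw_command);
	return 0;
}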

#ifdef CONFIG_TRACER_SNAPSHOT
__init static bool tr_needs_alloc_snapshot(const char *name)
{
	char *test;
	int len = strlen(name);
	bool ret;

	if (!boot_snapshot_index)
		return false;

	if (strncmp(name, boot_snapshot_info, len) == 0 &&
	    boot_snapshot_info[len] == '\t')
		return true;

	test = kmalloc(strlen(name) + 3, GFP_KERNEL);
	if (!test)
		return false;

	sprintf(test, "\t%s\t", name);
	ret = strstr(boot_snapshot_info, test) != NULL;
	kfree(test);
	return ret;
}

__init static void do_allocate_snapshot(const char *name)
{
	if (!tr_needs_alloc_snapshot(name))
		return;

	/*
	 * When allocate_snapshot is set, the next call to
	 * allocate_trace_buffers() (called by trace_array_get_by_name())
	 * will allocate the snapshot buffer. That will also clear
	 * this flag.
	 */
	allocate_snapshot = true;
}
#else
static inline void do_allocate_snapshot(const char *name) { }
#endif

__init static int backup_instance_area(const char *backup,
				       unsigned long *addr, phys_addr_t *size)
{
	struct trace_array *backup_tr;
	void *allocated_vaddr = NULL;

	backup_tr = trace_array_get_by_name(backup, NULL);
	if (!backup_tr) {
		pr_warn("Tracing: Instance %s is not found.\n", backup);
		return -ENOENT;
	}

	if (!(backup_tr->flags & TRACE_ARRAY_FL_BOOT)) {
		pr_warn("Tracing: Instance %s is not boot mapped.\n", backup);
		trace_array_put(backup_tr);
		return -EINVAL;
	}

	*size = backup_tr->range_addr_size;

	allocated_vaddr = vzalloc(*size);
	if (!allocated_vaddr) {
		pr_warn("Tracing: Failed to allocate memory for copying instance %s (size 0x%lx)\n",
			backup, (unsigned long)*size);
		trace_array_put(backup_tr);
		return -ENOMEM;
	}

	memcpy(allocated_vaddr,
	       (void *)backup_tr->range_addr_start, (size_t)*size);
	*addr = (unsigned long)allocated_vaddr;

	trace_array_put(backup_tr);
	return 0;
}

__init static void enable_instances(void)
{
	struct trace_array *tr;
	bool memmap_area = false;
	char *curr_str;
	char *name;
	char *str;
	char *tok;

	/* A tab is always appended */
	boot_instance_info[boot_instance_index - 1] = '\0';
	str = boot_instance_info;

	while ((curr_str = strsep(&str, "\t"))) {
		phys_addr_t start = 0;
		phys_addr_t size = 0;
		unsigned long addr = 0;
		bool traceprintk = false;
		bool traceoff = false;
		char *flag_delim;
		char *addr_delim;
		char *rname __free(kfree) = NULL;
		char *backup;

		tok = strsep(&curr_str, ",");

		name = strsep(&tok, "=");
		backup = tok;

		flag_delim = strchr(name, '^');
		addr_delim = strchr(name, '@');

		if (addr_delim)
			*addr_delim++ = '\0';

		if (flag_delim)
			*flag_delim++ = '\0';

		if (backup) {
			if (backup_instance_area(backup, &addr, &size) < 0)
				continue;
		}

		if (flag_delim) {
			char *flag;

			while ((flag = strsep(&flag_delim, "^"))) {
				if (strcmp(flag, "traceoff") == 0) {
					traceoff = true;
				} else if ((strcmp(flag, "printk") == 0) ||
					   (strcmp(flag, "traceprintk") == 0) ||
					   (strcmp(flag, "trace_printk") == 0)) {
					traceprintk = true;
				} else {
					pr_info("Tracing: Invalid instance flag '%s' for %s\n",
						flag, name);
				}
			}
		}

		tok = addr_delim;
		if (tok && isdigit(*tok)) {
			start = memparse(tok, &tok);
			if (!start) {
				pr_warn("Tracing: Invalid boot instance address for %s\n",
					name);
				continue;
			}
			if (*tok != ':') {
				pr_warn("Tracing: No size specified for instance %s\n", name);
				continue;
			}
			tok++;
			size = memparse(tok, &tok);
			if (!size) {
				pr_warn("Tracing: Invalid boot instance size for %s\n",
					name);
				continue;
			}
			memmap_area = true;
		} else if (tok) {
			if (!reserve_mem_find_by_name(tok, &start, &size)) {
				start = 0;
				pr_warn("Failed to map boot instance %s to %s\n", name, tok);
				continue;
			}
			rname = kstrdup(tok, GFP_KERNEL);
		}

		if (start) {
			/* Start and size must be page aligned */
			if (start & ~PAGE_MASK) {
				pr_warn("Tracing: mapping start addr %pa is not page aligned\n", &start);
				continue;
			}
			if (size & ~PAGE_MASK) {
				pr_warn("Tracing: mapping size %pa is not page aligned\n", &size);
				continue;
			}

			if (memmap_area)
				addr = map_pages(start, size);
			else
				addr = (unsigned long)phys_to_virt(start);
			if (addr) {
				pr_info("Tracing: mapped boot instance %s at physical memory %pa of size 0x%lx\n",
					name, &start, (unsigned long)size);
			} else {
				pr_warn("Tracing: Failed to map boot instance %s\n", name);
				continue;
			}
		} else {
			/* Only non-mapped buffers have snapshot buffers */
			if (IS_ENABLED(CONFIG_TRACER_SNAPSHOT))
				do_allocate_snapshot(name);
		}

		tr = trace_array_create_systems(name, NULL, addr, size);
		if (IS_ERR(tr)) {
			pr_warn("Tracing: Failed to create instance buffer %s\n", curr_str);
			continue;
		}

		if (traceoff)
			tracer_tracing_off(tr);

		if (traceprintk)
			update_printk_trace(tr);

		/* memmap'd buffers cannot be freed */
		if (memmap_area) {
			tr->flags |= TRACE_ARRAY_FL_MEMMAP;
			tr->ref++;
		}

		/* Backup buffers can be freed, but need vfree() */
		if (backup)
			tr->flags |= TRACE_ARRAY_FL_VMALLOC;

		if (start || backup) {
			tr->flags |= TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT;
			tr->range_name = no_free_ptr(rname);
		}

		while ((tok = strsep(&curr_str, ",")))
			early_enable_events(tr, tok, true);
	}
}
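
/*
 * Examples of "trace_instance=" boot parameters handled above (the
 * names, addresses and sizes are illustrative):
 *
 *	trace_instance=foo			create instance "foo"
 *	trace_instance=foo^traceoff^traceprintk	flags follow '^'
 *	trace_instance=foo@0x1000000:2M		map phys addr:size
 *	trace_instance=foo@myreserve		use a reserve_mem region
 *	trace_instance=foo=bar			copy boot-mapped "bar"
 *	trace_instance=foo,sched:sched_switch	enable events after ','
 */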
10814 */ 10815 ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE, 10816 "trace/RB:prepare", trace_rb_cpu_prepare, 10817 NULL); 10818 if (ret < 0) 10819 goto out_free_cpumask; 10820 /* Used for event triggers */ 10821 ret = -ENOMEM; 10822 temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE); 10823 if (!temp_buffer) 10824 goto out_rm_hp_state; 10825 10826 if (trace_create_savedcmd() < 0) 10827 goto out_free_temp_buffer; 10828 10829 if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL)) 10830 goto out_free_savedcmd; 10831 10832 /* TODO: make the number of buffers hot pluggable with CPUS */ 10833 if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) { 10834 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n"); 10835 goto out_free_pipe_cpumask; 10836 } 10837 if (global_trace.buffer_disabled) 10838 tracing_off(); 10839 10840 if (trace_boot_clock) { 10841 ret = tracing_set_clock(&global_trace, trace_boot_clock); 10842 if (ret < 0) 10843 pr_warn("Trace clock %s not defined, going back to default\n", 10844 trace_boot_clock); 10845 } 10846 10847 /* 10848 * register_tracer() might reference current_trace, so it 10849 * needs to be set before we register anything. This is 10850 * just a bootstrap of current_trace anyway. 10851 */ 10852 global_trace.current_trace = &nop_trace; 10853 global_trace.current_trace_flags = nop_trace.flags; 10854 10855 global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; 10856 #ifdef CONFIG_TRACER_SNAPSHOT 10857 spin_lock_init(&global_trace.snapshot_trigger_lock); 10858 #endif 10859 ftrace_init_global_array_ops(&global_trace); 10860 10861 #ifdef CONFIG_MODULES 10862 INIT_LIST_HEAD(&global_trace.mod_events); 10863 #endif 10864 10865 init_trace_flags_index(&global_trace); 10866 10867 INIT_LIST_HEAD(&global_trace.tracers); 10868 10869 /* All seems OK, enable tracing */ 10870 tracing_disabled = 0; 10871 10872 atomic_notifier_chain_register(&panic_notifier_list, 10873 &trace_panic_notifier); 10874 10875 register_die_notifier(&trace_die_notifier); 10876 10877 global_trace.flags = TRACE_ARRAY_FL_GLOBAL; 10878 10879 global_trace.syscall_buf_sz = syscall_buf_size; 10880 10881 INIT_LIST_HEAD(&global_trace.systems); 10882 INIT_LIST_HEAD(&global_trace.events); 10883 INIT_LIST_HEAD(&global_trace.hist_vars); 10884 INIT_LIST_HEAD(&global_trace.err_log); 10885 list_add(&global_trace.marker_list, &marker_copies); 10886 list_add(&global_trace.list, &ftrace_trace_arrays); 10887 10888 register_tracer(&nop_trace); 10889 10890 /* Function tracing may start here (via kernel command line) */ 10891 init_function_trace(); 10892 10893 apply_trace_boot_options(); 10894 10895 register_snapshot_cmd(); 10896 10897 return 0; 10898 10899 out_free_pipe_cpumask: 10900 free_cpumask_var(global_trace.pipe_cpumask); 10901 out_free_savedcmd: 10902 trace_free_saved_cmdlines_buffer(); 10903 out_free_temp_buffer: 10904 ring_buffer_free(temp_buffer); 10905 out_rm_hp_state: 10906 cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE); 10907 out_free_cpumask: 10908 free_cpumask_var(global_trace.tracing_cpumask); 10909 out_free_buffer_mask: 10910 free_cpumask_var(tracing_buffer_mask); 10911 return ret; 10912 } 10913 10914 #ifdef CONFIG_FUNCTION_TRACER 10915 /* Used to set module cached ftrace filtering at boot up */ 10916 struct trace_array *trace_get_global_array(void) 10917 { 10918 return &global_trace; 10919 } 10920 #endif 10921 10922 void __init ftrace_boot_snapshot(void) 10923 { 10924 #ifdef CONFIG_TRACER_SNAPSHOT 10925 struct trace_array *tr; 10926 10927 if 
void __init ftrace_boot_snapshot(void)
{
#ifdef CONFIG_TRACER_SNAPSHOT
	struct trace_array *tr;

	if (!snapshot_at_boot)
		return;

	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
		if (!tr->allocated_snapshot)
			continue;

		tracing_snapshot_instance(tr);
		trace_array_puts(tr, "** Boot snapshot taken **\n");
	}
#endif
}

void __init early_trace_init(void)
{
	if (tracepoint_printk) {
		tracepoint_print_iter = kzalloc_obj(*tracepoint_print_iter);
		if (MEM_FAIL(!tracepoint_print_iter,
			     "Failed to allocate trace iterator\n"))
			tracepoint_printk = 0;
		else
			static_key_enable(&tracepoint_printk_key.key);
	}
	tracer_alloc_buffers();

	init_events();
}

void __init trace_init(void)
{
	trace_event_init();

	if (boot_instance_index)
		enable_instances();
}

__init static void clear_boot_tracer(void)
{
	/*
	 * The buffer holding the default bootup tracer's name is in an
	 * init section that is about to be freed. This function runs at
	 * late_initcall time: if the boot tracer was never registered,
	 * clear the pointer now so that later registrations cannot
	 * access the soon-to-be-freed buffer.
	 */
	if (!default_bootup_tracer)
		return;

	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
	       default_bootup_tracer);
	default_bootup_tracer = NULL;
}

#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
__init static void tracing_set_default_clock(void)
{
	/* sched_clock_stable() is determined in late_initcall */
	if (!trace_boot_clock && !sched_clock_stable()) {
		if (security_locked_down(LOCKDOWN_TRACEFS)) {
			pr_warn("Can not set tracing clock due to lockdown\n");
			return;
		}

		printk(KERN_WARNING
		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
		       "If you want to keep using the local clock, then add:\n"
		       "  \"trace_clock=local\"\n"
		       "on the kernel command line\n");
		tracing_set_clock(&global_trace, "global");
	}
}
#else
static inline void tracing_set_default_clock(void) { }
#endif

__init static int late_trace_init(void)
{
	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
		static_key_disable(&tracepoint_printk_key.key);
		tracepoint_printk = 0;
	}

	if (traceoff_after_boot)
		tracing_off();

	tracing_set_default_clock();
	clear_boot_tracer();
	return 0;
}

late_initcall_sync(late_trace_init);