1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * ring buffer based function tracer 4 * 5 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com> 6 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com> 7 * 8 * Originally taken from the RT patch by: 9 * Arnaldo Carvalho de Melo <acme@redhat.com> 10 * 11 * Based on code from the latency_tracer, that is: 12 * Copyright (C) 2004-2006 Ingo Molnar 13 * Copyright (C) 2004 Nadia Yvette Chambers 14 */ 15 #include <linux/ring_buffer.h> 16 #include <linux/utsname.h> 17 #include <linux/stacktrace.h> 18 #include <linux/writeback.h> 19 #include <linux/kallsyms.h> 20 #include <linux/security.h> 21 #include <linux/seq_file.h> 22 #include <linux/irqflags.h> 23 #include <linux/syscalls.h> 24 #include <linux/debugfs.h> 25 #include <linux/tracefs.h> 26 #include <linux/pagemap.h> 27 #include <linux/hardirq.h> 28 #include <linux/linkage.h> 29 #include <linux/uaccess.h> 30 #include <linux/cleanup.h> 31 #include <linux/vmalloc.h> 32 #include <linux/ftrace.h> 33 #include <linux/module.h> 34 #include <linux/percpu.h> 35 #include <linux/splice.h> 36 #include <linux/kdebug.h> 37 #include <linux/string.h> 38 #include <linux/mount.h> 39 #include <linux/rwsem.h> 40 #include <linux/slab.h> 41 #include <linux/ctype.h> 42 #include <linux/init.h> 43 #include <linux/panic_notifier.h> 44 #include <linux/poll.h> 45 #include <linux/nmi.h> 46 #include <linux/fs.h> 47 #include <linux/trace.h> 48 #include <linux/sched/clock.h> 49 #include <linux/sched/rt.h> 50 #include <linux/irq_work.h> 51 #include <linux/workqueue.h> 52 #include <linux/sort.h> 53 #include <linux/io.h> /* vmap_page_range() */ 54 #include <linux/fs_context.h> 55 56 #include <asm/setup.h> /* COMMAND_LINE_SIZE */ 57 58 #include "trace.h" 59 #include "trace_output.h" 60 61 #ifdef CONFIG_FTRACE_STARTUP_TEST 62 /* 63 * We need to change this state when a selftest is running. 
64 * A selftest will lurk into the ring-buffer to count the 65 * entries inserted during the selftest although some concurrent 66 * insertions into the ring-buffer such as trace_printk could occurred 67 * at the same time, giving false positive or negative results. 68 */ 69 bool __read_mostly tracing_selftest_running; 70 71 /* 72 * If boot-time tracing including tracers/events via kernel cmdline 73 * is running, we do not want to run SELFTEST. 74 */ 75 bool __read_mostly tracing_selftest_disabled; 76 77 void __init disable_tracing_selftest(const char *reason) 78 { 79 if (!tracing_selftest_disabled) { 80 tracing_selftest_disabled = true; 81 pr_info("Ftrace startup test is disabled due to %s\n", reason); 82 } 83 } 84 #else 85 #define tracing_selftest_disabled 0 86 #endif 87 88 /* Pipe tracepoints to printk */ 89 static struct trace_iterator *tracepoint_print_iter; 90 int tracepoint_printk; 91 static bool tracepoint_printk_stop_on_boot __initdata; 92 static bool traceoff_after_boot __initdata; 93 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key); 94 95 /* Store tracers and their flags per instance */ 96 struct tracers { 97 struct list_head list; 98 struct tracer *tracer; 99 struct tracer_flags *flags; 100 }; 101 102 /* 103 * To prevent the comm cache from being overwritten when no 104 * tracing is active, only save the comm when a trace event 105 * occurred. 106 */ 107 DEFINE_PER_CPU(bool, trace_taskinfo_save); 108 109 /* 110 * Kill all tracing for good (never come back). 111 * It is initialized to 1 but will turn to zero if the initialization 112 * of the tracer is successful. But that is the only place that sets 113 * this back to zero. 114 */ 115 int tracing_disabled = 1; 116 117 cpumask_var_t __read_mostly tracing_buffer_mask; 118 119 #define MAX_TRACER_SIZE 100 120 /* 121 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops 122 * 123 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops 124 * is set, then ftrace_dump is called. 
This will output the contents 125 * of the ftrace buffers to the console. This is very useful for 126 * capturing traces that lead to crashes and outputting it to a 127 * serial console. 128 * 129 * It is default off, but you can enable it with either specifying 130 * "ftrace_dump_on_oops" in the kernel command line, or setting 131 * /proc/sys/kernel/ftrace_dump_on_oops 132 * Set 1 if you want to dump buffers of all CPUs 133 * Set 2 if you want to dump the buffer of the CPU that triggered oops 134 * Set instance name if you want to dump the specific trace instance 135 * Multiple instance dump is also supported, and instances are separated 136 * by commas. 137 */ 138 /* Set to string format zero to disable by default */ 139 static char ftrace_dump_on_oops[MAX_TRACER_SIZE] = "0"; 140 141 /* When set, tracing will stop when a WARN*() is hit */ 142 static int __disable_trace_on_warning; 143 144 int tracepoint_printk_sysctl(const struct ctl_table *table, int write, 145 void *buffer, size_t *lenp, loff_t *ppos); 146 static const struct ctl_table trace_sysctl_table[] = { 147 { 148 .procname = "ftrace_dump_on_oops", 149 .data = &ftrace_dump_on_oops, 150 .maxlen = MAX_TRACER_SIZE, 151 .mode = 0644, 152 .proc_handler = proc_dostring, 153 }, 154 { 155 .procname = "traceoff_on_warning", 156 .data = &__disable_trace_on_warning, 157 .maxlen = sizeof(__disable_trace_on_warning), 158 .mode = 0644, 159 .proc_handler = proc_dointvec, 160 }, 161 { 162 .procname = "tracepoint_printk", 163 .data = &tracepoint_printk, 164 .maxlen = sizeof(tracepoint_printk), 165 .mode = 0644, 166 .proc_handler = tracepoint_printk_sysctl, 167 }, 168 }; 169 170 static int __init init_trace_sysctls(void) 171 { 172 register_sysctl_init("kernel", trace_sysctl_table); 173 return 0; 174 } 175 subsys_initcall(init_trace_sysctls); 176 177 #ifdef CONFIG_TRACE_EVAL_MAP_FILE 178 /* Map of enums to their values, for "eval_map" file */ 179 struct trace_eval_map_head { 180 struct module *mod; 181 unsigned long length; 
182 }; 183 184 union trace_eval_map_item; 185 186 struct trace_eval_map_tail { 187 /* 188 * "end" is first and points to NULL as it must be different 189 * than "mod" or "eval_string" 190 */ 191 union trace_eval_map_item *next; 192 const char *end; /* points to NULL */ 193 }; 194 195 static DEFINE_MUTEX(trace_eval_mutex); 196 197 /* 198 * The trace_eval_maps are saved in an array with two extra elements, 199 * one at the beginning, and one at the end. The beginning item contains 200 * the count of the saved maps (head.length), and the module they 201 * belong to if not built in (head.mod). The ending item contains a 202 * pointer to the next array of saved eval_map items. 203 */ 204 union trace_eval_map_item { 205 struct trace_eval_map map; 206 struct trace_eval_map_head head; 207 struct trace_eval_map_tail tail; 208 }; 209 210 static union trace_eval_map_item *trace_eval_maps; 211 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */ 212 213 int tracing_set_tracer(struct trace_array *tr, const char *buf); 214 static void ftrace_trace_userstack(struct trace_array *tr, 215 struct trace_buffer *buffer, 216 unsigned int trace_ctx); 217 218 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata; 219 static char *default_bootup_tracer; 220 221 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata; 222 static int boot_instance_index; 223 224 /* 225 * Repeated boot parameters, including Bootconfig array expansions, need 226 * to stay in the delimiter form that the existing parser consumes. 227 */ 228 void __init trace_append_boot_param(char *buf, const char *str, char sep, 229 int size) 230 { 231 int len, needed, str_len; 232 233 if (!*str) 234 return; 235 236 len = strlen(buf); 237 str_len = strlen(str); 238 needed = len + str_len + 1; 239 240 /* For continuation, account for the separator. 
*/ 241 if (len) 242 needed++; 243 if (needed > size) 244 return; 245 246 if (len) 247 buf[len++] = sep; 248 249 strscpy(buf + len, str, size - len); 250 } 251 252 static int __init set_cmdline_ftrace(char *str) 253 { 254 strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE); 255 default_bootup_tracer = bootup_tracer_buf; 256 /* We are using ftrace early, expand it */ 257 trace_set_ring_buffer_expanded(NULL); 258 return 1; 259 } 260 __setup("ftrace=", set_cmdline_ftrace); 261 262 int ftrace_dump_on_oops_enabled(void) 263 { 264 if (!strcmp("0", ftrace_dump_on_oops)) 265 return 0; 266 else 267 return 1; 268 } 269 270 static int __init set_ftrace_dump_on_oops(char *str) 271 { 272 if (!*str) { 273 strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE); 274 return 1; 275 } 276 277 if (*str == ',') { 278 strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE); 279 strscpy(ftrace_dump_on_oops + 1, str, MAX_TRACER_SIZE - 1); 280 return 1; 281 } 282 283 if (*str++ == '=') { 284 strscpy(ftrace_dump_on_oops, str, MAX_TRACER_SIZE); 285 return 1; 286 } 287 288 return 0; 289 } 290 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops); 291 292 static int __init stop_trace_on_warning(char *str) 293 { 294 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0)) 295 __disable_trace_on_warning = 1; 296 return 1; 297 } 298 __setup("traceoff_on_warning", stop_trace_on_warning); 299 300 static int __init boot_instance(char *str) 301 { 302 char *slot = boot_instance_info + boot_instance_index; 303 int left = sizeof(boot_instance_info) - boot_instance_index; 304 int ret; 305 306 if (strlen(str) >= left) 307 return -1; 308 309 ret = snprintf(slot, left, "%s\t", str); 310 boot_instance_index += ret; 311 312 return 1; 313 } 314 __setup("trace_instance=", boot_instance); 315 316 317 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata; 318 319 static int __init set_trace_boot_options(char *str) 320 { 321 trace_append_boot_param(trace_boot_options_buf, str, ',', 322 MAX_TRACER_SIZE); 323 return 
1; 324 } 325 __setup("trace_options=", set_trace_boot_options); 326 327 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata; 328 static char *trace_boot_clock __initdata; 329 330 static int __init set_trace_boot_clock(char *str) 331 { 332 strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE); 333 trace_boot_clock = trace_boot_clock_buf; 334 return 1; 335 } 336 __setup("trace_clock=", set_trace_boot_clock); 337 338 static int __init set_tracepoint_printk(char *str) 339 { 340 /* Ignore the "tp_printk_stop_on_boot" param */ 341 if (*str == '_') 342 return 0; 343 344 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0)) 345 tracepoint_printk = 1; 346 return 1; 347 } 348 __setup("tp_printk", set_tracepoint_printk); 349 350 static int __init set_tracepoint_printk_stop(char *str) 351 { 352 tracepoint_printk_stop_on_boot = true; 353 return 1; 354 } 355 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop); 356 357 static int __init set_traceoff_after_boot(char *str) 358 { 359 traceoff_after_boot = true; 360 return 1; 361 } 362 __setup("traceoff_after_boot", set_traceoff_after_boot); 363 364 unsigned long long ns2usecs(u64 nsec) 365 { 366 nsec += 500; 367 do_div(nsec, 1000); 368 return nsec; 369 } 370 371 static void 372 trace_process_export(struct trace_export *export, 373 struct ring_buffer_event *event, int flag) 374 { 375 struct trace_entry *entry; 376 unsigned int size = 0; 377 378 if (export->flags & flag) { 379 entry = ring_buffer_event_data(event); 380 size = ring_buffer_event_length(event); 381 export->write(export, entry, size); 382 } 383 } 384 385 static DEFINE_MUTEX(ftrace_export_lock); 386 387 static struct trace_export __rcu *ftrace_exports_list __read_mostly; 388 389 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled); 390 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled); 391 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled); 392 393 static inline void ftrace_exports_enable(struct trace_export *export) 394 { 
395 if (export->flags & TRACE_EXPORT_FUNCTION) 396 static_branch_inc(&trace_function_exports_enabled); 397 398 if (export->flags & TRACE_EXPORT_EVENT) 399 static_branch_inc(&trace_event_exports_enabled); 400 401 if (export->flags & TRACE_EXPORT_MARKER) 402 static_branch_inc(&trace_marker_exports_enabled); 403 } 404 405 static inline void ftrace_exports_disable(struct trace_export *export) 406 { 407 if (export->flags & TRACE_EXPORT_FUNCTION) 408 static_branch_dec(&trace_function_exports_enabled); 409 410 if (export->flags & TRACE_EXPORT_EVENT) 411 static_branch_dec(&trace_event_exports_enabled); 412 413 if (export->flags & TRACE_EXPORT_MARKER) 414 static_branch_dec(&trace_marker_exports_enabled); 415 } 416 417 static void ftrace_exports(struct ring_buffer_event *event, int flag) 418 { 419 struct trace_export *export; 420 421 guard(preempt_notrace)(); 422 423 export = rcu_dereference_raw_check(ftrace_exports_list); 424 while (export) { 425 trace_process_export(export, event, flag); 426 export = rcu_dereference_raw_check(export->next); 427 } 428 } 429 430 static inline void 431 add_trace_export(struct trace_export **list, struct trace_export *export) 432 { 433 rcu_assign_pointer(export->next, *list); 434 /* 435 * We are entering export into the list but another 436 * CPU might be walking that list. We need to make sure 437 * the export->next pointer is valid before another CPU sees 438 * the export pointer included into the list. 
439 */ 440 rcu_assign_pointer(*list, export); 441 } 442 443 static inline int 444 rm_trace_export(struct trace_export **list, struct trace_export *export) 445 { 446 struct trace_export **p; 447 448 for (p = list; *p != NULL; p = &(*p)->next) 449 if (*p == export) 450 break; 451 452 if (*p != export) 453 return -1; 454 455 rcu_assign_pointer(*p, (*p)->next); 456 457 return 0; 458 } 459 460 static inline void 461 add_ftrace_export(struct trace_export **list, struct trace_export *export) 462 { 463 ftrace_exports_enable(export); 464 465 add_trace_export(list, export); 466 } 467 468 static inline int 469 rm_ftrace_export(struct trace_export **list, struct trace_export *export) 470 { 471 int ret; 472 473 ret = rm_trace_export(list, export); 474 ftrace_exports_disable(export); 475 476 return ret; 477 } 478 479 int register_ftrace_export(struct trace_export *export) 480 { 481 if (WARN_ON_ONCE(!export->write)) 482 return -1; 483 484 guard(mutex)(&ftrace_export_lock); 485 486 add_ftrace_export(&ftrace_exports_list, export); 487 488 return 0; 489 } 490 EXPORT_SYMBOL_GPL(register_ftrace_export); 491 492 int unregister_ftrace_export(struct trace_export *export) 493 { 494 guard(mutex)(&ftrace_export_lock); 495 return rm_ftrace_export(&ftrace_exports_list, export); 496 } 497 EXPORT_SYMBOL_GPL(unregister_ftrace_export); 498 499 /* trace_flags holds trace_options default values */ 500 #define TRACE_DEFAULT_FLAGS \ 501 (FUNCTION_DEFAULT_FLAGS | FPROFILE_DEFAULT_FLAGS | \ 502 TRACE_ITER(PRINT_PARENT) | TRACE_ITER(PRINTK) | \ 503 TRACE_ITER(ANNOTATE) | TRACE_ITER(CONTEXT_INFO) | \ 504 TRACE_ITER(RECORD_CMD) | TRACE_ITER(OVERWRITE) | \ 505 TRACE_ITER(IRQ_INFO) | TRACE_ITER(MARKERS) | \ 506 TRACE_ITER(HASH_PTR) | TRACE_ITER(TRACE_PRINTK) | \ 507 TRACE_ITER(COPY_MARKER)) 508 509 /* trace_options that are only supported by global_trace */ 510 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER(PRINTK) | \ 511 TRACE_ITER(PRINTK_MSGONLY) | TRACE_ITER(RECORD_CMD) | \ 512 TRACE_ITER(PROF_TEXT_OFFSET) | 
FPROFILE_DEFAULT_FLAGS) 513 514 /* trace_flags that are default zero for instances */ 515 #define ZEROED_TRACE_FLAGS \ 516 (TRACE_ITER(EVENT_FORK) | TRACE_ITER(FUNC_FORK) | TRACE_ITER(TRACE_PRINTK) | \ 517 TRACE_ITER(COPY_MARKER)) 518 519 /* 520 * The global_trace is the descriptor that holds the top-level tracing 521 * buffers for the live tracing. 522 */ 523 static struct trace_array global_trace = { 524 .trace_flags = TRACE_DEFAULT_FLAGS, 525 }; 526 527 struct trace_array *printk_trace = &global_trace; 528 529 /* List of trace_arrays interested in the top level trace_marker */ 530 static LIST_HEAD(marker_copies); 531 532 static void update_printk_trace(struct trace_array *tr) 533 { 534 if (printk_trace == tr) 535 return; 536 537 printk_trace->trace_flags &= ~TRACE_ITER(TRACE_PRINTK); 538 printk_trace = tr; 539 tr->trace_flags |= TRACE_ITER(TRACE_PRINTK); 540 } 541 542 /* Returns true if the status of tr changed */ 543 static bool update_marker_trace(struct trace_array *tr, int enabled) 544 { 545 lockdep_assert_held(&event_mutex); 546 547 if (enabled) { 548 if (tr->trace_flags & TRACE_ITER(COPY_MARKER)) 549 return false; 550 551 list_add_rcu(&tr->marker_list, &marker_copies); 552 tr->trace_flags |= TRACE_ITER(COPY_MARKER); 553 return true; 554 } 555 556 if (!(tr->trace_flags & TRACE_ITER(COPY_MARKER))) 557 return false; 558 559 list_del_rcu(&tr->marker_list); 560 tr->trace_flags &= ~TRACE_ITER(COPY_MARKER); 561 return true; 562 } 563 564 void trace_set_ring_buffer_expanded(struct trace_array *tr) 565 { 566 if (!tr) 567 tr = &global_trace; 568 tr->ring_buffer_expanded = true; 569 } 570 571 static void trace_array_autoremove(struct work_struct *work) 572 { 573 struct trace_array *tr = container_of(work, struct trace_array, autoremove_work); 574 575 trace_array_destroy(tr); 576 } 577 578 static struct workqueue_struct *autoremove_wq; 579 580 static void trace_array_kick_autoremove(struct trace_array *tr) 581 { 582 if (autoremove_wq) 583 queue_work(autoremove_wq, 
&tr->autoremove_work); 584 } 585 586 static void trace_array_cancel_autoremove(struct trace_array *tr) 587 { 588 /* 589 * Since this can be called inside trace_array_autoremove(), 590 * it has to avoid deadlock of the workqueue. 591 */ 592 if (work_pending(&tr->autoremove_work)) 593 cancel_work_sync(&tr->autoremove_work); 594 } 595 596 static void trace_array_init_autoremove(struct trace_array *tr) 597 { 598 INIT_WORK(&tr->autoremove_work, trace_array_autoremove); 599 } 600 601 static void trace_array_start_autoremove(void) 602 { 603 if (autoremove_wq) 604 return; 605 606 autoremove_wq = alloc_workqueue("tr_autoremove_wq", 607 WQ_UNBOUND | WQ_HIGHPRI, 0); 608 if (!autoremove_wq) 609 pr_warn("Unable to allocate tr_autoremove_wq. autoremove disabled.\n"); 610 } 611 612 LIST_HEAD(ftrace_trace_arrays); 613 614 static int __trace_array_get(struct trace_array *this_tr) 615 { 616 /* When free_on_close is set, this is not available anymore. */ 617 if (autoremove_wq && this_tr->free_on_close) 618 return -ENODEV; 619 620 this_tr->ref++; 621 return 0; 622 } 623 624 int trace_array_get(struct trace_array *this_tr) 625 { 626 struct trace_array *tr; 627 628 guard(mutex)(&trace_types_lock); 629 list_for_each_entry(tr, &ftrace_trace_arrays, list) { 630 if (tr == this_tr) { 631 return __trace_array_get(tr); 632 } 633 } 634 635 return -ENODEV; 636 } 637 638 static void __trace_array_put(struct trace_array *this_tr) 639 { 640 WARN_ON(!this_tr->ref); 641 this_tr->ref--; 642 /* 643 * When free_on_close is set, prepare removing the array 644 * when the last reference is released. 645 */ 646 if (this_tr->ref == 1 && this_tr->free_on_close) 647 trace_array_kick_autoremove(this_tr); 648 } 649 650 /** 651 * trace_array_put - Decrement the reference counter for this trace array. 652 * @this_tr : pointer to the trace array 653 * 654 * NOTE: Use this when we no longer need the trace array returned by 655 * trace_array_get_by_name(). This ensures the trace array can be later 656 * destroyed. 
657 * 658 */ 659 void trace_array_put(struct trace_array *this_tr) 660 { 661 if (!this_tr) 662 return; 663 664 guard(mutex)(&trace_types_lock); 665 __trace_array_put(this_tr); 666 } 667 EXPORT_SYMBOL_GPL(trace_array_put); 668 669 int tracing_check_open_get_tr(struct trace_array *tr) 670 { 671 int ret; 672 673 ret = security_locked_down(LOCKDOWN_TRACEFS); 674 if (ret) 675 return ret; 676 677 if (tracing_disabled) 678 return -ENODEV; 679 680 if (tr && trace_array_get(tr) < 0) 681 return -ENODEV; 682 683 return 0; 684 } 685 686 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu) 687 { 688 u64 ts; 689 690 /* Early boot up does not have a buffer yet */ 691 if (!buf->buffer) 692 return trace_clock_local(); 693 694 ts = ring_buffer_time_stamp(buf->buffer); 695 ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts); 696 697 return ts; 698 } 699 700 u64 ftrace_now(int cpu) 701 { 702 return buffer_ftrace_now(&global_trace.array_buffer, cpu); 703 } 704 705 /** 706 * tracing_is_enabled - Show if global_trace has been enabled 707 * 708 * Shows if the global trace has been enabled or not. It uses the 709 * mirror flag "buffer_disabled" to be used in fast paths such as for 710 * the irqsoff tracer. But it may be inaccurate due to races. If you 711 * need to know the accurate state, use tracing_is_on() which is a little 712 * slower, but accurate. 713 */ 714 int tracing_is_enabled(void) 715 { 716 /* 717 * For quick access (irqsoff uses this in fast path), just 718 * return the mirror variable of the state of the ring buffer. 719 * It's a little racy, but we don't really care. 720 */ 721 return !global_trace.buffer_disabled; 722 } 723 724 /* 725 * trace_buf_size is the size in bytes that is allocated 726 * for a buffer. Note, the number of bytes is always rounded 727 * to page size. 728 * 729 * This number is purposely set to a low number of 16384. 730 * If the dump on oops happens, it will be much appreciated 731 * to not have to wait for all that output. 
Anyway this can be 732 * boot time and run time configurable. 733 */ 734 #define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */ 735 736 static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT; 737 738 /* trace_types holds a link list of available tracers. */ 739 static struct tracer *trace_types __read_mostly; 740 741 /* 742 * trace_types_lock is used to protect the trace_types list. 743 */ 744 DEFINE_MUTEX(trace_types_lock); 745 746 /* 747 * serialize the access of the ring buffer 748 * 749 * ring buffer serializes readers, but it is low level protection. 750 * The validity of the events (which returns by ring_buffer_peek() ..etc) 751 * are not protected by ring buffer. 752 * 753 * The content of events may become garbage if we allow other process consumes 754 * these events concurrently: 755 * A) the page of the consumed events may become a normal page 756 * (not reader page) in ring buffer, and this page will be rewritten 757 * by events producer. 758 * B) The page of the consumed events may become a page for splice_read, 759 * and this page will be returned to system. 760 * 761 * These primitives allow multi process access to different cpu ring buffer 762 * concurrently. 763 * 764 * These primitives don't distinguish read-only and read-consume access. 765 * Multi read-only access are also serialized. 766 */ 767 768 #ifdef CONFIG_SMP 769 static DECLARE_RWSEM(all_cpu_access_lock); 770 static DEFINE_PER_CPU(struct mutex, cpu_access_lock); 771 772 static inline void trace_access_lock(int cpu) 773 { 774 if (cpu == RING_BUFFER_ALL_CPUS) { 775 /* gain it for accessing the whole ring buffer. */ 776 down_write(&all_cpu_access_lock); 777 } else { 778 /* gain it for accessing a cpu ring buffer. */ 779 780 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */ 781 down_read(&all_cpu_access_lock); 782 783 /* Secondly block other access to this @cpu ring buffer. 
*/ 784 mutex_lock(&per_cpu(cpu_access_lock, cpu)); 785 } 786 } 787 788 static inline void trace_access_unlock(int cpu) 789 { 790 if (cpu == RING_BUFFER_ALL_CPUS) { 791 up_write(&all_cpu_access_lock); 792 } else { 793 mutex_unlock(&per_cpu(cpu_access_lock, cpu)); 794 up_read(&all_cpu_access_lock); 795 } 796 } 797 798 static inline void trace_access_lock_init(void) 799 { 800 int cpu; 801 802 for_each_possible_cpu(cpu) 803 mutex_init(&per_cpu(cpu_access_lock, cpu)); 804 } 805 806 #else 807 808 static DEFINE_MUTEX(access_lock); 809 810 static inline void trace_access_lock(int cpu) 811 { 812 (void)cpu; 813 mutex_lock(&access_lock); 814 } 815 816 static inline void trace_access_unlock(int cpu) 817 { 818 (void)cpu; 819 mutex_unlock(&access_lock); 820 } 821 822 static inline void trace_access_lock_init(void) 823 { 824 } 825 826 #endif 827 828 void tracer_tracing_on(struct trace_array *tr) 829 { 830 if (tr->array_buffer.buffer) 831 ring_buffer_record_on(tr->array_buffer.buffer); 832 /* 833 * This flag is looked at when buffers haven't been allocated 834 * yet, or by some tracers (like irqsoff), that just want to 835 * know if the ring buffer has been disabled, but it can handle 836 * races of where it gets disabled but we still do a record. 837 * As the check is in the fast path of the tracers, it is more 838 * important to be fast than accurate. 839 */ 840 tr->buffer_disabled = 0; 841 } 842 843 /** 844 * tracing_on - enable tracing buffers 845 * 846 * This function enables tracing buffers that may have been 847 * disabled with tracing_off. 848 */ 849 void tracing_on(void) 850 { 851 tracer_tracing_on(&global_trace); 852 } 853 EXPORT_SYMBOL_GPL(tracing_on); 854 855 #ifdef CONFIG_TRACER_SNAPSHOT 856 /** 857 * tracing_snapshot - take a snapshot of the current buffer. 858 * 859 * This causes a swap between the snapshot buffer and the current live 860 * tracing buffer. 
You can use this to take snapshots of the live 861 * trace when some condition is triggered, but continue to trace. 862 * 863 * Note, make sure to allocate the snapshot with either 864 * a tracing_snapshot_alloc(), or by doing it manually 865 * with: echo 1 > /sys/kernel/tracing/snapshot 866 * 867 * If the snapshot buffer is not allocated, it will stop tracing. 868 * Basically making a permanent snapshot. 869 */ 870 void tracing_snapshot(void) 871 { 872 struct trace_array *tr = &global_trace; 873 874 tracing_snapshot_instance(tr); 875 } 876 EXPORT_SYMBOL_GPL(tracing_snapshot); 877 878 /** 879 * tracing_alloc_snapshot - allocate snapshot buffer. 880 * 881 * This only allocates the snapshot buffer if it isn't already 882 * allocated - it doesn't also take a snapshot. 883 * 884 * This is meant to be used in cases where the snapshot buffer needs 885 * to be set up for events that can't sleep but need to be able to 886 * trigger a snapshot. 887 */ 888 int tracing_alloc_snapshot(void) 889 { 890 struct trace_array *tr = &global_trace; 891 int ret; 892 893 ret = tracing_alloc_snapshot_instance(tr); 894 WARN_ON(ret < 0); 895 896 return ret; 897 } 898 #else 899 void tracing_snapshot(void) 900 { 901 WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used"); 902 } 903 EXPORT_SYMBOL_GPL(tracing_snapshot); 904 void tracing_snapshot_alloc(void) 905 { 906 /* Give warning */ 907 tracing_snapshot(); 908 } 909 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc); 910 #endif /* CONFIG_TRACER_SNAPSHOT */ 911 912 void tracer_tracing_off(struct trace_array *tr) 913 { 914 if (tr->array_buffer.buffer) 915 ring_buffer_record_off(tr->array_buffer.buffer); 916 /* 917 * This flag is looked at when buffers haven't been allocated 918 * yet, or by some tracers (like irqsoff), that just want to 919 * know if the ring buffer has been disabled, but it can handle 920 * races of where it gets disabled but we still do a record. 
921 * As the check is in the fast path of the tracers, it is more 922 * important to be fast than accurate. 923 */ 924 tr->buffer_disabled = 1; 925 } 926 927 /** 928 * tracer_tracing_disable() - temporary disable the buffer from write 929 * @tr: The trace array to disable its buffer for 930 * 931 * Expects trace_tracing_enable() to re-enable tracing. 932 * The difference between this and tracer_tracing_off() is that this 933 * is a counter and can nest, whereas, tracer_tracing_off() can 934 * be called multiple times and a single trace_tracing_on() will 935 * enable it. 936 */ 937 void tracer_tracing_disable(struct trace_array *tr) 938 { 939 if (WARN_ON_ONCE(!tr->array_buffer.buffer)) 940 return; 941 942 ring_buffer_record_disable(tr->array_buffer.buffer); 943 } 944 945 /** 946 * tracer_tracing_enable() - counter part of tracer_tracing_disable() 947 * @tr: The trace array that had tracer_tracincg_disable() called on it 948 * 949 * This is called after tracer_tracing_disable() has been called on @tr, 950 * when it's safe to re-enable tracing. 951 */ 952 void tracer_tracing_enable(struct trace_array *tr) 953 { 954 if (WARN_ON_ONCE(!tr->array_buffer.buffer)) 955 return; 956 957 ring_buffer_record_enable(tr->array_buffer.buffer); 958 } 959 960 /** 961 * tracing_off - turn off tracing buffers 962 * 963 * This function stops the tracing buffers from recording data. 964 * It does not disable any overhead the tracers themselves may 965 * be causing. This function simply causes all recording to 966 * the ring buffers to fail. 
 */
void tracing_off(void)
{
	tracer_tracing_off(&global_trace);
}
EXPORT_SYMBOL_GPL(tracing_off);

/*
 * If __disable_trace_on_warning is set, write a note into the top-level
 * buffer and turn tracing off. The same is done for the printk_trace
 * array when that is not global_trace.
 */
void disable_trace_on_warning(void)
{
	if (__disable_trace_on_warning) {
		struct trace_array *tr = READ_ONCE(printk_trace);

		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
			"Disabling tracing due to warning\n");
		tracing_off();

		/* Disable trace_printk() buffer too */
		if (tr != &global_trace) {
			trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_,
					       "Disabling tracing due to warning\n");
			tracer_tracing_off(tr);
		}
	}
}

/**
 * tracer_tracing_is_on - show real state of ring buffer enabled
 * @tr : the trace array to know if ring buffer is enabled
 *
 * Shows real state of the ring buffer if it is enabled or not.
 * When @tr has no ring buffer, the buffer_disabled mirror flag is
 * reported instead.
 */
bool tracer_tracing_is_on(struct trace_array *tr)
{
	if (tr->array_buffer.buffer)
		return ring_buffer_record_is_set_on(tr->array_buffer.buffer);
	return !tr->buffer_disabled;
}

/**
 * tracing_is_on - show state of ring buffers enabled
 */
int tracing_is_on(void)
{
	return tracer_tracing_is_on(&global_trace);
}
EXPORT_SYMBOL_GPL(tracing_is_on);

/*
 * "trace_buf_size=<size>": parsed with memparse() and clamped to a
 * minimum of 4096 bytes.
 */
static int __init set_buf_size(char *str)
{
	unsigned long buf_size;

	if (!str)
		return 0;
	buf_size = memparse(str, &str);
	/*
	 * nr_entries can not be zero and the startup
	 * tests require some buffer space. Therefore
	 * ensure we have at least 4096 bytes of buffer.
	 */
	trace_buf_size = max(4096UL, buf_size);
	return 1;
}
__setup("trace_buf_size=", set_buf_size);

/*
 * "tracing_thresh=<n>": tracing_thresh is set to n * 1000. Returns 0
 * (not handled) when @str is NULL or does not parse as a number.
 * NOTE(review): presumably n is in microseconds and tracing_thresh in
 * nanoseconds - confirm against the users of tracing_thresh.
 */
static int __init set_tracing_thresh(char *str)
{
	unsigned long threshold;
	int ret;

	if (!str)
		return 0;
	ret = kstrtoul(str, 0, &threshold);
	if (ret < 0)
		return 0;
	tracing_thresh = threshold * 1000;
	return 1;
}
__setup("tracing_thresh=", set_tracing_thresh);

/* Convert nanoseconds to microseconds, truncating (no rounding). */
unsigned long nsecs_to_usecs(unsigned long nsecs)
{
	return nsecs / 1000;
}

/*
 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
 * of strings in the order that the evals (enum) were defined.
 */
#undef C
#define C(a, b) b

/* These must match the bit positions in trace_iterator_flags */
static const char *trace_options[] = {
	TRACE_FLAGS
	NULL
};

/* Table of selectable trace clocks; trace_clock_in_ns() indexes it by clock_id. */
static struct {
	u64 (*func)(void);
	const char *name;
	int in_ns;		/* is this clock in nanoseconds? */
} trace_clocks[] = {
	{ trace_clock_local,		"local",	1 },
	{ trace_clock_global,		"global",	1 },
	{ trace_clock_counter,		"counter",	0 },
	{ trace_clock_jiffies,		"uptime",	0 },
	{ trace_clock,			"perf",		1 },
	{ ktime_get_mono_fast_ns,	"mono",		1 },
	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
	{ ktime_get_boot_fast_ns,	"boot",		1 },
	{ ktime_get_tai_fast_ns,	"tai",		1 },
	ARCH_TRACE_CLOCKS
};

/* Returns true if the clock selected by @tr->clock_id counts in nanoseconds. */
bool trace_clock_in_ns(struct trace_array *tr)
{
	if (trace_clocks[tr->clock_id].in_ns)
		return true;

	return false;
}

/*
 * trace_parser_get_init - gets the buffer for trace parser
 *
 * Zeroes @parser and allocates a @size byte buffer for it.
 * Returns 0 on success and 1 (not a negative errno) if the
 * allocation fails.
 */
int trace_parser_get_init(struct trace_parser *parser, int size)
{
	memset(parser, 0, sizeof(*parser));

	parser->buffer = kmalloc(size, GFP_KERNEL);
	if (!parser->buffer)
		return 1;

	parser->size = size;
	return 0;
}

/*
 * trace_parser_put - frees the buffer for trace parser
 *
 * The buffer pointer is cleared afterwards.
 */
void trace_parser_put(struct trace_parser *parser)
{
	kfree(parser->buffer);
	parser->buffer = NULL;
}

/*
 * trace_get_user - reads the user input string separated by space
 * (matched by isspace(ch))
 *
 * For each string found the 'struct trace_parser' is updated,
 * and the function returns.
 *
 * Returns number of bytes read, and advances *@ppos by the same amount.
 * On failure the parser is marked failed via trace_parser_fail() and
 * the error is returned: the get_user() error, or -EINVAL when the
 * word does not fit in parser->size - 1 bytes.
 *
 * A word that is cut off by the end of @cnt sets parser->cont, so the
 * next call continues it instead of skipping leading white space.
 *
 * NOTE(review): the first get_user() and cnt-- run before @cnt is
 * looked at; callers presumably guarantee cnt > 0 - confirm.
 *
 * See kernel/trace/trace.h for 'struct trace_parser' details.
 */
int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
	size_t cnt, loff_t *ppos)
{
	char ch;
	size_t read = 0;
	ssize_t ret;

	/* A write at offset zero starts from a clean parser */
	if (!*ppos)
		trace_parser_clear(parser);

	ret = get_user(ch, ubuf++);
	if (ret)
		goto fail;

	read++;
	cnt--;

	/*
	 * The parser is not finished with the last write,
	 * continue reading the user input without skipping spaces.
	 */
	if (!parser->cont) {
		/* skip white space */
		while (cnt && isspace(ch)) {
			ret = get_user(ch, ubuf++);
			if (ret)
				goto fail;
			read++;
			cnt--;
		}

		parser->idx = 0;

		/* only spaces were written */
		if (isspace(ch) || !ch) {
			*ppos += read;
			return read;
		}
	}

	/* read the non-space input */
	while (cnt && !isspace(ch) && ch) {
		/* Always leave room for the terminating '\0' */
		if (parser->idx < parser->size - 1)
			parser->buffer[parser->idx++] = ch;
		else {
			ret = -EINVAL;
			goto fail;
		}

		ret = get_user(ch, ubuf++);
		if (ret)
			goto fail;
		read++;
		cnt--;
	}

	/* We either got finished input or we have to wait for another call. */
	if (isspace(ch) || !ch) {
		parser->buffer[parser->idx] = 0;
		parser->cont = false;
	} else if (parser->idx < parser->size - 1) {
		/* Input ran out mid-word: store the last char and continue later */
		parser->cont = true;
		parser->buffer[parser->idx++] = ch;
		/* Make sure the parsed string always terminates with '\0'. */
		parser->buffer[parser->idx] = 0;
	} else {
		ret = -EINVAL;
		goto fail;
	}

	*ppos += read;
	return read;
fail:
	trace_parser_fail(parser);
	return ret;
}

/* TODO add a seq_buf_to_buffer() */
/*
 * Copy up to @cnt unread bytes of @s into @buf and advance s->readpos.
 * Returns the number of bytes copied, or -EBUSY when nothing is unread.
 */
static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
{
	int len;

	if (trace_seq_used(s) <= s->readpos)
		return -EBUSY;

	len = trace_seq_used(s) - s->readpos;
	if (cnt > len)
		cnt = len;
	memcpy(buf, s->buffer + s->readpos, cnt);

	s->readpos += cnt;
	return cnt;
}

unsigned long __read_mostly tracing_thresh;

/* Argument bundle handed to wait_pipe_cond() through ring_buffer_wait() */
struct pipe_wait {
	struct trace_iterator		*iter;
	int				wait_index;	/* iter->wait_index sampled before waiting */
};

/*
 * Returns true once iter->wait_index differs from the value sampled in
 * @data, or when the iterator has been closed.
 */
static bool wait_pipe_cond(void *data)
{
	struct pipe_wait *pwait = data;
	struct trace_iterator *iter = pwait->iter;

	if (atomic_read_acquire(&iter->wait_index) != pwait->wait_index)
		return true;

	return iter->closed;
}

static int wait_on_pipe(struct trace_iterator *iter, int full)
{
	struct pipe_wait pwait;
	int ret;

	/* Iterators are static, they should be filled or empty */
	if (trace_buffer_iter(iter, iter->cpu_file))
		return 0;

	pwait.wait_index = atomic_read_acquire(&iter->wait_index);
	pwait.iter = iter;

	ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full,
			       wait_pipe_cond, &pwait);

#ifdef CONFIG_TRACER_SNAPSHOT
	/*
	 * Make sure this is still the snapshot buffer, as if a snapshot were
	 * to happen, this would now be the main buffer.
1257 */ 1258 if (iter->snapshot) 1259 iter->array_buffer = &iter->tr->snapshot_buffer; 1260 #endif 1261 return ret; 1262 } 1263 1264 #ifdef CONFIG_FTRACE_STARTUP_TEST 1265 static bool selftests_can_run; 1266 1267 struct trace_selftests { 1268 struct list_head list; 1269 struct tracer *type; 1270 }; 1271 1272 static LIST_HEAD(postponed_selftests); 1273 1274 static int save_selftest(struct tracer *type) 1275 { 1276 struct trace_selftests *selftest; 1277 1278 selftest = kmalloc(sizeof(*selftest), GFP_KERNEL); 1279 if (!selftest) 1280 return -ENOMEM; 1281 1282 selftest->type = type; 1283 list_add(&selftest->list, &postponed_selftests); 1284 return 0; 1285 } 1286 1287 static int run_tracer_selftest(struct tracer *type) 1288 { 1289 struct trace_array *tr = &global_trace; 1290 struct tracer_flags *saved_flags = tr->current_trace_flags; 1291 struct tracer *saved_tracer = tr->current_trace; 1292 int ret; 1293 1294 if (!type->selftest || tracing_selftest_disabled) 1295 return 0; 1296 1297 /* 1298 * If a tracer registers early in boot up (before scheduling is 1299 * initialized and such), then do not run its selftests yet. 1300 * Instead, run it a little later in the boot process. 1301 */ 1302 if (!selftests_can_run) 1303 return save_selftest(type); 1304 1305 if (!tracing_is_on()) { 1306 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n", 1307 type->name); 1308 return 0; 1309 } 1310 1311 /* 1312 * Run a selftest on this tracer. 1313 * Here we reset the trace buffer, and set the current 1314 * tracer to be this tracer. The tracer can then run some 1315 * internal tracing to verify that everything is in order. 1316 * If we fail, we do not register this tracer. 1317 */ 1318 tracing_reset_online_cpus(&tr->array_buffer); 1319 1320 tr->current_trace = type; 1321 tr->current_trace_flags = type->flags ? 
: type->default_flags; 1322 1323 #ifdef CONFIG_TRACER_MAX_TRACE 1324 if (tracer_uses_snapshot(type)) { 1325 /* If we expanded the buffers, make sure the max is expanded too */ 1326 if (tr->ring_buffer_expanded) 1327 ring_buffer_resize(tr->snapshot_buffer.buffer, trace_buf_size, 1328 RING_BUFFER_ALL_CPUS); 1329 tr->allocated_snapshot = true; 1330 } 1331 #endif 1332 1333 /* the test is responsible for initializing and enabling */ 1334 pr_info("Testing tracer %s: ", type->name); 1335 ret = type->selftest(type, tr); 1336 /* the test is responsible for resetting too */ 1337 tr->current_trace = saved_tracer; 1338 tr->current_trace_flags = saved_flags; 1339 if (ret) { 1340 printk(KERN_CONT "FAILED!\n"); 1341 /* Add the warning after printing 'FAILED' */ 1342 WARN_ON(1); 1343 return -1; 1344 } 1345 /* Only reset on passing, to avoid touching corrupted buffers */ 1346 tracing_reset_online_cpus(&tr->array_buffer); 1347 1348 #ifdef CONFIG_TRACER_MAX_TRACE 1349 if (tracer_uses_snapshot(type)) { 1350 tr->allocated_snapshot = false; 1351 1352 /* Shrink the max buffer again */ 1353 if (tr->ring_buffer_expanded) 1354 ring_buffer_resize(tr->snapshot_buffer.buffer, 1, 1355 RING_BUFFER_ALL_CPUS); 1356 } 1357 #endif 1358 1359 printk(KERN_CONT "PASSED\n"); 1360 return 0; 1361 } 1362 1363 static int do_run_tracer_selftest(struct tracer *type) 1364 { 1365 int ret; 1366 1367 /* 1368 * Tests can take a long time, especially if they are run one after the 1369 * other, as does happen during bootup when all the tracers are 1370 * registered. This could cause the soft lockup watchdog to trigger. 
1371 */ 1372 cond_resched(); 1373 1374 tracing_selftest_running = true; 1375 ret = run_tracer_selftest(type); 1376 tracing_selftest_running = false; 1377 1378 return ret; 1379 } 1380 1381 static __init int init_trace_selftests(void) 1382 { 1383 struct trace_selftests *p, *n; 1384 struct tracer *t, **last; 1385 int ret; 1386 1387 selftests_can_run = true; 1388 1389 guard(mutex)(&trace_types_lock); 1390 1391 if (list_empty(&postponed_selftests)) 1392 return 0; 1393 1394 pr_info("Running postponed tracer tests:\n"); 1395 1396 tracing_selftest_running = true; 1397 list_for_each_entry_safe(p, n, &postponed_selftests, list) { 1398 /* This loop can take minutes when sanitizers are enabled, so 1399 * lets make sure we allow RCU processing. 1400 */ 1401 cond_resched(); 1402 ret = run_tracer_selftest(p->type); 1403 /* If the test fails, then warn and remove from available_tracers */ 1404 if (ret < 0) { 1405 WARN(1, "tracer: %s failed selftest, disabling\n", 1406 p->type->name); 1407 last = &trace_types; 1408 for (t = trace_types; t; t = t->next) { 1409 if (t == p->type) { 1410 *last = t->next; 1411 break; 1412 } 1413 last = &t->next; 1414 } 1415 } 1416 list_del(&p->list); 1417 kfree(p); 1418 } 1419 tracing_selftest_running = false; 1420 1421 return 0; 1422 } 1423 core_initcall(init_trace_selftests); 1424 #else 1425 static inline int do_run_tracer_selftest(struct tracer *type) 1426 { 1427 return 0; 1428 } 1429 #endif /* CONFIG_FTRACE_STARTUP_TEST */ 1430 1431 static int add_tracer(struct trace_array *tr, struct tracer *t); 1432 1433 static void __init apply_trace_boot_options(void); 1434 1435 static void free_tracers(struct trace_array *tr) 1436 { 1437 struct tracers *t, *n; 1438 1439 lockdep_assert_held(&trace_types_lock); 1440 1441 list_for_each_entry_safe(t, n, &tr->tracers, list) { 1442 list_del(&t->list); 1443 kfree(t->flags); 1444 kfree(t); 1445 } 1446 } 1447 1448 /** 1449 * register_tracer - register a tracer with the ftrace system. 
1450 * @type: the plugin for the tracer 1451 * 1452 * Register a new plugin tracer. 1453 */ 1454 int __init register_tracer(struct tracer *type) 1455 { 1456 struct trace_array *tr; 1457 struct tracer *t; 1458 int ret = 0; 1459 1460 if (!type->name) { 1461 pr_info("Tracer must have a name\n"); 1462 return -1; 1463 } 1464 1465 if (strlen(type->name) >= MAX_TRACER_SIZE) { 1466 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE); 1467 return -1; 1468 } 1469 1470 if (security_locked_down(LOCKDOWN_TRACEFS)) { 1471 pr_warn("Can not register tracer %s due to lockdown\n", 1472 type->name); 1473 return -EPERM; 1474 } 1475 1476 mutex_lock(&trace_types_lock); 1477 1478 for (t = trace_types; t; t = t->next) { 1479 if (strcmp(type->name, t->name) == 0) { 1480 /* already found */ 1481 pr_info("Tracer %s already registered\n", 1482 type->name); 1483 ret = -1; 1484 goto out; 1485 } 1486 } 1487 1488 /* store the tracer for __set_tracer_option */ 1489 if (type->flags) 1490 type->flags->trace = type; 1491 1492 ret = do_run_tracer_selftest(type); 1493 if (ret < 0) 1494 goto out; 1495 1496 list_for_each_entry(tr, &ftrace_trace_arrays, list) { 1497 ret = add_tracer(tr, type); 1498 if (ret < 0) { 1499 /* The tracer will still exist but without options */ 1500 pr_warn("Failed to create tracer options for %s\n", type->name); 1501 break; 1502 } 1503 } 1504 1505 type->next = trace_types; 1506 trace_types = type; 1507 1508 out: 1509 mutex_unlock(&trace_types_lock); 1510 1511 if (ret || !default_bootup_tracer) 1512 return ret; 1513 1514 if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE)) 1515 return 0; 1516 1517 printk(KERN_INFO "Starting tracer '%s'\n", type->name); 1518 /* Do we want this tracer to start on bootup? */ 1519 WARN_ON(tracing_set_tracer(&global_trace, type->name) < 0); 1520 default_bootup_tracer = NULL; 1521 1522 apply_trace_boot_options(); 1523 1524 /* disable other selftests, since this will break it. 
*/ 1525 disable_tracing_selftest("running a tracer"); 1526 1527 return 0; 1528 } 1529 1530 void tracing_reset_cpu(struct array_buffer *buf, int cpu) 1531 { 1532 struct trace_buffer *buffer = buf->buffer; 1533 1534 if (!buffer) 1535 return; 1536 1537 ring_buffer_record_disable(buffer); 1538 1539 /* Make sure all commits have finished */ 1540 synchronize_rcu(); 1541 ring_buffer_reset_cpu(buffer, cpu); 1542 1543 ring_buffer_record_enable(buffer); 1544 } 1545 1546 void tracing_reset_online_cpus(struct array_buffer *buf) 1547 { 1548 struct trace_buffer *buffer = buf->buffer; 1549 1550 if (!buffer) 1551 return; 1552 1553 ring_buffer_record_disable(buffer); 1554 1555 /* Make sure all commits have finished */ 1556 synchronize_rcu(); 1557 1558 buf->time_start = buffer_ftrace_now(buf, buf->cpu); 1559 1560 ring_buffer_reset_online_cpus(buffer); 1561 1562 ring_buffer_record_enable(buffer); 1563 } 1564 1565 static void tracing_reset_all_cpus(struct array_buffer *buf) 1566 { 1567 struct trace_buffer *buffer = buf->buffer; 1568 1569 if (!buffer) 1570 return; 1571 1572 ring_buffer_record_disable(buffer); 1573 1574 /* Make sure all commits have finished */ 1575 synchronize_rcu(); 1576 1577 buf->time_start = buffer_ftrace_now(buf, buf->cpu); 1578 1579 ring_buffer_reset(buffer); 1580 1581 ring_buffer_record_enable(buffer); 1582 } 1583 1584 /* Must have trace_types_lock held */ 1585 void tracing_reset_all_online_cpus_unlocked(void) 1586 { 1587 struct trace_array *tr; 1588 1589 lockdep_assert_held(&trace_types_lock); 1590 1591 list_for_each_entry(tr, &ftrace_trace_arrays, list) { 1592 if (!tr->clear_trace) 1593 continue; 1594 tr->clear_trace = false; 1595 tracing_reset_online_cpus(&tr->array_buffer); 1596 #ifdef CONFIG_TRACER_SNAPSHOT 1597 tracing_reset_online_cpus(&tr->snapshot_buffer); 1598 #endif 1599 } 1600 } 1601 1602 void tracing_reset_all_online_cpus(void) 1603 { 1604 guard(mutex)(&trace_types_lock); 1605 tracing_reset_all_online_cpus_unlocked(); 1606 } 1607 1608 int 
is_tracing_stopped(void) 1609 { 1610 return global_trace.stop_count; 1611 } 1612 1613 static void tracing_start_tr(struct trace_array *tr) 1614 { 1615 struct trace_buffer *buffer; 1616 1617 if (tracing_disabled) 1618 return; 1619 1620 guard(raw_spinlock_irqsave)(&tr->start_lock); 1621 if (--tr->stop_count) { 1622 if (WARN_ON_ONCE(tr->stop_count < 0)) { 1623 /* Someone screwed up their debugging */ 1624 tr->stop_count = 0; 1625 } 1626 return; 1627 } 1628 1629 /* Prevent the buffers from switching */ 1630 arch_spin_lock(&tr->max_lock); 1631 1632 buffer = tr->array_buffer.buffer; 1633 if (buffer) 1634 ring_buffer_record_enable(buffer); 1635 1636 #ifdef CONFIG_TRACER_SNAPSHOT 1637 buffer = tr->snapshot_buffer.buffer; 1638 if (buffer) 1639 ring_buffer_record_enable(buffer); 1640 #endif 1641 1642 arch_spin_unlock(&tr->max_lock); 1643 } 1644 1645 /** 1646 * tracing_start - quick start of the tracer 1647 * 1648 * If tracing is enabled but was stopped by tracing_stop, 1649 * this will start the tracer back up. 1650 */ 1651 void tracing_start(void) 1652 1653 { 1654 return tracing_start_tr(&global_trace); 1655 } 1656 1657 static void tracing_stop_tr(struct trace_array *tr) 1658 { 1659 struct trace_buffer *buffer; 1660 1661 guard(raw_spinlock_irqsave)(&tr->start_lock); 1662 if (tr->stop_count++) 1663 return; 1664 1665 /* Prevent the buffers from switching */ 1666 arch_spin_lock(&tr->max_lock); 1667 1668 buffer = tr->array_buffer.buffer; 1669 if (buffer) 1670 ring_buffer_record_disable(buffer); 1671 1672 #ifdef CONFIG_TRACER_SNAPSHOT 1673 buffer = tr->snapshot_buffer.buffer; 1674 if (buffer) 1675 ring_buffer_record_disable(buffer); 1676 #endif 1677 1678 arch_spin_unlock(&tr->max_lock); 1679 } 1680 1681 /** 1682 * tracing_stop - quick stop of the tracer 1683 * 1684 * Light weight way to stop tracing. Use in conjunction with 1685 * tracing_start. 
1686 */ 1687 void tracing_stop(void) 1688 { 1689 return tracing_stop_tr(&global_trace); 1690 } 1691 1692 /* 1693 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq 1694 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function 1695 * simplifies those functions and keeps them in sync. 1696 */ 1697 enum print_line_t trace_handle_return(struct trace_seq *s) 1698 { 1699 return trace_seq_has_overflowed(s) ? 1700 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED; 1701 } 1702 EXPORT_SYMBOL_GPL(trace_handle_return); 1703 1704 static unsigned short migration_disable_value(void) 1705 { 1706 #if defined(CONFIG_SMP) 1707 return current->migration_disabled; 1708 #else 1709 return 0; 1710 #endif 1711 } 1712 1713 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status) 1714 { 1715 unsigned int trace_flags = irqs_status; 1716 unsigned int pc; 1717 1718 pc = preempt_count(); 1719 1720 if (pc & NMI_MASK) 1721 trace_flags |= TRACE_FLAG_NMI; 1722 if (pc & HARDIRQ_MASK) 1723 trace_flags |= TRACE_FLAG_HARDIRQ; 1724 if (in_serving_softirq()) 1725 trace_flags |= TRACE_FLAG_SOFTIRQ; 1726 if (softirq_count() >> (SOFTIRQ_SHIFT + 1)) 1727 trace_flags |= TRACE_FLAG_BH_OFF; 1728 1729 if (tif_need_resched()) 1730 trace_flags |= TRACE_FLAG_NEED_RESCHED; 1731 if (test_preempt_need_resched()) 1732 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED; 1733 if (IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY) && tif_test_bit(TIF_NEED_RESCHED_LAZY)) 1734 trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY; 1735 return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) | 1736 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4; 1737 } 1738 1739 struct ring_buffer_event * 1740 trace_buffer_lock_reserve(struct trace_buffer *buffer, 1741 int type, 1742 unsigned long len, 1743 unsigned int trace_ctx) 1744 { 1745 return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx); 1746 } 1747 1748 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event); 1749 DEFINE_PER_CPU(int, 
trace_buffered_event_cnt); 1750 static int trace_buffered_event_ref; 1751 1752 /** 1753 * trace_buffered_event_enable - enable buffering events 1754 * 1755 * When events are being filtered, it is quicker to use a temporary 1756 * buffer to write the event data into if there's a likely chance 1757 * that it will not be committed. The discard of the ring buffer 1758 * is not as fast as committing, and is much slower than copying 1759 * a commit. 1760 * 1761 * When an event is to be filtered, allocate per cpu buffers to 1762 * write the event data into, and if the event is filtered and discarded 1763 * it is simply dropped, otherwise, the entire data is to be committed 1764 * in one shot. 1765 */ 1766 void trace_buffered_event_enable(void) 1767 { 1768 struct ring_buffer_event *event; 1769 struct page *page; 1770 int cpu; 1771 1772 WARN_ON_ONCE(!mutex_is_locked(&event_mutex)); 1773 1774 if (trace_buffered_event_ref++) 1775 return; 1776 1777 for_each_tracing_cpu(cpu) { 1778 page = alloc_pages_node(cpu_to_node(cpu), 1779 GFP_KERNEL | __GFP_NORETRY, 0); 1780 /* This is just an optimization and can handle failures */ 1781 if (!page) { 1782 pr_err("Failed to allocate event buffer\n"); 1783 break; 1784 } 1785 1786 event = page_address(page); 1787 memset(event, 0, sizeof(*event)); 1788 1789 per_cpu(trace_buffered_event, cpu) = event; 1790 1791 scoped_guard(preempt,) { 1792 if (cpu == smp_processor_id() && 1793 __this_cpu_read(trace_buffered_event) != 1794 per_cpu(trace_buffered_event, cpu)) 1795 WARN_ON_ONCE(1); 1796 } 1797 } 1798 } 1799 1800 static void enable_trace_buffered_event(void *data) 1801 { 1802 this_cpu_dec(trace_buffered_event_cnt); 1803 } 1804 1805 static void disable_trace_buffered_event(void *data) 1806 { 1807 this_cpu_inc(trace_buffered_event_cnt); 1808 } 1809 1810 /** 1811 * trace_buffered_event_disable - disable buffering events 1812 * 1813 * When a filter is removed, it is faster to not use the buffered 1814 * events, and to commit directly into the ring 
buffer. Free up 1815 * the temp buffers when there are no more users. This requires 1816 * special synchronization with current events. 1817 */ 1818 void trace_buffered_event_disable(void) 1819 { 1820 int cpu; 1821 1822 WARN_ON_ONCE(!mutex_is_locked(&event_mutex)); 1823 1824 if (WARN_ON_ONCE(!trace_buffered_event_ref)) 1825 return; 1826 1827 if (--trace_buffered_event_ref) 1828 return; 1829 1830 /* For each CPU, set the buffer as used. */ 1831 on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event, 1832 NULL, true); 1833 1834 /* Wait for all current users to finish */ 1835 synchronize_rcu(); 1836 1837 for_each_tracing_cpu(cpu) { 1838 free_page((unsigned long)per_cpu(trace_buffered_event, cpu)); 1839 per_cpu(trace_buffered_event, cpu) = NULL; 1840 } 1841 1842 /* 1843 * Wait for all CPUs that potentially started checking if they can use 1844 * their event buffer only after the previous synchronize_rcu() call and 1845 * they still read a valid pointer from trace_buffered_event. It must be 1846 * ensured they don't see cleared trace_buffered_event_cnt else they 1847 * could wrongly decide to use the pointed-to buffer which is now freed. 1848 */ 1849 synchronize_rcu(); 1850 1851 /* For each CPU, relinquish the buffer */ 1852 on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL, 1853 true); 1854 } 1855 1856 static struct trace_buffer *temp_buffer; 1857 1858 struct ring_buffer_event * 1859 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb, 1860 struct trace_event_file *trace_file, 1861 int type, unsigned long len, 1862 unsigned int trace_ctx) 1863 { 1864 struct ring_buffer_event *entry; 1865 struct trace_array *tr = trace_file->tr; 1866 int val; 1867 1868 *current_rb = tr->array_buffer.buffer; 1869 1870 if (!tr->no_filter_buffering_ref && 1871 (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) { 1872 preempt_disable_notrace(); 1873 /* 1874 * Filtering is on, so try to use the per cpu buffer first. 
1875 * This buffer will simulate a ring_buffer_event, 1876 * where the type_len is zero and the array[0] will 1877 * hold the full length. 1878 * (see include/linux/ring-buffer.h for details on 1879 * how the ring_buffer_event is structured). 1880 * 1881 * Using a temp buffer during filtering and copying it 1882 * on a matched filter is quicker than writing directly 1883 * into the ring buffer and then discarding it when 1884 * it doesn't match. That is because the discard 1885 * requires several atomic operations to get right. 1886 * Copying on match and doing nothing on a failed match 1887 * is still quicker than no copy on match, but having 1888 * to discard out of the ring buffer on a failed match. 1889 */ 1890 if ((entry = __this_cpu_read(trace_buffered_event))) { 1891 int max_len = PAGE_SIZE - struct_size(entry, array, 1); 1892 1893 val = this_cpu_inc_return(trace_buffered_event_cnt); 1894 1895 /* 1896 * Preemption is disabled, but interrupts and NMIs 1897 * can still come in now. If that happens after 1898 * the above increment, then it will have to go 1899 * back to the old method of allocating the event 1900 * on the ring buffer, and if the filter fails, it 1901 * will have to call ring_buffer_discard_commit() 1902 * to remove it. 1903 * 1904 * Need to also check the unlikely case that the 1905 * length is bigger than the temp buffer size. 1906 * If that happens, then the reserve is pretty much 1907 * guaranteed to fail, as the ring buffer currently 1908 * only allows events less than a page. But that may 1909 * change in the future, so let the ring buffer reserve 1910 * handle the failure in that case. 
1911 */ 1912 if (val == 1 && likely(len <= max_len)) { 1913 trace_event_setup(entry, type, trace_ctx); 1914 entry->array[0] = len; 1915 /* Return with preemption disabled */ 1916 return entry; 1917 } 1918 this_cpu_dec(trace_buffered_event_cnt); 1919 } 1920 /* __trace_buffer_lock_reserve() disables preemption */ 1921 preempt_enable_notrace(); 1922 } 1923 1924 entry = __trace_buffer_lock_reserve(*current_rb, type, len, 1925 trace_ctx); 1926 /* 1927 * If tracing is off, but we have triggers enabled 1928 * we still need to look at the event data. Use the temp_buffer 1929 * to store the trace event for the trigger to use. It's recursive 1930 * safe and will not be recorded anywhere. 1931 */ 1932 if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) { 1933 *current_rb = temp_buffer; 1934 entry = __trace_buffer_lock_reserve(*current_rb, type, len, 1935 trace_ctx); 1936 } 1937 return entry; 1938 } 1939 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve); 1940 1941 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock); 1942 static DEFINE_MUTEX(tracepoint_printk_mutex); 1943 1944 static void output_printk(struct trace_event_buffer *fbuffer) 1945 { 1946 struct trace_event_call *event_call; 1947 struct trace_event_file *file; 1948 struct trace_event *event; 1949 unsigned long flags; 1950 struct trace_iterator *iter = tracepoint_print_iter; 1951 1952 /* We should never get here if iter is NULL */ 1953 if (WARN_ON_ONCE(!iter)) 1954 return; 1955 1956 event_call = fbuffer->trace_file->event_call; 1957 if (!event_call || !event_call->event.funcs || 1958 !event_call->event.funcs->trace) 1959 return; 1960 1961 file = fbuffer->trace_file; 1962 if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) || 1963 (unlikely(file->flags & EVENT_FILE_FL_FILTERED) && 1964 !filter_match_preds(file->filter, fbuffer->entry))) 1965 return; 1966 1967 event = &fbuffer->trace_file->event_call->event; 1968 1969 raw_spin_lock_irqsave(&tracepoint_iter_lock, flags); 1970 trace_seq_init(&iter->seq); 
1971 iter->ent = fbuffer->entry; 1972 event_call->event.funcs->trace(iter, 0, event); 1973 trace_seq_putc(&iter->seq, 0); 1974 printk("%s", iter->seq.buffer); 1975 1976 raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags); 1977 } 1978 1979 int tracepoint_printk_sysctl(const struct ctl_table *table, int write, 1980 void *buffer, size_t *lenp, 1981 loff_t *ppos) 1982 { 1983 int save_tracepoint_printk; 1984 int ret; 1985 1986 guard(mutex)(&tracepoint_printk_mutex); 1987 save_tracepoint_printk = tracepoint_printk; 1988 1989 ret = proc_dointvec(table, write, buffer, lenp, ppos); 1990 1991 /* 1992 * This will force exiting early, as tracepoint_printk 1993 * is always zero when tracepoint_printk_iter is not allocated 1994 */ 1995 if (!tracepoint_print_iter) 1996 tracepoint_printk = 0; 1997 1998 if (save_tracepoint_printk == tracepoint_printk) 1999 return ret; 2000 2001 if (tracepoint_printk) 2002 static_key_enable(&tracepoint_printk_key.key); 2003 else 2004 static_key_disable(&tracepoint_printk_key.key); 2005 2006 return ret; 2007 } 2008 2009 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer) 2010 { 2011 enum event_trigger_type tt = ETT_NONE; 2012 struct trace_event_file *file = fbuffer->trace_file; 2013 2014 if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event, 2015 fbuffer->entry, &tt)) 2016 goto discard; 2017 2018 if (static_key_false(&tracepoint_printk_key.key)) 2019 output_printk(fbuffer); 2020 2021 if (static_branch_unlikely(&trace_event_exports_enabled)) 2022 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT); 2023 2024 trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer, 2025 fbuffer->event, fbuffer->trace_ctx, fbuffer->regs); 2026 2027 discard: 2028 if (tt) 2029 event_triggers_post_call(file, tt); 2030 2031 } 2032 EXPORT_SYMBOL_GPL(trace_event_buffer_commit); 2033 2034 /* 2035 * Skip 3: 2036 * 2037 * trace_buffer_unlock_commit_regs() 2038 * trace_event_buffer_commit() 2039 * trace_event_raw_event_xxx() 2040 */ 2041 # 
define STACK_SKIP 3 2042 2043 void trace_buffer_unlock_commit_regs(struct trace_array *tr, 2044 struct trace_buffer *buffer, 2045 struct ring_buffer_event *event, 2046 unsigned int trace_ctx, 2047 struct pt_regs *regs) 2048 { 2049 __buffer_unlock_commit(buffer, event); 2050 2051 /* 2052 * If regs is not set, then skip the necessary functions. 2053 * Note, we can still get here via blktrace, wakeup tracer 2054 * and mmiotrace, but that's ok if they lose a function or 2055 * two. They are not that meaningful. 2056 */ 2057 ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs); 2058 ftrace_trace_userstack(tr, buffer, trace_ctx); 2059 } 2060 2061 /* 2062 * Similar to trace_buffer_unlock_commit_regs() but do not dump stack. 2063 */ 2064 void 2065 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer, 2066 struct ring_buffer_event *event) 2067 { 2068 __buffer_unlock_commit(buffer, event); 2069 } 2070 2071 void 2072 trace_function(struct trace_array *tr, unsigned long ip, unsigned long 2073 parent_ip, unsigned int trace_ctx, struct ftrace_regs *fregs) 2074 { 2075 struct trace_buffer *buffer = tr->array_buffer.buffer; 2076 struct ring_buffer_event *event; 2077 struct ftrace_entry *entry; 2078 int size = sizeof(*entry); 2079 2080 size += FTRACE_REGS_MAX_ARGS * !!fregs * sizeof(long); 2081 2082 event = __trace_buffer_lock_reserve(buffer, TRACE_FN, size, 2083 trace_ctx); 2084 if (!event) 2085 return; 2086 entry = ring_buffer_event_data(event); 2087 entry->ip = ip; 2088 entry->parent_ip = parent_ip; 2089 2090 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API 2091 if (fregs) { 2092 for (int i = 0; i < FTRACE_REGS_MAX_ARGS; i++) 2093 entry->args[i] = ftrace_regs_get_argument(fregs, i); 2094 } 2095 #endif 2096 2097 if (static_branch_unlikely(&trace_function_exports_enabled)) 2098 ftrace_exports(event, TRACE_EXPORT_FUNCTION); 2099 __buffer_unlock_commit(buffer, event); 2100 } 2101 2102 #ifdef CONFIG_STACKTRACE 2103 2104 /* Allow 4 levels of nesting: normal, 
softirq, irq, NMI */ 2105 #define FTRACE_KSTACK_NESTING 4 2106 2107 #define FTRACE_KSTACK_ENTRIES (SZ_4K / FTRACE_KSTACK_NESTING) 2108 2109 struct ftrace_stack { 2110 unsigned long calls[FTRACE_KSTACK_ENTRIES]; 2111 }; 2112 2113 2114 struct ftrace_stacks { 2115 struct ftrace_stack stacks[FTRACE_KSTACK_NESTING]; 2116 }; 2117 2118 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks); 2119 static DEFINE_PER_CPU(int, ftrace_stack_reserve); 2120 2121 void __ftrace_trace_stack(struct trace_array *tr, 2122 struct trace_buffer *buffer, 2123 unsigned int trace_ctx, 2124 int skip, struct pt_regs *regs) 2125 { 2126 struct ring_buffer_event *event; 2127 unsigned int size, nr_entries; 2128 struct ftrace_stack *fstack; 2129 struct stack_entry *entry; 2130 int stackidx; 2131 int bit; 2132 2133 bit = trace_test_and_set_recursion(_THIS_IP_, _RET_IP_, TRACE_EVENT_START); 2134 if (bit < 0) 2135 return; 2136 2137 /* 2138 * Add one, for this function and the call to save_stack_trace() 2139 * If regs is set, then these functions will not be in the way. 2140 */ 2141 #ifndef CONFIG_UNWINDER_ORC 2142 if (!regs) 2143 skip++; 2144 #endif 2145 2146 guard(preempt_notrace)(); 2147 2148 stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1; 2149 2150 /* This should never happen. If it does, yell once and skip */ 2151 if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING)) 2152 goto out; 2153 2154 /* 2155 * The above __this_cpu_inc_return() is 'atomic' cpu local. An 2156 * interrupt will either see the value pre increment or post 2157 * increment. If the interrupt happens pre increment it will have 2158 * restored the counter when it returns. We just need a barrier to 2159 * keep gcc from moving things around. 
2160 */ 2161 barrier(); 2162 2163 fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx; 2164 size = ARRAY_SIZE(fstack->calls); 2165 2166 if (regs) { 2167 nr_entries = stack_trace_save_regs(regs, fstack->calls, 2168 size, skip); 2169 } else { 2170 nr_entries = stack_trace_save(fstack->calls, size, skip); 2171 } 2172 2173 #ifdef CONFIG_DYNAMIC_FTRACE 2174 /* Mark entry of stack trace as trampoline code */ 2175 if (tr->ops && tr->ops->trampoline) { 2176 unsigned long tramp_start = tr->ops->trampoline; 2177 unsigned long tramp_end = tramp_start + tr->ops->trampoline_size; 2178 unsigned long *calls = fstack->calls; 2179 2180 for (int i = 0; i < nr_entries; i++) { 2181 if (calls[i] >= tramp_start && calls[i] < tramp_end) 2182 calls[i] = FTRACE_TRAMPOLINE_MARKER; 2183 } 2184 } 2185 #endif 2186 2187 event = __trace_buffer_lock_reserve(buffer, TRACE_STACK, 2188 struct_size(entry, caller, nr_entries), 2189 trace_ctx); 2190 if (!event) 2191 goto out; 2192 entry = ring_buffer_event_data(event); 2193 2194 entry->size = nr_entries; 2195 memcpy(&entry->caller, fstack->calls, 2196 flex_array_size(entry, caller, nr_entries)); 2197 2198 __buffer_unlock_commit(buffer, event); 2199 2200 out: 2201 /* Again, don't let gcc optimize things here */ 2202 barrier(); 2203 __this_cpu_dec(ftrace_stack_reserve); 2204 trace_clear_recursion(bit); 2205 } 2206 2207 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx, 2208 int skip) 2209 { 2210 struct trace_buffer *buffer = tr->array_buffer.buffer; 2211 2212 if (rcu_is_watching()) { 2213 __ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL); 2214 return; 2215 } 2216 2217 if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY))) 2218 return; 2219 2220 /* 2221 * When an NMI triggers, RCU is enabled via ct_nmi_enter(), 2222 * but if the above rcu_is_watching() failed, then the NMI 2223 * triggered someplace critical, and ct_irq_enter() should 2224 * not be called from NMI. 
2225 */ 2226 if (unlikely(in_nmi())) 2227 return; 2228 2229 ct_irq_enter_irqson(); 2230 __ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL); 2231 ct_irq_exit_irqson(); 2232 } 2233 2234 /** 2235 * trace_dump_stack - record a stack back trace in the trace buffer 2236 * @skip: Number of functions to skip (helper handlers) 2237 */ 2238 void trace_dump_stack(int skip) 2239 { 2240 if (tracing_disabled || tracing_selftest_running) 2241 return; 2242 2243 #ifndef CONFIG_UNWINDER_ORC 2244 /* Skip 1 to skip this function. */ 2245 skip++; 2246 #endif 2247 __ftrace_trace_stack(printk_trace, printk_trace->array_buffer.buffer, 2248 tracing_gen_ctx(), skip, NULL); 2249 } 2250 EXPORT_SYMBOL_GPL(trace_dump_stack); 2251 2252 #ifdef CONFIG_USER_STACKTRACE_SUPPORT 2253 static DEFINE_PER_CPU(int, user_stack_count); 2254 2255 static void 2256 ftrace_trace_userstack(struct trace_array *tr, 2257 struct trace_buffer *buffer, unsigned int trace_ctx) 2258 { 2259 struct ring_buffer_event *event; 2260 struct userstack_entry *entry; 2261 2262 if (!(tr->trace_flags & TRACE_ITER(USERSTACKTRACE))) 2263 return; 2264 2265 /* 2266 * NMIs can not handle page faults, even with fix ups. 2267 * The save user stack can (and often does) fault. 2268 */ 2269 if (unlikely(in_nmi())) 2270 return; 2271 2272 /* 2273 * prevent recursion, since the user stack tracing may 2274 * trigger other kernel events. 
2275 */ 2276 guard(preempt)(); 2277 if (__this_cpu_read(user_stack_count)) 2278 return; 2279 2280 __this_cpu_inc(user_stack_count); 2281 2282 event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK, 2283 sizeof(*entry), trace_ctx); 2284 if (!event) 2285 goto out_drop_count; 2286 entry = ring_buffer_event_data(event); 2287 2288 entry->tgid = current->tgid; 2289 memset(&entry->caller, 0, sizeof(entry->caller)); 2290 2291 stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES); 2292 __buffer_unlock_commit(buffer, event); 2293 2294 out_drop_count: 2295 __this_cpu_dec(user_stack_count); 2296 } 2297 #else /* CONFIG_USER_STACKTRACE_SUPPORT */ 2298 static void ftrace_trace_userstack(struct trace_array *tr, 2299 struct trace_buffer *buffer, 2300 unsigned int trace_ctx) 2301 { 2302 } 2303 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */ 2304 2305 #endif /* CONFIG_STACKTRACE */ 2306 2307 static inline void 2308 func_repeats_set_delta_ts(struct func_repeats_entry *entry, 2309 unsigned long long delta) 2310 { 2311 entry->bottom_delta_ts = delta & U32_MAX; 2312 entry->top_delta_ts = (delta >> 32); 2313 } 2314 2315 void trace_last_func_repeats(struct trace_array *tr, 2316 struct trace_func_repeats *last_info, 2317 unsigned int trace_ctx) 2318 { 2319 struct trace_buffer *buffer = tr->array_buffer.buffer; 2320 struct func_repeats_entry *entry; 2321 struct ring_buffer_event *event; 2322 u64 delta; 2323 2324 event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS, 2325 sizeof(*entry), trace_ctx); 2326 if (!event) 2327 return; 2328 2329 delta = ring_buffer_event_time_stamp(buffer, event) - 2330 last_info->ts_last_call; 2331 2332 entry = ring_buffer_event_data(event); 2333 entry->ip = last_info->ip; 2334 entry->parent_ip = last_info->parent_ip; 2335 entry->count = last_info->count; 2336 func_repeats_set_delta_ts(entry, delta); 2337 2338 __buffer_unlock_commit(buffer, event); 2339 } 2340 2341 static struct trace_entry * 2342 peek_next_entry(struct trace_iterator *iter, int 
cpu, u64 *ts, 2343 unsigned long *lost_events) 2344 { 2345 struct ring_buffer_event *event; 2346 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu); 2347 2348 if (buf_iter) { 2349 event = ring_buffer_iter_peek(buf_iter, ts); 2350 if (lost_events) 2351 *lost_events = ring_buffer_iter_dropped(buf_iter) ? 2352 (unsigned long)-1 : 0; 2353 } else { 2354 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts, 2355 lost_events); 2356 } 2357 2358 if (event) { 2359 iter->ent_size = ring_buffer_event_length(event); 2360 return ring_buffer_event_data(event); 2361 } 2362 iter->ent_size = 0; 2363 return NULL; 2364 } 2365 2366 static struct trace_entry * 2367 __find_next_entry(struct trace_iterator *iter, int *ent_cpu, 2368 unsigned long *missing_events, u64 *ent_ts) 2369 { 2370 struct trace_buffer *buffer = iter->array_buffer->buffer; 2371 struct trace_entry *ent, *next = NULL; 2372 unsigned long lost_events = 0, next_lost = 0; 2373 int cpu_file = iter->cpu_file; 2374 u64 next_ts = 0, ts; 2375 int next_cpu = -1; 2376 int next_size = 0; 2377 int cpu; 2378 2379 /* 2380 * If we are in a per_cpu trace file, don't bother by iterating over 2381 * all cpu and peek directly. 
2382 */ 2383 if (cpu_file > RING_BUFFER_ALL_CPUS) { 2384 if (ring_buffer_empty_cpu(buffer, cpu_file)) 2385 return NULL; 2386 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events); 2387 if (ent_cpu) 2388 *ent_cpu = cpu_file; 2389 2390 return ent; 2391 } 2392 2393 for_each_tracing_cpu(cpu) { 2394 2395 if (ring_buffer_empty_cpu(buffer, cpu)) 2396 continue; 2397 2398 ent = peek_next_entry(iter, cpu, &ts, &lost_events); 2399 2400 /* 2401 * Pick the entry with the smallest timestamp: 2402 */ 2403 if (ent && (!next || ts < next_ts)) { 2404 next = ent; 2405 next_cpu = cpu; 2406 next_ts = ts; 2407 next_lost = lost_events; 2408 next_size = iter->ent_size; 2409 } 2410 } 2411 2412 iter->ent_size = next_size; 2413 2414 if (ent_cpu) 2415 *ent_cpu = next_cpu; 2416 2417 if (ent_ts) 2418 *ent_ts = next_ts; 2419 2420 if (missing_events) 2421 *missing_events = next_lost; 2422 2423 return next; 2424 } 2425 2426 #define STATIC_FMT_BUF_SIZE 128 2427 static char static_fmt_buf[STATIC_FMT_BUF_SIZE]; 2428 2429 char *trace_iter_expand_format(struct trace_iterator *iter) 2430 { 2431 char *tmp; 2432 2433 /* 2434 * iter->tr is NULL when used with tp_printk, which makes 2435 * this get called where it is not safe to call krealloc(). 
2436 */ 2437 if (!iter->tr || iter->fmt == static_fmt_buf) 2438 return NULL; 2439 2440 tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE, 2441 GFP_KERNEL); 2442 if (tmp) { 2443 iter->fmt_size += STATIC_FMT_BUF_SIZE; 2444 iter->fmt = tmp; 2445 } 2446 2447 return tmp; 2448 } 2449 2450 /* Returns true if the string is safe to dereference from an event */ 2451 static bool trace_safe_str(struct trace_iterator *iter, const char *str) 2452 { 2453 unsigned long addr = (unsigned long)str; 2454 struct trace_event *trace_event; 2455 struct trace_event_call *event; 2456 2457 /* OK if part of the event data */ 2458 if ((addr >= (unsigned long)iter->ent) && 2459 (addr < (unsigned long)iter->ent + iter->ent_size)) 2460 return true; 2461 2462 /* OK if part of the temp seq buffer */ 2463 if ((addr >= (unsigned long)iter->tmp_seq.buffer) && 2464 (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE)) 2465 return true; 2466 2467 /* Core rodata can not be freed */ 2468 if (is_kernel_rodata(addr)) 2469 return true; 2470 2471 if (trace_is_tracepoint_string(str)) 2472 return true; 2473 2474 /* 2475 * Now this could be a module event, referencing core module 2476 * data, which is OK. 
2477 */ 2478 if (!iter->ent) 2479 return false; 2480 2481 trace_event = ftrace_find_event(iter->ent->type); 2482 if (!trace_event) 2483 return false; 2484 2485 event = container_of(trace_event, struct trace_event_call, event); 2486 if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module) 2487 return false; 2488 2489 /* Would rather have rodata, but this will suffice */ 2490 if (within_module_core(addr, event->module)) 2491 return true; 2492 2493 return false; 2494 } 2495 2496 /** 2497 * ignore_event - Check dereferenced fields while writing to the seq buffer 2498 * @iter: The iterator that holds the seq buffer and the event being printed 2499 * 2500 * At boot up, test_event_printk() will flag any event that dereferences 2501 * a string with "%s" that does exist in the ring buffer. It may still 2502 * be valid, as the string may point to a static string in the kernel 2503 * rodata that never gets freed. But if the string pointer is pointing 2504 * to something that was allocated, there's a chance that it can be freed 2505 * by the time the user reads the trace. This would cause a bad memory 2506 * access by the kernel and possibly crash the system. 2507 * 2508 * This function will check if the event has any fields flagged as needing 2509 * to be checked at runtime and perform those checks. 2510 * 2511 * If it is found that a field is unsafe, it will write into the @iter->seq 2512 * a message stating what was found to be unsafe. 2513 * 2514 * @return: true if the event is unsafe and should be ignored, 2515 * false otherwise. 
2516 */ 2517 bool ignore_event(struct trace_iterator *iter) 2518 { 2519 struct ftrace_event_field *field; 2520 struct trace_event *trace_event; 2521 struct trace_event_call *event; 2522 struct list_head *head; 2523 struct trace_seq *seq; 2524 const void *ptr; 2525 2526 trace_event = ftrace_find_event(iter->ent->type); 2527 2528 seq = &iter->seq; 2529 2530 if (!trace_event) { 2531 trace_seq_printf(seq, "EVENT ID %d NOT FOUND?\n", iter->ent->type); 2532 return true; 2533 } 2534 2535 event = container_of(trace_event, struct trace_event_call, event); 2536 if (!(event->flags & TRACE_EVENT_FL_TEST_STR)) 2537 return false; 2538 2539 head = trace_get_fields(event); 2540 if (!head) { 2541 trace_seq_printf(seq, "FIELDS FOR EVENT '%s' NOT FOUND?\n", 2542 trace_event_name(event)); 2543 return true; 2544 } 2545 2546 /* Offsets are from the iter->ent that points to the raw event */ 2547 ptr = iter->ent; 2548 2549 list_for_each_entry(field, head, link) { 2550 const char *str; 2551 bool good; 2552 2553 if (!field->needs_test) 2554 continue; 2555 2556 str = *(const char **)(ptr + field->offset); 2557 2558 good = trace_safe_str(iter, str); 2559 2560 /* 2561 * If you hit this warning, it is likely that the 2562 * trace event in question used %s on a string that 2563 * was saved at the time of the event, but may not be 2564 * around when the trace is read. Use __string(), 2565 * __assign_str() and __get_str() helpers in the TRACE_EVENT() 2566 * instead. See samples/trace_events/trace-events-sample.h 2567 * for reference. 
2568 */ 2569 if (WARN_ONCE(!good, "event '%s' has unsafe pointer field '%s'", 2570 trace_event_name(event), field->name)) { 2571 trace_seq_printf(seq, "EVENT %s: HAS UNSAFE POINTER FIELD '%s'\n", 2572 trace_event_name(event), field->name); 2573 return true; 2574 } 2575 } 2576 return false; 2577 } 2578 2579 const char *trace_event_format(struct trace_iterator *iter, const char *fmt) 2580 { 2581 const char *p, *new_fmt; 2582 char *q; 2583 2584 if (WARN_ON_ONCE(!fmt)) 2585 return fmt; 2586 2587 if (!iter->tr || iter->tr->trace_flags & TRACE_ITER(HASH_PTR)) 2588 return fmt; 2589 2590 p = fmt; 2591 new_fmt = q = iter->fmt; 2592 while (*p) { 2593 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) { 2594 if (!trace_iter_expand_format(iter)) 2595 return fmt; 2596 2597 q += iter->fmt - new_fmt; 2598 new_fmt = iter->fmt; 2599 } 2600 2601 *q++ = *p++; 2602 2603 /* Replace %p with %px */ 2604 if (p[-1] == '%') { 2605 if (p[0] == '%') { 2606 *q++ = *p++; 2607 } else if (p[0] == 'p' && !isalnum(p[1])) { 2608 *q++ = *p++; 2609 *q++ = 'x'; 2610 } 2611 } 2612 } 2613 *q = '\0'; 2614 2615 return new_fmt; 2616 } 2617 2618 #define STATIC_TEMP_BUF_SIZE 128 2619 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4); 2620 2621 /* Find the next real entry, without updating the iterator itself */ 2622 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter, 2623 int *ent_cpu, u64 *ent_ts) 2624 { 2625 /* __find_next_entry will reset ent_size */ 2626 int ent_size = iter->ent_size; 2627 struct trace_entry *entry; 2628 2629 /* 2630 * If called from ftrace_dump(), then the iter->temp buffer 2631 * will be the static_temp_buf and not created from kmalloc. 2632 * If the entry size is greater than the buffer, we can 2633 * not save it. Just return NULL in that case. This is only 2634 * used to add markers when two consecutive events' time 2635 * stamps have a large delta. 
See trace_print_lat_context() 2636 */ 2637 if (iter->temp == static_temp_buf && 2638 STATIC_TEMP_BUF_SIZE < ent_size) 2639 return NULL; 2640 2641 /* 2642 * The __find_next_entry() may call peek_next_entry(), which may 2643 * call ring_buffer_peek() that may make the contents of iter->ent 2644 * undefined. Need to copy iter->ent now. 2645 */ 2646 if (iter->ent && iter->ent != iter->temp) { 2647 if ((!iter->temp || iter->temp_size < iter->ent_size) && 2648 !WARN_ON_ONCE(iter->temp == static_temp_buf)) { 2649 void *temp; 2650 temp = kmalloc(iter->ent_size, GFP_KERNEL); 2651 if (!temp) 2652 return NULL; 2653 kfree(iter->temp); 2654 iter->temp = temp; 2655 iter->temp_size = iter->ent_size; 2656 } 2657 memcpy(iter->temp, iter->ent, iter->ent_size); 2658 iter->ent = iter->temp; 2659 } 2660 entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts); 2661 /* Put back the original ent_size */ 2662 iter->ent_size = ent_size; 2663 2664 return entry; 2665 } 2666 2667 /* Find the next real entry, and increment the iterator to the next entry */ 2668 void *trace_find_next_entry_inc(struct trace_iterator *iter) 2669 { 2670 struct ring_buffer_iter *buf_iter; 2671 2672 iter->ent = __find_next_entry(iter, &iter->cpu, 2673 &iter->lost_events, &iter->ts); 2674 2675 if (iter->ent) { 2676 iter->idx++; 2677 buf_iter = trace_buffer_iter(iter, iter->cpu); 2678 if (buf_iter) 2679 ring_buffer_iter_advance(buf_iter); 2680 } 2681 2682 return iter->ent ? 
iter : NULL; 2683 } 2684 2685 static void trace_consume(struct trace_iterator *iter) 2686 { 2687 ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts, 2688 &iter->lost_events); 2689 } 2690 2691 static void *s_next(struct seq_file *m, void *v, loff_t *pos) 2692 { 2693 struct trace_iterator *iter = m->private; 2694 int i = (int)*pos; 2695 void *ent; 2696 2697 WARN_ON_ONCE(iter->leftover); 2698 2699 (*pos)++; 2700 2701 /* can't go backwards */ 2702 if (iter->idx > i) 2703 return NULL; 2704 2705 if (iter->idx < 0) 2706 ent = trace_find_next_entry_inc(iter); 2707 else 2708 ent = iter; 2709 2710 while (ent && iter->idx < i) 2711 ent = trace_find_next_entry_inc(iter); 2712 2713 iter->pos = *pos; 2714 2715 return ent; 2716 } 2717 2718 void tracing_iter_reset(struct trace_iterator *iter, int cpu) 2719 { 2720 struct ring_buffer_iter *buf_iter; 2721 unsigned long entries = 0; 2722 u64 ts; 2723 2724 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0; 2725 2726 buf_iter = trace_buffer_iter(iter, cpu); 2727 if (!buf_iter) 2728 return; 2729 2730 ring_buffer_iter_reset(buf_iter); 2731 2732 /* 2733 * We could have the case with the max latency tracers 2734 * that a reset never took place on a cpu. This is evident 2735 * by the timestamp being before the start of the buffer. 2736 */ 2737 while (ring_buffer_iter_peek(buf_iter, &ts)) { 2738 if (ts >= iter->array_buffer->time_start) 2739 break; 2740 entries++; 2741 ring_buffer_iter_advance(buf_iter); 2742 /* This could be a big loop */ 2743 cond_resched(); 2744 } 2745 2746 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries; 2747 } 2748 2749 /* 2750 * The current tracer is copied to avoid a global locking 2751 * all around. 
/*
 * seq_file ->start callback for the trace iterator stored in m->private.
 *
 * Resynchronizes iter->trace with the trace_array's current tracer,
 * positions the iterator at *pos and takes the event read lock and the
 * per-cpu access lock before returning.
 *
 * Returns the iterator (or whatever s_next() returned) on success, or
 * ERR_PTR(-EBUSY) when this is a snapshot iterator and the current tracer
 * uses the snapshot buffer. In that case no lock is taken; s_stop()
 * performs the same test before unlocking.
 */
static void *s_start(struct seq_file *m, loff_t *pos)
{
	struct trace_iterator *iter = m->private;
	struct trace_array *tr = iter->tr;
	int cpu_file = iter->cpu_file;
	void *p = NULL;
	loff_t l = 0;
	int cpu;

	/* The tracer may have been switched since the previous call */
	mutex_lock(&trace_types_lock);
	if (unlikely(tr->current_trace != iter->trace)) {
		/* Close iter->trace before switching to the new current tracer */
		if (iter->trace->close)
			iter->trace->close(iter);
		iter->trace = tr->current_trace;
		/* Reopen the new current tracer */
		if (iter->trace->open)
			iter->trace->open(iter);
	}
	mutex_unlock(&trace_types_lock);

	/* Bail out before any of the locks below are taken */
	if (iter->snapshot && tracer_uses_snapshot(iter->trace))
		return ERR_PTR(-EBUSY);

	if (*pos != iter->pos) {
		/*
		 * The requested position is not where the iterator was
		 * left: rewind everything and walk forward to *pos.
		 */
		iter->ent = NULL;
		iter->cpu = 0;
		iter->idx = -1;

		if (cpu_file == RING_BUFFER_ALL_CPUS) {
			for_each_tracing_cpu(cpu)
				tracing_iter_reset(iter, cpu);
		} else
			tracing_iter_reset(iter, cpu_file);

		iter->leftover = 0;
		/* s_next() increments l on every call */
		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
			;

	} else {
		/*
		 * If we overflowed the seq_file before, then we want
		 * to just reuse the trace_seq buffer again.
		 */
		if (iter->leftover)
			p = iter;
		else {
			/* s_next() increments l, bringing it back to *pos */
			l = *pos - 1;
			p = s_next(m, p, &l);
		}
	}

	/* Released again by s_stop() */
	trace_event_read_lock();
	trace_access_lock(cpu_file);
	return p;
}
&total, &entries); 2880 2881 return entries; 2882 } 2883 2884 static void print_lat_help_header(struct seq_file *m) 2885 { 2886 seq_puts(m, "# _------=> CPU# \n" 2887 "# / _-----=> irqs-off/BH-disabled\n" 2888 "# | / _----=> need-resched \n" 2889 "# || / _---=> hardirq/softirq \n" 2890 "# ||| / _--=> preempt-depth \n" 2891 "# |||| / _-=> migrate-disable \n" 2892 "# ||||| / delay \n" 2893 "# cmd pid |||||| time | caller \n" 2894 "# \\ / |||||| \\ | / \n"); 2895 } 2896 2897 static void print_event_info(struct array_buffer *buf, struct seq_file *m) 2898 { 2899 unsigned long total; 2900 unsigned long entries; 2901 2902 get_total_entries(buf, &total, &entries); 2903 seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n", 2904 entries, total, num_online_cpus()); 2905 seq_puts(m, "#\n"); 2906 } 2907 2908 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m, 2909 unsigned int flags) 2910 { 2911 bool tgid = flags & TRACE_ITER(RECORD_TGID); 2912 2913 print_event_info(buf, m); 2914 2915 seq_printf(m, "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? " TGID " : ""); 2916 seq_printf(m, "# | | %s | | |\n", tgid ? " | " : ""); 2917 } 2918 2919 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m, 2920 unsigned int flags) 2921 { 2922 bool tgid = flags & TRACE_ITER(RECORD_TGID); 2923 static const char space[] = " "; 2924 int prec = tgid ? 
12 : 2; 2925 2926 print_event_info(buf, m); 2927 2928 seq_printf(m, "# %.*s _-----=> irqs-off/BH-disabled\n", prec, space); 2929 seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space); 2930 seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space); 2931 seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space); 2932 seq_printf(m, "# %.*s||| / _-=> migrate-disable\n", prec, space); 2933 seq_printf(m, "# %.*s|||| / delay\n", prec, space); 2934 seq_printf(m, "# TASK-PID %.*s CPU# ||||| TIMESTAMP FUNCTION\n", prec, " TGID "); 2935 seq_printf(m, "# | | %.*s | ||||| | |\n", prec, " | "); 2936 } 2937 2938 void 2939 print_trace_header(struct seq_file *m, struct trace_iterator *iter) 2940 { 2941 unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK); 2942 struct array_buffer *buf = iter->array_buffer; 2943 struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu); 2944 struct tracer *type = iter->trace; 2945 unsigned long entries; 2946 unsigned long total; 2947 const char *name = type->name; 2948 2949 get_total_entries(buf, &total, &entries); 2950 2951 seq_printf(m, "# %s latency trace v1.1.5 on %s\n", 2952 name, init_utsname()->release); 2953 seq_puts(m, "# -----------------------------------" 2954 "---------------------------------\n"); 2955 seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |" 2956 " (M:%s VP:%d, KP:%d, SP:%d HP:%d", 2957 nsecs_to_usecs(data->saved_latency), 2958 entries, 2959 total, 2960 buf->cpu, 2961 preempt_model_str(), 2962 /* These are reserved for later use */ 2963 0, 0, 0, 0); 2964 #ifdef CONFIG_SMP 2965 seq_printf(m, " #P:%d)\n", num_online_cpus()); 2966 #else 2967 seq_puts(m, ")\n"); 2968 #endif 2969 seq_puts(m, "# -----------------\n"); 2970 seq_printf(m, "# | task: %.16s-%d " 2971 "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n", 2972 data->comm, data->pid, 2973 from_kuid_munged(seq_user_ns(m), data->uid), data->nice, 2974 data->policy, data->rt_priority); 2975 seq_puts(m, "# -----------------\n"); 
2976 2977 if (data->critical_start) { 2978 seq_puts(m, "# => started at: "); 2979 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags); 2980 trace_print_seq(m, &iter->seq); 2981 seq_puts(m, "\n# => ended at: "); 2982 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags); 2983 trace_print_seq(m, &iter->seq); 2984 seq_puts(m, "\n#\n"); 2985 } 2986 2987 seq_puts(m, "#\n"); 2988 } 2989 2990 static void test_cpu_buff_start(struct trace_iterator *iter) 2991 { 2992 struct trace_seq *s = &iter->seq; 2993 struct trace_array *tr = iter->tr; 2994 2995 if (!(tr->trace_flags & TRACE_ITER(ANNOTATE))) 2996 return; 2997 2998 if (!(iter->iter_flags & TRACE_FILE_ANNOTATE)) 2999 return; 3000 3001 if (cpumask_available(iter->started) && 3002 cpumask_test_cpu(iter->cpu, iter->started)) 3003 return; 3004 3005 if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries) 3006 return; 3007 3008 if (cpumask_available(iter->started)) 3009 cpumask_set_cpu(iter->cpu, iter->started); 3010 3011 /* Don't print started cpu buffer for the first entry of the trace */ 3012 if (iter->idx > 1) 3013 trace_seq_printf(s, "##### CPU %u buffer started ####\n", 3014 iter->cpu); 3015 } 3016 3017 #ifdef CONFIG_FTRACE_SYSCALLS 3018 static bool is_syscall_event(struct trace_event *event) 3019 { 3020 return (event->funcs == &enter_syscall_print_funcs) || 3021 (event->funcs == &exit_syscall_print_funcs); 3022 3023 } 3024 #define syscall_buf_size CONFIG_TRACE_SYSCALL_BUF_SIZE_DEFAULT 3025 #else 3026 static inline bool is_syscall_event(struct trace_event *event) 3027 { 3028 return false; 3029 } 3030 #define syscall_buf_size 0 3031 #endif /* CONFIG_FTRACE_SYSCALLS */ 3032 3033 static enum print_line_t print_trace_fmt(struct trace_iterator *iter) 3034 { 3035 struct trace_array *tr = iter->tr; 3036 struct trace_seq *s = &iter->seq; 3037 unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK); 3038 struct trace_entry *entry; 3039 struct trace_event *event; 3040 3041 entry = iter->ent; 
3042 3043 test_cpu_buff_start(iter); 3044 3045 event = ftrace_find_event(entry->type); 3046 3047 if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) { 3048 if (iter->iter_flags & TRACE_FILE_LAT_FMT) 3049 trace_print_lat_context(iter); 3050 else 3051 trace_print_context(iter); 3052 } 3053 3054 if (trace_seq_has_overflowed(s)) 3055 return TRACE_TYPE_PARTIAL_LINE; 3056 3057 if (event) { 3058 if (tr->trace_flags & TRACE_ITER(FIELDS)) 3059 return print_event_fields(iter, event); 3060 /* 3061 * For TRACE_EVENT() events, the print_fmt is not 3062 * safe to use if the array has delta offsets 3063 * Force printing via the fields. 3064 */ 3065 if ((tr->text_delta)) { 3066 /* ftrace and system call events are still OK */ 3067 if ((event->type > __TRACE_LAST_TYPE) && 3068 !is_syscall_event(event)) 3069 return print_event_fields(iter, event); 3070 } 3071 return event->funcs->trace(iter, sym_flags, event); 3072 } 3073 3074 trace_seq_printf(s, "Unknown type %d\n", entry->type); 3075 3076 return trace_handle_return(s); 3077 } 3078 3079 static enum print_line_t print_raw_fmt(struct trace_iterator *iter) 3080 { 3081 struct trace_array *tr = iter->tr; 3082 struct trace_seq *s = &iter->seq; 3083 struct trace_entry *entry; 3084 struct trace_event *event; 3085 3086 entry = iter->ent; 3087 3088 if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) 3089 trace_seq_printf(s, "%d %d %llu ", 3090 entry->pid, iter->cpu, iter->ts); 3091 3092 if (trace_seq_has_overflowed(s)) 3093 return TRACE_TYPE_PARTIAL_LINE; 3094 3095 event = ftrace_find_event(entry->type); 3096 if (event) 3097 return event->funcs->raw(iter, 0, event); 3098 3099 trace_seq_printf(s, "%d ?\n", entry->type); 3100 3101 return trace_handle_return(s); 3102 } 3103 3104 static enum print_line_t print_hex_fmt(struct trace_iterator *iter) 3105 { 3106 struct trace_array *tr = iter->tr; 3107 struct trace_seq *s = &iter->seq; 3108 unsigned char newline = '\n'; 3109 struct trace_entry *entry; 3110 struct trace_event *event; 3111 3112 entry = 
iter->ent; 3113 3114 if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) { 3115 SEQ_PUT_HEX_FIELD(s, entry->pid); 3116 SEQ_PUT_HEX_FIELD(s, iter->cpu); 3117 SEQ_PUT_HEX_FIELD(s, iter->ts); 3118 if (trace_seq_has_overflowed(s)) 3119 return TRACE_TYPE_PARTIAL_LINE; 3120 } 3121 3122 event = ftrace_find_event(entry->type); 3123 if (event) { 3124 enum print_line_t ret = event->funcs->hex(iter, 0, event); 3125 if (ret != TRACE_TYPE_HANDLED) 3126 return ret; 3127 } 3128 3129 SEQ_PUT_FIELD(s, newline); 3130 3131 return trace_handle_return(s); 3132 } 3133 3134 static enum print_line_t print_bin_fmt(struct trace_iterator *iter) 3135 { 3136 struct trace_array *tr = iter->tr; 3137 struct trace_seq *s = &iter->seq; 3138 struct trace_entry *entry; 3139 struct trace_event *event; 3140 3141 entry = iter->ent; 3142 3143 if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) { 3144 SEQ_PUT_FIELD(s, entry->pid); 3145 SEQ_PUT_FIELD(s, iter->cpu); 3146 SEQ_PUT_FIELD(s, iter->ts); 3147 if (trace_seq_has_overflowed(s)) 3148 return TRACE_TYPE_PARTIAL_LINE; 3149 } 3150 3151 event = ftrace_find_event(entry->type); 3152 return event ? 
event->funcs->binary(iter, 0, event) : 3153 TRACE_TYPE_HANDLED; 3154 } 3155 3156 int trace_empty(struct trace_iterator *iter) 3157 { 3158 struct ring_buffer_iter *buf_iter; 3159 int cpu; 3160 3161 /* If we are looking at one CPU buffer, only check that one */ 3162 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) { 3163 cpu = iter->cpu_file; 3164 buf_iter = trace_buffer_iter(iter, cpu); 3165 if (buf_iter) { 3166 if (!ring_buffer_iter_empty(buf_iter)) 3167 return 0; 3168 } else { 3169 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu)) 3170 return 0; 3171 } 3172 return 1; 3173 } 3174 3175 for_each_tracing_cpu(cpu) { 3176 buf_iter = trace_buffer_iter(iter, cpu); 3177 if (buf_iter) { 3178 if (!ring_buffer_iter_empty(buf_iter)) 3179 return 0; 3180 } else { 3181 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu)) 3182 return 0; 3183 } 3184 } 3185 3186 return 1; 3187 } 3188 3189 /* Called with trace_event_read_lock() held. */ 3190 enum print_line_t print_trace_line(struct trace_iterator *iter) 3191 { 3192 struct trace_array *tr = iter->tr; 3193 unsigned long trace_flags = tr->trace_flags; 3194 enum print_line_t ret; 3195 3196 if (iter->lost_events) { 3197 if (iter->lost_events == (unsigned long)-1) 3198 trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n", 3199 iter->cpu); 3200 else 3201 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n", 3202 iter->cpu, iter->lost_events); 3203 if (trace_seq_has_overflowed(&iter->seq)) 3204 return TRACE_TYPE_PARTIAL_LINE; 3205 } 3206 3207 if (iter->trace && iter->trace->print_line) { 3208 ret = iter->trace->print_line(iter); 3209 if (ret != TRACE_TYPE_UNHANDLED) 3210 return ret; 3211 } 3212 3213 if (iter->ent->type == TRACE_BPUTS && 3214 trace_flags & TRACE_ITER(PRINTK) && 3215 trace_flags & TRACE_ITER(PRINTK_MSGONLY)) 3216 return trace_print_bputs_msg_only(iter); 3217 3218 if (iter->ent->type == TRACE_BPRINT && 3219 trace_flags & TRACE_ITER(PRINTK) && 3220 trace_flags & TRACE_ITER(PRINTK_MSGONLY)) 3221 return 
trace_print_bprintk_msg_only(iter); 3222 3223 if (iter->ent->type == TRACE_PRINT && 3224 trace_flags & TRACE_ITER(PRINTK) && 3225 trace_flags & TRACE_ITER(PRINTK_MSGONLY)) 3226 return trace_print_printk_msg_only(iter); 3227 3228 if (trace_flags & TRACE_ITER(BIN)) 3229 return print_bin_fmt(iter); 3230 3231 if (trace_flags & TRACE_ITER(HEX)) 3232 return print_hex_fmt(iter); 3233 3234 if (trace_flags & TRACE_ITER(RAW)) 3235 return print_raw_fmt(iter); 3236 3237 return print_trace_fmt(iter); 3238 } 3239 3240 void trace_latency_header(struct seq_file *m) 3241 { 3242 struct trace_iterator *iter = m->private; 3243 struct trace_array *tr = iter->tr; 3244 3245 /* print nothing if the buffers are empty */ 3246 if (trace_empty(iter)) 3247 return; 3248 3249 if (iter->iter_flags & TRACE_FILE_LAT_FMT) 3250 print_trace_header(m, iter); 3251 3252 if (!(tr->trace_flags & TRACE_ITER(VERBOSE))) 3253 print_lat_help_header(m); 3254 } 3255 3256 void trace_default_header(struct seq_file *m) 3257 { 3258 struct trace_iterator *iter = m->private; 3259 struct trace_array *tr = iter->tr; 3260 unsigned long trace_flags = tr->trace_flags; 3261 3262 if (!(trace_flags & TRACE_ITER(CONTEXT_INFO))) 3263 return; 3264 3265 if (iter->iter_flags & TRACE_FILE_LAT_FMT) { 3266 /* print nothing if the buffers are empty */ 3267 if (trace_empty(iter)) 3268 return; 3269 print_trace_header(m, iter); 3270 if (!(trace_flags & TRACE_ITER(VERBOSE))) 3271 print_lat_help_header(m); 3272 } else { 3273 if (!(trace_flags & TRACE_ITER(VERBOSE))) { 3274 if (trace_flags & TRACE_ITER(IRQ_INFO)) 3275 print_func_help_header_irq(iter->array_buffer, 3276 m, trace_flags); 3277 else 3278 print_func_help_header(iter->array_buffer, m, 3279 trace_flags); 3280 } 3281 } 3282 } 3283 3284 static void test_ftrace_alive(struct seq_file *m) 3285 { 3286 if (!ftrace_is_dead()) 3287 return; 3288 seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n" 3289 "# MAY BE MISSING FUNCTION EVENTS\n"); 3290 } 3291 3292 static int s_show(struct 
seq_file *m, void *v) 3293 { 3294 struct trace_iterator *iter = v; 3295 int ret; 3296 3297 if (iter->ent == NULL) { 3298 if (iter->tr) { 3299 seq_printf(m, "# tracer: %s\n", iter->trace->name); 3300 seq_puts(m, "#\n"); 3301 test_ftrace_alive(m); 3302 } 3303 if (iter->snapshot && trace_empty(iter)) 3304 print_snapshot_help(m, iter); 3305 else if (iter->trace && iter->trace->print_header) 3306 iter->trace->print_header(m); 3307 else 3308 trace_default_header(m); 3309 3310 } else if (iter->leftover) { 3311 /* 3312 * If we filled the seq_file buffer earlier, we 3313 * want to just show it now. 3314 */ 3315 ret = trace_print_seq(m, &iter->seq); 3316 3317 /* ret should this time be zero, but you never know */ 3318 iter->leftover = ret; 3319 3320 } else { 3321 ret = print_trace_line(iter); 3322 if (ret == TRACE_TYPE_PARTIAL_LINE) { 3323 iter->seq.full = 0; 3324 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n"); 3325 } 3326 ret = trace_print_seq(m, &iter->seq); 3327 /* 3328 * If we overflow the seq_file buffer, then it will 3329 * ask us for this data again at start up. 3330 * Use that instead. 3331 * ret is 0 if seq_file write succeeded. 3332 * -1 otherwise. 3333 */ 3334 iter->leftover = ret; 3335 } 3336 3337 return 0; 3338 } 3339 3340 static const struct seq_operations tracer_seq_ops = { 3341 .start = s_start, 3342 .next = s_next, 3343 .stop = s_stop, 3344 .show = s_show, 3345 }; 3346 3347 /* 3348 * Note, as iter itself can be allocated and freed in different 3349 * ways, this function is only used to free its content, and not 3350 * the iterator itself. The only requirement to all the allocations 3351 * is that it must zero all fields (kzalloc), as freeing works with 3352 * ethier allocated content or NULL. 
/*
 * Set up a trace iterator for reading the trace through seq_file.
 *
 * @inode:    inode of the opened file; i_private is the trace_array
 * @file:     the file being opened
 * @snapshot: true to read from the snapshot buffer
 *
 * The iterator is obtained from __seq_open_private() with tracer_seq_ops
 * as the seq_file operations, and a ring buffer read iterator is started
 * for every CPU the file covers.
 *
 * Returns the iterator, ERR_PTR(-ENODEV) when tracing is disabled, or
 * ERR_PTR(-ENOMEM) when an allocation fails.
 */
struct trace_iterator *
__tracing_open(struct inode *inode, struct file *file, bool snapshot)
{
	struct trace_array *tr = inode->i_private;
	struct trace_iterator *iter;
	int cpu;

	if (tracing_disabled)
		return ERR_PTR(-ENODEV);

	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
	if (!iter)
		return ERR_PTR(-ENOMEM);

	/* One ring buffer iterator slot per possible CPU id */
	iter->buffer_iter = kzalloc_objs(*iter->buffer_iter, nr_cpu_ids);
	if (!iter->buffer_iter)
		goto release;

	/*
	 * trace_find_next_entry() may need to save off iter->ent.
	 * It will place it into the iter->temp buffer. As most
	 * events are less than 128, allocate a buffer of that size.
	 * If one is greater, then trace_find_next_entry() will
	 * allocate a new buffer to adjust for the bigger iter->ent.
	 * It's not critical if it fails to get allocated here.
	 */
	iter->temp = kmalloc(128, GFP_KERNEL);
	if (iter->temp)
		iter->temp_size = 128;

	/*
	 * trace_event_printf() may need to modify given format
	 * string to replace %p with %px so that it shows real address
	 * instead of hash value. However, that is only for the event
	 * tracing, other tracer may not need. Defer the allocation
	 * until it is needed.
	 */
	iter->fmt = NULL;
	iter->fmt_size = 0;

	/* Held until the iterator is fully set up (or setup has failed) */
	mutex_lock(&trace_types_lock);
	iter->trace = tr->current_trace;

	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
		goto fail;

	iter->tr = tr;

#ifdef CONFIG_TRACER_SNAPSHOT
	/* Currently only the top directory has a snapshot */
	if (tr->current_trace->print_max || snapshot)
		iter->array_buffer = &tr->snapshot_buffer;
	else
#endif
		iter->array_buffer = &tr->array_buffer;
	iter->snapshot = snapshot;
	/* s_start() rewinds the iterator whenever *pos differs from iter->pos */
	iter->pos = -1;
	iter->cpu_file = tracing_get_cpu(inode);
	mutex_init(&iter->mutex);

	/* Notify the tracer early; before we stop tracing. */
	if (iter->trace->open)
		iter->trace->open(iter);

	/* Annotate start of buffers if we had overruns */
	if (ring_buffer_overruns(iter->array_buffer->buffer))
		iter->iter_flags |= TRACE_FILE_ANNOTATE;

	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
	if (trace_clocks[tr->clock_id].in_ns)
		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;

	/*
	 * If pause-on-trace is enabled, then stop the trace while
	 * dumping, unless this is the "snapshot" file
	 */
	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER(PAUSE_ON_TRACE))) {
		iter->iter_flags |= TRACE_FILE_PAUSE;
		tracing_stop_tr(tr);
	}

	/* Start a read iterator on every CPU this file covers and reset it */
	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
		for_each_tracing_cpu(cpu) {
			iter->buffer_iter[cpu] =
				ring_buffer_read_start(iter->array_buffer->buffer,
						       cpu, GFP_KERNEL);
			tracing_iter_reset(iter, cpu);
		}
	} else {
		cpu = iter->cpu_file;
		iter->buffer_iter[cpu] =
			ring_buffer_read_start(iter->array_buffer->buffer,
					       cpu, GFP_KERNEL);
		tracing_iter_reset(iter, cpu);
	}

	mutex_unlock(&trace_types_lock);

	return iter;

	/* Unwind: "fail" is entered with trace_types_lock held, "release" without */
 fail:
	mutex_unlock(&trace_types_lock);
	free_trace_iter_content(iter);
release:
	seq_release_private(inode, file);
	return ERR_PTR(-ENOMEM);
}
3512 */ 3513 int tracing_open_file_tr(struct inode *inode, struct file *filp) 3514 { 3515 struct trace_event_file *file = inode->i_private; 3516 int ret; 3517 3518 ret = tracing_check_open_get_tr(file->tr); 3519 if (ret) 3520 return ret; 3521 3522 guard(mutex)(&event_mutex); 3523 3524 /* Fail if the file is marked for removal */ 3525 if (file->flags & EVENT_FILE_FL_FREED) { 3526 trace_array_put(file->tr); 3527 return -ENODEV; 3528 } else { 3529 event_file_get(file); 3530 } 3531 3532 return 0; 3533 } 3534 3535 int tracing_release_file_tr(struct inode *inode, struct file *filp) 3536 { 3537 struct trace_event_file *file = inode->i_private; 3538 3539 trace_array_put(file->tr); 3540 event_file_put(file); 3541 3542 return 0; 3543 } 3544 3545 int tracing_single_release_file_tr(struct inode *inode, struct file *filp) 3546 { 3547 tracing_release_file_tr(inode, filp); 3548 return single_release(inode, filp); 3549 } 3550 3551 int tracing_release(struct inode *inode, struct file *file) 3552 { 3553 struct trace_array *tr = inode->i_private; 3554 struct seq_file *m = file->private_data; 3555 struct trace_iterator *iter; 3556 int cpu; 3557 3558 if (!(file->f_mode & FMODE_READ)) { 3559 trace_array_put(tr); 3560 return 0; 3561 } 3562 3563 /* Writes do not use seq_file */ 3564 iter = m->private; 3565 mutex_lock(&trace_types_lock); 3566 3567 for_each_tracing_cpu(cpu) { 3568 if (iter->buffer_iter[cpu]) 3569 ring_buffer_read_finish(iter->buffer_iter[cpu]); 3570 } 3571 3572 if (iter->trace && iter->trace->close) 3573 iter->trace->close(iter); 3574 3575 if (iter->iter_flags & TRACE_FILE_PAUSE) 3576 /* reenable tracing if it was previously enabled */ 3577 tracing_start_tr(tr); 3578 3579 __trace_array_put(tr); 3580 3581 mutex_unlock(&trace_types_lock); 3582 3583 free_trace_iter_content(iter); 3584 seq_release_private(inode, file); 3585 3586 return 0; 3587 } 3588 3589 int tracing_release_generic_tr(struct inode *inode, struct file *file) 3590 { 3591 struct trace_array *tr = 
inode->i_private; 3592 3593 trace_array_put(tr); 3594 return 0; 3595 } 3596 3597 static int tracing_single_release_tr(struct inode *inode, struct file *file) 3598 { 3599 struct trace_array *tr = inode->i_private; 3600 3601 trace_array_put(tr); 3602 3603 return single_release(inode, file); 3604 } 3605 3606 static bool update_last_data_if_empty(struct trace_array *tr); 3607 3608 static int tracing_open(struct inode *inode, struct file *file) 3609 { 3610 struct trace_array *tr = inode->i_private; 3611 struct trace_iterator *iter; 3612 int ret; 3613 3614 ret = tracing_check_open_get_tr(tr); 3615 if (ret) 3616 return ret; 3617 3618 /* If this file was open for write, then erase contents */ 3619 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) { 3620 int cpu = tracing_get_cpu(inode); 3621 struct array_buffer *trace_buf = &tr->array_buffer; 3622 3623 #ifdef CONFIG_TRACER_MAX_TRACE 3624 if (tr->current_trace->print_max) 3625 trace_buf = &tr->snapshot_buffer; 3626 #endif 3627 3628 if (cpu == RING_BUFFER_ALL_CPUS) 3629 tracing_reset_online_cpus(trace_buf); 3630 else 3631 tracing_reset_cpu(trace_buf, cpu); 3632 3633 update_last_data_if_empty(tr); 3634 } 3635 3636 if (file->f_mode & FMODE_READ) { 3637 iter = __tracing_open(inode, file, false); 3638 if (IS_ERR(iter)) 3639 ret = PTR_ERR(iter); 3640 else if (tr->trace_flags & TRACE_ITER(LATENCY_FMT)) 3641 iter->iter_flags |= TRACE_FILE_LAT_FMT; 3642 } 3643 3644 if (ret < 0) 3645 trace_array_put(tr); 3646 3647 return ret; 3648 } 3649 3650 /* 3651 * Some tracers are not suitable for instance buffers. 3652 * A tracer is always available for the global array (toplevel) 3653 * or if it explicitly states that it is. 
3654 */ 3655 static bool 3656 trace_ok_for_array(struct tracer *t, struct trace_array *tr) 3657 { 3658 /* arrays with mapped buffer range do not have snapshots */ 3659 if (tr->range_addr_start && tracer_uses_snapshot(t)) 3660 return false; 3661 return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances; 3662 } 3663 3664 /* Find the next tracer that this trace array may use */ 3665 static struct tracer * 3666 get_tracer_for_array(struct trace_array *tr, struct tracer *t) 3667 { 3668 while (t && !trace_ok_for_array(t, tr)) 3669 t = t->next; 3670 3671 return t; 3672 } 3673 3674 static void * 3675 t_next(struct seq_file *m, void *v, loff_t *pos) 3676 { 3677 struct trace_array *tr = m->private; 3678 struct tracer *t = v; 3679 3680 (*pos)++; 3681 3682 if (t) 3683 t = get_tracer_for_array(tr, t->next); 3684 3685 return t; 3686 } 3687 3688 static void *t_start(struct seq_file *m, loff_t *pos) 3689 { 3690 struct trace_array *tr = m->private; 3691 struct tracer *t; 3692 loff_t l = 0; 3693 3694 mutex_lock(&trace_types_lock); 3695 3696 t = get_tracer_for_array(tr, trace_types); 3697 for (; t && l < *pos; t = t_next(m, t, &l)) 3698 ; 3699 3700 return t; 3701 } 3702 3703 static void t_stop(struct seq_file *m, void *p) 3704 { 3705 mutex_unlock(&trace_types_lock); 3706 } 3707 3708 static int t_show(struct seq_file *m, void *v) 3709 { 3710 struct tracer *t = v; 3711 3712 if (!t) 3713 return 0; 3714 3715 seq_puts(m, t->name); 3716 if (t->next) 3717 seq_putc(m, ' '); 3718 else 3719 seq_putc(m, '\n'); 3720 3721 return 0; 3722 } 3723 3724 static const struct seq_operations show_traces_seq_ops = { 3725 .start = t_start, 3726 .next = t_next, 3727 .stop = t_stop, 3728 .show = t_show, 3729 }; 3730 3731 static int show_traces_open(struct inode *inode, struct file *file) 3732 { 3733 struct trace_array *tr = inode->i_private; 3734 struct seq_file *m; 3735 int ret; 3736 3737 ret = tracing_check_open_get_tr(tr); 3738 if (ret) 3739 return ret; 3740 3741 ret = seq_open(file, 
&show_traces_seq_ops); 3742 if (ret) { 3743 trace_array_put(tr); 3744 return ret; 3745 } 3746 3747 m = file->private_data; 3748 m->private = tr; 3749 3750 return 0; 3751 } 3752 3753 static int tracing_seq_release(struct inode *inode, struct file *file) 3754 { 3755 struct trace_array *tr = inode->i_private; 3756 3757 trace_array_put(tr); 3758 return seq_release(inode, file); 3759 } 3760 3761 static ssize_t 3762 tracing_write_stub(struct file *filp, const char __user *ubuf, 3763 size_t count, loff_t *ppos) 3764 { 3765 return count; 3766 } 3767 3768 loff_t tracing_lseek(struct file *file, loff_t offset, int whence) 3769 { 3770 int ret; 3771 3772 if (file->f_mode & FMODE_READ) 3773 ret = seq_lseek(file, offset, whence); 3774 else 3775 file->f_pos = ret = 0; 3776 3777 return ret; 3778 } 3779 3780 static const struct file_operations tracing_fops = { 3781 .open = tracing_open, 3782 .read = seq_read, 3783 .read_iter = seq_read_iter, 3784 .splice_read = copy_splice_read, 3785 .write = tracing_write_stub, 3786 .llseek = tracing_lseek, 3787 .release = tracing_release, 3788 }; 3789 3790 static const struct file_operations show_traces_fops = { 3791 .open = show_traces_open, 3792 .read = seq_read, 3793 .llseek = seq_lseek, 3794 .release = tracing_seq_release, 3795 }; 3796 3797 static ssize_t 3798 tracing_cpumask_read(struct file *filp, char __user *ubuf, 3799 size_t count, loff_t *ppos) 3800 { 3801 struct trace_array *tr = file_inode(filp)->i_private; 3802 char *mask_str __free(kfree) = NULL; 3803 int len; 3804 3805 len = snprintf(NULL, 0, "%*pb\n", 3806 cpumask_pr_args(tr->tracing_cpumask)) + 1; 3807 mask_str = kmalloc(len, GFP_KERNEL); 3808 if (!mask_str) 3809 return -ENOMEM; 3810 3811 len = snprintf(mask_str, len, "%*pb\n", 3812 cpumask_pr_args(tr->tracing_cpumask)); 3813 if (len >= count) 3814 return -EINVAL; 3815 3816 return simple_read_from_buffer(ubuf, count, ppos, mask_str, len); 3817 } 3818 3819 int tracing_set_cpumask(struct trace_array *tr, 3820 cpumask_var_t 
tracing_cpumask_new) 3821 { 3822 int cpu; 3823 3824 if (!tr) 3825 return -EINVAL; 3826 3827 local_irq_disable(); 3828 arch_spin_lock(&tr->max_lock); 3829 for_each_tracing_cpu(cpu) { 3830 /* 3831 * Increase/decrease the disabled counter if we are 3832 * about to flip a bit in the cpumask: 3833 */ 3834 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) && 3835 !cpumask_test_cpu(cpu, tracing_cpumask_new)) { 3836 ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu); 3837 #ifdef CONFIG_TRACER_SNAPSHOT 3838 ring_buffer_record_disable_cpu(tr->snapshot_buffer.buffer, cpu); 3839 #endif 3840 } 3841 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) && 3842 cpumask_test_cpu(cpu, tracing_cpumask_new)) { 3843 ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu); 3844 #ifdef CONFIG_TRACER_SNAPSHOT 3845 ring_buffer_record_enable_cpu(tr->snapshot_buffer.buffer, cpu); 3846 #endif 3847 } 3848 } 3849 arch_spin_unlock(&tr->max_lock); 3850 local_irq_enable(); 3851 3852 cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new); 3853 3854 return 0; 3855 } 3856 3857 static ssize_t 3858 tracing_cpumask_write(struct file *filp, const char __user *ubuf, 3859 size_t count, loff_t *ppos) 3860 { 3861 struct trace_array *tr = file_inode(filp)->i_private; 3862 cpumask_var_t tracing_cpumask_new; 3863 int err; 3864 3865 if (count == 0 || count > KMALLOC_MAX_SIZE) 3866 return -EINVAL; 3867 3868 if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL)) 3869 return -ENOMEM; 3870 3871 err = cpumask_parse_user(ubuf, count, tracing_cpumask_new); 3872 if (err) 3873 goto err_free; 3874 3875 err = tracing_set_cpumask(tr, tracing_cpumask_new); 3876 if (err) 3877 goto err_free; 3878 3879 free_cpumask_var(tracing_cpumask_new); 3880 3881 return count; 3882 3883 err_free: 3884 free_cpumask_var(tracing_cpumask_new); 3885 3886 return err; 3887 } 3888 3889 static const struct file_operations tracing_cpumask_fops = { 3890 .open = tracing_open_generic_tr, 3891 .read = tracing_cpumask_read, 3892 .write = 
tracing_cpumask_write, 3893 .release = tracing_release_generic_tr, 3894 .llseek = generic_file_llseek, 3895 }; 3896 3897 static int tracing_trace_options_show(struct seq_file *m, void *v) 3898 { 3899 struct tracer_opt *trace_opts; 3900 struct trace_array *tr = m->private; 3901 struct tracer_flags *flags; 3902 u32 tracer_flags; 3903 int i; 3904 3905 guard(mutex)(&trace_types_lock); 3906 3907 for (i = 0; trace_options[i]; i++) { 3908 if (tr->trace_flags & (1ULL << i)) 3909 seq_printf(m, "%s\n", trace_options[i]); 3910 else 3911 seq_printf(m, "no%s\n", trace_options[i]); 3912 } 3913 3914 flags = tr->current_trace_flags; 3915 if (!flags || !flags->opts) 3916 return 0; 3917 3918 tracer_flags = flags->val; 3919 trace_opts = flags->opts; 3920 3921 for (i = 0; trace_opts[i].name; i++) { 3922 if (tracer_flags & trace_opts[i].bit) 3923 seq_printf(m, "%s\n", trace_opts[i].name); 3924 else 3925 seq_printf(m, "no%s\n", trace_opts[i].name); 3926 } 3927 3928 return 0; 3929 } 3930 3931 static int __set_tracer_option(struct trace_array *tr, 3932 struct tracer_flags *tracer_flags, 3933 struct tracer_opt *opts, int neg) 3934 { 3935 struct tracer *trace = tracer_flags->trace; 3936 int ret = 0; 3937 3938 if (trace->set_flag) 3939 ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg); 3940 if (ret) 3941 return ret; 3942 3943 if (neg) 3944 tracer_flags->val &= ~opts->bit; 3945 else 3946 tracer_flags->val |= opts->bit; 3947 return 0; 3948 } 3949 3950 /* Try to assign a tracer specific option */ 3951 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg) 3952 { 3953 struct tracer_flags *tracer_flags = tr->current_trace_flags; 3954 struct tracer_opt *opts = NULL; 3955 int i; 3956 3957 if (!tracer_flags || !tracer_flags->opts) 3958 return 0; 3959 3960 for (i = 0; tracer_flags->opts[i].name; i++) { 3961 opts = &tracer_flags->opts[i]; 3962 3963 if (strcmp(cmp, opts->name) == 0) 3964 return __set_tracer_option(tr, tracer_flags, opts, neg); 3965 } 3966 3967 return 
-EINVAL; 3968 } 3969 3970 /* Some tracers require overwrite to stay enabled */ 3971 int trace_keep_overwrite(struct tracer *tracer, u64 mask, int set) 3972 { 3973 if (tracer->enabled && (mask & TRACE_ITER(OVERWRITE)) && !set) 3974 return -1; 3975 3976 return 0; 3977 } 3978 3979 int set_tracer_flag(struct trace_array *tr, u64 mask, int enabled) 3980 { 3981 switch (mask) { 3982 case TRACE_ITER(RECORD_TGID): 3983 case TRACE_ITER(RECORD_CMD): 3984 case TRACE_ITER(TRACE_PRINTK): 3985 case TRACE_ITER(COPY_MARKER): 3986 lockdep_assert_held(&event_mutex); 3987 } 3988 3989 /* do nothing if flag is already set */ 3990 if (!!(tr->trace_flags & mask) == !!enabled) 3991 return 0; 3992 3993 /* Give the tracer a chance to approve the change */ 3994 if (tr->current_trace->flag_changed) 3995 if (tr->current_trace->flag_changed(tr, mask, !!enabled)) 3996 return -EINVAL; 3997 3998 switch (mask) { 3999 case TRACE_ITER(TRACE_PRINTK): 4000 if (enabled) { 4001 update_printk_trace(tr); 4002 } else { 4003 /* 4004 * The global_trace cannot clear this. 4005 * It's flag only gets cleared if another instance sets it. 4006 */ 4007 if (printk_trace == &global_trace) 4008 return -EINVAL; 4009 /* 4010 * An instance must always have it set. 4011 * by default, that's the global_trace instance. 
4012 */ 4013 if (printk_trace == tr) 4014 update_printk_trace(&global_trace); 4015 } 4016 break; 4017 4018 case TRACE_ITER(COPY_MARKER): 4019 update_marker_trace(tr, enabled); 4020 /* update_marker_trace updates the tr->trace_flags */ 4021 return 0; 4022 } 4023 4024 if (enabled) 4025 tr->trace_flags |= mask; 4026 else 4027 tr->trace_flags &= ~mask; 4028 4029 switch (mask) { 4030 case TRACE_ITER(RECORD_CMD): 4031 trace_event_enable_cmd_record(enabled); 4032 break; 4033 4034 case TRACE_ITER(RECORD_TGID): 4035 4036 if (trace_alloc_tgid_map() < 0) { 4037 tr->trace_flags &= ~TRACE_ITER(RECORD_TGID); 4038 return -ENOMEM; 4039 } 4040 4041 trace_event_enable_tgid_record(enabled); 4042 break; 4043 4044 case TRACE_ITER(EVENT_FORK): 4045 trace_event_follow_fork(tr, enabled); 4046 break; 4047 4048 case TRACE_ITER(FUNC_FORK): 4049 ftrace_pid_follow_fork(tr, enabled); 4050 break; 4051 4052 case TRACE_ITER(OVERWRITE): 4053 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled); 4054 #ifdef CONFIG_TRACER_SNAPSHOT 4055 ring_buffer_change_overwrite(tr->snapshot_buffer.buffer, enabled); 4056 #endif 4057 break; 4058 4059 case TRACE_ITER(PRINTK): 4060 trace_printk_start_stop_comm(enabled); 4061 trace_printk_control(enabled); 4062 break; 4063 4064 #if defined(CONFIG_FUNCTION_PROFILER) && defined(CONFIG_FUNCTION_GRAPH_TRACER) 4065 case TRACE_GRAPH_GRAPH_TIME: 4066 ftrace_graph_graph_time_control(enabled); 4067 break; 4068 #endif 4069 } 4070 4071 return 0; 4072 } 4073 4074 int trace_set_options(struct trace_array *tr, char *option) 4075 { 4076 char *cmp; 4077 int neg = 0; 4078 int ret; 4079 size_t orig_len = strlen(option); 4080 int len; 4081 4082 cmp = strstrip(option); 4083 4084 len = str_has_prefix(cmp, "no"); 4085 if (len) 4086 neg = 1; 4087 4088 cmp += len; 4089 4090 mutex_lock(&event_mutex); 4091 mutex_lock(&trace_types_lock); 4092 4093 ret = match_string(trace_options, -1, cmp); 4094 /* If no option could be set, test the specific tracer options */ 4095 if (ret < 0) 4096 
ret = set_tracer_option(tr, cmp, neg); 4097 else 4098 ret = set_tracer_flag(tr, 1ULL << ret, !neg); 4099 4100 mutex_unlock(&trace_types_lock); 4101 mutex_unlock(&event_mutex); 4102 4103 /* 4104 * If the first trailing whitespace is replaced with '\0' by strstrip, 4105 * turn it back into a space. 4106 */ 4107 if (orig_len > strlen(option)) 4108 option[strlen(option)] = ' '; 4109 4110 return ret; 4111 } 4112 4113 static void __init apply_trace_boot_options(void) 4114 { 4115 char *buf = trace_boot_options_buf; 4116 char *option; 4117 4118 while (true) { 4119 option = strsep(&buf, ","); 4120 4121 if (!option) 4122 break; 4123 4124 if (*option) 4125 trace_set_options(&global_trace, option); 4126 4127 /* Put back the comma to allow this to be called again */ 4128 if (buf) 4129 *(buf - 1) = ','; 4130 } 4131 } 4132 4133 static ssize_t 4134 tracing_trace_options_write(struct file *filp, const char __user *ubuf, 4135 size_t cnt, loff_t *ppos) 4136 { 4137 struct seq_file *m = filp->private_data; 4138 struct trace_array *tr = m->private; 4139 char buf[64]; 4140 int ret; 4141 4142 if (cnt >= sizeof(buf)) 4143 return -EINVAL; 4144 4145 if (copy_from_user(buf, ubuf, cnt)) 4146 return -EFAULT; 4147 4148 buf[cnt] = 0; 4149 4150 ret = trace_set_options(tr, buf); 4151 if (ret < 0) 4152 return ret; 4153 4154 *ppos += cnt; 4155 4156 return cnt; 4157 } 4158 4159 static int tracing_trace_options_open(struct inode *inode, struct file *file) 4160 { 4161 struct trace_array *tr = inode->i_private; 4162 int ret; 4163 4164 ret = tracing_check_open_get_tr(tr); 4165 if (ret) 4166 return ret; 4167 4168 ret = single_open(file, tracing_trace_options_show, inode->i_private); 4169 if (ret < 0) 4170 trace_array_put(tr); 4171 4172 return ret; 4173 } 4174 4175 static const struct file_operations tracing_iter_fops = { 4176 .open = tracing_trace_options_open, 4177 .read = seq_read, 4178 .llseek = seq_lseek, 4179 .release = tracing_single_release_tr, 4180 .write = tracing_trace_options_write, 4181 }; 4182 
4183 static const char readme_msg[] = 4184 "tracing mini-HOWTO:\n\n" 4185 "By default tracefs removes all OTH file permission bits.\n" 4186 "When mounting tracefs an optional group id can be specified\n" 4187 "which adds the group to every directory and file in tracefs:\n\n" 4188 "\t e.g. mount -t tracefs [-o [gid=<gid>]] nodev /sys/kernel/tracing\n\n" 4189 "# echo 0 > tracing_on : quick way to disable tracing\n" 4190 "# echo 1 > tracing_on : quick way to re-enable tracing\n\n" 4191 " Important files:\n" 4192 " trace\t\t\t- The static contents of the buffer\n" 4193 "\t\t\t To clear the buffer write into this file: echo > trace\n" 4194 " trace_pipe\t\t- A consuming read to see the contents of the buffer\n" 4195 " current_tracer\t- function and latency tracers\n" 4196 " available_tracers\t- list of configured tracers for current_tracer\n" 4197 " error_log\t- error log for failed commands (that support it)\n" 4198 " buffer_size_kb\t- view and modify size of per cpu buffer\n" 4199 " buffer_total_size_kb - view total size of all cpu buffers\n\n" 4200 " trace_clock\t\t- change the clock used to order events\n" 4201 " local: Per cpu clock but may not be synced across CPUs\n" 4202 " global: Synced across CPUs but slows tracing down.\n" 4203 " counter: Not a clock, but just an increment\n" 4204 " uptime: Jiffy counter from time of boot\n" 4205 " perf: Same clock that perf events use\n" 4206 #ifdef CONFIG_X86_64 4207 " x86-tsc: TSC cycle counter\n" 4208 #endif 4209 "\n timestamp_mode\t- view the mode used to timestamp events\n" 4210 " delta: Delta difference against a buffer-wide timestamp\n" 4211 " absolute: Absolute (standalone) timestamp\n" 4212 "\n trace_marker\t\t- Writes into this file writes into the kernel buffer\n" 4213 "\n trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n" 4214 " tracing_cpumask\t- Limit which CPUs to trace\n" 4215 " instances\t\t- Make sub-buffers with: mkdir instances/foo\n" 4216 "\t\t\t Remove sub-buffer 
with rmdir\n" 4217 " trace_options\t\t- Set format or modify how tracing happens\n" 4218 "\t\t\t Disable an option by prefixing 'no' to the\n" 4219 "\t\t\t option name\n" 4220 " saved_cmdlines_size\t- echo command number in here to store comm-pid list\n" 4221 #ifdef CONFIG_DYNAMIC_FTRACE 4222 "\n available_filter_functions - list of functions that can be filtered on\n" 4223 " set_ftrace_filter\t- echo function name in here to only trace these\n" 4224 "\t\t\t functions\n" 4225 "\t accepts: func_full_name or glob-matching-pattern\n" 4226 "\t modules: Can select a group via module\n" 4227 "\t Format: :mod:<module-name>\n" 4228 "\t example: echo :mod:ext3 > set_ftrace_filter\n" 4229 "\t triggers: a command to perform when function is hit\n" 4230 "\t Format: <function>:<trigger>[:count]\n" 4231 "\t trigger: traceon, traceoff\n" 4232 "\t\t enable_event:<system>:<event>\n" 4233 "\t\t disable_event:<system>:<event>\n" 4234 #ifdef CONFIG_STACKTRACE 4235 "\t\t stacktrace\n" 4236 #endif 4237 #ifdef CONFIG_TRACER_SNAPSHOT 4238 "\t\t snapshot\n" 4239 #endif 4240 "\t\t dump\n" 4241 "\t\t cpudump\n" 4242 "\t example: echo do_fault:traceoff > set_ftrace_filter\n" 4243 "\t echo do_trap:traceoff:3 > set_ftrace_filter\n" 4244 "\t The first one will disable tracing every time do_fault is hit\n" 4245 "\t The second will disable tracing at most 3 times when do_trap is hit\n" 4246 "\t The first time do trap is hit and it disables tracing, the\n" 4247 "\t counter will decrement to 2. If tracing is already disabled,\n" 4248 "\t the counter will not decrement. 
It only decrements when the\n" 4249 "\t trigger did work\n" 4250 "\t To remove trigger without count:\n" 4251 "\t echo '!<function>:<trigger> > set_ftrace_filter\n" 4252 "\t To remove trigger with a count:\n" 4253 "\t echo '!<function>:<trigger>:0 > set_ftrace_filter\n" 4254 " set_ftrace_notrace\t- echo function name in here to never trace.\n" 4255 "\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n" 4256 "\t modules: Can select a group via module command :mod:\n" 4257 "\t Does not accept triggers\n" 4258 #endif /* CONFIG_DYNAMIC_FTRACE */ 4259 #ifdef CONFIG_FUNCTION_TRACER 4260 " set_ftrace_pid\t- Write pid(s) to only function trace those pids\n" 4261 "\t\t (function)\n" 4262 " set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n" 4263 "\t\t (function)\n" 4264 #endif 4265 #ifdef CONFIG_FUNCTION_GRAPH_TRACER 4266 " set_graph_function\t- Trace the nested calls of a function (function_graph)\n" 4267 " set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n" 4268 " max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n" 4269 #endif 4270 #ifdef CONFIG_TRACER_SNAPSHOT 4271 "\n snapshot\t\t- Like 'trace' but shows the content of the static\n" 4272 "\t\t\t snapshot buffer. 
Read the contents for more\n" 4273 "\t\t\t information\n" 4274 #endif 4275 #ifdef CONFIG_STACK_TRACER 4276 " stack_trace\t\t- Shows the max stack trace when active\n" 4277 " stack_max_size\t- Shows current max stack size that was traced\n" 4278 "\t\t\t Write into this file to reset the max size (trigger a\n" 4279 "\t\t\t new trace)\n" 4280 #ifdef CONFIG_DYNAMIC_FTRACE 4281 " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n" 4282 "\t\t\t traces\n" 4283 #endif 4284 #endif /* CONFIG_STACK_TRACER */ 4285 #ifdef CONFIG_DYNAMIC_EVENTS 4286 " dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n" 4287 "\t\t\t Write into this file to define/undefine new trace events.\n" 4288 #endif 4289 #ifdef CONFIG_KPROBE_EVENTS 4290 " kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n" 4291 "\t\t\t Write into this file to define/undefine new trace events.\n" 4292 #endif 4293 #ifdef CONFIG_UPROBE_EVENTS 4294 " uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n" 4295 "\t\t\t Write into this file to define/undefine new trace events.\n" 4296 #endif 4297 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \ 4298 defined(CONFIG_FPROBE_EVENTS) 4299 "\t accepts: event-definitions (one definition per line)\n" 4300 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) 4301 "\t Format: p[:[<group>/][<event>]] <place> [<args>]\n" 4302 "\t r[maxactive][:[<group>/][<event>]] <place> [<args>]\n" 4303 #endif 4304 #ifdef CONFIG_FPROBE_EVENTS 4305 "\t f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n" 4306 "\t t[:[<group>/][<event>]] <tracepoint> [<args>]\n" 4307 #endif 4308 #ifdef CONFIG_HIST_TRIGGERS 4309 "\t s:[synthetic/]<event> <field> [<field>]\n" 4310 #endif 4311 "\t e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n" 4312 "\t -:[<group>/][<event>]\n" 4313 #ifdef CONFIG_KPROBE_EVENTS 4314 "\t place: 
[<module>:]<symbol>[+<offset>]|<memaddr>\n" 4315 "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n" 4316 #endif 4317 #ifdef CONFIG_UPROBE_EVENTS 4318 " place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n" 4319 #endif 4320 "\t args: <name>=fetcharg[:type]\n" 4321 "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n" 4322 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API 4323 "\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n" 4324 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS 4325 "\t <argname>[->field[->field|.field...]],\n" 4326 #endif 4327 #else 4328 "\t $stack<index>, $stack, $retval, $comm,\n" 4329 #endif 4330 "\t +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n" 4331 "\t kernel return probes support: $retval, $arg<N>, $comm\n" 4332 "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n" 4333 "\t b<bit-width>@<bit-offset>/<container-size>, ustring,\n" 4334 "\t symstr, %pd/%pD, <type>\\[<array-size>\\]\n" 4335 #ifdef CONFIG_HIST_TRIGGERS 4336 "\t field: <stype> <name>;\n" 4337 "\t stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n" 4338 "\t [unsigned] char/int/long\n" 4339 #endif 4340 "\t efield: For event probes ('e' types), the field is on of the fields\n" 4341 "\t of the <attached-group>/<attached-event>.\n" 4342 #endif 4343 " set_event\t\t- Enables events by name written into it\n" 4344 "\t\t\t Can enable module events via: :mod:<module>\n" 4345 " events/\t\t- Directory containing all trace event subsystems:\n" 4346 " enable\t\t- Write 0/1 to enable/disable tracing of all events\n" 4347 " events/<system>/\t- Directory containing all trace events for <system>:\n" 4348 " enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n" 4349 "\t\t\t events\n" 4350 " filter\t\t- If set, only events passing filter are traced\n" 4351 " events/<system>/<event>/\t- Directory containing control files for\n" 4352 "\t\t\t <event>:\n" 4353 " enable\t\t- Write 0/1 to enable/disable tracing of <event>\n" 
4354 " filter\t\t- If set, only events passing filter are traced\n" 4355 " trigger\t\t- If set, a command to perform when event is hit\n" 4356 "\t Format: <trigger>[:count][if <filter>]\n" 4357 "\t trigger: traceon, traceoff\n" 4358 "\t enable_event:<system>:<event>\n" 4359 "\t disable_event:<system>:<event>\n" 4360 #ifdef CONFIG_HIST_TRIGGERS 4361 "\t enable_hist:<system>:<event>\n" 4362 "\t disable_hist:<system>:<event>\n" 4363 #endif 4364 #ifdef CONFIG_STACKTRACE 4365 "\t\t stacktrace\n" 4366 #endif 4367 #ifdef CONFIG_TRACER_SNAPSHOT 4368 "\t\t snapshot\n" 4369 #endif 4370 #ifdef CONFIG_HIST_TRIGGERS 4371 "\t\t hist (see below)\n" 4372 #endif 4373 "\t example: echo traceoff > events/block/block_unplug/trigger\n" 4374 "\t echo traceoff:3 > events/block/block_unplug/trigger\n" 4375 "\t echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n" 4376 "\t events/block/block_unplug/trigger\n" 4377 "\t The first disables tracing every time block_unplug is hit.\n" 4378 "\t The second disables tracing the first 3 times block_unplug is hit.\n" 4379 "\t The third enables the kmalloc event the first 3 times block_unplug\n" 4380 "\t is hit and has value of greater than 1 for the 'nr_rq' event field.\n" 4381 "\t Like function triggers, the counter is only decremented if it\n" 4382 "\t enabled or disabled tracing.\n" 4383 "\t To remove a trigger without a count:\n" 4384 "\t echo '!<trigger> > <system>/<event>/trigger\n" 4385 "\t To remove a trigger with a count:\n" 4386 "\t echo '!<trigger>:0 > <system>/<event>/trigger\n" 4387 "\t Filters can be ignored when removing a trigger.\n" 4388 #ifdef CONFIG_HIST_TRIGGERS 4389 " hist trigger\t- If set, event hits are aggregated into a hash table\n" 4390 "\t Format: hist:keys=<field1[,field2,...]>\n" 4391 "\t [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n" 4392 "\t [:values=<field1[,field2,...]>]\n" 4393 "\t [:sort=<field1[,field2,...]>]\n" 4394 "\t [:size=#entries]\n" 4395 "\t [:pause][:continue][:clear]\n" 4396 "\t 
[:name=histname1]\n" 4397 "\t [:nohitcount]\n" 4398 "\t [:<handler>.<action>]\n" 4399 "\t [if <filter>]\n\n" 4400 "\t Note, special fields can be used as well:\n" 4401 "\t common_timestamp - to record current timestamp\n" 4402 "\t common_cpu - to record the CPU the event happened on\n" 4403 "\n" 4404 "\t A hist trigger variable can be:\n" 4405 "\t - a reference to a field e.g. x=current_timestamp,\n" 4406 "\t - a reference to another variable e.g. y=$x,\n" 4407 "\t - a numeric literal: e.g. ms_per_sec=1000,\n" 4408 "\t - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n" 4409 "\n" 4410 "\t hist trigger arithmetic expressions support addition(+), subtraction(-),\n" 4411 "\t multiplication(*) and division(/) operators. An operand can be either a\n" 4412 "\t variable reference, field or numeric literal.\n" 4413 "\n" 4414 "\t When a matching event is hit, an entry is added to a hash\n" 4415 "\t table using the key(s) and value(s) named, and the value of a\n" 4416 "\t sum called 'hitcount' is incremented. Keys and values\n" 4417 "\t correspond to fields in the event's format description. Keys\n" 4418 "\t can be any field, or the special string 'common_stacktrace'.\n" 4419 "\t Compound keys consisting of up to two fields can be specified\n" 4420 "\t by the 'keys' keyword. Values must correspond to numeric\n" 4421 "\t fields. Sort keys consisting of up to two fields can be\n" 4422 "\t specified using the 'sort' keyword. The sort direction can\n" 4423 "\t be modified by appending '.descending' or '.ascending' to a\n" 4424 "\t sort field. 
The 'size' parameter can be used to specify more\n" 4425 "\t or fewer than the default 2048 entries for the hashtable size.\n" 4426 "\t If a hist trigger is given a name using the 'name' parameter,\n" 4427 "\t its histogram data will be shared with other triggers of the\n" 4428 "\t same name, and trigger hits will update this common data.\n\n" 4429 "\t Reading the 'hist' file for the event will dump the hash\n" 4430 "\t table in its entirety to stdout. If there are multiple hist\n" 4431 "\t triggers attached to an event, there will be a table for each\n" 4432 "\t trigger in the output. The table displayed for a named\n" 4433 "\t trigger will be the same as any other instance having the\n" 4434 "\t same name. The default format used to display a given field\n" 4435 "\t can be modified by appending any of the following modifiers\n" 4436 "\t to the field name, as applicable:\n\n" 4437 "\t .hex display a number as a hex value\n" 4438 "\t .sym display an address as a symbol\n" 4439 "\t .sym-offset display an address as a symbol and offset\n" 4440 "\t .execname display a common_pid as a program name\n" 4441 "\t .syscall display a syscall id as a syscall name\n" 4442 "\t .log2 display log2 value rather than raw number\n" 4443 "\t .buckets=size display values in groups of size rather than raw number\n" 4444 "\t .usecs display a common_timestamp in microseconds\n" 4445 "\t .percent display a number of percentage value\n" 4446 "\t .graph display a bar-graph of a value\n\n" 4447 "\t The 'pause' parameter can be used to pause an existing hist\n" 4448 "\t trigger or to start a hist trigger but not log any events\n" 4449 "\t until told to do so. 
'continue' can be used to start or\n" 4450 "\t restart a paused hist trigger.\n\n" 4451 "\t The 'clear' parameter will clear the contents of a running\n" 4452 "\t hist trigger and leave its current paused/active state\n" 4453 "\t unchanged.\n\n" 4454 "\t The 'nohitcount' (or NOHC) parameter will suppress display of\n" 4455 "\t raw hitcount in the histogram.\n\n" 4456 "\t The enable_hist and disable_hist triggers can be used to\n" 4457 "\t have one event conditionally start and stop another event's\n" 4458 "\t already-attached hist trigger. The syntax is analogous to\n" 4459 "\t the enable_event and disable_event triggers.\n\n" 4460 "\t Hist trigger handlers and actions are executed whenever a\n" 4461 "\t a histogram entry is added or updated. They take the form:\n\n" 4462 "\t <handler>.<action>\n\n" 4463 "\t The available handlers are:\n\n" 4464 "\t onmatch(matching.event) - invoke on addition or update\n" 4465 "\t onmax(var) - invoke if var exceeds current max\n" 4466 "\t onchange(var) - invoke action if var changes\n\n" 4467 "\t The available actions are:\n\n" 4468 "\t trace(<synthetic_event>,param list) - generate synthetic event\n" 4469 "\t save(field,...) 
- save current event fields\n" 4470 #ifdef CONFIG_TRACER_SNAPSHOT 4471 "\t snapshot() - snapshot the trace buffer\n\n" 4472 #endif 4473 #ifdef CONFIG_SYNTH_EVENTS 4474 " synthetic_events\t- Create/append/remove/show synthetic events\n" 4475 "\t Write into this file to define/undefine new synthetic events.\n" 4476 "\t example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n" 4477 #endif 4478 #endif 4479 ; 4480 4481 static ssize_t 4482 tracing_readme_read(struct file *filp, char __user *ubuf, 4483 size_t cnt, loff_t *ppos) 4484 { 4485 return simple_read_from_buffer(ubuf, cnt, ppos, 4486 readme_msg, strlen(readme_msg)); 4487 } 4488 4489 static const struct file_operations tracing_readme_fops = { 4490 .open = tracing_open_generic, 4491 .read = tracing_readme_read, 4492 .llseek = generic_file_llseek, 4493 }; 4494 4495 #ifdef CONFIG_TRACE_EVAL_MAP_FILE 4496 static union trace_eval_map_item * 4497 update_eval_map(union trace_eval_map_item *ptr) 4498 { 4499 if (!ptr->map.eval_string) { 4500 if (ptr->tail.next) { 4501 ptr = ptr->tail.next; 4502 /* Set ptr to the next real item (skip head) */ 4503 ptr++; 4504 } else 4505 return NULL; 4506 } 4507 return ptr; 4508 } 4509 4510 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos) 4511 { 4512 union trace_eval_map_item *ptr = v; 4513 4514 /* 4515 * Paranoid! If ptr points to end, we don't want to increment past it. 4516 * This really should never happen. 
4517 */ 4518 (*pos)++; 4519 ptr = update_eval_map(ptr); 4520 if (WARN_ON_ONCE(!ptr)) 4521 return NULL; 4522 4523 ptr++; 4524 ptr = update_eval_map(ptr); 4525 4526 return ptr; 4527 } 4528 4529 static void *eval_map_start(struct seq_file *m, loff_t *pos) 4530 { 4531 union trace_eval_map_item *v; 4532 loff_t l = 0; 4533 4534 mutex_lock(&trace_eval_mutex); 4535 4536 v = trace_eval_maps; 4537 if (v) 4538 v++; 4539 4540 while (v && l < *pos) { 4541 v = eval_map_next(m, v, &l); 4542 } 4543 4544 return v; 4545 } 4546 4547 static void eval_map_stop(struct seq_file *m, void *v) 4548 { 4549 mutex_unlock(&trace_eval_mutex); 4550 } 4551 4552 static int eval_map_show(struct seq_file *m, void *v) 4553 { 4554 union trace_eval_map_item *ptr = v; 4555 4556 seq_printf(m, "%s %ld (%s)\n", 4557 ptr->map.eval_string, ptr->map.eval_value, 4558 ptr->map.system); 4559 4560 return 0; 4561 } 4562 4563 static const struct seq_operations tracing_eval_map_seq_ops = { 4564 .start = eval_map_start, 4565 .next = eval_map_next, 4566 .stop = eval_map_stop, 4567 .show = eval_map_show, 4568 }; 4569 4570 static int tracing_eval_map_open(struct inode *inode, struct file *filp) 4571 { 4572 int ret; 4573 4574 ret = tracing_check_open_get_tr(NULL); 4575 if (ret) 4576 return ret; 4577 4578 return seq_open(filp, &tracing_eval_map_seq_ops); 4579 } 4580 4581 static const struct file_operations tracing_eval_map_fops = { 4582 .open = tracing_eval_map_open, 4583 .read = seq_read, 4584 .llseek = seq_lseek, 4585 .release = seq_release, 4586 }; 4587 4588 static inline union trace_eval_map_item * 4589 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr) 4590 { 4591 /* Return tail of array given the head */ 4592 return ptr + ptr->head.length + 1; 4593 } 4594 4595 static void 4596 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start, 4597 int len) 4598 { 4599 struct trace_eval_map **stop; 4600 struct trace_eval_map **map; 4601 union trace_eval_map_item *map_array; 4602 union 
trace_eval_map_item *ptr; 4603 4604 stop = start + len; 4605 4606 /* 4607 * The trace_eval_maps contains the map plus a head and tail item, 4608 * where the head holds the module and length of array, and the 4609 * tail holds a pointer to the next list. 4610 */ 4611 map_array = kmalloc_objs(*map_array, len + 2); 4612 if (!map_array) { 4613 pr_warn("Unable to allocate trace eval mapping\n"); 4614 return; 4615 } 4616 4617 guard(mutex)(&trace_eval_mutex); 4618 4619 if (!trace_eval_maps) 4620 trace_eval_maps = map_array; 4621 else { 4622 ptr = trace_eval_maps; 4623 for (;;) { 4624 ptr = trace_eval_jmp_to_tail(ptr); 4625 if (!ptr->tail.next) 4626 break; 4627 ptr = ptr->tail.next; 4628 4629 } 4630 ptr->tail.next = map_array; 4631 } 4632 map_array->head.mod = mod; 4633 map_array->head.length = len; 4634 map_array++; 4635 4636 for (map = start; (unsigned long)map < (unsigned long)stop; map++) { 4637 map_array->map = **map; 4638 map_array++; 4639 } 4640 memset(map_array, 0, sizeof(*map_array)); 4641 } 4642 4643 static void trace_create_eval_file(struct dentry *d_tracer) 4644 { 4645 trace_create_file("eval_map", TRACE_MODE_READ, d_tracer, 4646 NULL, &tracing_eval_map_fops); 4647 } 4648 4649 #else /* CONFIG_TRACE_EVAL_MAP_FILE */ 4650 static inline void trace_create_eval_file(struct dentry *d_tracer) { } 4651 static inline void trace_insert_eval_map_file(struct module *mod, 4652 struct trace_eval_map **start, int len) { } 4653 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */ 4654 4655 static void 4656 trace_event_update_with_eval_map(struct module *mod, 4657 struct trace_eval_map **start, 4658 int len) 4659 { 4660 struct trace_eval_map **map; 4661 4662 /* Always run sanitizer only if btf_type_tag attr exists. 
*/ 4663 if (len <= 0) { 4664 if (!(IS_ENABLED(CONFIG_DEBUG_INFO_BTF) && 4665 IS_ENABLED(CONFIG_PAHOLE_HAS_BTF_TAG) && 4666 __has_attribute(btf_type_tag))) 4667 return; 4668 } 4669 4670 map = start; 4671 4672 trace_event_update_all(map, len); 4673 4674 if (len <= 0) 4675 return; 4676 4677 trace_insert_eval_map_file(mod, start, len); 4678 } 4679 4680 static ssize_t 4681 tracing_set_trace_read(struct file *filp, char __user *ubuf, 4682 size_t cnt, loff_t *ppos) 4683 { 4684 struct trace_array *tr = filp->private_data; 4685 char buf[MAX_TRACER_SIZE+2]; 4686 int r; 4687 4688 scoped_guard(mutex, &trace_types_lock) { 4689 r = sprintf(buf, "%s\n", tr->current_trace->name); 4690 } 4691 4692 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 4693 } 4694 4695 int tracer_init(struct tracer *t, struct trace_array *tr) 4696 { 4697 tracing_reset_online_cpus(&tr->array_buffer); 4698 update_last_data_if_empty(tr); 4699 return t->init(tr); 4700 } 4701 4702 void trace_set_buffer_entries(struct array_buffer *buf, unsigned long val) 4703 { 4704 int cpu; 4705 4706 for_each_tracing_cpu(cpu) 4707 per_cpu_ptr(buf->data, cpu)->entries = val; 4708 } 4709 4710 static void update_buffer_entries(struct array_buffer *buf, int cpu) 4711 { 4712 if (cpu == RING_BUFFER_ALL_CPUS) { 4713 trace_set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0)); 4714 } else { 4715 per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu); 4716 } 4717 } 4718 4719 static int __tracing_resize_ring_buffer(struct trace_array *tr, 4720 unsigned long size, int cpu) 4721 { 4722 int ret; 4723 4724 /* 4725 * If kernel or user changes the size of the ring buffer 4726 * we use the size that was given, and we can forget about 4727 * expanding it later. 
4728 */ 4729 trace_set_ring_buffer_expanded(tr); 4730 4731 /* May be called before buffers are initialized */ 4732 if (!tr->array_buffer.buffer) 4733 return 0; 4734 4735 /* Do not allow tracing while resizing ring buffer */ 4736 tracing_stop_tr(tr); 4737 4738 ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu); 4739 if (ret < 0) 4740 goto out_start; 4741 4742 #ifdef CONFIG_TRACER_SNAPSHOT 4743 if (!tr->allocated_snapshot) 4744 goto out; 4745 4746 ret = ring_buffer_resize(tr->snapshot_buffer.buffer, size, cpu); 4747 if (ret < 0) { 4748 int r = resize_buffer_duplicate_size(&tr->array_buffer, 4749 &tr->array_buffer, cpu); 4750 if (r < 0) { 4751 /* 4752 * AARGH! We are left with different 4753 * size max buffer!!!! 4754 * The max buffer is our "snapshot" buffer. 4755 * When a tracer needs a snapshot (one of the 4756 * latency tracers), it swaps the max buffer 4757 * with the saved snap shot. We succeeded to 4758 * update the size of the main buffer, but failed to 4759 * update the size of the max buffer. But when we tried 4760 * to reset the main buffer to the original size, we 4761 * failed there too. This is very unlikely to 4762 * happen, but if it does, warn and kill all 4763 * tracing. 
4764 */ 4765 WARN_ON(1); 4766 tracing_disabled = 1; 4767 } 4768 goto out_start; 4769 } 4770 4771 update_buffer_entries(&tr->snapshot_buffer, cpu); 4772 4773 out: 4774 #endif /* CONFIG_TRACER_SNAPSHOT */ 4775 4776 update_buffer_entries(&tr->array_buffer, cpu); 4777 out_start: 4778 tracing_start_tr(tr); 4779 return ret; 4780 } 4781 4782 ssize_t tracing_resize_ring_buffer(struct trace_array *tr, 4783 unsigned long size, int cpu_id) 4784 { 4785 guard(mutex)(&trace_types_lock); 4786 4787 if (cpu_id != RING_BUFFER_ALL_CPUS) { 4788 /* make sure, this cpu is enabled in the mask */ 4789 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) 4790 return -EINVAL; 4791 } 4792 4793 return __tracing_resize_ring_buffer(tr, size, cpu_id); 4794 } 4795 4796 struct trace_mod_entry { 4797 unsigned long mod_addr; 4798 char mod_name[MODULE_NAME_LEN]; 4799 }; 4800 4801 struct trace_scratch { 4802 unsigned int clock_id; 4803 unsigned long text_addr; 4804 unsigned long nr_entries; 4805 struct trace_mod_entry entries[]; 4806 }; 4807 4808 static DEFINE_MUTEX(scratch_mutex); 4809 4810 static int cmp_mod_entry(const void *key, const void *pivot) 4811 { 4812 unsigned long addr = (unsigned long)key; 4813 const struct trace_mod_entry *ent = pivot; 4814 4815 if (addr < ent[0].mod_addr) 4816 return -1; 4817 4818 return addr >= ent[1].mod_addr; 4819 } 4820 4821 /** 4822 * trace_adjust_address() - Adjust prev boot address to current address. 4823 * @tr: Persistent ring buffer's trace_array. 4824 * @addr: Address in @tr which is adjusted. 4825 */ 4826 unsigned long trace_adjust_address(struct trace_array *tr, unsigned long addr) 4827 { 4828 struct trace_module_delta *module_delta; 4829 struct trace_scratch *tscratch; 4830 struct trace_mod_entry *entry; 4831 unsigned long raddr; 4832 int idx = 0, nr_entries; 4833 4834 /* If we don't have last boot delta, return the address */ 4835 if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) 4836 return addr; 4837 4838 /* tr->module_delta must be protected by rcu. 
*/ 4839 guard(rcu)(); 4840 tscratch = tr->scratch; 4841 /* if there is no tscrach, module_delta must be NULL. */ 4842 module_delta = READ_ONCE(tr->module_delta); 4843 if (!module_delta || !tscratch->nr_entries || 4844 tscratch->entries[0].mod_addr > addr) { 4845 raddr = addr + tr->text_delta; 4846 return __is_kernel(raddr) || is_kernel_core_data(raddr) || 4847 is_kernel_rodata(raddr) ? raddr : addr; 4848 } 4849 4850 /* Note that entries must be sorted. */ 4851 nr_entries = tscratch->nr_entries; 4852 if (nr_entries == 1 || 4853 tscratch->entries[nr_entries - 1].mod_addr < addr) 4854 idx = nr_entries - 1; 4855 else { 4856 entry = __inline_bsearch((void *)addr, 4857 tscratch->entries, 4858 nr_entries - 1, 4859 sizeof(tscratch->entries[0]), 4860 cmp_mod_entry); 4861 if (entry) 4862 idx = entry - tscratch->entries; 4863 } 4864 4865 return addr + module_delta->delta[idx]; 4866 } 4867 4868 #ifdef CONFIG_MODULES 4869 static int save_mod(struct module *mod, void *data) 4870 { 4871 struct trace_array *tr = data; 4872 struct trace_scratch *tscratch; 4873 struct trace_mod_entry *entry; 4874 unsigned int size; 4875 4876 tscratch = tr->scratch; 4877 if (!tscratch) 4878 return -1; 4879 size = tr->scratch_size; 4880 4881 if (struct_size(tscratch, entries, tscratch->nr_entries + 1) > size) 4882 return -1; 4883 4884 entry = &tscratch->entries[tscratch->nr_entries]; 4885 4886 tscratch->nr_entries++; 4887 4888 entry->mod_addr = (unsigned long)mod->mem[MOD_TEXT].base; 4889 strscpy(entry->mod_name, mod->name); 4890 4891 return 0; 4892 } 4893 #else 4894 static int save_mod(struct module *mod, void *data) 4895 { 4896 return 0; 4897 } 4898 #endif 4899 4900 static void update_last_data(struct trace_array *tr) 4901 { 4902 struct trace_module_delta *module_delta; 4903 struct trace_scratch *tscratch; 4904 4905 if (!(tr->flags & TRACE_ARRAY_FL_BOOT)) 4906 return; 4907 4908 if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) 4909 return; 4910 4911 /* Only if the buffer has previous boot data clear and 
update it. */ 4912 tr->flags &= ~TRACE_ARRAY_FL_LAST_BOOT; 4913 4914 /* If this is a backup instance, mark it for autoremove. */ 4915 if (tr->flags & TRACE_ARRAY_FL_VMALLOC) 4916 tr->free_on_close = true; 4917 4918 /* Reset the module list and reload them */ 4919 if (tr->scratch) { 4920 struct trace_scratch *tscratch = tr->scratch; 4921 4922 tscratch->clock_id = tr->clock_id; 4923 memset(tscratch->entries, 0, 4924 flex_array_size(tscratch, entries, tscratch->nr_entries)); 4925 tscratch->nr_entries = 0; 4926 4927 guard(mutex)(&scratch_mutex); 4928 module_for_each_mod(save_mod, tr); 4929 } 4930 4931 /* 4932 * Need to clear all CPU buffers as there cannot be events 4933 * from the previous boot mixed with events with this boot 4934 * as that will cause a confusing trace. Need to clear all 4935 * CPU buffers, even for those that may currently be offline. 4936 */ 4937 tracing_reset_all_cpus(&tr->array_buffer); 4938 4939 /* Using current data now */ 4940 tr->text_delta = 0; 4941 4942 if (!tr->scratch) 4943 return; 4944 4945 tscratch = tr->scratch; 4946 module_delta = READ_ONCE(tr->module_delta); 4947 WRITE_ONCE(tr->module_delta, NULL); 4948 kfree_rcu(module_delta, rcu); 4949 4950 /* Set the persistent ring buffer meta data to this address */ 4951 tscratch->text_addr = (unsigned long)_text; 4952 } 4953 4954 /** 4955 * tracing_update_buffers - used by tracing facility to expand ring buffers 4956 * @tr: The tracing instance 4957 * 4958 * To save on memory when the tracing is never used on a system with it 4959 * configured in. The ring buffers are set to a minimum size. But once 4960 * a user starts to use the tracing facility, then they need to grow 4961 * to their default size. 4962 * 4963 * This function is to be called when a tracer is about to be used. 
4964 */ 4965 int tracing_update_buffers(struct trace_array *tr) 4966 { 4967 int ret = 0; 4968 4969 if (!tr) 4970 tr = &global_trace; 4971 4972 guard(mutex)(&trace_types_lock); 4973 4974 update_last_data(tr); 4975 4976 if (!tr->ring_buffer_expanded) 4977 ret = __tracing_resize_ring_buffer(tr, trace_buf_size, 4978 RING_BUFFER_ALL_CPUS); 4979 return ret; 4980 } 4981 4982 /* 4983 * Used to clear out the tracer before deletion of an instance. 4984 * Must have trace_types_lock held. 4985 */ 4986 static void tracing_set_nop(struct trace_array *tr) 4987 { 4988 if (tr->current_trace == &nop_trace) 4989 return; 4990 4991 tr->current_trace->enabled--; 4992 4993 if (tr->current_trace->reset) 4994 tr->current_trace->reset(tr); 4995 4996 tr->current_trace = &nop_trace; 4997 tr->current_trace_flags = nop_trace.flags; 4998 } 4999 5000 static bool tracer_options_updated; 5001 5002 int tracing_set_tracer(struct trace_array *tr, const char *buf) 5003 { 5004 struct tracer *trace = NULL; 5005 struct tracers *t; 5006 bool had_max_tr; 5007 int ret; 5008 5009 guard(mutex)(&trace_types_lock); 5010 5011 update_last_data(tr); 5012 5013 if (!tr->ring_buffer_expanded) { 5014 ret = __tracing_resize_ring_buffer(tr, trace_buf_size, 5015 RING_BUFFER_ALL_CPUS); 5016 if (ret < 0) 5017 return ret; 5018 ret = 0; 5019 } 5020 5021 list_for_each_entry(t, &tr->tracers, list) { 5022 if (strcmp(t->tracer->name, buf) == 0) { 5023 trace = t->tracer; 5024 break; 5025 } 5026 } 5027 if (!trace) 5028 return -EINVAL; 5029 5030 if (trace == tr->current_trace) 5031 return 0; 5032 5033 #ifdef CONFIG_TRACER_SNAPSHOT 5034 if (tracer_uses_snapshot(trace)) { 5035 local_irq_disable(); 5036 arch_spin_lock(&tr->max_lock); 5037 ret = tr->cond_snapshot ? 
-EBUSY : 0; 5038 arch_spin_unlock(&tr->max_lock); 5039 local_irq_enable(); 5040 if (ret) 5041 return ret; 5042 } 5043 #endif 5044 /* Some tracers won't work on kernel command line */ 5045 if (system_state < SYSTEM_RUNNING && trace->noboot) { 5046 pr_warn("Tracer '%s' is not allowed on command line, ignored\n", 5047 trace->name); 5048 return -EINVAL; 5049 } 5050 5051 /* Some tracers are only allowed for the top level buffer */ 5052 if (!trace_ok_for_array(trace, tr)) 5053 return -EINVAL; 5054 5055 /* If trace pipe files are being read, we can't change the tracer */ 5056 if (tr->trace_ref) 5057 return -EBUSY; 5058 5059 trace_branch_disable(); 5060 5061 tr->current_trace->enabled--; 5062 5063 if (tr->current_trace->reset) 5064 tr->current_trace->reset(tr); 5065 5066 had_max_tr = tracer_uses_snapshot(tr->current_trace); 5067 5068 /* Current trace needs to be nop_trace before synchronize_rcu */ 5069 tr->current_trace = &nop_trace; 5070 tr->current_trace_flags = nop_trace.flags; 5071 5072 if (had_max_tr && !tracer_uses_snapshot(trace)) { 5073 /* 5074 * We need to make sure that the update_max_tr sees that 5075 * current_trace changed to nop_trace to keep it from 5076 * swapping the buffers after we resize it. 5077 * The update_max_tr is called from interrupts disabled 5078 * so a synchronized_sched() is sufficient. 5079 */ 5080 synchronize_rcu(); 5081 free_snapshot(tr); 5082 tracing_disarm_snapshot(tr); 5083 } 5084 5085 if (!had_max_tr && tracer_uses_snapshot(trace)) { 5086 ret = tracing_arm_snapshot_locked(tr); 5087 if (ret) 5088 return ret; 5089 } 5090 5091 tr->current_trace_flags = t->flags ? 
: t->tracer->flags; 5092 5093 if (trace->init) { 5094 ret = tracer_init(trace, tr); 5095 if (ret) { 5096 if (tracer_uses_snapshot(trace)) 5097 tracing_disarm_snapshot(tr); 5098 tr->current_trace_flags = nop_trace.flags; 5099 return ret; 5100 } 5101 } 5102 5103 tr->current_trace = trace; 5104 tr->current_trace->enabled++; 5105 trace_branch_enable(tr); 5106 5107 return 0; 5108 } 5109 5110 static ssize_t 5111 tracing_set_trace_write(struct file *filp, const char __user *ubuf, 5112 size_t cnt, loff_t *ppos) 5113 { 5114 struct trace_array *tr = filp->private_data; 5115 char buf[MAX_TRACER_SIZE+1]; 5116 char *name; 5117 size_t ret; 5118 int err; 5119 5120 ret = cnt; 5121 5122 if (cnt > MAX_TRACER_SIZE) 5123 cnt = MAX_TRACER_SIZE; 5124 5125 if (copy_from_user(buf, ubuf, cnt)) 5126 return -EFAULT; 5127 5128 buf[cnt] = 0; 5129 5130 name = strim(buf); 5131 5132 err = tracing_set_tracer(tr, name); 5133 if (err) 5134 return err; 5135 5136 *ppos += ret; 5137 5138 return ret; 5139 } 5140 5141 ssize_t tracing_nsecs_read(unsigned long *ptr, char __user *ubuf, 5142 size_t cnt, loff_t *ppos) 5143 { 5144 char buf[64]; 5145 int r; 5146 5147 r = snprintf(buf, sizeof(buf), "%ld\n", 5148 *ptr == (unsigned long)-1 ? 
-1 : nsecs_to_usecs(*ptr)); 5149 if (r > sizeof(buf)) 5150 r = sizeof(buf); 5151 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 5152 } 5153 5154 ssize_t tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf, 5155 size_t cnt, loff_t *ppos) 5156 { 5157 unsigned long val; 5158 int ret; 5159 5160 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 5161 if (ret) 5162 return ret; 5163 5164 *ptr = val * 1000; 5165 5166 return cnt; 5167 } 5168 5169 static ssize_t 5170 tracing_thresh_read(struct file *filp, char __user *ubuf, 5171 size_t cnt, loff_t *ppos) 5172 { 5173 return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos); 5174 } 5175 5176 static ssize_t 5177 tracing_thresh_write(struct file *filp, const char __user *ubuf, 5178 size_t cnt, loff_t *ppos) 5179 { 5180 struct trace_array *tr = filp->private_data; 5181 int ret; 5182 5183 guard(mutex)(&trace_types_lock); 5184 ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos); 5185 if (ret < 0) 5186 return ret; 5187 5188 if (tr->current_trace->update_thresh) { 5189 ret = tr->current_trace->update_thresh(tr); 5190 if (ret < 0) 5191 return ret; 5192 } 5193 5194 return cnt; 5195 } 5196 5197 static int open_pipe_on_cpu(struct trace_array *tr, int cpu) 5198 { 5199 if (cpu == RING_BUFFER_ALL_CPUS) { 5200 if (cpumask_empty(tr->pipe_cpumask)) { 5201 cpumask_setall(tr->pipe_cpumask); 5202 return 0; 5203 } 5204 } else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) { 5205 cpumask_set_cpu(cpu, tr->pipe_cpumask); 5206 return 0; 5207 } 5208 return -EBUSY; 5209 } 5210 5211 static void close_pipe_on_cpu(struct trace_array *tr, int cpu) 5212 { 5213 if (cpu == RING_BUFFER_ALL_CPUS) { 5214 WARN_ON(!cpumask_full(tr->pipe_cpumask)); 5215 cpumask_clear(tr->pipe_cpumask); 5216 } else { 5217 WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask)); 5218 cpumask_clear_cpu(cpu, tr->pipe_cpumask); 5219 } 5220 } 5221 5222 static int tracing_open_pipe(struct inode *inode, struct file *filp) 5223 { 5224 struct trace_array *tr = 
inode->i_private; 5225 struct trace_iterator *iter; 5226 int cpu; 5227 int ret; 5228 5229 ret = tracing_check_open_get_tr(tr); 5230 if (ret) 5231 return ret; 5232 5233 guard(mutex)(&trace_types_lock); 5234 cpu = tracing_get_cpu(inode); 5235 ret = open_pipe_on_cpu(tr, cpu); 5236 if (ret) 5237 goto fail_pipe_on_cpu; 5238 5239 /* create a buffer to store the information to pass to userspace */ 5240 iter = kzalloc_obj(*iter); 5241 if (!iter) { 5242 ret = -ENOMEM; 5243 goto fail_alloc_iter; 5244 } 5245 5246 trace_seq_init(&iter->seq); 5247 iter->trace = tr->current_trace; 5248 5249 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) { 5250 ret = -ENOMEM; 5251 goto fail; 5252 } 5253 5254 /* trace pipe does not show start of buffer */ 5255 cpumask_setall(iter->started); 5256 5257 if (tr->trace_flags & TRACE_ITER(LATENCY_FMT)) 5258 iter->iter_flags |= TRACE_FILE_LAT_FMT; 5259 5260 /* Output in nanoseconds only if we are using a clock in nanoseconds. */ 5261 if (trace_clocks[tr->clock_id].in_ns) 5262 iter->iter_flags |= TRACE_FILE_TIME_IN_NS; 5263 5264 iter->tr = tr; 5265 iter->array_buffer = &tr->array_buffer; 5266 iter->cpu_file = cpu; 5267 mutex_init(&iter->mutex); 5268 filp->private_data = iter; 5269 5270 if (iter->trace->pipe_open) 5271 iter->trace->pipe_open(iter); 5272 5273 nonseekable_open(inode, filp); 5274 5275 tr->trace_ref++; 5276 5277 return ret; 5278 5279 fail: 5280 kfree(iter); 5281 fail_alloc_iter: 5282 close_pipe_on_cpu(tr, cpu); 5283 fail_pipe_on_cpu: 5284 __trace_array_put(tr); 5285 return ret; 5286 } 5287 5288 static int tracing_release_pipe(struct inode *inode, struct file *file) 5289 { 5290 struct trace_iterator *iter = file->private_data; 5291 struct trace_array *tr = inode->i_private; 5292 5293 scoped_guard(mutex, &trace_types_lock) { 5294 tr->trace_ref--; 5295 5296 if (iter->trace->pipe_close) 5297 iter->trace->pipe_close(iter); 5298 close_pipe_on_cpu(tr, iter->cpu_file); 5299 } 5300 5301 free_trace_iter_content(iter); 5302 kfree(iter); 5303 5304 
trace_array_put(tr); 5305 5306 return 0; 5307 } 5308 5309 static __poll_t 5310 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table) 5311 { 5312 struct trace_array *tr = iter->tr; 5313 5314 /* Iterators are static, they should be filled or empty */ 5315 if (trace_buffer_iter(iter, iter->cpu_file)) 5316 return EPOLLIN | EPOLLRDNORM; 5317 5318 if (tr->trace_flags & TRACE_ITER(BLOCK)) 5319 /* 5320 * Always select as readable when in blocking mode 5321 */ 5322 return EPOLLIN | EPOLLRDNORM; 5323 else 5324 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file, 5325 filp, poll_table, iter->tr->buffer_percent); 5326 } 5327 5328 static __poll_t 5329 tracing_poll_pipe(struct file *filp, poll_table *poll_table) 5330 { 5331 struct trace_iterator *iter = filp->private_data; 5332 5333 return trace_poll(iter, filp, poll_table); 5334 } 5335 5336 /* Must be called with iter->mutex held. */ 5337 static int tracing_wait_pipe(struct file *filp) 5338 { 5339 struct trace_iterator *iter = filp->private_data; 5340 int ret; 5341 5342 while (trace_empty(iter)) { 5343 5344 if ((filp->f_flags & O_NONBLOCK)) { 5345 return -EAGAIN; 5346 } 5347 5348 /* 5349 * We block until we read something and tracing is disabled. 5350 * We still block if tracing is disabled, but we have never 5351 * read anything. This allows a user to cat this file, and 5352 * then enable tracing. But after we have read something, 5353 * we give an EOF when tracing is again disabled. 5354 * 5355 * iter->pos will be 0 if we haven't read anything. 
5356 */ 5357 if (!tracer_tracing_is_on(iter->tr) && iter->pos) 5358 break; 5359 5360 mutex_unlock(&iter->mutex); 5361 5362 ret = wait_on_pipe(iter, 0); 5363 5364 mutex_lock(&iter->mutex); 5365 5366 if (ret) 5367 return ret; 5368 } 5369 5370 return 1; 5371 } 5372 5373 static bool update_last_data_if_empty(struct trace_array *tr) 5374 { 5375 if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) 5376 return false; 5377 5378 if (!ring_buffer_empty(tr->array_buffer.buffer)) 5379 return false; 5380 5381 /* 5382 * If the buffer contains the last boot data and all per-cpu 5383 * buffers are empty, reset it from the kernel side. 5384 */ 5385 update_last_data(tr); 5386 return true; 5387 } 5388 5389 /* 5390 * Consumer reader. 5391 */ 5392 static ssize_t 5393 tracing_read_pipe(struct file *filp, char __user *ubuf, 5394 size_t cnt, loff_t *ppos) 5395 { 5396 struct trace_iterator *iter = filp->private_data; 5397 ssize_t sret; 5398 5399 /* 5400 * Avoid more than one consumer on a single file descriptor 5401 * This is just a matter of traces coherency, the ring buffer itself 5402 * is protected. 
5403 */ 5404 guard(mutex)(&iter->mutex); 5405 5406 /* return any leftover data */ 5407 sret = trace_seq_to_user(&iter->seq, ubuf, cnt); 5408 if (sret != -EBUSY) 5409 return sret; 5410 5411 trace_seq_init(&iter->seq); 5412 5413 if (iter->trace->read) { 5414 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos); 5415 if (sret) 5416 return sret; 5417 } 5418 5419 waitagain: 5420 if (update_last_data_if_empty(iter->tr)) 5421 return 0; 5422 5423 sret = tracing_wait_pipe(filp); 5424 if (sret <= 0) 5425 return sret; 5426 5427 /* stop when tracing is finished */ 5428 if (trace_empty(iter)) 5429 return 0; 5430 5431 if (cnt >= TRACE_SEQ_BUFFER_SIZE) 5432 cnt = TRACE_SEQ_BUFFER_SIZE - 1; 5433 5434 /* reset all but tr, trace, and overruns */ 5435 trace_iterator_reset(iter); 5436 cpumask_clear(iter->started); 5437 trace_seq_init(&iter->seq); 5438 5439 trace_event_read_lock(); 5440 trace_access_lock(iter->cpu_file); 5441 while (trace_find_next_entry_inc(iter) != NULL) { 5442 enum print_line_t ret; 5443 int save_len = iter->seq.seq.len; 5444 5445 ret = print_trace_line(iter); 5446 if (ret == TRACE_TYPE_PARTIAL_LINE) { 5447 /* 5448 * If one print_trace_line() fills entire trace_seq in one shot, 5449 * trace_seq_to_user() will returns -EBUSY because save_len == 0, 5450 * In this case, we need to consume it, otherwise, loop will peek 5451 * this event next time, resulting in an infinite loop. 5452 */ 5453 if (save_len == 0) { 5454 iter->seq.full = 0; 5455 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n"); 5456 trace_consume(iter); 5457 break; 5458 } 5459 5460 /* In other cases, don't print partial lines */ 5461 iter->seq.seq.len = save_len; 5462 break; 5463 } 5464 if (ret != TRACE_TYPE_NO_CONSUME) 5465 trace_consume(iter); 5466 5467 if (trace_seq_used(&iter->seq) >= cnt) 5468 break; 5469 5470 /* 5471 * Setting the full flag means we reached the trace_seq buffer 5472 * size and we should leave by partial output condition above. 
5473 * One of the trace_seq_* functions is not used properly. 5474 */ 5475 WARN_ONCE(iter->seq.full, "full flag set for trace type %d", 5476 iter->ent->type); 5477 } 5478 trace_access_unlock(iter->cpu_file); 5479 trace_event_read_unlock(); 5480 5481 /* Now copy what we have to the user */ 5482 sret = trace_seq_to_user(&iter->seq, ubuf, cnt); 5483 if (iter->seq.readpos >= trace_seq_used(&iter->seq)) 5484 trace_seq_init(&iter->seq); 5485 5486 /* 5487 * If there was nothing to send to user, in spite of consuming trace 5488 * entries, go back to wait for more entries. 5489 */ 5490 if (sret == -EBUSY) 5491 goto waitagain; 5492 5493 return sret; 5494 } 5495 5496 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd, 5497 unsigned int idx) 5498 { 5499 __free_page(spd->pages[idx]); 5500 } 5501 5502 static size_t 5503 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter) 5504 { 5505 size_t count; 5506 int save_len; 5507 int ret; 5508 5509 /* Seq buffer is page-sized, exactly what we need. */ 5510 for (;;) { 5511 save_len = iter->seq.seq.len; 5512 ret = print_trace_line(iter); 5513 5514 if (trace_seq_has_overflowed(&iter->seq)) { 5515 iter->seq.seq.len = save_len; 5516 break; 5517 } 5518 5519 /* 5520 * This should not be hit, because it should only 5521 * be set if the iter->seq overflowed. But check it 5522 * anyway to be safe. 
5523 */ 5524 if (ret == TRACE_TYPE_PARTIAL_LINE) { 5525 iter->seq.seq.len = save_len; 5526 break; 5527 } 5528 5529 count = trace_seq_used(&iter->seq) - save_len; 5530 if (rem < count) { 5531 rem = 0; 5532 iter->seq.seq.len = save_len; 5533 break; 5534 } 5535 5536 if (ret != TRACE_TYPE_NO_CONSUME) 5537 trace_consume(iter); 5538 rem -= count; 5539 if (!trace_find_next_entry_inc(iter)) { 5540 rem = 0; 5541 iter->ent = NULL; 5542 break; 5543 } 5544 } 5545 5546 return rem; 5547 } 5548 5549 static ssize_t tracing_splice_read_pipe(struct file *filp, 5550 loff_t *ppos, 5551 struct pipe_inode_info *pipe, 5552 size_t len, 5553 unsigned int flags) 5554 { 5555 struct page *pages_def[PIPE_DEF_BUFFERS]; 5556 struct partial_page partial_def[PIPE_DEF_BUFFERS]; 5557 struct trace_iterator *iter = filp->private_data; 5558 struct splice_pipe_desc spd = { 5559 .pages = pages_def, 5560 .partial = partial_def, 5561 .nr_pages = 0, /* This gets updated below. */ 5562 .nr_pages_max = PIPE_DEF_BUFFERS, 5563 .ops = &default_pipe_buf_ops, 5564 .spd_release = tracing_spd_release_pipe, 5565 }; 5566 ssize_t ret; 5567 size_t rem; 5568 unsigned int i; 5569 5570 if (splice_grow_spd(pipe, &spd)) 5571 return -ENOMEM; 5572 5573 mutex_lock(&iter->mutex); 5574 5575 if (iter->trace->splice_read) { 5576 ret = iter->trace->splice_read(iter, filp, 5577 ppos, pipe, len, flags); 5578 if (ret) 5579 goto out_err; 5580 } 5581 5582 ret = tracing_wait_pipe(filp); 5583 if (ret <= 0) 5584 goto out_err; 5585 5586 if (!iter->ent && !trace_find_next_entry_inc(iter)) { 5587 ret = -EFAULT; 5588 goto out_err; 5589 } 5590 5591 trace_event_read_lock(); 5592 trace_access_lock(iter->cpu_file); 5593 5594 /* Fill as many pages as possible. */ 5595 for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) { 5596 spd.pages[i] = alloc_page(GFP_KERNEL); 5597 if (!spd.pages[i]) 5598 break; 5599 5600 rem = tracing_fill_pipe_page(rem, iter); 5601 5602 /* Copy the data into the page, so we can start over. 
*/ 5603 ret = trace_seq_to_buffer(&iter->seq, 5604 page_address(spd.pages[i]), 5605 min((size_t)trace_seq_used(&iter->seq), 5606 (size_t)PAGE_SIZE)); 5607 if (ret < 0) { 5608 __free_page(spd.pages[i]); 5609 break; 5610 } 5611 spd.partial[i].offset = 0; 5612 spd.partial[i].len = ret; 5613 5614 trace_seq_init(&iter->seq); 5615 } 5616 5617 trace_access_unlock(iter->cpu_file); 5618 trace_event_read_unlock(); 5619 mutex_unlock(&iter->mutex); 5620 5621 spd.nr_pages = i; 5622 5623 if (i) 5624 ret = splice_to_pipe(pipe, &spd); 5625 else 5626 ret = 0; 5627 out: 5628 splice_shrink_spd(&spd); 5629 return ret; 5630 5631 out_err: 5632 mutex_unlock(&iter->mutex); 5633 goto out; 5634 } 5635 5636 static ssize_t 5637 tracing_syscall_buf_read(struct file *filp, char __user *ubuf, 5638 size_t cnt, loff_t *ppos) 5639 { 5640 struct inode *inode = file_inode(filp); 5641 struct trace_array *tr = inode->i_private; 5642 char buf[64]; 5643 int r; 5644 5645 r = snprintf(buf, 64, "%d\n", tr->syscall_buf_sz); 5646 5647 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 5648 } 5649 5650 static ssize_t 5651 tracing_syscall_buf_write(struct file *filp, const char __user *ubuf, 5652 size_t cnt, loff_t *ppos) 5653 { 5654 struct inode *inode = file_inode(filp); 5655 struct trace_array *tr = inode->i_private; 5656 unsigned long val; 5657 int ret; 5658 5659 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 5660 if (ret) 5661 return ret; 5662 5663 if (val > SYSCALL_FAULT_USER_MAX) 5664 val = SYSCALL_FAULT_USER_MAX; 5665 5666 tr->syscall_buf_sz = val; 5667 5668 *ppos += cnt; 5669 5670 return cnt; 5671 } 5672 5673 static ssize_t 5674 tracing_entries_read(struct file *filp, char __user *ubuf, 5675 size_t cnt, loff_t *ppos) 5676 { 5677 struct inode *inode = file_inode(filp); 5678 struct trace_array *tr = inode->i_private; 5679 int cpu = tracing_get_cpu(inode); 5680 char buf[64]; 5681 int r = 0; 5682 ssize_t ret; 5683 5684 mutex_lock(&trace_types_lock); 5685 5686 if (cpu == RING_BUFFER_ALL_CPUS) { 5687 
int cpu, buf_size_same; 5688 unsigned long size; 5689 5690 size = 0; 5691 buf_size_same = 1; 5692 /* check if all cpu sizes are same */ 5693 for_each_tracing_cpu(cpu) { 5694 /* fill in the size from first enabled cpu */ 5695 if (size == 0) 5696 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries; 5697 if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) { 5698 buf_size_same = 0; 5699 break; 5700 } 5701 } 5702 5703 if (buf_size_same) { 5704 if (!tr->ring_buffer_expanded) 5705 r = sprintf(buf, "%lu (expanded: %lu)\n", 5706 size >> 10, 5707 trace_buf_size >> 10); 5708 else 5709 r = sprintf(buf, "%lu\n", size >> 10); 5710 } else 5711 r = sprintf(buf, "X\n"); 5712 } else 5713 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10); 5714 5715 mutex_unlock(&trace_types_lock); 5716 5717 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 5718 return ret; 5719 } 5720 5721 static ssize_t 5722 tracing_entries_write(struct file *filp, const char __user *ubuf, 5723 size_t cnt, loff_t *ppos) 5724 { 5725 struct inode *inode = file_inode(filp); 5726 struct trace_array *tr = inode->i_private; 5727 unsigned long val; 5728 int ret; 5729 5730 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 5731 if (ret) 5732 return ret; 5733 5734 /* must have at least 1 entry */ 5735 if (!val) 5736 return -EINVAL; 5737 5738 /* value is in KB */ 5739 val <<= 10; 5740 ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode)); 5741 if (ret < 0) 5742 return ret; 5743 5744 *ppos += cnt; 5745 5746 return cnt; 5747 } 5748 5749 static ssize_t 5750 tracing_total_entries_read(struct file *filp, char __user *ubuf, 5751 size_t cnt, loff_t *ppos) 5752 { 5753 struct trace_array *tr = filp->private_data; 5754 char buf[64]; 5755 int r, cpu; 5756 unsigned long size = 0, expanded_size = 0; 5757 5758 mutex_lock(&trace_types_lock); 5759 for_each_tracing_cpu(cpu) { 5760 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10; 5761 if (!tr->ring_buffer_expanded) 5762 
expanded_size += trace_buf_size >> 10; 5763 } 5764 if (tr->ring_buffer_expanded) 5765 r = sprintf(buf, "%lu\n", size); 5766 else 5767 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size); 5768 mutex_unlock(&trace_types_lock); 5769 5770 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 5771 } 5772 5773 #define LAST_BOOT_HEADER ((void *)1) 5774 5775 static void *l_next(struct seq_file *m, void *v, loff_t *pos) 5776 { 5777 struct trace_array *tr = m->private; 5778 struct trace_scratch *tscratch = tr->scratch; 5779 unsigned int index = *pos; 5780 5781 (*pos)++; 5782 5783 if (*pos == 1) 5784 return LAST_BOOT_HEADER; 5785 5786 /* Only show offsets of the last boot data */ 5787 if (!tscratch || !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) 5788 return NULL; 5789 5790 /* *pos 0 is for the header, 1 is for the first module */ 5791 index--; 5792 5793 if (index >= tscratch->nr_entries) 5794 return NULL; 5795 5796 return &tscratch->entries[index]; 5797 } 5798 5799 static void *l_start(struct seq_file *m, loff_t *pos) 5800 { 5801 mutex_lock(&scratch_mutex); 5802 5803 return l_next(m, NULL, pos); 5804 } 5805 5806 static void l_stop(struct seq_file *m, void *p) 5807 { 5808 mutex_unlock(&scratch_mutex); 5809 } 5810 5811 static void show_last_boot_header(struct seq_file *m, struct trace_array *tr) 5812 { 5813 struct trace_scratch *tscratch = tr->scratch; 5814 5815 /* 5816 * Do not leak KASLR address. This only shows the KASLR address of 5817 * the last boot. When the ring buffer is started, the LAST_BOOT 5818 * flag gets cleared, and this should only report "current". 5819 * Otherwise it shows the KASLR address from the previous boot which 5820 * should not be the same as the current boot. 
5821 */ 5822 if (tscratch && (tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) 5823 seq_printf(m, "%lx\t[kernel]\n", tscratch->text_addr); 5824 else 5825 seq_puts(m, "# Current\n"); 5826 } 5827 5828 static int l_show(struct seq_file *m, void *v) 5829 { 5830 struct trace_array *tr = m->private; 5831 struct trace_mod_entry *entry = v; 5832 5833 if (v == LAST_BOOT_HEADER) { 5834 show_last_boot_header(m, tr); 5835 return 0; 5836 } 5837 5838 seq_printf(m, "%lx\t%s\n", entry->mod_addr, entry->mod_name); 5839 return 0; 5840 } 5841 5842 static const struct seq_operations last_boot_seq_ops = { 5843 .start = l_start, 5844 .next = l_next, 5845 .stop = l_stop, 5846 .show = l_show, 5847 }; 5848 5849 static int tracing_last_boot_open(struct inode *inode, struct file *file) 5850 { 5851 struct trace_array *tr = inode->i_private; 5852 struct seq_file *m; 5853 int ret; 5854 5855 ret = tracing_check_open_get_tr(tr); 5856 if (ret) 5857 return ret; 5858 5859 ret = seq_open(file, &last_boot_seq_ops); 5860 if (ret) { 5861 trace_array_put(tr); 5862 return ret; 5863 } 5864 5865 m = file->private_data; 5866 m->private = tr; 5867 5868 return 0; 5869 } 5870 5871 static int tracing_buffer_meta_open(struct inode *inode, struct file *filp) 5872 { 5873 struct trace_array *tr = inode->i_private; 5874 int cpu = tracing_get_cpu(inode); 5875 int ret; 5876 5877 ret = tracing_check_open_get_tr(tr); 5878 if (ret) 5879 return ret; 5880 5881 ret = ring_buffer_meta_seq_init(filp, tr->array_buffer.buffer, cpu); 5882 if (ret < 0) 5883 __trace_array_put(tr); 5884 return ret; 5885 } 5886 5887 static ssize_t 5888 tracing_free_buffer_write(struct file *filp, const char __user *ubuf, 5889 size_t cnt, loff_t *ppos) 5890 { 5891 /* 5892 * There is no need to read what the user has written, this function 5893 * is just to make sure that there is no error when "echo" is used 5894 */ 5895 5896 *ppos += cnt; 5897 5898 return cnt; 5899 } 5900 5901 static int 5902 tracing_free_buffer_release(struct inode *inode, struct file *filp) 
5903 { 5904 struct trace_array *tr = inode->i_private; 5905 5906 /* disable tracing ? */ 5907 if (tr->trace_flags & TRACE_ITER(STOP_ON_FREE)) 5908 tracer_tracing_off(tr); 5909 /* resize the ring buffer to 0 */ 5910 tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS); 5911 5912 trace_array_put(tr); 5913 5914 return 0; 5915 } 5916 5917 #define TRACE_MARKER_MAX_SIZE 4096 5918 5919 static ssize_t write_marker_to_buffer(struct trace_array *tr, const char *buf, 5920 size_t cnt, unsigned long ip) 5921 { 5922 struct ring_buffer_event *event; 5923 enum event_trigger_type tt = ETT_NONE; 5924 struct trace_buffer *buffer; 5925 struct print_entry *entry; 5926 int meta_size; 5927 ssize_t written; 5928 size_t size; 5929 5930 meta_size = sizeof(*entry) + 2; /* add '\0' and possible '\n' */ 5931 again: 5932 size = cnt + meta_size; 5933 5934 buffer = tr->array_buffer.buffer; 5935 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, 5936 tracing_gen_ctx()); 5937 if (unlikely(!event)) { 5938 /* 5939 * If the size was greater than what was allowed, then 5940 * make it smaller and try again. 
5941 */ 5942 if (size > ring_buffer_max_event_size(buffer)) { 5943 cnt = ring_buffer_max_event_size(buffer) - meta_size; 5944 /* The above should only happen once */ 5945 if (WARN_ON_ONCE(cnt + meta_size == size)) 5946 return -EBADF; 5947 goto again; 5948 } 5949 5950 /* Ring buffer disabled, return as if not open for write */ 5951 return -EBADF; 5952 } 5953 5954 entry = ring_buffer_event_data(event); 5955 entry->ip = ip; 5956 memcpy(&entry->buf, buf, cnt); 5957 written = cnt; 5958 5959 if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) { 5960 /* do not add \n before testing triggers, but add \0 */ 5961 entry->buf[cnt] = '\0'; 5962 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event); 5963 } 5964 5965 if (entry->buf[cnt - 1] != '\n') { 5966 entry->buf[cnt] = '\n'; 5967 entry->buf[cnt + 1] = '\0'; 5968 } else 5969 entry->buf[cnt] = '\0'; 5970 5971 if (static_branch_unlikely(&trace_marker_exports_enabled)) 5972 ftrace_exports(event, TRACE_EXPORT_MARKER); 5973 __buffer_unlock_commit(buffer, event); 5974 5975 if (tt) 5976 event_triggers_post_call(tr->trace_marker_file, tt); 5977 5978 return written; 5979 } 5980 5981 struct trace_user_buf { 5982 char *buf; 5983 }; 5984 5985 static DEFINE_MUTEX(trace_user_buffer_mutex); 5986 static struct trace_user_buf_info *trace_user_buffer; 5987 5988 /** 5989 * trace_user_fault_destroy - free up allocated memory of a trace user buffer 5990 * @tinfo: The descriptor to free up 5991 * 5992 * Frees any data allocated in the trace info dsecriptor. 
5993 */ 5994 void trace_user_fault_destroy(struct trace_user_buf_info *tinfo) 5995 { 5996 char *buf; 5997 int cpu; 5998 5999 if (!tinfo || !tinfo->tbuf) 6000 return; 6001 6002 for_each_possible_cpu(cpu) { 6003 buf = per_cpu_ptr(tinfo->tbuf, cpu)->buf; 6004 kfree(buf); 6005 } 6006 free_percpu(tinfo->tbuf); 6007 } 6008 6009 static int user_fault_buffer_enable(struct trace_user_buf_info *tinfo, size_t size) 6010 { 6011 char *buf; 6012 int cpu; 6013 6014 lockdep_assert_held(&trace_user_buffer_mutex); 6015 6016 tinfo->tbuf = alloc_percpu(struct trace_user_buf); 6017 if (!tinfo->tbuf) 6018 return -ENOMEM; 6019 6020 tinfo->ref = 1; 6021 tinfo->size = size; 6022 6023 /* Clear each buffer in case of error */ 6024 for_each_possible_cpu(cpu) { 6025 per_cpu_ptr(tinfo->tbuf, cpu)->buf = NULL; 6026 } 6027 6028 for_each_possible_cpu(cpu) { 6029 buf = kmalloc_node(size, GFP_KERNEL, 6030 cpu_to_node(cpu)); 6031 if (!buf) 6032 return -ENOMEM; 6033 per_cpu_ptr(tinfo->tbuf, cpu)->buf = buf; 6034 } 6035 6036 return 0; 6037 } 6038 6039 /* For internal use. Free and reinitialize */ 6040 static void user_buffer_free(struct trace_user_buf_info **tinfo) 6041 { 6042 lockdep_assert_held(&trace_user_buffer_mutex); 6043 6044 trace_user_fault_destroy(*tinfo); 6045 kfree(*tinfo); 6046 *tinfo = NULL; 6047 } 6048 6049 /* For internal use. 
Initialize and allocate */ 6050 static int user_buffer_init(struct trace_user_buf_info **tinfo, size_t size) 6051 { 6052 bool alloc = false; 6053 int ret; 6054 6055 lockdep_assert_held(&trace_user_buffer_mutex); 6056 6057 if (!*tinfo) { 6058 alloc = true; 6059 *tinfo = kzalloc_obj(**tinfo); 6060 if (!*tinfo) 6061 return -ENOMEM; 6062 } 6063 6064 ret = user_fault_buffer_enable(*tinfo, size); 6065 if (ret < 0 && alloc) 6066 user_buffer_free(tinfo); 6067 6068 return ret; 6069 } 6070 6071 /* For internal use, derefrence and free if necessary */ 6072 static void user_buffer_put(struct trace_user_buf_info **tinfo) 6073 { 6074 guard(mutex)(&trace_user_buffer_mutex); 6075 6076 if (WARN_ON_ONCE(!*tinfo || !(*tinfo)->ref)) 6077 return; 6078 6079 if (--(*tinfo)->ref) 6080 return; 6081 6082 user_buffer_free(tinfo); 6083 } 6084 6085 /** 6086 * trace_user_fault_init - Allocated or reference a per CPU buffer 6087 * @tinfo: A pointer to the trace buffer descriptor 6088 * @size: The size to allocate each per CPU buffer 6089 * 6090 * Create a per CPU buffer that can be used to copy from user space 6091 * in a task context. When calling trace_user_fault_read(), preemption 6092 * must be disabled, and it will enable preemption and copy user 6093 * space data to the buffer. If any schedule switches occur, it will 6094 * retry until it succeeds without a schedule switch knowing the buffer 6095 * is still valid. 6096 * 6097 * Returns 0 on success, negative on failure. 
6098 */ 6099 int trace_user_fault_init(struct trace_user_buf_info *tinfo, size_t size) 6100 { 6101 int ret; 6102 6103 if (!tinfo) 6104 return -EINVAL; 6105 6106 guard(mutex)(&trace_user_buffer_mutex); 6107 6108 ret = user_buffer_init(&tinfo, size); 6109 if (ret < 0) 6110 trace_user_fault_destroy(tinfo); 6111 6112 return ret; 6113 } 6114 6115 /** 6116 * trace_user_fault_get - up the ref count for the user buffer 6117 * @tinfo: A pointer to a pointer to the trace buffer descriptor 6118 * 6119 * Ups the ref count of the trace buffer. 6120 * 6121 * Returns the new ref count. 6122 */ 6123 int trace_user_fault_get(struct trace_user_buf_info *tinfo) 6124 { 6125 if (!tinfo) 6126 return -1; 6127 6128 guard(mutex)(&trace_user_buffer_mutex); 6129 6130 tinfo->ref++; 6131 return tinfo->ref; 6132 } 6133 6134 /** 6135 * trace_user_fault_put - dereference a per cpu trace buffer 6136 * @tinfo: The @tinfo that was passed to trace_user_fault_get() 6137 * 6138 * Decrement the ref count of @tinfo. 6139 * 6140 * Returns the new refcount (negative on error). 6141 */ 6142 int trace_user_fault_put(struct trace_user_buf_info *tinfo) 6143 { 6144 guard(mutex)(&trace_user_buffer_mutex); 6145 6146 if (WARN_ON_ONCE(!tinfo || !tinfo->ref)) 6147 return -1; 6148 6149 --tinfo->ref; 6150 return tinfo->ref; 6151 } 6152 6153 /** 6154 * trace_user_fault_read - Read user space into a per CPU buffer 6155 * @tinfo: The @tinfo allocated by trace_user_fault_get() 6156 * @ptr: The user space pointer to read 6157 * @size: The size of user space to read. 6158 * @copy_func: Optional function to use to copy from user space 6159 * @data: Data to pass to copy_func if it was supplied 6160 * 6161 * Preemption must be disabled when this is called, and must not 6162 * be enabled while using the returned buffer. 6163 * This does the copying from user space into a per CPU buffer. 6164 * 6165 * The @size must not be greater than the size passed in to 6166 * trace_user_fault_init(). 
6167 * 6168 * If @copy_func is NULL, trace_user_fault_read() will use copy_from_user(), 6169 * otherwise it will call @copy_func. It will call @copy_func with: 6170 * 6171 * buffer: the per CPU buffer of the @tinfo. 6172 * ptr: The pointer @ptr to user space to read 6173 * size: The @size of the ptr to read 6174 * data: The @data parameter 6175 * 6176 * It is expected that @copy_func will return 0 on success and non zero 6177 * if there was a fault. 6178 * 6179 * Returns a pointer to the buffer with the content read from @ptr. 6180 * Preemption must remain disabled while the caller accesses the 6181 * buffer returned by this function. 6182 * Returns NULL if there was a fault, or the size passed in is 6183 * greater than the size passed to trace_user_fault_init(). 6184 */ 6185 char *trace_user_fault_read(struct trace_user_buf_info *tinfo, 6186 const char __user *ptr, size_t size, 6187 trace_user_buf_copy copy_func, void *data) 6188 { 6189 int cpu = smp_processor_id(); 6190 char *buffer = per_cpu_ptr(tinfo->tbuf, cpu)->buf; 6191 unsigned int cnt; 6192 int trys = 0; 6193 int ret; 6194 6195 lockdep_assert_preemption_disabled(); 6196 6197 /* 6198 * It's up to the caller to not try to copy more than it said 6199 * it would. 6200 */ 6201 if (size > tinfo->size) 6202 return NULL; 6203 6204 /* 6205 * This acts similar to a seqcount. The per CPU context switches are 6206 * recorded, migration is disabled and preemption is enabled. The 6207 * read of the user space memory is copied into the per CPU buffer. 6208 * Preemption is disabled again, and if the per CPU context switches count 6209 * is still the same, it means the buffer has not been corrupted. 6210 * If the count is different, it is assumed the buffer is corrupted 6211 * and reading must be tried again. 6212 */ 6213 6214 do { 6215 /* 6216 * It is possible that something is trying to migrate this 6217 * task. 
What happens then, is when preemption is enabled, 6218 * the migration thread will preempt this task, try to 6219 * migrate it, fail, then let it run again. That will 6220 * cause this to loop again and never succeed. 6221 * On failures, enabled and disable preemption with 6222 * migration enabled, to allow the migration thread to 6223 * migrate this task. 6224 */ 6225 if (trys) { 6226 preempt_enable_notrace(); 6227 preempt_disable_notrace(); 6228 cpu = smp_processor_id(); 6229 buffer = per_cpu_ptr(tinfo->tbuf, cpu)->buf; 6230 } 6231 6232 /* 6233 * If for some reason, copy_from_user() always causes a context 6234 * switch, this would then cause an infinite loop. 6235 * If this task is preempted by another user space task, it 6236 * will cause this task to try again. But just in case something 6237 * changes where the copying from user space causes another task 6238 * to run, prevent this from going into an infinite loop. 6239 * 100 tries should be plenty. 6240 */ 6241 if (WARN_ONCE(trys++ > 100, "Error: Too many tries to read user space")) 6242 return NULL; 6243 6244 /* Read the current CPU context switch counter */ 6245 cnt = nr_context_switches_cpu(cpu); 6246 6247 /* 6248 * Preemption is going to be enabled, but this task must 6249 * remain on this CPU. 6250 */ 6251 migrate_disable(); 6252 6253 /* 6254 * Now preemption is being enabled and another task can come in 6255 * and use the same buffer and corrupt our data. 6256 */ 6257 preempt_enable_notrace(); 6258 6259 /* Make sure preemption is enabled here */ 6260 lockdep_assert_preemption_enabled(); 6261 6262 if (copy_func) { 6263 ret = copy_func(buffer, ptr, size, data); 6264 } else { 6265 ret = __copy_from_user(buffer, ptr, size); 6266 } 6267 6268 preempt_disable_notrace(); 6269 migrate_enable(); 6270 6271 /* if it faulted, no need to test if the buffer was corrupted */ 6272 if (ret) 6273 return NULL; 6274 6275 /* 6276 * Preemption is disabled again, now check the per CPU context 6277 * switch counter. 
If it doesn't match, then another user space 6278 * process may have schedule in and corrupted our buffer. In that 6279 * case the copying must be retried. 6280 */ 6281 } while (nr_context_switches_cpu(cpu) != cnt); 6282 6283 return buffer; 6284 } 6285 6286 static ssize_t 6287 tracing_mark_write(struct file *filp, const char __user *ubuf, 6288 size_t cnt, loff_t *fpos) 6289 { 6290 struct trace_array *tr = filp->private_data; 6291 ssize_t written = -ENODEV; 6292 unsigned long ip; 6293 char *buf; 6294 6295 if (unlikely(tracing_disabled)) 6296 return -EINVAL; 6297 6298 if (!(tr->trace_flags & TRACE_ITER(MARKERS))) 6299 return -EINVAL; 6300 6301 if ((ssize_t)cnt < 0) 6302 return -EINVAL; 6303 6304 if (cnt > TRACE_MARKER_MAX_SIZE) 6305 cnt = TRACE_MARKER_MAX_SIZE; 6306 6307 /* Must have preemption disabled while having access to the buffer */ 6308 guard(preempt_notrace)(); 6309 6310 buf = trace_user_fault_read(trace_user_buffer, ubuf, cnt, NULL, NULL); 6311 if (!buf) 6312 return -EFAULT; 6313 6314 /* The selftests expect this function to be the IP address */ 6315 ip = _THIS_IP_; 6316 6317 /* The global trace_marker can go to multiple instances */ 6318 if (tr == &global_trace) { 6319 guard(rcu)(); 6320 list_for_each_entry_rcu(tr, &marker_copies, marker_list) { 6321 written = write_marker_to_buffer(tr, buf, cnt, ip); 6322 if (written < 0) 6323 break; 6324 } 6325 } else { 6326 written = write_marker_to_buffer(tr, buf, cnt, ip); 6327 } 6328 6329 return written; 6330 } 6331 6332 static ssize_t write_raw_marker_to_buffer(struct trace_array *tr, 6333 const char *buf, size_t cnt) 6334 { 6335 struct ring_buffer_event *event; 6336 struct trace_buffer *buffer; 6337 struct raw_data_entry *entry; 6338 ssize_t written; 6339 size_t size; 6340 6341 /* cnt includes both the entry->id and the data behind it. 
*/ 6342 size = struct_offset(entry, id) + cnt; 6343 6344 buffer = tr->array_buffer.buffer; 6345 6346 if (size > ring_buffer_max_event_size(buffer)) 6347 return -EINVAL; 6348 6349 event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size, 6350 tracing_gen_ctx()); 6351 if (!event) 6352 /* Ring buffer disabled, return as if not open for write */ 6353 return -EBADF; 6354 6355 entry = ring_buffer_event_data(event); 6356 unsafe_memcpy(&entry->id, buf, cnt, 6357 "id and content already reserved on ring buffer" 6358 "'buf' includes the 'id' and the data." 6359 "'entry' was allocated with cnt from 'id'."); 6360 written = cnt; 6361 6362 __buffer_unlock_commit(buffer, event); 6363 6364 return written; 6365 } 6366 6367 static ssize_t 6368 tracing_mark_raw_write(struct file *filp, const char __user *ubuf, 6369 size_t cnt, loff_t *fpos) 6370 { 6371 struct trace_array *tr = filp->private_data; 6372 ssize_t written = -ENODEV; 6373 char *buf; 6374 6375 if (unlikely(tracing_disabled)) 6376 return -EINVAL; 6377 6378 if (!(tr->trace_flags & TRACE_ITER(MARKERS))) 6379 return -EINVAL; 6380 6381 /* The marker must at least have a tag id */ 6382 if (cnt < sizeof(unsigned int)) 6383 return -EINVAL; 6384 6385 /* raw write is all or nothing */ 6386 if (cnt > TRACE_MARKER_MAX_SIZE) 6387 return -EINVAL; 6388 6389 /* Must have preemption disabled while having access to the buffer */ 6390 guard(preempt_notrace)(); 6391 6392 buf = trace_user_fault_read(trace_user_buffer, ubuf, cnt, NULL, NULL); 6393 if (!buf) 6394 return -EFAULT; 6395 6396 /* The global trace_marker_raw can go to multiple instances */ 6397 if (tr == &global_trace) { 6398 guard(rcu)(); 6399 list_for_each_entry_rcu(tr, &marker_copies, marker_list) { 6400 written = write_raw_marker_to_buffer(tr, buf, cnt); 6401 if (written < 0) 6402 break; 6403 } 6404 } else { 6405 written = write_raw_marker_to_buffer(tr, buf, cnt); 6406 } 6407 6408 return written; 6409 } 6410 6411 static int tracing_mark_open(struct inode *inode, struct file 
*filp) 6412 { 6413 int ret; 6414 6415 scoped_guard(mutex, &trace_user_buffer_mutex) { 6416 if (!trace_user_buffer) { 6417 ret = user_buffer_init(&trace_user_buffer, TRACE_MARKER_MAX_SIZE); 6418 if (ret < 0) 6419 return ret; 6420 } else { 6421 trace_user_buffer->ref++; 6422 } 6423 } 6424 6425 stream_open(inode, filp); 6426 ret = tracing_open_generic_tr(inode, filp); 6427 if (ret < 0) 6428 user_buffer_put(&trace_user_buffer); 6429 return ret; 6430 } 6431 6432 static int tracing_mark_release(struct inode *inode, struct file *file) 6433 { 6434 user_buffer_put(&trace_user_buffer); 6435 return tracing_release_generic_tr(inode, file); 6436 } 6437 6438 static int tracing_clock_show(struct seq_file *m, void *v) 6439 { 6440 struct trace_array *tr = m->private; 6441 int i; 6442 6443 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) 6444 seq_printf(m, 6445 "%s%s%s%s", i ? " " : "", 6446 i == tr->clock_id ? "[" : "", trace_clocks[i].name, 6447 i == tr->clock_id ? "]" : ""); 6448 seq_putc(m, '\n'); 6449 6450 return 0; 6451 } 6452 6453 int tracing_set_clock(struct trace_array *tr, const char *clockstr) 6454 { 6455 int i; 6456 6457 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) { 6458 if (strcmp(trace_clocks[i].name, clockstr) == 0) 6459 break; 6460 } 6461 if (i == ARRAY_SIZE(trace_clocks)) 6462 return -EINVAL; 6463 6464 guard(mutex)(&trace_types_lock); 6465 6466 tr->clock_id = i; 6467 6468 ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func); 6469 6470 /* 6471 * New clock may not be consistent with the previous clock. 6472 * Reset the buffer so that it doesn't have incomparable timestamps. 
6473 */ 6474 tracing_reset_online_cpus(&tr->array_buffer); 6475 6476 #ifdef CONFIG_TRACER_SNAPSHOT 6477 if (tr->snapshot_buffer.buffer) 6478 ring_buffer_set_clock(tr->snapshot_buffer.buffer, trace_clocks[i].func); 6479 tracing_reset_online_cpus(&tr->snapshot_buffer); 6480 #endif 6481 update_last_data_if_empty(tr); 6482 6483 if (tr->scratch && !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) { 6484 struct trace_scratch *tscratch = tr->scratch; 6485 6486 tscratch->clock_id = i; 6487 } 6488 6489 return 0; 6490 } 6491 6492 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf, 6493 size_t cnt, loff_t *fpos) 6494 { 6495 struct seq_file *m = filp->private_data; 6496 struct trace_array *tr = m->private; 6497 char buf[64]; 6498 const char *clockstr; 6499 int ret; 6500 6501 if (cnt >= sizeof(buf)) 6502 return -EINVAL; 6503 6504 if (copy_from_user(buf, ubuf, cnt)) 6505 return -EFAULT; 6506 6507 buf[cnt] = 0; 6508 6509 clockstr = strstrip(buf); 6510 6511 ret = tracing_set_clock(tr, clockstr); 6512 if (ret) 6513 return ret; 6514 6515 *fpos += cnt; 6516 6517 return cnt; 6518 } 6519 6520 static int tracing_clock_open(struct inode *inode, struct file *file) 6521 { 6522 struct trace_array *tr = inode->i_private; 6523 int ret; 6524 6525 ret = tracing_check_open_get_tr(tr); 6526 if (ret) 6527 return ret; 6528 6529 if ((file->f_mode & FMODE_WRITE) && trace_array_is_readonly(tr)) { 6530 trace_array_put(tr); 6531 return -EACCES; 6532 } 6533 6534 ret = single_open(file, tracing_clock_show, inode->i_private); 6535 if (ret < 0) 6536 trace_array_put(tr); 6537 6538 return ret; 6539 } 6540 6541 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v) 6542 { 6543 struct trace_array *tr = m->private; 6544 6545 guard(mutex)(&trace_types_lock); 6546 6547 if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer)) 6548 seq_puts(m, "delta [absolute]\n"); 6549 else 6550 seq_puts(m, "[delta] absolute\n"); 6551 6552 return 0; 6553 } 6554 6555 static int 
tracing_time_stamp_mode_open(struct inode *inode, struct file *file) 6556 { 6557 struct trace_array *tr = inode->i_private; 6558 int ret; 6559 6560 ret = tracing_check_open_get_tr(tr); 6561 if (ret) 6562 return ret; 6563 6564 ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private); 6565 if (ret < 0) 6566 trace_array_put(tr); 6567 6568 return ret; 6569 } 6570 6571 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe) 6572 { 6573 if (rbe == this_cpu_read(trace_buffered_event)) 6574 return ring_buffer_time_stamp(buffer); 6575 6576 return ring_buffer_event_time_stamp(buffer, rbe); 6577 } 6578 6579 static const struct file_operations tracing_thresh_fops = { 6580 .open = tracing_open_generic, 6581 .read = tracing_thresh_read, 6582 .write = tracing_thresh_write, 6583 .llseek = generic_file_llseek, 6584 }; 6585 6586 static const struct file_operations set_tracer_fops = { 6587 .open = tracing_open_generic_tr, 6588 .read = tracing_set_trace_read, 6589 .write = tracing_set_trace_write, 6590 .llseek = generic_file_llseek, 6591 .release = tracing_release_generic_tr, 6592 }; 6593 6594 static const struct file_operations tracing_pipe_fops = { 6595 .open = tracing_open_pipe, 6596 .poll = tracing_poll_pipe, 6597 .read = tracing_read_pipe, 6598 .splice_read = tracing_splice_read_pipe, 6599 .release = tracing_release_pipe, 6600 }; 6601 6602 static const struct file_operations tracing_entries_fops = { 6603 .open = tracing_open_generic_tr, 6604 .read = tracing_entries_read, 6605 .write = tracing_entries_write, 6606 .llseek = generic_file_llseek, 6607 .release = tracing_release_generic_tr, 6608 }; 6609 6610 static const struct file_operations tracing_syscall_buf_fops = { 6611 .open = tracing_open_generic_tr, 6612 .read = tracing_syscall_buf_read, 6613 .write = tracing_syscall_buf_write, 6614 .llseek = generic_file_llseek, 6615 .release = tracing_release_generic_tr, 6616 }; 6617 6618 static const struct file_operations 
tracing_buffer_meta_fops = { 6619 .open = tracing_buffer_meta_open, 6620 .read = seq_read, 6621 .llseek = seq_lseek, 6622 .release = tracing_seq_release, 6623 }; 6624 6625 static const struct file_operations tracing_total_entries_fops = { 6626 .open = tracing_open_generic_tr, 6627 .read = tracing_total_entries_read, 6628 .llseek = generic_file_llseek, 6629 .release = tracing_release_generic_tr, 6630 }; 6631 6632 static const struct file_operations tracing_free_buffer_fops = { 6633 .open = tracing_open_generic_tr, 6634 .write = tracing_free_buffer_write, 6635 .release = tracing_free_buffer_release, 6636 }; 6637 6638 static const struct file_operations tracing_mark_fops = { 6639 .open = tracing_mark_open, 6640 .write = tracing_mark_write, 6641 .release = tracing_mark_release, 6642 }; 6643 6644 static const struct file_operations tracing_mark_raw_fops = { 6645 .open = tracing_mark_open, 6646 .write = tracing_mark_raw_write, 6647 .release = tracing_mark_release, 6648 }; 6649 6650 static const struct file_operations trace_clock_fops = { 6651 .open = tracing_clock_open, 6652 .read = seq_read, 6653 .llseek = seq_lseek, 6654 .release = tracing_single_release_tr, 6655 .write = tracing_clock_write, 6656 }; 6657 6658 static const struct file_operations trace_time_stamp_mode_fops = { 6659 .open = tracing_time_stamp_mode_open, 6660 .read = seq_read, 6661 .llseek = seq_lseek, 6662 .release = tracing_single_release_tr, 6663 }; 6664 6665 static const struct file_operations last_boot_fops = { 6666 .open = tracing_last_boot_open, 6667 .read = seq_read, 6668 .llseek = seq_lseek, 6669 .release = tracing_seq_release, 6670 }; 6671 6672 /* 6673 * trace_min_max_write - Write a u64 value to a trace_min_max_param struct 6674 * @filp: The active open file structure 6675 * @ubuf: The userspace provided buffer to read value into 6676 * @cnt: The maximum number of bytes to read 6677 * @ppos: The current "file" position 6678 * 6679 * This function implements the write interface for a struct 
trace_min_max_param.
 * The filp->private_data must point to a trace_min_max_param structure that
 * defines where to write the value, the min and the max acceptable values,
 * and a lock to protect the write.
 */
static ssize_t
trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
{
	struct trace_min_max_param *param = filp->private_data;
	bool out_of_range;
	u64 val;
	int err;

	if (!param)
		return -EFAULT;

	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
	if (err)
		return err;

	/* The lock, the minimum and the maximum are all optional */
	if (param->lock)
		mutex_lock(param->lock);

	out_of_range = (param->min && val < *param->min) ||
		       (param->max && val > *param->max);
	if (!out_of_range)
		*param->val = val;

	if (param->lock)
		mutex_unlock(param->lock);

	if (out_of_range)
		return -EINVAL;

	return cnt;
}

/*
 * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
 * @filp: The active open file structure
 * @ubuf: The userspace provided buffer to read value into
 * @cnt: The maximum number of bytes to read
 * @ppos: The current "file" position
 *
 * This function implements the read interface for a struct trace_min_max_param.
 * The filp->private_data must point to a trace_min_max_param struct with valid
 * data.
6729 */ 6730 static ssize_t 6731 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) 6732 { 6733 struct trace_min_max_param *param = filp->private_data; 6734 char buf[U64_STR_SIZE]; 6735 int len; 6736 u64 val; 6737 6738 if (!param) 6739 return -EFAULT; 6740 6741 val = *param->val; 6742 6743 if (cnt > sizeof(buf)) 6744 cnt = sizeof(buf); 6745 6746 len = snprintf(buf, sizeof(buf), "%llu\n", val); 6747 6748 return simple_read_from_buffer(ubuf, cnt, ppos, buf, len); 6749 } 6750 6751 const struct file_operations trace_min_max_fops = { 6752 .open = tracing_open_generic, 6753 .read = trace_min_max_read, 6754 .write = trace_min_max_write, 6755 }; 6756 6757 #define TRACING_LOG_ERRS_MAX 8 6758 #define TRACING_LOG_LOC_MAX 128 6759 6760 #define CMD_PREFIX " Command: " 6761 6762 struct err_info { 6763 const char **errs; /* ptr to loc-specific array of err strings */ 6764 u8 type; /* index into errs -> specific err string */ 6765 u16 pos; /* caret position */ 6766 u64 ts; 6767 }; 6768 6769 struct tracing_log_err { 6770 struct list_head list; 6771 struct err_info info; 6772 char loc[TRACING_LOG_LOC_MAX]; /* err location */ 6773 char *cmd; /* what caused err */ 6774 }; 6775 6776 static DEFINE_MUTEX(tracing_err_log_lock); 6777 6778 static struct tracing_log_err *alloc_tracing_log_err(int len) 6779 { 6780 struct tracing_log_err *err; 6781 6782 err = kzalloc_obj(*err); 6783 if (!err) 6784 return ERR_PTR(-ENOMEM); 6785 6786 err->cmd = kzalloc(len, GFP_KERNEL); 6787 if (!err->cmd) { 6788 kfree(err); 6789 return ERR_PTR(-ENOMEM); 6790 } 6791 6792 return err; 6793 } 6794 6795 static void free_tracing_log_err(struct tracing_log_err *err) 6796 { 6797 kfree(err->cmd); 6798 kfree(err); 6799 } 6800 6801 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr, 6802 int len) 6803 { 6804 struct tracing_log_err *err; 6805 char *cmd; 6806 6807 if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) { 6808 err = alloc_tracing_log_err(len); 6809 if 
(PTR_ERR(err) != -ENOMEM) 6810 tr->n_err_log_entries++; 6811 6812 return err; 6813 } 6814 cmd = kzalloc(len, GFP_KERNEL); 6815 if (!cmd) 6816 return ERR_PTR(-ENOMEM); 6817 err = list_first_entry(&tr->err_log, struct tracing_log_err, list); 6818 kfree(err->cmd); 6819 err->cmd = cmd; 6820 list_del(&err->list); 6821 6822 return err; 6823 } 6824 6825 /** 6826 * err_pos - find the position of a string within a command for error careting 6827 * @cmd: The tracing command that caused the error 6828 * @str: The string to position the caret at within @cmd 6829 * 6830 * Finds the position of the first occurrence of @str within @cmd. The 6831 * return value can be passed to tracing_log_err() for caret placement 6832 * within @cmd. 6833 * 6834 * Returns the index within @cmd of the first occurrence of @str or 0 6835 * if @str was not found. 6836 */ 6837 unsigned int err_pos(char *cmd, const char *str) 6838 { 6839 char *found; 6840 6841 if (WARN_ON(!strlen(cmd))) 6842 return 0; 6843 6844 found = strstr(cmd, str); 6845 if (found) 6846 return found - cmd; 6847 6848 return 0; 6849 } 6850 6851 /** 6852 * tracing_log_err - write an error to the tracing error log 6853 * @tr: The associated trace array for the error (NULL for top level array) 6854 * @loc: A string describing where the error occurred 6855 * @cmd: The tracing command that caused the error 6856 * @errs: The array of loc-specific static error strings 6857 * @type: The index into errs[], which produces the specific static err string 6858 * @pos: The position the caret should be placed in the cmd 6859 * 6860 * Writes an error into tracing/error_log of the form: 6861 * 6862 * <loc>: error: <text> 6863 * Command: <cmd> 6864 * ^ 6865 * 6866 * tracing/error_log is a small log file containing the last 6867 * TRACING_LOG_ERRS_MAX errors (8). 
Memory for errors isn't allocated 6868 * unless there has been a tracing error, and the error log can be 6869 * cleared and have its memory freed by writing the empty string in 6870 * truncation mode to it i.e. echo > tracing/error_log. 6871 * 6872 * NOTE: the @errs array along with the @type param are used to 6873 * produce a static error string - this string is not copied and saved 6874 * when the error is logged - only a pointer to it is saved. See 6875 * existing callers for examples of how static strings are typically 6876 * defined for use with tracing_log_err(). 6877 */ 6878 void tracing_log_err(struct trace_array *tr, 6879 const char *loc, const char *cmd, 6880 const char **errs, u8 type, u16 pos) 6881 { 6882 struct tracing_log_err *err; 6883 int len = 0; 6884 6885 if (!tr) 6886 tr = &global_trace; 6887 6888 len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1; 6889 6890 guard(mutex)(&tracing_err_log_lock); 6891 6892 err = get_tracing_log_err(tr, len); 6893 if (PTR_ERR(err) == -ENOMEM) 6894 return; 6895 6896 snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc); 6897 snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd); 6898 6899 err->info.errs = errs; 6900 err->info.type = type; 6901 err->info.pos = pos; 6902 err->info.ts = local_clock(); 6903 6904 list_add_tail(&err->list, &tr->err_log); 6905 } 6906 6907 static void clear_tracing_err_log(struct trace_array *tr) 6908 { 6909 struct tracing_log_err *err, *next; 6910 6911 guard(mutex)(&tracing_err_log_lock); 6912 6913 list_for_each_entry_safe(err, next, &tr->err_log, list) { 6914 list_del(&err->list); 6915 free_tracing_log_err(err); 6916 } 6917 6918 tr->n_err_log_entries = 0; 6919 } 6920 6921 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos) 6922 { 6923 struct trace_array *tr = m->private; 6924 6925 mutex_lock(&tracing_err_log_lock); 6926 6927 return seq_list_start(&tr->err_log, *pos); 6928 } 6929 6930 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, 
loff_t *pos) 6931 { 6932 struct trace_array *tr = m->private; 6933 6934 return seq_list_next(v, &tr->err_log, pos); 6935 } 6936 6937 static void tracing_err_log_seq_stop(struct seq_file *m, void *v) 6938 { 6939 mutex_unlock(&tracing_err_log_lock); 6940 } 6941 6942 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos) 6943 { 6944 u16 i; 6945 6946 for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++) 6947 seq_putc(m, ' '); 6948 for (i = 0; i < pos; i++) 6949 seq_putc(m, ' '); 6950 seq_puts(m, "^\n"); 6951 } 6952 6953 static int tracing_err_log_seq_show(struct seq_file *m, void *v) 6954 { 6955 struct tracing_log_err *err = v; 6956 6957 if (err) { 6958 const char *err_text = err->info.errs[err->info.type]; 6959 u64 sec = err->info.ts; 6960 u32 nsec; 6961 6962 nsec = do_div(sec, NSEC_PER_SEC); 6963 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000, 6964 err->loc, err_text); 6965 seq_printf(m, "%s", err->cmd); 6966 tracing_err_log_show_pos(m, err->info.pos); 6967 } 6968 6969 return 0; 6970 } 6971 6972 static const struct seq_operations tracing_err_log_seq_ops = { 6973 .start = tracing_err_log_seq_start, 6974 .next = tracing_err_log_seq_next, 6975 .stop = tracing_err_log_seq_stop, 6976 .show = tracing_err_log_seq_show 6977 }; 6978 6979 static int tracing_err_log_open(struct inode *inode, struct file *file) 6980 { 6981 struct trace_array *tr = inode->i_private; 6982 int ret = 0; 6983 6984 ret = tracing_check_open_get_tr(tr); 6985 if (ret) 6986 return ret; 6987 6988 /* If this file was opened for write, then erase contents */ 6989 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) 6990 clear_tracing_err_log(tr); 6991 6992 if (file->f_mode & FMODE_READ) { 6993 ret = seq_open(file, &tracing_err_log_seq_ops); 6994 if (!ret) { 6995 struct seq_file *m = file->private_data; 6996 m->private = tr; 6997 } else { 6998 trace_array_put(tr); 6999 } 7000 } 7001 return ret; 7002 } 7003 7004 static ssize_t tracing_err_log_write(struct file *file, 7005 const char __user 
*buffer, 7006 size_t count, loff_t *ppos) 7007 { 7008 return count; 7009 } 7010 7011 static int tracing_err_log_release(struct inode *inode, struct file *file) 7012 { 7013 struct trace_array *tr = inode->i_private; 7014 7015 trace_array_put(tr); 7016 7017 if (file->f_mode & FMODE_READ) 7018 seq_release(inode, file); 7019 7020 return 0; 7021 } 7022 7023 static const struct file_operations tracing_err_log_fops = { 7024 .open = tracing_err_log_open, 7025 .write = tracing_err_log_write, 7026 .read = seq_read, 7027 .llseek = tracing_lseek, 7028 .release = tracing_err_log_release, 7029 }; 7030 7031 int tracing_buffers_open(struct inode *inode, struct file *filp) 7032 { 7033 struct trace_array *tr = inode->i_private; 7034 struct ftrace_buffer_info *info; 7035 int ret; 7036 7037 ret = tracing_check_open_get_tr(tr); 7038 if (ret) 7039 return ret; 7040 7041 info = kvzalloc_obj(*info); 7042 if (!info) { 7043 trace_array_put(tr); 7044 return -ENOMEM; 7045 } 7046 7047 mutex_lock(&trace_types_lock); 7048 7049 info->iter.tr = tr; 7050 info->iter.cpu_file = tracing_get_cpu(inode); 7051 info->iter.trace = tr->current_trace; 7052 info->iter.array_buffer = &tr->array_buffer; 7053 info->spare = NULL; 7054 /* Force reading ring buffer for first read */ 7055 info->read = (unsigned int)-1; 7056 7057 filp->private_data = info; 7058 7059 tr->trace_ref++; 7060 7061 mutex_unlock(&trace_types_lock); 7062 7063 ret = nonseekable_open(inode, filp); 7064 if (ret < 0) 7065 trace_array_put(tr); 7066 7067 return ret; 7068 } 7069 7070 static __poll_t 7071 tracing_buffers_poll(struct file *filp, poll_table *poll_table) 7072 { 7073 struct ftrace_buffer_info *info = filp->private_data; 7074 struct trace_iterator *iter = &info->iter; 7075 7076 return trace_poll(iter, filp, poll_table); 7077 } 7078 7079 ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf, 7080 size_t count, loff_t *ppos) 7081 { 7082 struct ftrace_buffer_info *info = filp->private_data; 7083 struct trace_iterator *iter = 
&info->iter; 7084 void *trace_data; 7085 int page_size; 7086 ssize_t ret = 0; 7087 ssize_t size; 7088 7089 if (!count) 7090 return 0; 7091 7092 if (iter->snapshot && tracer_uses_snapshot(iter->tr->current_trace)) 7093 return -EBUSY; 7094 7095 page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer); 7096 7097 /* Make sure the spare matches the current sub buffer size */ 7098 if (info->spare) { 7099 if (page_size != info->spare_size) { 7100 ring_buffer_free_read_page(iter->array_buffer->buffer, 7101 info->spare_cpu, info->spare); 7102 info->spare = NULL; 7103 } 7104 } 7105 7106 if (!info->spare) { 7107 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer, 7108 iter->cpu_file); 7109 if (IS_ERR(info->spare)) { 7110 ret = PTR_ERR(info->spare); 7111 info->spare = NULL; 7112 } else { 7113 info->spare_cpu = iter->cpu_file; 7114 info->spare_size = page_size; 7115 } 7116 } 7117 if (!info->spare) 7118 return ret; 7119 7120 /* Do we have previous read data to read? */ 7121 if (info->read < page_size) 7122 goto read; 7123 7124 again: 7125 trace_access_lock(iter->cpu_file); 7126 ret = ring_buffer_read_page(iter->array_buffer->buffer, 7127 info->spare, 7128 count, 7129 iter->cpu_file, 0); 7130 trace_access_unlock(iter->cpu_file); 7131 7132 if (ret < 0) { 7133 if (trace_empty(iter) && !iter->closed) { 7134 if (update_last_data_if_empty(iter->tr)) 7135 return 0; 7136 7137 if ((filp->f_flags & O_NONBLOCK)) 7138 return -EAGAIN; 7139 7140 ret = wait_on_pipe(iter, 0); 7141 if (ret) 7142 return ret; 7143 7144 goto again; 7145 } 7146 return 0; 7147 } 7148 7149 info->read = 0; 7150 read: 7151 size = page_size - info->read; 7152 if (size > count) 7153 size = count; 7154 trace_data = ring_buffer_read_page_data(info->spare); 7155 ret = copy_to_user(ubuf, trace_data + info->read, size); 7156 if (ret == size) 7157 return -EFAULT; 7158 7159 size -= ret; 7160 7161 *ppos += size; 7162 info->read += size; 7163 7164 return size; 7165 } 7166 7167 static int 
tracing_buffers_flush(struct file *file, fl_owner_t id) 7168 { 7169 struct ftrace_buffer_info *info = file->private_data; 7170 struct trace_iterator *iter = &info->iter; 7171 7172 iter->closed = true; 7173 /* Make sure the waiters see the new wait_index */ 7174 (void)atomic_fetch_inc_release(&iter->wait_index); 7175 7176 ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file); 7177 7178 return 0; 7179 } 7180 7181 int tracing_buffers_release(struct inode *inode, struct file *file) 7182 { 7183 struct ftrace_buffer_info *info = file->private_data; 7184 struct trace_iterator *iter = &info->iter; 7185 7186 guard(mutex)(&trace_types_lock); 7187 7188 iter->tr->trace_ref--; 7189 7190 __trace_array_put(iter->tr); 7191 7192 if (info->spare) 7193 ring_buffer_free_read_page(iter->array_buffer->buffer, 7194 info->spare_cpu, info->spare); 7195 kvfree(info); 7196 7197 return 0; 7198 } 7199 7200 struct buffer_ref { 7201 struct trace_buffer *buffer; 7202 void *page; 7203 int cpu; 7204 refcount_t refcount; 7205 }; 7206 7207 static void buffer_ref_release(struct buffer_ref *ref) 7208 { 7209 if (!refcount_dec_and_test(&ref->refcount)) 7210 return; 7211 ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page); 7212 kfree(ref); 7213 } 7214 7215 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe, 7216 struct pipe_buffer *buf) 7217 { 7218 struct buffer_ref *ref = (struct buffer_ref *)buf->private; 7219 7220 buffer_ref_release(ref); 7221 buf->private = 0; 7222 } 7223 7224 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe, 7225 struct pipe_buffer *buf) 7226 { 7227 struct buffer_ref *ref = (struct buffer_ref *)buf->private; 7228 7229 if (refcount_read(&ref->refcount) > INT_MAX/2) 7230 return false; 7231 7232 refcount_inc(&ref->refcount); 7233 return true; 7234 } 7235 7236 /* Pipe buffer operations for a buffer. 
*/ 7237 static const struct pipe_buf_operations buffer_pipe_buf_ops = { 7238 .release = buffer_pipe_buf_release, 7239 .get = buffer_pipe_buf_get, 7240 }; 7241 7242 /* 7243 * Callback from splice_to_pipe(), if we need to release some pages 7244 * at the end of the spd in case we error'ed out in filling the pipe. 7245 */ 7246 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i) 7247 { 7248 struct buffer_ref *ref = 7249 (struct buffer_ref *)spd->partial[i].private; 7250 7251 buffer_ref_release(ref); 7252 spd->partial[i].private = 0; 7253 } 7254 7255 ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos, 7256 struct pipe_inode_info *pipe, size_t len, 7257 unsigned int flags) 7258 { 7259 struct ftrace_buffer_info *info = file->private_data; 7260 struct trace_iterator *iter = &info->iter; 7261 struct partial_page partial_def[PIPE_DEF_BUFFERS]; 7262 struct page *pages_def[PIPE_DEF_BUFFERS]; 7263 struct splice_pipe_desc spd = { 7264 .pages = pages_def, 7265 .partial = partial_def, 7266 .nr_pages_max = PIPE_DEF_BUFFERS, 7267 .ops = &buffer_pipe_buf_ops, 7268 .spd_release = buffer_spd_release, 7269 }; 7270 struct buffer_ref *ref; 7271 bool woken = false; 7272 int page_size; 7273 int entries, i; 7274 ssize_t ret = 0; 7275 7276 if (iter->snapshot && tracer_uses_snapshot(iter->tr->current_trace)) 7277 return -EBUSY; 7278 7279 page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer); 7280 if (*ppos & (page_size - 1)) 7281 return -EINVAL; 7282 7283 if (len & (page_size - 1)) { 7284 if (len < page_size) 7285 return -EINVAL; 7286 len &= (~(page_size - 1)); 7287 } 7288 7289 if (splice_grow_spd(pipe, &spd)) 7290 return -ENOMEM; 7291 7292 again: 7293 trace_access_lock(iter->cpu_file); 7294 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file); 7295 7296 for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) { 7297 struct page *page; 7298 int r; 7299 7300 ref = kzalloc_obj(*ref); 7301 if (!ref) 
{ 7302 ret = -ENOMEM; 7303 break; 7304 } 7305 7306 refcount_set(&ref->refcount, 1); 7307 ref->buffer = iter->array_buffer->buffer; 7308 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file); 7309 if (IS_ERR(ref->page)) { 7310 ret = PTR_ERR(ref->page); 7311 ref->page = NULL; 7312 kfree(ref); 7313 break; 7314 } 7315 ref->cpu = iter->cpu_file; 7316 7317 r = ring_buffer_read_page(ref->buffer, ref->page, 7318 len, iter->cpu_file, 1); 7319 if (r < 0) { 7320 ring_buffer_free_read_page(ref->buffer, ref->cpu, 7321 ref->page); 7322 kfree(ref); 7323 break; 7324 } 7325 7326 page = virt_to_page(ring_buffer_read_page_data(ref->page)); 7327 7328 spd.pages[i] = page; 7329 spd.partial[i].len = page_size; 7330 spd.partial[i].offset = 0; 7331 spd.partial[i].private = (unsigned long)ref; 7332 spd.nr_pages++; 7333 *ppos += page_size; 7334 7335 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file); 7336 } 7337 7338 trace_access_unlock(iter->cpu_file); 7339 spd.nr_pages = i; 7340 7341 /* did we read anything? */ 7342 if (!spd.nr_pages) { 7343 7344 if (ret) 7345 goto out; 7346 7347 if (woken) 7348 goto out; 7349 7350 ret = -EAGAIN; 7351 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK)) 7352 goto out; 7353 7354 ret = wait_on_pipe(iter, iter->snapshot ? 
0 : iter->tr->buffer_percent); 7355 if (ret) 7356 goto out; 7357 7358 /* No need to wait after waking up when tracing is off */ 7359 if (!tracer_tracing_is_on(iter->tr)) 7360 goto out; 7361 7362 /* Iterate one more time to collect any new data then exit */ 7363 woken = true; 7364 7365 goto again; 7366 } 7367 7368 ret = splice_to_pipe(pipe, &spd); 7369 out: 7370 splice_shrink_spd(&spd); 7371 7372 return ret; 7373 } 7374 7375 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 7376 { 7377 struct ftrace_buffer_info *info = file->private_data; 7378 struct trace_iterator *iter = &info->iter; 7379 int err; 7380 7381 if (cmd == TRACE_MMAP_IOCTL_GET_READER) { 7382 if (!(file->f_flags & O_NONBLOCK)) { 7383 err = ring_buffer_wait(iter->array_buffer->buffer, 7384 iter->cpu_file, 7385 iter->tr->buffer_percent, 7386 NULL, NULL); 7387 if (err) 7388 return err; 7389 } 7390 7391 return ring_buffer_map_get_reader(iter->array_buffer->buffer, 7392 iter->cpu_file); 7393 } else if (cmd) { 7394 return -ENOTTY; 7395 } 7396 7397 /* 7398 * An ioctl call with cmd 0 to the ring buffer file will wake up all 7399 * waiters 7400 */ 7401 guard(mutex)(&trace_types_lock); 7402 7403 /* Make sure the waiters see the new wait_index */ 7404 (void)atomic_fetch_inc_release(&iter->wait_index); 7405 7406 ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file); 7407 7408 return 0; 7409 } 7410 7411 /* 7412 * This is called when a VMA is duplicated (e.g., on fork()) to increment 7413 * the user_mapped counter without remapping pages. 
7414 */ 7415 static void tracing_buffers_mmap_open(struct vm_area_struct *vma) 7416 { 7417 struct ftrace_buffer_info *info = vma->vm_file->private_data; 7418 struct trace_iterator *iter = &info->iter; 7419 7420 ring_buffer_map_dup(iter->array_buffer->buffer, iter->cpu_file); 7421 } 7422 7423 static void tracing_buffers_mmap_close(struct vm_area_struct *vma) 7424 { 7425 struct ftrace_buffer_info *info = vma->vm_file->private_data; 7426 struct trace_iterator *iter = &info->iter; 7427 7428 WARN_ON(ring_buffer_unmap(iter->array_buffer->buffer, iter->cpu_file)); 7429 put_snapshot_map(iter->tr); 7430 } 7431 7432 static int tracing_buffers_may_split(struct vm_area_struct *vma, unsigned long addr) 7433 { 7434 /* 7435 * Trace buffer mappings require the complete buffer including 7436 * the meta page. Partial mappings are not supported. 7437 */ 7438 return -EINVAL; 7439 } 7440 7441 static const struct vm_operations_struct tracing_buffers_vmops = { 7442 .open = tracing_buffers_mmap_open, 7443 .close = tracing_buffers_mmap_close, 7444 .may_split = tracing_buffers_may_split, 7445 }; 7446 7447 static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma) 7448 { 7449 struct ftrace_buffer_info *info = filp->private_data; 7450 struct trace_iterator *iter = &info->iter; 7451 int ret = 0; 7452 7453 /* A memmap'ed and backup buffers are not supported for user space mmap */ 7454 if (iter->tr->flags & (TRACE_ARRAY_FL_MEMMAP | TRACE_ARRAY_FL_VMALLOC)) 7455 return -ENODEV; 7456 7457 ret = get_snapshot_map(iter->tr); 7458 if (ret) 7459 return ret; 7460 7461 ret = ring_buffer_map(iter->array_buffer->buffer, iter->cpu_file, vma); 7462 if (ret) 7463 put_snapshot_map(iter->tr); 7464 7465 vma->vm_ops = &tracing_buffers_vmops; 7466 7467 return ret; 7468 } 7469 7470 static const struct file_operations tracing_buffers_fops = { 7471 .open = tracing_buffers_open, 7472 .read = tracing_buffers_read, 7473 .poll = tracing_buffers_poll, 7474 .release = tracing_buffers_release, 7475 
.flush = tracing_buffers_flush, 7476 .splice_read = tracing_buffers_splice_read, 7477 .unlocked_ioctl = tracing_buffers_ioctl, 7478 .mmap = tracing_buffers_mmap, 7479 }; 7480 7481 static ssize_t 7482 tracing_stats_read(struct file *filp, char __user *ubuf, 7483 size_t count, loff_t *ppos) 7484 { 7485 struct inode *inode = file_inode(filp); 7486 struct trace_array *tr = inode->i_private; 7487 struct array_buffer *trace_buf = &tr->array_buffer; 7488 int cpu = tracing_get_cpu(inode); 7489 struct trace_seq *s; 7490 unsigned long cnt; 7491 unsigned long long t; 7492 unsigned long usec_rem; 7493 7494 s = kmalloc_obj(*s); 7495 if (!s) 7496 return -ENOMEM; 7497 7498 trace_seq_init(s); 7499 7500 cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu); 7501 trace_seq_printf(s, "entries: %ld\n", cnt); 7502 7503 cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu); 7504 trace_seq_printf(s, "overrun: %ld\n", cnt); 7505 7506 cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu); 7507 trace_seq_printf(s, "commit overrun: %ld\n", cnt); 7508 7509 cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu); 7510 trace_seq_printf(s, "bytes: %ld\n", cnt); 7511 7512 if (trace_clocks[tr->clock_id].in_ns) { 7513 /* local or global for trace_clock */ 7514 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu)); 7515 usec_rem = do_div(t, USEC_PER_SEC); 7516 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n", 7517 t, usec_rem); 7518 7519 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer)); 7520 usec_rem = do_div(t, USEC_PER_SEC); 7521 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem); 7522 } else { 7523 /* counter or tsc mode for trace_clock */ 7524 trace_seq_printf(s, "oldest event ts: %llu\n", 7525 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu)); 7526 7527 trace_seq_printf(s, "now ts: %llu\n", 7528 ring_buffer_time_stamp(trace_buf->buffer)); 7529 } 7530 7531 cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu); 7532 trace_seq_printf(s, "dropped events: 
%ld\n", cnt); 7533 7534 cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu); 7535 trace_seq_printf(s, "read events: %ld\n", cnt); 7536 7537 count = simple_read_from_buffer(ubuf, count, ppos, 7538 s->buffer, trace_seq_used(s)); 7539 7540 kfree(s); 7541 7542 return count; 7543 } 7544 7545 static const struct file_operations tracing_stats_fops = { 7546 .open = tracing_open_generic_tr, 7547 .read = tracing_stats_read, 7548 .llseek = generic_file_llseek, 7549 .release = tracing_release_generic_tr, 7550 }; 7551 7552 #ifdef CONFIG_DYNAMIC_FTRACE 7553 7554 static ssize_t 7555 tracing_read_dyn_info(struct file *filp, char __user *ubuf, 7556 size_t cnt, loff_t *ppos) 7557 { 7558 ssize_t ret; 7559 char *buf; 7560 int r; 7561 7562 /* 512 should be plenty to hold the amount needed */ 7563 #define DYN_INFO_BUF_SIZE 512 7564 7565 buf = kmalloc(DYN_INFO_BUF_SIZE, GFP_KERNEL); 7566 if (!buf) 7567 return -ENOMEM; 7568 7569 r = scnprintf(buf, DYN_INFO_BUF_SIZE, 7570 "%ld pages:%ld groups: %ld\n" 7571 "ftrace boot update time = %llu (ns)\n" 7572 "ftrace module total update time = %llu (ns)\n", 7573 ftrace_update_tot_cnt, 7574 ftrace_number_of_pages, 7575 ftrace_number_of_groups, 7576 ftrace_update_time, 7577 ftrace_total_mod_time); 7578 7579 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 7580 kfree(buf); 7581 return ret; 7582 } 7583 7584 static const struct file_operations tracing_dyn_info_fops = { 7585 .open = tracing_open_generic, 7586 .read = tracing_read_dyn_info, 7587 .llseek = generic_file_llseek, 7588 }; 7589 #endif /* CONFIG_DYNAMIC_FTRACE */ 7590 7591 static struct dentry *tracing_get_dentry(struct trace_array *tr) 7592 { 7593 /* Top directory uses NULL as the parent */ 7594 if (tr->flags & TRACE_ARRAY_FL_GLOBAL) 7595 return NULL; 7596 7597 if (WARN_ON(!tr->dir)) 7598 return ERR_PTR(-ENODEV); 7599 7600 /* All sub buffers have a descriptor */ 7601 return tr->dir; 7602 } 7603 7604 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu) 7605 { 
7606 struct dentry *d_tracer; 7607 7608 if (tr->percpu_dir) 7609 return tr->percpu_dir; 7610 7611 d_tracer = tracing_get_dentry(tr); 7612 if (IS_ERR(d_tracer)) 7613 return NULL; 7614 7615 tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer); 7616 7617 MEM_FAIL(!tr->percpu_dir, 7618 "Could not create tracefs directory 'per_cpu/%d'\n", cpu); 7619 7620 return tr->percpu_dir; 7621 } 7622 7623 struct dentry * 7624 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent, 7625 void *data, long cpu, const struct file_operations *fops) 7626 { 7627 struct dentry *ret = trace_create_file(name, mode, parent, data, fops); 7628 7629 if (ret) /* See tracing_get_cpu() */ 7630 d_inode(ret)->i_cdev = (void *)(cpu + 1); 7631 return ret; 7632 } 7633 7634 static void 7635 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu) 7636 { 7637 struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu); 7638 struct dentry *d_cpu; 7639 char cpu_dir[30]; /* 30 characters should be more than enough */ 7640 7641 if (!d_percpu) 7642 return; 7643 7644 snprintf(cpu_dir, 30, "cpu%ld", cpu); 7645 d_cpu = tracefs_create_dir(cpu_dir, d_percpu); 7646 if (!d_cpu) { 7647 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir); 7648 return; 7649 } 7650 7651 /* per cpu trace_pipe */ 7652 trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu, 7653 tr, cpu, &tracing_pipe_fops); 7654 7655 /* per cpu trace */ 7656 trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu, 7657 tr, cpu, &tracing_fops); 7658 7659 trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu, 7660 tr, cpu, &tracing_buffers_fops); 7661 7662 trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu, 7663 tr, cpu, &tracing_stats_fops); 7664 7665 trace_create_cpu_file("buffer_size_kb", TRACE_MODE_WRITE, d_cpu, 7666 tr, cpu, &tracing_entries_fops); 7667 7668 if (tr->range_addr_start) 7669 trace_create_cpu_file("buffer_meta", TRACE_MODE_READ, d_cpu, 7670 tr, cpu, &tracing_buffer_meta_fops); 7671 #ifdef 
CONFIG_TRACER_SNAPSHOT 7672 if (!tr->range_addr_start) { 7673 trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu, 7674 tr, cpu, &snapshot_fops); 7675 7676 trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu, 7677 tr, cpu, &snapshot_raw_fops); 7678 } 7679 #endif 7680 } 7681 7682 #ifdef CONFIG_FTRACE_SELFTEST 7683 /* Let selftest have access to static functions in this file */ 7684 #include "trace_selftest.c" 7685 #endif 7686 7687 static ssize_t 7688 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt, 7689 loff_t *ppos) 7690 { 7691 struct trace_option_dentry *topt = filp->private_data; 7692 char *buf; 7693 7694 if (topt->flags->val & topt->opt->bit) 7695 buf = "1\n"; 7696 else 7697 buf = "0\n"; 7698 7699 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2); 7700 } 7701 7702 static ssize_t 7703 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt, 7704 loff_t *ppos) 7705 { 7706 struct trace_option_dentry *topt = filp->private_data; 7707 unsigned long val; 7708 int ret; 7709 7710 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 7711 if (ret) 7712 return ret; 7713 7714 if (val != 0 && val != 1) 7715 return -EINVAL; 7716 7717 if (!!(topt->flags->val & topt->opt->bit) != val) { 7718 guard(mutex)(&trace_types_lock); 7719 ret = __set_tracer_option(topt->tr, topt->flags, 7720 topt->opt, !val); 7721 if (ret) 7722 return ret; 7723 } 7724 7725 *ppos += cnt; 7726 7727 return cnt; 7728 } 7729 7730 static int tracing_open_options(struct inode *inode, struct file *filp) 7731 { 7732 struct trace_option_dentry *topt = inode->i_private; 7733 int ret; 7734 7735 ret = tracing_check_open_get_tr(topt->tr); 7736 if (ret) 7737 return ret; 7738 7739 filp->private_data = inode->i_private; 7740 return 0; 7741 } 7742 7743 static int tracing_release_options(struct inode *inode, struct file *file) 7744 { 7745 struct trace_option_dentry *topt = file->private_data; 7746 7747 trace_array_put(topt->tr); 7748 return 0; 7749 } 7750 7751 static 
const struct file_operations trace_options_fops = { 7752 .open = tracing_open_options, 7753 .read = trace_options_read, 7754 .write = trace_options_write, 7755 .llseek = generic_file_llseek, 7756 .release = tracing_release_options, 7757 }; 7758 7759 /* 7760 * In order to pass in both the trace_array descriptor as well as the index 7761 * to the flag that the trace option file represents, the trace_array 7762 * has a character array of trace_flags_index[], which holds the index 7763 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc. 7764 * The address of this character array is passed to the flag option file 7765 * read/write callbacks. 7766 * 7767 * In order to extract both the index and the trace_array descriptor, 7768 * get_tr_index() uses the following algorithm. 7769 * 7770 * idx = *ptr; 7771 * 7772 * As the pointer itself contains the address of the index (remember 7773 * index[1] == 1). 7774 * 7775 * Then to get the trace_array descriptor, by subtracting that index 7776 * from the ptr, we get to the start of the index itself. 7777 * 7778 * ptr - idx == &index[0] 7779 * 7780 * Then a simple container_of() from that pointer gets us to the 7781 * trace_array descriptor. 
7782 */ 7783 static void get_tr_index(void *data, struct trace_array **ptr, 7784 unsigned int *pindex) 7785 { 7786 *pindex = *(unsigned char *)data; 7787 7788 *ptr = container_of(data - *pindex, struct trace_array, 7789 trace_flags_index); 7790 } 7791 7792 static ssize_t 7793 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt, 7794 loff_t *ppos) 7795 { 7796 void *tr_index = filp->private_data; 7797 struct trace_array *tr; 7798 unsigned int index; 7799 char *buf; 7800 7801 get_tr_index(tr_index, &tr, &index); 7802 7803 if (tr->trace_flags & (1ULL << index)) 7804 buf = "1\n"; 7805 else 7806 buf = "0\n"; 7807 7808 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2); 7809 } 7810 7811 static ssize_t 7812 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt, 7813 loff_t *ppos) 7814 { 7815 void *tr_index = filp->private_data; 7816 struct trace_array *tr; 7817 unsigned int index; 7818 unsigned long val; 7819 int ret; 7820 7821 get_tr_index(tr_index, &tr, &index); 7822 7823 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 7824 if (ret) 7825 return ret; 7826 7827 if (val != 0 && val != 1) 7828 return -EINVAL; 7829 7830 mutex_lock(&event_mutex); 7831 mutex_lock(&trace_types_lock); 7832 ret = set_tracer_flag(tr, 1ULL << index, val); 7833 mutex_unlock(&trace_types_lock); 7834 mutex_unlock(&event_mutex); 7835 7836 if (ret < 0) 7837 return ret; 7838 7839 *ppos += cnt; 7840 7841 return cnt; 7842 } 7843 7844 static const struct file_operations trace_options_core_fops = { 7845 .open = tracing_open_generic, 7846 .read = trace_options_core_read, 7847 .write = trace_options_core_write, 7848 .llseek = generic_file_llseek, 7849 }; 7850 7851 struct dentry *trace_create_file(const char *name, 7852 umode_t mode, 7853 struct dentry *parent, 7854 void *data, 7855 const struct file_operations *fops) 7856 { 7857 struct dentry *ret; 7858 7859 ret = tracefs_create_file(name, mode, parent, data, fops); 7860 if (!ret) 7861 pr_warn("Could not create 
tracefs '%s' entry\n", name); 7862 7863 return ret; 7864 } 7865 7866 7867 static struct dentry *trace_options_init_dentry(struct trace_array *tr) 7868 { 7869 struct dentry *d_tracer; 7870 7871 if (tr->options) 7872 return tr->options; 7873 7874 d_tracer = tracing_get_dentry(tr); 7875 if (IS_ERR(d_tracer)) 7876 return NULL; 7877 7878 tr->options = tracefs_create_dir("options", d_tracer); 7879 if (!tr->options) { 7880 pr_warn("Could not create tracefs directory 'options'\n"); 7881 return NULL; 7882 } 7883 7884 return tr->options; 7885 } 7886 7887 static void 7888 create_trace_option_file(struct trace_array *tr, 7889 struct trace_option_dentry *topt, 7890 struct tracer_flags *flags, 7891 struct tracer_opt *opt) 7892 { 7893 struct dentry *t_options; 7894 7895 t_options = trace_options_init_dentry(tr); 7896 if (!t_options) 7897 return; 7898 7899 topt->flags = flags; 7900 topt->opt = opt; 7901 topt->tr = tr; 7902 7903 topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE, 7904 t_options, topt, &trace_options_fops); 7905 } 7906 7907 static int 7908 create_trace_option_files(struct trace_array *tr, struct tracer *tracer, 7909 struct tracer_flags *flags) 7910 { 7911 struct trace_option_dentry *topts; 7912 struct trace_options *tr_topts; 7913 struct tracer_opt *opts; 7914 int cnt; 7915 7916 if (!flags || !flags->opts) 7917 return 0; 7918 7919 opts = flags->opts; 7920 7921 for (cnt = 0; opts[cnt].name; cnt++) 7922 ; 7923 7924 topts = kzalloc_objs(*topts, cnt + 1); 7925 if (!topts) 7926 return 0; 7927 7928 tr_topts = krealloc_array(tr->topts, tr->nr_topts + 1, sizeof(*tr->topts), 7929 GFP_KERNEL); 7930 if (!tr_topts) { 7931 kfree(topts); 7932 return -ENOMEM; 7933 } 7934 7935 tr->topts = tr_topts; 7936 tr->topts[tr->nr_topts].tracer = tracer; 7937 tr->topts[tr->nr_topts].topts = topts; 7938 tr->nr_topts++; 7939 7940 for (cnt = 0; opts[cnt].name; cnt++) { 7941 create_trace_option_file(tr, &topts[cnt], flags, 7942 &opts[cnt]); 7943 MEM_FAIL(topts[cnt].entry == NULL, 7944 
"Failed to create trace option: %s", 7945 opts[cnt].name); 7946 } 7947 return 0; 7948 } 7949 7950 static int get_global_flags_val(struct tracer *tracer) 7951 { 7952 struct tracers *t; 7953 7954 list_for_each_entry(t, &global_trace.tracers, list) { 7955 if (t->tracer != tracer) 7956 continue; 7957 if (!t->flags) 7958 return -1; 7959 return t->flags->val; 7960 } 7961 return -1; 7962 } 7963 7964 static int add_tracer_options(struct trace_array *tr, struct tracers *t) 7965 { 7966 struct tracer *tracer = t->tracer; 7967 struct tracer_flags *flags = t->flags ?: tracer->flags; 7968 7969 if (!flags) 7970 return 0; 7971 7972 /* Only add tracer options after update_tracer_options finish */ 7973 if (!tracer_options_updated) 7974 return 0; 7975 7976 return create_trace_option_files(tr, tracer, flags); 7977 } 7978 7979 static int add_tracer(struct trace_array *tr, struct tracer *tracer) 7980 { 7981 struct tracer_flags *flags; 7982 struct tracers *t; 7983 int ret; 7984 7985 /* Only enable if the directory has been created already. */ 7986 if (!tr->dir && !(tr->flags & TRACE_ARRAY_FL_GLOBAL)) 7987 return 0; 7988 7989 /* 7990 * If this is an instance, only create flags for tracers 7991 * the instance may have. 7992 */ 7993 if (!trace_ok_for_array(tracer, tr)) 7994 return 0; 7995 7996 t = kmalloc_obj(*t); 7997 if (!t) 7998 return -ENOMEM; 7999 8000 t->tracer = tracer; 8001 t->flags = NULL; 8002 list_add(&t->list, &tr->tracers); 8003 8004 flags = tracer->flags; 8005 if (!flags) { 8006 if (!tracer->default_flags) 8007 return 0; 8008 8009 /* 8010 * If the tracer defines default flags, it means the flags are 8011 * per trace instance. 
8012 */ 8013 flags = kmalloc_obj(*flags); 8014 if (!flags) 8015 return -ENOMEM; 8016 8017 *flags = *tracer->default_flags; 8018 flags->trace = tracer; 8019 8020 t->flags = flags; 8021 8022 /* If this is an instance, inherit the global_trace flags */ 8023 if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL)) { 8024 int val = get_global_flags_val(tracer); 8025 if (!WARN_ON_ONCE(val < 0)) 8026 flags->val = val; 8027 } 8028 } 8029 8030 ret = add_tracer_options(tr, t); 8031 if (ret < 0) { 8032 list_del(&t->list); 8033 kfree(t->flags); 8034 kfree(t); 8035 } 8036 8037 return ret; 8038 } 8039 8040 static struct dentry * 8041 create_trace_option_core_file(struct trace_array *tr, 8042 const char *option, long index) 8043 { 8044 struct dentry *t_options; 8045 8046 t_options = trace_options_init_dentry(tr); 8047 if (!t_options) 8048 return NULL; 8049 8050 return trace_create_file(option, TRACE_MODE_WRITE, t_options, 8051 (void *)&tr->trace_flags_index[index], 8052 &trace_options_core_fops); 8053 } 8054 8055 static void create_trace_options_dir(struct trace_array *tr) 8056 { 8057 struct dentry *t_options; 8058 bool top_level = tr == &global_trace; 8059 int i; 8060 8061 t_options = trace_options_init_dentry(tr); 8062 if (!t_options) 8063 return; 8064 8065 for (i = 0; trace_options[i]; i++) { 8066 if (top_level || 8067 !((1ULL << i) & TOP_LEVEL_TRACE_FLAGS)) { 8068 create_trace_option_core_file(tr, trace_options[i], i); 8069 } 8070 } 8071 } 8072 8073 static ssize_t 8074 rb_simple_read(struct file *filp, char __user *ubuf, 8075 size_t cnt, loff_t *ppos) 8076 { 8077 struct trace_array *tr = filp->private_data; 8078 char buf[64]; 8079 int r; 8080 8081 r = tracer_tracing_is_on(tr); 8082 r = sprintf(buf, "%d\n", r); 8083 8084 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 8085 } 8086 8087 static ssize_t 8088 rb_simple_write(struct file *filp, const char __user *ubuf, 8089 size_t cnt, loff_t *ppos) 8090 { 8091 struct trace_array *tr = filp->private_data; 8092 struct trace_buffer *buffer 
= tr->array_buffer.buffer; 8093 unsigned long val; 8094 int ret; 8095 8096 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 8097 if (ret) 8098 return ret; 8099 8100 if (buffer) { 8101 guard(mutex)(&trace_types_lock); 8102 if (!!val == tracer_tracing_is_on(tr)) { 8103 val = 0; /* do nothing */ 8104 } else if (val) { 8105 tracer_tracing_on(tr); 8106 if (tr->current_trace->start) 8107 tr->current_trace->start(tr); 8108 } else { 8109 tracer_tracing_off(tr); 8110 if (tr->current_trace->stop) 8111 tr->current_trace->stop(tr); 8112 /* Wake up any waiters */ 8113 ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS); 8114 } 8115 } 8116 8117 (*ppos)++; 8118 8119 return cnt; 8120 } 8121 8122 static const struct file_operations rb_simple_fops = { 8123 .open = tracing_open_generic_tr, 8124 .read = rb_simple_read, 8125 .write = rb_simple_write, 8126 .release = tracing_release_generic_tr, 8127 .llseek = default_llseek, 8128 }; 8129 8130 static ssize_t 8131 buffer_percent_read(struct file *filp, char __user *ubuf, 8132 size_t cnt, loff_t *ppos) 8133 { 8134 struct trace_array *tr = filp->private_data; 8135 char buf[64]; 8136 int r; 8137 8138 r = tr->buffer_percent; 8139 r = sprintf(buf, "%d\n", r); 8140 8141 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 8142 } 8143 8144 static ssize_t 8145 buffer_percent_write(struct file *filp, const char __user *ubuf, 8146 size_t cnt, loff_t *ppos) 8147 { 8148 struct trace_array *tr = filp->private_data; 8149 unsigned long val; 8150 int ret; 8151 8152 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 8153 if (ret) 8154 return ret; 8155 8156 if (val > 100) 8157 return -EINVAL; 8158 8159 tr->buffer_percent = val; 8160 8161 (*ppos)++; 8162 8163 return cnt; 8164 } 8165 8166 static const struct file_operations buffer_percent_fops = { 8167 .open = tracing_open_generic_tr, 8168 .read = buffer_percent_read, 8169 .write = buffer_percent_write, 8170 .release = tracing_release_generic_tr, 8171 .llseek = default_llseek, 8172 }; 8173 8174 static 
ssize_t 8175 buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) 8176 { 8177 struct trace_array *tr = filp->private_data; 8178 size_t size; 8179 char buf[64]; 8180 int order; 8181 int r; 8182 8183 order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer); 8184 size = (PAGE_SIZE << order) / 1024; 8185 8186 r = sprintf(buf, "%zd\n", size); 8187 8188 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 8189 } 8190 8191 static ssize_t 8192 buffer_subbuf_size_write(struct file *filp, const char __user *ubuf, 8193 size_t cnt, loff_t *ppos) 8194 { 8195 struct trace_array *tr = filp->private_data; 8196 unsigned long val; 8197 int old_order; 8198 int order; 8199 int pages; 8200 int ret; 8201 8202 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 8203 if (ret) 8204 return ret; 8205 8206 val *= 1024; /* value passed in is in KB */ 8207 8208 pages = DIV_ROUND_UP(val, PAGE_SIZE); 8209 order = fls(pages - 1); 8210 8211 /* limit between 1 and 128 system pages */ 8212 if (order < 0 || order > 7) 8213 return -EINVAL; 8214 8215 /* Do not allow tracing while changing the order of the ring buffer */ 8216 tracing_stop_tr(tr); 8217 8218 old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer); 8219 if (old_order == order) 8220 goto out; 8221 8222 ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order); 8223 if (ret) 8224 goto out; 8225 8226 #ifdef CONFIG_TRACER_SNAPSHOT 8227 8228 if (!tr->allocated_snapshot) 8229 goto out_max; 8230 8231 ret = ring_buffer_subbuf_order_set(tr->snapshot_buffer.buffer, order); 8232 if (ret) { 8233 /* Put back the old order */ 8234 cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order); 8235 if (WARN_ON_ONCE(cnt)) { 8236 /* 8237 * AARGH! We are left with different orders! 8238 * The max buffer is our "snapshot" buffer. 8239 * When a tracer needs a snapshot (one of the 8240 * latency tracers), it swaps the max buffer 8241 * with the saved snap shot. 
We succeeded to 8242 * update the order of the main buffer, but failed to 8243 * update the order of the max buffer. But when we tried 8244 * to reset the main buffer to the original size, we 8245 * failed there too. This is very unlikely to 8246 * happen, but if it does, warn and kill all 8247 * tracing. 8248 */ 8249 tracing_disabled = 1; 8250 } 8251 goto out; 8252 } 8253 out_max: 8254 #endif 8255 (*ppos)++; 8256 out: 8257 if (ret) 8258 cnt = ret; 8259 tracing_start_tr(tr); 8260 return cnt; 8261 } 8262 8263 static const struct file_operations buffer_subbuf_size_fops = { 8264 .open = tracing_open_generic_tr, 8265 .read = buffer_subbuf_size_read, 8266 .write = buffer_subbuf_size_write, 8267 .release = tracing_release_generic_tr, 8268 .llseek = default_llseek, 8269 }; 8270 8271 static struct dentry *trace_instance_dir; 8272 8273 static void 8274 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer); 8275 8276 #ifdef CONFIG_MODULES 8277 static int make_mod_delta(struct module *mod, void *data) 8278 { 8279 struct trace_module_delta *module_delta; 8280 struct trace_scratch *tscratch; 8281 struct trace_mod_entry *entry; 8282 struct trace_array *tr = data; 8283 int i; 8284 8285 tscratch = tr->scratch; 8286 module_delta = READ_ONCE(tr->module_delta); 8287 for (i = 0; i < tscratch->nr_entries; i++) { 8288 entry = &tscratch->entries[i]; 8289 if (strcmp(mod->name, entry->mod_name)) 8290 continue; 8291 if (mod->state == MODULE_STATE_GOING) 8292 module_delta->delta[i] = 0; 8293 else 8294 module_delta->delta[i] = (unsigned long)mod->mem[MOD_TEXT].base 8295 - entry->mod_addr; 8296 break; 8297 } 8298 return 0; 8299 } 8300 #else 8301 static int make_mod_delta(struct module *mod, void *data) 8302 { 8303 return 0; 8304 } 8305 #endif 8306 8307 static int mod_addr_comp(const void *a, const void *b, const void *data) 8308 { 8309 const struct trace_mod_entry *e1 = a; 8310 const struct trace_mod_entry *e2 = b; 8311 8312 return e1->mod_addr > e2->mod_addr ? 
1 : -1; 8313 } 8314 8315 static void setup_trace_scratch(struct trace_array *tr, 8316 struct trace_scratch *tscratch, unsigned int size) 8317 { 8318 struct trace_module_delta *module_delta; 8319 struct trace_mod_entry *entry; 8320 int i, nr_entries; 8321 8322 if (!tscratch) 8323 return; 8324 8325 tr->scratch = tscratch; 8326 tr->scratch_size = size; 8327 8328 if (tscratch->text_addr) 8329 tr->text_delta = (unsigned long)_text - tscratch->text_addr; 8330 8331 if (struct_size(tscratch, entries, tscratch->nr_entries) > size) 8332 goto reset; 8333 8334 /* Check if each module name is a valid string */ 8335 for (i = 0; i < tscratch->nr_entries; i++) { 8336 int n; 8337 8338 entry = &tscratch->entries[i]; 8339 8340 for (n = 0; n < MODULE_NAME_LEN; n++) { 8341 if (entry->mod_name[n] == '\0') 8342 break; 8343 if (!isprint(entry->mod_name[n])) 8344 goto reset; 8345 } 8346 if (n == MODULE_NAME_LEN) 8347 goto reset; 8348 } 8349 8350 /* Sort the entries so that we can find appropriate module from address. */ 8351 nr_entries = tscratch->nr_entries; 8352 sort_r(tscratch->entries, nr_entries, sizeof(struct trace_mod_entry), 8353 mod_addr_comp, NULL, NULL); 8354 8355 if (IS_ENABLED(CONFIG_MODULES)) { 8356 module_delta = kzalloc_flex(*module_delta, delta, nr_entries); 8357 if (!module_delta) { 8358 pr_info("module_delta allocation failed. Not able to decode module address."); 8359 goto reset; 8360 } 8361 init_rcu_head(&module_delta->rcu); 8362 } else 8363 module_delta = NULL; 8364 WRITE_ONCE(tr->module_delta, module_delta); 8365 8366 /* Scan modules to make text delta for modules. */ 8367 module_for_each_mod(make_mod_delta, tr); 8368 8369 /* Set trace_clock as the same of the previous boot. 
*/ 8370 if (tscratch->clock_id != tr->clock_id) { 8371 if (tscratch->clock_id >= ARRAY_SIZE(trace_clocks) || 8372 tracing_set_clock(tr, trace_clocks[tscratch->clock_id].name) < 0) { 8373 pr_info("the previous trace_clock info is not valid."); 8374 goto reset; 8375 } 8376 } 8377 return; 8378 reset: 8379 /* Invalid trace modules */ 8380 memset(tscratch, 0, size); 8381 } 8382 8383 #define TRACE_TEST_PTRACING_NAME "ptracingtest" 8384 8385 int allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size) 8386 { 8387 enum ring_buffer_flags rb_flags; 8388 struct trace_scratch *tscratch; 8389 unsigned int scratch_size = 0; 8390 8391 rb_flags = tr->trace_flags & TRACE_ITER(OVERWRITE) ? RB_FL_OVERWRITE : 0; 8392 8393 buf->tr = tr; 8394 8395 if (tr->range_addr_start && tr->range_addr_size) { 8396 if (tr->name && !strcmp(tr->name, TRACE_TEST_PTRACING_NAME)) 8397 rb_flags |= RB_FL_TESTING; 8398 /* Add scratch buffer to handle 128 modules */ 8399 buf->buffer = ring_buffer_alloc_range(size, rb_flags, 0, 8400 tr->range_addr_start, 8401 tr->range_addr_size, 8402 struct_size(tscratch, entries, 128)); 8403 8404 tscratch = ring_buffer_meta_scratch(buf->buffer, &scratch_size); 8405 setup_trace_scratch(tr, tscratch, scratch_size); 8406 8407 /* 8408 * This is basically the same as a mapped buffer, 8409 * with the same restrictions. 
8410 */ 8411 tr->mapped++; 8412 } else { 8413 buf->buffer = ring_buffer_alloc(size, rb_flags); 8414 } 8415 if (!buf->buffer) 8416 return -ENOMEM; 8417 8418 buf->data = alloc_percpu(struct trace_array_cpu); 8419 if (!buf->data) { 8420 ring_buffer_free(buf->buffer); 8421 buf->buffer = NULL; 8422 return -ENOMEM; 8423 } 8424 8425 /* Allocate the first page for all buffers */ 8426 trace_set_buffer_entries(&tr->array_buffer, 8427 ring_buffer_size(tr->array_buffer.buffer, 0)); 8428 8429 return 0; 8430 } 8431 8432 static void free_trace_buffer(struct array_buffer *buf) 8433 { 8434 if (buf->buffer) { 8435 ring_buffer_free(buf->buffer); 8436 buf->buffer = NULL; 8437 free_percpu(buf->data); 8438 buf->data = NULL; 8439 } 8440 } 8441 8442 static int allocate_trace_buffers(struct trace_array *tr, unsigned long size) 8443 { 8444 int ret; 8445 8446 ret = allocate_trace_buffer(tr, &tr->array_buffer, size); 8447 if (ret) 8448 return ret; 8449 8450 ret = trace_allocate_snapshot(tr, size); 8451 if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) 8452 free_trace_buffer(&tr->array_buffer); 8453 8454 return ret; 8455 } 8456 8457 static void free_trace_buffers(struct trace_array *tr) 8458 { 8459 if (!tr) 8460 return; 8461 8462 free_trace_buffer(&tr->array_buffer); 8463 kfree(tr->module_delta); 8464 8465 #ifdef CONFIG_TRACER_SNAPSHOT 8466 free_trace_buffer(&tr->snapshot_buffer); 8467 #endif 8468 } 8469 8470 static void init_trace_flags_index(struct trace_array *tr) 8471 { 8472 int i; 8473 8474 /* Used by the trace options files */ 8475 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) 8476 tr->trace_flags_index[i] = i; 8477 } 8478 8479 static int __update_tracer(struct trace_array *tr) 8480 { 8481 struct tracer *t; 8482 int ret = 0; 8483 8484 for (t = trace_types; t && !ret; t = t->next) 8485 ret = add_tracer(tr, t); 8486 8487 return ret; 8488 } 8489 8490 static __init int __update_tracer_options(struct trace_array *tr) 8491 { 8492 struct tracers *t; 8493 int ret = 0; 8494 8495 
list_for_each_entry(t, &tr->tracers, list) { 8496 ret = add_tracer_options(tr, t); 8497 if (ret < 0) 8498 break; 8499 } 8500 8501 return ret; 8502 } 8503 8504 static __init void update_tracer_options(void) 8505 { 8506 struct trace_array *tr; 8507 8508 guard(mutex)(&trace_types_lock); 8509 tracer_options_updated = true; 8510 list_for_each_entry(tr, &ftrace_trace_arrays, list) 8511 __update_tracer_options(tr); 8512 } 8513 8514 /* Must have trace_types_lock held */ 8515 struct trace_array *trace_array_find(const char *instance) 8516 { 8517 struct trace_array *tr, *found = NULL; 8518 8519 list_for_each_entry(tr, &ftrace_trace_arrays, list) { 8520 if (tr->name && strcmp(tr->name, instance) == 0) { 8521 found = tr; 8522 break; 8523 } 8524 } 8525 8526 return found; 8527 } 8528 8529 struct trace_array *trace_array_find_get(const char *instance) 8530 { 8531 struct trace_array *tr; 8532 8533 guard(mutex)(&trace_types_lock); 8534 tr = trace_array_find(instance); 8535 if (tr && __trace_array_get(tr) < 0) 8536 tr = NULL; 8537 8538 return tr; 8539 } 8540 8541 static int trace_array_create_dir(struct trace_array *tr) 8542 { 8543 int ret; 8544 8545 tr->dir = tracefs_create_dir(tr->name, trace_instance_dir); 8546 if (!tr->dir) 8547 return -EINVAL; 8548 8549 ret = event_trace_add_tracer(tr->dir, tr); 8550 if (ret) { 8551 tracefs_remove(tr->dir); 8552 return ret; 8553 } 8554 8555 init_tracer_tracefs(tr, tr->dir); 8556 ret = __update_tracer(tr); 8557 if (ret) { 8558 event_trace_del_tracer(tr); 8559 tracefs_remove(tr->dir); 8560 return ret; 8561 } 8562 return 0; 8563 } 8564 8565 static struct trace_array * 8566 trace_array_create_systems(const char *name, const char *systems, 8567 unsigned long range_addr_start, 8568 unsigned long range_addr_size) 8569 { 8570 struct trace_array *tr; 8571 int ret; 8572 8573 ret = -ENOMEM; 8574 tr = kzalloc_obj(*tr); 8575 if (!tr) 8576 return ERR_PTR(ret); 8577 8578 tr->name = kstrdup(name, GFP_KERNEL); 8579 if (!tr->name) 8580 goto out_free_tr; 8581 
8582 if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL)) 8583 goto out_free_tr; 8584 8585 if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL)) 8586 goto out_free_tr; 8587 8588 if (systems) { 8589 tr->system_names = kstrdup_const(systems, GFP_KERNEL); 8590 if (!tr->system_names) 8591 goto out_free_tr; 8592 } 8593 8594 /* Only for boot up memory mapped ring buffers */ 8595 tr->range_addr_start = range_addr_start; 8596 tr->range_addr_size = range_addr_size; 8597 8598 tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS; 8599 8600 cpumask_copy(tr->tracing_cpumask, cpu_all_mask); 8601 8602 raw_spin_lock_init(&tr->start_lock); 8603 8604 tr->syscall_buf_sz = global_trace.syscall_buf_sz; 8605 8606 tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; 8607 #ifdef CONFIG_TRACER_SNAPSHOT 8608 spin_lock_init(&tr->snapshot_trigger_lock); 8609 #endif 8610 tr->current_trace = &nop_trace; 8611 tr->current_trace_flags = nop_trace.flags; 8612 8613 INIT_LIST_HEAD(&tr->systems); 8614 INIT_LIST_HEAD(&tr->events); 8615 INIT_LIST_HEAD(&tr->hist_vars); 8616 INIT_LIST_HEAD(&tr->err_log); 8617 INIT_LIST_HEAD(&tr->tracers); 8618 INIT_LIST_HEAD(&tr->marker_list); 8619 8620 #ifdef CONFIG_MODULES 8621 INIT_LIST_HEAD(&tr->mod_events); 8622 #endif 8623 8624 if (allocate_trace_buffers(tr, trace_buf_size) < 0) 8625 goto out_free_tr; 8626 8627 /* The ring buffer is defaultly expanded */ 8628 trace_set_ring_buffer_expanded(tr); 8629 8630 if (ftrace_allocate_ftrace_ops(tr) < 0) 8631 goto out_free_tr; 8632 8633 trace_array_init_autoremove(tr); 8634 8635 ftrace_init_trace_array(tr); 8636 8637 init_trace_flags_index(tr); 8638 8639 if (trace_instance_dir) { 8640 ret = trace_array_create_dir(tr); 8641 if (ret) 8642 goto out_free_tr; 8643 } else 8644 __trace_early_add_events(tr); 8645 8646 list_add(&tr->list, &ftrace_trace_arrays); 8647 8648 tr->ref++; 8649 8650 return tr; 8651 8652 out_free_tr: 8653 ftrace_free_ftrace_ops(tr); 8654 free_trace_buffers(tr); 8655 
free_cpumask_var(tr->pipe_cpumask); 8656 free_cpumask_var(tr->tracing_cpumask); 8657 kfree_const(tr->system_names); 8658 kfree(tr->range_name); 8659 kfree(tr->name); 8660 kfree(tr); 8661 8662 return ERR_PTR(ret); 8663 } 8664 8665 static struct trace_array *trace_array_create(const char *name) 8666 { 8667 return trace_array_create_systems(name, NULL, 0, 0); 8668 } 8669 8670 static int instance_mkdir(const char *name) 8671 { 8672 struct trace_array *tr; 8673 int ret; 8674 8675 guard(mutex)(&event_mutex); 8676 guard(mutex)(&trace_types_lock); 8677 8678 ret = -EEXIST; 8679 if (trace_array_find(name)) 8680 return -EEXIST; 8681 8682 tr = trace_array_create(name); 8683 8684 ret = PTR_ERR_OR_ZERO(tr); 8685 8686 return ret; 8687 } 8688 8689 #ifdef CONFIG_MMU 8690 static u64 map_pages(unsigned long start, unsigned long size) 8691 { 8692 unsigned long vmap_start, vmap_end; 8693 struct vm_struct *area; 8694 int ret; 8695 8696 area = get_vm_area(size, VM_IOREMAP); 8697 if (!area) 8698 return 0; 8699 8700 vmap_start = (unsigned long) area->addr; 8701 vmap_end = vmap_start + size; 8702 8703 ret = vmap_page_range(vmap_start, vmap_end, 8704 start, pgprot_nx(PAGE_KERNEL)); 8705 if (ret < 0) { 8706 free_vm_area(area); 8707 return 0; 8708 } 8709 8710 return (u64)vmap_start; 8711 } 8712 #else 8713 static inline u64 map_pages(unsigned long start, unsigned long size) 8714 { 8715 return 0; 8716 } 8717 #endif 8718 8719 /** 8720 * trace_array_get_by_name - Create/Lookup a trace array, given its name. 8721 * @name: The name of the trace array to be looked up/created. 8722 * @systems: A list of systems to create event directories for (NULL for all) 8723 * 8724 * Returns pointer to trace array with given name. 8725 * NULL, if it cannot be created. 8726 * 8727 * NOTE: This function increments the reference counter associated with the 8728 * trace array returned. This makes sure it cannot be freed while in use. 8729 * Use trace_array_put() once the trace array is no longer needed. 
8730 * If the trace_array is to be freed, trace_array_destroy() needs to 8731 * be called after the trace_array_put(), or simply let user space delete 8732 * it from the tracefs instances directory. But until the 8733 * trace_array_put() is called, user space can not delete it. 8734 * 8735 */ 8736 struct trace_array *trace_array_get_by_name(const char *name, const char *systems) 8737 { 8738 struct trace_array *tr; 8739 8740 guard(mutex)(&event_mutex); 8741 guard(mutex)(&trace_types_lock); 8742 8743 list_for_each_entry(tr, &ftrace_trace_arrays, list) { 8744 if (tr->name && strcmp(tr->name, name) == 0) { 8745 /* if this fails, @tr is going to be removed. */ 8746 if (__trace_array_get(tr) < 0) 8747 tr = NULL; 8748 return tr; 8749 } 8750 } 8751 8752 tr = trace_array_create_systems(name, systems, 0, 0); 8753 8754 if (IS_ERR(tr)) 8755 tr = NULL; 8756 else 8757 tr->ref++; 8758 8759 return tr; 8760 } 8761 EXPORT_SYMBOL_GPL(trace_array_get_by_name); 8762 8763 static int __remove_instance(struct trace_array *tr) 8764 { 8765 int i; 8766 8767 /* Reference counter for a newly created trace array = 1. 
*/ 8768 if (tr->ref > 1 || (tr->current_trace && tr->trace_ref)) 8769 return -EBUSY; 8770 8771 list_del(&tr->list); 8772 8773 if (printk_trace == tr) 8774 update_printk_trace(&global_trace); 8775 8776 /* Must be done before disabling all the flags */ 8777 if (update_marker_trace(tr, 0)) 8778 synchronize_rcu(); 8779 8780 /* Disable all the flags that were enabled coming in */ 8781 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) { 8782 if ((1ULL << i) & ZEROED_TRACE_FLAGS) 8783 set_tracer_flag(tr, 1ULL << i, 0); 8784 } 8785 8786 trace_array_cancel_autoremove(tr); 8787 tracing_set_nop(tr); 8788 clear_ftrace_function_probes(tr); 8789 event_trace_del_tracer(tr); 8790 ftrace_clear_pids(tr); 8791 ftrace_destroy_function_files(tr); 8792 tracefs_remove(tr->dir); 8793 free_percpu(tr->last_func_repeats); 8794 free_trace_buffers(tr); 8795 clear_tracing_err_log(tr); 8796 free_tracers(tr); 8797 8798 if (tr->range_name) { 8799 reserve_mem_release_by_name(tr->range_name); 8800 kfree(tr->range_name); 8801 } 8802 if (tr->flags & TRACE_ARRAY_FL_VMALLOC) 8803 vfree((void *)tr->range_addr_start); 8804 8805 for (i = 0; i < tr->nr_topts; i++) { 8806 kfree(tr->topts[i].topts); 8807 } 8808 kfree(tr->topts); 8809 8810 free_cpumask_var(tr->pipe_cpumask); 8811 free_cpumask_var(tr->tracing_cpumask); 8812 kfree_const(tr->system_names); 8813 kfree(tr->name); 8814 kfree(tr); 8815 8816 return 0; 8817 } 8818 8819 int trace_array_destroy(struct trace_array *this_tr) 8820 { 8821 struct trace_array *tr; 8822 8823 if (!this_tr) 8824 return -EINVAL; 8825 8826 guard(mutex)(&event_mutex); 8827 guard(mutex)(&trace_types_lock); 8828 8829 8830 /* Making sure trace array exists before destroying it. 
*/ 8831 list_for_each_entry(tr, &ftrace_trace_arrays, list) { 8832 if (tr == this_tr) 8833 return __remove_instance(tr); 8834 } 8835 8836 return -ENODEV; 8837 } 8838 EXPORT_SYMBOL_GPL(trace_array_destroy); 8839 8840 static int instance_rmdir(const char *name) 8841 { 8842 struct trace_array *tr; 8843 8844 guard(mutex)(&event_mutex); 8845 guard(mutex)(&trace_types_lock); 8846 8847 tr = trace_array_find(name); 8848 if (!tr) 8849 return -ENODEV; 8850 8851 return __remove_instance(tr); 8852 } 8853 8854 static __init void create_trace_instances(struct dentry *d_tracer) 8855 { 8856 struct trace_array *tr; 8857 8858 trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer, 8859 instance_mkdir, 8860 instance_rmdir); 8861 if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n")) 8862 return; 8863 8864 guard(mutex)(&event_mutex); 8865 guard(mutex)(&trace_types_lock); 8866 8867 list_for_each_entry(tr, &ftrace_trace_arrays, list) { 8868 if (!tr->name) 8869 continue; 8870 if (MEM_FAIL(trace_array_create_dir(tr) < 0, 8871 "Failed to create instance directory\n")) 8872 return; 8873 } 8874 } 8875 8876 static void 8877 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer) 8878 { 8879 umode_t writable_mode = TRACE_MODE_WRITE; 8880 int cpu; 8881 8882 if (trace_array_is_readonly(tr)) 8883 writable_mode = TRACE_MODE_READ; 8884 8885 trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer, 8886 tr, &show_traces_fops); 8887 8888 trace_create_file("current_tracer", writable_mode, d_tracer, 8889 tr, &set_tracer_fops); 8890 8891 trace_create_file("tracing_cpumask", writable_mode, d_tracer, 8892 tr, &tracing_cpumask_fops); 8893 8894 /* Options are used for changing print-format even for readonly instance. 
*/ 8895 trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer, 8896 tr, &tracing_iter_fops); 8897 8898 trace_create_file("trace", TRACE_MODE_WRITE, d_tracer, 8899 tr, &tracing_fops); 8900 8901 trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer, 8902 tr, &tracing_pipe_fops); 8903 8904 trace_create_file("buffer_size_kb", writable_mode, d_tracer, 8905 tr, &tracing_entries_fops); 8906 8907 trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer, 8908 tr, &tracing_total_entries_fops); 8909 8910 trace_create_file("trace_clock", writable_mode, d_tracer, tr, 8911 &trace_clock_fops); 8912 8913 trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr, 8914 &trace_time_stamp_mode_fops); 8915 8916 tr->buffer_percent = 50; 8917 8918 trace_create_file("buffer_subbuf_size_kb", writable_mode, d_tracer, 8919 tr, &buffer_subbuf_size_fops); 8920 8921 create_trace_options_dir(tr); 8922 8923 if (tr->range_addr_start) 8924 trace_create_file("last_boot_info", TRACE_MODE_READ, d_tracer, 8925 tr, &last_boot_fops); 8926 8927 for_each_tracing_cpu(cpu) 8928 tracing_init_tracefs_percpu(tr, cpu); 8929 8930 /* Read-only instance has above files only. 
*/ 8931 if (trace_array_is_readonly(tr)) 8932 return; 8933 8934 trace_create_file("free_buffer", 0200, d_tracer, 8935 tr, &tracing_free_buffer_fops); 8936 8937 trace_create_file("trace_marker", 0220, d_tracer, 8938 tr, &tracing_mark_fops); 8939 8940 tr->trace_marker_file = __find_event_file(tr, "ftrace", "print"); 8941 8942 trace_create_file("trace_marker_raw", 0220, d_tracer, 8943 tr, &tracing_mark_raw_fops); 8944 8945 trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer, 8946 tr, &buffer_percent_fops); 8947 8948 trace_create_file("syscall_user_buf_size", TRACE_MODE_WRITE, d_tracer, 8949 tr, &tracing_syscall_buf_fops); 8950 8951 trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer, 8952 tr, &rb_simple_fops); 8953 8954 trace_create_maxlat_file(tr, d_tracer); 8955 8956 if (ftrace_create_function_files(tr, d_tracer)) 8957 MEM_FAIL(1, "Could not allocate function filter files"); 8958 8959 #ifdef CONFIG_TRACER_SNAPSHOT 8960 if (!tr->range_addr_start) 8961 trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer, 8962 tr, &snapshot_fops); 8963 #endif 8964 8965 trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer, 8966 tr, &tracing_err_log_fops); 8967 8968 ftrace_init_tracefs(tr, d_tracer); 8969 } 8970 8971 #ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED 8972 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore) 8973 { 8974 struct vfsmount *mnt; 8975 struct file_system_type *type; 8976 struct fs_context *fc; 8977 int ret; 8978 8979 /* 8980 * To maintain backward compatibility for tools that mount 8981 * debugfs to get to the tracing facility, tracefs is automatically 8982 * mounted to the debugfs/tracing directory. 
8983 */ 8984 type = get_fs_type("tracefs"); 8985 if (!type) 8986 return NULL; 8987 8988 fc = fs_context_for_submount(type, mntpt); 8989 put_filesystem(type); 8990 if (IS_ERR(fc)) 8991 return ERR_CAST(fc); 8992 8993 pr_warn("NOTICE: Automounting of tracing to debugfs is deprecated and will be removed in 2030\n"); 8994 8995 ret = vfs_parse_fs_string(fc, "source", "tracefs"); 8996 if (!ret) 8997 mnt = fc_mount(fc); 8998 else 8999 mnt = ERR_PTR(ret); 9000 9001 put_fs_context(fc); 9002 return mnt; 9003 } 9004 #endif 9005 9006 /** 9007 * tracing_init_dentry - initialize top level trace array 9008 * 9009 * This is called when creating files or directories in the tracing 9010 * directory. It is called via fs_initcall() by any of the boot up code 9011 * and expects to return the dentry of the top level tracing directory. 9012 */ 9013 int tracing_init_dentry(void) 9014 { 9015 struct trace_array *tr = &global_trace; 9016 9017 if (security_locked_down(LOCKDOWN_TRACEFS)) { 9018 pr_warn("Tracing disabled due to lockdown\n"); 9019 return -EPERM; 9020 } 9021 9022 /* The top level trace array uses NULL as parent */ 9023 if (tr->dir) 9024 return 0; 9025 9026 if (WARN_ON(!tracefs_initialized())) 9027 return -ENODEV; 9028 9029 #ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED 9030 /* 9031 * As there may still be users that expect the tracing 9032 * files to exist in debugfs/tracing, we must automount 9033 * the tracefs file system there, so older tools still 9034 * work with the newer kernel. 
9035 */ 9036 tr->dir = debugfs_create_automount("tracing", NULL, 9037 trace_automount, NULL); 9038 #endif 9039 9040 return 0; 9041 } 9042 9043 extern struct trace_eval_map *__start_ftrace_eval_maps[]; 9044 extern struct trace_eval_map *__stop_ftrace_eval_maps[]; 9045 9046 struct workqueue_struct *trace_init_wq __initdata; 9047 static struct work_struct eval_map_work __initdata; 9048 static struct work_struct tracerfs_init_work __initdata; 9049 9050 static void __init eval_map_work_func(struct work_struct *work) 9051 { 9052 int len; 9053 9054 len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps; 9055 trace_event_update_with_eval_map(NULL, __start_ftrace_eval_maps, len); 9056 } 9057 9058 static int __init trace_eval_init(void) 9059 { 9060 INIT_WORK(&eval_map_work, eval_map_work_func); 9061 9062 trace_init_wq = alloc_workqueue("trace_init_wq", WQ_UNBOUND, 0); 9063 if (!trace_init_wq) { 9064 pr_err("Unable to allocate trace_init_wq\n"); 9065 /* Do work here */ 9066 eval_map_work_func(&eval_map_work); 9067 return -ENOMEM; 9068 } 9069 9070 queue_work(trace_init_wq, &eval_map_work); 9071 return 0; 9072 } 9073 9074 subsys_initcall(trace_eval_init); 9075 9076 static int __init trace_eval_sync(void) 9077 { 9078 /* Make sure the eval map updates are finished */ 9079 if (trace_init_wq) 9080 destroy_workqueue(trace_init_wq); 9081 return 0; 9082 } 9083 9084 late_initcall_sync(trace_eval_sync); 9085 9086 9087 #ifdef CONFIG_MODULES 9088 9089 bool module_exists(const char *module) 9090 { 9091 /* All modules have the symbol __this_module */ 9092 static const char this_mod[] = "__this_module"; 9093 char modname[MODULE_NAME_LEN + sizeof(this_mod) + 2]; 9094 unsigned long val; 9095 int n; 9096 9097 n = snprintf(modname, sizeof(modname), "%s:%s", module, this_mod); 9098 9099 if (n > sizeof(modname) - 1) 9100 return false; 9101 9102 val = module_kallsyms_lookup_name(modname); 9103 return val != 0; 9104 } 9105 9106 static void trace_module_add_evals(struct module *mod) 9107 { 9108 /* 
9109 * Modules with bad taint do not have events created, do 9110 * not bother with enums either. 9111 */ 9112 if (trace_module_has_bad_taint(mod)) 9113 return; 9114 9115 /* Even if no trace_evals, this need to sanitize field types. */ 9116 trace_event_update_with_eval_map(mod, mod->trace_evals, mod->num_trace_evals); 9117 } 9118 9119 #ifdef CONFIG_TRACE_EVAL_MAP_FILE 9120 static void trace_module_remove_evals(struct module *mod) 9121 { 9122 union trace_eval_map_item *map; 9123 union trace_eval_map_item **last = &trace_eval_maps; 9124 9125 if (!mod->num_trace_evals) 9126 return; 9127 9128 guard(mutex)(&trace_eval_mutex); 9129 9130 map = trace_eval_maps; 9131 9132 while (map) { 9133 if (map->head.mod == mod) 9134 break; 9135 map = trace_eval_jmp_to_tail(map); 9136 last = &map->tail.next; 9137 map = map->tail.next; 9138 } 9139 if (!map) 9140 return; 9141 9142 *last = trace_eval_jmp_to_tail(map)->tail.next; 9143 kfree(map); 9144 } 9145 #else 9146 static inline void trace_module_remove_evals(struct module *mod) { } 9147 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */ 9148 9149 static void trace_module_record(struct module *mod, bool add) 9150 { 9151 struct trace_array *tr; 9152 unsigned long flags; 9153 9154 list_for_each_entry(tr, &ftrace_trace_arrays, list) { 9155 flags = tr->flags & (TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT); 9156 /* Update any persistent trace array that has already been started */ 9157 if (flags == TRACE_ARRAY_FL_BOOT && add) { 9158 guard(mutex)(&scratch_mutex); 9159 save_mod(mod, tr); 9160 } else if (flags & TRACE_ARRAY_FL_LAST_BOOT) { 9161 /* Update delta if the module loaded in previous boot */ 9162 make_mod_delta(mod, tr); 9163 } 9164 } 9165 } 9166 9167 static int trace_module_notify(struct notifier_block *self, 9168 unsigned long val, void *data) 9169 { 9170 struct module *mod = data; 9171 9172 switch (val) { 9173 case MODULE_STATE_COMING: 9174 trace_module_add_evals(mod); 9175 trace_module_record(mod, true); 9176 break; 9177 case 
MODULE_STATE_GOING: 9178 trace_module_remove_evals(mod); 9179 trace_module_record(mod, false); 9180 break; 9181 } 9182 9183 return NOTIFY_OK; 9184 } 9185 9186 static struct notifier_block trace_module_nb = { 9187 .notifier_call = trace_module_notify, 9188 .priority = 0, 9189 }; 9190 #endif /* CONFIG_MODULES */ 9191 9192 static __init void tracer_init_tracefs_work_func(struct work_struct *work) 9193 { 9194 9195 event_trace_init(); 9196 9197 init_tracer_tracefs(&global_trace, NULL); 9198 ftrace_init_tracefs_toplevel(&global_trace, NULL); 9199 9200 trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL, 9201 &global_trace, &tracing_thresh_fops); 9202 9203 trace_create_file("README", TRACE_MODE_READ, NULL, 9204 NULL, &tracing_readme_fops); 9205 9206 trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL, 9207 NULL, &tracing_saved_cmdlines_fops); 9208 9209 trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL, 9210 NULL, &tracing_saved_cmdlines_size_fops); 9211 9212 trace_create_file("saved_tgids", TRACE_MODE_READ, NULL, 9213 NULL, &tracing_saved_tgids_fops); 9214 9215 trace_create_eval_file(NULL); 9216 9217 #ifdef CONFIG_MODULES 9218 register_module_notifier(&trace_module_nb); 9219 #endif 9220 9221 #ifdef CONFIG_DYNAMIC_FTRACE 9222 trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL, 9223 NULL, &tracing_dyn_info_fops); 9224 #endif 9225 9226 create_trace_instances(NULL); 9227 9228 update_tracer_options(); 9229 } 9230 9231 static __init int tracer_init_tracefs(void) 9232 { 9233 int ret; 9234 9235 trace_access_lock_init(); 9236 9237 ret = tracing_init_dentry(); 9238 if (ret) 9239 return 0; 9240 9241 if (trace_init_wq) { 9242 INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func); 9243 queue_work(trace_init_wq, &tracerfs_init_work); 9244 } else { 9245 tracer_init_tracefs_work_func(NULL); 9246 } 9247 9248 if (rv_init_interface()) 9249 pr_err("RV: Error while creating the RV interface\n"); 9250 9251 return 0; 9252 } 9253 9254 
fs_initcall(tracer_init_tracefs); 9255 9256 static int trace_die_panic_handler(struct notifier_block *self, 9257 unsigned long ev, void *unused); 9258 9259 static struct notifier_block trace_panic_notifier = { 9260 .notifier_call = trace_die_panic_handler, 9261 .priority = INT_MAX - 1, 9262 }; 9263 9264 static struct notifier_block trace_die_notifier = { 9265 .notifier_call = trace_die_panic_handler, 9266 .priority = INT_MAX - 1, 9267 }; 9268 9269 /* 9270 * The idea is to execute the following die/panic callback early, in order 9271 * to avoid showing irrelevant information in the trace (like other panic 9272 * notifier functions); we are the 2nd to run, after hung_task/rcu_stall 9273 * warnings get disabled (to prevent potential log flooding). 9274 */ 9275 static int trace_die_panic_handler(struct notifier_block *self, 9276 unsigned long ev, void *unused) 9277 { 9278 if (!ftrace_dump_on_oops_enabled()) 9279 return NOTIFY_DONE; 9280 9281 /* The die notifier requires DIE_OOPS to trigger */ 9282 if (self == &trace_die_notifier && ev != DIE_OOPS) 9283 return NOTIFY_DONE; 9284 9285 ftrace_dump(DUMP_PARAM); 9286 9287 return NOTIFY_DONE; 9288 } 9289 9290 /* 9291 * printk is set to max of 1024, we really don't need it that big. 9292 * Nothing should be printing 1000 characters anyway. 9293 */ 9294 #define TRACE_MAX_PRINT 1000 9295 9296 /* 9297 * Define here KERN_TRACE so that we have one place to modify 9298 * it if we decide to change what log level the ftrace dump 9299 * should be at. 9300 */ 9301 #define KERN_TRACE KERN_EMERG 9302 9303 void 9304 trace_printk_seq(struct trace_seq *s) 9305 { 9306 /* Probably should print a warning here. */ 9307 if (s->seq.len >= TRACE_MAX_PRINT) 9308 s->seq.len = TRACE_MAX_PRINT; 9309 9310 /* 9311 * More paranoid code. Although the buffer size is set to 9312 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just 9313 * an extra layer of protection. 
9314 */ 9315 if (WARN_ON_ONCE(s->seq.len >= s->seq.size)) 9316 s->seq.len = s->seq.size - 1; 9317 9318 /* should be zero ended, but we are paranoid. */ 9319 s->buffer[s->seq.len] = 0; 9320 9321 printk(KERN_TRACE "%s", s->buffer); 9322 9323 trace_seq_init(s); 9324 } 9325 9326 static void trace_init_iter(struct trace_iterator *iter, struct trace_array *tr) 9327 { 9328 iter->tr = tr; 9329 iter->trace = iter->tr->current_trace; 9330 iter->cpu_file = RING_BUFFER_ALL_CPUS; 9331 iter->array_buffer = &tr->array_buffer; 9332 9333 if (iter->trace && iter->trace->open) 9334 iter->trace->open(iter); 9335 9336 /* Annotate start of buffers if we had overruns */ 9337 if (ring_buffer_overruns(iter->array_buffer->buffer)) 9338 iter->iter_flags |= TRACE_FILE_ANNOTATE; 9339 9340 /* Output in nanoseconds only if we are using a clock in nanoseconds. */ 9341 if (trace_clocks[iter->tr->clock_id].in_ns) 9342 iter->iter_flags |= TRACE_FILE_TIME_IN_NS; 9343 9344 /* Can not use kmalloc for iter.temp and iter.fmt */ 9345 iter->temp = static_temp_buf; 9346 iter->temp_size = STATIC_TEMP_BUF_SIZE; 9347 iter->fmt = static_fmt_buf; 9348 iter->fmt_size = STATIC_FMT_BUF_SIZE; 9349 } 9350 9351 void trace_init_global_iter(struct trace_iterator *iter) 9352 { 9353 trace_init_iter(iter, &global_trace); 9354 } 9355 9356 static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_mode) 9357 { 9358 /* use static because iter can be a bit big for the stack */ 9359 static struct trace_iterator iter; 9360 unsigned int old_userobj; 9361 unsigned long flags; 9362 int cnt = 0; 9363 9364 /* 9365 * Always turn off tracing when we dump. 9366 * We don't need to show trace output of what happens 9367 * between multiple crashes. 9368 * 9369 * If the user does a sysrq-z, then they can re-enable 9370 * tracing with echo 1 > tracing_on. 
9371 */ 9372 tracer_tracing_off(tr); 9373 9374 local_irq_save(flags); 9375 9376 /* Simulate the iterator */ 9377 trace_init_iter(&iter, tr); 9378 9379 /* While dumping, do not allow the buffer to be enable */ 9380 tracer_tracing_disable(tr); 9381 9382 old_userobj = tr->trace_flags & TRACE_ITER(SYM_USEROBJ); 9383 9384 /* don't look at user memory in panic mode */ 9385 tr->trace_flags &= ~TRACE_ITER(SYM_USEROBJ); 9386 9387 if (dump_mode == DUMP_ORIG) 9388 iter.cpu_file = raw_smp_processor_id(); 9389 else 9390 iter.cpu_file = RING_BUFFER_ALL_CPUS; 9391 9392 if (tr == &global_trace) 9393 printk(KERN_TRACE "Dumping ftrace buffer:\n"); 9394 else 9395 printk(KERN_TRACE "Dumping ftrace instance %s buffer:\n", tr->name); 9396 9397 /* Did function tracer already get disabled? */ 9398 if (ftrace_is_dead()) { 9399 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n"); 9400 printk("# MAY BE MISSING FUNCTION EVENTS\n"); 9401 } 9402 9403 /* 9404 * We need to stop all tracing on all CPUS to read 9405 * the next buffer. This is a bit expensive, but is 9406 * not done often. We fill all what we can read, 9407 * and then release the locks again. 
9408 */ 9409 9410 while (!trace_empty(&iter)) { 9411 9412 if (!cnt) 9413 printk(KERN_TRACE "---------------------------------\n"); 9414 9415 cnt++; 9416 9417 trace_iterator_reset(&iter); 9418 iter.iter_flags |= TRACE_FILE_LAT_FMT; 9419 9420 if (trace_find_next_entry_inc(&iter) != NULL) { 9421 int ret; 9422 9423 ret = print_trace_line(&iter); 9424 if (ret != TRACE_TYPE_NO_CONSUME) 9425 trace_consume(&iter); 9426 9427 trace_printk_seq(&iter.seq); 9428 } 9429 touch_nmi_watchdog(); 9430 } 9431 9432 if (!cnt) 9433 printk(KERN_TRACE " (ftrace buffer empty)\n"); 9434 else 9435 printk(KERN_TRACE "---------------------------------\n"); 9436 9437 tr->trace_flags |= old_userobj; 9438 9439 tracer_tracing_enable(tr); 9440 local_irq_restore(flags); 9441 } 9442 9443 static void ftrace_dump_by_param(void) 9444 { 9445 bool first_param = true; 9446 char dump_param[MAX_TRACER_SIZE]; 9447 char *buf, *token, *inst_name; 9448 struct trace_array *tr; 9449 9450 strscpy(dump_param, ftrace_dump_on_oops, MAX_TRACER_SIZE); 9451 buf = dump_param; 9452 9453 while ((token = strsep(&buf, ",")) != NULL) { 9454 if (first_param) { 9455 first_param = false; 9456 if (!strcmp("0", token)) 9457 continue; 9458 else if (!strcmp("1", token)) { 9459 ftrace_dump_one(&global_trace, DUMP_ALL); 9460 continue; 9461 } 9462 else if (!strcmp("2", token) || 9463 !strcmp("orig_cpu", token)) { 9464 ftrace_dump_one(&global_trace, DUMP_ORIG); 9465 continue; 9466 } 9467 } 9468 9469 inst_name = strsep(&token, "="); 9470 tr = trace_array_find(inst_name); 9471 if (!tr) { 9472 printk(KERN_TRACE "Instance %s not found\n", inst_name); 9473 continue; 9474 } 9475 9476 if (token && (!strcmp("2", token) || 9477 !strcmp("orig_cpu", token))) 9478 ftrace_dump_one(tr, DUMP_ORIG); 9479 else 9480 ftrace_dump_one(tr, DUMP_ALL); 9481 } 9482 } 9483 9484 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) 9485 { 9486 static atomic_t dump_running; 9487 9488 /* Only allow one dump user at a time. 
*/ 9489 if (atomic_inc_return(&dump_running) != 1) { 9490 atomic_dec(&dump_running); 9491 return; 9492 } 9493 9494 switch (oops_dump_mode) { 9495 case DUMP_ALL: 9496 ftrace_dump_one(&global_trace, DUMP_ALL); 9497 break; 9498 case DUMP_ORIG: 9499 ftrace_dump_one(&global_trace, DUMP_ORIG); 9500 break; 9501 case DUMP_PARAM: 9502 ftrace_dump_by_param(); 9503 break; 9504 case DUMP_NONE: 9505 break; 9506 default: 9507 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n"); 9508 ftrace_dump_one(&global_trace, DUMP_ALL); 9509 } 9510 9511 atomic_dec(&dump_running); 9512 } 9513 EXPORT_SYMBOL_GPL(ftrace_dump); 9514 9515 #define WRITE_BUFSIZE 4096 9516 9517 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer, 9518 size_t count, loff_t *ppos, 9519 int (*createfn)(const char *)) 9520 { 9521 char *kbuf __free(kfree) = NULL; 9522 char *buf, *tmp; 9523 int ret = 0; 9524 size_t done = 0; 9525 size_t size; 9526 9527 kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL); 9528 if (!kbuf) 9529 return -ENOMEM; 9530 9531 while (done < count) { 9532 size = count - done; 9533 9534 if (size >= WRITE_BUFSIZE) 9535 size = WRITE_BUFSIZE - 1; 9536 9537 if (copy_from_user(kbuf, buffer + done, size)) 9538 return -EFAULT; 9539 9540 kbuf[size] = '\0'; 9541 buf = kbuf; 9542 do { 9543 tmp = strchr(buf, '\n'); 9544 if (tmp) { 9545 *tmp = '\0'; 9546 size = tmp - buf + 1; 9547 } else { 9548 size = strlen(buf); 9549 if (done + size < count) { 9550 if (buf != kbuf) 9551 break; 9552 /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */ 9553 pr_warn("Line length is too long: Should be less than %d\n", 9554 WRITE_BUFSIZE - 2); 9555 return -EINVAL; 9556 } 9557 } 9558 done += size; 9559 9560 /* Remove comments */ 9561 tmp = strchr(buf, '#'); 9562 9563 if (tmp) 9564 *tmp = '\0'; 9565 9566 ret = createfn(buf); 9567 if (ret) 9568 return ret; 9569 buf += size; 9570 9571 } while (done < count); 9572 } 9573 return done; 9574 } 9575 9576 __init static int backup_instance_area(const char 
*backup, 9577 unsigned long *addr, phys_addr_t *size) 9578 { 9579 struct trace_array *backup_tr; 9580 void *allocated_vaddr = NULL; 9581 9582 backup_tr = trace_array_get_by_name(backup, NULL); 9583 if (!backup_tr) { 9584 pr_warn("Tracing: Instance %s is not found.\n", backup); 9585 return -ENOENT; 9586 } 9587 9588 if (!(backup_tr->flags & TRACE_ARRAY_FL_BOOT)) { 9589 pr_warn("Tracing: Instance %s is not boot mapped.\n", backup); 9590 trace_array_put(backup_tr); 9591 return -EINVAL; 9592 } 9593 9594 *size = backup_tr->range_addr_size; 9595 9596 allocated_vaddr = vzalloc(*size); 9597 if (!allocated_vaddr) { 9598 pr_warn("Tracing: Failed to allocate memory for copying instance %s (size 0x%lx)\n", 9599 backup, (unsigned long)*size); 9600 trace_array_put(backup_tr); 9601 return -ENOMEM; 9602 } 9603 9604 memcpy(allocated_vaddr, 9605 (void *)backup_tr->range_addr_start, (size_t)*size); 9606 *addr = (unsigned long)allocated_vaddr; 9607 9608 trace_array_put(backup_tr); 9609 return 0; 9610 } 9611 9612 __init static void enable_instances(void) 9613 { 9614 struct trace_array *tr; 9615 bool memmap_area = false; 9616 char *curr_str; 9617 char *name; 9618 char *str; 9619 char *tok; 9620 9621 /* A tab is always appended */ 9622 boot_instance_info[boot_instance_index - 1] = '\0'; 9623 str = boot_instance_info; 9624 9625 while ((curr_str = strsep(&str, "\t"))) { 9626 phys_addr_t start = 0; 9627 phys_addr_t size = 0; 9628 unsigned long addr = 0; 9629 bool traceprintk = false; 9630 bool traceoff = false; 9631 char *flag_delim; 9632 char *addr_delim; 9633 char *rname __free(kfree) = NULL; 9634 char *backup; 9635 9636 tok = strsep(&curr_str, ","); 9637 9638 name = strsep(&tok, "="); 9639 backup = tok; 9640 9641 flag_delim = strchr(name, '^'); 9642 addr_delim = strchr(name, '@'); 9643 9644 if (addr_delim) 9645 *addr_delim++ = '\0'; 9646 9647 if (flag_delim) 9648 *flag_delim++ = '\0'; 9649 9650 if (backup) { 9651 if (backup_instance_area(backup, &addr, &size) < 0) 9652 continue; 9653 } 
9654 9655 if (flag_delim) { 9656 char *flag; 9657 9658 while ((flag = strsep(&flag_delim, "^"))) { 9659 if (strcmp(flag, "traceoff") == 0) { 9660 traceoff = true; 9661 } else if ((strcmp(flag, "printk") == 0) || 9662 (strcmp(flag, "traceprintk") == 0) || 9663 (strcmp(flag, "trace_printk") == 0)) { 9664 traceprintk = true; 9665 } else { 9666 pr_info("Tracing: Invalid instance flag '%s' for %s\n", 9667 flag, name); 9668 } 9669 } 9670 } 9671 9672 tok = addr_delim; 9673 if (tok && isdigit(*tok)) { 9674 start = memparse(tok, &tok); 9675 if (!start) { 9676 pr_warn("Tracing: Invalid boot instance address for %s\n", 9677 name); 9678 continue; 9679 } 9680 if (*tok != ':') { 9681 pr_warn("Tracing: No size specified for instance %s\n", name); 9682 continue; 9683 } 9684 tok++; 9685 size = memparse(tok, &tok); 9686 if (!size) { 9687 pr_warn("Tracing: Invalid boot instance size for %s\n", 9688 name); 9689 continue; 9690 } 9691 memmap_area = true; 9692 } else if (tok) { 9693 if (!reserve_mem_find_by_name(tok, &start, &size)) { 9694 start = 0; 9695 pr_warn("Failed to map boot instance %s to %s\n", name, tok); 9696 continue; 9697 } 9698 rname = kstrdup(tok, GFP_KERNEL); 9699 } 9700 9701 if (start) { 9702 /* Start and size must be page aligned */ 9703 if (start & ~PAGE_MASK) { 9704 pr_warn("Tracing: mapping start addr %pa is not page aligned\n", &start); 9705 continue; 9706 } 9707 if (size & ~PAGE_MASK) { 9708 pr_warn("Tracing: mapping size %pa is not page aligned\n", &size); 9709 continue; 9710 } 9711 9712 if (memmap_area) 9713 addr = map_pages(start, size); 9714 else 9715 addr = (unsigned long)phys_to_virt(start); 9716 if (addr) { 9717 pr_info("Tracing: mapped boot instance %s at physical memory %pa of size 0x%lx\n", 9718 name, &start, (unsigned long)size); 9719 } else { 9720 pr_warn("Tracing: Failed to map boot instance %s\n", name); 9721 continue; 9722 } 9723 } else { 9724 /* Only non mapped buffers have snapshot buffers */ 9725 do_allocate_snapshot(name); 9726 } 9727 9728 tr = 
trace_array_create_systems(name, NULL, addr, size); 9729 if (IS_ERR(tr)) { 9730 pr_warn("Tracing: Failed to create instance buffer %s\n", curr_str); 9731 continue; 9732 } 9733 9734 if (traceoff) 9735 tracer_tracing_off(tr); 9736 9737 if (traceprintk) 9738 update_printk_trace(tr); 9739 9740 /* 9741 * memmap'd buffers can not be freed. 9742 */ 9743 if (memmap_area) { 9744 tr->flags |= TRACE_ARRAY_FL_MEMMAP; 9745 tr->ref++; 9746 } 9747 9748 /* 9749 * Backup buffers can be freed but need vfree(). 9750 */ 9751 if (backup) { 9752 tr->flags |= TRACE_ARRAY_FL_VMALLOC | TRACE_ARRAY_FL_RDONLY; 9753 trace_array_start_autoremove(); 9754 } 9755 9756 if (start || backup) { 9757 tr->flags |= TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT; 9758 tr->range_name = no_free_ptr(rname); 9759 } 9760 9761 /* 9762 * Save the events to start and enabled them after all boot instances 9763 * have been created. 9764 */ 9765 tr->boot_events = curr_str; 9766 } 9767 9768 /* Enable the events after all boot instances have been created */ 9769 list_for_each_entry(tr, &ftrace_trace_arrays, list) { 9770 9771 if (!tr->boot_events || !(*tr->boot_events)) { 9772 tr->boot_events = NULL; 9773 continue; 9774 } 9775 9776 curr_str = tr->boot_events; 9777 9778 /* Clear the instance if this is a persistent buffer */ 9779 if (tr->flags & TRACE_ARRAY_FL_LAST_BOOT) 9780 update_last_data(tr); 9781 9782 while ((tok = strsep(&curr_str, ","))) { 9783 early_enable_events(tr, tok, true); 9784 } 9785 tr->boot_events = NULL; 9786 } 9787 } 9788 9789 __init static int tracer_alloc_buffers(void) 9790 { 9791 unsigned long ring_buf_size; 9792 int ret = -ENOMEM; 9793 9794 9795 if (security_locked_down(LOCKDOWN_TRACEFS)) { 9796 pr_warn("Tracing disabled due to lockdown\n"); 9797 return -EPERM; 9798 } 9799 9800 /* 9801 * Make sure we don't accidentally add more trace options 9802 * than we have bits for. 
9803 */ 9804 BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE); 9805 9806 if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL)) 9807 return -ENOMEM; 9808 9809 if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL)) 9810 goto out_free_buffer_mask; 9811 9812 /* Only allocate trace_printk buffers if a trace_printk exists */ 9813 if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt) 9814 /* Must be called before global_trace.buffer is allocated */ 9815 trace_printk_init_buffers(); 9816 9817 /* To save memory, keep the ring buffer size to its minimum */ 9818 if (global_trace.ring_buffer_expanded) 9819 ring_buf_size = trace_buf_size; 9820 else 9821 ring_buf_size = 1; 9822 9823 cpumask_copy(tracing_buffer_mask, cpu_possible_mask); 9824 cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask); 9825 9826 raw_spin_lock_init(&global_trace.start_lock); 9827 9828 /* 9829 * The prepare callbacks allocates some memory for the ring buffer. We 9830 * don't free the buffer if the CPU goes down. If we were to free 9831 * the buffer, then the user would lose any trace that was in the 9832 * buffer. The memory will be removed once the "instance" is removed. 
9833 */ 9834 ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE, 9835 "trace/RB:prepare", trace_rb_cpu_prepare, 9836 NULL); 9837 if (ret < 0) 9838 goto out_free_cpumask; 9839 /* Used for event triggers */ 9840 ret = -ENOMEM; 9841 temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE); 9842 if (!temp_buffer) 9843 goto out_rm_hp_state; 9844 9845 if (trace_create_savedcmd() < 0) 9846 goto out_free_temp_buffer; 9847 9848 if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL)) 9849 goto out_free_savedcmd; 9850 9851 /* TODO: make the number of buffers hot pluggable with CPUS */ 9852 if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) { 9853 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n"); 9854 goto out_free_pipe_cpumask; 9855 } 9856 if (global_trace.buffer_disabled) 9857 tracing_off(); 9858 9859 if (trace_boot_clock) { 9860 ret = tracing_set_clock(&global_trace, trace_boot_clock); 9861 if (ret < 0) 9862 pr_warn("Trace clock %s not defined, going back to default\n", 9863 trace_boot_clock); 9864 } 9865 9866 /* 9867 * register_tracer() might reference current_trace, so it 9868 * needs to be set before we register anything. This is 9869 * just a bootstrap of current_trace anyway. 
9870 */ 9871 global_trace.current_trace = &nop_trace; 9872 global_trace.current_trace_flags = nop_trace.flags; 9873 9874 global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; 9875 #ifdef CONFIG_TRACER_SNAPSHOT 9876 spin_lock_init(&global_trace.snapshot_trigger_lock); 9877 #endif 9878 ftrace_init_global_array_ops(&global_trace); 9879 9880 #ifdef CONFIG_MODULES 9881 INIT_LIST_HEAD(&global_trace.mod_events); 9882 #endif 9883 9884 init_trace_flags_index(&global_trace); 9885 9886 INIT_LIST_HEAD(&global_trace.tracers); 9887 9888 /* All seems OK, enable tracing */ 9889 tracing_disabled = 0; 9890 9891 atomic_notifier_chain_register(&panic_notifier_list, 9892 &trace_panic_notifier); 9893 9894 register_die_notifier(&trace_die_notifier); 9895 9896 global_trace.flags = TRACE_ARRAY_FL_GLOBAL; 9897 9898 global_trace.syscall_buf_sz = syscall_buf_size; 9899 9900 INIT_LIST_HEAD(&global_trace.systems); 9901 INIT_LIST_HEAD(&global_trace.events); 9902 INIT_LIST_HEAD(&global_trace.hist_vars); 9903 INIT_LIST_HEAD(&global_trace.err_log); 9904 list_add(&global_trace.marker_list, &marker_copies); 9905 list_add(&global_trace.list, &ftrace_trace_arrays); 9906 9907 register_tracer(&nop_trace); 9908 9909 /* Function tracing may start here (via kernel command line) */ 9910 init_function_trace(); 9911 9912 apply_trace_boot_options(); 9913 9914 register_snapshot_cmd(); 9915 9916 return 0; 9917 9918 out_free_pipe_cpumask: 9919 free_cpumask_var(global_trace.pipe_cpumask); 9920 out_free_savedcmd: 9921 trace_free_saved_cmdlines_buffer(); 9922 out_free_temp_buffer: 9923 ring_buffer_free(temp_buffer); 9924 out_rm_hp_state: 9925 cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE); 9926 out_free_cpumask: 9927 free_cpumask_var(global_trace.tracing_cpumask); 9928 out_free_buffer_mask: 9929 free_cpumask_var(tracing_buffer_mask); 9930 return ret; 9931 } 9932 9933 #ifdef CONFIG_FUNCTION_TRACER 9934 /* Used to set module cached ftrace filtering at boot up */ 9935 struct trace_array 
*trace_get_global_array(void) 9936 { 9937 return &global_trace; 9938 } 9939 #endif 9940 9941 void __init early_trace_init(void) 9942 { 9943 if (tracepoint_printk) { 9944 tracepoint_print_iter = kzalloc_obj(*tracepoint_print_iter); 9945 if (MEM_FAIL(!tracepoint_print_iter, 9946 "Failed to allocate trace iterator\n")) 9947 tracepoint_printk = 0; 9948 else 9949 static_key_enable(&tracepoint_printk_key.key); 9950 } 9951 tracer_alloc_buffers(); 9952 9953 init_events(); 9954 } 9955 9956 void __init trace_init(void) 9957 { 9958 trace_event_init(); 9959 9960 if (boot_instance_index) 9961 enable_instances(); 9962 } 9963 9964 __init static void clear_boot_tracer(void) 9965 { 9966 /* 9967 * The default tracer at boot buffer is an init section. 9968 * This function is called in lateinit. If we did not 9969 * find the boot tracer, then clear it out, to prevent 9970 * later registration from accessing the buffer that is 9971 * about to be freed. 9972 */ 9973 if (!default_bootup_tracer) 9974 return; 9975 9976 printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n", 9977 default_bootup_tracer); 9978 default_bootup_tracer = NULL; 9979 } 9980 9981 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK 9982 __init static void tracing_set_default_clock(void) 9983 { 9984 /* sched_clock_stable() is determined in late_initcall */ 9985 if (!trace_boot_clock && !sched_clock_stable()) { 9986 if (security_locked_down(LOCKDOWN_TRACEFS)) { 9987 pr_warn("Can not set tracing clock due to lockdown\n"); 9988 return; 9989 } 9990 9991 printk(KERN_WARNING 9992 "Unstable clock detected, switching default tracing clock to \"global\"\n" 9993 "If you want to keep using the local clock, then add:\n" 9994 " \"trace_clock=local\"\n" 9995 "on the kernel command line\n"); 9996 tracing_set_clock(&global_trace, "global"); 9997 } 9998 } 9999 #else 10000 static inline void tracing_set_default_clock(void) { } 10001 #endif 10002 10003 __init static int late_trace_init(void) 10004 { 10005 if (tracepoint_printk && 
tracepoint_printk_stop_on_boot) { 10006 static_key_disable(&tracepoint_printk_key.key); 10007 tracepoint_printk = 0; 10008 } 10009 10010 if (traceoff_after_boot) 10011 tracing_off(); 10012 10013 tracing_set_default_clock(); 10014 clear_boot_tracer(); 10015 return 0; 10016 } 10017 10018 late_initcall_sync(late_trace_init); 10019