1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * ring buffer based function tracer 4 * 5 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com> 6 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com> 7 * 8 * Originally taken from the RT patch by: 9 * Arnaldo Carvalho de Melo <acme@redhat.com> 10 * 11 * Based on code from the latency_tracer, that is: 12 * Copyright (C) 2004-2006 Ingo Molnar 13 * Copyright (C) 2004 Nadia Yvette Chambers 14 */ 15 #include <linux/ring_buffer.h> 16 #include <linux/utsname.h> 17 #include <linux/stacktrace.h> 18 #include <linux/writeback.h> 19 #include <linux/kallsyms.h> 20 #include <linux/security.h> 21 #include <linux/seq_file.h> 22 #include <linux/irqflags.h> 23 #include <linux/syscalls.h> 24 #include <linux/debugfs.h> 25 #include <linux/tracefs.h> 26 #include <linux/pagemap.h> 27 #include <linux/hardirq.h> 28 #include <linux/linkage.h> 29 #include <linux/uaccess.h> 30 #include <linux/cleanup.h> 31 #include <linux/vmalloc.h> 32 #include <linux/ftrace.h> 33 #include <linux/module.h> 34 #include <linux/percpu.h> 35 #include <linux/splice.h> 36 #include <linux/kdebug.h> 37 #include <linux/string.h> 38 #include <linux/mount.h> 39 #include <linux/rwsem.h> 40 #include <linux/slab.h> 41 #include <linux/ctype.h> 42 #include <linux/init.h> 43 #include <linux/panic_notifier.h> 44 #include <linux/poll.h> 45 #include <linux/nmi.h> 46 #include <linux/fs.h> 47 #include <linux/trace.h> 48 #include <linux/sched/clock.h> 49 #include <linux/sched/rt.h> 50 #include <linux/irq_work.h> 51 #include <linux/workqueue.h> 52 #include <linux/sort.h> 53 #include <linux/io.h> /* vmap_page_range() */ 54 #include <linux/fs_context.h> 55 56 #include <asm/setup.h> /* COMMAND_LINE_SIZE */ 57 58 #include "trace.h" 59 #include "trace_output.h" 60 61 #ifdef CONFIG_FTRACE_STARTUP_TEST 62 /* 63 * We need to change this state when a selftest is running. 
 * A selftest will lurk into the ring-buffer to count the
 * entries inserted during the selftest although some concurrent
 * insertions into the ring-buffer such as trace_printk could have
 * occurred at the same time, giving false positive or negative results.
 */
bool __read_mostly tracing_selftest_running;

/*
 * If boot-time tracing including tracers/events via kernel cmdline
 * is running, we do not want to run SELFTEST.
 */
bool __read_mostly tracing_selftest_disabled;

/*
 * disable_tracing_selftest - permanently disable the startup selftests
 * @reason: human readable reason, logged once on the first call
 */
void __init disable_tracing_selftest(const char *reason)
{
	if (!tracing_selftest_disabled) {
		tracing_selftest_disabled = true;
		pr_info("Ftrace startup test is disabled due to %s\n", reason);
	}
}
#else
#define tracing_selftest_disabled 0
#endif

/* Pipe tracepoints to printk */
static struct trace_iterator *tracepoint_print_iter;
int tracepoint_printk;
static bool tracepoint_printk_stop_on_boot __initdata;
static bool traceoff_after_boot __initdata;
static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);

/* Store tracers and their flags per instance */
struct tracers {
	struct list_head list;
	struct tracer *tracer;
	struct tracer_flags *flags;
};

/*
 * To prevent the comm cache from being overwritten when no
 * tracing is active, only save the comm when a trace event
 * occurred.
 */
DEFINE_PER_CPU(bool, trace_taskinfo_save);

/*
 * Kill all tracing for good (never come back).
 * It is initialized to 1 but will turn to zero if the initialization
 * of the tracer is successful. But that is the only place that sets
 * this back to zero.
 */
int tracing_disabled = 1;

cpumask_var_t __read_mostly tracing_buffer_mask;

/* Max length of a tracer name; also bounds the ftrace_dump_on_oops string */
#define MAX_TRACER_SIZE		100
/*
 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
 *
 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
 * is set, then ftrace_dump is called.
 * This will output the contents
 * of the ftrace buffers to the console.  This is very useful for
 * capturing traces that lead to crashes and outputting it to a
 * serial console.
 *
 * It is default off, but you can enable it with either specifying
 * "ftrace_dump_on_oops" in the kernel command line, or setting
 * /proc/sys/kernel/ftrace_dump_on_oops
 * Set 1 if you want to dump buffers of all CPUs
 * Set 2 if you want to dump the buffer of the CPU that triggered oops
 * Set instance name if you want to dump the specific trace instance
 * Multiple instance dump is also supported, and instances are separated
 * by commas.
 */
/* Set to string format zero to disable by default */
static char ftrace_dump_on_oops[MAX_TRACER_SIZE] = "0";

/* When set, tracing will stop when a WARN*() is hit */
static int __disable_trace_on_warning;

int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
			     void *buffer, size_t *lenp, loff_t *ppos);
/* Tracing knobs exported under /proc/sys/kernel/ */
static const struct ctl_table trace_sysctl_table[] = {
	{
		.procname	= "ftrace_dump_on_oops",
		.data		= &ftrace_dump_on_oops,
		.maxlen		= MAX_TRACER_SIZE,
		.mode		= 0644,
		.proc_handler	= proc_dostring,
	},
	{
		.procname	= "traceoff_on_warning",
		.data		= &__disable_trace_on_warning,
		.maxlen		= sizeof(__disable_trace_on_warning),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "tracepoint_printk",
		.data		= &tracepoint_printk,
		.maxlen		= sizeof(tracepoint_printk),
		.mode		= 0644,
		.proc_handler	= tracepoint_printk_sysctl,
	},
};

static int __init init_trace_sysctls(void)
{
	register_sysctl_init("kernel", trace_sysctl_table);
	return 0;
}
subsys_initcall(init_trace_sysctls);

#ifdef CONFIG_TRACE_EVAL_MAP_FILE
/* Map of enums to their values, for "eval_map" file */
struct trace_eval_map_head {
	struct module *mod;
	unsigned long length;
};

union trace_eval_map_item;

struct trace_eval_map_tail {
	/*
	 * "end" is first and points to NULL as it must be different
	 * than "mod" or "eval_string"
	 */
	union trace_eval_map_item	*next;
	const char			*end;	/* points to NULL */
};

static DEFINE_MUTEX(trace_eval_mutex);

/*
 * The trace_eval_maps are saved in an array with two extra elements,
 * one at the beginning, and one at the end. The beginning item contains
 * the count of the saved maps (head.length), and the module they
 * belong to if not built in (head.mod). The ending item contains a
 * pointer to the next array of saved eval_map items.
 */
union trace_eval_map_item {
	struct trace_eval_map		map;
	struct trace_eval_map_head	head;
	struct trace_eval_map_tail	tail;
};

static union trace_eval_map_item *trace_eval_maps;
#endif /* CONFIG_TRACE_EVAL_MAP_FILE */

int tracing_set_tracer(struct trace_array *tr, const char *buf);
static void ftrace_trace_userstack(struct trace_array *tr,
				   struct trace_buffer *buffer,
				   unsigned int trace_ctx);

/* Tracer selected on the command line ("ftrace=") */
static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
static char *default_bootup_tracer;

/* Accumulated "trace_instance=" parameters, tab-separated */
static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
static int boot_instance_index;

/*
 * Repeated boot parameters, including Bootconfig array expansions, need
 * to stay in the delimiter form that the existing parser consumes.
 *
 * Appends @str to @buf (capacity @size), inserting @sep between entries.
 * Silently drops the new entry if it would not fit.
 */
void __init trace_append_boot_param(char *buf, const char *str, char sep,
				    int size)
{
	int len, needed, str_len;

	if (!*str)
		return;

	len = strlen(buf);
	str_len = strlen(str);
	needed = len + str_len + 1;

	/* For continuation, account for the separator.
 */
	if (len)
		needed++;
	if (needed > size)
		return;

	if (len)
		buf[len++] = sep;

	strscpy(buf + len, str, size - len);
}

static int __init set_cmdline_ftrace(char *str)
{
	strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
	default_bootup_tracer = bootup_tracer_buf;
	/* We are using ftrace early, expand it */
	trace_set_ring_buffer_expanded(NULL);
	return 1;
}
__setup("ftrace=", set_cmdline_ftrace);

/* Returns non-zero when any form of dump-on-oops has been requested */
int ftrace_dump_on_oops_enabled(void)
{
	if (!strcmp("0", ftrace_dump_on_oops))
		return 0;
	else
		return 1;
}

static int __init set_ftrace_dump_on_oops(char *str)
{
	/* Bare "ftrace_dump_on_oops" means dump all CPUs */
	if (!*str) {
		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
		return 1;
	}

	/* Leading comma: instance list with the implicit "1" prefix */
	if (*str == ',') {
		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
		strscpy(ftrace_dump_on_oops + 1, str, MAX_TRACER_SIZE - 1);
		return 1;
	}

	if (*str++ == '=') {
		strscpy(ftrace_dump_on_oops, str, MAX_TRACER_SIZE);
		return 1;
	}

	return 0;
}
__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);

static int __init stop_trace_on_warning(char *str)
{
	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
		__disable_trace_on_warning = 1;
	return 1;
}
__setup("traceoff_on_warning", stop_trace_on_warning);

static int __init boot_instance(char *str)
{
	char *slot = boot_instance_info + boot_instance_index;
	int left = sizeof(boot_instance_info) - boot_instance_index;
	int ret;

	/* Drop the parameter entirely if it would not fit */
	if (strlen(str) >= left)
		return -1;

	ret = snprintf(slot, left, "%s\t", str);
	boot_instance_index += ret;

	return 1;
}
__setup("trace_instance=", boot_instance);


static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;

static int __init set_trace_boot_options(char *str)
{
	trace_append_boot_param(trace_boot_options_buf, str, ',',
				MAX_TRACER_SIZE);
	return
 1;
}
__setup("trace_options=", set_trace_boot_options);

static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
static char *trace_boot_clock __initdata;

static int __init set_trace_boot_clock(char *str)
{
	strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
	trace_boot_clock = trace_boot_clock_buf;
	return 1;
}
__setup("trace_clock=", set_trace_boot_clock);

static int __init set_tracepoint_printk(char *str)
{
	/* Ignore the "tp_printk_stop_on_boot" param */
	if (*str == '_')
		return 0;

	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
		tracepoint_printk = 1;
	return 1;
}
__setup("tp_printk", set_tracepoint_printk);

static int __init set_tracepoint_printk_stop(char *str)
{
	tracepoint_printk_stop_on_boot = true;
	return 1;
}
__setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);

static int __init set_traceoff_after_boot(char *str)
{
	traceoff_after_boot = true;
	return 1;
}
__setup("traceoff_after_boot", set_traceoff_after_boot);

/* Convert nanoseconds to microseconds, rounding to the nearest usec */
unsigned long long ns2usecs(u64 nsec)
{
	nsec += 500;
	do_div(nsec, 1000);
	return nsec;
}

/* Hand one ring-buffer event to an export if its flag matches */
static void
trace_process_export(struct trace_export *export,
		     struct ring_buffer_event *event, int flag)
{
	struct trace_entry *entry;
	unsigned int size = 0;

	if (export->flags & flag) {
		entry = ring_buffer_event_data(event);
		size = ring_buffer_event_length(event);
		export->write(export, entry, size);
	}
}

static DEFINE_MUTEX(ftrace_export_lock);

static struct trace_export __rcu *ftrace_exports_list __read_mostly;

static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);

/* Bump the static keys for each export type this export handles */
static inline void ftrace_exports_enable(struct trace_export *export)
{
	if (export->flags & TRACE_EXPORT_FUNCTION)
		static_branch_inc(&trace_function_exports_enabled);

	if (export->flags & TRACE_EXPORT_EVENT)
		static_branch_inc(&trace_event_exports_enabled);

	if (export->flags & TRACE_EXPORT_MARKER)
		static_branch_inc(&trace_marker_exports_enabled);
}

/* Counterpart of ftrace_exports_enable(): drop the matching static keys */
static inline void ftrace_exports_disable(struct trace_export *export)
{
	if (export->flags & TRACE_EXPORT_FUNCTION)
		static_branch_dec(&trace_function_exports_enabled);

	if (export->flags & TRACE_EXPORT_EVENT)
		static_branch_dec(&trace_event_exports_enabled);

	if (export->flags & TRACE_EXPORT_MARKER)
		static_branch_dec(&trace_marker_exports_enabled);
}

/* Walk the export list and feed @event to every registered export */
static void ftrace_exports(struct ring_buffer_event *event, int flag)
{
	struct trace_export *export;

	guard(preempt_notrace)();

	export = rcu_dereference_raw_check(ftrace_exports_list);
	while (export) {
		trace_process_export(export, event, flag);
		export = rcu_dereference_raw_check(export->next);
	}
}

static inline void
add_trace_export(struct trace_export **list, struct trace_export *export)
{
	rcu_assign_pointer(export->next, *list);
	/*
	 * We are entering export into the list but another
	 * CPU might be walking that list. We need to make sure
	 * the export->next pointer is valid before another CPU sees
	 * the export pointer included into the list.
	 */
	rcu_assign_pointer(*list, export);
}

/* Unlink @export from @list; returns -1 if it was not found */
static inline int
rm_trace_export(struct trace_export **list, struct trace_export *export)
{
	struct trace_export **p;

	for (p = list; *p != NULL; p = &(*p)->next)
		if (*p == export)
			break;

	if (*p != export)
		return -1;

	rcu_assign_pointer(*p, (*p)->next);

	return 0;
}

static inline void
add_ftrace_export(struct trace_export **list, struct trace_export *export)
{
	ftrace_exports_enable(export);

	add_trace_export(list, export);
}

static inline int
rm_ftrace_export(struct trace_export **list, struct trace_export *export)
{
	int ret;

	ret = rm_trace_export(list, export);
	ftrace_exports_disable(export);

	return ret;
}

int register_ftrace_export(struct trace_export *export)
{
	if (WARN_ON_ONCE(!export->write))
		return -1;

	guard(mutex)(&ftrace_export_lock);

	add_ftrace_export(&ftrace_exports_list, export);

	return 0;
}
EXPORT_SYMBOL_GPL(register_ftrace_export);

int unregister_ftrace_export(struct trace_export *export)
{
	guard(mutex)(&ftrace_export_lock);
	return rm_ftrace_export(&ftrace_exports_list, export);
}
EXPORT_SYMBOL_GPL(unregister_ftrace_export);

/* trace_flags holds trace_options default values */
#define TRACE_DEFAULT_FLAGS						\
	(FUNCTION_DEFAULT_FLAGS | FPROFILE_DEFAULT_FLAGS |		\
	 TRACE_ITER(PRINT_PARENT) | TRACE_ITER(PRINTK) |		\
	 TRACE_ITER(ANNOTATE) | TRACE_ITER(CONTEXT_INFO) |		\
	 TRACE_ITER(RECORD_CMD) | TRACE_ITER(OVERWRITE) |		\
	 TRACE_ITER(IRQ_INFO) | TRACE_ITER(MARKERS) |			\
	 TRACE_ITER(HASH_PTR) | TRACE_ITER(TRACE_PRINTK) |		\
	 TRACE_ITER(COPY_MARKER))

/* trace_options that are only supported by global_trace */
#define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER(PRINTK) |			\
	TRACE_ITER(PRINTK_MSGONLY) | TRACE_ITER(RECORD_CMD) |		\
	TRACE_ITER(PROF_TEXT_OFFSET) |
 FPROFILE_DEFAULT_FLAGS)

/* trace_flags that are default zero for instances */
#define ZEROED_TRACE_FLAGS						\
	(TRACE_ITER(EVENT_FORK) | TRACE_ITER(FUNC_FORK) | TRACE_ITER(TRACE_PRINTK) | \
	 TRACE_ITER(COPY_MARKER))

/*
 * The global_trace is the descriptor that holds the top-level tracing
 * buffers for the live tracing.
 */
static struct trace_array global_trace = {
	.trace_flags = TRACE_DEFAULT_FLAGS,
};

struct trace_array *printk_trace = &global_trace;

/* List of trace_arrays interested in the top level trace_marker */
static LIST_HEAD(marker_copies);

/* Redirect trace_printk() output to @tr, clearing the flag on the old one */
static void update_printk_trace(struct trace_array *tr)
{
	if (printk_trace == tr)
		return;

	printk_trace->trace_flags &= ~TRACE_ITER(TRACE_PRINTK);
	printk_trace = tr;
	tr->trace_flags |= TRACE_ITER(TRACE_PRINTK);
}

/* Returns true if the status of tr changed */
static bool update_marker_trace(struct trace_array *tr, int enabled)
{
	lockdep_assert_held(&event_mutex);

	if (enabled) {
		if (tr->trace_flags & TRACE_ITER(COPY_MARKER))
			return false;

		list_add_rcu(&tr->marker_list, &marker_copies);
		tr->trace_flags |= TRACE_ITER(COPY_MARKER);
		return true;
	}

	if (!(tr->trace_flags & TRACE_ITER(COPY_MARKER)))
		return false;

	list_del_rcu(&tr->marker_list);
	tr->trace_flags &= ~TRACE_ITER(COPY_MARKER);
	return true;
}

/* Mark @tr's ring buffer as expanded; NULL means the top-level trace */
void trace_set_ring_buffer_expanded(struct trace_array *tr)
{
	if (!tr)
		tr = &global_trace;
	tr->ring_buffer_expanded = true;
}

/* Deferred destruction of a trace_array, run from the autoremove workqueue */
static void trace_array_autoremove(struct work_struct *work)
{
	struct trace_array *tr = container_of(work, struct trace_array, autoremove_work);

	trace_array_destroy(tr);
}

static struct workqueue_struct *autoremove_wq;

static void trace_array_kick_autoremove(struct trace_array *tr)
{
	if (autoremove_wq)
		queue_work(autoremove_wq,
			   &tr->autoremove_work);
}

static void trace_array_cancel_autoremove(struct trace_array *tr)
{
	/*
	 * Since this can be called inside trace_array_autoremove(),
	 * it has to avoid deadlock of the workqueue.
	 */
	if (work_pending(&tr->autoremove_work))
		cancel_work_sync(&tr->autoremove_work);
}

static void trace_array_init_autoremove(struct trace_array *tr)
{
	INIT_WORK(&tr->autoremove_work, trace_array_autoremove);
}

/* Lazily create the autoremove workqueue; warn (but carry on) on failure */
static void trace_array_start_autoremove(void)
{
	if (autoremove_wq)
		return;

	autoremove_wq = alloc_workqueue("tr_autoremove_wq",
					WQ_UNBOUND | WQ_HIGHPRI, 0);
	if (!autoremove_wq)
		pr_warn("Unable to allocate tr_autoremove_wq. autoremove disabled.\n");
}

LIST_HEAD(ftrace_trace_arrays);

/* Caller must hold trace_types_lock */
static int __trace_array_get(struct trace_array *this_tr)
{
	/* When free_on_close is set, this is not available anymore. */
	if (autoremove_wq && this_tr->free_on_close)
		return -ENODEV;

	this_tr->ref++;
	return 0;
}

int trace_array_get(struct trace_array *this_tr)
{
	struct trace_array *tr;

	guard(mutex)(&trace_types_lock);
	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
		if (tr == this_tr) {
			return __trace_array_get(tr);
		}
	}

	return -ENODEV;
}

/* Caller must hold trace_types_lock */
static void __trace_array_put(struct trace_array *this_tr)
{
	WARN_ON(!this_tr->ref);
	this_tr->ref--;
	/*
	 * When free_on_close is set, prepare removing the array
	 * when the last reference is released.
	 */
	if (this_tr->ref == 1 && this_tr->free_on_close)
		trace_array_kick_autoremove(this_tr);
}

/**
 * trace_array_put - Decrement the reference counter for this trace array.
 * @this_tr : pointer to the trace array
 *
 * NOTE: Use this when we no longer need the trace array returned by
 * trace_array_get_by_name(). This ensures the trace array can be later
 * destroyed.
 *
 */
void trace_array_put(struct trace_array *this_tr)
{
	if (!this_tr)
		return;

	guard(mutex)(&trace_types_lock);
	__trace_array_put(this_tr);
}
EXPORT_SYMBOL_GPL(trace_array_put);

/*
 * Common open-time checks: lockdown, global disable, and (optionally)
 * taking a reference on @tr. Returns 0 on success or a -errno.
 */
int tracing_check_open_get_tr(struct trace_array *tr)
{
	int ret;

	ret = security_locked_down(LOCKDOWN_TRACEFS);
	if (ret)
		return ret;

	if (tracing_disabled)
		return -ENODEV;

	if (tr && trace_array_get(tr) < 0)
		return -ENODEV;

	return 0;
}

/* Current normalized timestamp for @buf on @cpu */
static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
{
	u64 ts;

	/* Early boot up does not have a buffer yet */
	if (!buf->buffer)
		return trace_clock_local();

	ts = ring_buffer_time_stamp(buf->buffer);
	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);

	return ts;
}

u64 ftrace_now(int cpu)
{
	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
}

/**
 * tracing_is_enabled - Show if global_trace has been enabled
 *
 * Shows if the global trace has been enabled or not. It uses the
 * mirror flag "buffer_disabled" to be used in fast paths such as for
 * the irqsoff tracer. But it may be inaccurate due to races. If you
 * need to know the accurate state, use tracing_is_on() which is a little
 * slower, but accurate.
 */
int tracing_is_enabled(void)
{
	/*
	 * For quick access (irqsoff uses this in fast path), just
	 * return the mirror variable of the state of the ring buffer.
	 * It's a little racy, but we don't really care.
	 */
	return !global_trace.buffer_disabled;
}

/*
 * trace_buf_size is the size in bytes that is allocated
 * for a buffer. Note, the number of bytes is always rounded
 * to page size.
 *
 * This number is purposely set to a low number of 16384.
 * If the dump on oops happens, it will be much appreciated
 * to not have to wait for all that output.
 Anyway this can be
 * boot time and run time configurable.
 */
#define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */

static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT;

/* trace_types holds a link list of available tracers. */
static struct tracer *trace_types __read_mostly;

/*
 * trace_types_lock is used to protect the trace_types list.
 */
DEFINE_MUTEX(trace_types_lock);

/*
 * serialize the access of the ring buffer
 *
 * ring buffer serializes readers, but it is low level protection.
 * The validity of the events (which returns by ring_buffer_peek() ..etc)
 * are not protected by ring buffer.
 *
 * The content of events may become garbage if we allow other process consumes
 * these events concurrently:
 *   A) the page of the consumed events may become a normal page
 *      (not reader page) in ring buffer, and this page will be rewritten
 *      by events producer.
 *   B) The page of the consumed events may become a page for splice_read,
 *      and this page will be returned to system.
 *
 * These primitives allow multi process access to different cpu ring buffer
 * concurrently.
 *
 * These primitives don't distinguish read-only and read-consume access.
 * Multi read-only access are also serialized.
 */

#ifdef CONFIG_SMP
static DECLARE_RWSEM(all_cpu_access_lock);
static DEFINE_PER_CPU(struct mutex, cpu_access_lock);

static inline void trace_access_lock(int cpu)
{
	if (cpu == RING_BUFFER_ALL_CPUS) {
		/* gain it for accessing the whole ring buffer. */
		down_write(&all_cpu_access_lock);
	} else {
		/* gain it for accessing a cpu ring buffer. */

		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
		down_read(&all_cpu_access_lock);

		/* Secondly block other access to this @cpu ring buffer.
 */
		mutex_lock(&per_cpu(cpu_access_lock, cpu));
	}
}

static inline void trace_access_unlock(int cpu)
{
	if (cpu == RING_BUFFER_ALL_CPUS) {
		up_write(&all_cpu_access_lock);
	} else {
		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
		up_read(&all_cpu_access_lock);
	}
}

static inline void trace_access_lock_init(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		mutex_init(&per_cpu(cpu_access_lock, cpu));
}

#else

/* UP: a single mutex serializes all ring-buffer readers */
static DEFINE_MUTEX(access_lock);

static inline void trace_access_lock(int cpu)
{
	(void)cpu;
	mutex_lock(&access_lock);
}

static inline void trace_access_unlock(int cpu)
{
	(void)cpu;
	mutex_unlock(&access_lock);
}

static inline void trace_access_lock_init(void)
{
}

#endif

void tracer_tracing_on(struct trace_array *tr)
{
	if (tr->array_buffer.buffer)
		ring_buffer_record_on(tr->array_buffer.buffer);
	/*
	 * This flag is looked at when buffers haven't been allocated
	 * yet, or by some tracers (like irqsoff), that just want to
	 * know if the ring buffer has been disabled, but it can handle
	 * races of where it gets disabled but we still do a record.
	 * As the check is in the fast path of the tracers, it is more
	 * important to be fast than accurate.
	 */
	tr->buffer_disabled = 0;
}

/**
 * tracing_on - enable tracing buffers
 *
 * This function enables tracing buffers that may have been
 * disabled with tracing_off.
 */
void tracing_on(void)
{
	tracer_tracing_on(&global_trace);
}
EXPORT_SYMBOL_GPL(tracing_on);

#ifdef CONFIG_TRACER_SNAPSHOT
/**
 * tracing_snapshot - take a snapshot of the current buffer.
 *
 * This causes a swap between the snapshot buffer and the current live
 * tracing buffer.
 You can use this to take snapshots of the live
 * trace when some condition is triggered, but continue to trace.
 *
 * Note, make sure to allocate the snapshot with either
 * a tracing_snapshot_alloc(), or by doing it manually
 * with: echo 1 > /sys/kernel/tracing/snapshot
 *
 * If the snapshot buffer is not allocated, it will stop tracing.
 * Basically making a permanent snapshot.
 */
void tracing_snapshot(void)
{
	struct trace_array *tr = &global_trace;

	tracing_snapshot_instance(tr);
}
EXPORT_SYMBOL_GPL(tracing_snapshot);

/**
 * tracing_alloc_snapshot - allocate snapshot buffer.
 *
 * This only allocates the snapshot buffer if it isn't already
 * allocated - it doesn't also take a snapshot.
 *
 * This is meant to be used in cases where the snapshot buffer needs
 * to be set up for events that can't sleep but need to be able to
 * trigger a snapshot.
 */
int tracing_alloc_snapshot(void)
{
	struct trace_array *tr = &global_trace;
	int ret;

	ret = tracing_alloc_snapshot_instance(tr);
	WARN_ON(ret < 0);

	return ret;
}
#else
/* Stubs when CONFIG_TRACER_SNAPSHOT is off: warn that snapshots are unused */
void tracing_snapshot(void)
{
	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
}
EXPORT_SYMBOL_GPL(tracing_snapshot);
void tracing_snapshot_alloc(void)
{
	/* Give warning */
	tracing_snapshot();
}
EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
#endif /* CONFIG_TRACER_SNAPSHOT */

void tracer_tracing_off(struct trace_array *tr)
{
	if (tr->array_buffer.buffer)
		ring_buffer_record_off(tr->array_buffer.buffer);
	/*
	 * This flag is looked at when buffers haven't been allocated
	 * yet, or by some tracers (like irqsoff), that just want to
	 * know if the ring buffer has been disabled, but it can handle
	 * races of where it gets disabled but we still do a record.
	 * As the check is in the fast path of the tracers, it is more
	 * important to be fast than accurate.
	 */
	tr->buffer_disabled = 1;
}

/**
 * tracer_tracing_disable() - temporary disable the buffer from write
 * @tr: The trace array to disable its buffer for
 *
 * Expects tracer_tracing_enable() to re-enable tracing.
 * The difference between this and tracer_tracing_off() is that this
 * is a counter and can nest, whereas, tracer_tracing_off() can
 * be called multiple times and a single tracer_tracing_on() will
 * enable it.
 */
void tracer_tracing_disable(struct trace_array *tr)
{
	if (WARN_ON_ONCE(!tr->array_buffer.buffer))
		return;

	ring_buffer_record_disable(tr->array_buffer.buffer);
}

/**
 * tracer_tracing_enable() - counter part of tracer_tracing_disable()
 * @tr: The trace array that had tracer_tracing_disable() called on it
 *
 * This is called after tracer_tracing_disable() has been called on @tr,
 * when it's safe to re-enable tracing.
 */
void tracer_tracing_enable(struct trace_array *tr)
{
	if (WARN_ON_ONCE(!tr->array_buffer.buffer))
		return;

	ring_buffer_record_enable(tr->array_buffer.buffer);
}

/**
 * tracing_off - turn off tracing buffers
 *
 * This function stops the tracing buffers from recording data.
 * It does not disable any overhead the tracers themselves may
 * be causing. This function simply causes all recording to
 * the ring buffers to fail.
 */
void tracing_off(void)
{
	tracer_tracing_off(&global_trace);
}
EXPORT_SYMBOL_GPL(tracing_off);

/* Called on WARN*(): stop tracing if "traceoff_on_warning" was requested */
void disable_trace_on_warning(void)
{
	if (__disable_trace_on_warning) {
		struct trace_array *tr = READ_ONCE(printk_trace);

		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
				       "Disabling tracing due to warning\n");
		tracing_off();

		/* Disable trace_printk() buffer too */
		if (tr != &global_trace) {
			trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_,
					       "Disabling tracing due to warning\n");
			tracer_tracing_off(tr);
		}
	}
}

/**
 * tracer_tracing_is_on - show real state of ring buffer enabled
 * @tr : the trace array to know if ring buffer is enabled
 *
 * Shows real state of the ring buffer if it is enabled or not.
 */
bool tracer_tracing_is_on(struct trace_array *tr)
{
	if (tr->array_buffer.buffer)
		return ring_buffer_record_is_set_on(tr->array_buffer.buffer);
	return !tr->buffer_disabled;
}

/**
 * tracing_is_on - show state of ring buffers enabled
 */
int tracing_is_on(void)
{
	return tracer_tracing_is_on(&global_trace);
}
EXPORT_SYMBOL_GPL(tracing_is_on);

static int __init set_buf_size(char *str)
{
	unsigned long buf_size;

	if (!str)
		return 0;
	buf_size = memparse(str, &str);
	/*
	 * nr_entries can not be zero and the startup
	 * tests require some buffer space. Therefore
	 * ensure we have at least 4096 bytes of buffer.
	 */
	trace_buf_size = max(4096UL, buf_size);
	return 1;
}
__setup("trace_buf_size=", set_buf_size);

static int __init set_tracing_thresh(char *str)
{
	unsigned long threshold;
	int ret;

	if (!str)
		return 0;
	ret = kstrtoul(str, 0, &threshold);
	if (ret < 0)
		return 0;
	/* tracing_thresh is kept in nanoseconds; the parameter is in usecs */
	tracing_thresh = threshold * 1000;
	return 1;
}
__setup("tracing_thresh=", set_tracing_thresh);

/* Truncating nanoseconds-to-microseconds conversion (no rounding) */
unsigned long nsecs_to_usecs(unsigned long nsecs)
{
	return nsecs / 1000;
}

/*
 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
 * of strings in the order that the evals (enum) were defined.
 */
#undef C
#define C(a, b) b

/* These must match the bit positions in trace_iterator_flags */
static const char *trace_options[] = {
	TRACE_FLAGS
	NULL
};

static struct {
	u64 (*func)(void);
	const char *name;
	int in_ns;		/* is this clock in nanoseconds?
 */
} trace_clocks[] = {
	{ trace_clock_local,		"local",	1 },
	{ trace_clock_global,		"global",	1 },
	{ trace_clock_counter,		"counter",	0 },
	{ trace_clock_jiffies,		"uptime",	0 },
	{ trace_clock,			"perf",		1 },
	{ ktime_get_mono_fast_ns,	"mono",		1 },
	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
	{ ktime_get_boot_fast_ns,	"boot",		1 },
	{ ktime_get_tai_fast_ns,	"tai",		1 },
	ARCH_TRACE_CLOCKS
};

bool trace_clock_in_ns(struct trace_array *tr)
{
	if (trace_clocks[tr->clock_id].in_ns)
		return true;

	return false;
}

/*
 * trace_parser_get_init - gets the buffer for trace parser
 *
 * Returns 1 if the buffer could not be allocated, 0 on success.
 */
int trace_parser_get_init(struct trace_parser *parser, int size)
{
	memset(parser, 0, sizeof(*parser));

	parser->buffer = kmalloc(size, GFP_KERNEL);
	if (!parser->buffer)
		return 1;

	parser->size = size;
	return 0;
}

/*
 * trace_parser_put - frees the buffer for trace parser
 */
void trace_parser_put(struct trace_parser *parser)
{
	kfree(parser->buffer);
	parser->buffer = NULL;
}

/*
 * trace_get_user - reads the user input string separated by  space
 * (matched by isspace(ch))
 *
 * For each string found the 'struct trace_parser' is updated,
 * and the function returns.
 *
 * Returns number of bytes read.
 *
 * See kernel/trace/trace.h for 'struct trace_parser' details.
 */
int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
		   size_t cnt, loff_t *ppos)
{
	char ch;
	size_t read = 0;
	ssize_t ret;

	/* A fresh read (offset zero) starts a new token */
	if (!*ppos)
		trace_parser_clear(parser);

	ret = get_user(ch, ubuf++);
	if (ret)
		goto fail;

	read++;
	cnt--;

	/*
	 * The parser is not finished with the last write,
	 * continue reading the user input without skipping spaces.
	 */
	if (!parser->cont) {
		/* skip white space */
		while (cnt && isspace(ch)) {
			ret = get_user(ch, ubuf++);
			if (ret)
				goto fail;
			read++;
			cnt--;
		}

		parser->idx = 0;

		/* only spaces were written */
		if (isspace(ch) || !ch) {
			*ppos += read;
			return read;
		}
	}

	/* read the non-space input */
	while (cnt && !isspace(ch) && ch) {
		if (parser->idx < parser->size - 1)
			parser->buffer[parser->idx++] = ch;
		else {
			/* token longer than the parser buffer */
			ret = -EINVAL;
			goto fail;
		}

		ret = get_user(ch, ubuf++);
		if (ret)
			goto fail;
		read++;
		cnt--;
	}

	/* We either got finished input or we have to wait for another call. */
	if (isspace(ch) || !ch) {
		parser->buffer[parser->idx] = 0;
		parser->cont = false;
	} else if (parser->idx < parser->size - 1) {
		/* ran out of user data mid-token: stash it and continue later */
		parser->cont = true;
		parser->buffer[parser->idx++] = ch;
		/* Make sure the parsed string always terminates with '\0'. */
		parser->buffer[parser->idx] = 0;
	} else {
		ret = -EINVAL;
		goto fail;
	}

	*ppos += read;
	return read;
fail:
	trace_parser_fail(parser);
	return ret;
}

/*
 * Copy up to @cnt unread bytes out of trace_seq @s into @buf,
 * advancing s->readpos. Returns the number of bytes copied, or
 * -EBUSY when everything in the seq has already been consumed.
 */
/* TODO add a seq_buf_to_buffer() */
static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
{
	int len;

	if (trace_seq_used(s) <= s->readpos)
		return -EBUSY;

	len = trace_seq_used(s) - s->readpos;
	if (cnt > len)
		cnt = len;
	memcpy(buf, s->buffer + s->readpos, cnt);

	s->readpos += cnt;
	return cnt;
}

unsigned long __read_mostly	tracing_thresh;

/* Context passed from wait_on_pipe() to its wakeup condition callback. */
struct pipe_wait {
	struct trace_iterator		*iter;
	int				wait_index;
};

/*
 * Wakeup condition for ring_buffer_wait(): abort the wait when either
 * the iterator's wait_index has moved on since the wait was armed, or
 * the iterator has been closed.
 */
static bool wait_pipe_cond(void *data)
{
	struct pipe_wait *pwait = data;
	struct trace_iterator *iter = pwait->iter;

	if (atomic_read_acquire(&iter->wait_index) != pwait->wait_index)
		return true;

	return iter->closed;
}

/*
 * Block until the iterator's per-cpu ring buffer has data (at least
 * @full, as interpreted by ring_buffer_wait()), the wait is interrupted
 * via wait_pipe_cond(), or an error occurs. Returns 0 immediately for
 * static (snapshot-style) buffer iterators.
 */
static int wait_on_pipe(struct trace_iterator *iter, int full)
{
	struct pipe_wait pwait;
	int ret;

	/* Iterators are static, they should be filled or empty */
	if (trace_buffer_iter(iter, iter->cpu_file))
		return 0;

	pwait.wait_index = atomic_read_acquire(&iter->wait_index);
	pwait.iter = iter;

	ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full,
			       wait_pipe_cond, &pwait);

#ifdef CONFIG_TRACER_SNAPSHOT
	/*
	 * Make sure this is still the snapshot buffer, as if a snapshot were
	 * to happen, this would now be the main buffer.
	 */
	if (iter->snapshot)
		iter->array_buffer = &iter->tr->snapshot_buffer;
#endif
	return ret;
}

#ifdef CONFIG_FTRACE_STARTUP_TEST
/* Set once the scheduler is up and postponed selftests may run. */
static bool selftests_can_run;

/* A tracer whose selftest was deferred until init_trace_selftests(). */
struct trace_selftests {
	struct list_head		list;
	struct tracer			*type;
};

static LIST_HEAD(postponed_selftests);

/*
 * Queue @type so its selftest runs later from init_trace_selftests()
 * (used for tracers that register before the system can run tests).
 * Returns 0 on success or -ENOMEM.
 */
static int save_selftest(struct tracer *type)
{
	struct trace_selftests *selftest;

	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
	if (!selftest)
		return -ENOMEM;

	selftest->type = type;
	list_add(&selftest->list, &postponed_selftests);
	return 0;
}

/*
 * Run @type's selftest against the global trace array, temporarily
 * installing it as the current tracer and restoring the previous
 * tracer/flags afterwards. Returns 0 on pass (or when skipped),
 * -1 on failure, or -ENOMEM if the test had to be postponed and
 * could not be queued.
 */
static int run_tracer_selftest(struct tracer *type)
{
	struct trace_array *tr = &global_trace;
	struct tracer_flags *saved_flags = tr->current_trace_flags;
	struct tracer *saved_tracer = tr->current_trace;
	int ret;

	if (!type->selftest || tracing_selftest_disabled)
		return 0;

	/*
	 * If a tracer registers early in boot up (before scheduling is
	 * initialized and such), then do not run its selftests yet.
	 * Instead, run it a little later in the boot process.
	 */
	if (!selftests_can_run)
		return save_selftest(type);

	if (!tracing_is_on()) {
		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
			type->name);
		return 0;
	}

	/*
	 * Run a selftest on this tracer.
	 * Here we reset the trace buffer, and set the current
	 * tracer to be this tracer. The tracer can then run some
	 * internal tracing to verify that everything is in order.
	 * If we fail, we do not register this tracer.
	 */
	tracing_reset_online_cpus(&tr->array_buffer);

	tr->current_trace = type;
	tr->current_trace_flags = type->flags ? : type->default_flags;

#ifdef CONFIG_TRACER_MAX_TRACE
	if (tracer_uses_snapshot(type)) {
		/* If we expanded the buffers, make sure the max is expanded too */
		if (tr->ring_buffer_expanded)
			ring_buffer_resize(tr->snapshot_buffer.buffer, trace_buf_size,
					   RING_BUFFER_ALL_CPUS);
		tr->allocated_snapshot = true;
	}
#endif

	/* the test is responsible for initializing and enabling */
	pr_info("Testing tracer %s: ", type->name);
	ret = type->selftest(type, tr);
	/* the test is responsible for resetting too */
	tr->current_trace = saved_tracer;
	tr->current_trace_flags = saved_flags;
	if (ret) {
		printk(KERN_CONT "FAILED!\n");
		/* Add the warning after printing 'FAILED' */
		WARN_ON(1);
		return -1;
	}
	/* Only reset on passing, to avoid touching corrupted buffers */
	tracing_reset_online_cpus(&tr->array_buffer);

#ifdef CONFIG_TRACER_MAX_TRACE
	if (tracer_uses_snapshot(type)) {
		tr->allocated_snapshot = false;

		/* Shrink the max buffer again */
		if (tr->ring_buffer_expanded)
			ring_buffer_resize(tr->snapshot_buffer.buffer, 1,
					   RING_BUFFER_ALL_CPUS);
	}
#endif

	printk(KERN_CONT "PASSED\n");
	return 0;
}

/*
 * Wrapper around run_tracer_selftest() that flags the selftest as
 * running (see tracing_selftest_running) for the test's duration.
 */
static int do_run_tracer_selftest(struct tracer *type)
{
	int ret;

	/*
	 * Tests can take a long time, especially if they are run one after the
	 * other, as does happen during bootup when all the tracers are
	 * registered. This could cause the soft lockup watchdog to trigger.
	 */
	cond_resched();

	tracing_selftest_running = true;
	ret = run_tracer_selftest(type);
	tracing_selftest_running = false;

	return ret;
}

/*
 * Late-boot initcall: run all selftests that were postponed via
 * save_selftest() because the tracer registered too early. A tracer
 * that fails here is unlinked from the trace_types list so it can
 * no longer be selected.
 */
static __init int init_trace_selftests(void)
{
	struct trace_selftests *p, *n;
	struct tracer *t, **last;
	int ret;

	selftests_can_run = true;

	guard(mutex)(&trace_types_lock);

	if (list_empty(&postponed_selftests))
		return 0;

	pr_info("Running postponed tracer tests:\n");

	tracing_selftest_running = true;
	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
		/* This loop can take minutes when sanitizers are enabled, so
		 * lets make sure we allow RCU processing.
		 */
		cond_resched();
		ret = run_tracer_selftest(p->type);
		/* If the test fails, then warn and remove from available_tracers */
		if (ret < 0) {
			WARN(1, "tracer: %s failed selftest, disabling\n",
			     p->type->name);
			/* Unlink p->type from the singly-linked trace_types list */
			last = &trace_types;
			for (t = trace_types; t; t = t->next) {
				if (t == p->type) {
					*last = t->next;
					break;
				}
				last = &t->next;
			}
		}
		list_del(&p->list);
		kfree(p);
	}
	tracing_selftest_running = false;

	return 0;
}
core_initcall(init_trace_selftests);
#else
static inline int do_run_tracer_selftest(struct tracer *type)
{
	return 0;
}
#endif /* CONFIG_FTRACE_STARTUP_TEST */

static int add_tracer(struct trace_array *tr, struct tracer *t);

static void __init apply_trace_boot_options(void);

/*
 * Free every struct tracers entry (and its per-instance flags copy)
 * hanging off @tr->tracers. Caller must hold trace_types_lock.
 */
static void free_tracers(struct trace_array *tr)
{
	struct tracers *t, *n;

	lockdep_assert_held(&trace_types_lock);

	list_for_each_entry_safe(t, n, &tr->tracers, list) {
		list_del(&t->list);
		kfree(t->flags);
		kfree(t);
	}
}

/**
 * register_tracer - register a tracer with the ftrace system.
 * @type: the plugin for the tracer
 *
 * Register a new plugin tracer.
 */
int __init register_tracer(struct tracer *type)
{
	struct trace_array *tr;
	struct tracer *t;
	int ret = 0;

	if (!type->name) {
		pr_info("Tracer must have a name\n");
		return -1;
	}

	if (strlen(type->name) >= MAX_TRACER_SIZE) {
		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
		return -1;
	}

	if (security_locked_down(LOCKDOWN_TRACEFS)) {
		pr_warn("Can not register tracer %s due to lockdown\n",
			type->name);
		return -EPERM;
	}

	mutex_lock(&trace_types_lock);

	/* Reject duplicate names */
	for (t = trace_types; t; t = t->next) {
		if (strcmp(type->name, t->name) == 0) {
			/* already found */
			pr_info("Tracer %s already registered\n",
				type->name);
			ret = -1;
			goto out;
		}
	}

	/* store the tracer for __set_tracer_option */
	if (type->flags)
		type->flags->trace = type;

	ret = do_run_tracer_selftest(type);
	if (ret < 0)
		goto out;

	/* Create per-instance option files for every existing trace array */
	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
		ret = add_tracer(tr, type);
		if (ret < 0) {
			/* The tracer will still exist but without options */
			pr_warn("Failed to create tracer options for %s\n", type->name);
			break;
		}
	}

	/* Link into the global tracer list (LIFO) */
	type->next = trace_types;
	trace_types = type;

 out:
	mutex_unlock(&trace_types_lock);

	if (ret || !default_bootup_tracer)
		return ret;

	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
		return 0;

	/* This tracer was requested on the kernel command line: start it now */
	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
	/* Do we want this tracer to start on bootup? */
	WARN_ON(tracing_set_tracer(&global_trace, type->name) < 0);
	default_bootup_tracer = NULL;

	apply_trace_boot_options();

	/* disable other selftests, since this will break it. */
	disable_tracing_selftest("running a tracer");

	return 0;
}

/*
 * Reset (empty) the ring buffer of a single CPU in @buf. Recording
 * is disabled and a full RCU grace period is waited out so that all
 * in-flight commits finish before the reset.
 */
void tracing_reset_cpu(struct array_buffer *buf, int cpu)
{
	struct trace_buffer *buffer = buf->buffer;

	if (!buffer)
		return;

	ring_buffer_record_disable(buffer);

	/* Make sure all commits have finished */
	synchronize_rcu();
	ring_buffer_reset_cpu(buffer, cpu);

	ring_buffer_record_enable(buffer);
}

/*
 * Reset the ring buffers of all online CPUs in @buf, refreshing the
 * buffer's start timestamp. Same disable/sync/reset/enable sequence
 * as tracing_reset_cpu().
 */
void tracing_reset_online_cpus(struct array_buffer *buf)
{
	struct trace_buffer *buffer = buf->buffer;

	if (!buffer)
		return;

	ring_buffer_record_disable(buffer);

	/* Make sure all commits have finished */
	synchronize_rcu();

	buf->time_start = buffer_ftrace_now(buf, buf->cpu);

	ring_buffer_reset_online_cpus(buffer);

	ring_buffer_record_enable(buffer);
}

/* Like tracing_reset_online_cpus() but resets every CPU's buffer. */
static void tracing_reset_all_cpus(struct array_buffer *buf)
{
	struct trace_buffer *buffer = buf->buffer;

	if (!buffer)
		return;

	ring_buffer_record_disable(buffer);

	/* Make sure all commits have finished */
	synchronize_rcu();

	buf->time_start = buffer_ftrace_now(buf, buf->cpu);

	ring_buffer_reset(buffer);

	ring_buffer_record_enable(buffer);
}

/* Must have trace_types_lock held */
void tracing_reset_all_online_cpus_unlocked(void)
{
	struct trace_array *tr;

	lockdep_assert_held(&trace_types_lock);

	/* Only instances with a pending clear_trace request are reset */
	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
		if (!tr->clear_trace)
			continue;
		tr->clear_trace = false;
		tracing_reset_online_cpus(&tr->array_buffer);
#ifdef CONFIG_TRACER_SNAPSHOT
		tracing_reset_online_cpus(&tr->snapshot_buffer);
#endif
	}
}

/* Locked wrapper for tracing_reset_all_online_cpus_unlocked(). */
void tracing_reset_all_online_cpus(void)
{
	guard(mutex)(&trace_types_lock);
	tracing_reset_all_online_cpus_unlocked();
}

int
is_tracing_stopped(void)
{
	/* Non-zero stop_count means tracing_stop() calls are outstanding */
	return global_trace.stop_count;
}

/*
 * Decrement @tr's stop counter and, when it reaches zero, re-enable
 * recording on the trace buffer(s). Protected by tr->start_lock;
 * tr->max_lock is taken to keep the main/snapshot buffers from being
 * swapped while they are being enabled.
 */
static void tracing_start_tr(struct trace_array *tr)
{
	struct trace_buffer *buffer;

	if (tracing_disabled)
		return;

	guard(raw_spinlock_irqsave)(&tr->start_lock);
	if (--tr->stop_count) {
		if (WARN_ON_ONCE(tr->stop_count < 0)) {
			/* Someone screwed up their debugging */
			tr->stop_count = 0;
		}
		return;
	}

	/* Prevent the buffers from switching */
	arch_spin_lock(&tr->max_lock);

	buffer = tr->array_buffer.buffer;
	if (buffer)
		ring_buffer_record_enable(buffer);

#ifdef CONFIG_TRACER_SNAPSHOT
	buffer = tr->snapshot_buffer.buffer;
	if (buffer)
		ring_buffer_record_enable(buffer);
#endif

	arch_spin_unlock(&tr->max_lock);
}

/**
 * tracing_start - quick start of the tracer
 *
 * If tracing is enabled but was stopped by tracing_stop,
 * this will start the tracer back up.
 */
void tracing_start(void)
{
	return tracing_start_tr(&global_trace);
}

/*
 * Counterpart to tracing_start_tr(): on the first stop request
 * (stop_count 0 -> 1) disable recording on the buffer(s). Nested
 * stops only bump the counter.
 */
static void tracing_stop_tr(struct trace_array *tr)
{
	struct trace_buffer *buffer;

	guard(raw_spinlock_irqsave)(&tr->start_lock);
	if (tr->stop_count++)
		return;

	/* Prevent the buffers from switching */
	arch_spin_lock(&tr->max_lock);

	buffer = tr->array_buffer.buffer;
	if (buffer)
		ring_buffer_record_disable(buffer);

#ifdef CONFIG_TRACER_SNAPSHOT
	buffer = tr->snapshot_buffer.buffer;
	if (buffer)
		ring_buffer_record_disable(buffer);
#endif

	arch_spin_unlock(&tr->max_lock);
}

/**
 * tracing_stop - quick stop of the tracer
 *
 * Light weight way to stop tracing. Use in conjunction with
 * tracing_start.
 */
void tracing_stop(void)
{
	return tracing_stop_tr(&global_trace);
}

/*
 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
 * simplifies those functions and keeps them in sync.
 */
enum print_line_t trace_handle_return(struct trace_seq *s)
{
	return trace_seq_has_overflowed(s) ?
		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
}
EXPORT_SYMBOL_GPL(trace_handle_return);

/* Current task's migration-disable depth; always 0 on !SMP builds. */
static unsigned short migration_disable_value(void)
{
#if defined(CONFIG_SMP)
	return current->migration_disabled;
#else
	return 0;
#endif
}

/*
 * Build the packed per-event context word: TRACE_FLAG_* bits derived
 * from @irqs_status and the current preempt count in the high 16 bits,
 * preempt depth (capped at 0xf) in bits 0-3, and migration-disable
 * depth (capped at 0xf) in bits 4-7.
 */
unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
{
	unsigned int trace_flags = irqs_status;
	unsigned int pc;

	pc = preempt_count();

	if (pc & NMI_MASK)
		trace_flags |= TRACE_FLAG_NMI;
	if (pc & HARDIRQ_MASK)
		trace_flags |= TRACE_FLAG_HARDIRQ;
	if (in_serving_softirq())
		trace_flags |= TRACE_FLAG_SOFTIRQ;
	if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
		trace_flags |= TRACE_FLAG_BH_OFF;

	if (tif_need_resched())
		trace_flags |= TRACE_FLAG_NEED_RESCHED;
	if (test_preempt_need_resched())
		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
	if (IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY) && tif_test_bit(TIF_NEED_RESCHED_LAZY))
		trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY;
	return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
		(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
}

/* Exported thin wrapper around __trace_buffer_lock_reserve(). */
struct ring_buffer_event *
trace_buffer_lock_reserve(struct trace_buffer *buffer,
			  int type,
			  unsigned long len,
			  unsigned int trace_ctx)
{
	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
}

DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
DEFINE_PER_CPU(int,
	       trace_buffered_event_cnt);
/* Reference count of trace_buffered_event_enable() users */
static int trace_buffered_event_ref;

/**
 * trace_buffered_event_enable - enable buffering events
 *
 * When events are being filtered, it is quicker to use a temporary
 * buffer to write the event data into if there's a likely chance
 * that it will not be committed. The discard of the ring buffer
 * is not as fast as committing, and is much slower than copying
 * a commit.
 *
 * When an event is to be filtered, allocate per cpu buffers to
 * write the event data into, and if the event is filtered and discarded
 * it is simply dropped, otherwise, the entire data is to be committed
 * in one shot.
 */
void trace_buffered_event_enable(void)
{
	struct ring_buffer_event *event;
	struct page *page;
	int cpu;

	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));

	/* Only the first enabler allocates; later calls just take a ref */
	if (trace_buffered_event_ref++)
		return;

	for_each_tracing_cpu(cpu) {
		page = alloc_pages_node(cpu_to_node(cpu),
					GFP_KERNEL | __GFP_NORETRY, 0);
		/* This is just an optimization and can handle failures */
		if (!page) {
			pr_err("Failed to allocate event buffer\n");
			break;
		}

		event = page_address(page);
		memset(event, 0, sizeof(*event));

		per_cpu(trace_buffered_event, cpu) = event;

		/* Sanity check: this_cpu view must match the per_cpu slot */
		scoped_guard(preempt,) {
			if (cpu == smp_processor_id() &&
			    __this_cpu_read(trace_buffered_event) !=
			    per_cpu(trace_buffered_event, cpu))
				WARN_ON_ONCE(1);
		}
	}
}

/* IPI callback: mark this CPU's buffered event as available again. */
static void enable_trace_buffered_event(void *data)
{
	this_cpu_dec(trace_buffered_event_cnt);
}

/* IPI callback: mark this CPU's buffered event as in use (unavailable). */
static void disable_trace_buffered_event(void *data)
{
	this_cpu_inc(trace_buffered_event_cnt);
}

/**
 * trace_buffered_event_disable - disable buffering events
 *
 * When a filter is removed, it is faster to not use the buffered
 * events, and to commit directly into the ring buffer. Free up
 * the temp buffers when there are no more users. This requires
 * special synchronization with current events.
 */
void trace_buffered_event_disable(void)
{
	int cpu;

	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));

	if (WARN_ON_ONCE(!trace_buffered_event_ref))
		return;

	/* Buffers stay allocated while any user remains */
	if (--trace_buffered_event_ref)
		return;

	/* For each CPU, set the buffer as used. */
	on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
			 NULL, true);

	/* Wait for all current users to finish */
	synchronize_rcu();

	for_each_tracing_cpu(cpu) {
		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
		per_cpu(trace_buffered_event, cpu) = NULL;
	}

	/*
	 * Wait for all CPUs that potentially started checking if they can use
	 * their event buffer only after the previous synchronize_rcu() call and
	 * they still read a valid pointer from trace_buffered_event. It must be
	 * ensured they don't see cleared trace_buffered_event_cnt else they
	 * could wrongly decide to use the pointed-to buffer which is now freed.
	 */
	synchronize_rcu();

	/* For each CPU, relinquish the buffer */
	on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
			 true);
}

/* Fallback buffer used when tracing is off but triggers still need data */
static struct trace_buffer *temp_buffer;

/*
 * Reserve space for a trace event, preferring the per-cpu buffered
 * event when filtering is active (see the long comment below). On
 * success, *current_rb points at the buffer the event was reserved
 * in; when the buffered event is used, this returns with preemption
 * disabled (released by the matching commit/discard path).
 */
struct ring_buffer_event *
trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
				struct trace_event_file *trace_file,
				int type, unsigned long len,
				unsigned int trace_ctx)
{
	struct ring_buffer_event *entry;
	struct trace_array *tr = trace_file->tr;
	int val;

	*current_rb = tr->array_buffer.buffer;

	if (!tr->no_filter_buffering_ref &&
	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
		preempt_disable_notrace();
		/*
		 * Filtering is on, so try to use the per cpu buffer first.
		 * This buffer will simulate a ring_buffer_event,
		 * where the type_len is zero and the array[0] will
		 * hold the full length.
		 * (see include/linux/ring-buffer.h for details on
		 *  how the ring_buffer_event is structured).
		 *
		 * Using a temp buffer during filtering and copying it
		 * on a matched filter is quicker than writing directly
		 * into the ring buffer and then discarding it when
		 * it doesn't match. That is because the discard
		 * requires several atomic operations to get right.
		 * Copying on match and doing nothing on a failed match
		 * is still quicker than no copy on match, but having
		 * to discard out of the ring buffer on a failed match.
		 */
		if ((entry = __this_cpu_read(trace_buffered_event))) {
			int max_len = PAGE_SIZE - struct_size(entry, array, 1);

			val = this_cpu_inc_return(trace_buffered_event_cnt);

			/*
			 * Preemption is disabled, but interrupts and NMIs
			 * can still come in now. If that happens after
			 * the above increment, then it will have to go
			 * back to the old method of allocating the event
			 * on the ring buffer, and if the filter fails, it
			 * will have to call ring_buffer_discard_commit()
			 * to remove it.
			 *
			 * Need to also check the unlikely case that the
			 * length is bigger than the temp buffer size.
			 * If that happens, then the reserve is pretty much
			 * guaranteed to fail, as the ring buffer currently
			 * only allows events less than a page. But that may
			 * change in the future, so let the ring buffer reserve
			 * handle the failure in that case.
			 */
			if (val == 1 && likely(len <= max_len)) {
				trace_event_setup(entry, type, trace_ctx);
				entry->array[0] = len;
				/* Return with preemption disabled */
				return entry;
			}
			this_cpu_dec(trace_buffered_event_cnt);
		}
		/* __trace_buffer_lock_reserve() disables preemption */
		preempt_enable_notrace();
	}

	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
					    trace_ctx);
	/*
	 * If tracing is off, but we have triggers enabled
	 * we still need to look at the event data. Use the temp_buffer
	 * to store the trace event for the trigger to use. It's recursive
	 * safe and will not be recorded anywhere.
	 */
	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
		*current_rb = temp_buffer;
		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
						    trace_ctx);
	}
	return entry;
}
EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);

static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
static DEFINE_MUTEX(tracepoint_printk_mutex);

/*
 * Render the event in @fbuffer as text via its trace() callback and
 * emit it with printk(). Used by the tracepoint_printk ("pipe
 * tracepoints to printk") facility; shares a single global iterator,
 * hence the tracepoint_iter_lock serialization. Events that are
 * soft-disabled or rejected by their filter are skipped.
 */
static void output_printk(struct trace_event_buffer *fbuffer)
{
	struct trace_event_call *event_call;
	struct trace_event_file *file;
	struct trace_event *event;
	unsigned long flags;
	struct trace_iterator *iter = tracepoint_print_iter;

	/* We should never get here if iter is NULL */
	if (WARN_ON_ONCE(!iter))
		return;

	event_call = fbuffer->trace_file->event_call;
	if (!event_call || !event_call->event.funcs ||
	    !event_call->event.funcs->trace)
		return;

	file = fbuffer->trace_file;
	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
	     !filter_match_preds(file->filter, fbuffer->entry)))
		return;

	event = &fbuffer->trace_file->event_call->event;

	raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
	trace_seq_init(&iter->seq);
	iter->ent = fbuffer->entry;
	event_call->event.funcs->trace(iter, 0, event);
	trace_seq_putc(&iter->seq, 0);
	printk("%s", iter->seq.buffer);

	raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
}

/*
 * sysctl handler for kernel.tracepoint_printk. Toggles the
 * tracepoint_printk_key static key when the value actually changes,
 * and forces the value to 0 when the print iterator was never
 * allocated (printing would be impossible).
 */
int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
			     void *buffer, size_t *lenp,
			     loff_t *ppos)
{
	int save_tracepoint_printk;
	int ret;

	guard(mutex)(&tracepoint_printk_mutex);
	save_tracepoint_printk = tracepoint_printk;

	ret = proc_dointvec(table, write, buffer, lenp, ppos);

	/*
	 * This will force exiting early, as tracepoint_printk
	 * is always zero when tracepoint_printk_iter is not allocated
	 */
	if (!tracepoint_print_iter)
		tracepoint_printk = 0;

	if (save_tracepoint_printk == tracepoint_printk)
		return ret;

	if (tracepoint_printk)
		static_key_enable(&tracepoint_printk_key.key);
	else
		static_key_disable(&tracepoint_printk_key.key);

	return ret;
}

/*
 * Commit (or discard) a reserved trace event: run discard-type
 * triggers, optionally mirror the event to printk and to registered
 * exporters, then commit it to the ring buffer. Post-call triggers
 * fire in either case.
 */
void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
{
	enum event_trigger_type tt = ETT_NONE;
	struct trace_event_file *file = fbuffer->trace_file;

	if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
					 fbuffer->entry, &tt))
		goto discard;

	if (static_key_false(&tracepoint_printk_key.key))
		output_printk(fbuffer);

	if (static_branch_unlikely(&trace_event_exports_enabled))
		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);

	trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
					fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);

discard:
	if (tt)
		event_triggers_post_call(file, tt);

}
EXPORT_SYMBOL_GPL(trace_event_buffer_commit);

/*
 * Skip 3:
 *
 *   trace_buffer_unlock_commit_regs()
 *   trace_event_buffer_commit()
 *   trace_event_raw_event_xxx()
 */
# define STACK_SKIP 3

/*
 * Commit @event and then record kernel/user stack traces for it
 * (each governed by its own option, checked inside the helpers).
 */
void trace_buffer_unlock_commit_regs(struct trace_array *tr,
				     struct trace_buffer *buffer,
				     struct ring_buffer_event *event,
				     unsigned int trace_ctx,
				     struct pt_regs *regs)
{
	__buffer_unlock_commit(buffer, event);

	/*
	 * If regs is not set, then skip the necessary functions.
	 * Note, we can still get here via blktrace, wakeup tracer
	 * and mmiotrace, but that's ok if they lose a function or
	 * two. They are not that meaningful.
	 */
	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
	ftrace_trace_userstack(tr, buffer, trace_ctx);
}

/*
 * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
 */
void
trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
				   struct ring_buffer_event *event)
{
	__buffer_unlock_commit(buffer, event);
}

/*
 * Record a function-entry event (TRACE_FN) for @ip called from
 * @parent_ip. When @fregs is supplied (and the arch supports the
 * argument access API), the function's first FTRACE_REGS_MAX_ARGS
 * arguments are appended to the entry.
 */
void
trace_function(struct trace_array *tr, unsigned long ip, unsigned long
	       parent_ip, unsigned int trace_ctx, struct ftrace_regs *fregs)
{
	struct trace_buffer *buffer = tr->array_buffer.buffer;
	struct ring_buffer_event *event;
	struct ftrace_entry *entry;
	int size = sizeof(*entry);

	/* Extra room for the argument array, only when fregs is provided */
	size += FTRACE_REGS_MAX_ARGS * !!fregs * sizeof(long);

	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, size,
					    trace_ctx);
	if (!event)
		return;
	entry = ring_buffer_event_data(event);
	entry->ip = ip;
	entry->parent_ip = parent_ip;

#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
	if (fregs) {
		for (int i = 0; i < FTRACE_REGS_MAX_ARGS; i++)
			entry->args[i] = ftrace_regs_get_argument(fregs, i);
	}
#endif

	if (static_branch_unlikely(&trace_function_exports_enabled))
		ftrace_exports(event, TRACE_EXPORT_FUNCTION);
	__buffer_unlock_commit(buffer, event);
}

#ifdef CONFIG_STACKTRACE

/* Allow 4 levels of nesting: normal,
   softirq, irq, NMI */
#define FTRACE_KSTACK_NESTING	4

#define FTRACE_KSTACK_ENTRIES	(SZ_4K / FTRACE_KSTACK_NESTING)

/* One saved stack trace, sized so 4 nested ones fit in 4K per CPU */
struct ftrace_stack {
	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
};


struct ftrace_stacks {
	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
};

static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
static DEFINE_PER_CPU(int, ftrace_stack_reserve);

/*
 * Capture the current kernel stack trace (from @regs if given,
 * otherwise from the current context, skipping @skip frames) into a
 * per-cpu scratch slot and record it as a TRACE_STACK event in
 * @buffer. Safe against nesting from irq/NMI contexts via the
 * per-cpu ftrace_stack_reserve index and the recursion lock.
 */
void __ftrace_trace_stack(struct trace_array *tr,
			  struct trace_buffer *buffer,
			  unsigned int trace_ctx,
			  int skip, struct pt_regs *regs)
{
	struct ring_buffer_event *event;
	unsigned int size, nr_entries;
	struct ftrace_stack *fstack;
	struct stack_entry *entry;
	int stackidx;
	int bit;

	bit = trace_test_and_set_recursion(_THIS_IP_, _RET_IP_, TRACE_EVENT_START);
	if (bit < 0)
		return;

	/*
	 * Add one, for this function and the call to save_stack_trace()
	 * If regs is set, then these functions will not be in the way.
	 */
#ifndef CONFIG_UNWINDER_ORC
	if (!regs)
		skip++;
#endif

	guard(preempt_notrace)();

	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;

	/* This should never happen. If it does, yell once and skip */
	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
		goto out;

	/*
	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
	 * interrupt will either see the value pre increment or post
	 * increment. If the interrupt happens pre increment it will have
	 * restored the counter when it returns.  We just need a barrier to
	 * keep gcc from moving things around.
	 */
	barrier();

	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
	size = ARRAY_SIZE(fstack->calls);

	if (regs) {
		nr_entries = stack_trace_save_regs(regs, fstack->calls,
						   size, skip);
	} else {
		nr_entries = stack_trace_save(fstack->calls, size, skip);
	}

#ifdef CONFIG_DYNAMIC_FTRACE
	/* Mark entry of stack trace as trampoline code */
	if (tr->ops && tr->ops->trampoline) {
		unsigned long tramp_start = tr->ops->trampoline;
		unsigned long tramp_end = tramp_start + tr->ops->trampoline_size;
		unsigned long *calls = fstack->calls;

		for (int i = 0; i < nr_entries; i++) {
			if (calls[i] >= tramp_start && calls[i] < tramp_end)
				calls[i] = FTRACE_TRAMPOLINE_MARKER;
		}
	}
#endif

	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
					    struct_size(entry, caller, nr_entries),
					    trace_ctx);
	if (!event)
		goto out;
	entry = ring_buffer_event_data(event);

	entry->size = nr_entries;
	memcpy(&entry->caller, fstack->calls,
	       flex_array_size(entry, caller, nr_entries));

	__buffer_unlock_commit(buffer, event);

 out:
	/* Again, don't let gcc optimize things here */
	barrier();
	__this_cpu_dec(ftrace_stack_reserve);
	trace_clear_recursion(bit);
}

/*
 * Record a kernel stack trace into @tr's buffer, taking care of the
 * corner case where RCU is not watching (e.g. an NMI hit a critical
 * region): then RCU must be woken via ct_irq_enter_irqson() first,
 * which is not legal from NMI context itself.
 */
void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
		   int skip)
{
	struct trace_buffer *buffer = tr->array_buffer.buffer;

	if (rcu_is_watching()) {
		__ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
		return;
	}

	if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
		return;

	/*
	 * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
	 * but if the above rcu_is_watching() failed, then the NMI
	 * triggered someplace critical, and ct_irq_enter() should
	 * not be called from NMI.
	 */
	if (unlikely(in_nmi()))
		return;

	ct_irq_enter_irqson();
	__ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
	ct_irq_exit_irqson();
}

/**
 * trace_dump_stack - record a stack back trace in the trace buffer
 * @skip: Number of functions to skip (helper handlers)
 */
void trace_dump_stack(int skip)
{
	if (tracing_disabled || tracing_selftest_running)
		return;

#ifndef CONFIG_UNWINDER_ORC
	/* Skip 1 to skip this function. */
	skip++;
#endif
	__ftrace_trace_stack(printk_trace, printk_trace->array_buffer.buffer,
			     tracing_gen_ctx(), skip, NULL);
}
EXPORT_SYMBOL_GPL(trace_dump_stack);

#ifdef CONFIG_USER_STACKTRACE_SUPPORT
/* Per-cpu recursion guard for user stack tracing */
static DEFINE_PER_CPU(int, user_stack_count);

/*
 * Record the current task's user-space stack trace as a
 * TRACE_USER_STACK event, when the userstacktrace option is set.
 * Skipped in NMI context because saving a user stack may fault.
 */
static void
ftrace_trace_userstack(struct trace_array *tr,
		       struct trace_buffer *buffer, unsigned int trace_ctx)
{
	struct ring_buffer_event *event;
	struct userstack_entry *entry;

	if (!(tr->trace_flags & TRACE_ITER(USERSTACKTRACE)))
		return;

	/*
	 * NMIs can not handle page faults, even with fix ups.
	 * The save user stack can (and often does) fault.
	 */
	if (unlikely(in_nmi()))
		return;

	/*
	 * prevent recursion, since the user stack tracing may
	 * trigger other kernel events.
	 */
	guard(preempt)();
	if (__this_cpu_read(user_stack_count))
		return;

	__this_cpu_inc(user_stack_count);

	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
					    sizeof(*entry), trace_ctx);
	if (!event)
		goto out_drop_count;
	entry = ring_buffer_event_data(event);

	entry->tgid = current->tgid;
	memset(&entry->caller, 0, sizeof(entry->caller));

	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
	__buffer_unlock_commit(buffer, event);

 out_drop_count:
	__this_cpu_dec(user_stack_count);
}
#else /* CONFIG_USER_STACKTRACE_SUPPORT */
static void ftrace_trace_userstack(struct trace_array *tr,
				   struct trace_buffer *buffer,
				   unsigned int trace_ctx)
{
}
#endif /* !CONFIG_USER_STACKTRACE_SUPPORT */

#endif /* CONFIG_STACKTRACE */

/*
 * Split a 64-bit timestamp delta into the entry's two 32-bit halves
 * (low word in bottom_delta_ts, high word in top_delta_ts).
 */
static inline void
func_repeats_set_delta_ts(struct func_repeats_entry *entry,
			  unsigned long long delta)
{
	entry->bottom_delta_ts = delta & U32_MAX;
	entry->top_delta_ts = (delta >> 32);
}

/*
 * Emit a TRACE_FUNC_REPEATS event summarizing the repeated calls
 * accumulated in @last_info (ip, parent_ip, count, and the time
 * delta since the last recorded call).
 */
void trace_last_func_repeats(struct trace_array *tr,
			     struct trace_func_repeats *last_info,
			     unsigned int trace_ctx)
{
	struct trace_buffer *buffer = tr->array_buffer.buffer;
	struct func_repeats_entry *entry;
	struct ring_buffer_event *event;
	u64 delta;

	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
					    sizeof(*entry), trace_ctx);
	if (!event)
		return;

	delta = ring_buffer_event_time_stamp(buffer, event) -
		last_info->ts_last_call;

	entry = ring_buffer_event_data(event);
	entry->ip = last_info->ip;
	entry->parent_ip = last_info->parent_ip;
	entry->count = last_info->count;
	func_repeats_set_delta_ts(entry, delta);

	__buffer_unlock_commit(buffer, event);
}

static void trace_iterator_increment(struct trace_iterator *iter)
{
	struct
ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);

	iter->idx++;
	if (buf_iter)
		ring_buffer_iter_advance(buf_iter);
}

/*
 * Peek at the next entry on @cpu without consuming it. Uses the
 * iterator's buffer iterator when one exists (non-consuming read),
 * otherwise peeks the live ring buffer directly. Sets iter->ent_size
 * to the length of the returned entry, or 0 when there is none.
 */
static struct trace_entry *
peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
		unsigned long *lost_events)
{
	struct ring_buffer_event *event;
	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);

	if (buf_iter) {
		event = ring_buffer_iter_peek(buf_iter, ts);
		if (lost_events)
			/* -1 flags "events dropped" without a count */
			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
				(unsigned long)-1 : 0;
	} else {
		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
					 lost_events);
	}

	if (event) {
		iter->ent_size = ring_buffer_event_length(event);
		return ring_buffer_event_data(event);
	}
	iter->ent_size = 0;
	return NULL;
}

/*
 * Find the entry with the oldest timestamp across all traced CPUs
 * (or just iter->cpu_file for a per-CPU trace file). Optionally
 * reports the CPU, timestamp and lost-event count of that entry.
 */
static struct trace_entry *
__find_next_entry(struct trace_iterator *iter, int *ent_cpu,
		  unsigned long *missing_events, u64 *ent_ts)
{
	struct trace_buffer *buffer = iter->array_buffer->buffer;
	struct trace_entry *ent, *next = NULL;
	unsigned long lost_events = 0, next_lost = 0;
	int cpu_file = iter->cpu_file;
	u64 next_ts = 0, ts;
	int next_cpu = -1;
	int next_size = 0;
	int cpu;

	/*
	 * If we are in a per_cpu trace file, don't bother by iterating over
	 * all cpu and peek directly.
	 */
	if (cpu_file > RING_BUFFER_ALL_CPUS) {
		if (ring_buffer_empty_cpu(buffer, cpu_file))
			return NULL;
		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
		if (ent_cpu)
			*ent_cpu = cpu_file;

		return ent;
	}

	for_each_tracing_cpu(cpu) {

		if (ring_buffer_empty_cpu(buffer, cpu))
			continue;

		ent = peek_next_entry(iter, cpu, &ts, &lost_events);

		/*
		 * Pick the entry with the smallest timestamp:
		 */
		if (ent && (!next || ts < next_ts)) {
			next = ent;
			next_cpu = cpu;
			next_ts = ts;
			next_lost = lost_events;
			next_size = iter->ent_size;
		}
	}

	/* ent_size must describe the entry being returned, not the last peeked */
	iter->ent_size = next_size;

	if (ent_cpu)
		*ent_cpu = next_cpu;

	if (ent_ts)
		*ent_ts = next_ts;

	if (missing_events)
		*missing_events = next_lost;

	return next;
}

#define STATIC_FMT_BUF_SIZE	128
static char static_fmt_buf[STATIC_FMT_BUF_SIZE];

/*
 * Grow iter->fmt by STATIC_FMT_BUF_SIZE bytes. Returns the new buffer,
 * or NULL when growing is not possible (static buffer or tp_printk).
 */
char *trace_iter_expand_format(struct trace_iterator *iter)
{
	char *tmp;

	/*
	 * iter->tr is NULL when used with tp_printk, which makes
	 * this get called where it is not safe to call krealloc().
	 */
	if (!iter->tr || iter->fmt == static_fmt_buf)
		return NULL;

	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
		       GFP_KERNEL);
	if (tmp) {
		iter->fmt_size += STATIC_FMT_BUF_SIZE;
		iter->fmt = tmp;
	}

	return tmp;
}

/* Returns true if the string is safe to dereference from an event */
static bool trace_safe_str(struct trace_iterator *iter, const char *str)
{
	unsigned long addr = (unsigned long)str;
	struct trace_event *trace_event;
	struct trace_event_call *event;

	/* OK if part of the event data */
	if ((addr >= (unsigned long)iter->ent) &&
	    (addr < (unsigned long)iter->ent + iter->ent_size))
		return true;

	/* OK if part of the temp seq buffer */
	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
	    (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE))
		return true;

	/* Core rodata can not be freed */
	if (is_kernel_rodata(addr))
		return true;

	if (trace_is_tracepoint_string(str))
		return true;

	/*
	 * Now this could be a module event, referencing core module
	 * data, which is OK.
	 */
	if (!iter->ent)
		return false;

	trace_event = ftrace_find_event(iter->ent->type);
	if (!trace_event)
		return false;

	event = container_of(trace_event, struct trace_event_call, event);
	/* Dynamic events have no module; nothing left to check against */
	if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
		return false;

	/* Would rather have rodata, but this will suffice */
	if (within_module_core(addr, event->module))
		return true;

	return false;
}

/**
 * ignore_event - Check dereferenced fields while writing to the seq buffer
 * @iter: The iterator that holds the seq buffer and the event being printed
 *
 * At boot up, test_event_printk() will flag any event that dereferences
 * a string with "%s" that does not exist in the ring buffer. It may still
 * be valid, as the string may point to a static string in the kernel
 * rodata that never gets freed. But if the string pointer is pointing
 * to something that was allocated, there's a chance that it can be freed
 * by the time the user reads the trace. This would cause a bad memory
 * access by the kernel and possibly crash the system.
 *
 * This function will check if the event has any fields flagged as needing
 * to be checked at runtime and perform those checks.
 *
 * If it is found that a field is unsafe, it will write into the @iter->seq
 * a message stating what was found to be unsafe.
 *
 * @return: true if the event is unsafe and should be ignored,
 *          false otherwise.
 */
bool ignore_event(struct trace_iterator *iter)
{
	struct ftrace_event_field *field;
	struct trace_event *trace_event;
	struct trace_event_call *event;
	struct list_head *head;
	struct trace_seq *seq;
	const void *ptr;

	trace_event = ftrace_find_event(iter->ent->type);

	seq = &iter->seq;

	if (!trace_event) {
		trace_seq_printf(seq, "EVENT ID %d NOT FOUND?\n", iter->ent->type);
		return true;
	}

	event = container_of(trace_event, struct trace_event_call, event);
	/* Only events flagged at boot need their string fields re-tested */
	if (!(event->flags & TRACE_EVENT_FL_TEST_STR))
		return false;

	head = trace_get_fields(event);
	if (!head) {
		trace_seq_printf(seq, "FIELDS FOR EVENT '%s' NOT FOUND?\n",
				 trace_event_name(event));
		return true;
	}

	/* Offsets are from the iter->ent that points to the raw event */
	ptr = iter->ent;

	list_for_each_entry(field, head, link) {
		const char *str;
		bool good;

		if (!field->needs_test)
			continue;

		str = *(const char **)(ptr + field->offset);

		good = trace_safe_str(iter, str);

		/*
		 * If you hit this warning, it is likely that the
		 * trace event in question used %s on a string that
		 * was saved at the time of the event, but may not be
		 * around when the trace is read. Use __string(),
		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
		 * instead. See samples/trace_events/trace-events-sample.h
		 * for reference.
		 */
		if (WARN_ONCE(!good, "event '%s' has unsafe pointer field '%s'",
			      trace_event_name(event), field->name)) {
			trace_seq_printf(seq, "EVENT %s: HAS UNSAFE POINTER FIELD '%s'\n",
					 trace_event_name(event), field->name);
			return true;
		}
	}
	return false;
}

/*
 * Return a format string safe for printing. When pointer hashing is
 * disabled (no HASH_PTR flag), rewrite @fmt into iter->fmt replacing
 * every "%p" with "%px" so real addresses are shown. Returns @fmt
 * unchanged when no rewrite is needed or possible.
 */
const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
{
	const char *p, *new_fmt;
	char *q;

	if (WARN_ON_ONCE(!fmt))
		return fmt;

	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER(HASH_PTR))
		return fmt;

	p = fmt;
	new_fmt = q = iter->fmt;
	while (*p) {
		/* +3: room for the copied char, an inserted 'x' and the NUL */
		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
			if (!trace_iter_expand_format(iter))
				return fmt;

			/* krealloc may have moved the buffer; rebase q */
			q += iter->fmt - new_fmt;
			new_fmt = iter->fmt;
		}

		*q++ = *p++;

		/* Replace %p with %px */
		if (p[-1] == '%') {
			if (p[0] == '%') {
				*q++ = *p++;
			} else if (p[0] == 'p' && !isalnum(p[1])) {
				*q++ = *p++;
				*q++ = 'x';
			}
		}
	}
	*q = '\0';

	return new_fmt;
}

#define STATIC_TEMP_BUF_SIZE	128
static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);

/* Find the next real entry, without updating the iterator itself */
struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
					  int *ent_cpu, u64 *ent_ts)
{
	/* __find_next_entry will reset ent_size */
	int ent_size = iter->ent_size;
	struct trace_entry *entry;

	/*
	 * If called from ftrace_dump(), then the iter->temp buffer
	 * will be the static_temp_buf and not created from kmalloc.
	 * If the entry size is greater than the buffer, we can
	 * not save it. Just return NULL in that case. This is only
	 * used to add markers when two consecutive events' time
	 * stamps have a large delta.
See trace_print_lat_context()
	 */
	if (iter->temp == static_temp_buf &&
	    STATIC_TEMP_BUF_SIZE < ent_size)
		return NULL;

	/*
	 * The __find_next_entry() may call peek_next_entry(), which may
	 * call ring_buffer_peek() that may make the contents of iter->ent
	 * undefined. Need to copy iter->ent now.
	 */
	if (iter->ent && iter->ent != iter->temp) {
		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
			void *temp;
			temp = kmalloc(iter->ent_size, GFP_KERNEL);
			if (!temp)
				return NULL;
			kfree(iter->temp);
			iter->temp = temp;
			iter->temp_size = iter->ent_size;
		}
		memcpy(iter->temp, iter->ent, iter->ent_size);
		iter->ent = iter->temp;
	}
	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
	/* Put back the original ent_size */
	iter->ent_size = ent_size;

	return entry;
}

/* Find the next real entry, and increment the iterator to the next entry */
void *trace_find_next_entry_inc(struct trace_iterator *iter)
{
	iter->ent = __find_next_entry(iter, &iter->cpu,
				      &iter->lost_events, &iter->ts);

	if (iter->ent)
		trace_iterator_increment(iter);

	return iter->ent ? iter : NULL;
}

/* Consume (remove) the next entry on iter->cpu from the live buffer */
static void trace_consume(struct trace_iterator *iter)
{
	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
			    &iter->lost_events);
}

/*
 * seq_file ->next() callback: advance the trace iterator until its
 * index catches up with *pos.
 */
static void *s_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct trace_iterator *iter = m->private;
	int i = (int)*pos;
	void *ent;

	WARN_ON_ONCE(iter->leftover);

	(*pos)++;

	/* can't go backwards */
	if (iter->idx > i)
		return NULL;

	if (iter->idx < 0)
		ent = trace_find_next_entry_inc(iter);
	else
		ent = iter;

	while (ent && iter->idx < i)
		ent = trace_find_next_entry_inc(iter);

	iter->pos = *pos;

	return ent;
}

/*
 * Reset the buffer iterator for @cpu and skip over any entries that
 * predate the buffer's time_start, counting them as skipped so entry
 * accounting stays correct.
 */
void tracing_iter_reset(struct trace_iterator *iter, int cpu)
{
	struct ring_buffer_iter *buf_iter;
	unsigned long entries = 0;
	u64 ts;

	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;

	buf_iter = trace_buffer_iter(iter, cpu);
	if (!buf_iter)
		return;

	ring_buffer_iter_reset(buf_iter);

	/*
	 * We could have the case with the max latency tracers
	 * that a reset never took place on a cpu. This is evident
	 * by the timestamp being before the start of the buffer.
	 */
	while (ring_buffer_iter_peek(buf_iter, &ts)) {
		if (ts >= iter->array_buffer->time_start)
			break;
		entries++;
		ring_buffer_iter_advance(buf_iter);
		/* This could be a big loop */
		cond_resched();
	}

	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
}

/*
 * The current tracer is copied to avoid a global locking
 * all around.
 */
static void *s_start(struct seq_file *m, loff_t *pos)
{
	struct trace_iterator *iter = m->private;
	struct trace_array *tr = iter->tr;
	int cpu_file = iter->cpu_file;
	void *p = NULL;
	loff_t l = 0;
	int cpu;

	/* The current tracer may have changed since the file was opened */
	mutex_lock(&trace_types_lock);
	if (unlikely(tr->current_trace != iter->trace)) {
		/* Close iter->trace before switching to the new current tracer */
		if (iter->trace->close)
			iter->trace->close(iter);
		iter->trace = tr->current_trace;
		/* Reopen the new current tracer */
		if (iter->trace->open)
			iter->trace->open(iter);
	}
	mutex_unlock(&trace_types_lock);

	if (iter->snapshot && tracer_uses_snapshot(iter->trace))
		return ERR_PTR(-EBUSY);

	/* A fresh (non-sequential) read: rewind everything to *pos */
	if (*pos != iter->pos) {
		iter->ent = NULL;
		iter->cpu = 0;
		iter->idx = -1;

		if (cpu_file == RING_BUFFER_ALL_CPUS) {
			for_each_tracing_cpu(cpu)
				tracing_iter_reset(iter, cpu);
		} else
			tracing_iter_reset(iter, cpu_file);

		iter->leftover = 0;
		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
			;

	} else {
		/*
		 * If we overflowed the seq_file before, then we want
		 * to just reuse the trace_seq buffer again.
		 */
		if (iter->leftover)
			p = iter;
		else {
			l = *pos - 1;
			p = s_next(m, p, &l);
		}
	}

	trace_event_read_lock();
	trace_access_lock(cpu_file);
	return p;
}

/* seq_file ->stop() callback: drop the locks taken in s_start() */
static void s_stop(struct seq_file *m, void *p)
{
	struct trace_iterator *iter = m->private;

	if (iter->snapshot && tracer_uses_snapshot(iter->trace))
		return;

	trace_access_unlock(iter->cpu_file);
	trace_event_read_unlock();
}

/*
 * Compute the number of entries currently in @cpu's buffer and the
 * total written to it (entries plus overruns).
 */
static void
get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
		      unsigned long *entries, int cpu)
{
	unsigned long count;

	count = ring_buffer_entries_cpu(buf->buffer, cpu);
	/*
	 * If this buffer has skipped entries, then we hold all
	 * entries for the trace and we need to ignore the
	 * ones before the time stamp.
	 */
	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
		/* total is the same as the entries */
		*total = count;
	} else
		*total = count +
			ring_buffer_overrun_cpu(buf->buffer, cpu);
	*entries = count;
}

/* Sum get_total_entries_cpu() over all traced CPUs */
static void
get_total_entries(struct array_buffer *buf,
		  unsigned long *total, unsigned long *entries)
{
	unsigned long t, e;
	int cpu;

	*total = 0;
	*entries = 0;

	for_each_tracing_cpu(cpu) {
		get_total_entries_cpu(buf, &t, &e, cpu);
		*total += t;
		*entries += e;
	}
}

/* Number of entries in @cpu's buffer; NULL @tr means the global trace */
unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
{
	unsigned long total, entries;

	if (!tr)
		tr = &global_trace;

	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);

	return entries;
}

/* Number of entries across all CPUs; NULL @tr means the global trace */
unsigned long trace_total_entries(struct trace_array *tr)
{
	unsigned long total, entries;

	if (!tr)
		tr = &global_trace;

	get_total_entries(&tr->array_buffer,
			  &total, &entries);

	return entries;
}

/*
 * Print the legend for the latency output format.
 * NOTE(review): the exact run-length of spaces inside these literals
 * defines the column alignment of the output — verify against the
 * rendered trace before changing.
 */
static void print_lat_help_header(struct seq_file *m)
{
	seq_puts(m, "#                    _------=> CPU#            \n"
		    "#                   / _-----=> irqs-off/BH-disabled\n"
		    "#                  | / _----=> need-resched    \n"
		    "#                 || / _---=> hardirq/softirq \n"
		    "#                ||| / _--=> preempt-depth   \n"
		    "#               |||| / _-=> migrate-disable \n"
		    "#              ||||| /     delay           \n"
		    "#  cmd     pid |||||| time  |   caller     \n"
		    "#     \\   /    ||||||  \\    |    /       \n");
}

/* Print the entries/overrun summary line shared by the help headers */
static void print_event_info(struct array_buffer *buf, struct seq_file *m)
{
	unsigned long total;
	unsigned long entries;

	get_total_entries(buf, &total, &entries);
	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
		   entries, total, num_online_cpus());
	seq_puts(m, "#\n");
}

/* Column header for the default (non-irq-info) output format */
static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
				   unsigned int flags)
{
	bool tgid = flags & TRACE_ITER(RECORD_TGID);

	print_event_info(buf, m);

	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
	seq_printf(m, "#              | |      %s |         |         |\n",      tgid ? "     |    " : "");
}

/* Column header when irq-info is included in each trace line */
static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
				       unsigned int flags)
{
	bool tgid = flags & TRACE_ITER(RECORD_TGID);
	static const char space[] = "            ";
	/* %.*s with prec widens the header when the TGID column is shown */
	int prec = tgid ? 12 : 2;

	print_event_info(buf, m);

	seq_printf(m, "# %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
	seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space);
	seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space);
	seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space);
	seq_printf(m, "# %.*s||| / _-=> migrate-disable\n", prec, space);
	seq_printf(m, "# %.*s|||| /     delay\n", prec, space);
	seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "   TGID   ");
	seq_printf(m, "#              | |    %.*s   |  |||||     |         |\n", prec, "     |    ");
}

/* Print the verbose banner used by the latency tracers */
void
print_trace_header(struct seq_file *m, struct trace_iterator *iter)
{
	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
	struct array_buffer *buf = iter->array_buffer;
	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
	struct tracer *type = iter->trace;
	unsigned long entries;
	unsigned long total;
	const char *name = type->name;

	get_total_entries(buf, &total, &entries);

	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
		   name, init_utsname()->release);
	seq_puts(m, "# -----------------------------------"
		 "---------------------------------\n");
	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
		   nsecs_to_usecs(data->saved_latency),
		   entries,
		   total,
		   buf->cpu,
		   preempt_model_str(),
		   /* These are reserved for later use */
		   0, 0, 0, 0);
#ifdef CONFIG_SMP
	seq_printf(m, " #P:%d)\n", num_online_cpus());
#else
	seq_puts(m, ")\n");
#endif
	seq_puts(m, "#    -----------------\n");
	seq_printf(m, "#    | task: %.16s-%d "
		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
		   data->comm, data->pid,
		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
		   data->policy, data->rt_priority);
	seq_puts(m, "#    -----------------\n");

	if (data->critical_start) {
		seq_puts(m, "#  => started at: ");
		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
		trace_print_seq(m, &iter->seq);
		seq_puts(m, "\n#  => ended at:   ");
		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
		trace_print_seq(m, &iter->seq);
		seq_puts(m, "\n#\n");
	}

	seq_puts(m, "#\n");
}

/*
 * With the ANNOTATE flag set, emit a "CPU N buffer started" marker the
 * first time an entry from a given CPU appears in the output.
 */
static void test_cpu_buff_start(struct trace_iterator *iter)
{
	struct trace_seq *s = &iter->seq;
	struct trace_array *tr = iter->tr;

	if (!(tr->trace_flags & TRACE_ITER(ANNOTATE)))
		return;

	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
		return;

	if (cpumask_available(iter->started) &&
	    cpumask_test_cpu(iter->cpu, iter->started))
		return;

	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
		return;

	if (cpumask_available(iter->started))
		cpumask_set_cpu(iter->cpu, iter->started);

	/* Don't print started cpu buffer for the first entry of the trace */
	if (iter->idx > 1)
		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
				 iter->cpu);
}

#ifdef CONFIG_FTRACE_SYSCALLS
/* True for the syscall enter/exit trace event print functions */
static bool is_syscall_event(struct trace_event *event)
{
	return (event->funcs == &enter_syscall_print_funcs) ||
		(event->funcs == &exit_syscall_print_funcs);

}
#define syscall_buf_size CONFIG_TRACE_SYSCALL_BUF_SIZE_DEFAULT
#else
static inline bool is_syscall_event(struct trace_event *event)
{
	return false;
}
#define syscall_buf_size 0
#endif /* CONFIG_FTRACE_SYSCALLS */

/* Default one-line rendering of the current entry into iter->seq */
static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
{
	struct trace_array *tr = iter->tr;
	struct trace_seq *s = &iter->seq;
	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
	struct trace_entry *entry;
	struct trace_event *event;

	entry = iter->ent;

	test_cpu_buff_start(iter);

	event = ftrace_find_event(entry->type);

	if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) {
		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
			trace_print_lat_context(iter);
		else
			trace_print_context(iter);
	}

	if (trace_seq_has_overflowed(s))
		return TRACE_TYPE_PARTIAL_LINE;

	if (event) {
		if (tr->trace_flags & TRACE_ITER(FIELDS))
			return print_event_fields(iter, event);
		/*
		 * For TRACE_EVENT() events, the print_fmt is not
		 * safe to use if the array has delta offsets
		 * Force printing via the fields.
		 */
		if ((tr->text_delta)) {
			/* ftrace and system call events are still OK */
			if ((event->type > __TRACE_LAST_TYPE) &&
			    !is_syscall_event(event))
				return print_event_fields(iter, event);
		}
		return event->funcs->trace(iter, sym_flags, event);
	}

	trace_seq_printf(s, "Unknown type %d\n", entry->type);

	return trace_handle_return(s);
}

/* Raw rendering: "<pid> <cpu> <ts>" prefix plus the event's raw output */
static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
{
	struct trace_array *tr = iter->tr;
	struct trace_seq *s = &iter->seq;
	struct trace_entry *entry;
	struct trace_event *event;

	entry = iter->ent;

	if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO))
		trace_seq_printf(s, "%d %d %llu ",
				 entry->pid, iter->cpu, iter->ts);

	if (trace_seq_has_overflowed(s))
		return TRACE_TYPE_PARTIAL_LINE;

	event = ftrace_find_event(entry->type);
	if (event)
		return event->funcs->raw(iter, 0, event);

	trace_seq_printf(s, "%d ?\n", entry->type);

	return trace_handle_return(s);
}

/* Hex rendering of context fields plus the event's hex output */
static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
{
	struct trace_array *tr = iter->tr;
	struct trace_seq *s = &iter->seq;
	unsigned char newline = '\n';
	struct trace_entry *entry;
	struct trace_event *event;

	entry = iter->ent;

	if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) {
		SEQ_PUT_HEX_FIELD(s, entry->pid);
		SEQ_PUT_HEX_FIELD(s, iter->cpu);
		SEQ_PUT_HEX_FIELD(s, iter->ts);
		if (trace_seq_has_overflowed(s))
			return TRACE_TYPE_PARTIAL_LINE;
	}

	event = ftrace_find_event(entry->type);
	if (event) {
		enum print_line_t ret = event->funcs->hex(iter, 0, event);
		if (ret != TRACE_TYPE_HANDLED)
			return ret;
	}

	SEQ_PUT_FIELD(s, newline);

	return trace_handle_return(s);
}

/* Binary rendering of context fields plus the event's binary output */
static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
{
	struct trace_array *tr = iter->tr;
	struct trace_seq *s = &iter->seq;
	struct trace_entry *entry;
	struct trace_event *event;

	entry = iter->ent;

	if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) {
		SEQ_PUT_FIELD(s, entry->pid);
		SEQ_PUT_FIELD(s, iter->cpu);
		SEQ_PUT_FIELD(s, iter->ts);
		if (trace_seq_has_overflowed(s))
			return TRACE_TYPE_PARTIAL_LINE;
	}

	event = ftrace_find_event(entry->type);
	return event ?
		event->funcs->binary(iter, 0, event) :
		TRACE_TYPE_HANDLED;
}

/* Return 1 when there is nothing left to read in the selected buffer(s) */
int trace_empty(struct trace_iterator *iter)
{
	struct ring_buffer_iter *buf_iter;
	int cpu;

	/* If we are looking at one CPU buffer, only check that one */
	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
		cpu = iter->cpu_file;
		buf_iter = trace_buffer_iter(iter, cpu);
		if (buf_iter) {
			if (!ring_buffer_iter_empty(buf_iter))
				return 0;
		} else {
			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
				return 0;
		}
		return 1;
	}

	for_each_tracing_cpu(cpu) {
		buf_iter = trace_buffer_iter(iter, cpu);
		if (buf_iter) {
			if (!ring_buffer_iter_empty(buf_iter))
				return 0;
		} else {
			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
				return 0;
		}
	}

	return 1;
}

/* Called with trace_event_read_lock() held. */
enum print_line_t print_trace_line(struct trace_iterator *iter)
{
	struct trace_array *tr = iter->tr;
	unsigned long trace_flags = tr->trace_flags;
	enum print_line_t ret;

	/* Report dropped events before printing the entry itself */
	if (iter->lost_events) {
		if (iter->lost_events == (unsigned long)-1)
			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
					 iter->cpu);
		else
			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
					 iter->cpu, iter->lost_events);
		if (trace_seq_has_overflowed(&iter->seq))
			return TRACE_TYPE_PARTIAL_LINE;
	}

	/* Give the tracer first shot at rendering the line */
	if (iter->trace && iter->trace->print_line) {
		ret = iter->trace->print_line(iter);
		if (ret != TRACE_TYPE_UNHANDLED)
			return ret;
	}

	if (iter->ent->type == TRACE_BPUTS &&
	    trace_flags & TRACE_ITER(PRINTK) &&
	    trace_flags & TRACE_ITER(PRINTK_MSGONLY))
		return trace_print_bputs_msg_only(iter);

	if (iter->ent->type == TRACE_BPRINT &&
	    trace_flags & TRACE_ITER(PRINTK) &&
	    trace_flags & TRACE_ITER(PRINTK_MSGONLY))
		return trace_print_bprintk_msg_only(iter);

	if (iter->ent->type == TRACE_PRINT &&
	    trace_flags & TRACE_ITER(PRINTK) &&
	    trace_flags & TRACE_ITER(PRINTK_MSGONLY))
		return trace_print_printk_msg_only(iter);

	if (trace_flags & TRACE_ITER(BIN))
		return print_bin_fmt(iter);

	if (trace_flags & TRACE_ITER(HEX))
		return print_hex_fmt(iter);

	if (trace_flags & TRACE_ITER(RAW))
		return print_raw_fmt(iter);

	return print_trace_fmt(iter);
}

/* Print the latency-format banner and legend for latency tracer files */
void trace_latency_header(struct seq_file *m)
{
	struct trace_iterator *iter = m->private;
	struct trace_array *tr = iter->tr;

	/* print nothing if the buffers are empty */
	if (trace_empty(iter))
		return;

	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
		print_trace_header(m, iter);

	if (!(tr->trace_flags & TRACE_ITER(VERBOSE)))
		print_lat_help_header(m);
}

/* Print the appropriate header for the current output format flags */
void trace_default_header(struct seq_file *m)
{
	struct trace_iterator *iter = m->private;
	struct trace_array *tr = iter->tr;
	unsigned long trace_flags = tr->trace_flags;

	if (!(trace_flags & TRACE_ITER(CONTEXT_INFO)))
		return;

	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
		/* print nothing if the buffers are empty */
		if (trace_empty(iter))
			return;
		print_trace_header(m, iter);
		if (!(trace_flags & TRACE_ITER(VERBOSE)))
			print_lat_help_header(m);
	} else {
		if (!(trace_flags & TRACE_ITER(VERBOSE))) {
			if (trace_flags & TRACE_ITER(IRQ_INFO))
				print_func_help_header_irq(iter->array_buffer,
							   m, trace_flags);
			else
				print_func_help_header(iter->array_buffer, m,
						       trace_flags);
		}
	}
}

/* Warn in the output when function tracing has been disabled by error */
static void test_ftrace_alive(struct seq_file *m)
{
	if (!ftrace_is_dead())
		return;
	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
		    "#          MAY BE MISSING FUNCTION EVENTS\n");
}

static int s_show(struct
seq_file *m, void *v) 3296 { 3297 struct trace_iterator *iter = v; 3298 int ret; 3299 3300 if (iter->ent == NULL) { 3301 if (iter->tr) { 3302 seq_printf(m, "# tracer: %s\n", iter->trace->name); 3303 seq_puts(m, "#\n"); 3304 test_ftrace_alive(m); 3305 } 3306 if (iter->snapshot && trace_empty(iter)) 3307 print_snapshot_help(m, iter); 3308 else if (iter->trace && iter->trace->print_header) 3309 iter->trace->print_header(m); 3310 else 3311 trace_default_header(m); 3312 3313 } else if (iter->leftover) { 3314 /* 3315 * If we filled the seq_file buffer earlier, we 3316 * want to just show it now. 3317 */ 3318 ret = trace_print_seq(m, &iter->seq); 3319 3320 /* ret should this time be zero, but you never know */ 3321 iter->leftover = ret; 3322 3323 } else { 3324 ret = print_trace_line(iter); 3325 if (ret == TRACE_TYPE_PARTIAL_LINE) { 3326 iter->seq.full = 0; 3327 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n"); 3328 } 3329 ret = trace_print_seq(m, &iter->seq); 3330 /* 3331 * If we overflow the seq_file buffer, then it will 3332 * ask us for this data again at start up. 3333 * Use that instead. 3334 * ret is 0 if seq_file write succeeded. 3335 * -1 otherwise. 3336 */ 3337 iter->leftover = ret; 3338 } 3339 3340 return 0; 3341 } 3342 3343 static const struct seq_operations tracer_seq_ops = { 3344 .start = s_start, 3345 .next = s_next, 3346 .stop = s_stop, 3347 .show = s_show, 3348 }; 3349 3350 /* 3351 * Note, as iter itself can be allocated and freed in different 3352 * ways, this function is only used to free its content, and not 3353 * the iterator itself. The only requirement to all the allocations 3354 * is that it must zero all fields (kzalloc), as freeing works with 3355 * ethier allocated content or NULL. 
 */
static void free_trace_iter_content(struct trace_iterator *iter)
{
	/* The fmt is either NULL, allocated or points to static_fmt_buf */
	if (iter->fmt != static_fmt_buf)
		kfree(iter->fmt);

	kfree(iter->temp);
	kfree(iter->buffer_iter);
	mutex_destroy(&iter->mutex);
	free_cpumask_var(iter->started);
}

/*
 * __tracing_open - allocate and initialize an iterator for the "trace" file
 * @inode: tracefs inode; i_private holds the owning trace_array
 * @file: file being opened; the iterator is stored as its seq_file private
 * @snapshot: true when this open is for the "snapshot" file
 *
 * Returns the new iterator or an ERR_PTR() on failure.  On success the
 * iterator has a ring-buffer iterator for the selected CPU (or all CPUs),
 * and, if the pause-on-trace option is set, tracing has been stopped
 * until tracing_release() restarts it.
 */
struct trace_iterator *
__tracing_open(struct inode *inode, struct file *file, bool snapshot)
{
	struct trace_array *tr = inode->i_private;
	struct trace_iterator *iter;
	int cpu;

	if (tracing_disabled)
		return ERR_PTR(-ENODEV);

	/* seq_file owns the iter allocation; freed via seq_release_private() */
	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
	if (!iter)
		return ERR_PTR(-ENOMEM);

	/* One buffer iterator slot per possible CPU */
	iter->buffer_iter = kzalloc_objs(*iter->buffer_iter, nr_cpu_ids);
	if (!iter->buffer_iter)
		goto release;

	/*
	 * trace_find_next_entry() may need to save off iter->ent.
	 * It will place it into the iter->temp buffer. As most
	 * events are less than 128, allocate a buffer of that size.
	 * If one is greater, then trace_find_next_entry() will
	 * allocate a new buffer to adjust for the bigger iter->ent.
	 * It's not critical if it fails to get allocated here.
	 */
	iter->temp = kmalloc(128, GFP_KERNEL);
	if (iter->temp)
		iter->temp_size = 128;

	/*
	 * trace_event_printf() may need to modify given format
	 * string to replace %p with %px so that it shows real address
	 * instead of hash value. However, that is only for the event
	 * tracing, other tracers may not need it. Defer the allocation
	 * until it is needed.
	 */
	iter->fmt = NULL;
	iter->fmt_size = 0;

	/* Held until all buffer iterators are set up (or the fail path) */
	mutex_lock(&trace_types_lock);
	iter->trace = tr->current_trace;

	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
		goto fail;

	iter->tr = tr;

#ifdef CONFIG_TRACER_SNAPSHOT
	/* Currently only the top directory has a snapshot */
	if (tr->current_trace->print_max || snapshot)
		iter->array_buffer = &tr->snapshot_buffer;
	else
#endif
		iter->array_buffer = &tr->array_buffer;
	iter->snapshot = snapshot;
	iter->pos = -1;
	iter->cpu_file = tracing_get_cpu(inode);
	mutex_init(&iter->mutex);

	/* Notify the tracer early; before we stop tracing. */
	if (iter->trace->open)
		iter->trace->open(iter);

	/* Annotate start of buffers if we had overruns */
	if (ring_buffer_overruns(iter->array_buffer->buffer))
		iter->iter_flags |= TRACE_FILE_ANNOTATE;

	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
	if (trace_clocks[tr->clock_id].in_ns)
		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;

	/*
	 * If pause-on-trace is enabled, then stop the trace while
	 * dumping, unless this is the "snapshot" file
	 */
	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER(PAUSE_ON_TRACE))) {
		/* TRACE_FILE_PAUSE tells tracing_release() to restart tracing */
		iter->iter_flags |= TRACE_FILE_PAUSE;
		tracing_stop_tr(tr);
	}

	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
		for_each_tracing_cpu(cpu) {
			iter->buffer_iter[cpu] =
				ring_buffer_read_start(iter->array_buffer->buffer,
						       cpu, GFP_KERNEL);
			tracing_iter_reset(iter, cpu);
		}
	} else {
		/* Per-CPU file: only set up the iterator for that one CPU */
		cpu = iter->cpu_file;
		iter->buffer_iter[cpu] =
			ring_buffer_read_start(iter->array_buffer->buffer,
					       cpu, GFP_KERNEL);
		tracing_iter_reset(iter, cpu);
	}

	mutex_unlock(&trace_types_lock);

	return iter;

 fail:
	/* Reached with trace_types_lock held */
	mutex_unlock(&trace_types_lock);
	free_trace_iter_content(iter);
release:
	/* Frees the iter allocated by __seq_open_private() above */
	seq_release_private(inode, file);
	return ERR_PTR(-ENOMEM);
}

/*
 * Generic open for tracefs files that are not tied to a trace_array.
 * Passing NULL to tracing_check_open_get_tr() performs only the
 * security / tracing-disabled checks; no reference is taken.
 */
int tracing_open_generic(struct inode *inode, struct file *filp)
{
	int ret;

	ret = tracing_check_open_get_tr(NULL);
	if (ret)
		return ret;

	filp->private_data = inode->i_private;
	return 0;
}

/*
 * Open and update trace_array ref count.
 * Must have the current trace_array passed to it.
 * Opening for write on a read-only array fails with -EACCES
 * (and drops the reference just taken).
 */
int tracing_open_generic_tr(struct inode *inode, struct file *filp)
{
	struct trace_array *tr = inode->i_private;
	int ret;

	ret = tracing_check_open_get_tr(tr);
	if (ret)
		return ret;

	if ((filp->f_mode & FMODE_WRITE) && trace_array_is_readonly(tr)) {
		trace_array_put(tr);
		return -EACCES;
	}

	filp->private_data = inode->i_private;

	return 0;
}

/*
 * The private pointer of the inode is the trace_event_file.
 * Update the tr ref count associated to it.
 */
int tracing_open_file_tr(struct inode *inode, struct file *filp)
{
	struct trace_event_file *file = inode->i_private;
	int ret;

	/* Takes a reference on file->tr; dropped in tracing_release_file_tr() */
	ret = tracing_check_open_get_tr(file->tr);
	if (ret)
		return ret;

	/* Serialize against event file removal */
	guard(mutex)(&event_mutex);

	/* Fail if the file is marked for removal */
	if (file->flags & EVENT_FILE_FL_FREED) {
		trace_array_put(file->tr);
		return -ENODEV;
	} else {
		/* Pin the event file for the lifetime of this open */
		event_file_get(file);
	}

	return 0;
}

/* Release counterpart of tracing_open_file_tr(): drop both references */
int tracing_release_file_tr(struct inode *inode, struct file *filp)
{
	struct trace_event_file *file = inode->i_private;

	trace_array_put(file->tr);
	event_file_put(file);

	return 0;
}

/* For single_open() users: drop the refs, then release the seq_file */
int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
{
	tracing_release_file_tr(inode, filp);
	return single_release(inode, filp);
}

/*
 * Release for the "trace" file: tear down the iterator built by
 * __tracing_open(), restart tracing if the open paused it, and drop
 * the trace_array reference taken at open time.
 */
int tracing_release(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;
	struct seq_file *m = file->private_data;
	struct trace_iterator *iter;
	int cpu;

	if (!(file->f_mode & FMODE_READ)) {
		trace_array_put(tr);
		return 0;
	}

	/* Writes do not use seq_file */
	iter = m->private;
	mutex_lock(&trace_types_lock);

	for_each_tracing_cpu(cpu) {
		/* buffer_iter[cpu] may be NULL for CPUs not opened/allocated */
		if (iter->buffer_iter[cpu])
			ring_buffer_read_finish(iter->buffer_iter[cpu]);
	}

	if (iter->trace && iter->trace->close)
		iter->trace->close(iter);

	if (iter->iter_flags & TRACE_FILE_PAUSE)
		/* reenable tracing if it was previously enabled */
		tracing_start_tr(tr);

	__trace_array_put(tr);

	mutex_unlock(&trace_types_lock);

	free_trace_iter_content(iter);
	seq_release_private(inode, file);

	return 0;
}

/* Release counterpart of tracing_open_generic_tr() */
int tracing_release_generic_tr(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;

	trace_array_put(tr);
	return 0;
}

/* Like tracing_release_generic_tr() but for single_open() users */
static int tracing_single_release_tr(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;

	trace_array_put(tr);

	return single_release(inode, file);
}

static bool update_last_data_if_empty(struct trace_array *tr);

/*
 * Open for the "trace" file.  O_TRUNC writes erase the buffer(s);
 * read opens build a full iterator via __tracing_open().
 */
static int tracing_open(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;
	struct trace_iterator *iter;
	int ret;

	ret = tracing_check_open_get_tr(tr);
	if (ret)
		return ret;

	/* If this file was open for write, then erase contents */
	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
		int cpu = tracing_get_cpu(inode);
		struct array_buffer *trace_buf = &tr->array_buffer;

#ifdef CONFIG_TRACER_MAX_TRACE
		/* Latency tracers show the snapshot buffer; erase that one */
		if (tr->current_trace->print_max)
			trace_buf = &tr->snapshot_buffer;
#endif

		if (cpu == RING_BUFFER_ALL_CPUS)
			tracing_reset_online_cpus(trace_buf);
		else
			tracing_reset_cpu(trace_buf, cpu);

		update_last_data_if_empty(tr);
	}

	if (file->f_mode & FMODE_READ) {
		iter = __tracing_open(inode, file, false);
		if (IS_ERR(iter))
			ret = PTR_ERR(iter);
		else if (tr->trace_flags & TRACE_ITER(LATENCY_FMT))
			iter->iter_flags |= TRACE_FILE_LAT_FMT;
	}

	/* On failure, drop the ref taken by tracing_check_open_get_tr() */
	if (ret < 0)
		trace_array_put(tr);

	return ret;
}

/*
 * Some tracers are not suitable for instance buffers.
 * A tracer is always available for the global array (toplevel)
 * or if it explicitly states that it is.
 */
static bool
trace_ok_for_array(struct tracer *t, struct trace_array *tr)
{
	/* arrays with mapped buffer range do not have snapshots */
	if (tr->range_addr_start && tracer_uses_snapshot(t))
		return false;
	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
}

/* Find the next tracer that this trace array may use */
static struct tracer *
get_tracer_for_array(struct trace_array *tr, struct tracer *t)
{
	/* Skip tracers not usable by this array */
	while (t && !trace_ok_for_array(t, tr))
		t = t->next;

	return t;
}

/* seq_file .next for "available_tracers": advance along the tracer list */
static void *
t_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct trace_array *tr = m->private;
	struct tracer *t = v;

	(*pos)++;

	if (t)
		t = get_tracer_for_array(tr, t->next);

	return t;
}

/* seq_file .start: take trace_types_lock and skip to position *pos */
static void *t_start(struct seq_file *m, loff_t *pos)
{
	struct trace_array *tr = m->private;
	struct tracer *t;
	loff_t l = 0;

	/* Released in t_stop() */
	mutex_lock(&trace_types_lock);

	t = get_tracer_for_array(tr, trace_types);
	for (; t && l < *pos; t = t_next(m, t, &l))
		;

	return t;
}

/* seq_file .stop: drop the lock taken in t_start() */
static void t_stop(struct seq_file *m, void *p)
{
	mutex_unlock(&trace_types_lock);
}

/* seq_file .show: print one tracer name (space separated, '\n' at end) */
static int t_show(struct seq_file *m, void *v)
{
	struct tracer *t = v;

	if (!t)
		return 0;

	seq_puts(m, t->name);
	if (t->next)
		seq_putc(m, ' ');
	else
		seq_putc(m, '\n');

	return 0;
}

static const struct seq_operations show_traces_seq_ops = {
	.start		= t_start,
	.next		= t_next,
	.stop		= t_stop,
	.show		= t_show,
};

/* Open for "available_tracers"; takes a ref on the trace_array */
static int show_traces_open(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;
	struct seq_file *m;
	int ret;

	ret = tracing_check_open_get_tr(tr);
	if (ret)
		return ret;

	ret = seq_open(file, &show_traces_seq_ops);
	if (ret) {
		trace_array_put(tr);
		return ret;
	}

	m = file->private_data;
	m->private = tr;

	return 0;
}

/* Release counterpart of show_traces_open() */
static int tracing_seq_release(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;

	trace_array_put(tr);
	return seq_release(inode, file);
}

/* Writes to "trace" are accepted but discarded (except O_TRUNC at open) */
static ssize_t
tracing_write_stub(struct file *filp, const char __user *ubuf,
		   size_t count, loff_t *ppos)
{
	return count;
}

/* lseek: delegate to seq_lseek() for readers, reset position for writers */
loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
{
	int ret;

	if (file->f_mode & FMODE_READ)
		ret = seq_lseek(file, offset, whence);
	else
		file->f_pos = ret = 0;

	return ret;
}

static const struct file_operations tracing_fops = {
	.open		= tracing_open,
	.read		= seq_read,
	.read_iter	= seq_read_iter,
	.splice_read	= copy_splice_read,
	.write		= tracing_write_stub,
	.llseek		= tracing_lseek,
	.release	= tracing_release,
};

static const struct file_operations show_traces_fops = {
	.open		= show_traces_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= tracing_seq_release,
};

/* Read "tracing_cpumask": print the mask as a cpumask bitmap string */
static ssize_t
tracing_cpumask_read(struct file *filp, char __user *ubuf,
		     size_t count, loff_t *ppos)
{
	struct trace_array *tr = file_inode(filp)->i_private;
	/* __free(kfree): freed automatically on every return path */
	char *mask_str __free(kfree) = NULL;
	int len;

	/* First pass computes the needed size (+1 for the NUL) */
	len = snprintf(NULL, 0, "%*pb\n",
		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
	mask_str = kmalloc(len, GFP_KERNEL);
	if (!mask_str)
		return -ENOMEM;

	len = snprintf(mask_str, len, "%*pb\n",
		       cpumask_pr_args(tr->tracing_cpumask));
	/* Refuse reads that cannot take the whole mask in one go */
	if (len >= count)
		return -EINVAL;

	return simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
}

/*
 * tracing_set_cpumask - apply a new tracing cpumask to a trace_array
 * @tr: the trace array to update
 * @tracing_cpumask_new: the mask to install
 *
 * For every CPU whose bit flips, recording on that CPU's ring buffer(s)
 * is enabled or disabled accordingly before the mask is copied in.
 * Interrupts are disabled around the raw max_lock spinlock; the
 * disable-before-copy ordering is deliberate.
 */
int tracing_set_cpumask(struct trace_array *tr,
			cpumask_var_t tracing_cpumask_new)
{
	int cpu;

	if (!tr)
		return -EINVAL;

	local_irq_disable();
	arch_spin_lock(&tr->max_lock);
	for_each_tracing_cpu(cpu) {
		/*
		 * Increase/decrease the disabled counter if we are
		 * about to flip a bit in the cpumask:
		 */
		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
		    !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
#ifdef CONFIG_TRACER_SNAPSHOT
			ring_buffer_record_disable_cpu(tr->snapshot_buffer.buffer, cpu);
#endif
		}
		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
		    cpumask_test_cpu(cpu, tracing_cpumask_new)) {
			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
#ifdef CONFIG_TRACER_SNAPSHOT
			ring_buffer_record_enable_cpu(tr->snapshot_buffer.buffer, cpu);
#endif
		}
	}
	arch_spin_unlock(&tr->max_lock);
	local_irq_enable();

	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);

	return 0;
}

/* Write "tracing_cpumask": parse a user-supplied mask and apply it */
static ssize_t
tracing_cpumask_write(struct file *filp, const char __user *ubuf,
		      size_t count, loff_t *ppos)
{
	struct trace_array *tr = file_inode(filp)->i_private;
	cpumask_var_t tracing_cpumask_new;
	int err;

	/* Bound the count before parsing user data */
	if (count == 0 || count > KMALLOC_MAX_SIZE)
		return -EINVAL;

	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
		return -ENOMEM;

	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
	if (err)
		goto err_free;

	err = tracing_set_cpumask(tr, tracing_cpumask_new);
	if (err)
		goto err_free;

	free_cpumask_var(tracing_cpumask_new);

	return count;

err_free:
	free_cpumask_var(tracing_cpumask_new);

	return err;
}

static const struct file_operations tracing_cpumask_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_cpumask_read,
	.write		= tracing_cpumask_write,
	.release	= tracing_release_generic_tr,
	.llseek		= generic_file_llseek,
};

/*
 * Show "trace_options": global options first (prefixed "no" when clear),
 * then the current tracer's private options, if it has any.
 */
static int tracing_trace_options_show(struct seq_file *m, void *v)
{
	struct tracer_opt *trace_opts;
	struct trace_array *tr = m->private;
	struct tracer_flags *flags;
	u32 tracer_flags;
	int i;

	guard(mutex)(&trace_types_lock);

	for (i = 0; trace_options[i]; i++) {
		if (tr->trace_flags & (1ULL << i))
			seq_printf(m, "%s\n", trace_options[i]);
		else
			seq_printf(m, "no%s\n", trace_options[i]);
	}

	/* Tracer-specific options are optional */
	flags = tr->current_trace_flags;
	if (!flags || !flags->opts)
		return 0;

	tracer_flags = flags->val;
	trace_opts = flags->opts;

	for (i = 0; trace_opts[i].name; i++) {
		if (tracer_flags & trace_opts[i].bit)
			seq_printf(m, "%s\n", trace_opts[i].name);
		else
			seq_printf(m, "no%s\n", trace_opts[i].name);
	}

	return 0;
}

/*
 * Set or clear one tracer-private option bit, giving the tracer's
 * set_flag() callback a chance to veto the change first.
 */
static int __set_tracer_option(struct trace_array *tr,
			       struct tracer_flags *tracer_flags,
			       struct tracer_opt *opts, int neg)
{
	struct tracer *trace = tracer_flags->trace;
	int ret = 0;

	if (trace->set_flag)
		ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
	if (ret)
		return ret;

	if (neg)
		tracer_flags->val &= ~opts->bit;
	else
		tracer_flags->val |= opts->bit;
	return 0;
}

/* Try to assign a tracer specific option */
static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
{
	struct tracer_flags *tracer_flags = tr->current_trace_flags;
	struct tracer_opt *opts = NULL;
	int i;

	/* No private options: silently succeed (nothing to set) */
	if (!tracer_flags || !tracer_flags->opts)
		return 0;

	for (i = 0; tracer_flags->opts[i].name; i++) {
		opts = &tracer_flags->opts[i];

		if (strcmp(cmp, opts->name) == 0)
			return __set_tracer_option(tr, tracer_flags, opts, neg);
	}

	/* Unknown option name */
	return
-EINVAL;
}

/* Some tracers require overwrite to stay enabled */
int trace_keep_overwrite(struct tracer *tracer, u64 mask, int set)
{
	if (tracer->enabled && (mask & TRACE_ITER(OVERWRITE)) && !set)
		return -1;

	return 0;
}

/*
 * set_tracer_flag - set or clear one global trace option bit
 * @tr: the trace array being modified
 * @mask: single option bit (TRACE_ITER(...) value)
 * @enabled: non-zero to set the bit, zero to clear it
 *
 * Returns 0 on success, -EINVAL if the tracer vetoes the change or the
 * change is not allowed, -ENOMEM on allocation failure.  The flag is
 * flipped before the per-option side effects run; some options
 * (COPY_MARKER) manage tr->trace_flags themselves and return early.
 */
int set_tracer_flag(struct trace_array *tr, u64 mask, int enabled)
{
	/* These options also require event_mutex to be held by the caller */
	switch (mask) {
	case TRACE_ITER(RECORD_TGID):
	case TRACE_ITER(RECORD_CMD):
	case TRACE_ITER(TRACE_PRINTK):
	case TRACE_ITER(COPY_MARKER):
		lockdep_assert_held(&event_mutex);
	}

	/* do nothing if flag is already set */
	if (!!(tr->trace_flags & mask) == !!enabled)
		return 0;

	/* Give the tracer a chance to approve the change */
	if (tr->current_trace->flag_changed)
		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
			return -EINVAL;

	switch (mask) {
	case TRACE_ITER(TRACE_PRINTK):
		if (enabled) {
			update_printk_trace(tr);
		} else {
			/*
			 * The global_trace cannot clear this.
			 * Its flag only gets cleared if another instance sets it.
			 */
			if (printk_trace == &global_trace)
				return -EINVAL;
			/*
			 * An instance must always have it set;
			 * by default, that's the global_trace instance.
			 */
			if (printk_trace == tr)
				update_printk_trace(&global_trace);
		}
		break;

	case TRACE_ITER(COPY_MARKER):
		update_marker_trace(tr, enabled);
		/* update_marker_trace updates the tr->trace_flags */
		return 0;
	}

	if (enabled)
		tr->trace_flags |= mask;
	else
		tr->trace_flags &= ~mask;

	/* Per-option side effects, after the flag itself has been updated */
	switch (mask) {
	case TRACE_ITER(RECORD_CMD):
		trace_event_enable_cmd_record(enabled);
		break;

	case TRACE_ITER(RECORD_TGID):

		/* Roll the flag back if the tgid map cannot be allocated */
		if (trace_alloc_tgid_map() < 0) {
			tr->trace_flags &= ~TRACE_ITER(RECORD_TGID);
			return -ENOMEM;
		}

		trace_event_enable_tgid_record(enabled);
		break;

	case TRACE_ITER(EVENT_FORK):
		trace_event_follow_fork(tr, enabled);
		break;

	case TRACE_ITER(FUNC_FORK):
		ftrace_pid_follow_fork(tr, enabled);
		break;

	case TRACE_ITER(OVERWRITE):
		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
#ifdef CONFIG_TRACER_SNAPSHOT
		ring_buffer_change_overwrite(tr->snapshot_buffer.buffer, enabled);
#endif
		break;

	case TRACE_ITER(PRINTK):
		trace_printk_start_stop_comm(enabled);
		trace_printk_control(enabled);
		break;

#if defined(CONFIG_FUNCTION_PROFILER) && defined(CONFIG_FUNCTION_GRAPH_TRACER)
	case TRACE_GRAPH_GRAPH_TIME:
		ftrace_graph_graph_time_control(enabled);
		break;
#endif
	}

	return 0;
}

/*
 * trace_set_options - parse and apply one option string (e.g. "nofoo")
 * @tr: the trace array to apply the option to
 * @option: option name, optionally prefixed with "no"; may be modified
 *          in place by strstrip() and restored before returning
 *
 * Tries the global option table first, then the current tracer's
 * private options.  Returns 0/positive on success, negative on error.
 */
int trace_set_options(struct trace_array *tr, char *option)
{
	char *cmp;
	int neg = 0;
	int ret;
	size_t orig_len = strlen(option);
	int len;

	cmp = strstrip(option);

	/* A "no" prefix means clear the option */
	len = str_has_prefix(cmp, "no");
	if (len)
		neg = 1;

	cmp += len;

	/* Lock order: event_mutex before trace_types_lock */
	mutex_lock(&event_mutex);
	mutex_lock(&trace_types_lock);

	ret = match_string(trace_options, -1, cmp);
	/* If no option could be set, test the specific tracer options */
	if (ret < 0)
		ret = set_tracer_option(tr, cmp, neg);
	else
		ret = set_tracer_flag(tr, 1ULL << ret, !neg);

	mutex_unlock(&trace_types_lock);
	mutex_unlock(&event_mutex);

	/*
	 * If the first trailing whitespace is replaced with '\0' by strstrip,
	 * turn it back into a space.
	 */
	if (orig_len > strlen(option))
		option[strlen(option)] = ' ';

	return ret;
}

/* Apply the comma-separated option list passed on the kernel command line */
static void __init apply_trace_boot_options(void)
{
	char *buf = trace_boot_options_buf;
	char *option;

	while (true) {
		option = strsep(&buf, ",");

		if (!option)
			break;

		/* Skip empty entries (e.g. "a,,b") */
		if (*option)
			trace_set_options(&global_trace, option);

		/* Put back the comma to allow this to be called again */
		if (buf)
			*(buf - 1) = ',';
	}
}

/* Write handler for "trace_options": one option string per write */
static ssize_t
tracing_trace_options_write(struct file *filp, const char __user *ubuf,
			    size_t cnt, loff_t *ppos)
{
	struct seq_file *m = filp->private_data;
	struct trace_array *tr = m->private;
	char buf[64];
	int ret;

	/* Option names must fit the stack buffer (including the NUL) */
	if (cnt >= sizeof(buf))
		return -EINVAL;

	if (copy_from_user(buf, ubuf, cnt))
		return -EFAULT;

	buf[cnt] = 0;

	ret = trace_set_options(tr, buf);
	if (ret < 0)
		return ret;

	*ppos += cnt;

	return cnt;
}

/* Open for "trace_options"; takes a ref dropped in tracing_single_release_tr */
static int tracing_trace_options_open(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;
	int ret;

	ret = tracing_check_open_get_tr(tr);
	if (ret)
		return ret;

	ret = single_open(file, tracing_trace_options_show, inode->i_private);
	if (ret < 0)
		trace_array_put(tr);

	return ret;
}

static const struct file_operations tracing_iter_fops = {
	.open		= tracing_trace_options_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= tracing_single_release_tr,
	.write		= tracing_trace_options_write,
};
4186 static const char readme_msg[] = 4187 "tracing mini-HOWTO:\n\n" 4188 "By default tracefs removes all OTH file permission bits.\n" 4189 "When mounting tracefs an optional group id can be specified\n" 4190 "which adds the group to every directory and file in tracefs:\n\n" 4191 "\t e.g. mount -t tracefs [-o [gid=<gid>]] nodev /sys/kernel/tracing\n\n" 4192 "# echo 0 > tracing_on : quick way to disable tracing\n" 4193 "# echo 1 > tracing_on : quick way to re-enable tracing\n\n" 4194 " Important files:\n" 4195 " trace\t\t\t- The static contents of the buffer\n" 4196 "\t\t\t To clear the buffer write into this file: echo > trace\n" 4197 " trace_pipe\t\t- A consuming read to see the contents of the buffer\n" 4198 " current_tracer\t- function and latency tracers\n" 4199 " available_tracers\t- list of configured tracers for current_tracer\n" 4200 " error_log\t- error log for failed commands (that support it)\n" 4201 " buffer_size_kb\t- view and modify size of per cpu buffer\n" 4202 " buffer_total_size_kb - view total size of all cpu buffers\n\n" 4203 " trace_clock\t\t- change the clock used to order events\n" 4204 " local: Per cpu clock but may not be synced across CPUs\n" 4205 " global: Synced across CPUs but slows tracing down.\n" 4206 " counter: Not a clock, but just an increment\n" 4207 " uptime: Jiffy counter from time of boot\n" 4208 " perf: Same clock that perf events use\n" 4209 #ifdef CONFIG_X86_64 4210 " x86-tsc: TSC cycle counter\n" 4211 #endif 4212 "\n timestamp_mode\t- view the mode used to timestamp events\n" 4213 " delta: Delta difference against a buffer-wide timestamp\n" 4214 " absolute: Absolute (standalone) timestamp\n" 4215 "\n trace_marker\t\t- Writes into this file writes into the kernel buffer\n" 4216 "\n trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n" 4217 " tracing_cpumask\t- Limit which CPUs to trace\n" 4218 " instances\t\t- Make sub-buffers with: mkdir instances/foo\n" 4219 "\t\t\t Remove sub-buffer 
with rmdir\n" 4220 " trace_options\t\t- Set format or modify how tracing happens\n" 4221 "\t\t\t Disable an option by prefixing 'no' to the\n" 4222 "\t\t\t option name\n" 4223 " saved_cmdlines_size\t- echo command number in here to store comm-pid list\n" 4224 #ifdef CONFIG_DYNAMIC_FTRACE 4225 "\n available_filter_functions - list of functions that can be filtered on\n" 4226 " set_ftrace_filter\t- echo function name in here to only trace these\n" 4227 "\t\t\t functions\n" 4228 "\t accepts: func_full_name or glob-matching-pattern\n" 4229 "\t modules: Can select a group via module\n" 4230 "\t Format: :mod:<module-name>\n" 4231 "\t example: echo :mod:ext3 > set_ftrace_filter\n" 4232 "\t triggers: a command to perform when function is hit\n" 4233 "\t Format: <function>:<trigger>[:count]\n" 4234 "\t trigger: traceon, traceoff\n" 4235 "\t\t enable_event:<system>:<event>\n" 4236 "\t\t disable_event:<system>:<event>\n" 4237 #ifdef CONFIG_STACKTRACE 4238 "\t\t stacktrace\n" 4239 #endif 4240 #ifdef CONFIG_TRACER_SNAPSHOT 4241 "\t\t snapshot\n" 4242 #endif 4243 "\t\t dump\n" 4244 "\t\t cpudump\n" 4245 "\t example: echo do_fault:traceoff > set_ftrace_filter\n" 4246 "\t echo do_trap:traceoff:3 > set_ftrace_filter\n" 4247 "\t The first one will disable tracing every time do_fault is hit\n" 4248 "\t The second will disable tracing at most 3 times when do_trap is hit\n" 4249 "\t The first time do trap is hit and it disables tracing, the\n" 4250 "\t counter will decrement to 2. If tracing is already disabled,\n" 4251 "\t the counter will not decrement. 
It only decrements when the\n" 4252 "\t trigger did work\n" 4253 "\t To remove trigger without count:\n" 4254 "\t echo '!<function>:<trigger> > set_ftrace_filter\n" 4255 "\t To remove trigger with a count:\n" 4256 "\t echo '!<function>:<trigger>:0 > set_ftrace_filter\n" 4257 " set_ftrace_notrace\t- echo function name in here to never trace.\n" 4258 "\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n" 4259 "\t modules: Can select a group via module command :mod:\n" 4260 "\t Does not accept triggers\n" 4261 #endif /* CONFIG_DYNAMIC_FTRACE */ 4262 #ifdef CONFIG_FUNCTION_TRACER 4263 " set_ftrace_pid\t- Write pid(s) to only function trace those pids\n" 4264 "\t\t (function)\n" 4265 " set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n" 4266 "\t\t (function)\n" 4267 #endif 4268 #ifdef CONFIG_FUNCTION_GRAPH_TRACER 4269 " set_graph_function\t- Trace the nested calls of a function (function_graph)\n" 4270 " set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n" 4271 " max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n" 4272 #endif 4273 #ifdef CONFIG_TRACER_SNAPSHOT 4274 "\n snapshot\t\t- Like 'trace' but shows the content of the static\n" 4275 "\t\t\t snapshot buffer. 
Read the contents for more\n" 4276 "\t\t\t information\n" 4277 #endif 4278 #ifdef CONFIG_STACK_TRACER 4279 " stack_trace\t\t- Shows the max stack trace when active\n" 4280 " stack_max_size\t- Shows current max stack size that was traced\n" 4281 "\t\t\t Write into this file to reset the max size (trigger a\n" 4282 "\t\t\t new trace)\n" 4283 #ifdef CONFIG_DYNAMIC_FTRACE 4284 " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n" 4285 "\t\t\t traces\n" 4286 #endif 4287 #endif /* CONFIG_STACK_TRACER */ 4288 #ifdef CONFIG_DYNAMIC_EVENTS 4289 " dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n" 4290 "\t\t\t Write into this file to define/undefine new trace events.\n" 4291 #endif 4292 #ifdef CONFIG_KPROBE_EVENTS 4293 " kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n" 4294 "\t\t\t Write into this file to define/undefine new trace events.\n" 4295 #endif 4296 #ifdef CONFIG_UPROBE_EVENTS 4297 " uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n" 4298 "\t\t\t Write into this file to define/undefine new trace events.\n" 4299 #endif 4300 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \ 4301 defined(CONFIG_FPROBE_EVENTS) 4302 "\t accepts: event-definitions (one definition per line)\n" 4303 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) 4304 "\t Format: p[:[<group>/][<event>]] <place> [<args>]\n" 4305 "\t r[maxactive][:[<group>/][<event>]] <place> [<args>]\n" 4306 #endif 4307 #ifdef CONFIG_FPROBE_EVENTS 4308 "\t f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n" 4309 "\t t[:[<group>/][<event>]] <tracepoint> [<args>]\n" 4310 #endif 4311 #ifdef CONFIG_HIST_TRIGGERS 4312 "\t s:[synthetic/]<event> <field> [<field>]\n" 4313 #endif 4314 "\t e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n" 4315 "\t -:[<group>/][<event>]\n" 4316 #ifdef CONFIG_KPROBE_EVENTS 4317 "\t place: 
[<module>:]<symbol>[+<offset>]|<memaddr>\n" 4318 "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n" 4319 #endif 4320 #ifdef CONFIG_UPROBE_EVENTS 4321 " place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n" 4322 #endif 4323 "\t args: <name>=fetcharg[:type]\n" 4324 "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n" 4325 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API 4326 "\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n" 4327 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS 4328 "\t <argname>[->field[->field|.field...]],\n" 4329 #endif 4330 #else 4331 "\t $stack<index>, $stack, $retval, $comm,\n" 4332 #endif 4333 "\t +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n" 4334 "\t kernel return probes support: $retval, $arg<N>, $comm\n" 4335 "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n" 4336 "\t b<bit-width>@<bit-offset>/<container-size>, ustring,\n" 4337 "\t symstr, %pd/%pD, <type>\\[<array-size>\\]\n" 4338 #ifdef CONFIG_HIST_TRIGGERS 4339 "\t field: <stype> <name>;\n" 4340 "\t stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n" 4341 "\t [unsigned] char/int/long\n" 4342 #endif 4343 "\t efield: For event probes ('e' types), the field is on of the fields\n" 4344 "\t of the <attached-group>/<attached-event>.\n" 4345 #endif 4346 " set_event\t\t- Enables events by name written into it\n" 4347 "\t\t\t Can enable module events via: :mod:<module>\n" 4348 " events/\t\t- Directory containing all trace event subsystems:\n" 4349 " enable\t\t- Write 0/1 to enable/disable tracing of all events\n" 4350 " events/<system>/\t- Directory containing all trace events for <system>:\n" 4351 " enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n" 4352 "\t\t\t events\n" 4353 " filter\t\t- If set, only events passing filter are traced\n" 4354 " events/<system>/<event>/\t- Directory containing control files for\n" 4355 "\t\t\t <event>:\n" 4356 " enable\t\t- Write 0/1 to enable/disable tracing of <event>\n" 
4357 " filter\t\t- If set, only events passing filter are traced\n" 4358 " trigger\t\t- If set, a command to perform when event is hit\n" 4359 "\t Format: <trigger>[:count][if <filter>]\n" 4360 "\t trigger: traceon, traceoff\n" 4361 "\t enable_event:<system>:<event>\n" 4362 "\t disable_event:<system>:<event>\n" 4363 #ifdef CONFIG_HIST_TRIGGERS 4364 "\t enable_hist:<system>:<event>\n" 4365 "\t disable_hist:<system>:<event>\n" 4366 #endif 4367 #ifdef CONFIG_STACKTRACE 4368 "\t\t stacktrace\n" 4369 #endif 4370 #ifdef CONFIG_TRACER_SNAPSHOT 4371 "\t\t snapshot\n" 4372 #endif 4373 #ifdef CONFIG_HIST_TRIGGERS 4374 "\t\t hist (see below)\n" 4375 #endif 4376 "\t example: echo traceoff > events/block/block_unplug/trigger\n" 4377 "\t echo traceoff:3 > events/block/block_unplug/trigger\n" 4378 "\t echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n" 4379 "\t events/block/block_unplug/trigger\n" 4380 "\t The first disables tracing every time block_unplug is hit.\n" 4381 "\t The second disables tracing the first 3 times block_unplug is hit.\n" 4382 "\t The third enables the kmalloc event the first 3 times block_unplug\n" 4383 "\t is hit and has value of greater than 1 for the 'nr_rq' event field.\n" 4384 "\t Like function triggers, the counter is only decremented if it\n" 4385 "\t enabled or disabled tracing.\n" 4386 "\t To remove a trigger without a count:\n" 4387 "\t echo '!<trigger> > <system>/<event>/trigger\n" 4388 "\t To remove a trigger with a count:\n" 4389 "\t echo '!<trigger>:0 > <system>/<event>/trigger\n" 4390 "\t Filters can be ignored when removing a trigger.\n" 4391 #ifdef CONFIG_HIST_TRIGGERS 4392 " hist trigger\t- If set, event hits are aggregated into a hash table\n" 4393 "\t Format: hist:keys=<field1[,field2,...]>\n" 4394 "\t [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n" 4395 "\t [:values=<field1[,field2,...]>]\n" 4396 "\t [:sort=<field1[,field2,...]>]\n" 4397 "\t [:size=#entries]\n" 4398 "\t [:pause][:continue][:clear]\n" 4399 "\t 
[:name=histname1]\n" 4400 "\t [:nohitcount]\n" 4401 "\t [:<handler>.<action>]\n" 4402 "\t [if <filter>]\n\n" 4403 "\t Note, special fields can be used as well:\n" 4404 "\t common_timestamp - to record current timestamp\n" 4405 "\t common_cpu - to record the CPU the event happened on\n" 4406 "\n" 4407 "\t A hist trigger variable can be:\n" 4408 "\t - a reference to a field e.g. x=current_timestamp,\n" 4409 "\t - a reference to another variable e.g. y=$x,\n" 4410 "\t - a numeric literal: e.g. ms_per_sec=1000,\n" 4411 "\t - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n" 4412 "\n" 4413 "\t hist trigger arithmetic expressions support addition(+), subtraction(-),\n" 4414 "\t multiplication(*) and division(/) operators. An operand can be either a\n" 4415 "\t variable reference, field or numeric literal.\n" 4416 "\n" 4417 "\t When a matching event is hit, an entry is added to a hash\n" 4418 "\t table using the key(s) and value(s) named, and the value of a\n" 4419 "\t sum called 'hitcount' is incremented. Keys and values\n" 4420 "\t correspond to fields in the event's format description. Keys\n" 4421 "\t can be any field, or the special string 'common_stacktrace'.\n" 4422 "\t Compound keys consisting of up to two fields can be specified\n" 4423 "\t by the 'keys' keyword. Values must correspond to numeric\n" 4424 "\t fields. Sort keys consisting of up to two fields can be\n" 4425 "\t specified using the 'sort' keyword. The sort direction can\n" 4426 "\t be modified by appending '.descending' or '.ascending' to a\n" 4427 "\t sort field. 
The 'size' parameter can be used to specify more\n" 4428 "\t or fewer than the default 2048 entries for the hashtable size.\n" 4429 "\t If a hist trigger is given a name using the 'name' parameter,\n" 4430 "\t its histogram data will be shared with other triggers of the\n" 4431 "\t same name, and trigger hits will update this common data.\n\n" 4432 "\t Reading the 'hist' file for the event will dump the hash\n" 4433 "\t table in its entirety to stdout. If there are multiple hist\n" 4434 "\t triggers attached to an event, there will be a table for each\n" 4435 "\t trigger in the output. The table displayed for a named\n" 4436 "\t trigger will be the same as any other instance having the\n" 4437 "\t same name. The default format used to display a given field\n" 4438 "\t can be modified by appending any of the following modifiers\n" 4439 "\t to the field name, as applicable:\n\n" 4440 "\t .hex display a number as a hex value\n" 4441 "\t .sym display an address as a symbol\n" 4442 "\t .sym-offset display an address as a symbol and offset\n" 4443 "\t .execname display a common_pid as a program name\n" 4444 "\t .syscall display a syscall id as a syscall name\n" 4445 "\t .log2 display log2 value rather than raw number\n" 4446 "\t .buckets=size display values in groups of size rather than raw number\n" 4447 "\t .usecs display a common_timestamp in microseconds\n" 4448 "\t .percent display a number of percentage value\n" 4449 "\t .graph display a bar-graph of a value\n\n" 4450 "\t The 'pause' parameter can be used to pause an existing hist\n" 4451 "\t trigger or to start a hist trigger but not log any events\n" 4452 "\t until told to do so. 
'continue' can be used to start or\n" 4453 "\t restart a paused hist trigger.\n\n" 4454 "\t The 'clear' parameter will clear the contents of a running\n" 4455 "\t hist trigger and leave its current paused/active state\n" 4456 "\t unchanged.\n\n" 4457 "\t The 'nohitcount' (or NOHC) parameter will suppress display of\n" 4458 "\t raw hitcount in the histogram.\n\n" 4459 "\t The enable_hist and disable_hist triggers can be used to\n" 4460 "\t have one event conditionally start and stop another event's\n" 4461 "\t already-attached hist trigger. The syntax is analogous to\n" 4462 "\t the enable_event and disable_event triggers.\n\n" 4463 "\t Hist trigger handlers and actions are executed whenever a\n" 4464 "\t a histogram entry is added or updated. They take the form:\n\n" 4465 "\t <handler>.<action>\n\n" 4466 "\t The available handlers are:\n\n" 4467 "\t onmatch(matching.event) - invoke on addition or update\n" 4468 "\t onmax(var) - invoke if var exceeds current max\n" 4469 "\t onchange(var) - invoke action if var changes\n\n" 4470 "\t The available actions are:\n\n" 4471 "\t trace(<synthetic_event>,param list) - generate synthetic event\n" 4472 "\t save(field,...) 
- save current event fields\n" 4473 #ifdef CONFIG_TRACER_SNAPSHOT 4474 "\t snapshot() - snapshot the trace buffer\n\n" 4475 #endif 4476 #ifdef CONFIG_SYNTH_EVENTS 4477 " events/synthetic_events\t- Create/append/remove/show synthetic events\n" 4478 "\t Write into this file to define/undefine new synthetic events.\n" 4479 "\t example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n" 4480 #endif 4481 #endif 4482 ; 4483 4484 static ssize_t 4485 tracing_readme_read(struct file *filp, char __user *ubuf, 4486 size_t cnt, loff_t *ppos) 4487 { 4488 return simple_read_from_buffer(ubuf, cnt, ppos, 4489 readme_msg, strlen(readme_msg)); 4490 } 4491 4492 static const struct file_operations tracing_readme_fops = { 4493 .open = tracing_open_generic, 4494 .read = tracing_readme_read, 4495 .llseek = generic_file_llseek, 4496 }; 4497 4498 #ifdef CONFIG_TRACE_EVAL_MAP_FILE 4499 static union trace_eval_map_item * 4500 update_eval_map(union trace_eval_map_item *ptr) 4501 { 4502 if (!ptr->map.eval_string) { 4503 if (ptr->tail.next) { 4504 ptr = ptr->tail.next; 4505 /* Set ptr to the next real item (skip head) */ 4506 ptr++; 4507 } else 4508 return NULL; 4509 } 4510 return ptr; 4511 } 4512 4513 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos) 4514 { 4515 union trace_eval_map_item *ptr = v; 4516 4517 /* 4518 * Paranoid! If ptr points to end, we don't want to increment past it. 4519 * This really should never happen. 
4520 */ 4521 (*pos)++; 4522 ptr = update_eval_map(ptr); 4523 if (WARN_ON_ONCE(!ptr)) 4524 return NULL; 4525 4526 ptr++; 4527 ptr = update_eval_map(ptr); 4528 4529 return ptr; 4530 } 4531 4532 static void *eval_map_start(struct seq_file *m, loff_t *pos) 4533 { 4534 union trace_eval_map_item *v; 4535 loff_t l = 0; 4536 4537 mutex_lock(&trace_eval_mutex); 4538 4539 v = trace_eval_maps; 4540 if (v) 4541 v++; 4542 4543 while (v && l < *pos) { 4544 v = eval_map_next(m, v, &l); 4545 } 4546 4547 return v; 4548 } 4549 4550 static void eval_map_stop(struct seq_file *m, void *v) 4551 { 4552 mutex_unlock(&trace_eval_mutex); 4553 } 4554 4555 static int eval_map_show(struct seq_file *m, void *v) 4556 { 4557 union trace_eval_map_item *ptr = v; 4558 4559 seq_printf(m, "%s %ld (%s)\n", 4560 ptr->map.eval_string, ptr->map.eval_value, 4561 ptr->map.system); 4562 4563 return 0; 4564 } 4565 4566 static const struct seq_operations tracing_eval_map_seq_ops = { 4567 .start = eval_map_start, 4568 .next = eval_map_next, 4569 .stop = eval_map_stop, 4570 .show = eval_map_show, 4571 }; 4572 4573 static int tracing_eval_map_open(struct inode *inode, struct file *filp) 4574 { 4575 int ret; 4576 4577 ret = tracing_check_open_get_tr(NULL); 4578 if (ret) 4579 return ret; 4580 4581 return seq_open(filp, &tracing_eval_map_seq_ops); 4582 } 4583 4584 static const struct file_operations tracing_eval_map_fops = { 4585 .open = tracing_eval_map_open, 4586 .read = seq_read, 4587 .llseek = seq_lseek, 4588 .release = seq_release, 4589 }; 4590 4591 static inline union trace_eval_map_item * 4592 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr) 4593 { 4594 /* Return tail of array given the head */ 4595 return ptr + ptr->head.length + 1; 4596 } 4597 4598 static void 4599 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start, 4600 int len) 4601 { 4602 struct trace_eval_map **stop; 4603 struct trace_eval_map **map; 4604 union trace_eval_map_item *map_array; 4605 union 
trace_eval_map_item *ptr; 4606 4607 stop = start + len; 4608 4609 /* 4610 * The trace_eval_maps contains the map plus a head and tail item, 4611 * where the head holds the module and length of array, and the 4612 * tail holds a pointer to the next list. 4613 */ 4614 map_array = kmalloc_objs(*map_array, len + 2); 4615 if (!map_array) { 4616 pr_warn("Unable to allocate trace eval mapping\n"); 4617 return; 4618 } 4619 4620 guard(mutex)(&trace_eval_mutex); 4621 4622 if (!trace_eval_maps) 4623 trace_eval_maps = map_array; 4624 else { 4625 ptr = trace_eval_maps; 4626 for (;;) { 4627 ptr = trace_eval_jmp_to_tail(ptr); 4628 if (!ptr->tail.next) 4629 break; 4630 ptr = ptr->tail.next; 4631 4632 } 4633 ptr->tail.next = map_array; 4634 } 4635 map_array->head.mod = mod; 4636 map_array->head.length = len; 4637 map_array++; 4638 4639 for (map = start; (unsigned long)map < (unsigned long)stop; map++) { 4640 map_array->map = **map; 4641 map_array++; 4642 } 4643 memset(map_array, 0, sizeof(*map_array)); 4644 } 4645 4646 static void trace_create_eval_file(struct dentry *d_tracer) 4647 { 4648 trace_create_file("eval_map", TRACE_MODE_READ, d_tracer, 4649 NULL, &tracing_eval_map_fops); 4650 } 4651 4652 #else /* CONFIG_TRACE_EVAL_MAP_FILE */ 4653 static inline void trace_create_eval_file(struct dentry *d_tracer) { } 4654 static inline void trace_insert_eval_map_file(struct module *mod, 4655 struct trace_eval_map **start, int len) { } 4656 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */ 4657 4658 static void 4659 trace_event_update_with_eval_map(struct module *mod, 4660 struct trace_eval_map **start, 4661 int len) 4662 { 4663 struct trace_eval_map **map; 4664 4665 /* Always run sanitizer only if btf_type_tag attr exists. 
*/ 4666 if (len <= 0) { 4667 if (!(IS_ENABLED(CONFIG_DEBUG_INFO_BTF) && 4668 IS_ENABLED(CONFIG_PAHOLE_HAS_BTF_TAG) && 4669 __has_attribute(btf_type_tag))) 4670 return; 4671 } 4672 4673 map = start; 4674 4675 trace_event_update_all(map, len); 4676 4677 if (len <= 0) 4678 return; 4679 4680 trace_insert_eval_map_file(mod, start, len); 4681 } 4682 4683 static ssize_t 4684 tracing_set_trace_read(struct file *filp, char __user *ubuf, 4685 size_t cnt, loff_t *ppos) 4686 { 4687 struct trace_array *tr = filp->private_data; 4688 char buf[MAX_TRACER_SIZE+2]; 4689 int r; 4690 4691 scoped_guard(mutex, &trace_types_lock) { 4692 r = sprintf(buf, "%s\n", tr->current_trace->name); 4693 } 4694 4695 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 4696 } 4697 4698 int tracer_init(struct tracer *t, struct trace_array *tr) 4699 { 4700 tracing_reset_online_cpus(&tr->array_buffer); 4701 update_last_data_if_empty(tr); 4702 return t->init(tr); 4703 } 4704 4705 void trace_set_buffer_entries(struct array_buffer *buf, unsigned long val) 4706 { 4707 int cpu; 4708 4709 for_each_tracing_cpu(cpu) 4710 per_cpu_ptr(buf->data, cpu)->entries = val; 4711 } 4712 4713 static void update_buffer_entries(struct array_buffer *buf, int cpu) 4714 { 4715 if (cpu == RING_BUFFER_ALL_CPUS) { 4716 trace_set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0)); 4717 } else { 4718 per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu); 4719 } 4720 } 4721 4722 static int __tracing_resize_ring_buffer(struct trace_array *tr, 4723 unsigned long size, int cpu) 4724 { 4725 int ret; 4726 4727 /* 4728 * If kernel or user changes the size of the ring buffer 4729 * we use the size that was given, and we can forget about 4730 * expanding it later. 
4731 */ 4732 trace_set_ring_buffer_expanded(tr); 4733 4734 /* May be called before buffers are initialized */ 4735 if (!tr->array_buffer.buffer) 4736 return 0; 4737 4738 /* Do not allow tracing while resizing ring buffer */ 4739 tracing_stop_tr(tr); 4740 4741 ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu); 4742 if (ret < 0) 4743 goto out_start; 4744 4745 #ifdef CONFIG_TRACER_SNAPSHOT 4746 if (!tr->allocated_snapshot) 4747 goto out; 4748 4749 ret = ring_buffer_resize(tr->snapshot_buffer.buffer, size, cpu); 4750 if (ret < 0) { 4751 int r = resize_buffer_duplicate_size(&tr->array_buffer, 4752 &tr->array_buffer, cpu); 4753 if (r < 0) { 4754 /* 4755 * AARGH! We are left with different 4756 * size max buffer!!!! 4757 * The max buffer is our "snapshot" buffer. 4758 * When a tracer needs a snapshot (one of the 4759 * latency tracers), it swaps the max buffer 4760 * with the saved snap shot. We succeeded to 4761 * update the size of the main buffer, but failed to 4762 * update the size of the max buffer. But when we tried 4763 * to reset the main buffer to the original size, we 4764 * failed there too. This is very unlikely to 4765 * happen, but if it does, warn and kill all 4766 * tracing. 
4767 */ 4768 WARN_ON(1); 4769 tracing_disabled = 1; 4770 } 4771 goto out_start; 4772 } 4773 4774 update_buffer_entries(&tr->snapshot_buffer, cpu); 4775 4776 out: 4777 #endif /* CONFIG_TRACER_SNAPSHOT */ 4778 4779 update_buffer_entries(&tr->array_buffer, cpu); 4780 out_start: 4781 tracing_start_tr(tr); 4782 return ret; 4783 } 4784 4785 ssize_t tracing_resize_ring_buffer(struct trace_array *tr, 4786 unsigned long size, int cpu_id) 4787 { 4788 guard(mutex)(&trace_types_lock); 4789 4790 if (cpu_id != RING_BUFFER_ALL_CPUS) { 4791 /* make sure, this cpu is enabled in the mask */ 4792 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) 4793 return -EINVAL; 4794 } 4795 4796 return __tracing_resize_ring_buffer(tr, size, cpu_id); 4797 } 4798 4799 struct trace_mod_entry { 4800 unsigned long mod_addr; 4801 char mod_name[MODULE_NAME_LEN]; 4802 }; 4803 4804 struct trace_scratch { 4805 unsigned int clock_id; 4806 unsigned long text_addr; 4807 unsigned long nr_entries; 4808 struct trace_mod_entry entries[]; 4809 }; 4810 4811 static DEFINE_MUTEX(scratch_mutex); 4812 4813 static int cmp_mod_entry(const void *key, const void *pivot) 4814 { 4815 unsigned long addr = (unsigned long)key; 4816 const struct trace_mod_entry *ent = pivot; 4817 4818 if (addr < ent[0].mod_addr) 4819 return -1; 4820 4821 return addr >= ent[1].mod_addr; 4822 } 4823 4824 /** 4825 * trace_adjust_address() - Adjust prev boot address to current address. 4826 * @tr: Persistent ring buffer's trace_array. 4827 * @addr: Address in @tr which is adjusted. 4828 */ 4829 unsigned long trace_adjust_address(struct trace_array *tr, unsigned long addr) 4830 { 4831 struct trace_module_delta *module_delta; 4832 struct trace_scratch *tscratch; 4833 struct trace_mod_entry *entry; 4834 unsigned long raddr; 4835 int idx = 0, nr_entries; 4836 4837 /* If we don't have last boot delta, return the address */ 4838 if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) 4839 return addr; 4840 4841 /* tr->module_delta must be protected by rcu. 
*/ 4842 guard(rcu)(); 4843 tscratch = tr->scratch; 4844 /* if there is no tscrach, module_delta must be NULL. */ 4845 module_delta = READ_ONCE(tr->module_delta); 4846 if (!module_delta || !tscratch->nr_entries || 4847 tscratch->entries[0].mod_addr > addr) { 4848 raddr = addr + tr->text_delta; 4849 return __is_kernel(raddr) || is_kernel_core_data(raddr) || 4850 is_kernel_rodata(raddr) ? raddr : addr; 4851 } 4852 4853 /* Note that entries must be sorted. */ 4854 nr_entries = tscratch->nr_entries; 4855 if (nr_entries == 1 || 4856 tscratch->entries[nr_entries - 1].mod_addr < addr) 4857 idx = nr_entries - 1; 4858 else { 4859 entry = __inline_bsearch((void *)addr, 4860 tscratch->entries, 4861 nr_entries - 1, 4862 sizeof(tscratch->entries[0]), 4863 cmp_mod_entry); 4864 if (entry) 4865 idx = entry - tscratch->entries; 4866 } 4867 4868 return addr + module_delta->delta[idx]; 4869 } 4870 4871 #ifdef CONFIG_MODULES 4872 static int save_mod(struct module *mod, void *data) 4873 { 4874 struct trace_array *tr = data; 4875 struct trace_scratch *tscratch; 4876 struct trace_mod_entry *entry; 4877 unsigned int size; 4878 4879 tscratch = tr->scratch; 4880 if (!tscratch) 4881 return -1; 4882 size = tr->scratch_size; 4883 4884 if (struct_size(tscratch, entries, tscratch->nr_entries + 1) > size) 4885 return -1; 4886 4887 entry = &tscratch->entries[tscratch->nr_entries]; 4888 4889 tscratch->nr_entries++; 4890 4891 entry->mod_addr = (unsigned long)mod->mem[MOD_TEXT].base; 4892 strscpy(entry->mod_name, mod->name); 4893 4894 return 0; 4895 } 4896 #else 4897 static int save_mod(struct module *mod, void *data) 4898 { 4899 return 0; 4900 } 4901 #endif 4902 4903 static void update_last_data(struct trace_array *tr) 4904 { 4905 struct trace_module_delta *module_delta; 4906 struct trace_scratch *tscratch; 4907 4908 if (!(tr->flags & TRACE_ARRAY_FL_BOOT)) 4909 return; 4910 4911 if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) 4912 return; 4913 4914 /* Only if the buffer has previous boot data clear and 
update it. */ 4915 tr->flags &= ~TRACE_ARRAY_FL_LAST_BOOT; 4916 4917 /* If this is a backup instance, mark it for autoremove. */ 4918 if (tr->flags & TRACE_ARRAY_FL_VMALLOC) 4919 tr->free_on_close = true; 4920 4921 /* Reset the module list and reload them */ 4922 if (tr->scratch) { 4923 struct trace_scratch *tscratch = tr->scratch; 4924 4925 tscratch->clock_id = tr->clock_id; 4926 memset(tscratch->entries, 0, 4927 flex_array_size(tscratch, entries, tscratch->nr_entries)); 4928 tscratch->nr_entries = 0; 4929 4930 guard(mutex)(&scratch_mutex); 4931 module_for_each_mod(save_mod, tr); 4932 } 4933 4934 /* 4935 * Need to clear all CPU buffers as there cannot be events 4936 * from the previous boot mixed with events with this boot 4937 * as that will cause a confusing trace. Need to clear all 4938 * CPU buffers, even for those that may currently be offline. 4939 */ 4940 tracing_reset_all_cpus(&tr->array_buffer); 4941 4942 /* Using current data now */ 4943 tr->text_delta = 0; 4944 4945 if (!tr->scratch) 4946 return; 4947 4948 tscratch = tr->scratch; 4949 module_delta = READ_ONCE(tr->module_delta); 4950 WRITE_ONCE(tr->module_delta, NULL); 4951 kfree_rcu(module_delta, rcu); 4952 4953 /* Set the persistent ring buffer meta data to this address */ 4954 tscratch->text_addr = (unsigned long)_text; 4955 } 4956 4957 /** 4958 * tracing_update_buffers - used by tracing facility to expand ring buffers 4959 * @tr: The tracing instance 4960 * 4961 * To save on memory when the tracing is never used on a system with it 4962 * configured in. The ring buffers are set to a minimum size. But once 4963 * a user starts to use the tracing facility, then they need to grow 4964 * to their default size. 4965 * 4966 * This function is to be called when a tracer is about to be used. 
4967 */ 4968 int tracing_update_buffers(struct trace_array *tr) 4969 { 4970 int ret = 0; 4971 4972 if (!tr) 4973 tr = &global_trace; 4974 4975 guard(mutex)(&trace_types_lock); 4976 4977 update_last_data(tr); 4978 4979 if (!tr->ring_buffer_expanded) 4980 ret = __tracing_resize_ring_buffer(tr, trace_buf_size, 4981 RING_BUFFER_ALL_CPUS); 4982 return ret; 4983 } 4984 4985 /* 4986 * Used to clear out the tracer before deletion of an instance. 4987 * Must have trace_types_lock held. 4988 */ 4989 static void tracing_set_nop(struct trace_array *tr) 4990 { 4991 if (tr->current_trace == &nop_trace) 4992 return; 4993 4994 tr->current_trace->enabled--; 4995 4996 if (tr->current_trace->reset) 4997 tr->current_trace->reset(tr); 4998 4999 tr->current_trace = &nop_trace; 5000 tr->current_trace_flags = nop_trace.flags; 5001 } 5002 5003 static bool tracer_options_updated; 5004 5005 int tracing_set_tracer(struct trace_array *tr, const char *buf) 5006 { 5007 struct tracer *trace = NULL; 5008 struct tracers *t; 5009 bool had_max_tr; 5010 int ret; 5011 5012 guard(mutex)(&trace_types_lock); 5013 5014 update_last_data(tr); 5015 5016 if (!tr->ring_buffer_expanded) { 5017 ret = __tracing_resize_ring_buffer(tr, trace_buf_size, 5018 RING_BUFFER_ALL_CPUS); 5019 if (ret < 0) 5020 return ret; 5021 ret = 0; 5022 } 5023 5024 list_for_each_entry(t, &tr->tracers, list) { 5025 if (strcmp(t->tracer->name, buf) == 0) { 5026 trace = t->tracer; 5027 break; 5028 } 5029 } 5030 if (!trace) 5031 return -EINVAL; 5032 5033 if (trace == tr->current_trace) 5034 return 0; 5035 5036 #ifdef CONFIG_TRACER_SNAPSHOT 5037 if (tracer_uses_snapshot(trace)) { 5038 local_irq_disable(); 5039 arch_spin_lock(&tr->max_lock); 5040 ret = tr->cond_snapshot ? 
-EBUSY : 0; 5041 arch_spin_unlock(&tr->max_lock); 5042 local_irq_enable(); 5043 if (ret) 5044 return ret; 5045 } 5046 #endif 5047 /* Some tracers won't work on kernel command line */ 5048 if (system_state < SYSTEM_RUNNING && trace->noboot) { 5049 pr_warn("Tracer '%s' is not allowed on command line, ignored\n", 5050 trace->name); 5051 return -EINVAL; 5052 } 5053 5054 /* Some tracers are only allowed for the top level buffer */ 5055 if (!trace_ok_for_array(trace, tr)) 5056 return -EINVAL; 5057 5058 /* If trace pipe files are being read, we can't change the tracer */ 5059 if (tr->trace_ref) 5060 return -EBUSY; 5061 5062 trace_branch_disable(); 5063 5064 tr->current_trace->enabled--; 5065 5066 if (tr->current_trace->reset) 5067 tr->current_trace->reset(tr); 5068 5069 had_max_tr = tracer_uses_snapshot(tr->current_trace); 5070 5071 /* Current trace needs to be nop_trace before synchronize_rcu */ 5072 tr->current_trace = &nop_trace; 5073 tr->current_trace_flags = nop_trace.flags; 5074 5075 if (had_max_tr && !tracer_uses_snapshot(trace)) { 5076 /* 5077 * We need to make sure that the update_max_tr sees that 5078 * current_trace changed to nop_trace to keep it from 5079 * swapping the buffers after we resize it. 5080 * The update_max_tr is called from interrupts disabled 5081 * so a synchronized_sched() is sufficient. 5082 */ 5083 synchronize_rcu(); 5084 free_snapshot(tr); 5085 tracing_disarm_snapshot(tr); 5086 } 5087 5088 if (!had_max_tr && tracer_uses_snapshot(trace)) { 5089 ret = tracing_arm_snapshot_locked(tr); 5090 if (ret) 5091 return ret; 5092 } 5093 5094 tr->current_trace_flags = t->flags ? 
: t->tracer->flags; 5095 5096 if (trace->init) { 5097 ret = tracer_init(trace, tr); 5098 if (ret) { 5099 if (tracer_uses_snapshot(trace)) 5100 tracing_disarm_snapshot(tr); 5101 tr->current_trace_flags = nop_trace.flags; 5102 return ret; 5103 } 5104 } 5105 5106 tr->current_trace = trace; 5107 tr->current_trace->enabled++; 5108 trace_branch_enable(tr); 5109 5110 return 0; 5111 } 5112 5113 static ssize_t 5114 tracing_set_trace_write(struct file *filp, const char __user *ubuf, 5115 size_t cnt, loff_t *ppos) 5116 { 5117 struct trace_array *tr = filp->private_data; 5118 char buf[MAX_TRACER_SIZE+1]; 5119 char *name; 5120 size_t ret; 5121 int err; 5122 5123 ret = cnt; 5124 5125 if (cnt > MAX_TRACER_SIZE) 5126 cnt = MAX_TRACER_SIZE; 5127 5128 if (copy_from_user(buf, ubuf, cnt)) 5129 return -EFAULT; 5130 5131 buf[cnt] = 0; 5132 5133 name = strim(buf); 5134 5135 err = tracing_set_tracer(tr, name); 5136 if (err) 5137 return err; 5138 5139 *ppos += ret; 5140 5141 return ret; 5142 } 5143 5144 ssize_t tracing_nsecs_read(unsigned long *ptr, char __user *ubuf, 5145 size_t cnt, loff_t *ppos) 5146 { 5147 char buf[64]; 5148 int r; 5149 5150 r = snprintf(buf, sizeof(buf), "%ld\n", 5151 *ptr == (unsigned long)-1 ? 
-1 : nsecs_to_usecs(*ptr)); 5152 if (r > sizeof(buf)) 5153 r = sizeof(buf); 5154 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 5155 } 5156 5157 ssize_t tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf, 5158 size_t cnt, loff_t *ppos) 5159 { 5160 unsigned long val; 5161 int ret; 5162 5163 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 5164 if (ret) 5165 return ret; 5166 5167 *ptr = val * 1000; 5168 5169 return cnt; 5170 } 5171 5172 static ssize_t 5173 tracing_thresh_read(struct file *filp, char __user *ubuf, 5174 size_t cnt, loff_t *ppos) 5175 { 5176 return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos); 5177 } 5178 5179 static ssize_t 5180 tracing_thresh_write(struct file *filp, const char __user *ubuf, 5181 size_t cnt, loff_t *ppos) 5182 { 5183 struct trace_array *tr = filp->private_data; 5184 int ret; 5185 5186 guard(mutex)(&trace_types_lock); 5187 ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos); 5188 if (ret < 0) 5189 return ret; 5190 5191 if (tr->current_trace->update_thresh) { 5192 ret = tr->current_trace->update_thresh(tr); 5193 if (ret < 0) 5194 return ret; 5195 } 5196 5197 return cnt; 5198 } 5199 5200 static int open_pipe_on_cpu(struct trace_array *tr, int cpu) 5201 { 5202 if (cpu == RING_BUFFER_ALL_CPUS) { 5203 if (cpumask_empty(tr->pipe_cpumask)) { 5204 cpumask_setall(tr->pipe_cpumask); 5205 return 0; 5206 } 5207 } else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) { 5208 cpumask_set_cpu(cpu, tr->pipe_cpumask); 5209 return 0; 5210 } 5211 return -EBUSY; 5212 } 5213 5214 static void close_pipe_on_cpu(struct trace_array *tr, int cpu) 5215 { 5216 if (cpu == RING_BUFFER_ALL_CPUS) { 5217 WARN_ON(!cpumask_full(tr->pipe_cpumask)); 5218 cpumask_clear(tr->pipe_cpumask); 5219 } else { 5220 WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask)); 5221 cpumask_clear_cpu(cpu, tr->pipe_cpumask); 5222 } 5223 } 5224 5225 static int tracing_open_pipe(struct inode *inode, struct file *filp) 5226 { 5227 struct trace_array *tr = 
inode->i_private; 5228 struct trace_iterator *iter; 5229 int cpu; 5230 int ret; 5231 5232 ret = tracing_check_open_get_tr(tr); 5233 if (ret) 5234 return ret; 5235 5236 guard(mutex)(&trace_types_lock); 5237 cpu = tracing_get_cpu(inode); 5238 ret = open_pipe_on_cpu(tr, cpu); 5239 if (ret) 5240 goto fail_pipe_on_cpu; 5241 5242 /* create a buffer to store the information to pass to userspace */ 5243 iter = kzalloc_obj(*iter); 5244 if (!iter) { 5245 ret = -ENOMEM; 5246 goto fail_alloc_iter; 5247 } 5248 5249 trace_seq_init(&iter->seq); 5250 iter->trace = tr->current_trace; 5251 5252 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) { 5253 ret = -ENOMEM; 5254 goto fail; 5255 } 5256 5257 /* trace pipe does not show start of buffer */ 5258 cpumask_setall(iter->started); 5259 5260 if (tr->trace_flags & TRACE_ITER(LATENCY_FMT)) 5261 iter->iter_flags |= TRACE_FILE_LAT_FMT; 5262 5263 /* Output in nanoseconds only if we are using a clock in nanoseconds. */ 5264 if (trace_clocks[tr->clock_id].in_ns) 5265 iter->iter_flags |= TRACE_FILE_TIME_IN_NS; 5266 5267 iter->tr = tr; 5268 iter->array_buffer = &tr->array_buffer; 5269 iter->cpu_file = cpu; 5270 mutex_init(&iter->mutex); 5271 filp->private_data = iter; 5272 5273 if (iter->trace->pipe_open) 5274 iter->trace->pipe_open(iter); 5275 5276 nonseekable_open(inode, filp); 5277 5278 tr->trace_ref++; 5279 5280 return ret; 5281 5282 fail: 5283 kfree(iter); 5284 fail_alloc_iter: 5285 close_pipe_on_cpu(tr, cpu); 5286 fail_pipe_on_cpu: 5287 __trace_array_put(tr); 5288 return ret; 5289 } 5290 5291 static int tracing_release_pipe(struct inode *inode, struct file *file) 5292 { 5293 struct trace_iterator *iter = file->private_data; 5294 struct trace_array *tr = inode->i_private; 5295 5296 scoped_guard(mutex, &trace_types_lock) { 5297 tr->trace_ref--; 5298 5299 if (iter->trace->pipe_close) 5300 iter->trace->pipe_close(iter); 5301 close_pipe_on_cpu(tr, iter->cpu_file); 5302 } 5303 5304 free_trace_iter_content(iter); 5305 kfree(iter); 5306 5307 
trace_array_put(tr); 5308 5309 return 0; 5310 } 5311 5312 static __poll_t 5313 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table) 5314 { 5315 struct trace_array *tr = iter->tr; 5316 5317 /* Iterators are static, they should be filled or empty */ 5318 if (trace_buffer_iter(iter, iter->cpu_file)) 5319 return EPOLLIN | EPOLLRDNORM; 5320 5321 if (tr->trace_flags & TRACE_ITER(BLOCK)) 5322 /* 5323 * Always select as readable when in blocking mode 5324 */ 5325 return EPOLLIN | EPOLLRDNORM; 5326 else 5327 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file, 5328 filp, poll_table, iter->tr->buffer_percent); 5329 } 5330 5331 static __poll_t 5332 tracing_poll_pipe(struct file *filp, poll_table *poll_table) 5333 { 5334 struct trace_iterator *iter = filp->private_data; 5335 5336 return trace_poll(iter, filp, poll_table); 5337 } 5338 5339 /* Must be called with iter->mutex held. */ 5340 static int tracing_wait_pipe(struct file *filp) 5341 { 5342 struct trace_iterator *iter = filp->private_data; 5343 int ret; 5344 5345 while (trace_empty(iter)) { 5346 5347 if ((filp->f_flags & O_NONBLOCK)) { 5348 return -EAGAIN; 5349 } 5350 5351 /* 5352 * We block until we read something and tracing is disabled. 5353 * We still block if tracing is disabled, but we have never 5354 * read anything. This allows a user to cat this file, and 5355 * then enable tracing. But after we have read something, 5356 * we give an EOF when tracing is again disabled. 5357 * 5358 * iter->pos will be 0 if we haven't read anything. 
5359 */ 5360 if (!tracer_tracing_is_on(iter->tr) && iter->pos) 5361 break; 5362 5363 mutex_unlock(&iter->mutex); 5364 5365 ret = wait_on_pipe(iter, 0); 5366 5367 mutex_lock(&iter->mutex); 5368 5369 if (ret) 5370 return ret; 5371 } 5372 5373 return 1; 5374 } 5375 5376 static bool update_last_data_if_empty(struct trace_array *tr) 5377 { 5378 if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) 5379 return false; 5380 5381 if (!ring_buffer_empty(tr->array_buffer.buffer)) 5382 return false; 5383 5384 /* 5385 * If the buffer contains the last boot data and all per-cpu 5386 * buffers are empty, reset it from the kernel side. 5387 */ 5388 update_last_data(tr); 5389 return true; 5390 } 5391 5392 /* 5393 * Consumer reader. 5394 */ 5395 static ssize_t 5396 tracing_read_pipe(struct file *filp, char __user *ubuf, 5397 size_t cnt, loff_t *ppos) 5398 { 5399 struct trace_iterator *iter = filp->private_data; 5400 ssize_t sret; 5401 5402 /* 5403 * Avoid more than one consumer on a single file descriptor 5404 * This is just a matter of traces coherency, the ring buffer itself 5405 * is protected. 
5406 */ 5407 guard(mutex)(&iter->mutex); 5408 5409 /* return any leftover data */ 5410 sret = trace_seq_to_user(&iter->seq, ubuf, cnt); 5411 if (sret != -EBUSY) 5412 return sret; 5413 5414 trace_seq_init(&iter->seq); 5415 5416 if (iter->trace->read) { 5417 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos); 5418 if (sret) 5419 return sret; 5420 } 5421 5422 waitagain: 5423 if (update_last_data_if_empty(iter->tr)) 5424 return 0; 5425 5426 sret = tracing_wait_pipe(filp); 5427 if (sret <= 0) 5428 return sret; 5429 5430 /* stop when tracing is finished */ 5431 if (trace_empty(iter)) 5432 return 0; 5433 5434 if (cnt >= TRACE_SEQ_BUFFER_SIZE) 5435 cnt = TRACE_SEQ_BUFFER_SIZE - 1; 5436 5437 /* reset all but tr, trace, and overruns */ 5438 trace_iterator_reset(iter); 5439 cpumask_clear(iter->started); 5440 trace_seq_init(&iter->seq); 5441 5442 trace_event_read_lock(); 5443 trace_access_lock(iter->cpu_file); 5444 while (trace_find_next_entry_inc(iter) != NULL) { 5445 enum print_line_t ret; 5446 int save_len = iter->seq.seq.len; 5447 5448 ret = print_trace_line(iter); 5449 if (ret == TRACE_TYPE_PARTIAL_LINE) { 5450 /* 5451 * If one print_trace_line() fills entire trace_seq in one shot, 5452 * trace_seq_to_user() will returns -EBUSY because save_len == 0, 5453 * In this case, we need to consume it, otherwise, loop will peek 5454 * this event next time, resulting in an infinite loop. 5455 */ 5456 if (save_len == 0) { 5457 iter->seq.full = 0; 5458 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n"); 5459 trace_consume(iter); 5460 break; 5461 } 5462 5463 /* In other cases, don't print partial lines */ 5464 iter->seq.seq.len = save_len; 5465 break; 5466 } 5467 if (ret != TRACE_TYPE_NO_CONSUME) 5468 trace_consume(iter); 5469 5470 if (trace_seq_used(&iter->seq) >= cnt) 5471 break; 5472 5473 /* 5474 * Setting the full flag means we reached the trace_seq buffer 5475 * size and we should leave by partial output condition above. 
5476 * One of the trace_seq_* functions is not used properly. 5477 */ 5478 WARN_ONCE(iter->seq.full, "full flag set for trace type %d", 5479 iter->ent->type); 5480 } 5481 trace_access_unlock(iter->cpu_file); 5482 trace_event_read_unlock(); 5483 5484 /* Now copy what we have to the user */ 5485 sret = trace_seq_to_user(&iter->seq, ubuf, cnt); 5486 if (iter->seq.readpos >= trace_seq_used(&iter->seq)) 5487 trace_seq_init(&iter->seq); 5488 5489 /* 5490 * If there was nothing to send to user, in spite of consuming trace 5491 * entries, go back to wait for more entries. 5492 */ 5493 if (sret == -EBUSY) 5494 goto waitagain; 5495 5496 return sret; 5497 } 5498 5499 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd, 5500 unsigned int idx) 5501 { 5502 __free_page(spd->pages[idx]); 5503 } 5504 5505 static size_t 5506 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter) 5507 { 5508 size_t count; 5509 int save_len; 5510 int ret; 5511 5512 /* Seq buffer is page-sized, exactly what we need. */ 5513 for (;;) { 5514 save_len = iter->seq.seq.len; 5515 ret = print_trace_line(iter); 5516 5517 if (trace_seq_has_overflowed(&iter->seq)) { 5518 iter->seq.seq.len = save_len; 5519 break; 5520 } 5521 5522 /* 5523 * This should not be hit, because it should only 5524 * be set if the iter->seq overflowed. But check it 5525 * anyway to be safe. 
5526 */ 5527 if (ret == TRACE_TYPE_PARTIAL_LINE) { 5528 iter->seq.seq.len = save_len; 5529 break; 5530 } 5531 5532 count = trace_seq_used(&iter->seq) - save_len; 5533 if (rem < count) { 5534 rem = 0; 5535 iter->seq.seq.len = save_len; 5536 break; 5537 } 5538 5539 if (ret != TRACE_TYPE_NO_CONSUME) 5540 trace_consume(iter); 5541 rem -= count; 5542 if (!trace_find_next_entry_inc(iter)) { 5543 rem = 0; 5544 iter->ent = NULL; 5545 break; 5546 } 5547 } 5548 5549 return rem; 5550 } 5551 5552 static ssize_t tracing_splice_read_pipe(struct file *filp, 5553 loff_t *ppos, 5554 struct pipe_inode_info *pipe, 5555 size_t len, 5556 unsigned int flags) 5557 { 5558 struct page *pages_def[PIPE_DEF_BUFFERS]; 5559 struct partial_page partial_def[PIPE_DEF_BUFFERS]; 5560 struct trace_iterator *iter = filp->private_data; 5561 struct splice_pipe_desc spd = { 5562 .pages = pages_def, 5563 .partial = partial_def, 5564 .nr_pages = 0, /* This gets updated below. */ 5565 .nr_pages_max = PIPE_DEF_BUFFERS, 5566 .ops = &default_pipe_buf_ops, 5567 .spd_release = tracing_spd_release_pipe, 5568 }; 5569 ssize_t ret; 5570 size_t rem; 5571 unsigned int i; 5572 5573 if (splice_grow_spd(pipe, &spd)) 5574 return -ENOMEM; 5575 5576 mutex_lock(&iter->mutex); 5577 5578 if (iter->trace->splice_read) { 5579 ret = iter->trace->splice_read(iter, filp, 5580 ppos, pipe, len, flags); 5581 if (ret) 5582 goto out_err; 5583 } 5584 5585 ret = tracing_wait_pipe(filp); 5586 if (ret <= 0) 5587 goto out_err; 5588 5589 if (!iter->ent && !trace_find_next_entry_inc(iter)) { 5590 ret = -EFAULT; 5591 goto out_err; 5592 } 5593 5594 trace_event_read_lock(); 5595 trace_access_lock(iter->cpu_file); 5596 5597 /* Fill as many pages as possible. */ 5598 for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) { 5599 spd.pages[i] = alloc_page(GFP_KERNEL); 5600 if (!spd.pages[i]) 5601 break; 5602 5603 rem = tracing_fill_pipe_page(rem, iter); 5604 5605 /* Copy the data into the page, so we can start over. 
*/ 5606 ret = trace_seq_to_buffer(&iter->seq, 5607 page_address(spd.pages[i]), 5608 min((size_t)trace_seq_used(&iter->seq), 5609 (size_t)PAGE_SIZE)); 5610 if (ret < 0) { 5611 __free_page(spd.pages[i]); 5612 break; 5613 } 5614 spd.partial[i].offset = 0; 5615 spd.partial[i].len = ret; 5616 5617 trace_seq_init(&iter->seq); 5618 } 5619 5620 trace_access_unlock(iter->cpu_file); 5621 trace_event_read_unlock(); 5622 mutex_unlock(&iter->mutex); 5623 5624 spd.nr_pages = i; 5625 5626 if (i) 5627 ret = splice_to_pipe(pipe, &spd); 5628 else 5629 ret = 0; 5630 out: 5631 splice_shrink_spd(&spd); 5632 return ret; 5633 5634 out_err: 5635 mutex_unlock(&iter->mutex); 5636 goto out; 5637 } 5638 5639 static ssize_t 5640 tracing_syscall_buf_read(struct file *filp, char __user *ubuf, 5641 size_t cnt, loff_t *ppos) 5642 { 5643 struct inode *inode = file_inode(filp); 5644 struct trace_array *tr = inode->i_private; 5645 char buf[64]; 5646 int r; 5647 5648 r = snprintf(buf, 64, "%d\n", tr->syscall_buf_sz); 5649 5650 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); 5651 } 5652 5653 static ssize_t 5654 tracing_syscall_buf_write(struct file *filp, const char __user *ubuf, 5655 size_t cnt, loff_t *ppos) 5656 { 5657 struct inode *inode = file_inode(filp); 5658 struct trace_array *tr = inode->i_private; 5659 unsigned long val; 5660 int ret; 5661 5662 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 5663 if (ret) 5664 return ret; 5665 5666 if (val > SYSCALL_FAULT_USER_MAX) 5667 val = SYSCALL_FAULT_USER_MAX; 5668 5669 tr->syscall_buf_sz = val; 5670 5671 *ppos += cnt; 5672 5673 return cnt; 5674 } 5675 5676 static ssize_t 5677 tracing_entries_read(struct file *filp, char __user *ubuf, 5678 size_t cnt, loff_t *ppos) 5679 { 5680 struct inode *inode = file_inode(filp); 5681 struct trace_array *tr = inode->i_private; 5682 int cpu = tracing_get_cpu(inode); 5683 char buf[64]; 5684 int r = 0; 5685 ssize_t ret; 5686 5687 mutex_lock(&trace_types_lock); 5688 5689 if (cpu == RING_BUFFER_ALL_CPUS) { 5690 
		/* NOTE: this inner cpu shadows the function-scope cpu above */
		int cpu, buf_size_same;
		unsigned long size;

		size = 0;
		buf_size_same = 1;
		/* check if all cpu sizes are same */
		for_each_tracing_cpu(cpu) {
			/* fill in the size from first enabled cpu */
			if (size == 0)
				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
				buf_size_same = 0;
				break;
			}
		}

		if (buf_size_same) {
			/* Sizes are stored in entries; report in KB */
			if (!tr->ring_buffer_expanded)
				r = sprintf(buf, "%lu (expanded: %lu)\n",
					    size >> 10,
					    trace_buf_size >> 10);
			else
				r = sprintf(buf, "%lu\n", size >> 10);
		} else
			r = sprintf(buf, "X\n");
	} else
		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);

	mutex_unlock(&trace_types_lock);

	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
	return ret;
}

/*
 * Write handler for "buffer_size_kb": resize the ring buffer of one CPU
 * (or all CPUs) to the given size in KB. Zero is rejected.
 */
static ssize_t
tracing_entries_write(struct file *filp, const char __user *ubuf,
		      size_t cnt, loff_t *ppos)
{
	struct inode *inode = file_inode(filp);
	struct trace_array *tr = inode->i_private;
	unsigned long val;
	int ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	/* must have at least 1 entry */
	if (!val)
		return -EINVAL;

	/* value is in KB */
	val <<= 10;
	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
	if (ret < 0)
		return ret;

	*ppos += cnt;

	return cnt;
}

/*
 * Read handler for "buffer_total_size_kb": sum of all per-CPU ring
 * buffer sizes in KB, with the would-be expanded size shown when the
 * buffer has not been expanded yet.
 */
static ssize_t
tracing_total_entries_read(struct file *filp, char __user *ubuf,
				size_t cnt, loff_t *ppos)
{
	struct trace_array *tr = filp->private_data;
	char buf[64];
	int r, cpu;
	unsigned long size = 0, expanded_size = 0;

	mutex_lock(&trace_types_lock);
	for_each_tracing_cpu(cpu) {
		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
		if (!tr->ring_buffer_expanded)
			expanded_size += trace_buf_size >> 10;
	}
	if (tr->ring_buffer_expanded)
		r = sprintf(buf, "%lu\n", size);
	else
		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
	mutex_unlock(&trace_types_lock);

	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}

/* Sentinel seq_file token for the header row of the last_boot file */
#define LAST_BOOT_HEADER ((void *)1)

/*
 * seq_file iterator over the last-boot module entries stored in the
 * persistent scratch area. Position 0 is the header; positions 1..n
 * map to tscratch->entries[0..n-1].
 */
static void *l_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct trace_array *tr = m->private;
	struct trace_scratch *tscratch = tr->scratch;
	unsigned int index = *pos;

	(*pos)++;

	if (*pos == 1)
		return LAST_BOOT_HEADER;

	/* Only show offsets of the last boot data */
	if (!tscratch || !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
		return NULL;

	/* *pos 0 is for the header, 1 is for the first module */
	index--;

	if (index >= tscratch->nr_entries)
		return NULL;

	return &tscratch->entries[index];
}

static void *l_start(struct seq_file *m, loff_t *pos)
{
	/* Released in l_stop() when the seq_file iteration ends */
	mutex_lock(&scratch_mutex);

	return l_next(m, NULL, pos);
}

static void l_stop(struct seq_file *m, void *p)
{
	mutex_unlock(&scratch_mutex);
}

static void show_last_boot_header(struct seq_file *m, struct trace_array *tr)
{
	struct trace_scratch *tscratch = tr->scratch;

	/*
	 * Do not leak KASLR address. This only shows the KASLR address of
	 * the last boot. When the ring buffer is started, the LAST_BOOT
	 * flag gets cleared, and this should only report "current".
	 * Otherwise it shows the KASLR address from the previous boot which
	 * should not be the same as the current boot.
	 */
	if (tscratch && (tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
		seq_printf(m, "%lx\t[kernel]\n", tscratch->text_addr);
	else
		seq_puts(m, "# Current\n");
}

/* seq_file show: header line or one "<addr>\t<module>" entry */
static int l_show(struct seq_file *m, void *v)
{
	struct trace_array *tr = m->private;
	struct trace_mod_entry *entry = v;

	if (v == LAST_BOOT_HEADER) {
		show_last_boot_header(m, tr);
		return 0;
	}

	seq_printf(m, "%lx\t%s\n", entry->mod_addr, entry->mod_name);
	return 0;
}

static const struct seq_operations last_boot_seq_ops = {
	.start		= l_start,
	.next		= l_next,
	.stop		= l_stop,
	.show		= l_show,
};

/* Open handler for the "last_boot_info" file */
static int tracing_last_boot_open(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;
	struct seq_file *m;
	int ret;

	ret = tracing_check_open_get_tr(tr);
	if (ret)
		return ret;

	ret = seq_open(file, &last_boot_seq_ops);
	if (ret) {
		/* Drop the reference taken by tracing_check_open_get_tr() */
		trace_array_put(tr);
		return ret;
	}

	m = file->private_data;
	m->private = tr;

	return 0;
}

/* Open handler for the per-CPU ring buffer meta-data file */
static int tracing_buffer_meta_open(struct inode *inode, struct file *filp)
{
	struct trace_array *tr = inode->i_private;
	int cpu = tracing_get_cpu(inode);
	int ret;

	ret = tracing_check_open_get_tr(tr);
	if (ret)
		return ret;

	ret = ring_buffer_meta_seq_init(filp, tr->array_buffer.buffer, cpu);
	if (ret < 0)
		__trace_array_put(tr);
	return ret;
}

static ssize_t
tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
			  size_t cnt, loff_t *ppos)
{
	/*
	 * There is no need to read what the user has written, this function
	 * is just to make sure that there is no error when "echo" is used
	 */

	*ppos += cnt;

	return cnt;
}

/* On close of "free_buffer": optionally stop tracing, then free the buffer */
static int
tracing_free_buffer_release(struct inode *inode, struct file *filp)
{
	struct trace_array *tr = inode->i_private;

	/* disable tracing ? */
	if (tr->trace_flags & TRACE_ITER(STOP_ON_FREE))
		tracer_tracing_off(tr);
	/* resize the ring buffer to 0 */
	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);

	trace_array_put(tr);

	return 0;
}

/* Maximum payload accepted by a single trace_marker write */
#define TRACE_MARKER_MAX_SIZE 4096

/*
 * Record one trace_marker string of @cnt bytes into @tr's ring buffer
 * as a TRACE_PRINT event, running any trace_marker triggers and exports.
 * Returns the number of payload bytes written, or -EBADF on failure.
 */
static ssize_t write_marker_to_buffer(struct trace_array *tr, const char *buf,
				      size_t cnt, unsigned long ip)
{
	struct ring_buffer_event *event;
	enum event_trigger_type tt = ETT_NONE;
	struct trace_buffer *buffer;
	struct print_entry *entry;
	int meta_size;
	ssize_t written;
	size_t size;

	meta_size = sizeof(*entry) + 2; /* add '\0' and possible '\n' */
 again:
	size = cnt + meta_size;

	buffer = tr->array_buffer.buffer;
	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
					    tracing_gen_ctx());
	if (unlikely(!event)) {
		/*
		 * If the size was greater than what was allowed, then
		 * make it smaller and try again.
		 */
		if (size > ring_buffer_max_event_size(buffer)) {
			cnt = ring_buffer_max_event_size(buffer) - meta_size;
			/* The above should only happen once */
			if (WARN_ON_ONCE(cnt + meta_size == size))
				return -EBADF;
			goto again;
		}

		/* Ring buffer disabled, return as if not open for write */
		return -EBADF;
	}

	entry = ring_buffer_event_data(event);
	entry->ip = ip;
	memcpy(&entry->buf, buf, cnt);
	written = cnt;

	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
		/* do not add \n before testing triggers, but add \0 */
		entry->buf[cnt] = '\0';
		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
	}

	/* Make sure the recorded string ends with "\n\0" or "\0" */
	if (entry->buf[cnt - 1] != '\n') {
		entry->buf[cnt] = '\n';
		entry->buf[cnt + 1] = '\0';
	} else
		entry->buf[cnt] = '\0';

	if (static_branch_unlikely(&trace_marker_exports_enabled))
		ftrace_exports(event, TRACE_EXPORT_MARKER);
	__buffer_unlock_commit(buffer, event);

	if (tt)
		event_triggers_post_call(tr->trace_marker_file, tt);

	return written;
}

/* One scratch buffer per CPU for faulting in user space data */
struct trace_user_buf {
	char *buf;
};

static DEFINE_MUTEX(trace_user_buffer_mutex);
static struct trace_user_buf_info *trace_user_buffer;

/**
 * trace_user_fault_destroy - free up allocated memory of a trace user buffer
 * @tinfo: The descriptor to free up
 *
 * Frees any data allocated in the trace info descriptor.
 */
void trace_user_fault_destroy(struct trace_user_buf_info *tinfo)
{
	char *buf;
	int cpu;

	if (!tinfo || !tinfo->tbuf)
		return;

	/* Free each per-CPU buffer before the percpu container itself */
	for_each_possible_cpu(cpu) {
		buf = per_cpu_ptr(tinfo->tbuf, cpu)->buf;
		kfree(buf);
	}
	free_percpu(tinfo->tbuf);
}

/*
 * Allocate the percpu container and one buffer of @size bytes per
 * possible CPU. On partial failure the already-allocated buffers are
 * left for the caller to clean up via trace_user_fault_destroy().
 */
static int user_fault_buffer_enable(struct trace_user_buf_info *tinfo, size_t size)
{
	char *buf;
	int cpu;

	lockdep_assert_held(&trace_user_buffer_mutex);

	tinfo->tbuf = alloc_percpu(struct trace_user_buf);
	if (!tinfo->tbuf)
		return -ENOMEM;

	tinfo->ref = 1;
	tinfo->size = size;

	/* Clear each buffer in case of error */
	for_each_possible_cpu(cpu) {
		per_cpu_ptr(tinfo->tbuf, cpu)->buf = NULL;
	}

	for_each_possible_cpu(cpu) {
		/* Allocate on the CPU's own node to keep accesses local */
		buf = kmalloc_node(size, GFP_KERNEL,
				   cpu_to_node(cpu));
		if (!buf)
			return -ENOMEM;
		per_cpu_ptr(tinfo->tbuf, cpu)->buf = buf;
	}

	return 0;
}

/* For internal use. Free and reinitialize */
static void user_buffer_free(struct trace_user_buf_info **tinfo)
{
	lockdep_assert_held(&trace_user_buffer_mutex);

	trace_user_fault_destroy(*tinfo);
	kfree(*tinfo);
	*tinfo = NULL;
}

/* For internal use. Initialize and allocate */
static int user_buffer_init(struct trace_user_buf_info **tinfo, size_t size)
{
	bool alloc = false;
	int ret;

	lockdep_assert_held(&trace_user_buffer_mutex);

	if (!*tinfo) {
		alloc = true;
		*tinfo = kzalloc_obj(**tinfo);
		if (!*tinfo)
			return -ENOMEM;
	}

	ret = user_fault_buffer_enable(*tinfo, size);
	/* Only free the descriptor here if this call allocated it */
	if (ret < 0 && alloc)
		user_buffer_free(tinfo);

	return ret;
}

/* For internal use, dereference and free if necessary */
static void user_buffer_put(struct trace_user_buf_info **tinfo)
{
	guard(mutex)(&trace_user_buffer_mutex);

	if (WARN_ON_ONCE(!*tinfo || !(*tinfo)->ref))
		return;

	if (--(*tinfo)->ref)
		return;

	user_buffer_free(tinfo);
}

/**
 * trace_user_fault_init - Allocate or reference a per CPU buffer
 * @tinfo: A pointer to the trace buffer descriptor
 * @size: The size to allocate each per CPU buffer
 *
 * Create a per CPU buffer that can be used to copy from user space
 * in a task context. When calling trace_user_fault_read(), preemption
 * must be disabled, and it will enable preemption and copy user
 * space data to the buffer. If any schedule switches occur, it will
 * retry until it succeeds without a schedule switch knowing the buffer
 * is still valid.
 *
 * Returns 0 on success, negative on failure.
 */
int trace_user_fault_init(struct trace_user_buf_info *tinfo, size_t size)
{
	int ret;

	if (!tinfo)
		return -EINVAL;

	guard(mutex)(&trace_user_buffer_mutex);

	ret = user_buffer_init(&tinfo, size);
	/* On failure, release any partially allocated per-CPU buffers */
	if (ret < 0)
		trace_user_fault_destroy(tinfo);

	return ret;
}

/**
 * trace_user_fault_get - up the ref count for the user buffer
 * @tinfo: A pointer to a pointer to the trace buffer descriptor
 *
 * Ups the ref count of the trace buffer.
 *
 * Returns the new ref count.
 */
int trace_user_fault_get(struct trace_user_buf_info *tinfo)
{
	if (!tinfo)
		return -1;

	guard(mutex)(&trace_user_buffer_mutex);

	tinfo->ref++;
	return tinfo->ref;
}

/**
 * trace_user_fault_put - dereference a per cpu trace buffer
 * @tinfo: The @tinfo that was passed to trace_user_fault_get()
 *
 * Decrement the ref count of @tinfo.
 *
 * Returns the new refcount (negative on error).
 */
int trace_user_fault_put(struct trace_user_buf_info *tinfo)
{
	guard(mutex)(&trace_user_buffer_mutex);

	if (WARN_ON_ONCE(!tinfo || !tinfo->ref))
		return -1;

	--tinfo->ref;
	return tinfo->ref;
}

/**
 * trace_user_fault_read - Read user space into a per CPU buffer
 * @tinfo: The @tinfo allocated by trace_user_fault_get()
 * @ptr: The user space pointer to read
 * @size: The size of user space to read.
 * @copy_func: Optional function to use to copy from user space
 * @data: Data to pass to copy_func if it was supplied
 *
 * Preemption must be disabled when this is called, and must not
 * be enabled while using the returned buffer.
 * This does the copying from user space into a per CPU buffer.
 *
 * The @size must not be greater than the size passed in to
 * trace_user_fault_init().
 *
 * If @copy_func is NULL, trace_user_fault_read() will use copy_from_user(),
 * otherwise it will call @copy_func. It will call @copy_func with:
 *
 *   buffer: the per CPU buffer of the @tinfo.
 *   ptr: The pointer @ptr to user space to read
 *   size: The @size of the ptr to read
 *   data: The @data parameter
 *
 * It is expected that @copy_func will return 0 on success and non zero
 * if there was a fault.
 *
 * Returns a pointer to the buffer with the content read from @ptr.
 * Preemption must remain disabled while the caller accesses the
 * buffer returned by this function.
 * Returns NULL if there was a fault, or the size passed in is
 * greater than the size passed to trace_user_fault_init().
 */
char *trace_user_fault_read(struct trace_user_buf_info *tinfo,
			    const char __user *ptr, size_t size,
			    trace_user_buf_copy copy_func, void *data)
{
	int cpu = smp_processor_id();
	char *buffer = per_cpu_ptr(tinfo->tbuf, cpu)->buf;
	unsigned int cnt;
	int trys = 0;
	int ret;

	lockdep_assert_preemption_disabled();

	/*
	 * It's up to the caller to not try to copy more than it said
	 * it would.
	 */
	if (size > tinfo->size)
		return NULL;

	/*
	 * This acts similar to a seqcount. The per CPU context switches are
	 * recorded, migration is disabled and preemption is enabled. The
	 * read of the user space memory is copied into the per CPU buffer.
	 * Preemption is disabled again, and if the per CPU context switches count
	 * is still the same, it means the buffer has not been corrupted.
	 * If the count is different, it is assumed the buffer is corrupted
	 * and reading must be tried again.
	 */

	do {
		/*
		 * It is possible that something is trying to migrate this
		 * task. What happens then, is when preemption is enabled,
		 * the migration thread will preempt this task, try to
		 * migrate it, fail, then let it run again. That will
		 * cause this to loop again and never succeed.
		 * On failures, enable and disable preemption with
		 * migration enabled, to allow the migration thread to
		 * migrate this task.
		 */
		if (trys) {
			preempt_enable_notrace();
			preempt_disable_notrace();
			/* May now be on a different CPU; refresh the buffer */
			cpu = smp_processor_id();
			buffer = per_cpu_ptr(tinfo->tbuf, cpu)->buf;
		}

		/*
		 * If for some reason, copy_from_user() always causes a context
		 * switch, this would then cause an infinite loop.
		 * If this task is preempted by another user space task, it
		 * will cause this task to try again. But just in case something
		 * changes where the copying from user space causes another task
		 * to run, prevent this from going into an infinite loop.
		 * 100 tries should be plenty.
		 */
		if (WARN_ONCE(trys++ > 100, "Error: Too many tries to read user space"))
			return NULL;

		/* Read the current CPU context switch counter */
		cnt = nr_context_switches_cpu(cpu);

		/*
		 * Preemption is going to be enabled, but this task must
		 * remain on this CPU.
		 */
		migrate_disable();

		/*
		 * Now preemption is being enabled and another task can come in
		 * and use the same buffer and corrupt our data.
		 */
		preempt_enable_notrace();

		/* Make sure preemption is enabled here */
		lockdep_assert_preemption_enabled();

		if (copy_func) {
			ret = copy_func(buffer, ptr, size, data);
		} else {
			ret = __copy_from_user(buffer, ptr, size);
		}

		preempt_disable_notrace();
		migrate_enable();

		/* if it faulted, no need to test if the buffer was corrupted */
		if (ret)
			return NULL;

		/*
		 * Preemption is disabled again, now check the per CPU context
		 * switch counter. If it doesn't match, then another user space
		 * process may have scheduled in and corrupted our buffer. In that
		 * case the copying must be retried.
		 */
	} while (nr_context_switches_cpu(cpu) != cnt);

	return buffer;
}

/*
 * Write handler for the "trace_marker" file: copies the user string
 * via the per-CPU fault buffer and records it as a TRACE_PRINT event.
 * Writes to the global instance fan out to every instance that has a
 * copy of the marker (marker_copies list).
 */
static ssize_t
tracing_mark_write(struct file *filp, const char __user *ubuf,
		   size_t cnt, loff_t *fpos)
{
	struct trace_array *tr = filp->private_data;
	ssize_t written = -ENODEV;
	unsigned long ip;
	char *buf;

	if (unlikely(tracing_disabled))
		return -EINVAL;

	if (!(tr->trace_flags & TRACE_ITER(MARKERS)))
		return -EINVAL;

	if ((ssize_t)cnt < 0)
		return -EINVAL;

	/* Oversized writes are silently truncated */
	if (cnt > TRACE_MARKER_MAX_SIZE)
		cnt = TRACE_MARKER_MAX_SIZE;

	/* Must have preemption disabled while having access to the buffer */
	guard(preempt_notrace)();

	buf = trace_user_fault_read(trace_user_buffer, ubuf, cnt, NULL, NULL);
	if (!buf)
		return -EFAULT;

	/* The selftests expect this function to be the IP address */
	ip = _THIS_IP_;

	/* The global trace_marker can go to multiple instances */
	if (tr == &global_trace) {
		guard(rcu)();
		list_for_each_entry_rcu(tr, &marker_copies, marker_list) {
			written = write_marker_to_buffer(tr, buf, cnt, ip);
			if (written < 0)
				break;
		}
	} else {
		written = write_marker_to_buffer(tr, buf, cnt, ip);
	}

	return written;
}

/*
 * Record one raw marker blob (tag id followed by data) into @tr's ring
 * buffer as a TRACE_RAW_DATA event. Returns bytes written or an errno.
 */
static ssize_t write_raw_marker_to_buffer(struct trace_array *tr,
					  const char *buf, size_t cnt)
{
	struct ring_buffer_event *event;
	struct trace_buffer *buffer;
	struct raw_data_entry *entry;
	ssize_t written;
	size_t size;

	/*
	 * cnt includes both the entry->id and the data behind it.
	 */
	size = struct_offset(entry, id) + cnt;

	buffer = tr->array_buffer.buffer;

	/* Raw writes are all or nothing; no truncate-and-retry here */
	if (size > ring_buffer_max_event_size(buffer))
		return -EINVAL;

	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
					    tracing_gen_ctx());
	if (!event)
		/* Ring buffer disabled, return as if not open for write */
		return -EBADF;

	entry = ring_buffer_event_data(event);
	unsafe_memcpy(&entry->id, buf, cnt,
		      "id and content already reserved on ring buffer"
		      "'buf' includes the 'id' and the data."
		      "'entry' was allocated with cnt from 'id'.");
	written = cnt;

	__buffer_unlock_commit(buffer, event);

	return written;
}

/*
 * Write handler for "trace_marker_raw": binary payload led by an
 * unsigned int tag id. Fans out to all marker copies when written
 * through the global instance.
 */
static ssize_t
tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
		       size_t cnt, loff_t *fpos)
{
	struct trace_array *tr = filp->private_data;
	ssize_t written = -ENODEV;
	char *buf;

	if (unlikely(tracing_disabled))
		return -EINVAL;

	if (!(tr->trace_flags & TRACE_ITER(MARKERS)))
		return -EINVAL;

	/* The marker must at least have a tag id */
	if (cnt < sizeof(unsigned int))
		return -EINVAL;

	/* raw write is all or nothing */
	if (cnt > TRACE_MARKER_MAX_SIZE)
		return -EINVAL;

	/* Must have preemption disabled while having access to the buffer */
	guard(preempt_notrace)();

	buf = trace_user_fault_read(trace_user_buffer, ubuf, cnt, NULL, NULL);
	if (!buf)
		return -EFAULT;

	/* The global trace_marker_raw can go to multiple instances */
	if (tr == &global_trace) {
		guard(rcu)();
		list_for_each_entry_rcu(tr, &marker_copies, marker_list) {
			written = write_raw_marker_to_buffer(tr, buf, cnt);
			if (written < 0)
				break;
		}
	} else {
		written = write_raw_marker_to_buffer(tr, buf, cnt);
	}

	return written;
}

/*
 * Open handler shared by trace_marker and trace_marker_raw: makes sure
 * the per-CPU user fault buffer exists (or takes a reference on it).
 */
static int tracing_mark_open(struct inode *inode, struct file *filp)
{
	int ret;

	scoped_guard(mutex, &trace_user_buffer_mutex) {
		if (!trace_user_buffer) {
			ret = user_buffer_init(&trace_user_buffer, TRACE_MARKER_MAX_SIZE);
			if (ret < 0)
				return ret;
		} else {
			trace_user_buffer->ref++;
		}
	}

	stream_open(inode, filp);
	ret = tracing_open_generic_tr(inode, filp);
	if (ret < 0)
		/* Drop the reference (or free the buffer) taken above */
		user_buffer_put(&trace_user_buffer);
	return ret;
}

static int tracing_mark_release(struct inode *inode, struct file *file)
{
	user_buffer_put(&trace_user_buffer);
	return tracing_release_generic_tr(inode, file);
}

/* Show all available trace clocks, with the active one in brackets */
static int tracing_clock_show(struct seq_file *m, void *v)
{
	struct trace_array *tr = m->private;
	int i;

	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
		seq_printf(m,
			"%s%s%s%s", i ? " " : "",
			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
			i == tr->clock_id ? "]" : "");
	seq_putc(m, '\n');

	return 0;
}

/*
 * Switch the instance's trace clock to @clockstr. Resets the ring
 * buffer(s), as timestamps from different clocks are not comparable.
 */
int tracing_set_clock(struct trace_array *tr, const char *clockstr)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
		if (strcmp(trace_clocks[i].name, clockstr) == 0)
			break;
	}
	if (i == ARRAY_SIZE(trace_clocks))
		return -EINVAL;

	guard(mutex)(&trace_types_lock);

	tr->clock_id = i;

	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);

	/*
	 * New clock may not be consistent with the previous clock.
	 * Reset the buffer so that it doesn't have incomparable timestamps.
	 */
	tracing_reset_online_cpus(&tr->array_buffer);

#ifdef CONFIG_TRACER_SNAPSHOT
	/* The snapshot buffer must use the same clock as the main buffer */
	if (tr->snapshot_buffer.buffer)
		ring_buffer_set_clock(tr->snapshot_buffer.buffer, trace_clocks[i].func);
	tracing_reset_online_cpus(&tr->snapshot_buffer);
#endif
	update_last_data_if_empty(tr);

	/* Record the clock in the persistent scratch area for the next boot */
	if (tr->scratch && !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) {
		struct trace_scratch *tscratch = tr->scratch;

		tscratch->clock_id = i;
	}

	return 0;
}

/* Write handler for "trace_clock": set the clock by name */
static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
				   size_t cnt, loff_t *fpos)
{
	struct seq_file *m = filp->private_data;
	struct trace_array *tr = m->private;
	char buf[64];
	const char *clockstr;
	int ret;

	if (cnt >= sizeof(buf))
		return -EINVAL;

	if (copy_from_user(buf, ubuf, cnt))
		return -EFAULT;

	buf[cnt] = 0;

	clockstr = strstrip(buf);

	ret = tracing_set_clock(tr, clockstr);
	if (ret)
		return ret;

	*fpos += cnt;

	return cnt;
}

static int tracing_clock_open(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;
	int ret;

	ret = tracing_check_open_get_tr(tr);
	if (ret)
		return ret;

	/* Read-only instances may view but not change the clock */
	if ((file->f_mode & FMODE_WRITE) && trace_array_is_readonly(tr)) {
		trace_array_put(tr);
		return -EACCES;
	}

	ret = single_open(file, tracing_clock_show, inode->i_private);
	if (ret < 0)
		trace_array_put(tr);

	return ret;
}

/* Show whether the ring buffer uses delta or absolute timestamps */
static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
{
	struct trace_array *tr = m->private;

	guard(mutex)(&trace_types_lock);

	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
		seq_puts(m, "delta [absolute]\n");
	else
		seq_puts(m, "[delta] absolute\n");

	return 0;
}

static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;
	int ret;

	ret = tracing_check_open_get_tr(tr);
	if (ret)
		return ret;

	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
	if (ret < 0)
		trace_array_put(tr);

	return ret;
}

/*
 * Return the timestamp for @rbe; for the per-CPU buffered event the
 * current buffer time is used instead of the event's own stamp.
 */
u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
{
	if (rbe == this_cpu_read(trace_buffered_event))
		return ring_buffer_time_stamp(buffer);

	return ring_buffer_event_time_stamp(buffer, rbe);
}

static const struct file_operations tracing_thresh_fops = {
	.open		= tracing_open_generic,
	.read		= tracing_thresh_read,
	.write		= tracing_thresh_write,
	.llseek		= generic_file_llseek,
};

static const struct file_operations set_tracer_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_set_trace_read,
	.write		= tracing_set_trace_write,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};

static const struct file_operations tracing_pipe_fops = {
	.open		= tracing_open_pipe,
	.poll		= tracing_poll_pipe,
	.read		= tracing_read_pipe,
	.splice_read	= tracing_splice_read_pipe,
	.release	= tracing_release_pipe,
};

static const struct file_operations tracing_entries_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_entries_read,
	.write		= tracing_entries_write,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};

static const struct file_operations tracing_syscall_buf_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_syscall_buf_read,
	.write		= tracing_syscall_buf_write,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};

static const struct file_operations tracing_buffer_meta_fops = {
	.open		= tracing_buffer_meta_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= tracing_seq_release,
};

static const struct file_operations tracing_total_entries_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_total_entries_read,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};

static const struct file_operations tracing_free_buffer_fops = {
	.open		= tracing_open_generic_tr,
	.write		= tracing_free_buffer_write,
	.release	= tracing_free_buffer_release,
};

static const struct file_operations tracing_mark_fops = {
	.open		= tracing_mark_open,
	.write		= tracing_mark_write,
	.release	= tracing_mark_release,
};

static const struct file_operations tracing_mark_raw_fops = {
	.open		= tracing_mark_open,
	.write		= tracing_mark_raw_write,
	.release	= tracing_mark_release,
};

static const struct file_operations trace_clock_fops = {
	.open		= tracing_clock_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= tracing_single_release_tr,
	.write		= tracing_clock_write,
};

static const struct file_operations trace_time_stamp_mode_fops = {
	.open		= tracing_time_stamp_mode_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= tracing_single_release_tr,
};

static const struct file_operations last_boot_fops = {
	.open		= tracing_last_boot_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= tracing_seq_release,
};

/*
 * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
 * @filp: The active open file structure
 * @ubuf: The userspace provided buffer to read value into
 * @cnt: The maximum number of bytes to read
 * @ppos: The current "file" position
 *
 * This function implements the write interface for a struct
trace_min_max_param. 6683 * The filp->private_data must point to a trace_min_max_param structure that 6684 * defines where to write the value, the min and the max acceptable values, 6685 * and a lock to protect the write. 6686 */ 6687 static ssize_t 6688 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) 6689 { 6690 struct trace_min_max_param *param = filp->private_data; 6691 u64 val; 6692 int err; 6693 6694 if (!param) 6695 return -EFAULT; 6696 6697 err = kstrtoull_from_user(ubuf, cnt, 10, &val); 6698 if (err) 6699 return err; 6700 6701 if (param->lock) 6702 mutex_lock(param->lock); 6703 6704 if (param->min && val < *param->min) 6705 err = -EINVAL; 6706 6707 if (param->max && val > *param->max) 6708 err = -EINVAL; 6709 6710 if (!err) 6711 *param->val = val; 6712 6713 if (param->lock) 6714 mutex_unlock(param->lock); 6715 6716 if (err) 6717 return err; 6718 6719 return cnt; 6720 } 6721 6722 /* 6723 * trace_min_max_read - Read a u64 value from a trace_min_max_param struct 6724 * @filp: The active open file structure 6725 * @ubuf: The userspace provided buffer to read value into 6726 * @cnt: The maximum number of bytes to read 6727 * @ppos: The current "file" position 6728 * 6729 * This function implements the read interface for a struct trace_min_max_param. 6730 * The filp->private_data must point to a trace_min_max_param struct with valid 6731 * data. 
6732 */ 6733 static ssize_t 6734 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) 6735 { 6736 struct trace_min_max_param *param = filp->private_data; 6737 char buf[U64_STR_SIZE]; 6738 int len; 6739 u64 val; 6740 6741 if (!param) 6742 return -EFAULT; 6743 6744 val = *param->val; 6745 6746 if (cnt > sizeof(buf)) 6747 cnt = sizeof(buf); 6748 6749 len = snprintf(buf, sizeof(buf), "%llu\n", val); 6750 6751 return simple_read_from_buffer(ubuf, cnt, ppos, buf, len); 6752 } 6753 6754 const struct file_operations trace_min_max_fops = { 6755 .open = tracing_open_generic, 6756 .read = trace_min_max_read, 6757 .write = trace_min_max_write, 6758 }; 6759 6760 #define TRACING_LOG_ERRS_MAX 8 6761 #define TRACING_LOG_LOC_MAX 128 6762 6763 #define CMD_PREFIX " Command: " 6764 6765 struct err_info { 6766 const char **errs; /* ptr to loc-specific array of err strings */ 6767 u8 type; /* index into errs -> specific err string */ 6768 u16 pos; /* caret position */ 6769 u64 ts; 6770 }; 6771 6772 struct tracing_log_err { 6773 struct list_head list; 6774 struct err_info info; 6775 char loc[TRACING_LOG_LOC_MAX]; /* err location */ 6776 char *cmd; /* what caused err */ 6777 }; 6778 6779 static DEFINE_MUTEX(tracing_err_log_lock); 6780 6781 static struct tracing_log_err *alloc_tracing_log_err(int len) 6782 { 6783 struct tracing_log_err *err; 6784 6785 err = kzalloc_obj(*err); 6786 if (!err) 6787 return ERR_PTR(-ENOMEM); 6788 6789 err->cmd = kzalloc(len, GFP_KERNEL); 6790 if (!err->cmd) { 6791 kfree(err); 6792 return ERR_PTR(-ENOMEM); 6793 } 6794 6795 return err; 6796 } 6797 6798 static void free_tracing_log_err(struct tracing_log_err *err) 6799 { 6800 kfree(err->cmd); 6801 kfree(err); 6802 } 6803 6804 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr, 6805 int len) 6806 { 6807 struct tracing_log_err *err; 6808 char *cmd; 6809 6810 if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) { 6811 err = alloc_tracing_log_err(len); 6812 if 
(PTR_ERR(err) != -ENOMEM) 6813 tr->n_err_log_entries++; 6814 6815 return err; 6816 } 6817 cmd = kzalloc(len, GFP_KERNEL); 6818 if (!cmd) 6819 return ERR_PTR(-ENOMEM); 6820 err = list_first_entry(&tr->err_log, struct tracing_log_err, list); 6821 kfree(err->cmd); 6822 err->cmd = cmd; 6823 list_del(&err->list); 6824 6825 return err; 6826 } 6827 6828 /** 6829 * err_pos - find the position of a string within a command for error careting 6830 * @cmd: The tracing command that caused the error 6831 * @str: The string to position the caret at within @cmd 6832 * 6833 * Finds the position of the first occurrence of @str within @cmd. The 6834 * return value can be passed to tracing_log_err() for caret placement 6835 * within @cmd. 6836 * 6837 * Returns the index within @cmd of the first occurrence of @str or 0 6838 * if @str was not found. 6839 */ 6840 unsigned int err_pos(char *cmd, const char *str) 6841 { 6842 char *found; 6843 6844 if (WARN_ON(!strlen(cmd))) 6845 return 0; 6846 6847 found = strstr(cmd, str); 6848 if (found) 6849 return found - cmd; 6850 6851 return 0; 6852 } 6853 6854 /** 6855 * tracing_log_err - write an error to the tracing error log 6856 * @tr: The associated trace array for the error (NULL for top level array) 6857 * @loc: A string describing where the error occurred 6858 * @cmd: The tracing command that caused the error 6859 * @errs: The array of loc-specific static error strings 6860 * @type: The index into errs[], which produces the specific static err string 6861 * @pos: The position the caret should be placed in the cmd 6862 * 6863 * Writes an error into tracing/error_log of the form: 6864 * 6865 * <loc>: error: <text> 6866 * Command: <cmd> 6867 * ^ 6868 * 6869 * tracing/error_log is a small log file containing the last 6870 * TRACING_LOG_ERRS_MAX errors (8). 
 * Memory for errors isn't allocated
 * unless there has been a tracing error, and the error log can be
 * cleared and have its memory freed by writing the empty string in
 * truncation mode to it i.e. echo > tracing/error_log.
 *
 * NOTE: the @errs array along with the @type param are used to
 * produce a static error string - this string is not copied and saved
 * when the error is logged - only a pointer to it is saved. See
 * existing callers for examples of how static strings are typically
 * defined for use with tracing_log_err().
 */
void tracing_log_err(struct trace_array *tr,
		     const char *loc, const char *cmd,
		     const char **errs, u8 type, u16 pos)
{
	struct tracing_log_err *err;
	int len = 0;

	if (!tr)
		tr = &global_trace;

	/* Room for the prefix, the command, two newlines and a NUL */
	len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;

	guard(mutex)(&tracing_err_log_lock);

	err = get_tracing_log_err(tr, len);
	/* Allocation failures are silently dropped: the log is best effort */
	if (PTR_ERR(err) == -ENOMEM)
		return;

	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
	snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);

	err->info.errs = errs;
	err->info.type = type;
	err->info.pos = pos;
	err->info.ts = local_clock();

	list_add_tail(&err->list, &tr->err_log);
}

/* Free every logged error of @tr and reset its entry count */
static void clear_tracing_err_log(struct trace_array *tr)
{
	struct tracing_log_err *err, *next;

	guard(mutex)(&tracing_err_log_lock);

	list_for_each_entry_safe(err, next, &tr->err_log, list) {
		list_del(&err->list);
		free_tracing_log_err(err);
	}

	tr->n_err_log_entries = 0;
}

/*
 * seq_file iteration over tr->err_log. The lock taken in ->start()
 * is released in ->stop(), which the seq_file core guarantees to call.
 */
static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
{
	struct trace_array *tr = m->private;

	mutex_lock(&tracing_err_log_lock);

	return seq_list_start(&tr->err_log, *pos);
}

static void *tracing_err_log_seq_next(struct seq_file *m, void *v,
				      loff_t *pos)
{
	struct trace_array *tr = m->private;

	return seq_list_next(v, &tr->err_log, pos);
}

static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
{
	mutex_unlock(&tracing_err_log_lock);
}

/* Emit the caret line under the logged command, aligned past CMD_PREFIX */
static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
{
	u16 i;

	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
		seq_putc(m, ' ');
	for (i = 0; i < pos; i++)
		seq_putc(m, ' ');
	seq_puts(m, "^\n");
}

static int tracing_err_log_seq_show(struct seq_file *m, void *v)
{
	struct tracing_log_err *err = v;

	if (err) {
		const char *err_text = err->info.errs[err->info.type];
		u64 sec = err->info.ts;
		u32 nsec;

		/* Split the ns timestamp into seconds and microseconds */
		nsec = do_div(sec, NSEC_PER_SEC);
		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
			   err->loc, err_text);
		seq_printf(m, "%s", err->cmd);
		tracing_err_log_show_pos(m, err->info.pos);
	}

	return 0;
}

static const struct seq_operations tracing_err_log_seq_ops = {
	.start  = tracing_err_log_seq_start,
	.next   = tracing_err_log_seq_next,
	.stop   = tracing_err_log_seq_stop,
	.show   = tracing_err_log_seq_show
};

static int tracing_err_log_open(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;
	int ret = 0;

	ret = tracing_check_open_get_tr(tr);
	if (ret)
		return ret;

	/* If this file was opened for write, then erase contents */
	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
		clear_tracing_err_log(tr);

	if (file->f_mode & FMODE_READ) {
		ret = seq_open(file, &tracing_err_log_seq_ops);
		if (!ret) {
			struct seq_file *m = file->private_data;
			m->private = tr;
		} else {
			/* Drop the reference taken by tracing_check_open_get_tr() */
			trace_array_put(tr);
		}
	}
	return ret;
}

static ssize_t tracing_err_log_write(struct file *file,
				     const char __user
				     *buffer,
				     size_t count, loff_t *ppos)
{
	/* Writes are accepted but discarded; clearing happens via O_TRUNC */
	return count;
}

static int tracing_err_log_release(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;

	trace_array_put(tr);

	if (file->f_mode & FMODE_READ)
		seq_release(inode, file);

	return 0;
}

static const struct file_operations tracing_err_log_fops = {
	.open           = tracing_err_log_open,
	.write		= tracing_err_log_write,
	.read           = seq_read,
	.llseek         = tracing_lseek,
	.release        = tracing_err_log_release,
};

/*
 * Open a per-cpu raw buffer file (trace_pipe_raw): allocate the
 * ftrace_buffer_info that carries the iterator state for this reader.
 */
int tracing_buffers_open(struct inode *inode, struct file *filp)
{
	struct trace_array *tr = inode->i_private;
	struct ftrace_buffer_info *info;
	int ret;

	ret = tracing_check_open_get_tr(tr);
	if (ret)
		return ret;

	info = kvzalloc_obj(*info);
	if (!info) {
		trace_array_put(tr);
		return -ENOMEM;
	}

	mutex_lock(&trace_types_lock);

	info->iter.tr = tr;
	info->iter.cpu_file = tracing_get_cpu(inode);
	info->iter.trace = tr->current_trace;
	info->iter.array_buffer = &tr->array_buffer;
	info->spare = NULL;
	/* Force reading ring buffer for first read */
	info->read = (unsigned int)-1;

	filp->private_data = info;

	tr->trace_ref++;

	mutex_unlock(&trace_types_lock);

	ret = nonseekable_open(inode, filp);
	if (ret < 0)
		trace_array_put(tr);

	return ret;
}

static __poll_t
tracing_buffers_poll(struct file *filp, poll_table *poll_table)
{
	struct ftrace_buffer_info *info = filp->private_data;
	struct trace_iterator *iter = &info->iter;

	return trace_poll(iter, filp, poll_table);
}

/*
 * Read raw sub-buffer pages from the ring buffer into user space,
 * using a "spare" reader page that is swapped with a live page by
 * ring_buffer_read_page(). Partial reads continue from info->read.
 */
ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
			     size_t count, loff_t *ppos)
{
	struct ftrace_buffer_info *info = filp->private_data;
	struct trace_iterator *iter = &info->iter;
	void *trace_data;
	int page_size;
	ssize_t ret = 0;
	ssize_t size;

	if (!count)
		return 0;

	if (iter->snapshot && tracer_uses_snapshot(iter->tr->current_trace))
		return -EBUSY;

	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);

	/* Make sure the spare matches the current sub buffer size */
	if (info->spare) {
		if (page_size != info->spare_size) {
			ring_buffer_free_read_page(iter->array_buffer->buffer,
						   info->spare_cpu, info->spare);
			info->spare = NULL;
		}
	}

	if (!info->spare) {
		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
							  iter->cpu_file);
		if (IS_ERR(info->spare)) {
			ret = PTR_ERR(info->spare);
			info->spare = NULL;
		} else {
			info->spare_cpu = iter->cpu_file;
			info->spare_size = page_size;
		}
	}
	if (!info->spare)
		return ret;

	/* Do we have previous read data to read? */
	if (info->read < page_size)
		goto read;

 again:
	trace_access_lock(iter->cpu_file);
	ret = ring_buffer_read_page(iter->array_buffer->buffer,
				    info->spare,
				    count,
				    iter->cpu_file, 0);
	trace_access_unlock(iter->cpu_file);

	if (ret < 0) {
		/* Nothing to read: either wait for data or return 0/-EAGAIN */
		if (trace_empty(iter) && !iter->closed) {
			if (update_last_data_if_empty(iter->tr))
				return 0;

			if ((filp->f_flags & O_NONBLOCK))
				return -EAGAIN;

			ret = wait_on_pipe(iter, 0);
			if (ret)
				return ret;

			goto again;
		}
		return 0;
	}

	info->read = 0;
 read:
	size = page_size - info->read;
	if (size > count)
		size = count;
	trace_data = ring_buffer_read_page_data(info->spare);
	ret = copy_to_user(ubuf, trace_data + info->read, size);
	/* Only fail if nothing at all could be copied */
	if (ret == size)
		return -EFAULT;

	size -= ret;

	*ppos += size;
	info->read += size;

	return size;
}

static int
tracing_buffers_flush(struct file *file, fl_owner_t id)
{
	struct ftrace_buffer_info *info = file->private_data;
	struct trace_iterator *iter = &info->iter;

	/* Mark this reader closed so blocked readers give up */
	iter->closed = true;
	/* Make sure the waiters see the new wait_index */
	(void)atomic_fetch_inc_release(&iter->wait_index);

	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);

	return 0;
}

int tracing_buffers_release(struct inode *inode, struct file *file)
{
	struct ftrace_buffer_info *info = file->private_data;
	struct trace_iterator *iter = &info->iter;

	guard(mutex)(&trace_types_lock);

	iter->tr->trace_ref--;

	__trace_array_put(iter->tr);

	/* Return the spare reader page to the ring buffer */
	if (info->spare)
		ring_buffer_free_read_page(iter->array_buffer->buffer,
					   info->spare_cpu, info->spare);
	kvfree(info);

	return 0;
}

/*
 * A refcounted reference to one ring-buffer reader page that has been
 * handed to a pipe via splice. Freed when the last pipe buffer drops it.
 */
struct buffer_ref {
	struct trace_buffer	*buffer;
	void			*page;
	int			cpu;
	refcount_t		refcount;
};

static void buffer_ref_release(struct buffer_ref *ref)
{
	if (!refcount_dec_and_test(&ref->refcount))
		return;
	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
	kfree(ref);
}

static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
				    struct pipe_buffer *buf)
{
	struct buffer_ref *ref = (struct buffer_ref *)buf->private;

	buffer_ref_release(ref);
	buf->private = 0;
}

static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
				struct pipe_buffer *buf)
{
	struct buffer_ref *ref = (struct buffer_ref *)buf->private;

	/* Refuse further gets once the count gets implausibly large */
	if (refcount_read(&ref->refcount) > INT_MAX/2)
		return false;

	refcount_inc(&ref->refcount);
	return true;
}

/* Pipe buffer operations for a buffer.
 */
static const struct pipe_buf_operations buffer_pipe_buf_ops = {
	.release		= buffer_pipe_buf_release,
	.get			= buffer_pipe_buf_get,
};

/*
 * Callback from splice_to_pipe(), if we need to release some pages
 * at the end of the spd in case we error'ed out in filling the pipe.
 */
static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
{
	struct buffer_ref *ref =
		(struct buffer_ref *)spd->partial[i].private;

	buffer_ref_release(ref);
	spd->partial[i].private = 0;
}

/*
 * Zero-copy transfer of whole ring-buffer sub-buffer pages into a pipe.
 * Each page is wrapped in a refcounted buffer_ref so the ring buffer
 * page is only returned once the pipe consumer is done with it.
 */
ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
				    struct pipe_inode_info *pipe, size_t len,
				    unsigned int flags)
{
	struct ftrace_buffer_info *info = file->private_data;
	struct trace_iterator *iter = &info->iter;
	struct partial_page partial_def[PIPE_DEF_BUFFERS];
	struct page *pages_def[PIPE_DEF_BUFFERS];
	struct splice_pipe_desc spd = {
		.pages		= pages_def,
		.partial	= partial_def,
		.nr_pages_max	= PIPE_DEF_BUFFERS,
		.ops		= &buffer_pipe_buf_ops,
		.spd_release	= buffer_spd_release,
	};
	struct buffer_ref *ref;
	bool woken = false;
	int page_size;
	int entries, i;
	ssize_t ret = 0;

	if (iter->snapshot && tracer_uses_snapshot(iter->tr->current_trace))
		return -EBUSY;

	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
	/* Only sub-buffer aligned offsets and lengths are supported */
	if (*ppos & (page_size - 1))
		return -EINVAL;

	if (len & (page_size - 1)) {
		if (len < page_size)
			return -EINVAL;
		len &= (~(page_size - 1));
	}

	if (splice_grow_spd(pipe, &spd))
		return -ENOMEM;

 again:
	trace_access_lock(iter->cpu_file);
	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);

	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) {
		struct page *page;
		int r;

		ref = kzalloc_obj(*ref);
		if (!ref) {
			ret = -ENOMEM;
			break;
		}

		refcount_set(&ref->refcount, 1);
		ref->buffer = iter->array_buffer->buffer;
		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
		if (IS_ERR(ref->page)) {
			ret = PTR_ERR(ref->page);
			ref->page = NULL;
			kfree(ref);
			break;
		}
		ref->cpu = iter->cpu_file;

		/* Full-page mode (last arg 1): swap out a whole sub-buffer */
		r = ring_buffer_read_page(ref->buffer, ref->page,
					  len, iter->cpu_file, 1);
		if (r < 0) {
			ring_buffer_free_read_page(ref->buffer, ref->cpu,
						   ref->page);
			kfree(ref);
			break;
		}

		page = virt_to_page(ring_buffer_read_page_data(ref->page));

		spd.pages[i] = page;
		spd.partial[i].len = page_size;
		spd.partial[i].offset = 0;
		spd.partial[i].private = (unsigned long)ref;
		spd.nr_pages++;
		*ppos += page_size;

		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
	}

	trace_access_unlock(iter->cpu_file);
	spd.nr_pages = i;

	/* did we read anything? */
	if (!spd.nr_pages) {

		if (ret)
			goto out;

		if (woken)
			goto out;

		ret = -EAGAIN;
		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
			goto out;

		ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
		if (ret)
			goto out;

		/* No need to wait after waking up when tracing is off */
		if (!tracer_tracing_is_on(iter->tr))
			goto out;

		/* Iterate one more time to collect any new data then exit */
		woken = true;

		goto again;
	}

	ret = splice_to_pipe(pipe, &spd);
 out:
	splice_shrink_spd(&spd);

	return ret;
}

static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	struct ftrace_buffer_info *info = file->private_data;
	struct trace_iterator *iter = &info->iter;
	int err;

	if (cmd == TRACE_MMAP_IOCTL_GET_READER) {
		/* Blocking opens wait until enough data is buffered */
		if (!(file->f_flags & O_NONBLOCK)) {
			err = ring_buffer_wait(iter->array_buffer->buffer,
					       iter->cpu_file,
					       iter->tr->buffer_percent,
					       NULL, NULL);
			if (err)
				return err;
		}

		return ring_buffer_map_get_reader(iter->array_buffer->buffer,
						  iter->cpu_file);
	} else if (cmd) {
		return -ENOTTY;
	}

	/*
	 * An ioctl call with cmd 0 to the ring buffer file will wake up all
	 * waiters
	 */
	guard(mutex)(&trace_types_lock);

	/* Make sure the waiters see the new wait_index */
	(void)atomic_fetch_inc_release(&iter->wait_index);

	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);

	return 0;
}

/*
 * This is called when a VMA is duplicated (e.g., on fork()) to increment
 * the user_mapped counter without remapping pages.
 */
static void tracing_buffers_mmap_open(struct vm_area_struct *vma)
{
	struct ftrace_buffer_info *info = vma->vm_file->private_data;
	struct trace_iterator *iter = &info->iter;

	ring_buffer_map_dup(iter->array_buffer->buffer, iter->cpu_file);
}

static void tracing_buffers_mmap_close(struct vm_area_struct *vma)
{
	struct ftrace_buffer_info *info = vma->vm_file->private_data;
	struct trace_iterator *iter = &info->iter;

	WARN_ON(ring_buffer_unmap(iter->array_buffer->buffer, iter->cpu_file));
	put_snapshot_map(iter->tr);
}

static int tracing_buffers_may_split(struct vm_area_struct *vma, unsigned long addr)
{
	/*
	 * Trace buffer mappings require the complete buffer including
	 * the meta page. Partial mappings are not supported.
	 */
	return -EINVAL;
}

static const struct vm_operations_struct tracing_buffers_vmops = {
	.open		= tracing_buffers_mmap_open,
	.close		= tracing_buffers_mmap_close,
	.may_split	= tracing_buffers_may_split,
};

static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct ftrace_buffer_info *info = filp->private_data;
	struct trace_iterator *iter = &info->iter;
	int ret = 0;

	/* A memmap'ed and backup buffers are not supported for user space mmap */
	if (iter->tr->flags & (TRACE_ARRAY_FL_MEMMAP | TRACE_ARRAY_FL_VMALLOC))
		return -ENODEV;

	ret = get_snapshot_map(iter->tr);
	if (ret)
		return ret;

	ret = ring_buffer_map(iter->array_buffer->buffer, iter->cpu_file, vma);
	if (ret)
		put_snapshot_map(iter->tr);

	vma->vm_ops = &tracing_buffers_vmops;

	return ret;
}

static const struct file_operations tracing_buffers_fops = {
	.open		= tracing_buffers_open,
	.read		= tracing_buffers_read,
	.poll		= tracing_buffers_poll,
	.release	= tracing_buffers_release,
	.flush		= tracing_buffers_flush,
	.splice_read	= tracing_buffers_splice_read,
	.unlocked_ioctl	= tracing_buffers_ioctl,
	.mmap		= tracing_buffers_mmap,
};

/*
 * Report per-cpu ring buffer statistics (entries, overruns, byte
 * counts, timestamps) as text via the per-cpu "stats" file.
 */
static ssize_t
tracing_stats_read(struct file *filp, char __user *ubuf,
		   size_t count, loff_t *ppos)
{
	struct inode *inode = file_inode(filp);
	struct trace_array *tr = inode->i_private;
	struct array_buffer *trace_buf = &tr->array_buffer;
	int cpu = tracing_get_cpu(inode);
	struct trace_seq *s;
	unsigned long cnt;
	unsigned long long t;
	unsigned long usec_rem;

	s = kmalloc_obj(*s);
	if (!s)
		return -ENOMEM;

	trace_seq_init(s);

	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
	trace_seq_printf(s, "entries: %ld\n", cnt);

	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
	trace_seq_printf(s, "overrun: %ld\n", cnt);

	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
	trace_seq_printf(s, "commit overrun: %ld\n", cnt);

	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
	trace_seq_printf(s, "bytes: %ld\n", cnt);

	if (trace_clocks[tr->clock_id].in_ns) {
		/* local or global for trace_clock */
		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
		usec_rem = do_div(t, USEC_PER_SEC);
		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
				 t, usec_rem);

		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
		usec_rem = do_div(t, USEC_PER_SEC);
		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
	} else {
		/* counter or tsc mode for trace_clock */
		trace_seq_printf(s, "oldest event ts: %llu\n",
				 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));

		trace_seq_printf(s, "now ts: %llu\n",
				 ring_buffer_time_stamp(trace_buf->buffer));
	}

	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
	trace_seq_printf(s, "dropped events: 
%ld\n", cnt);

	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
	trace_seq_printf(s, "read events: %ld\n", cnt);

	count = simple_read_from_buffer(ubuf, count, ppos,
					s->buffer, trace_seq_used(s));

	kfree(s);

	return count;
}

static const struct file_operations tracing_stats_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_stats_read,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};

#ifdef CONFIG_DYNAMIC_FTRACE

/* Report dynamic ftrace bookkeeping counters and update timings */
static ssize_t
tracing_read_dyn_info(struct file *filp, char __user *ubuf,
		      size_t cnt, loff_t *ppos)
{
	ssize_t ret;
	char *buf;
	int r;

	/* 512 should be plenty to hold the amount needed */
#define DYN_INFO_BUF_SIZE	512

	buf = kmalloc(DYN_INFO_BUF_SIZE, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	r = scnprintf(buf, DYN_INFO_BUF_SIZE,
		      "%ld pages:%ld groups: %ld\n"
		      "ftrace boot update time = %llu (ns)\n"
		      "ftrace module total update time = %llu (ns)\n",
		      ftrace_update_tot_cnt,
		      ftrace_number_of_pages,
		      ftrace_number_of_groups,
		      ftrace_update_time,
		      ftrace_total_mod_time);

	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
	kfree(buf);
	return ret;
}

static const struct file_operations tracing_dyn_info_fops = {
	.open		= tracing_open_generic,
	.read		= tracing_read_dyn_info,
	.llseek		= generic_file_llseek,
};
#endif /* CONFIG_DYNAMIC_FTRACE */

static struct dentry *tracing_get_dentry(struct trace_array *tr)
{
	/* Top directory uses NULL as the parent */
	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
		return NULL;

	if (WARN_ON(!tr->dir))
		return ERR_PTR(-ENODEV);

	/* All sub buffers have a descriptor */
	return tr->dir;
}

static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
{
	struct dentry *d_tracer;

	/* Create the "per_cpu" directory once and cache it */
	if (tr->percpu_dir)
		return tr->percpu_dir;

	d_tracer = tracing_get_dentry(tr);
	if (IS_ERR(d_tracer))
		return NULL;

	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);

	MEM_FAIL(!tr->percpu_dir,
		 "Could not create tracefs directory 'per_cpu/%d'\n", cpu);

	return tr->percpu_dir;
}

struct dentry *
trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
		      void *data, long cpu, const struct file_operations *fops)
{
	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);

	if (ret) /* See tracing_get_cpu() */
		d_inode(ret)->i_cdev = (void *)(cpu + 1);
	return ret;
}

/* Populate per_cpu/cpuN with the per-cpu trace control files */
static void
tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
{
	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
	struct dentry *d_cpu;
	char cpu_dir[30]; /* 30 characters should be more than enough */

	if (!d_percpu)
		return;

	snprintf(cpu_dir, 30, "cpu%ld", cpu);
	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
	if (!d_cpu) {
		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
		return;
	}

	/* per cpu trace_pipe */
	trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
			      tr, cpu, &tracing_pipe_fops);

	/* per cpu trace */
	trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
			      tr, cpu, &tracing_fops);

	trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
			      tr, cpu, &tracing_buffers_fops);

	trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
			      tr, cpu, &tracing_stats_fops);

	trace_create_cpu_file("buffer_size_kb", TRACE_MODE_WRITE, d_cpu,
			      tr, cpu, &tracing_entries_fops);

	if (tr->range_addr_start)
		trace_create_cpu_file("buffer_meta", TRACE_MODE_READ, d_cpu,
				      tr, cpu, &tracing_buffer_meta_fops);
#ifdef CONFIG_TRACER_SNAPSHOT
	if (!tr->range_addr_start) {
		trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
				      tr, cpu, &snapshot_fops);

		trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
				      tr, cpu, &snapshot_raw_fops);
	}
#endif
}

#ifdef CONFIG_FTRACE_SELFTEST
/* Let selftest have access to static functions in this file */
#include "trace_selftest.c"
#endif

/* Read one tracer option flag as "0\n" or "1\n" */
static ssize_t
trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
		   loff_t *ppos)
{
	struct trace_option_dentry *topt = filp->private_data;
	char *buf;

	if (topt->flags->val & topt->opt->bit)
		buf = "1\n";
	else
		buf = "0\n";

	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
}

static ssize_t
trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
		    loff_t *ppos)
{
	struct trace_option_dentry *topt = filp->private_data;
	unsigned long val;
	int ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	if (val != 0 && val != 1)
		return -EINVAL;

	/* Only call the tracer when the value actually changes */
	if (!!(topt->flags->val & topt->opt->bit) != val) {
		guard(mutex)(&trace_types_lock);
		/* __set_tracer_option() takes the "negate" sense, hence !val */
		ret = __set_tracer_option(topt->tr, topt->flags,
					  topt->opt, !val);
		if (ret)
			return ret;
	}

	*ppos += cnt;

	return cnt;
}

static int tracing_open_options(struct inode *inode, struct file *filp)
{
	struct trace_option_dentry *topt = inode->i_private;
	int ret;

	ret = tracing_check_open_get_tr(topt->tr);
	if (ret)
		return ret;

	filp->private_data = inode->i_private;
	return 0;
}

static int tracing_release_options(struct inode *inode, struct file *file)
{
	struct trace_option_dentry *topt = file->private_data;

	trace_array_put(topt->tr);
	return 0;
}

static
const struct file_operations trace_options_fops = {
	.open = tracing_open_options,
	.read = trace_options_read,
	.write = trace_options_write,
	.llseek	= generic_file_llseek,
	.release = tracing_release_options,
};

/*
 * In order to pass in both the trace_array descriptor as well as the index
 * to the flag that the trace option file represents, the trace_array
 * has a character array of trace_flags_index[], which holds the index
 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
 * The address of this character array is passed to the flag option file
 * read/write callbacks.
 *
 * In order to extract both the index and the trace_array descriptor,
 * get_tr_index() uses the following algorithm.
 *
 *   idx = *ptr;
 *
 * As the pointer itself contains the address of the index (remember
 * index[1] == 1).
 *
 * Then to get the trace_array descriptor, by subtracting that index
 * from the ptr, we get to the start of the index itself.
 *
 *   ptr - idx == &index[0]
 *
 * Then a simple container_of() from that pointer gets us to the
 * trace_array descriptor.
 */
static void get_tr_index(void *data, struct trace_array **ptr,
			 unsigned int *pindex)
{
	*pindex = *(unsigned char *)data;

	*ptr = container_of(data - *pindex, struct trace_array,
			    trace_flags_index);
}

/* Read one core trace flag (trace_options bit) as "0\n" or "1\n" */
static ssize_t
trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
			loff_t *ppos)
{
	void *tr_index = filp->private_data;
	struct trace_array *tr;
	unsigned int index;
	char *buf;

	get_tr_index(tr_index, &tr, &index);

	if (tr->trace_flags & (1ULL << index))
		buf = "1\n";
	else
		buf = "0\n";

	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
}

static ssize_t
trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
			 loff_t *ppos)
{
	void *tr_index = filp->private_data;
	struct trace_array *tr;
	unsigned int index;
	unsigned long val;
	int ret;

	get_tr_index(tr_index, &tr, &index);

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	if (val != 0 && val != 1)
		return -EINVAL;

	/* event_mutex nests outside trace_types_lock */
	mutex_lock(&event_mutex);
	mutex_lock(&trace_types_lock);
	ret = set_tracer_flag(tr, 1ULL << index, val);
	mutex_unlock(&trace_types_lock);
	mutex_unlock(&event_mutex);

	if (ret < 0)
		return ret;

	*ppos += cnt;

	return cnt;
}

static const struct file_operations trace_options_core_fops = {
	.open = tracing_open_generic,
	.read = trace_options_core_read,
	.write = trace_options_core_write,
	.llseek = generic_file_llseek,
};

struct dentry *trace_create_file(const char *name,
				 umode_t mode,
				 struct dentry *parent,
				 void *data,
				 const struct file_operations *fops)
{
	struct dentry *ret;

	ret = tracefs_create_file(name, mode, parent, data, fops);
	if (!ret)
		pr_warn("Could not create 
tracefs '%s' entry\n", name);

	return ret;
}


static struct dentry *trace_options_init_dentry(struct trace_array *tr)
{
	struct dentry *d_tracer;

	/* The "options" directory is created once and cached */
	if (tr->options)
		return tr->options;

	d_tracer = tracing_get_dentry(tr);
	if (IS_ERR(d_tracer))
		return NULL;

	tr->options = tracefs_create_dir("options", d_tracer);
	if (!tr->options) {
		pr_warn("Could not create tracefs directory 'options'\n");
		return NULL;
	}

	return tr->options;
}

/* Create one options/<name> file for a tracer-specific option bit */
static void
create_trace_option_file(struct trace_array *tr,
			 struct trace_option_dentry *topt,
			 struct tracer_flags *flags,
			 struct tracer_opt *opt)
{
	struct dentry *t_options;

	t_options = trace_options_init_dentry(tr);
	if (!t_options)
		return;

	topt->flags = flags;
	topt->opt = opt;
	topt->tr = tr;

	topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
					t_options, topt, &trace_options_fops);
}

/*
 * Create the option files for every option of @tracer and record the
 * allocated topts array in tr->topts so it can be torn down later.
 */
static int
create_trace_option_files(struct trace_array *tr, struct tracer *tracer,
			  struct tracer_flags *flags)
{
	struct trace_option_dentry *topts;
	struct trace_options *tr_topts;
	struct tracer_opt *opts;
	int cnt;

	if (!flags || !flags->opts)
		return 0;

	opts = flags->opts;

	/* Count the NULL-terminated option list */
	for (cnt = 0; opts[cnt].name; cnt++)
		;

	topts = kzalloc_objs(*topts, cnt + 1);
	if (!topts)
		return 0;

	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
			    GFP_KERNEL);
	if (!tr_topts) {
		kfree(topts);
		return -ENOMEM;
	}

	tr->topts = tr_topts;
	tr->topts[tr->nr_topts].tracer = tracer;
	tr->topts[tr->nr_topts].topts = topts;
	tr->nr_topts++;

	for (cnt = 0; opts[cnt].name; cnt++) {
		create_trace_option_file(tr, &topts[cnt], flags,
					 &opts[cnt]);
		MEM_FAIL(topts[cnt].entry == NULL,
			 "Failed to create trace option: %s",
			 opts[cnt].name);
	}
	return 0;
}

/*
 * Look up @tracer's flag value as recorded on the top-level trace
 * array; returns -1 if the tracer is not found or carries no flags.
 */
static int get_global_flags_val(struct tracer *tracer)
{
	struct tracers *t;

	list_for_each_entry(t, &global_trace.tracers, list) {
		if (t->tracer != tracer)
			continue;
		if (!t->flags)
			return -1;
		return t->flags->val;
	}
	return -1;
}

static int add_tracer_options(struct trace_array *tr, struct tracers *t)
{
	struct tracer *tracer = t->tracer;
	struct tracer_flags *flags = t->flags ?: tracer->flags;

	if (!flags)
		return 0;

	/* Only add tracer options after update_tracer_options finish */
	if (!tracer_options_updated)
		return 0;

	return create_trace_option_files(tr, tracer, flags);
}

/*
 * Register @tracer with trace array @tr: record it on tr->tracers,
 * give it a per-instance copy of its default flags if it defines any,
 * and create its option files.
 */
static int add_tracer(struct trace_array *tr, struct tracer *tracer)
{
	struct tracer_flags *flags;
	struct tracers *t;
	int ret;

	/* Only enable if the directory has been created already. */
	if (!tr->dir && !(tr->flags & TRACE_ARRAY_FL_GLOBAL))
		return 0;

	/*
	 * If this is an instance, only create flags for tracers
	 * the instance may have.
	 */
	if (!trace_ok_for_array(tracer, tr))
		return 0;

	t = kmalloc_obj(*t);
	if (!t)
		return -ENOMEM;

	t->tracer = tracer;
	t->flags = NULL;
	list_add(&t->list, &tr->tracers);

	flags = tracer->flags;
	if (!flags) {
		if (!tracer->default_flags)
			return 0;

		/*
		 * If the tracer defines default flags, it means the flags are
		 * per trace instance.
		 */
		flags = kmalloc_obj(*flags);
		if (!flags)
			return -ENOMEM;

		*flags = *tracer->default_flags;
		flags->trace = tracer;

		t->flags = flags;

		/* If this is an instance, inherit the global_trace flags */
		if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL)) {
			int val = get_global_flags_val(tracer);
			if (!WARN_ON_ONCE(val < 0))
				flags->val = val;
		}
	}

	ret = add_tracer_options(tr, t);
	if (ret < 0) {
		/* Unwind the registration on failure */
		list_del(&t->list);
		kfree(t->flags);
		kfree(t);
	}

	return ret;
}

static struct dentry *
create_trace_option_core_file(struct trace_array *tr,
			      const char *option, long index)
{
	struct dentry *t_options;

	t_options = trace_options_init_dentry(tr);
	if (!t_options)
		return NULL;

	return trace_create_file(option, TRACE_MODE_WRITE, t_options,
				 (void *)&tr->trace_flags_index[index],
				 &trace_options_core_fops);
}

static void create_trace_options_dir(struct trace_array *tr)
{
	struct dentry *t_options;
	bool top_level = tr == &global_trace;
	int i;

	t_options = trace_options_init_dentry(tr);
	if (!t_options)
		return;

	for (i = 0; trace_options[i]; i++) {
		/* Top-level-only flags are skipped in instances */
		if (top_level ||
		    !((1ULL << i) & TOP_LEVEL_TRACE_FLAGS)) {
			create_trace_option_core_file(tr, trace_options[i], i);
		}
	}
}

/* tracing_on file: report whether the ring buffer is recording */
static ssize_t
rb_simple_read(struct file *filp, char __user *ubuf,
	       size_t cnt, loff_t *ppos)
{
	struct trace_array *tr = filp->private_data;
	char buf[64];
	int r;

	r = tracer_tracing_is_on(tr);
	r = sprintf(buf, "%d\n", r);

	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}

static ssize_t
rb_simple_write(struct file *filp, const char __user *ubuf,
		size_t cnt, loff_t *ppos)
{
	struct trace_array *tr = filp->private_data;
	struct trace_buffer *buffer
	= tr->array_buffer.buffer;
	unsigned long val;
	int ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	if (buffer) {
		guard(mutex)(&trace_types_lock);
		if (!!val == tracer_tracing_is_on(tr)) {
			val = 0; /* do nothing */
		} else if (val) {
			tracer_tracing_on(tr);
			if (tr->current_trace->start)
				tr->current_trace->start(tr);
		} else {
			tracer_tracing_off(tr);
			if (tr->current_trace->stop)
				tr->current_trace->stop(tr);
			/* Wake up any waiters */
			ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
		}
	}

	/* Always advance the file position, even when nothing changed */
	(*ppos)++;

	return cnt;
}

/* Backs the per-instance "tracing_on" file (see init_tracer_tracefs()). */
static const struct file_operations rb_simple_fops = {
	.open = tracing_open_generic_tr,
	.read = rb_simple_read,
	.write = rb_simple_write,
	.release = tracing_release_generic_tr,
	.llseek = default_llseek,
};

/* Read handler for "buffer_percent": report tr->buffer_percent in decimal. */
static ssize_t
buffer_percent_read(struct file *filp, char __user *ubuf,
		    size_t cnt, loff_t *ppos)
{
	struct trace_array *tr = filp->private_data;
	char buf[64];
	int r;

	r = tr->buffer_percent;
	r = sprintf(buf, "%d\n", r);

	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}

/*
 * Write handler for "buffer_percent": accepts a value in [0, 100];
 * anything larger is rejected with -EINVAL.
 */
static ssize_t
buffer_percent_write(struct file *filp, const char __user *ubuf,
		     size_t cnt, loff_t *ppos)
{
	struct trace_array *tr = filp->private_data;
	unsigned long val;
	int ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	if (val > 100)
		return -EINVAL;

	tr->buffer_percent = val;

	(*ppos)++;

	return cnt;
}

/* Backs the per-instance "buffer_percent" file. */
static const struct file_operations buffer_percent_fops = {
	.open = tracing_open_generic_tr,
	.read = buffer_percent_read,
	.write = buffer_percent_write,
	.release = tracing_release_generic_tr,
	.llseek = default_llseek,
};

static
ssize_t
buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
{
	/* Read handler for "buffer_subbuf_size_kb". */
	struct trace_array *tr = filp->private_data;
	size_t size;
	char buf[64];
	int order;
	int r;

	order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
	size = (PAGE_SIZE << order) / 1024; /* sub-buffer size in KB */

	r = sprintf(buf, "%zd\n", size);

	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}

/*
 * Write handler for "buffer_subbuf_size_kb": change the sub-buffer size of
 * the ring buffer. The value is given in KB and rounded up to a
 * power-of-two number of system pages. The snapshot buffer (if allocated)
 * is updated to the same order; on failure the main buffer is rolled back.
 */
static ssize_t
buffer_subbuf_size_write(struct file *filp, const char __user *ubuf,
			 size_t cnt, loff_t *ppos)
{
	struct trace_array *tr = filp->private_data;
	unsigned long val;
	int old_order;
	int order;
	int pages;
	int ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	val *= 1024; /* value passed in is in KB */

	pages = DIV_ROUND_UP(val, PAGE_SIZE);
	order = fls(pages - 1);

	/* limit between 1 and 128 system pages */
	if (order < 0 || order > 7)
		return -EINVAL;

	/* Do not allow tracing while changing the order of the ring buffer */
	tracing_stop_tr(tr);

	old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
	if (old_order == order)
		goto out;

	ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order);
	if (ret)
		goto out;

#ifdef CONFIG_TRACER_SNAPSHOT

	if (!tr->allocated_snapshot)
		goto out_max;

	ret = ring_buffer_subbuf_order_set(tr->snapshot_buffer.buffer, order);
	if (ret) {
		/* Put back the old order (cnt reused to carry the rollback status) */
		cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order);
		if (WARN_ON_ONCE(cnt)) {
			/*
			 * AARGH! We are left with different orders!
			 * The max buffer is our "snapshot" buffer.
			 * When a tracer needs a snapshot (one of the
			 * latency tracers), it swaps the max buffer
			 * with the saved snap shot. We succeeded to
			 * update the order of the main buffer, but failed to
			 * update the order of the max buffer. But when we tried
			 * to reset the main buffer to the original size, we
			 * failed there too. This is very unlikely to
			 * happen, but if it does, warn and kill all
			 * tracing.
			 */
			tracing_disabled = 1;
		}
		goto out;
	}
out_max:
#endif
	(*ppos)++;
out:
	if (ret)
		cnt = ret;
	tracing_start_tr(tr);
	return cnt;
}

/* Backs the per-instance "buffer_subbuf_size_kb" file. */
static const struct file_operations buffer_subbuf_size_fops = {
	.open = tracing_open_generic_tr,
	.read = buffer_subbuf_size_read,
	.write = buffer_subbuf_size_write,
	.release = tracing_release_generic_tr,
	.llseek = default_llseek,
};

static struct dentry *trace_instance_dir;

static void
init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);

#ifdef CONFIG_MODULES
/*
 * Compute the text-address delta of @mod against the entry recorded for it
 * in the previous boot's scratch area (@data is the trace_array).
 * A module that is going away gets a delta of 0.
 */
static int make_mod_delta(struct module *mod, void *data)
{
	struct trace_module_delta *module_delta;
	struct trace_scratch *tscratch;
	struct trace_mod_entry *entry;
	struct trace_array *tr = data;
	int i;

	tscratch = tr->scratch;
	module_delta = READ_ONCE(tr->module_delta);
	for (i = 0; i < tscratch->nr_entries; i++) {
		entry = &tscratch->entries[i];
		if (strcmp(mod->name, entry->mod_name))
			continue;
		if (mod->state == MODULE_STATE_GOING)
			module_delta->delta[i] = 0;
		else
			module_delta->delta[i] = (unsigned long)mod->mem[MOD_TEXT].base
						 - entry->mod_addr;
		break;
	}
	return 0;
}
#else
static int make_mod_delta(struct module *mod, void *data)
{
	return 0;
}
#endif

/*
 * sort_r() comparator: order trace_mod_entry by ascending mod_addr.
 * Note: never returns 0 — equal addresses are not expected here.
 */
static int mod_addr_comp(const void *a, const void *b, const void *data)
{
	const struct trace_mod_entry *e1 = a;
	const struct trace_mod_entry *e2 = b;

	return e1->mod_addr > e2->mod_addr ?
1 : -1; 8316 } 8317 8318 static void setup_trace_scratch(struct trace_array *tr, 8319 struct trace_scratch *tscratch, unsigned int size) 8320 { 8321 struct trace_module_delta *module_delta; 8322 struct trace_mod_entry *entry; 8323 int i, nr_entries; 8324 8325 if (!tscratch) 8326 return; 8327 8328 tr->scratch = tscratch; 8329 tr->scratch_size = size; 8330 8331 if (tscratch->text_addr) 8332 tr->text_delta = (unsigned long)_text - tscratch->text_addr; 8333 8334 if (struct_size(tscratch, entries, tscratch->nr_entries) > size) 8335 goto reset; 8336 8337 /* Check if each module name is a valid string */ 8338 for (i = 0; i < tscratch->nr_entries; i++) { 8339 int n; 8340 8341 entry = &tscratch->entries[i]; 8342 8343 for (n = 0; n < MODULE_NAME_LEN; n++) { 8344 if (entry->mod_name[n] == '\0') 8345 break; 8346 if (!isprint(entry->mod_name[n])) 8347 goto reset; 8348 } 8349 if (n == MODULE_NAME_LEN) 8350 goto reset; 8351 } 8352 8353 /* Sort the entries so that we can find appropriate module from address. */ 8354 nr_entries = tscratch->nr_entries; 8355 sort_r(tscratch->entries, nr_entries, sizeof(struct trace_mod_entry), 8356 mod_addr_comp, NULL, NULL); 8357 8358 if (IS_ENABLED(CONFIG_MODULES)) { 8359 module_delta = kzalloc_flex(*module_delta, delta, nr_entries); 8360 if (!module_delta) { 8361 pr_info("module_delta allocation failed. Not able to decode module address."); 8362 goto reset; 8363 } 8364 init_rcu_head(&module_delta->rcu); 8365 } else 8366 module_delta = NULL; 8367 WRITE_ONCE(tr->module_delta, module_delta); 8368 8369 /* Scan modules to make text delta for modules. */ 8370 module_for_each_mod(make_mod_delta, tr); 8371 8372 /* Set trace_clock as the same of the previous boot. 
*/ 8373 if (tscratch->clock_id != tr->clock_id) { 8374 if (tscratch->clock_id >= ARRAY_SIZE(trace_clocks) || 8375 tracing_set_clock(tr, trace_clocks[tscratch->clock_id].name) < 0) { 8376 pr_info("the previous trace_clock info is not valid."); 8377 goto reset; 8378 } 8379 } 8380 return; 8381 reset: 8382 /* Invalid trace modules */ 8383 memset(tscratch, 0, size); 8384 } 8385 8386 int allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size) 8387 { 8388 enum ring_buffer_flags rb_flags; 8389 struct trace_scratch *tscratch; 8390 unsigned int scratch_size = 0; 8391 8392 rb_flags = tr->trace_flags & TRACE_ITER(OVERWRITE) ? RB_FL_OVERWRITE : 0; 8393 8394 buf->tr = tr; 8395 8396 if (tr->range_addr_start && tr->range_addr_size) { 8397 /* Add scratch buffer to handle 128 modules */ 8398 buf->buffer = ring_buffer_alloc_range(size, rb_flags, 0, 8399 tr->range_addr_start, 8400 tr->range_addr_size, 8401 struct_size(tscratch, entries, 128)); 8402 8403 tscratch = ring_buffer_meta_scratch(buf->buffer, &scratch_size); 8404 setup_trace_scratch(tr, tscratch, scratch_size); 8405 8406 /* 8407 * This is basically the same as a mapped buffer, 8408 * with the same restrictions. 
 */
		tr->mapped++;
	} else {
		buf->buffer = ring_buffer_alloc(size, rb_flags);
	}
	if (!buf->buffer)
		return -ENOMEM;

	buf->data = alloc_percpu(struct trace_array_cpu);
	if (!buf->data) {
		ring_buffer_free(buf->buffer);
		buf->buffer = NULL;
		return -ENOMEM;
	}

	/*
	 * Allocate the first page for all buffers.
	 * NOTE(review): this always sizes tr->array_buffer even though the
	 * function was passed @buf, which may be a different buffer (e.g.
	 * the snapshot buffer) — confirm this is intentional.
	 */
	trace_set_buffer_entries(&tr->array_buffer,
				 ring_buffer_size(tr->array_buffer.buffer, 0));

	return 0;
}

/* Release @buf's ring buffer and per-CPU data; no-op if already freed. */
static void free_trace_buffer(struct array_buffer *buf)
{
	if (buf->buffer) {
		ring_buffer_free(buf->buffer);
		buf->buffer = NULL;
		free_percpu(buf->data);
		buf->data = NULL;
	}
}

/*
 * Allocate the main array buffer plus its snapshot counterpart (via
 * trace_allocate_snapshot()), unwinding the main buffer on failure.
 */
static int allocate_trace_buffers(struct trace_array *tr, unsigned long size)
{
	int ret;

	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
	if (ret)
		return ret;

	ret = trace_allocate_snapshot(tr, size);
	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n"))
		free_trace_buffer(&tr->array_buffer);

	return ret;
}

/* Free all of @tr's buffers and the module-delta table. */
static void free_trace_buffers(struct trace_array *tr)
{
	if (!tr)
		return;

	free_trace_buffer(&tr->array_buffer);
	kfree(tr->module_delta);

#ifdef CONFIG_TRACER_SNAPSHOT
	free_trace_buffer(&tr->snapshot_buffer);
#endif
}

/* Identity-map each trace flag index. Used by the trace options files. */
static void init_trace_flags_index(struct trace_array *tr)
{
	int i;

	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
		tr->trace_flags_index[i] = i;
}

/* Register every tracer on the trace_types list with instance @tr. */
static int __update_tracer(struct trace_array *tr)
{
	struct tracer *t;
	int ret = 0;

	for (t = trace_types; t && !ret; t = t->next)
		ret = add_tracer(tr, t);

	return ret;
}

/* Add the per-tracer option files for every tracer attached to @tr. */
static __init int __update_tracer_options(struct trace_array *tr)
{
	struct tracers *t;
	int ret = 0;

	list_for_each_entry(t, &tr->tracers, list) {
		ret = add_tracer_options(tr, t);
		if (ret < 0)
			break;
	}

	return ret;
}

/* Boot-time: add tracer option files for every registered trace array. */
static __init void update_tracer_options(void)
{
	struct trace_array *tr;

	guard(mutex)(&trace_types_lock);
	tracer_options_updated = true;
	list_for_each_entry(tr, &ftrace_trace_arrays, list)
		__update_tracer_options(tr);
}

/* Must have trace_types_lock held */
struct trace_array *trace_array_find(const char *instance)
{
	struct trace_array *tr, *found = NULL;

	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
		if (tr->name && strcmp(tr->name, instance) == 0) {
			found = tr;
			break;
		}
	}

	return found;
}

/*
 * Locked variant of trace_array_find() that also takes a reference on the
 * found array; returns NULL if the lookup or the ref-get fails.
 */
struct trace_array *trace_array_find_get(const char *instance)
{
	struct trace_array *tr;

	guard(mutex)(&trace_types_lock);
	tr = trace_array_find(instance);
	if (tr && __trace_array_get(tr) < 0)
		tr = NULL;

	return tr;
}

/*
 * Create the instance's tracefs directory and populate it with events,
 * control files and tracers; unwinds everything on failure.
 */
static int trace_array_create_dir(struct trace_array *tr)
{
	int ret;

	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
	if (!tr->dir)
		return -EINVAL;

	ret = event_trace_add_tracer(tr->dir, tr);
	if (ret) {
		tracefs_remove(tr->dir);
		return ret;
	}

	init_tracer_tracefs(tr, tr->dir);
	ret = __update_tracer(tr);
	if (ret) {
		event_trace_del_tracer(tr);
		tracefs_remove(tr->dir);
		return ret;
	}
	return 0;
}

/*
 * Allocate and set up a new trace array (instance).
 * @systems: optional list of event systems to expose (NULL for all).
 * @range_addr_start/@range_addr_size: non-zero only for boot-time
 * memory-mapped ring buffers.
 * Returns the new array, or an ERR_PTR() on failure.
 */
static struct trace_array *
trace_array_create_systems(const char *name, const char *systems,
			   unsigned long range_addr_start,
			   unsigned long range_addr_size)
{
	struct trace_array *tr;
	int ret;

	ret = -ENOMEM;
	tr = kzalloc_obj(*tr);
	if (!tr)
		return ERR_PTR(ret);

	tr->name = kstrdup(name, GFP_KERNEL);
	if (!tr->name)
		goto out_free_tr;

8581 if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL)) 8582 goto out_free_tr; 8583 8584 if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL)) 8585 goto out_free_tr; 8586 8587 if (systems) { 8588 tr->system_names = kstrdup_const(systems, GFP_KERNEL); 8589 if (!tr->system_names) 8590 goto out_free_tr; 8591 } 8592 8593 /* Only for boot up memory mapped ring buffers */ 8594 tr->range_addr_start = range_addr_start; 8595 tr->range_addr_size = range_addr_size; 8596 8597 tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS; 8598 8599 cpumask_copy(tr->tracing_cpumask, cpu_all_mask); 8600 8601 raw_spin_lock_init(&tr->start_lock); 8602 8603 tr->syscall_buf_sz = global_trace.syscall_buf_sz; 8604 8605 tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; 8606 #ifdef CONFIG_TRACER_SNAPSHOT 8607 spin_lock_init(&tr->snapshot_trigger_lock); 8608 #endif 8609 tr->current_trace = &nop_trace; 8610 tr->current_trace_flags = nop_trace.flags; 8611 8612 INIT_LIST_HEAD(&tr->systems); 8613 INIT_LIST_HEAD(&tr->events); 8614 INIT_LIST_HEAD(&tr->hist_vars); 8615 INIT_LIST_HEAD(&tr->err_log); 8616 INIT_LIST_HEAD(&tr->tracers); 8617 INIT_LIST_HEAD(&tr->marker_list); 8618 8619 #ifdef CONFIG_MODULES 8620 INIT_LIST_HEAD(&tr->mod_events); 8621 #endif 8622 8623 if (allocate_trace_buffers(tr, trace_buf_size) < 0) 8624 goto out_free_tr; 8625 8626 /* The ring buffer is defaultly expanded */ 8627 trace_set_ring_buffer_expanded(tr); 8628 8629 if (ftrace_allocate_ftrace_ops(tr) < 0) 8630 goto out_free_tr; 8631 8632 trace_array_init_autoremove(tr); 8633 8634 ftrace_init_trace_array(tr); 8635 8636 init_trace_flags_index(tr); 8637 8638 if (trace_instance_dir) { 8639 ret = trace_array_create_dir(tr); 8640 if (ret) 8641 goto out_free_tr; 8642 } else 8643 __trace_early_add_events(tr); 8644 8645 list_add(&tr->list, &ftrace_trace_arrays); 8646 8647 tr->ref++; 8648 8649 return tr; 8650 8651 out_free_tr: 8652 ftrace_free_ftrace_ops(tr); 8653 free_trace_buffers(tr); 8654 
	free_cpumask_var(tr->pipe_cpumask);
	free_cpumask_var(tr->tracing_cpumask);
	kfree_const(tr->system_names);
	kfree(tr->range_name);
	kfree(tr->name);
	kfree(tr);

	return ERR_PTR(ret);
}

/* Convenience wrapper: create an instance with all event systems. */
static struct trace_array *trace_array_create(const char *name)
{
	return trace_array_create_systems(name, NULL, 0, 0);
}

/* tracefs callback for "mkdir instances/<name>". */
static int instance_mkdir(const char *name)
{
	struct trace_array *tr;
	int ret;

	guard(mutex)(&event_mutex);
	guard(mutex)(&trace_types_lock);

	/* NOTE(review): dead store — every return below supplies its own value */
	ret = -EEXIST;
	if (trace_array_find(name))
		return -EEXIST;

	tr = trace_array_create(name);

	ret = PTR_ERR_OR_ZERO(tr);

	return ret;
}

#ifdef CONFIG_MMU
/*
 * Map @size bytes starting at @start into a fresh vmalloc-space area.
 * Returns the new virtual address, or 0 on failure.
 * (assumes @start is a page-aligned physical address acceptable to
 * vmap_page_range() — TODO confirm with callers)
 */
static u64 map_pages(unsigned long start, unsigned long size)
{
	unsigned long vmap_start, vmap_end;
	struct vm_struct *area;
	int ret;

	area = get_vm_area(size, VM_IOREMAP);
	if (!area)
		return 0;

	vmap_start = (unsigned long) area->addr;
	vmap_end = vmap_start + size;

	ret = vmap_page_range(vmap_start, vmap_end,
			      start, pgprot_nx(PAGE_KERNEL));
	if (ret < 0) {
		free_vm_area(area);
		return 0;
	}

	return (u64)vmap_start;
}
#else
static inline u64 map_pages(unsigned long start, unsigned long size)
{
	return 0;
}
#endif

/**
 * trace_array_get_by_name - Create/Lookup a trace array, given its name.
 * @name: The name of the trace array to be looked up/created.
 * @systems: A list of systems to create event directories for (NULL for all)
 *
 * Returns pointer to trace array with given name.
 * NULL, if it cannot be created.
 *
 * NOTE: This function increments the reference counter associated with the
 * trace array returned. This makes sure it cannot be freed while in use.
 * Use trace_array_put() once the trace array is no longer needed.
8729 * If the trace_array is to be freed, trace_array_destroy() needs to 8730 * be called after the trace_array_put(), or simply let user space delete 8731 * it from the tracefs instances directory. But until the 8732 * trace_array_put() is called, user space can not delete it. 8733 * 8734 */ 8735 struct trace_array *trace_array_get_by_name(const char *name, const char *systems) 8736 { 8737 struct trace_array *tr; 8738 8739 guard(mutex)(&event_mutex); 8740 guard(mutex)(&trace_types_lock); 8741 8742 list_for_each_entry(tr, &ftrace_trace_arrays, list) { 8743 if (tr->name && strcmp(tr->name, name) == 0) { 8744 /* if this fails, @tr is going to be removed. */ 8745 if (__trace_array_get(tr) < 0) 8746 tr = NULL; 8747 return tr; 8748 } 8749 } 8750 8751 tr = trace_array_create_systems(name, systems, 0, 0); 8752 8753 if (IS_ERR(tr)) 8754 tr = NULL; 8755 else 8756 tr->ref++; 8757 8758 return tr; 8759 } 8760 EXPORT_SYMBOL_GPL(trace_array_get_by_name); 8761 8762 static int __remove_instance(struct trace_array *tr) 8763 { 8764 int i; 8765 8766 /* Reference counter for a newly created trace array = 1. 
*/ 8767 if (tr->ref > 1 || (tr->current_trace && tr->trace_ref)) 8768 return -EBUSY; 8769 8770 list_del(&tr->list); 8771 8772 if (printk_trace == tr) 8773 update_printk_trace(&global_trace); 8774 8775 /* Must be done before disabling all the flags */ 8776 if (update_marker_trace(tr, 0)) 8777 synchronize_rcu(); 8778 8779 /* Disable all the flags that were enabled coming in */ 8780 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) { 8781 if ((1ULL << i) & ZEROED_TRACE_FLAGS) 8782 set_tracer_flag(tr, 1ULL << i, 0); 8783 } 8784 8785 trace_array_cancel_autoremove(tr); 8786 tracing_set_nop(tr); 8787 clear_ftrace_function_probes(tr); 8788 event_trace_del_tracer(tr); 8789 ftrace_clear_pids(tr); 8790 ftrace_destroy_function_files(tr); 8791 tracefs_remove(tr->dir); 8792 free_percpu(tr->last_func_repeats); 8793 free_trace_buffers(tr); 8794 clear_tracing_err_log(tr); 8795 free_tracers(tr); 8796 8797 if (tr->range_name) { 8798 reserve_mem_release_by_name(tr->range_name); 8799 kfree(tr->range_name); 8800 } 8801 if (tr->flags & TRACE_ARRAY_FL_VMALLOC) 8802 vfree((void *)tr->range_addr_start); 8803 8804 for (i = 0; i < tr->nr_topts; i++) { 8805 kfree(tr->topts[i].topts); 8806 } 8807 kfree(tr->topts); 8808 8809 free_cpumask_var(tr->pipe_cpumask); 8810 free_cpumask_var(tr->tracing_cpumask); 8811 kfree_const(tr->system_names); 8812 kfree(tr->name); 8813 kfree(tr); 8814 8815 return 0; 8816 } 8817 8818 int trace_array_destroy(struct trace_array *this_tr) 8819 { 8820 struct trace_array *tr; 8821 8822 if (!this_tr) 8823 return -EINVAL; 8824 8825 guard(mutex)(&event_mutex); 8826 guard(mutex)(&trace_types_lock); 8827 8828 8829 /* Making sure trace array exists before destroying it. 
 */
	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
		if (tr == this_tr)
			return __remove_instance(tr);
	}

	return -ENODEV;
}
EXPORT_SYMBOL_GPL(trace_array_destroy);

/* tracefs callback for "rmdir instances/<name>". */
static int instance_rmdir(const char *name)
{
	struct trace_array *tr;

	guard(mutex)(&event_mutex);
	guard(mutex)(&trace_types_lock);

	tr = trace_array_find(name);
	if (!tr)
		return -ENODEV;

	return __remove_instance(tr);
}

/*
 * Create the "instances" directory and a directory for every named trace
 * array that already exists (e.g. boot-time instances).
 */
static __init void create_trace_instances(struct dentry *d_tracer)
{
	struct trace_array *tr;

	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
							 instance_mkdir,
							 instance_rmdir);
	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
		return;

	guard(mutex)(&event_mutex);
	guard(mutex)(&trace_types_lock);

	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
		if (!tr->name)
			continue;
		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
			     "Failed to create instance directory\n"))
			return;
	}
}

/*
 * Populate @d_tracer with the per-instance control files. Read-only
 * instances get most files with read-only permissions and skip the
 * writable-only files created further below.
 */
static void
init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
{
	umode_t writable_mode = TRACE_MODE_WRITE;
	int cpu;

	if (trace_array_is_readonly(tr))
		writable_mode = TRACE_MODE_READ;

	trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
			  tr, &show_traces_fops);

	trace_create_file("current_tracer", writable_mode, d_tracer,
			  tr, &set_tracer_fops);

	trace_create_file("tracing_cpumask", writable_mode, d_tracer,
			  tr, &tracing_cpumask_fops);

	/* Options are used for changing print-format even for readonly instance.
*/ 8894 trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer, 8895 tr, &tracing_iter_fops); 8896 8897 trace_create_file("trace", TRACE_MODE_WRITE, d_tracer, 8898 tr, &tracing_fops); 8899 8900 trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer, 8901 tr, &tracing_pipe_fops); 8902 8903 trace_create_file("buffer_size_kb", writable_mode, d_tracer, 8904 tr, &tracing_entries_fops); 8905 8906 trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer, 8907 tr, &tracing_total_entries_fops); 8908 8909 trace_create_file("trace_clock", writable_mode, d_tracer, tr, 8910 &trace_clock_fops); 8911 8912 trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr, 8913 &trace_time_stamp_mode_fops); 8914 8915 tr->buffer_percent = 50; 8916 8917 trace_create_file("buffer_subbuf_size_kb", writable_mode, d_tracer, 8918 tr, &buffer_subbuf_size_fops); 8919 8920 create_trace_options_dir(tr); 8921 8922 if (tr->range_addr_start) 8923 trace_create_file("last_boot_info", TRACE_MODE_READ, d_tracer, 8924 tr, &last_boot_fops); 8925 8926 for_each_tracing_cpu(cpu) 8927 tracing_init_tracefs_percpu(tr, cpu); 8928 8929 /* Read-only instance has above files only. 
*/ 8930 if (trace_array_is_readonly(tr)) 8931 return; 8932 8933 trace_create_file("free_buffer", 0200, d_tracer, 8934 tr, &tracing_free_buffer_fops); 8935 8936 trace_create_file("trace_marker", 0220, d_tracer, 8937 tr, &tracing_mark_fops); 8938 8939 tr->trace_marker_file = __find_event_file(tr, "ftrace", "print"); 8940 8941 trace_create_file("trace_marker_raw", 0220, d_tracer, 8942 tr, &tracing_mark_raw_fops); 8943 8944 trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer, 8945 tr, &buffer_percent_fops); 8946 8947 trace_create_file("syscall_user_buf_size", TRACE_MODE_WRITE, d_tracer, 8948 tr, &tracing_syscall_buf_fops); 8949 8950 trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer, 8951 tr, &rb_simple_fops); 8952 8953 trace_create_maxlat_file(tr, d_tracer); 8954 8955 if (ftrace_create_function_files(tr, d_tracer)) 8956 MEM_FAIL(1, "Could not allocate function filter files"); 8957 8958 #ifdef CONFIG_TRACER_SNAPSHOT 8959 if (!tr->range_addr_start) 8960 trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer, 8961 tr, &snapshot_fops); 8962 #endif 8963 8964 trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer, 8965 tr, &tracing_err_log_fops); 8966 8967 ftrace_init_tracefs(tr, d_tracer); 8968 } 8969 8970 #ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED 8971 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore) 8972 { 8973 struct vfsmount *mnt; 8974 struct file_system_type *type; 8975 struct fs_context *fc; 8976 int ret; 8977 8978 /* 8979 * To maintain backward compatibility for tools that mount 8980 * debugfs to get to the tracing facility, tracefs is automatically 8981 * mounted to the debugfs/tracing directory. 
8982 */ 8983 type = get_fs_type("tracefs"); 8984 if (!type) 8985 return NULL; 8986 8987 fc = fs_context_for_submount(type, mntpt); 8988 put_filesystem(type); 8989 if (IS_ERR(fc)) 8990 return ERR_CAST(fc); 8991 8992 pr_warn("NOTICE: Automounting of tracing to debugfs is deprecated and will be removed in 2030\n"); 8993 8994 ret = vfs_parse_fs_string(fc, "source", "tracefs"); 8995 if (!ret) 8996 mnt = fc_mount(fc); 8997 else 8998 mnt = ERR_PTR(ret); 8999 9000 put_fs_context(fc); 9001 return mnt; 9002 } 9003 #endif 9004 9005 /** 9006 * tracing_init_dentry - initialize top level trace array 9007 * 9008 * This is called when creating files or directories in the tracing 9009 * directory. It is called via fs_initcall() by any of the boot up code 9010 * and expects to return the dentry of the top level tracing directory. 9011 */ 9012 int tracing_init_dentry(void) 9013 { 9014 struct trace_array *tr = &global_trace; 9015 9016 if (security_locked_down(LOCKDOWN_TRACEFS)) { 9017 pr_warn("Tracing disabled due to lockdown\n"); 9018 return -EPERM; 9019 } 9020 9021 /* The top level trace array uses NULL as parent */ 9022 if (tr->dir) 9023 return 0; 9024 9025 if (WARN_ON(!tracefs_initialized())) 9026 return -ENODEV; 9027 9028 #ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED 9029 /* 9030 * As there may still be users that expect the tracing 9031 * files to exist in debugfs/tracing, we must automount 9032 * the tracefs file system there, so older tools still 9033 * work with the newer kernel. 
9034 */ 9035 tr->dir = debugfs_create_automount("tracing", NULL, 9036 trace_automount, NULL); 9037 #endif 9038 9039 return 0; 9040 } 9041 9042 extern struct trace_eval_map *__start_ftrace_eval_maps[]; 9043 extern struct trace_eval_map *__stop_ftrace_eval_maps[]; 9044 9045 struct workqueue_struct *trace_init_wq __initdata; 9046 static struct work_struct eval_map_work __initdata; 9047 static struct work_struct tracerfs_init_work __initdata; 9048 9049 static void __init eval_map_work_func(struct work_struct *work) 9050 { 9051 int len; 9052 9053 len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps; 9054 trace_event_update_with_eval_map(NULL, __start_ftrace_eval_maps, len); 9055 } 9056 9057 static int __init trace_eval_init(void) 9058 { 9059 INIT_WORK(&eval_map_work, eval_map_work_func); 9060 9061 trace_init_wq = alloc_workqueue("trace_init_wq", WQ_UNBOUND, 0); 9062 if (!trace_init_wq) { 9063 pr_err("Unable to allocate trace_init_wq\n"); 9064 /* Do work here */ 9065 eval_map_work_func(&eval_map_work); 9066 return -ENOMEM; 9067 } 9068 9069 queue_work(trace_init_wq, &eval_map_work); 9070 return 0; 9071 } 9072 9073 subsys_initcall(trace_eval_init); 9074 9075 static int __init trace_eval_sync(void) 9076 { 9077 /* Make sure the eval map updates are finished */ 9078 if (trace_init_wq) 9079 destroy_workqueue(trace_init_wq); 9080 return 0; 9081 } 9082 9083 late_initcall_sync(trace_eval_sync); 9084 9085 9086 #ifdef CONFIG_MODULES 9087 9088 bool module_exists(const char *module) 9089 { 9090 /* All modules have the symbol __this_module */ 9091 static const char this_mod[] = "__this_module"; 9092 char modname[MODULE_NAME_LEN + sizeof(this_mod) + 2]; 9093 unsigned long val; 9094 int n; 9095 9096 n = snprintf(modname, sizeof(modname), "%s:%s", module, this_mod); 9097 9098 if (n > sizeof(modname) - 1) 9099 return false; 9100 9101 val = module_kallsyms_lookup_name(modname); 9102 return val != 0; 9103 } 9104 9105 static void trace_module_add_evals(struct module *mod) 9106 { 9107 /* 
9108 * Modules with bad taint do not have events created, do 9109 * not bother with enums either. 9110 */ 9111 if (trace_module_has_bad_taint(mod)) 9112 return; 9113 9114 /* Even if no trace_evals, this need to sanitize field types. */ 9115 trace_event_update_with_eval_map(mod, mod->trace_evals, mod->num_trace_evals); 9116 } 9117 9118 #ifdef CONFIG_TRACE_EVAL_MAP_FILE 9119 static void trace_module_remove_evals(struct module *mod) 9120 { 9121 union trace_eval_map_item *map; 9122 union trace_eval_map_item **last = &trace_eval_maps; 9123 9124 if (!mod->num_trace_evals) 9125 return; 9126 9127 guard(mutex)(&trace_eval_mutex); 9128 9129 map = trace_eval_maps; 9130 9131 while (map) { 9132 if (map->head.mod == mod) 9133 break; 9134 map = trace_eval_jmp_to_tail(map); 9135 last = &map->tail.next; 9136 map = map->tail.next; 9137 } 9138 if (!map) 9139 return; 9140 9141 *last = trace_eval_jmp_to_tail(map)->tail.next; 9142 kfree(map); 9143 } 9144 #else 9145 static inline void trace_module_remove_evals(struct module *mod) { } 9146 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */ 9147 9148 static void trace_module_record(struct module *mod, bool add) 9149 { 9150 struct trace_array *tr; 9151 unsigned long flags; 9152 9153 list_for_each_entry(tr, &ftrace_trace_arrays, list) { 9154 flags = tr->flags & (TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT); 9155 /* Update any persistent trace array that has already been started */ 9156 if (flags == TRACE_ARRAY_FL_BOOT && add) { 9157 guard(mutex)(&scratch_mutex); 9158 save_mod(mod, tr); 9159 } else if (flags & TRACE_ARRAY_FL_LAST_BOOT) { 9160 /* Update delta if the module loaded in previous boot */ 9161 make_mod_delta(mod, tr); 9162 } 9163 } 9164 } 9165 9166 static int trace_module_notify(struct notifier_block *self, 9167 unsigned long val, void *data) 9168 { 9169 struct module *mod = data; 9170 9171 switch (val) { 9172 case MODULE_STATE_COMING: 9173 trace_module_add_evals(mod); 9174 trace_module_record(mod, true); 9175 break; 9176 case 
MODULE_STATE_GOING:
		trace_module_remove_evals(mod);
		trace_module_record(mod, false);
		break;
	}

	return NOTIFY_OK;
}

static struct notifier_block trace_module_nb = {
	.notifier_call = trace_module_notify,
	.priority = 0,
};
#endif /* CONFIG_MODULES */

/*
 * Create the top-level tracefs control files, event directories and
 * instances. May run deferred on trace_init_wq (see tracer_init_tracefs()).
 */
static __init void tracer_init_tracefs_work_func(struct work_struct *work)
{

	event_trace_init();

	init_tracer_tracefs(&global_trace, NULL);
	ftrace_init_tracefs_toplevel(&global_trace, NULL);

	trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
			  &global_trace, &tracing_thresh_fops);

	trace_create_file("README", TRACE_MODE_READ, NULL,
			  NULL, &tracing_readme_fops);

	trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
			  NULL, &tracing_saved_cmdlines_fops);

	trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
			  NULL, &tracing_saved_cmdlines_size_fops);

	trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
			  NULL, &tracing_saved_tgids_fops);

	trace_create_eval_file(NULL);

#ifdef CONFIG_MODULES
	register_module_notifier(&trace_module_nb);
#endif

#ifdef CONFIG_DYNAMIC_FTRACE
	trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
			  NULL, &tracing_dyn_info_fops);
#endif

	create_trace_instances(NULL);

	update_tracer_options();
}

/*
 * fs_initcall: initialize the tracing directory. File creation is queued
 * on trace_init_wq when it exists, otherwise done synchronously.
 */
static __init int tracer_init_tracefs(void)
{
	int ret;

	trace_access_lock_init();

	ret = tracing_init_dentry();
	if (ret)
		return 0;

	if (trace_init_wq) {
		INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
		queue_work(trace_init_wq, &tracerfs_init_work);
	} else {
		tracer_init_tracefs_work_func(NULL);
	}

	if (rv_init_interface())
		pr_err("RV: Error while creating the RV interface\n");

	return 0;
}

fs_initcall(tracer_init_tracefs);

static int trace_die_panic_handler(struct notifier_block *self,
				unsigned long ev, void *unused);

/* Registered on the panic notifier chain (see tracer_alloc_buffers) */
static struct notifier_block trace_panic_notifier = {
	.notifier_call = trace_die_panic_handler,
	.priority = INT_MAX - 1,
};

/* Registered on the die notifier chain (see tracer_alloc_buffers) */
static struct notifier_block trace_die_notifier = {
	.notifier_call = trace_die_panic_handler,
	.priority = INT_MAX - 1,
};

/*
 * The idea is to execute the following die/panic callback early, in order
 * to avoid showing irrelevant information in the trace (like other panic
 * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
 * warnings get disabled (to prevent potential log flooding).
 */
static int trace_die_panic_handler(struct notifier_block *self,
				unsigned long ev, void *unused)
{
	if (!ftrace_dump_on_oops_enabled())
		return NOTIFY_DONE;

	/* The die notifier requires DIE_OOPS to trigger */
	if (self == &trace_die_notifier && ev != DIE_OOPS)
		return NOTIFY_DONE;

	/* DUMP_PARAM: dump according to the ftrace_dump_on_oops parameter */
	ftrace_dump(DUMP_PARAM);

	return NOTIFY_DONE;
}

/*
 * printk is set to max of 1024, we really don't need it that big.
 * Nothing should be printing 1000 characters anyway.
 */
#define TRACE_MAX_PRINT		1000

/*
 * Define here KERN_TRACE so that we have one place to modify
 * it if we decide to change what log level the ftrace dump
 * should be at.
 */
#define KERN_TRACE		KERN_EMERG

/*
 * Flush the accumulated trace_seq to the console via printk and reset
 * the seq for reuse.  The length is clamped defensively before the
 * buffer is NUL-terminated in place.
 */
void
trace_printk_seq(struct trace_seq *s)
{
	/* Probably should print a warning here. */
	if (s->seq.len >= TRACE_MAX_PRINT)
		s->seq.len = TRACE_MAX_PRINT;

	/*
	 * More paranoid code. Although the buffer size is set to
	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
	 * an extra layer of protection.
	 */
	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
		s->seq.len = s->seq.size - 1;

	/* should be zero ended, but we are paranoid. */
	s->buffer[s->seq.len] = 0;

	printk(KERN_TRACE "%s", s->buffer);

	trace_seq_init(s);
}

/*
 * Set up @iter to iterate over @tr's ring buffer on all CPUs, calling the
 * current tracer's open() hook if it has one.  Uses the static temp/fmt
 * buffers since this path must not allocate (it runs from the dump path).
 */
static void trace_init_iter(struct trace_iterator *iter, struct trace_array *tr)
{
	iter->tr = tr;
	iter->trace = iter->tr->current_trace;
	iter->cpu_file = RING_BUFFER_ALL_CPUS;
	iter->array_buffer = &tr->array_buffer;

	if (iter->trace && iter->trace->open)
		iter->trace->open(iter);

	/* Annotate start of buffers if we had overruns */
	if (ring_buffer_overruns(iter->array_buffer->buffer))
		iter->iter_flags |= TRACE_FILE_ANNOTATE;

	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
	if (trace_clocks[iter->tr->clock_id].in_ns)
		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;

	/* Can not use kmalloc for iter.temp and iter.fmt */
	iter->temp = static_temp_buf;
	iter->temp_size = STATIC_TEMP_BUF_SIZE;
	iter->fmt = static_fmt_buf;
	iter->fmt_size = STATIC_FMT_BUF_SIZE;
}

/* Convenience wrapper: initialize @iter against the top-level trace array */
void trace_init_global_iter(struct trace_iterator *iter)
{
	trace_init_iter(iter, &global_trace);
}

/*
 * Dump the contents of one trace array to the console.  Called from the
 * oops/panic path, so tracing is turned off first, IRQs are disabled for
 * the duration, and all output goes through trace_printk_seq().
 * @dump_mode selects all CPUs (DUMP_ALL) or just the current one
 * (DUMP_ORIG).  Not reentrant: uses a static iterator (serialized by the
 * dump_running counter in ftrace_dump()).
 */
static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_mode)
{
	/* use static because iter can be a bit big for the stack */
	static struct trace_iterator iter;
	unsigned int old_userobj;
	unsigned long flags;
	int cnt = 0;

	/*
	 * Always turn off tracing when we dump.
	 * We don't need to show trace output of what happens
	 * between multiple crashes.
	 *
	 * If the user does a sysrq-z, then they can re-enable
	 * tracing with echo 1 > tracing_on.
	 */
	tracer_tracing_off(tr);

	local_irq_save(flags);

	/* Simulate the iterator */
	trace_init_iter(&iter, tr);

	/* While dumping, do not allow the buffer to be enabled */
	tracer_tracing_disable(tr);

	old_userobj = tr->trace_flags & TRACE_ITER(SYM_USEROBJ);

	/* don't look at user memory in panic mode */
	tr->trace_flags &= ~TRACE_ITER(SYM_USEROBJ);

	if (dump_mode == DUMP_ORIG)
		iter.cpu_file = raw_smp_processor_id();
	else
		iter.cpu_file = RING_BUFFER_ALL_CPUS;

	if (tr == &global_trace)
		printk(KERN_TRACE "Dumping ftrace buffer:\n");
	else
		printk(KERN_TRACE "Dumping ftrace instance %s buffer:\n", tr->name);

	/* Did function tracer already get disabled? */
	if (ftrace_is_dead()) {
		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
		printk("# MAY BE MISSING FUNCTION EVENTS\n");
	}

	/*
	 * We need to stop all tracing on all CPUS to read
	 * the next buffer. This is a bit expensive, but is
	 * not done often. We fill all what we can read,
	 * and then release the locks again.
	 */

	while (!trace_empty(&iter)) {

		if (!cnt)
			printk(KERN_TRACE "---------------------------------\n");

		cnt++;

		trace_iterator_reset(&iter);
		iter.iter_flags |= TRACE_FILE_LAT_FMT;

		if (trace_find_next_entry_inc(&iter) != NULL) {
			int ret;

			ret = print_trace_line(&iter);
			if (ret != TRACE_TYPE_NO_CONSUME)
				trace_consume(&iter);

			trace_printk_seq(&iter.seq);
		}
		/* dumping can take a while; keep the NMI watchdog quiet */
		touch_nmi_watchdog();
	}

	if (!cnt)
		printk(KERN_TRACE " (ftrace buffer empty)\n");
	else
		printk(KERN_TRACE "---------------------------------\n");

	/* restore the SYM_USEROBJ bit saved above */
	tr->trace_flags |= old_userobj;

	tracer_tracing_enable(tr);
	local_irq_restore(flags);
}

/*
 * Dump according to the ftrace_dump_on_oops= boot parameter.  The first
 * comma-separated token may be a global mode ("0", "1", "2"/"orig_cpu");
 * remaining tokens name instances, optionally suffixed with
 * "=2"/"=orig_cpu" to restrict that instance's dump to the oops CPU.
 */
static void ftrace_dump_by_param(void)
{
	bool first_param = true;
	char dump_param[MAX_TRACER_SIZE];
	char *buf, *token, *inst_name;
	struct trace_array *tr;

	/* work on a copy: strsep() modifies the string it parses */
	strscpy(dump_param, ftrace_dump_on_oops, MAX_TRACER_SIZE);
	buf = dump_param;

	while ((token = strsep(&buf, ",")) != NULL) {
		if (first_param) {
			first_param = false;
			if (!strcmp("0", token))
				continue;
			else if (!strcmp("1", token)) {
				ftrace_dump_one(&global_trace, DUMP_ALL);
				continue;
			}
			else if (!strcmp("2", token) ||
				!strcmp("orig_cpu", token)) {
				ftrace_dump_one(&global_trace, DUMP_ORIG);
				continue;
			}
		}

		inst_name = strsep(&token, "=");
		tr = trace_array_find(inst_name);
		if (!tr) {
			printk(KERN_TRACE "Instance %s not found\n", inst_name);
			continue;
		}

		if (token && (!strcmp("2", token) ||
			!strcmp("orig_cpu", token)))
			ftrace_dump_one(tr, DUMP_ORIG);
		else
			ftrace_dump_one(tr, DUMP_ALL);
	}
}

/*
 * Public entry point for dumping the ftrace ring buffer(s) to the console,
 * typically from the oops/panic notifiers or sysrq.  Serializes concurrent
 * dumpers with an atomic counter; a second caller simply returns.
 */
void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
{
	static atomic_t dump_running;

	/* Only allow one dump user at a time. */
	if (atomic_inc_return(&dump_running) != 1) {
		atomic_dec(&dump_running);
		return;
	}

	switch (oops_dump_mode) {
	case DUMP_ALL:
		ftrace_dump_one(&global_trace, DUMP_ALL);
		break;
	case DUMP_ORIG:
		ftrace_dump_one(&global_trace, DUMP_ORIG);
		break;
	case DUMP_PARAM:
		ftrace_dump_by_param();
		break;
	case DUMP_NONE:
		break;
	default:
		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
		ftrace_dump_one(&global_trace, DUMP_ALL);
	}

	atomic_dec(&dump_running);
}
EXPORT_SYMBOL_GPL(ftrace_dump);

#define WRITE_BUFSIZE 4096

/*
 * Copy a user-space write into kernel memory and feed it to @createfn one
 * logical line at a time.  Lines are split on '\n', '#' starts a comment
 * that is stripped, and a partial line at the end of the kernel buffer is
 * carried over to the next copy_from_user() chunk.  Returns the number of
 * bytes consumed, or a negative errno (including the first non-zero value
 * returned by @createfn).  A single line longer than WRITE_BUFSIZE - 2 is
 * rejected with -EINVAL.  The kernel buffer is freed automatically via
 * the __free(kfree) cleanup attribute on all return paths.
 */
ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
				size_t count, loff_t *ppos,
				int (*createfn)(const char *))
{
	char *kbuf __free(kfree) = NULL;
	char *buf, *tmp;
	int ret = 0;
	size_t done = 0;
	size_t size;

	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
	if (!kbuf)
		return -ENOMEM;

	while (done < count) {
		size = count - done;

		if (size >= WRITE_BUFSIZE)
			size = WRITE_BUFSIZE - 1;

		if (copy_from_user(kbuf, buffer + done, size))
			return -EFAULT;

		kbuf[size] = '\0';
		buf = kbuf;
		do {
			tmp = strchr(buf, '\n');
			if (tmp) {
				*tmp = '\0';
				size = tmp - buf + 1;
			} else {
				size = strlen(buf);
				if (done + size < count) {
					/*
					 * Unterminated line mid-buffer: re-read
					 * it at the start of the next chunk.
					 */
					if (buf != kbuf)
						break;
					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
					pr_warn("Line length is too long: Should be less than %d\n",
						WRITE_BUFSIZE - 2);
					return -EINVAL;
				}
			}
			done += size;

			/* Remove comments */
			tmp = strchr(buf, '#');

			if (tmp)
				*tmp = '\0';

			ret = createfn(buf);
			if (ret)
				return ret;
			buf += size;

		} while (done < count);
	}
	return done;
}

/*
 * Copy the boot-mapped ring buffer of instance @backup into a fresh
 * vzalloc'd area, returning its virtual address in @addr and its size in
 * @size.  Used by enable_instances() to create a read-only backup of a
 * previous boot's persistent buffer.  Returns 0 on success or a negative
 * errno; the instance reference is dropped on every path.
 */
__init static int backup_instance_area(const char *backup,
			unsigned long *addr, phys_addr_t *size)
{
	struct trace_array *backup_tr;
	void *allocated_vaddr = NULL;

	backup_tr = trace_array_get_by_name(backup, NULL);
	if (!backup_tr) {
		pr_warn("Tracing: Instance %s is not found.\n", backup);
		return -ENOENT;
	}

	/* Only boot-mapped (persistent) instances can be backed up */
	if (!(backup_tr->flags & TRACE_ARRAY_FL_BOOT)) {
		pr_warn("Tracing: Instance %s is not boot mapped.\n", backup);
		trace_array_put(backup_tr);
		return -EINVAL;
	}

	*size = backup_tr->range_addr_size;

	allocated_vaddr = vzalloc(*size);
	if (!allocated_vaddr) {
		pr_warn("Tracing: Failed to allocate memory for copying instance %s (size 0x%lx)\n",
			backup, (unsigned long)*size);
		trace_array_put(backup_tr);
		return -ENOMEM;
	}

	memcpy(allocated_vaddr,
	       (void *)backup_tr->range_addr_start, (size_t)*size);
	*addr = (unsigned long)allocated_vaddr;

	trace_array_put(backup_tr);
	return 0;
}

/*
 * Create the trace instances requested on the kernel command line
 * (trace_instance=).  Each tab-separated entry has the form:
 *
 *   name[=backup][^flag[^flag...]][@addr:size | @reserve_name][,events...]
 *
 * where "@addr:size" memory-maps a raw physical range, "@name" looks up a
 * reserve_mem region, "=backup" copies another boot-mapped instance, and
 * flags are "traceoff" / "printk" (a.k.a. "traceprintk", "trace_printk").
 * Events listed after the first comma are enabled only after ALL instances
 * exist, so that instances may reference each other.
 */
__init static void enable_instances(void)
{
	struct trace_array *tr;
	bool memmap_area = false;
	char *curr_str;
	char *name;
	char *str;
	char *tok;

	/* A tab is always appended */
	boot_instance_info[boot_instance_index - 1] = '\0';
	str = boot_instance_info;

	while ((curr_str = strsep(&str, "\t"))) {
		phys_addr_t start = 0;
		phys_addr_t size = 0;
		unsigned long addr = 0;
		bool traceprintk = false;
		bool traceoff = false;
		char *flag_delim;
		char *addr_delim;
		char *rname __free(kfree) = NULL;
		char *backup;

		tok = strsep(&curr_str, ",");

		/* "name=backup": everything after '=' names the source instance */
		name = strsep(&tok, "=");
		backup = tok;

		flag_delim = strchr(name, '^');
		addr_delim = strchr(name, '@');

		if (addr_delim)
			*addr_delim++ = '\0';

		if (flag_delim)
			*flag_delim++ = '\0';

		if (backup) {
			if (backup_instance_area(backup, &addr, &size) < 0)
				continue;
		}

		if (flag_delim) {
			char *flag;

			while ((flag = strsep(&flag_delim, "^"))) {
				if (strcmp(flag, "traceoff") == 0) {
					traceoff = true;
				} else if ((strcmp(flag, "printk") == 0) ||
					   (strcmp(flag, "traceprintk") == 0) ||
					   (strcmp(flag, "trace_printk") == 0)) {
					traceprintk = true;
				} else {
					pr_info("Tracing: Invalid instance flag '%s' for %s\n",
						flag, name);
				}
			}
		}

		tok = addr_delim;
		if (tok && isdigit(*tok)) {
			/* "@start:size" — raw physical address range */
			start = memparse(tok, &tok);
			if (!start) {
				pr_warn("Tracing: Invalid boot instance address for %s\n",
					name);
				continue;
			}
			if (*tok != ':') {
				pr_warn("Tracing: No size specified for instance %s\n", name);
				continue;
			}
			tok++;
			size = memparse(tok, &tok);
			if (!size) {
				pr_warn("Tracing: Invalid boot instance size for %s\n",
					name);
				continue;
			}
			memmap_area = true;
		} else if (tok) {
			/* "@name" — region set up via the reserve_mem= parameter */
			if (!reserve_mem_find_by_name(tok, &start, &size)) {
				start = 0;
				pr_warn("Failed to map boot instance %s to %s\n", name, tok);
				continue;
			}
			rname = kstrdup(tok, GFP_KERNEL);
		}

		if (start) {
			/* Start and size must be page aligned */
			if (start & ~PAGE_MASK) {
				pr_warn("Tracing: mapping start addr %pa is not page aligned\n", &start);
				continue;
			}
			if (size & ~PAGE_MASK) {
				pr_warn("Tracing: mapping size %pa is not page aligned\n", &size);
				continue;
			}

			if (memmap_area)
				addr = map_pages(start, size);
			else
				addr = (unsigned long)phys_to_virt(start);
			if (addr) {
				pr_info("Tracing: mapped boot instance %s at physical memory %pa of size 0x%lx\n",
					name, &start, (unsigned long)size);
			} else {
				pr_warn("Tracing: Failed to map boot instance %s\n", name);
				continue;
			}
		} else {
			/* Only non mapped buffers have snapshot buffers */
			do_allocate_snapshot(name);
		}

		tr = trace_array_create_systems(name, NULL, addr, size);
		if (IS_ERR(tr)) {
			pr_warn("Tracing: Failed to create instance buffer %s\n", curr_str);
			continue;
		}

		if (traceoff)
			tracer_tracing_off(tr);

		if (traceprintk)
			update_printk_trace(tr);

		/*
		 * memmap'd buffers can not be freed.
		 */
		if (memmap_area) {
			tr->flags |= TRACE_ARRAY_FL_MEMMAP;
			tr->ref++;
		}

		/*
		 * Backup buffers can be freed but need vfree().
		 */
		if (backup) {
			tr->flags |= TRACE_ARRAY_FL_VMALLOC | TRACE_ARRAY_FL_RDONLY;
			trace_array_start_autoremove();
		}

		if (start || backup) {
			tr->flags |= TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT;
			tr->range_name = no_free_ptr(rname);
		}

		/*
		 * Save the events to start and enable them after all boot instances
		 * have been created.
		 */
		tr->boot_events = curr_str;
	}

	/* Enable the events after all boot instances have been created */
	list_for_each_entry(tr, &ftrace_trace_arrays, list) {

		if (!tr->boot_events || !(*tr->boot_events)) {
			tr->boot_events = NULL;
			continue;
		}

		curr_str = tr->boot_events;

		/* Clear the instance if this is a persistent buffer */
		if (tr->flags & TRACE_ARRAY_FL_LAST_BOOT)
			update_last_data(tr);

		while ((tok = strsep(&curr_str, ","))) {
			early_enable_events(tr, tok, true);
		}
		tr->boot_events = NULL;
	}
}

/*
 * Allocate and wire up the global trace array and its ring buffer at early
 * boot.  On success, tracing is enabled (tracing_disabled = 0), the
 * die/panic notifiers are registered, and the nop tracer becomes the
 * current tracer.  Uses a classic goto-unwind chain so every allocation is
 * released in reverse order on failure.
 */
__init static int tracer_alloc_buffers(void)
{
	unsigned long ring_buf_size;
	int ret = -ENOMEM;


	if (security_locked_down(LOCKDOWN_TRACEFS)) {
		pr_warn("Tracing disabled due to lockdown\n");
		return -EPERM;
	}

	/*
	 * Make sure we don't accidentally add more trace options
	 * than we have bits for.
	 */
	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);

	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
		return -ENOMEM;

	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
		goto out_free_buffer_mask;

	/* Only allocate trace_printk buffers if a trace_printk exists */
	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
		/* Must be called before global_trace.buffer is allocated */
		trace_printk_init_buffers();

	/* To save memory, keep the ring buffer size to its minimum */
	if (global_trace.ring_buffer_expanded)
		ring_buf_size = trace_buf_size;
	else
		ring_buf_size = 1;

	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);

	raw_spin_lock_init(&global_trace.start_lock);

	/*
	 * The prepare callbacks allocate some memory for the ring buffer. We
	 * don't free the buffer if the CPU goes down. If we were to free
	 * the buffer, then the user would lose any trace that was in the
	 * buffer. The memory will be removed once the "instance" is removed.
	 */
	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
				      "trace/RB:prepare", trace_rb_cpu_prepare,
				      NULL);
	if (ret < 0)
		goto out_free_cpumask;
	/* Used for event triggers */
	ret = -ENOMEM;
	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
	if (!temp_buffer)
		goto out_rm_hp_state;

	if (trace_create_savedcmd() < 0)
		goto out_free_temp_buffer;

	if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
		goto out_free_savedcmd;

	/* TODO: make the number of buffers hot pluggable with CPUS */
	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
		goto out_free_pipe_cpumask;
	}
	if (global_trace.buffer_disabled)
		tracing_off();

	if (trace_boot_clock) {
		ret = tracing_set_clock(&global_trace, trace_boot_clock);
		if (ret < 0)
			pr_warn("Trace clock %s not defined, going back to default\n",
				trace_boot_clock);
	}

	/*
	 * register_tracer() might reference current_trace, so it
	 * needs to be set before we register anything. This is
	 * just a bootstrap of current_trace anyway.
	 */
	global_trace.current_trace = &nop_trace;
	global_trace.current_trace_flags = nop_trace.flags;

	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
#ifdef CONFIG_TRACER_SNAPSHOT
	spin_lock_init(&global_trace.snapshot_trigger_lock);
#endif
	ftrace_init_global_array_ops(&global_trace);

#ifdef CONFIG_MODULES
	INIT_LIST_HEAD(&global_trace.mod_events);
#endif

	init_trace_flags_index(&global_trace);

	INIT_LIST_HEAD(&global_trace.tracers);

	/* All seems OK, enable tracing */
	tracing_disabled = 0;

	atomic_notifier_chain_register(&panic_notifier_list,
				       &trace_panic_notifier);

	register_die_notifier(&trace_die_notifier);

	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;

	global_trace.syscall_buf_sz = syscall_buf_size;

	INIT_LIST_HEAD(&global_trace.systems);
	INIT_LIST_HEAD(&global_trace.events);
	INIT_LIST_HEAD(&global_trace.hist_vars);
	INIT_LIST_HEAD(&global_trace.err_log);
	list_add(&global_trace.marker_list, &marker_copies);
	list_add(&global_trace.list, &ftrace_trace_arrays);

	register_tracer(&nop_trace);

	/* Function tracing may start here (via kernel command line) */
	init_function_trace();

	apply_trace_boot_options();

	register_snapshot_cmd();

	return 0;

out_free_pipe_cpumask:
	free_cpumask_var(global_trace.pipe_cpumask);
out_free_savedcmd:
	trace_free_saved_cmdlines_buffer();
out_free_temp_buffer:
	ring_buffer_free(temp_buffer);
out_rm_hp_state:
	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
out_free_cpumask:
	free_cpumask_var(global_trace.tracing_cpumask);
out_free_buffer_mask:
	free_cpumask_var(tracing_buffer_mask);
	return ret;
}

#ifdef CONFIG_FUNCTION_TRACER
/* Used to set module cached ftrace filtering at boot up */
struct trace_array *trace_get_global_array(void)
{
	return &global_trace;
}
#endif

/*
 * First tracing init hook, called very early in start_kernel(): sets up
 * the tracepoint-to-printk iterator (if tracepoint_printk= was given),
 * allocates the global trace buffers, and registers the output event
 * formats.
 */
void __init early_trace_init(void)
{
	if (tracepoint_printk) {
		tracepoint_print_iter = kzalloc_obj(*tracepoint_print_iter);
		if (MEM_FAIL(!tracepoint_print_iter,
			     "Failed to allocate trace iterator\n"))
			tracepoint_printk = 0;
		else
			static_key_enable(&tracepoint_printk_key.key);
	}
	tracer_alloc_buffers();

	init_events();
}

/*
 * Second tracing init hook, called later in start_kernel() once more of
 * the kernel is up: initializes the trace-event subsystem and creates any
 * boot-requested instances.
 */
void __init trace_init(void)
{
	trace_event_init();

	if (boot_instance_index)
		enable_instances();
}

__init static void clear_boot_tracer(void)
{
	/*
	 * The default tracer at boot buffer is an init section.
	 * This function is called in lateinit. If we did not
	 * find the boot tracer, then clear it out, to prevent
	 * later registration from accessing the buffer that is
	 * about to be freed.
	 */
	if (!default_bootup_tracer)
		return;

	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
	       default_bootup_tracer);
	default_bootup_tracer = NULL;
}

#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
/*
 * If the scheduler clock turned out to be unstable and the user did not
 * pick a clock explicitly, switch the default tracing clock from "local"
 * to "global" so timestamps stay comparable across CPUs.
 */
__init static void tracing_set_default_clock(void)
{
	/* sched_clock_stable() is determined in late_initcall */
	if (!trace_boot_clock && !sched_clock_stable()) {
		if (security_locked_down(LOCKDOWN_TRACEFS)) {
			pr_warn("Can not set tracing clock due to lockdown\n");
			return;
		}

		printk(KERN_WARNING
		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
		       "If you want to keep using the local clock, then add:\n"
		       " \"trace_clock=local\"\n"
		       "on the kernel command line\n");
		tracing_set_clock(&global_trace, "global");
	}
}
#else
static inline void tracing_set_default_clock(void) { }
#endif

/*
 * Late-boot cleanup: honor the tracepoint_printk_stop_on_boot and
 * traceoff_after_boot parameters, settle the default clock, and clear the
 * (about to be freed, __init-section) bootup tracer name.
 */
__init static int late_trace_init(void)
{
	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
		static_key_disable(&tracepoint_printk_key.key);
		tracepoint_printk = 0;
	}

	if (traceoff_after_boot)
		tracing_off();

	tracing_set_default_clock();
	clear_boot_tracer();
	return 0;
}

late_initcall_sync(late_trace_init);