1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * ring buffer based function tracer
4 *
5 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7 *
8 * Originally taken from the RT patch by:
9 * Arnaldo Carvalho de Melo <acme@redhat.com>
10 *
11 * Based on code from the latency_tracer, that is:
12 * Copyright (C) 2004-2006 Ingo Molnar
13 * Copyright (C) 2004 Nadia Yvette Chambers
14 */
15 #include <linux/ring_buffer.h>
16 #include <linux/utsname.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/syscalls.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/cleanup.h>
31 #include <linux/vmalloc.h>
32 #include <linux/ftrace.h>
33 #include <linux/module.h>
34 #include <linux/percpu.h>
35 #include <linux/splice.h>
36 #include <linux/kdebug.h>
37 #include <linux/string.h>
38 #include <linux/mount.h>
39 #include <linux/rwsem.h>
40 #include <linux/slab.h>
41 #include <linux/ctype.h>
42 #include <linux/init.h>
43 #include <linux/panic_notifier.h>
44 #include <linux/poll.h>
45 #include <linux/nmi.h>
46 #include <linux/fs.h>
47 #include <linux/trace.h>
48 #include <linux/sched/clock.h>
49 #include <linux/sched/rt.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52 #include <linux/sort.h>
53 #include <linux/io.h> /* vmap_page_range() */
54 #include <linux/fs_context.h>
55
56 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
57
58 #include "trace.h"
59 #include "trace_output.h"
60
61 #ifdef CONFIG_FTRACE_STARTUP_TEST
62 /*
63 * We need to change this state when a selftest is running.
64 * A selftest will lurk into the ring-buffer to count the
65 * entries inserted during the selftest although some concurrent
66 * insertions into the ring-buffer such as trace_printk could have occurred
67 * at the same time, giving false positive or negative results.
68 */
69 bool __read_mostly tracing_selftest_running;
70
71 /*
72 * If boot-time tracing including tracers/events via kernel cmdline
73 * is running, we do not want to run SELFTEST.
74 */
75 bool __read_mostly tracing_selftest_disabled;
76
disable_tracing_selftest(const char * reason)77 void __init disable_tracing_selftest(const char *reason)
78 {
79 if (!tracing_selftest_disabled) {
80 tracing_selftest_disabled = true;
81 pr_info("Ftrace startup test is disabled due to %s\n", reason);
82 }
83 }
84 #else
85 #define tracing_selftest_disabled 0
86 #endif
87
88 /* Pipe tracepoints to printk */
89 static struct trace_iterator *tracepoint_print_iter;
90 int tracepoint_printk;
91 static bool tracepoint_printk_stop_on_boot __initdata;
92 static bool traceoff_after_boot __initdata;
93 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
94
95 /* Store tracers and their flags per instance */
96 struct tracers {
97 struct list_head list;
98 struct tracer *tracer;
99 struct tracer_flags *flags;
100 };
101
102 /*
103 * To prevent the comm cache from being overwritten when no
104 * tracing is active, only save the comm when a trace event
105 * occurred.
106 */
107 DEFINE_PER_CPU(bool, trace_taskinfo_save);
108
109 /*
110 * Kill all tracing for good (never come back).
111 * It is initialized to 1 but will turn to zero if the initialization
112 * of the tracer is successful. But that is the only place that sets
113 * this back to zero.
114 */
115 int tracing_disabled = 1;
116
117 cpumask_var_t __read_mostly tracing_buffer_mask;
118
119 #define MAX_TRACER_SIZE 100
120 /*
121 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
122 *
123 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
124 * is set, then ftrace_dump is called. This will output the contents
125 * of the ftrace buffers to the console. This is very useful for
126 * capturing traces that lead to crashes and outputting it to a
127 * serial console.
128 *
129 * It is default off, but you can enable it with either specifying
130 * "ftrace_dump_on_oops" in the kernel command line, or setting
131 * /proc/sys/kernel/ftrace_dump_on_oops
132 * Set 1 if you want to dump buffers of all CPUs
133 * Set 2 if you want to dump the buffer of the CPU that triggered oops
134 * Set instance name if you want to dump the specific trace instance
135 * Multiple instance dump is also supported, and instances are separated
136 * by commas.
137 */
138 /* Set to string format zero to disable by default */
139 static char ftrace_dump_on_oops[MAX_TRACER_SIZE] = "0";
140
141 /* When set, tracing will stop when a WARN*() is hit */
142 static int __disable_trace_on_warning;
143
144 int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
145 void *buffer, size_t *lenp, loff_t *ppos);
146 static const struct ctl_table trace_sysctl_table[] = {
147 {
148 .procname = "ftrace_dump_on_oops",
149 .data = &ftrace_dump_on_oops,
150 .maxlen = MAX_TRACER_SIZE,
151 .mode = 0644,
152 .proc_handler = proc_dostring,
153 },
154 {
155 .procname = "traceoff_on_warning",
156 .data = &__disable_trace_on_warning,
157 .maxlen = sizeof(__disable_trace_on_warning),
158 .mode = 0644,
159 .proc_handler = proc_dointvec,
160 },
161 {
162 .procname = "tracepoint_printk",
163 .data = &tracepoint_printk,
164 .maxlen = sizeof(tracepoint_printk),
165 .mode = 0644,
166 .proc_handler = tracepoint_printk_sysctl,
167 },
168 };
169
init_trace_sysctls(void)170 static int __init init_trace_sysctls(void)
171 {
172 register_sysctl_init("kernel", trace_sysctl_table);
173 return 0;
174 }
175 subsys_initcall(init_trace_sysctls);
176
177 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
178 /* Map of enums to their values, for "eval_map" file */
179 struct trace_eval_map_head {
180 struct module *mod;
181 unsigned long length;
182 };
183
184 union trace_eval_map_item;
185
186 struct trace_eval_map_tail {
187 /*
188 * "end" is first and points to NULL as it must be different
189 * than "mod" or "eval_string"
190 */
191 union trace_eval_map_item *next;
192 const char *end; /* points to NULL */
193 };
194
195 static DEFINE_MUTEX(trace_eval_mutex);
196
197 /*
198 * The trace_eval_maps are saved in an array with two extra elements,
199 * one at the beginning, and one at the end. The beginning item contains
200 * the count of the saved maps (head.length), and the module they
201 * belong to if not built in (head.mod). The ending item contains a
202 * pointer to the next array of saved eval_map items.
203 */
204 union trace_eval_map_item {
205 struct trace_eval_map map;
206 struct trace_eval_map_head head;
207 struct trace_eval_map_tail tail;
208 };
209
210 static union trace_eval_map_item *trace_eval_maps;
211 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
212
213 int tracing_set_tracer(struct trace_array *tr, const char *buf);
214 static void ftrace_trace_userstack(struct trace_array *tr,
215 struct trace_buffer *buffer,
216 unsigned int trace_ctx);
217
218 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
219 static char *default_bootup_tracer;
220
221 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
222 static int boot_instance_index;
223
224 /*
225 * Repeated boot parameters, including Bootconfig array expansions, need
226 * to stay in the delimiter form that the existing parser consumes.
227 */
trace_append_boot_param(char * buf,const char * str,char sep,int size)228 void __init trace_append_boot_param(char *buf, const char *str, char sep,
229 int size)
230 {
231 int len, needed, str_len;
232
233 if (!*str)
234 return;
235
236 len = strlen(buf);
237 str_len = strlen(str);
238 needed = len + str_len + 1;
239
240 /* For continuation, account for the separator. */
241 if (len)
242 needed++;
243 if (needed > size)
244 return;
245
246 if (len)
247 buf[len++] = sep;
248
249 strscpy(buf + len, str, size - len);
250 }
251
set_cmdline_ftrace(char * str)252 static int __init set_cmdline_ftrace(char *str)
253 {
254 strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
255 default_bootup_tracer = bootup_tracer_buf;
256 /* We are using ftrace early, expand it */
257 trace_set_ring_buffer_expanded(NULL);
258 return 1;
259 }
260 __setup("ftrace=", set_cmdline_ftrace);
261
ftrace_dump_on_oops_enabled(void)262 int ftrace_dump_on_oops_enabled(void)
263 {
264 if (!strcmp("0", ftrace_dump_on_oops))
265 return 0;
266 else
267 return 1;
268 }
269
set_ftrace_dump_on_oops(char * str)270 static int __init set_ftrace_dump_on_oops(char *str)
271 {
272 if (!*str) {
273 strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
274 return 1;
275 }
276
277 if (*str == ',') {
278 strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
279 strscpy(ftrace_dump_on_oops + 1, str, MAX_TRACER_SIZE - 1);
280 return 1;
281 }
282
283 if (*str++ == '=') {
284 strscpy(ftrace_dump_on_oops, str, MAX_TRACER_SIZE);
285 return 1;
286 }
287
288 return 0;
289 }
290 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
291
stop_trace_on_warning(char * str)292 static int __init stop_trace_on_warning(char *str)
293 {
294 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
295 __disable_trace_on_warning = 1;
296 return 1;
297 }
298 __setup("traceoff_on_warning", stop_trace_on_warning);
299
boot_instance(char * str)300 static int __init boot_instance(char *str)
301 {
302 char *slot = boot_instance_info + boot_instance_index;
303 int left = sizeof(boot_instance_info) - boot_instance_index;
304 int ret;
305
306 if (strlen(str) >= left)
307 return -1;
308
309 ret = snprintf(slot, left, "%s\t", str);
310 boot_instance_index += ret;
311
312 return 1;
313 }
314 __setup("trace_instance=", boot_instance);
315
316
317 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
318
set_trace_boot_options(char * str)319 static int __init set_trace_boot_options(char *str)
320 {
321 trace_append_boot_param(trace_boot_options_buf, str, ',',
322 MAX_TRACER_SIZE);
323 return 1;
324 }
325 __setup("trace_options=", set_trace_boot_options);
326
327 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
328 static char *trace_boot_clock __initdata;
329
set_trace_boot_clock(char * str)330 static int __init set_trace_boot_clock(char *str)
331 {
332 strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
333 trace_boot_clock = trace_boot_clock_buf;
334 return 1;
335 }
336 __setup("trace_clock=", set_trace_boot_clock);
337
set_tracepoint_printk(char * str)338 static int __init set_tracepoint_printk(char *str)
339 {
340 /* Ignore the "tp_printk_stop_on_boot" param */
341 if (*str == '_')
342 return 0;
343
344 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
345 tracepoint_printk = 1;
346 return 1;
347 }
348 __setup("tp_printk", set_tracepoint_printk);
349
set_tracepoint_printk_stop(char * str)350 static int __init set_tracepoint_printk_stop(char *str)
351 {
352 tracepoint_printk_stop_on_boot = true;
353 return 1;
354 }
355 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
356
set_traceoff_after_boot(char * str)357 static int __init set_traceoff_after_boot(char *str)
358 {
359 traceoff_after_boot = true;
360 return 1;
361 }
362 __setup("traceoff_after_boot", set_traceoff_after_boot);
363
/* Convert nanoseconds to microseconds, rounding to the nearest usec. */
unsigned long long ns2usecs(u64 nsec)
{
	nsec += 500;		/* round-to-nearest before dividing */
	do_div(nsec, 1000);	/* 64-bit divide that is safe on 32-bit */
	return nsec;
}
370
371 static void
trace_process_export(struct trace_export * export,struct ring_buffer_event * event,int flag)372 trace_process_export(struct trace_export *export,
373 struct ring_buffer_event *event, int flag)
374 {
375 struct trace_entry *entry;
376 unsigned int size = 0;
377
378 if (export->flags & flag) {
379 entry = ring_buffer_event_data(event);
380 size = ring_buffer_event_length(event);
381 export->write(export, entry, size);
382 }
383 }
384
385 static DEFINE_MUTEX(ftrace_export_lock);
386
387 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
388
389 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
390 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
391 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
392
ftrace_exports_enable(struct trace_export * export)393 static inline void ftrace_exports_enable(struct trace_export *export)
394 {
395 if (export->flags & TRACE_EXPORT_FUNCTION)
396 static_branch_inc(&trace_function_exports_enabled);
397
398 if (export->flags & TRACE_EXPORT_EVENT)
399 static_branch_inc(&trace_event_exports_enabled);
400
401 if (export->flags & TRACE_EXPORT_MARKER)
402 static_branch_inc(&trace_marker_exports_enabled);
403 }
404
ftrace_exports_disable(struct trace_export * export)405 static inline void ftrace_exports_disable(struct trace_export *export)
406 {
407 if (export->flags & TRACE_EXPORT_FUNCTION)
408 static_branch_dec(&trace_function_exports_enabled);
409
410 if (export->flags & TRACE_EXPORT_EVENT)
411 static_branch_dec(&trace_event_exports_enabled);
412
413 if (export->flags & TRACE_EXPORT_MARKER)
414 static_branch_dec(&trace_marker_exports_enabled);
415 }
416
ftrace_exports(struct ring_buffer_event * event,int flag)417 static void ftrace_exports(struct ring_buffer_event *event, int flag)
418 {
419 struct trace_export *export;
420
421 guard(preempt_notrace)();
422
423 export = rcu_dereference_raw_check(ftrace_exports_list);
424 while (export) {
425 trace_process_export(export, event, flag);
426 export = rcu_dereference_raw_check(export->next);
427 }
428 }
429
430 static inline void
add_trace_export(struct trace_export ** list,struct trace_export * export)431 add_trace_export(struct trace_export **list, struct trace_export *export)
432 {
433 rcu_assign_pointer(export->next, *list);
434 /*
435 * We are entering export into the list but another
436 * CPU might be walking that list. We need to make sure
437 * the export->next pointer is valid before another CPU sees
438 * the export pointer included into the list.
439 */
440 rcu_assign_pointer(*list, export);
441 }
442
443 static inline int
rm_trace_export(struct trace_export ** list,struct trace_export * export)444 rm_trace_export(struct trace_export **list, struct trace_export *export)
445 {
446 struct trace_export **p;
447
448 for (p = list; *p != NULL; p = &(*p)->next)
449 if (*p == export)
450 break;
451
452 if (*p != export)
453 return -1;
454
455 rcu_assign_pointer(*p, (*p)->next);
456
457 return 0;
458 }
459
/* Enable the exporter's static keys, then publish it on @list. */
static inline void
add_ftrace_export(struct trace_export **list, struct trace_export *export)
{
	ftrace_exports_enable(export);
	add_trace_export(list, export);
}
467
/* Remove @export from @list; the static keys are dropped unconditionally. */
static inline int
rm_ftrace_export(struct trace_export **list, struct trace_export *export)
{
	int ret = rm_trace_export(list, export);

	ftrace_exports_disable(export);

	return ret;
}
478
register_ftrace_export(struct trace_export * export)479 int register_ftrace_export(struct trace_export *export)
480 {
481 if (WARN_ON_ONCE(!export->write))
482 return -1;
483
484 guard(mutex)(&ftrace_export_lock);
485
486 add_ftrace_export(&ftrace_exports_list, export);
487
488 return 0;
489 }
490 EXPORT_SYMBOL_GPL(register_ftrace_export);
491
unregister_ftrace_export(struct trace_export * export)492 int unregister_ftrace_export(struct trace_export *export)
493 {
494 guard(mutex)(&ftrace_export_lock);
495 return rm_ftrace_export(&ftrace_exports_list, export);
496 }
497 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
498
499 /* trace_flags holds trace_options default values */
500 #define TRACE_DEFAULT_FLAGS \
501 (FUNCTION_DEFAULT_FLAGS | FPROFILE_DEFAULT_FLAGS | \
502 TRACE_ITER(PRINT_PARENT) | TRACE_ITER(PRINTK) | \
503 TRACE_ITER(ANNOTATE) | TRACE_ITER(CONTEXT_INFO) | \
504 TRACE_ITER(RECORD_CMD) | TRACE_ITER(OVERWRITE) | \
505 TRACE_ITER(IRQ_INFO) | TRACE_ITER(MARKERS) | \
506 TRACE_ITER(HASH_PTR) | TRACE_ITER(TRACE_PRINTK) | \
507 TRACE_ITER(COPY_MARKER))
508
509 /* trace_options that are only supported by global_trace */
510 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER(PRINTK) | \
511 TRACE_ITER(PRINTK_MSGONLY) | TRACE_ITER(RECORD_CMD) | \
512 TRACE_ITER(PROF_TEXT_OFFSET) | FPROFILE_DEFAULT_FLAGS)
513
514 /* trace_flags that are default zero for instances */
515 #define ZEROED_TRACE_FLAGS \
516 (TRACE_ITER(EVENT_FORK) | TRACE_ITER(FUNC_FORK) | TRACE_ITER(TRACE_PRINTK) | \
517 TRACE_ITER(COPY_MARKER))
518
519 /*
520 * The global_trace is the descriptor that holds the top-level tracing
521 * buffers for the live tracing.
522 */
523 static struct trace_array global_trace = {
524 .trace_flags = TRACE_DEFAULT_FLAGS,
525 };
526
527 struct trace_array *printk_trace = &global_trace;
528
529 /* List of trace_arrays interested in the top level trace_marker */
530 static LIST_HEAD(marker_copies);
531
update_printk_trace(struct trace_array * tr)532 static void update_printk_trace(struct trace_array *tr)
533 {
534 if (printk_trace == tr)
535 return;
536
537 printk_trace->trace_flags &= ~TRACE_ITER(TRACE_PRINTK);
538 printk_trace = tr;
539 tr->trace_flags |= TRACE_ITER(TRACE_PRINTK);
540 }
541
542 /* Returns true if the status of tr changed */
update_marker_trace(struct trace_array * tr,int enabled)543 static bool update_marker_trace(struct trace_array *tr, int enabled)
544 {
545 lockdep_assert_held(&event_mutex);
546
547 if (enabled) {
548 if (tr->trace_flags & TRACE_ITER(COPY_MARKER))
549 return false;
550
551 list_add_rcu(&tr->marker_list, &marker_copies);
552 tr->trace_flags |= TRACE_ITER(COPY_MARKER);
553 return true;
554 }
555
556 if (!(tr->trace_flags & TRACE_ITER(COPY_MARKER)))
557 return false;
558
559 list_del_rcu(&tr->marker_list);
560 tr->trace_flags &= ~TRACE_ITER(COPY_MARKER);
561 return true;
562 }
563
trace_set_ring_buffer_expanded(struct trace_array * tr)564 void trace_set_ring_buffer_expanded(struct trace_array *tr)
565 {
566 if (!tr)
567 tr = &global_trace;
568 tr->ring_buffer_expanded = true;
569 }
570
trace_array_autoremove(struct work_struct * work)571 static void trace_array_autoremove(struct work_struct *work)
572 {
573 struct trace_array *tr = container_of(work, struct trace_array, autoremove_work);
574
575 trace_array_destroy(tr);
576 }
577
578 static struct workqueue_struct *autoremove_wq;
579
trace_array_kick_autoremove(struct trace_array * tr)580 static void trace_array_kick_autoremove(struct trace_array *tr)
581 {
582 if (autoremove_wq)
583 queue_work(autoremove_wq, &tr->autoremove_work);
584 }
585
trace_array_cancel_autoremove(struct trace_array * tr)586 static void trace_array_cancel_autoremove(struct trace_array *tr)
587 {
588 /*
589 * Since this can be called inside trace_array_autoremove(),
590 * it has to avoid deadlock of the workqueue.
591 */
592 if (work_pending(&tr->autoremove_work))
593 cancel_work_sync(&tr->autoremove_work);
594 }
595
trace_array_init_autoremove(struct trace_array * tr)596 static void trace_array_init_autoremove(struct trace_array *tr)
597 {
598 INIT_WORK(&tr->autoremove_work, trace_array_autoremove);
599 }
600
trace_array_start_autoremove(void)601 static void trace_array_start_autoremove(void)
602 {
603 if (autoremove_wq)
604 return;
605
606 autoremove_wq = alloc_workqueue("tr_autoremove_wq",
607 WQ_UNBOUND | WQ_HIGHPRI, 0);
608 if (!autoremove_wq)
609 pr_warn("Unable to allocate tr_autoremove_wq. autoremove disabled.\n");
610 }
611
612 LIST_HEAD(ftrace_trace_arrays);
613
__trace_array_get(struct trace_array * this_tr)614 static int __trace_array_get(struct trace_array *this_tr)
615 {
616 /* When free_on_close is set, this is not available anymore. */
617 if (autoremove_wq && this_tr->free_on_close)
618 return -ENODEV;
619
620 this_tr->ref++;
621 return 0;
622 }
623
trace_array_get(struct trace_array * this_tr)624 int trace_array_get(struct trace_array *this_tr)
625 {
626 struct trace_array *tr;
627
628 guard(mutex)(&trace_types_lock);
629 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
630 if (tr == this_tr) {
631 return __trace_array_get(tr);
632 }
633 }
634
635 return -ENODEV;
636 }
637
__trace_array_put(struct trace_array * this_tr)638 static void __trace_array_put(struct trace_array *this_tr)
639 {
640 WARN_ON(!this_tr->ref);
641 this_tr->ref--;
642 /*
643 * When free_on_close is set, prepare removing the array
644 * when the last reference is released.
645 */
646 if (this_tr->ref == 1 && this_tr->free_on_close)
647 trace_array_kick_autoremove(this_tr);
648 }
649
650 /**
651 * trace_array_put - Decrement the reference counter for this trace array.
652 * @this_tr : pointer to the trace array
653 *
654 * NOTE: Use this when we no longer need the trace array returned by
655 * trace_array_get_by_name(). This ensures the trace array can be later
656 * destroyed.
657 *
658 */
trace_array_put(struct trace_array * this_tr)659 void trace_array_put(struct trace_array *this_tr)
660 {
661 if (!this_tr)
662 return;
663
664 guard(mutex)(&trace_types_lock);
665 __trace_array_put(this_tr);
666 }
667 EXPORT_SYMBOL_GPL(trace_array_put);
668
tracing_check_open_get_tr(struct trace_array * tr)669 int tracing_check_open_get_tr(struct trace_array *tr)
670 {
671 int ret;
672
673 ret = security_locked_down(LOCKDOWN_TRACEFS);
674 if (ret)
675 return ret;
676
677 if (tracing_disabled)
678 return -ENODEV;
679
680 if (tr && trace_array_get(tr) < 0)
681 return -ENODEV;
682
683 return 0;
684 }
685
buffer_ftrace_now(struct array_buffer * buf,int cpu)686 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
687 {
688 u64 ts;
689
690 /* Early boot up does not have a buffer yet */
691 if (!buf->buffer)
692 return trace_clock_local();
693
694 ts = ring_buffer_time_stamp(buf->buffer);
695 ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
696
697 return ts;
698 }
699
ftrace_now(int cpu)700 u64 ftrace_now(int cpu)
701 {
702 return buffer_ftrace_now(&global_trace.array_buffer, cpu);
703 }
704
705 /**
706 * tracing_is_enabled - Show if global_trace has been enabled
707 *
708 * Shows if the global trace has been enabled or not. It uses the
709 * mirror flag "buffer_disabled" to be used in fast paths such as for
710 * the irqsoff tracer. But it may be inaccurate due to races. If you
711 * need to know the accurate state, use tracing_is_on() which is a little
712 * slower, but accurate.
713 */
tracing_is_enabled(void)714 int tracing_is_enabled(void)
715 {
716 /*
717 * For quick access (irqsoff uses this in fast path), just
718 * return the mirror variable of the state of the ring buffer.
719 * It's a little racy, but we don't really care.
720 */
721 return !global_trace.buffer_disabled;
722 }
723
724 /*
725 * trace_buf_size is the size in bytes that is allocated
726 * for a buffer. Note, the number of bytes is always rounded
727 * to page size.
728 *
729 * This number is purposely set to a low number of 16384.
730 * If the dump on oops happens, it will be much appreciated
731 * to not have to wait for all that output. Anyway this can be
732 * boot time and run time configurable.
733 */
734 #define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */
735
736 static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
737
738 /* trace_types holds a link list of available tracers. */
739 static struct tracer *trace_types __read_mostly;
740
741 /*
742 * trace_types_lock is used to protect the trace_types list.
743 */
744 DEFINE_MUTEX(trace_types_lock);
745
746 /*
747 * serialize the access of the ring buffer
748 *
749 * ring buffer serializes readers, but it is low level protection.
750 * The validity of the events (which returns by ring_buffer_peek() ..etc)
751 * are not protected by ring buffer.
752 *
753 * The content of events may become garbage if we allow other process consumes
754 * these events concurrently:
755 * A) the page of the consumed events may become a normal page
756 * (not reader page) in ring buffer, and this page will be rewritten
757 * by events producer.
758 * B) The page of the consumed events may become a page for splice_read,
759 * and this page will be returned to system.
760 *
761 * These primitives allow multi process access to different cpu ring buffer
762 * concurrently.
763 *
764 * These primitives don't distinguish read-only and read-consume access.
765 * Multi read-only access are also serialized.
766 */
767
768 #ifdef CONFIG_SMP
769 static DECLARE_RWSEM(all_cpu_access_lock);
770 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
771
trace_access_lock(int cpu)772 static inline void trace_access_lock(int cpu)
773 {
774 if (cpu == RING_BUFFER_ALL_CPUS) {
775 /* gain it for accessing the whole ring buffer. */
776 down_write(&all_cpu_access_lock);
777 } else {
778 /* gain it for accessing a cpu ring buffer. */
779
780 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
781 down_read(&all_cpu_access_lock);
782
783 /* Secondly block other access to this @cpu ring buffer. */
784 mutex_lock(&per_cpu(cpu_access_lock, cpu));
785 }
786 }
787
trace_access_unlock(int cpu)788 static inline void trace_access_unlock(int cpu)
789 {
790 if (cpu == RING_BUFFER_ALL_CPUS) {
791 up_write(&all_cpu_access_lock);
792 } else {
793 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
794 up_read(&all_cpu_access_lock);
795 }
796 }
797
trace_access_lock_init(void)798 static inline void trace_access_lock_init(void)
799 {
800 int cpu;
801
802 for_each_possible_cpu(cpu)
803 mutex_init(&per_cpu(cpu_access_lock, cpu));
804 }
805
806 #else
807
808 static DEFINE_MUTEX(access_lock);
809
trace_access_lock(int cpu)810 static inline void trace_access_lock(int cpu)
811 {
812 (void)cpu;
813 mutex_lock(&access_lock);
814 }
815
trace_access_unlock(int cpu)816 static inline void trace_access_unlock(int cpu)
817 {
818 (void)cpu;
819 mutex_unlock(&access_lock);
820 }
821
/* UP build: nothing to set up for the single access mutex. */
static inline void trace_access_lock_init(void)
{
}
825
826 #endif
827
tracer_tracing_on(struct trace_array * tr)828 void tracer_tracing_on(struct trace_array *tr)
829 {
830 if (tr->array_buffer.buffer)
831 ring_buffer_record_on(tr->array_buffer.buffer);
832 /*
833 * This flag is looked at when buffers haven't been allocated
834 * yet, or by some tracers (like irqsoff), that just want to
835 * know if the ring buffer has been disabled, but it can handle
836 * races of where it gets disabled but we still do a record.
837 * As the check is in the fast path of the tracers, it is more
838 * important to be fast than accurate.
839 */
840 tr->buffer_disabled = 0;
841 }
842
843 /**
844 * tracing_on - enable tracing buffers
845 *
846 * This function enables tracing buffers that may have been
847 * disabled with tracing_off.
848 */
tracing_on(void)849 void tracing_on(void)
850 {
851 tracer_tracing_on(&global_trace);
852 }
853 EXPORT_SYMBOL_GPL(tracing_on);
854
855 #ifdef CONFIG_TRACER_SNAPSHOT
856 /**
857 * tracing_snapshot - take a snapshot of the current buffer.
858 *
859 * This causes a swap between the snapshot buffer and the current live
860 * tracing buffer. You can use this to take snapshots of the live
861 * trace when some condition is triggered, but continue to trace.
862 *
863 * Note, make sure to allocate the snapshot with either
864 * a tracing_snapshot_alloc(), or by doing it manually
865 * with: echo 1 > /sys/kernel/tracing/snapshot
866 *
867 * If the snapshot buffer is not allocated, it will stop tracing.
868 * Basically making a permanent snapshot.
869 */
tracing_snapshot(void)870 void tracing_snapshot(void)
871 {
872 struct trace_array *tr = &global_trace;
873
874 tracing_snapshot_instance(tr);
875 }
876 EXPORT_SYMBOL_GPL(tracing_snapshot);
877
878 /**
879 * tracing_alloc_snapshot - allocate snapshot buffer.
880 *
881 * This only allocates the snapshot buffer if it isn't already
882 * allocated - it doesn't also take a snapshot.
883 *
884 * This is meant to be used in cases where the snapshot buffer needs
885 * to be set up for events that can't sleep but need to be able to
886 * trigger a snapshot.
887 */
tracing_alloc_snapshot(void)888 int tracing_alloc_snapshot(void)
889 {
890 struct trace_array *tr = &global_trace;
891 int ret;
892
893 ret = tracing_alloc_snapshot_instance(tr);
894 WARN_ON(ret < 0);
895
896 return ret;
897 }
898 #else
void tracing_snapshot(void)
{
	/* Snapshot support is compiled out; complain if anything calls this. */
	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
}
EXPORT_SYMBOL_GPL(tracing_snapshot);
void tracing_snapshot_alloc(void)
{
	/* Delegates to tracing_snapshot() purely to emit the warning. */
	tracing_snapshot();
}
EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
910 #endif /* CONFIG_TRACER_SNAPSHOT */
911
tracer_tracing_off(struct trace_array * tr)912 void tracer_tracing_off(struct trace_array *tr)
913 {
914 if (tr->array_buffer.buffer)
915 ring_buffer_record_off(tr->array_buffer.buffer);
916 /*
917 * This flag is looked at when buffers haven't been allocated
918 * yet, or by some tracers (like irqsoff), that just want to
919 * know if the ring buffer has been disabled, but it can handle
920 * races of where it gets disabled but we still do a record.
921 * As the check is in the fast path of the tracers, it is more
922 * important to be fast than accurate.
923 */
924 tr->buffer_disabled = 1;
925 }
926
927 /**
928 * tracer_tracing_disable() - temporary disable the buffer from write
929 * @tr: The trace array to disable its buffer for
930 *
931 * Expects trace_tracing_enable() to re-enable tracing.
932 * The difference between this and tracer_tracing_off() is that this
933 * is a counter and can nest, whereas, tracer_tracing_off() can
934 * be called multiple times and a single trace_tracing_on() will
935 * enable it.
936 */
tracer_tracing_disable(struct trace_array * tr)937 void tracer_tracing_disable(struct trace_array *tr)
938 {
939 if (WARN_ON_ONCE(!tr->array_buffer.buffer))
940 return;
941
942 ring_buffer_record_disable(tr->array_buffer.buffer);
943 }
944
945 /**
946 * tracer_tracing_enable() - counter part of tracer_tracing_disable()
947 * @tr: The trace array that had tracer_tracincg_disable() called on it
948 *
949 * This is called after tracer_tracing_disable() has been called on @tr,
950 * when it's safe to re-enable tracing.
951 */
tracer_tracing_enable(struct trace_array * tr)952 void tracer_tracing_enable(struct trace_array *tr)
953 {
954 if (WARN_ON_ONCE(!tr->array_buffer.buffer))
955 return;
956
957 ring_buffer_record_enable(tr->array_buffer.buffer);
958 }
959
960 /**
961 * tracing_off - turn off tracing buffers
962 *
963 * This function stops the tracing buffers from recording data.
964 * It does not disable any overhead the tracers themselves may
965 * be causing. This function simply causes all recording to
966 * the ring buffers to fail.
967 */
tracing_off(void)968 void tracing_off(void)
969 {
970 tracer_tracing_off(&global_trace);
971 }
972 EXPORT_SYMBOL_GPL(tracing_off);
973
disable_trace_on_warning(void)974 void disable_trace_on_warning(void)
975 {
976 if (__disable_trace_on_warning) {
977 struct trace_array *tr = READ_ONCE(printk_trace);
978
979 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
980 "Disabling tracing due to warning\n");
981 tracing_off();
982
983 /* Disable trace_printk() buffer too */
984 if (tr != &global_trace) {
985 trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_,
986 "Disabling tracing due to warning\n");
987 tracer_tracing_off(tr);
988 }
989 }
990 }
991
992 /**
993 * tracer_tracing_is_on - show real state of ring buffer enabled
994 * @tr : the trace array to know if ring buffer is enabled
995 *
996 * Shows real state of the ring buffer if it is enabled or not.
997 */
tracer_tracing_is_on(struct trace_array * tr)998 bool tracer_tracing_is_on(struct trace_array *tr)
999 {
1000 if (tr->array_buffer.buffer)
1001 return ring_buffer_record_is_set_on(tr->array_buffer.buffer);
1002 return !tr->buffer_disabled;
1003 }
1004
/**
 * tracing_is_on - show state of ring buffers enabled
 *
 * Returns non-zero if recording into the global trace array's
 * ring buffer is enabled (see tracer_tracing_is_on()).
 */
int tracing_is_on(void)
{
	return tracer_tracing_is_on(&global_trace);
}
EXPORT_SYMBOL_GPL(tracing_is_on);
1012 EXPORT_SYMBOL_GPL(tracing_is_on);
1013
set_buf_size(char * str)1014 static int __init set_buf_size(char *str)
1015 {
1016 unsigned long buf_size;
1017
1018 if (!str)
1019 return 0;
1020 buf_size = memparse(str, &str);
1021 /*
1022 * nr_entries can not be zero and the startup
1023 * tests require some buffer space. Therefore
1024 * ensure we have at least 4096 bytes of buffer.
1025 */
1026 trace_buf_size = max(4096UL, buf_size);
1027 return 1;
1028 }
1029 __setup("trace_buf_size=", set_buf_size);
1030
set_tracing_thresh(char * str)1031 static int __init set_tracing_thresh(char *str)
1032 {
1033 unsigned long threshold;
1034 int ret;
1035
1036 if (!str)
1037 return 0;
1038 ret = kstrtoul(str, 0, &threshold);
1039 if (ret < 0)
1040 return 0;
1041 tracing_thresh = threshold * 1000;
1042 return 1;
1043 }
1044 __setup("tracing_thresh=", set_tracing_thresh);
1045
/* Convert nanoseconds to microseconds; the remainder is truncated. */
unsigned long nsecs_to_usecs(unsigned long nsecs)
{
	return nsecs / 1000;
}
1050
/*
 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
 * of strings in the order that the evals (enum) were defined.
 */
#undef C
#define C(a, b) b

/* These must match the bit positions in trace_iterator_flags */
static const char *trace_options[] = {
	TRACE_FLAGS
	/* NULL terminated so iteration can stop without knowing the count */
	NULL
};
1065
/*
 * The set of trace clocks available for timestamping events.
 * in_ns flags clocks whose values are in nanoseconds (queried via
 * trace_clock_in_ns()); counter-style clocks set it to 0.
 */
static struct {
	u64 (*func)(void);
	const char *name;
	int in_ns; /* is this clock in nanoseconds? */
} trace_clocks[] = {
	{ trace_clock_local, "local", 1 },
	{ trace_clock_global, "global", 1 },
	{ trace_clock_counter, "counter", 0 },
	{ trace_clock_jiffies, "uptime", 0 },
	{ trace_clock, "perf", 1 },
	{ ktime_get_mono_fast_ns, "mono", 1 },
	{ ktime_get_raw_fast_ns, "mono_raw", 1 },
	{ ktime_get_boot_fast_ns, "boot", 1 },
	{ ktime_get_tai_fast_ns, "tai", 1 },
	ARCH_TRACE_CLOCKS
};
1082
trace_clock_in_ns(struct trace_array * tr)1083 bool trace_clock_in_ns(struct trace_array *tr)
1084 {
1085 if (trace_clocks[tr->clock_id].in_ns)
1086 return true;
1087
1088 return false;
1089 }
1090
/*
 * trace_parser_get_init - gets the buffer for trace parser
 *
 * Zeroes @parser and allocates a @size byte scratch buffer for it.
 * Returns 0 on success, 1 (note: not a negative errno) when the
 * allocation fails.
 */
int trace_parser_get_init(struct trace_parser *parser, int size)
{
	memset(parser, 0, sizeof(*parser));

	parser->buffer = kmalloc(size, GFP_KERNEL);
	if (!parser->buffer)
		return 1;

	parser->size = size;
	return 0;
}
1105
/*
 * trace_parser_put - frees the buffer for trace parser
 */
void trace_parser_put(struct trace_parser *parser)
{
	kfree(parser->buffer);
	/* Clear the pointer so a repeated put is harmless */
	parser->buffer = NULL;
}
1114
/*
 * trace_get_user - reads the user input string separated by space
 * (matched by isspace(ch))
 *
 * For each string found the 'struct trace_parser' is updated,
 * and the function returns.
 *
 * Returns number of bytes read (possibly 0 if only spaces were seen),
 * or a negative error if the user copy failed or the token overflowed
 * parser->buffer.
 *
 * See kernel/trace/trace.h for 'struct trace_parser' details.
 */
int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
		   size_t cnt, loff_t *ppos)
{
	char ch;
	size_t read = 0;
	ssize_t ret;

	/* A new read from offset 0 starts a fresh token */
	if (!*ppos)
		trace_parser_clear(parser);

	ret = get_user(ch, ubuf++);
	if (ret)
		goto fail;

	read++;
	cnt--;

	/*
	 * The parser is not finished with the last write,
	 * continue reading the user input without skipping spaces.
	 */
	if (!parser->cont) {
		/* skip white space */
		while (cnt && isspace(ch)) {
			ret = get_user(ch, ubuf++);
			if (ret)
				goto fail;
			read++;
			cnt--;
		}

		parser->idx = 0;

		/* only spaces were written */
		if (isspace(ch) || !ch) {
			*ppos += read;
			return read;
		}
	}

	/* read the non-space input */
	while (cnt && !isspace(ch) && ch) {
		/* Leave room for the terminating '\0' */
		if (parser->idx < parser->size - 1)
			parser->buffer[parser->idx++] = ch;
		else {
			ret = -EINVAL;
			goto fail;
		}

		ret = get_user(ch, ubuf++);
		if (ret)
			goto fail;
		read++;
		cnt--;
	}

	/* We either got finished input or we have to wait for another call. */
	if (isspace(ch) || !ch) {
		parser->buffer[parser->idx] = 0;
		parser->cont = false;
	} else if (parser->idx < parser->size - 1) {
		/* Ran out of input mid-token: resume on the next call */
		parser->cont = true;
		parser->buffer[parser->idx++] = ch;
		/* Make sure the parsed string always terminates with '\0'. */
		parser->buffer[parser->idx] = 0;
	} else {
		ret = -EINVAL;
		goto fail;
	}

	*ppos += read;
	return read;
fail:
	trace_parser_fail(parser);
	return ret;
}
1202
1203 /* TODO add a seq_buf_to_buffer() */
trace_seq_to_buffer(struct trace_seq * s,void * buf,size_t cnt)1204 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1205 {
1206 int len;
1207
1208 if (trace_seq_used(s) <= s->readpos)
1209 return -EBUSY;
1210
1211 len = trace_seq_used(s) - s->readpos;
1212 if (cnt > len)
1213 cnt = len;
1214 memcpy(buf, s->buffer + s->readpos, cnt);
1215
1216 s->readpos += cnt;
1217 return cnt;
1218 }
1219
/* Latency threshold in nanoseconds (the "tracing_thresh=" value * 1000). */
unsigned long __read_mostly tracing_thresh;

/* Bundles an iterator with the wait_index snapshot taken before waiting. */
struct pipe_wait {
	struct trace_iterator	*iter;
	int			wait_index;
};
1226
wait_pipe_cond(void * data)1227 static bool wait_pipe_cond(void *data)
1228 {
1229 struct pipe_wait *pwait = data;
1230 struct trace_iterator *iter = pwait->iter;
1231
1232 if (atomic_read_acquire(&iter->wait_index) != pwait->wait_index)
1233 return true;
1234
1235 return iter->closed;
1236 }
1237
/*
 * Block until the ring buffer that @iter reads from has data (or the
 * wait is interrupted via wait_pipe_cond()). @full is forwarded to
 * ring_buffer_wait() -- see that function for its exact semantics.
 */
static int wait_on_pipe(struct trace_iterator *iter, int full)
{
	struct pipe_wait pwait;
	int ret;

	/* Iterators are static, they should be filled or empty */
	if (trace_buffer_iter(iter, iter->cpu_file))
		return 0;

	/* Snapshot the wait index so wait_pipe_cond() can detect a wake up */
	pwait.wait_index = atomic_read_acquire(&iter->wait_index);
	pwait.iter = iter;

	ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full,
			       wait_pipe_cond, &pwait);

#ifdef CONFIG_TRACER_SNAPSHOT
	/*
	 * Make sure this is still the snapshot buffer, as if a snapshot were
	 * to happen, this would now be the main buffer.
	 */
	if (iter->snapshot)
		iter->array_buffer = &iter->tr->snapshot_buffer;
#endif
	return ret;
}
1263
#ifdef CONFIG_FTRACE_STARTUP_TEST
/* Set by init_trace_selftests() once it is safe to run selftests */
static bool selftests_can_run;

/* One queued (postponed) selftest for a tracer registered too early */
struct trace_selftests {
	struct list_head		list;
	struct tracer			*type;
};

/* Selftests waiting for init_trace_selftests() to run them */
static LIST_HEAD(postponed_selftests);
1273
save_selftest(struct tracer * type)1274 static int save_selftest(struct tracer *type)
1275 {
1276 struct trace_selftests *selftest;
1277
1278 selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1279 if (!selftest)
1280 return -ENOMEM;
1281
1282 selftest->type = type;
1283 list_add(&selftest->list, &postponed_selftests);
1284 return 0;
1285 }
1286
/*
 * Run @type's selftest against the global trace array, restoring the
 * previously current tracer (and its flags) afterwards.
 * Returns 0 on success or skip, -1 if the selftest failed, or the
 * save_selftest() result when the test must be postponed.
 */
static int run_tracer_selftest(struct tracer *type)
{
	struct trace_array *tr = &global_trace;
	struct tracer_flags *saved_flags = tr->current_trace_flags;
	struct tracer *saved_tracer = tr->current_trace;
	int ret;

	if (!type->selftest || tracing_selftest_disabled)
		return 0;

	/*
	 * If a tracer registers early in boot up (before scheduling is
	 * initialized and such), then do not run its selftests yet.
	 * Instead, run it a little later in the boot process.
	 */
	if (!selftests_can_run)
		return save_selftest(type);

	if (!tracing_is_on()) {
		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
			type->name);
		return 0;
	}

	/*
	 * Run a selftest on this tracer.
	 * Here we reset the trace buffer, and set the current
	 * tracer to be this tracer. The tracer can then run some
	 * internal tracing to verify that everything is in order.
	 * If we fail, we do not register this tracer.
	 */
	tracing_reset_online_cpus(&tr->array_buffer);

	tr->current_trace = type;
	/* Fall back to the default flags when the tracer defines none */
	tr->current_trace_flags = type->flags ? : type->default_flags;

#ifdef CONFIG_TRACER_MAX_TRACE
	if (tracer_uses_snapshot(type)) {
		/* If we expanded the buffers, make sure the max is expanded too */
		if (tr->ring_buffer_expanded)
			ring_buffer_resize(tr->snapshot_buffer.buffer, trace_buf_size,
					   RING_BUFFER_ALL_CPUS);
		tr->allocated_snapshot = true;
	}
#endif

	/* the test is responsible for initializing and enabling */
	pr_info("Testing tracer %s: ", type->name);
	ret = type->selftest(type, tr);
	/* the test is responsible for resetting too */
	tr->current_trace = saved_tracer;
	tr->current_trace_flags = saved_flags;
	if (ret) {
		printk(KERN_CONT "FAILED!\n");
		/* Add the warning after printing 'FAILED' */
		WARN_ON(1);
		return -1;
	}
	/* Only reset on passing, to avoid touching corrupted buffers */
	tracing_reset_online_cpus(&tr->array_buffer);

#ifdef CONFIG_TRACER_MAX_TRACE
	if (tracer_uses_snapshot(type)) {
		tr->allocated_snapshot = false;

		/* Shrink the max buffer again */
		if (tr->ring_buffer_expanded)
			ring_buffer_resize(tr->snapshot_buffer.buffer, 1,
					   RING_BUFFER_ALL_CPUS);
	}
#endif

	printk(KERN_CONT "PASSED\n");
	return 0;
}
1362
/*
 * Run @type's selftest with tracing_selftest_running set, so the rest
 * of the tracing code knows a selftest is poking at the buffers.
 */
static int do_run_tracer_selftest(struct tracer *type)
{
	int ret;

	/*
	 * Tests can take a long time, especially if they are run one after the
	 * other, as does happen during bootup when all the tracers are
	 * registered. This could cause the soft lockup watchdog to trigger.
	 */
	cond_resched();

	tracing_selftest_running = true;
	ret = run_tracer_selftest(type);
	tracing_selftest_running = false;

	return ret;
}
1380
/*
 * core_initcall that runs the selftests postponed by save_selftest()
 * (tracers that registered before the scheduler was ready). A tracer
 * whose selftest fails is unlinked from the trace_types list.
 */
static __init int init_trace_selftests(void)
{
	struct trace_selftests *p, *n;
	struct tracer *t, **last;
	int ret;

	selftests_can_run = true;

	guard(mutex)(&trace_types_lock);

	if (list_empty(&postponed_selftests))
		return 0;

	pr_info("Running postponed tracer tests:\n");

	tracing_selftest_running = true;
	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
		/* This loop can take minutes when sanitizers are enabled, so
		 * lets make sure we allow RCU processing.
		 */
		cond_resched();
		ret = run_tracer_selftest(p->type);
		/* If the test fails, then warn and remove from available_tracers */
		if (ret < 0) {
			WARN(1, "tracer: %s failed selftest, disabling\n",
			     p->type->name);
			/* Unlink the failed tracer from the singly linked list */
			last = &trace_types;
			for (t = trace_types; t; t = t->next) {
				if (t == p->type) {
					*last = t->next;
					break;
				}
				last = &t->next;
			}
		}
		list_del(&p->list);
		kfree(p);
	}
	tracing_selftest_running = false;

	return 0;
}
core_initcall(init_trace_selftests);
#else
/* Selftests are compiled out: pretend every selftest passes. */
static inline int do_run_tracer_selftest(struct tracer *type)
{
	return 0;
}
#endif /* CONFIG_FTRACE_STARTUP_TEST */
1430
static int add_tracer(struct trace_array *tr, struct tracer *t);

static void __init apply_trace_boot_options(void);

/*
 * Release every tracer entry attached to @tr, freeing each entry's
 * flags allocation as well. Caller must hold trace_types_lock.
 */
static void free_tracers(struct trace_array *tr)
{
	struct tracers *t, *n;

	lockdep_assert_held(&trace_types_lock);

	list_for_each_entry_safe(t, n, &tr->tracers, list) {
		list_del(&t->list);
		kfree(t->flags);
		kfree(t);
	}
}
1447
/**
 * register_tracer - register a tracer with the ftrace system.
 * @type: the plugin for the tracer
 *
 * Register a new plugin tracer.
 *
 * Returns 0 on success; -1 for a missing/too-long/duplicate name or a
 * failed selftest, -EPERM under lockdown, or the add_tracer() error.
 */
int __init register_tracer(struct tracer *type)
{
	struct trace_array *tr;
	struct tracer *t;
	int ret = 0;

	if (!type->name) {
		pr_info("Tracer must have a name\n");
		return -1;
	}

	if (strlen(type->name) >= MAX_TRACER_SIZE) {
		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
		return -1;
	}

	if (security_locked_down(LOCKDOWN_TRACEFS)) {
		pr_warn("Can not register tracer %s due to lockdown\n",
			type->name);
		return -EPERM;
	}

	mutex_lock(&trace_types_lock);

	/* Reject duplicate tracer names */
	for (t = trace_types; t; t = t->next) {
		if (strcmp(type->name, t->name) == 0) {
			/* already found */
			pr_info("Tracer %s already registered\n",
				type->name);
			ret = -1;
			goto out;
		}
	}

	/* store the tracer for __set_tracer_option */
	if (type->flags)
		type->flags->trace = type;

	ret = do_run_tracer_selftest(type);
	if (ret < 0)
		goto out;

	/* Hook the tracer up to every existing trace instance */
	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
		ret = add_tracer(tr, type);
		if (ret < 0) {
			/* The tracer will still exist but without options */
			pr_warn("Failed to create tracer options for %s\n", type->name);
			break;
		}
	}

	/* Link into the global tracer list */
	type->next = trace_types;
	trace_types = type;

 out:
	mutex_unlock(&trace_types_lock);

	if (ret || !default_bootup_tracer)
		return ret;

	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
		return 0;

	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
	/* Do we want this tracer to start on bootup? */
	WARN_ON(tracing_set_tracer(&global_trace, type->name) < 0);
	default_bootup_tracer = NULL;

	apply_trace_boot_options();

	/* disable other selftests, since this will break it. */
	disable_tracing_selftest("running a tracer");

	return 0;
}
1529
/*
 * Clear the ring buffer of @buf for a single @cpu. Recording is
 * disabled and all in-flight commits are waited for before the reset.
 */
void tracing_reset_cpu(struct array_buffer *buf, int cpu)
{
	struct trace_buffer *buffer = buf->buffer;

	if (!buffer)
		return;

	ring_buffer_record_disable(buffer);

	/* Make sure all commits have finished */
	synchronize_rcu();
	ring_buffer_reset_cpu(buffer, cpu);

	ring_buffer_record_enable(buffer);
}
1545
/*
 * Clear the ring buffer of @buf on all online CPUs, refreshing the
 * buffer's start timestamp. In-flight commits are waited for first.
 */
void tracing_reset_online_cpus(struct array_buffer *buf)
{
	struct trace_buffer *buffer = buf->buffer;

	if (!buffer)
		return;

	ring_buffer_record_disable(buffer);

	/* Make sure all commits have finished */
	synchronize_rcu();

	/* Record when the buffer was (re)started */
	buf->time_start = buffer_ftrace_now(buf, buf->cpu);

	ring_buffer_reset_online_cpus(buffer);

	ring_buffer_record_enable(buffer);
}
1564
/*
 * Like tracing_reset_online_cpus() but resets the buffer for every
 * possible CPU, not just the online ones.
 */
static void tracing_reset_all_cpus(struct array_buffer *buf)
{
	struct trace_buffer *buffer = buf->buffer;

	if (!buffer)
		return;

	ring_buffer_record_disable(buffer);

	/* Make sure all commits have finished */
	synchronize_rcu();

	/* Record when the buffer was (re)started */
	buf->time_start = buffer_ftrace_now(buf, buf->cpu);

	ring_buffer_reset(buffer);

	ring_buffer_record_enable(buffer);
}
1583
1584 /* Must have trace_types_lock held */
tracing_reset_all_online_cpus_unlocked(void)1585 void tracing_reset_all_online_cpus_unlocked(void)
1586 {
1587 struct trace_array *tr;
1588
1589 lockdep_assert_held(&trace_types_lock);
1590
1591 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1592 if (!tr->clear_trace)
1593 continue;
1594 tr->clear_trace = false;
1595 tracing_reset_online_cpus(&tr->array_buffer);
1596 #ifdef CONFIG_TRACER_SNAPSHOT
1597 tracing_reset_online_cpus(&tr->snapshot_buffer);
1598 #endif
1599 }
1600 }
1601
/* Locked wrapper around tracing_reset_all_online_cpus_unlocked(). */
void tracing_reset_all_online_cpus(void)
{
	guard(mutex)(&trace_types_lock);
	tracing_reset_all_online_cpus_unlocked();
}
1607
/* Non-zero when the global trace array is stopped (see tracing_stop()). */
int is_tracing_stopped(void)
{
	return global_trace.stop_count;
}
1612
/*
 * Undo one tracing_stop_tr(): stops are counted, so recording is only
 * re-enabled when the count drops back to zero.
 */
static void tracing_start_tr(struct trace_array *tr)
{
	struct trace_buffer *buffer;

	if (tracing_disabled)
		return;

	guard(raw_spinlock_irqsave)(&tr->start_lock);
	if (--tr->stop_count) {
		if (WARN_ON_ONCE(tr->stop_count < 0)) {
			/* Someone screwed up their debugging */
			tr->stop_count = 0;
		}
		return;
	}

	/* Prevent the buffers from switching */
	arch_spin_lock(&tr->max_lock);

	buffer = tr->array_buffer.buffer;
	if (buffer)
		ring_buffer_record_enable(buffer);

#ifdef CONFIG_TRACER_SNAPSHOT
	buffer = tr->snapshot_buffer.buffer;
	if (buffer)
		ring_buffer_record_enable(buffer);
#endif

	arch_spin_unlock(&tr->max_lock);
}
1644
1645 /**
1646 * tracing_start - quick start of the tracer
1647 *
1648 * If tracing is enabled but was stopped by tracing_stop,
1649 * this will start the tracer back up.
1650 */
tracing_start(void)1651 void tracing_start(void)
1652
1653 {
1654 return tracing_start_tr(&global_trace);
1655 }
1656
/*
 * Counted stop of @tr: only the first stop actually disables the
 * recording; nested stops just bump stop_count.
 */
static void tracing_stop_tr(struct trace_array *tr)
{
	struct trace_buffer *buffer;

	guard(raw_spinlock_irqsave)(&tr->start_lock);
	if (tr->stop_count++)
		return;

	/* Prevent the buffers from switching */
	arch_spin_lock(&tr->max_lock);

	buffer = tr->array_buffer.buffer;
	if (buffer)
		ring_buffer_record_disable(buffer);

#ifdef CONFIG_TRACER_SNAPSHOT
	buffer = tr->snapshot_buffer.buffer;
	if (buffer)
		ring_buffer_record_disable(buffer);
#endif

	arch_spin_unlock(&tr->max_lock);
}
1680
1681 /**
1682 * tracing_stop - quick stop of the tracer
1683 *
1684 * Light weight way to stop tracing. Use in conjunction with
1685 * tracing_start.
1686 */
tracing_stop(void)1687 void tracing_stop(void)
1688 {
1689 return tracing_stop_tr(&global_trace);
1690 }
1691
1692 /*
1693 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
1694 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
1695 * simplifies those functions and keeps them in sync.
1696 */
trace_handle_return(struct trace_seq * s)1697 enum print_line_t trace_handle_return(struct trace_seq *s)
1698 {
1699 return trace_seq_has_overflowed(s) ?
1700 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
1701 }
1702 EXPORT_SYMBOL_GPL(trace_handle_return);
1703
/*
 * Current task's migration-disable depth, or 0 when the kernel does
 * not track it (!SMP).
 */
static unsigned short migration_disable_value(void)
{
#if defined(CONFIG_SMP)
	return current->migration_disabled;
#else
	return 0;
#endif
}
1712
/*
 * Build the packed context word stored with each event:
 * the trace flags (irq/softirq/NMI/resched state plus @irqs_status)
 * in the upper 16 bits, the preempt count capped at 0xf in bits 0-3,
 * and the migration-disable depth capped at 0xf in bits 4-7.
 */
unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
{
	unsigned int trace_flags = irqs_status;
	unsigned int pc;

	pc = preempt_count();

	if (pc & NMI_MASK)
		trace_flags |= TRACE_FLAG_NMI;
	if (pc & HARDIRQ_MASK)
		trace_flags |= TRACE_FLAG_HARDIRQ;
	if (in_serving_softirq())
		trace_flags |= TRACE_FLAG_SOFTIRQ;
	/* softirq count beyond the "serving" increment means BHs are off */
	if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
		trace_flags |= TRACE_FLAG_BH_OFF;

	if (tif_need_resched())
		trace_flags |= TRACE_FLAG_NEED_RESCHED;
	if (test_preempt_need_resched())
		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
	if (IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY) && tif_test_bit(TIF_NEED_RESCHED_LAZY))
		trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY;
	return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
		(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
}
1738
/* Public thin wrapper around __trace_buffer_lock_reserve(). */
struct ring_buffer_event *
trace_buffer_lock_reserve(struct trace_buffer *buffer,
			  int type,
			  unsigned long len,
			  unsigned int trace_ctx)
{
	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
}
1747
/* Per-CPU page used as a scratch ring_buffer_event while filtering */
DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
/* Non-zero while this CPU's buffered event is claimed (nesting guard) */
DEFINE_PER_CPU(int, trace_buffered_event_cnt);
/* Number of users of the buffered events; protected by event_mutex */
static int trace_buffered_event_ref;

/**
 * trace_buffered_event_enable - enable buffering events
 *
 * When events are being filtered, it is quicker to use a temporary
 * buffer to write the event data into if there's a likely chance
 * that it will not be committed. The discard of the ring buffer
 * is not as fast as committing, and is much slower than copying
 * a commit.
 *
 * When an event is to be filtered, allocate per cpu buffers to
 * write the event data into, and if the event is filtered and discarded
 * it is simply dropped, otherwise, the entire data is to be committed
 * in one shot.
 */
void trace_buffered_event_enable(void)
{
	struct ring_buffer_event *event;
	struct page *page;
	int cpu;

	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));

	/* Only the first user allocates the pages */
	if (trace_buffered_event_ref++)
		return;

	for_each_tracing_cpu(cpu) {
		page = alloc_pages_node(cpu_to_node(cpu),
					GFP_KERNEL | __GFP_NORETRY, 0);
		/* This is just an optimization and can handle failures */
		if (!page) {
			pr_err("Failed to allocate event buffer\n");
			break;
		}

		event = page_address(page);
		memset(event, 0, sizeof(*event));

		per_cpu(trace_buffered_event, cpu) = event;

		/* Sanity check: the CPU must see its own per-cpu pointer */
		scoped_guard(preempt,) {
			if (cpu == smp_processor_id() &&
			    __this_cpu_read(trace_buffered_event) !=
			    per_cpu(trace_buffered_event, cpu))
				WARN_ON_ONCE(1);
		}
	}
}
1799
/* Cross-CPU callback: release this CPU's buffered event again */
static void enable_trace_buffered_event(void *data)
{
	this_cpu_dec(trace_buffered_event_cnt);
}
1804
/* Cross-CPU callback: mark this CPU's buffered event as in use */
static void disable_trace_buffered_event(void *data)
{
	this_cpu_inc(trace_buffered_event_cnt);
}
1809
/**
 * trace_buffered_event_disable - disable buffering events
 *
 * When a filter is removed, it is faster to not use the buffered
 * events, and to commit directly into the ring buffer. Free up
 * the temp buffers when there are no more users. This requires
 * special synchronization with current events.
 */
void trace_buffered_event_disable(void)
{
	int cpu;

	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));

	if (WARN_ON_ONCE(!trace_buffered_event_ref))
		return;

	/* Other users remain; keep the buffers */
	if (--trace_buffered_event_ref)
		return;

	/* For each CPU, set the buffer as used. */
	on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
			 NULL, true);

	/* Wait for all current users to finish */
	synchronize_rcu();

	for_each_tracing_cpu(cpu) {
		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
		per_cpu(trace_buffered_event, cpu) = NULL;
	}

	/*
	 * Wait for all CPUs that potentially started checking if they can use
	 * their event buffer only after the previous synchronize_rcu() call and
	 * they still read a valid pointer from trace_buffered_event. It must be
	 * ensured they don't see cleared trace_buffered_event_cnt else they
	 * could wrongly decide to use the pointed-to buffer which is now freed.
	 */
	synchronize_rcu();

	/* For each CPU, relinquish the buffer */
	on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
			 true);
}
1855
/* Fallback buffer for triggers to examine events the real buffer rejected */
static struct trace_buffer *temp_buffer;

/*
 * Reserve ring buffer space for an event of @trace_file. When
 * filtering is active, the per-CPU buffered event may be returned
 * instead -- in that case preemption is left disabled (released by
 * the matching commit/discard path).
 */
struct ring_buffer_event *
trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
			  struct trace_event_file *trace_file,
			  int type, unsigned long len,
			  unsigned int trace_ctx)
{
	struct ring_buffer_event *entry;
	struct trace_array *tr = trace_file->tr;
	int val;

	*current_rb = tr->array_buffer.buffer;

	if (!tr->no_filter_buffering_ref &&
	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
		preempt_disable_notrace();
		/*
		 * Filtering is on, so try to use the per cpu buffer first.
		 * This buffer will simulate a ring_buffer_event,
		 * where the type_len is zero and the array[0] will
		 * hold the full length.
		 * (see include/linux/ring-buffer.h for details on
		 * how the ring_buffer_event is structured).
		 *
		 * Using a temp buffer during filtering and copying it
		 * on a matched filter is quicker than writing directly
		 * into the ring buffer and then discarding it when
		 * it doesn't match. That is because the discard
		 * requires several atomic operations to get right.
		 * Copying on match and doing nothing on a failed match
		 * is still quicker than no copy on match, but having
		 * to discard out of the ring buffer on a failed match.
		 */
		if ((entry = __this_cpu_read(trace_buffered_event))) {
			int max_len = PAGE_SIZE - struct_size(entry, array, 1);

			val = this_cpu_inc_return(trace_buffered_event_cnt);

			/*
			 * Preemption is disabled, but interrupts and NMIs
			 * can still come in now. If that happens after
			 * the above increment, then it will have to go
			 * back to the old method of allocating the event
			 * on the ring buffer, and if the filter fails, it
			 * will have to call ring_buffer_discard_commit()
			 * to remove it.
			 *
			 * Need to also check the unlikely case that the
			 * length is bigger than the temp buffer size.
			 * If that happens, then the reserve is pretty much
			 * guaranteed to fail, as the ring buffer currently
			 * only allows events less than a page. But that may
			 * change in the future, so let the ring buffer reserve
			 * handle the failure in that case.
			 */
			if (val == 1 && likely(len <= max_len)) {
				trace_event_setup(entry, type, trace_ctx);
				entry->array[0] = len;
				/* Return with preemption disabled */
				return entry;
			}
			this_cpu_dec(trace_buffered_event_cnt);
		}
		/* __trace_buffer_lock_reserve() disables preemption */
		preempt_enable_notrace();
	}

	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
					    trace_ctx);
	/*
	 * If tracing is off, but we have triggers enabled
	 * we still need to look at the event data. Use the temp_buffer
	 * to store the trace event for the trigger to use. It's recursive
	 * safe and will not be recorded anywhere.
	 */
	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
		*current_rb = temp_buffer;
		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
						    trace_ctx);
	}
	return entry;
}
EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
1940
/* Serializes use of the shared tracepoint_print_iter below */
static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
static DEFINE_MUTEX(tracepoint_printk_mutex);

/*
 * Print a single trace event through printk using the shared
 * tracepoint_print_iter. Used when the tracepoint_printk sysctl
 * (tracepoint_printk_key) is enabled.
 */
static void output_printk(struct trace_event_buffer *fbuffer)
{
	struct trace_event_call *event_call;
	struct trace_event_file *file;
	struct trace_event *event;
	unsigned long flags;
	struct trace_iterator *iter = tracepoint_print_iter;

	/* We should never get here if iter is NULL */
	if (WARN_ON_ONCE(!iter))
		return;

	event_call = fbuffer->trace_file->event_call;
	if (!event_call || !event_call->event.funcs ||
	    !event_call->event.funcs->trace)
		return;

	/* Honor the file's soft disable and filter settings */
	file = fbuffer->trace_file;
	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
	     !filter_match_preds(file->filter, fbuffer->entry)))
		return;

	event = &fbuffer->trace_file->event_call->event;

	/* The iterator is shared across CPUs; serialize access to it */
	raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
	trace_seq_init(&iter->seq);
	iter->ent = fbuffer->entry;
	event_call->event.funcs->trace(iter, 0, event);
	trace_seq_putc(&iter->seq, 0);
	printk("%s", iter->seq.buffer);

	raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
}
1978
/*
 * sysctl handler for "tracepoint_printk": stores the new value and
 * flips the tracepoint_printk_key static key when the setting changed.
 */
int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
			     void *buffer, size_t *lenp,
			     loff_t *ppos)
{
	int save_tracepoint_printk;
	int ret;

	guard(mutex)(&tracepoint_printk_mutex);
	save_tracepoint_printk = tracepoint_printk;

	ret = proc_dointvec(table, write, buffer, lenp, ppos);

	/*
	 * This will force exiting early, as tracepoint_printk
	 * is always zero when tracepoint_printk_iter is not allocated
	 */
	if (!tracepoint_print_iter)
		tracepoint_printk = 0;

	/* No change: leave the static key alone */
	if (save_tracepoint_printk == tracepoint_printk)
		return ret;

	if (tracepoint_printk)
		static_key_enable(&tracepoint_printk_key.key);
	else
		static_key_disable(&tracepoint_printk_key.key);

	return ret;
}
2008
/*
 * Final step of tracing an event: run the file's triggers (which may
 * discard it), optionally mirror the event to printk and to exporters,
 * then commit it to the ring buffer.
 */
void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
{
	enum event_trigger_type tt = ETT_NONE;
	struct trace_event_file *file = fbuffer->trace_file;

	/* Triggers needing a post-commit call are collected in tt */
	if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
					 fbuffer->entry, &tt))
		goto discard;

	if (static_key_false(&tracepoint_printk_key.key))
		output_printk(fbuffer);

	if (static_branch_unlikely(&trace_event_exports_enabled))
		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);

	trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
					fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);

discard:
	/* Post-call triggers run whether the event was discarded or not */
	if (tt)
		event_triggers_post_call(file, tt);

}
EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2033
/*
 * Skip 3:
 *
 *   trace_buffer_unlock_commit_regs()
 *   trace_event_buffer_commit()
 *   trace_event_raw_event_xxx()
 */
# define STACK_SKIP 3

/*
 * Commit @event, then hand off to the kernel and user stack trace
 * recorders (see ftrace_trace_stack()/ftrace_trace_userstack()).
 */
void trace_buffer_unlock_commit_regs(struct trace_array *tr,
				     struct trace_buffer *buffer,
				     struct ring_buffer_event *event,
				     unsigned int trace_ctx,
				     struct pt_regs *regs)
{
	__buffer_unlock_commit(buffer, event);

	/*
	 * If regs is not set, then skip the necessary functions.
	 * Note, we can still get here via blktrace, wakeup tracer
	 * and mmiotrace, but that's ok if they lose a function or
	 * two. They are not that meaningful.
	 */
	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
	ftrace_trace_userstack(tr, buffer, trace_ctx);
}
2060
/*
 * Similar to trace_buffer_unlock_commit_regs() but do not dump stack:
 * the event is committed with no kernel or user stack trace recorded.
 */
void
trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
				   struct ring_buffer_event *event)
{
	__buffer_unlock_commit(buffer, event);
}
2070
/*
 * trace_function - record a function call event (TRACE_FN)
 * @tr:		trace array to record into
 * @ip:		instruction pointer of the traced function
 * @parent_ip:	instruction pointer of the caller
 * @trace_ctx:	saved irq/preempt context flags
 * @fregs:	optional ftrace registers; when non-NULL the function's
 *		arguments are captured into the entry as well
 */
void
trace_function(struct trace_array *tr, unsigned long ip, unsigned long
	       parent_ip, unsigned int trace_ctx, struct ftrace_regs *fregs)
{
	struct trace_buffer *buffer = tr->array_buffer.buffer;
	struct ring_buffer_event *event;
	struct ftrace_entry *entry;
	int size = sizeof(*entry);

	/* Reserve room for the argument words only when fregs was supplied */
	size += FTRACE_REGS_MAX_ARGS * !!fregs * sizeof(long);

	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, size,
					    trace_ctx);
	if (!event)
		return;
	entry = ring_buffer_event_data(event);
	entry->ip = ip;
	entry->parent_ip = parent_ip;

#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
	/* Copy the first FTRACE_REGS_MAX_ARGS argument registers */
	if (fregs) {
		for (int i = 0; i < FTRACE_REGS_MAX_ARGS; i++)
			entry->args[i] = ftrace_regs_get_argument(fregs, i);
	}
#endif

	if (static_branch_unlikely(&trace_function_exports_enabled))
		ftrace_exports(event, TRACE_EXPORT_FUNCTION);
	__buffer_unlock_commit(buffer, event);
}
2101
2102 #ifdef CONFIG_STACKTRACE
2103
/* Allow 4 levels of nesting: normal, softirq, irq, NMI */
#define FTRACE_KSTACK_NESTING	4

/* Number of stack entries each nesting level may record */
#define FTRACE_KSTACK_ENTRIES	(SZ_4K / FTRACE_KSTACK_NESTING)

/* Scratch space for one saved kernel stack trace */
struct ftrace_stack {
	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
};

/* One scratch stack per nesting context (normal/softirq/irq/NMI) */
struct ftrace_stacks {
	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
};

/* Per-CPU scratch stacks plus the nesting-depth counter that indexes them */
static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2120
/*
 * __ftrace_trace_stack - record a kernel stack trace as a TRACE_STACK event
 * @tr:		trace array (used to detect trampoline addresses)
 * @buffer:	ring buffer to write the stack entry into
 * @trace_ctx:	saved context flags for the event
 * @skip:	number of leading stack frames to omit
 * @regs:	if non-NULL, unwind from these registers instead of from here
 *
 * The stack is first saved into a per-CPU scratch slot (one slot per
 * nesting context, see FTRACE_KSTACK_NESTING) and then copied into a
 * ring-buffer event sized to the number of entries actually captured.
 */
void __ftrace_trace_stack(struct trace_array *tr,
			  struct trace_buffer *buffer,
			  unsigned int trace_ctx,
			  int skip, struct pt_regs *regs)
{
	struct ring_buffer_event *event;
	unsigned int size, nr_entries;
	struct ftrace_stack *fstack;
	struct stack_entry *entry;
	int stackidx;
	int bit;

	/* Bail if we are already inside stack tracing on this context */
	bit = trace_test_and_set_recursion(_THIS_IP_, _RET_IP_, TRACE_EVENT_START);
	if (bit < 0)
		return;

	/*
	 * Add one, for this function and the call to save_stack_trace()
	 * If regs is set, then these functions will not be in the way.
	 */
#ifndef CONFIG_UNWINDER_ORC
	if (!regs)
		skip++;
#endif

	guard(preempt_notrace)();

	/* Claim the scratch slot for the current nesting depth */
	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;

	/* This should never happen. If it does, yell once and skip */
	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
		goto out;

	/*
	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
	 * interrupt will either see the value pre increment or post
	 * increment. If the interrupt happens pre increment it will have
	 * restored the counter when it returns. We just need a barrier to
	 * keep gcc from moving things around.
	 */
	barrier();

	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
	size = ARRAY_SIZE(fstack->calls);

	if (regs) {
		nr_entries = stack_trace_save_regs(regs, fstack->calls,
						   size, skip);
	} else {
		nr_entries = stack_trace_save(fstack->calls, size, skip);
	}

#ifdef CONFIG_DYNAMIC_FTRACE
	/* Mark entry of stack trace as trampoline code */
	if (tr->ops && tr->ops->trampoline) {
		unsigned long tramp_start = tr->ops->trampoline;
		unsigned long tramp_end = tramp_start + tr->ops->trampoline_size;
		unsigned long *calls = fstack->calls;

		for (int i = 0; i < nr_entries; i++) {
			if (calls[i] >= tramp_start && calls[i] < tramp_end)
				calls[i] = FTRACE_TRAMPOLINE_MARKER;
		}
	}
#endif

	/* Size the event to the entries actually captured */
	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
					    struct_size(entry, caller, nr_entries),
					    trace_ctx);
	if (!event)
		goto out;
	entry = ring_buffer_event_data(event);

	entry->size = nr_entries;
	memcpy(&entry->caller, fstack->calls,
	       flex_array_size(entry, caller, nr_entries));

	__buffer_unlock_commit(buffer, event);

 out:
	/* Again, don't let gcc optimize things here */
	barrier();
	__this_cpu_dec(ftrace_stack_reserve);
	trace_clear_recursion(bit);
}
2206
/*
 * __trace_stack - record a kernel stack trace, handling the RCU state
 *
 * The fast path requires RCU to be watching. If it is not (the trace
 * request came from a context where RCU is off), RCU is temporarily
 * entered via ct_irq_enter_irqson() — except from NMI, where that is
 * not allowed and the trace is dropped instead.
 */
void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
		   int skip)
{
	struct trace_buffer *buffer = tr->array_buffer.buffer;

	if (rcu_is_watching()) {
		__ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
		return;
	}

	/* With generic entry, RCU should always be watching here */
	if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
		return;

	/*
	 * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
	 * but if the above rcu_is_watching() failed, then the NMI
	 * triggered someplace critical, and ct_irq_enter() should
	 * not be called from NMI.
	 */
	if (unlikely(in_nmi()))
		return;

	ct_irq_enter_irqson();
	__ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
	ct_irq_exit_irqson();
}
2233
2234 /**
2235 * trace_dump_stack - record a stack back trace in the trace buffer
2236 * @skip: Number of functions to skip (helper handlers)
2237 */
void trace_dump_stack(int skip)
{
	/* Nothing to record while tracing is off or a selftest owns the buffer */
	if (tracing_disabled || tracing_selftest_running)
		return;

#ifndef CONFIG_UNWINDER_ORC
	/* Skip 1 to skip this function. */
	skip++;
#endif
	/* Record into the instance that trace_printk() output goes to */
	__ftrace_trace_stack(printk_trace, printk_trace->array_buffer.buffer,
			     tracing_gen_ctx(), skip, NULL);
}
EXPORT_SYMBOL_GPL(trace_dump_stack);
2251
2252 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
/* Per-CPU flag guarding against recursive user-stack tracing */
static DEFINE_PER_CPU(int, user_stack_count);

/*
 * Record the current task's user-space stack trace as a
 * TRACE_USER_STACK event, if the USERSTACKTRACE trace flag is set.
 * Must not run in NMI context (user unwinding can fault) and will
 * not recurse on the same CPU.
 */
static void
ftrace_trace_userstack(struct trace_array *tr,
		       struct trace_buffer *buffer, unsigned int trace_ctx)
{
	struct ring_buffer_event *event;
	struct userstack_entry *entry;

	if (!(tr->trace_flags & TRACE_ITER(USERSTACKTRACE)))
		return;

	/*
	 * NMIs can not handle page faults, even with fix ups.
	 * The save user stack can (and often does) fault.
	 */
	if (unlikely(in_nmi()))
		return;

	/*
	 * prevent recursion, since the user stack tracing may
	 * trigger other kernel events.
	 */
	guard(preempt)();
	if (__this_cpu_read(user_stack_count))
		return;

	__this_cpu_inc(user_stack_count);

	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
					    sizeof(*entry), trace_ctx);
	if (!event)
		goto out_drop_count;
	entry = ring_buffer_event_data(event);

	entry->tgid = current->tgid;
	/* Zero first so unused caller slots read as empty */
	memset(&entry->caller, 0, sizeof(entry->caller));

	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
	__buffer_unlock_commit(buffer, event);

 out_drop_count:
	__this_cpu_dec(user_stack_count);
}
2297 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
/* User stack tracing not supported: compile to a no-op. */
static void ftrace_trace_userstack(struct trace_array *tr,
				   struct trace_buffer *buffer,
				   unsigned int trace_ctx)
{
}
2303 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
2304
2305 #endif /* CONFIG_STACKTRACE */
2306
2307 static inline void
func_repeats_set_delta_ts(struct func_repeats_entry * entry,unsigned long long delta)2308 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
2309 unsigned long long delta)
2310 {
2311 entry->bottom_delta_ts = delta & U32_MAX;
2312 entry->top_delta_ts = (delta >> 32);
2313 }
2314
/*
 * trace_last_func_repeats - emit a TRACE_FUNC_REPEATS summary event
 * @tr:		trace array to record into
 * @last_info:	the repeated call being summarized (ip, parent_ip, count,
 *		and the timestamp of the last recorded call)
 * @trace_ctx:	saved context flags for the event
 *
 * The event stores how long ago the last call happened as a delta
 * between the event's own ring-buffer timestamp and ts_last_call,
 * split into two 32-bit fields.
 */
void trace_last_func_repeats(struct trace_array *tr,
			     struct trace_func_repeats *last_info,
			     unsigned int trace_ctx)
{
	struct trace_buffer *buffer = tr->array_buffer.buffer;
	struct func_repeats_entry *entry;
	struct ring_buffer_event *event;
	u64 delta;

	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
					    sizeof(*entry), trace_ctx);
	if (!event)
		return;

	delta = ring_buffer_event_time_stamp(buffer, event) -
		last_info->ts_last_call;

	entry = ring_buffer_event_data(event);
	entry->ip = last_info->ip;
	entry->parent_ip = last_info->parent_ip;
	entry->count = last_info->count;
	func_repeats_set_delta_ts(entry, delta);

	__buffer_unlock_commit(buffer, event);
}
2340
trace_iterator_increment(struct trace_iterator * iter)2341 static void trace_iterator_increment(struct trace_iterator *iter)
2342 {
2343 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2344
2345 iter->idx++;
2346 if (buf_iter)
2347 ring_buffer_iter_advance(buf_iter);
2348 }
2349
2350 static struct trace_entry *
peek_next_entry(struct trace_iterator * iter,int cpu,u64 * ts,unsigned long * lost_events)2351 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2352 unsigned long *lost_events)
2353 {
2354 struct ring_buffer_event *event;
2355 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2356
2357 if (buf_iter) {
2358 event = ring_buffer_iter_peek(buf_iter, ts);
2359 if (lost_events)
2360 *lost_events = ring_buffer_iter_dropped(buf_iter) ?
2361 (unsigned long)-1 : 0;
2362 } else {
2363 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
2364 lost_events);
2365 }
2366
2367 if (event) {
2368 iter->ent_size = ring_buffer_event_length(event);
2369 return ring_buffer_event_data(event);
2370 }
2371 iter->ent_size = 0;
2372 return NULL;
2373 }
2374
/*
 * Find the entry with the earliest timestamp across all CPU buffers,
 * or peek the single CPU the iterator is bound to.
 *
 * On return iter->ent_size matches the returned entry, and the optional
 * out-parameters report the entry's CPU, timestamp, and lost-event
 * count. Returns NULL when every buffer is empty.
 */
static struct trace_entry *
__find_next_entry(struct trace_iterator *iter, int *ent_cpu,
		  unsigned long *missing_events, u64 *ent_ts)
{
	struct trace_buffer *buffer = iter->array_buffer->buffer;
	struct trace_entry *ent, *next = NULL;
	unsigned long lost_events = 0, next_lost = 0;
	int cpu_file = iter->cpu_file;
	u64 next_ts = 0, ts;
	int next_cpu = -1;
	int next_size = 0;
	int cpu;

	/*
	 * If we are in a per_cpu trace file, don't bother by iterating over
	 * all cpu and peek directly.
	 */
	if (cpu_file > RING_BUFFER_ALL_CPUS) {
		if (ring_buffer_empty_cpu(buffer, cpu_file))
			return NULL;
		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
		if (ent_cpu)
			*ent_cpu = cpu_file;

		return ent;
	}

	for_each_tracing_cpu(cpu) {

		if (ring_buffer_empty_cpu(buffer, cpu))
			continue;

		ent = peek_next_entry(iter, cpu, &ts, &lost_events);

		/*
		 * Pick the entry with the smallest timestamp:
		 */
		if (ent && (!next || ts < next_ts)) {
			next = ent;
			next_cpu = cpu;
			next_ts = ts;
			next_lost = lost_events;
			/* peek_next_entry() set ent_size for this candidate */
			next_size = iter->ent_size;
		}
	}

	/* Restore the winner's size; later peeks clobbered iter->ent_size */
	iter->ent_size = next_size;

	if (ent_cpu)
		*ent_cpu = next_cpu;

	if (ent_ts)
		*ent_ts = next_ts;

	if (missing_events)
		*missing_events = next_lost;

	return next;
}
2434
2435 #define STATIC_FMT_BUF_SIZE 128
2436 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
2437
trace_iter_expand_format(struct trace_iterator * iter)2438 char *trace_iter_expand_format(struct trace_iterator *iter)
2439 {
2440 char *tmp;
2441
2442 /*
2443 * iter->tr is NULL when used with tp_printk, which makes
2444 * this get called where it is not safe to call krealloc().
2445 */
2446 if (!iter->tr || iter->fmt == static_fmt_buf)
2447 return NULL;
2448
2449 tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
2450 GFP_KERNEL);
2451 if (tmp) {
2452 iter->fmt_size += STATIC_FMT_BUF_SIZE;
2453 iter->fmt = tmp;
2454 }
2455
2456 return tmp;
2457 }
2458
2459 /* Returns true if the string is safe to dereference from an event */
static bool trace_safe_str(struct trace_iterator *iter, const char *str)
{
	unsigned long addr = (unsigned long)str;
	struct trace_event *trace_event;
	struct trace_event_call *event;

	/* OK if part of the event data */
	if ((addr >= (unsigned long)iter->ent) &&
	    (addr < (unsigned long)iter->ent + iter->ent_size))
		return true;

	/* OK if part of the temp seq buffer */
	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
	    (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE))
		return true;

	/* Core rodata can not be freed */
	if (is_kernel_rodata(addr))
		return true;

	/* Strings registered at tracepoint definition time are pinned */
	if (trace_is_tracepoint_string(str))
		return true;

	/*
	 * Now this could be a module event, referencing core module
	 * data, which is OK.
	 */
	if (!iter->ent)
		return false;

	trace_event = ftrace_find_event(iter->ent->type);
	if (!trace_event)
		return false;

	event = container_of(trace_event, struct trace_event_call, event);
	/* Dynamic events and non-module events can't vouch for the pointer */
	if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
		return false;

	/* Would rather have rodata, but this will suffice */
	if (within_module_core(addr, event->module))
		return true;

	return false;
}
2504
2505 /**
2506 * ignore_event - Check dereferenced fields while writing to the seq buffer
2507 * @iter: The iterator that holds the seq buffer and the event being printed
2508 *
2509 * At boot up, test_event_printk() will flag any event that dereferences
2510 * a string with "%s" that does exist in the ring buffer. It may still
2511 * be valid, as the string may point to a static string in the kernel
2512 * rodata that never gets freed. But if the string pointer is pointing
2513 * to something that was allocated, there's a chance that it can be freed
2514 * by the time the user reads the trace. This would cause a bad memory
2515 * access by the kernel and possibly crash the system.
2516 *
2517 * This function will check if the event has any fields flagged as needing
2518 * to be checked at runtime and perform those checks.
2519 *
2520 * If it is found that a field is unsafe, it will write into the @iter->seq
2521 * a message stating what was found to be unsafe.
2522 *
2523 * @return: true if the event is unsafe and should be ignored,
2524 * false otherwise.
2525 */
bool ignore_event(struct trace_iterator *iter)
{
	struct ftrace_event_field *field;
	struct trace_event *trace_event;
	struct trace_event_call *event;
	struct list_head *head;
	struct trace_seq *seq;
	const void *ptr;

	trace_event = ftrace_find_event(iter->ent->type);

	seq = &iter->seq;

	if (!trace_event) {
		trace_seq_printf(seq, "EVENT ID %d NOT FOUND?\n", iter->ent->type);
		return true;
	}

	event = container_of(trace_event, struct trace_event_call, event);
	/* Only events flagged at boot by test_event_printk() need checking */
	if (!(event->flags & TRACE_EVENT_FL_TEST_STR))
		return false;

	head = trace_get_fields(event);
	if (!head) {
		trace_seq_printf(seq, "FIELDS FOR EVENT '%s' NOT FOUND?\n",
				 trace_event_name(event));
		return true;
	}

	/* Offsets are from the iter->ent that points to the raw event */
	ptr = iter->ent;

	list_for_each_entry(field, head, link) {
		const char *str;
		bool good;

		if (!field->needs_test)
			continue;

		/* The field holds a pointer to the string, not the string */
		str = *(const char **)(ptr + field->offset);

		good = trace_safe_str(iter, str);

		/*
		 * If you hit this warning, it is likely that the
		 * trace event in question used %s on a string that
		 * was saved at the time of the event, but may not be
		 * around when the trace is read. Use __string(),
		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
		 * instead. See samples/trace_events/trace-events-sample.h
		 * for reference.
		 */
		if (WARN_ONCE(!good, "event '%s' has unsafe pointer field '%s'",
			      trace_event_name(event), field->name)) {
			trace_seq_printf(seq, "EVENT %s: HAS UNSAFE POINTER FIELD '%s'\n",
					 trace_event_name(event), field->name);
			return true;
		}
	}
	return false;
}
2587
/*
 * trace_event_format - rewrite an event's format string for raw pointers
 *
 * When pointer hashing is disabled (HASH_PTR trace flag clear), every
 * "%p" in @fmt (not "%%p", and not followed by an alphanumeric that
 * would make it a different specifier) is rewritten to "%px" in the
 * iterator's scratch format buffer, so pointers print unhashed.
 * Returns @fmt unchanged when no rewrite is needed or possible.
 */
const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
{
	const char *p, *new_fmt;
	char *q;

	if (WARN_ON_ONCE(!fmt))
		return fmt;

	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER(HASH_PTR))
		return fmt;

	p = fmt;
	new_fmt = q = iter->fmt;
	while (*p) {
		/* Worst case appends 3 bytes ("%px" plus NUL later); grow early */
		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
			if (!trace_iter_expand_format(iter))
				return fmt;

			/* The buffer may have moved; rebase the write cursor */
			q += iter->fmt - new_fmt;
			new_fmt = iter->fmt;
		}

		*q++ = *p++;

		/* Replace %p with %px */
		if (p[-1] == '%') {
			if (p[0] == '%') {
				/* "%%" is a literal percent; copy and move on */
				*q++ = *p++;
			} else if (p[0] == 'p' && !isalnum(p[1])) {
				*q++ = *p++;
				*q++ = 'x';
			}
		}
	}
	*q = '\0';

	return new_fmt;
}
2626
2627 #define STATIC_TEMP_BUF_SIZE 128
2628 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
2629
2630 /* Find the next real entry, without updating the iterator itself */
struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
					  int *ent_cpu, u64 *ent_ts)
{
	/* __find_next_entry will reset ent_size */
	int ent_size = iter->ent_size;
	struct trace_entry *entry;

	/*
	 * If called from ftrace_dump(), then the iter->temp buffer
	 * will be the static_temp_buf and not created from kmalloc.
	 * If the entry size is greater than the buffer, we can
	 * not save it. Just return NULL in that case. This is only
	 * used to add markers when two consecutive events' time
	 * stamps have a large delta. See trace_print_lat_context()
	 */
	if (iter->temp == static_temp_buf &&
	    STATIC_TEMP_BUF_SIZE < ent_size)
		return NULL;

	/*
	 * The __find_next_entry() may call peek_next_entry(), which may
	 * call ring_buffer_peek() that may make the contents of iter->ent
	 * undefined. Need to copy iter->ent now.
	 */
	if (iter->ent && iter->ent != iter->temp) {
		/* Grow the temp buffer if the current entry won't fit */
		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
			void *temp;
			temp = kmalloc(iter->ent_size, GFP_KERNEL);
			if (!temp)
				return NULL;
			kfree(iter->temp);
			iter->temp = temp;
			iter->temp_size = iter->ent_size;
		}
		memcpy(iter->temp, iter->ent, iter->ent_size);
		iter->ent = iter->temp;
	}
	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
	/* Put back the original ent_size */
	iter->ent_size = ent_size;

	return entry;
}
2675
2676 /* Find the next real entry, and increment the iterator to the next entry */
trace_find_next_entry_inc(struct trace_iterator * iter)2677 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2678 {
2679 iter->ent = __find_next_entry(iter, &iter->cpu,
2680 &iter->lost_events, &iter->ts);
2681
2682 if (iter->ent)
2683 trace_iterator_increment(iter);
2684
2685 return iter->ent ? iter : NULL;
2686 }
2687
/* Consume (read and remove) the next event on the iterator's current CPU. */
static void trace_consume(struct trace_iterator *iter)
{
	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
			    &iter->lost_events);
}
2693
/*
 * seq_file ->next() callback: advance the iterator to position *pos.
 * The iterator can only move forward; a request for a position behind
 * iter->idx returns NULL.
 */
static void *s_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct trace_iterator *iter = m->private;
	int i = (int)*pos;
	void *ent;

	WARN_ON_ONCE(iter->leftover);

	(*pos)++;

	/* can't go backwards */
	if (iter->idx > i)
		return NULL;

	/* idx < 0 means the iterator was just reset; prime it first */
	if (iter->idx < 0)
		ent = trace_find_next_entry_inc(iter);
	else
		ent = iter;

	/* Walk forward until the requested position is reached */
	while (ent && iter->idx < i)
		ent = trace_find_next_entry_inc(iter);

	iter->pos = *pos;

	return ent;
}
2720
/*
 * tracing_iter_reset - rewind the iterator for @cpu to the buffer start
 *
 * Entries with timestamps older than the buffer's time_start are
 * skipped, and their count is stored in the per-CPU skipped_entries
 * so the entry accounting can ignore them.
 */
void tracing_iter_reset(struct trace_iterator *iter, int cpu)
{
	struct ring_buffer_iter *buf_iter;
	unsigned long entries = 0;
	u64 ts;

	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;

	buf_iter = trace_buffer_iter(iter, cpu);
	if (!buf_iter)
		return;

	ring_buffer_iter_reset(buf_iter);

	/*
	 * We could have the case with the max latency tracers
	 * that a reset never took place on a cpu. This is evident
	 * by the timestamp being before the start of the buffer.
	 */
	while (ring_buffer_iter_peek(buf_iter, &ts)) {
		if (ts >= iter->array_buffer->time_start)
			break;
		entries++;
		ring_buffer_iter_advance(buf_iter);
		/* This could be a big loop */
		cond_resched();
	}

	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
}
2751
2752 /*
2753 * The current tracer is copied to avoid a global locking
2754 * all around.
2755 */
/*
 * seq_file ->start() callback: position the iterator at *pos and take
 * the read-side locks. If the current tracer changed since the file
 * was opened, the iterator's tracer is swapped (close old, open new)
 * first. Returns the element to print, ERR_PTR(-EBUSY) while a
 * snapshot-using tracer holds the buffer, or NULL at end.
 */
static void *s_start(struct seq_file *m, loff_t *pos)
{
	struct trace_iterator *iter = m->private;
	struct trace_array *tr = iter->tr;
	int cpu_file = iter->cpu_file;
	void *p = NULL;
	loff_t l = 0;
	int cpu;

	mutex_lock(&trace_types_lock);
	if (unlikely(tr->current_trace != iter->trace)) {
		/* Close iter->trace before switching to the new current tracer */
		if (iter->trace->close)
			iter->trace->close(iter);
		iter->trace = tr->current_trace;
		/* Reopen the new current tracer */
		if (iter->trace->open)
			iter->trace->open(iter);
	}
	mutex_unlock(&trace_types_lock);

	if (iter->snapshot && tracer_uses_snapshot(iter->trace))
		return ERR_PTR(-EBUSY);

	if (*pos != iter->pos) {
		/* Position moved: reset and re-walk from the beginning */
		iter->ent = NULL;
		iter->cpu = 0;
		iter->idx = -1;

		if (cpu_file == RING_BUFFER_ALL_CPUS) {
			for_each_tracing_cpu(cpu)
				tracing_iter_reset(iter, cpu);
		} else
			tracing_iter_reset(iter, cpu_file);

		iter->leftover = 0;
		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
			;

	} else {
		/*
		 * If we overflowed the seq_file before, then we want
		 * to just reuse the trace_seq buffer again.
		 */
		if (iter->leftover)
			p = iter;
		else {
			l = *pos - 1;
			p = s_next(m, p, &l);
		}
	}

	trace_event_read_lock();
	trace_access_lock(cpu_file);
	return p;
}
2812
/*
 * seq_file ->stop() callback: drop the locks taken by s_start().
 * The snapshot case returned early from s_start() without locking,
 * so it must return early here too.
 */
static void s_stop(struct seq_file *m, void *p)
{
	struct trace_iterator *iter = m->private;

	if (iter->snapshot && tracer_uses_snapshot(iter->trace))
		return;

	trace_access_unlock(iter->cpu_file);
	trace_event_read_unlock();
}
2823
2824 static void
get_total_entries_cpu(struct array_buffer * buf,unsigned long * total,unsigned long * entries,int cpu)2825 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
2826 unsigned long *entries, int cpu)
2827 {
2828 unsigned long count;
2829
2830 count = ring_buffer_entries_cpu(buf->buffer, cpu);
2831 /*
2832 * If this buffer has skipped entries, then we hold all
2833 * entries for the trace and we need to ignore the
2834 * ones before the time stamp.
2835 */
2836 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2837 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2838 /* total is the same as the entries */
2839 *total = count;
2840 } else
2841 *total = count +
2842 ring_buffer_overrun_cpu(buf->buffer, cpu);
2843 *entries = count;
2844 }
2845
2846 static void
get_total_entries(struct array_buffer * buf,unsigned long * total,unsigned long * entries)2847 get_total_entries(struct array_buffer *buf,
2848 unsigned long *total, unsigned long *entries)
2849 {
2850 unsigned long t, e;
2851 int cpu;
2852
2853 *total = 0;
2854 *entries = 0;
2855
2856 for_each_tracing_cpu(cpu) {
2857 get_total_entries_cpu(buf, &t, &e, cpu);
2858 *total += t;
2859 *entries += e;
2860 }
2861 }
2862
trace_total_entries_cpu(struct trace_array * tr,int cpu)2863 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
2864 {
2865 unsigned long total, entries;
2866
2867 if (!tr)
2868 tr = &global_trace;
2869
2870 get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
2871
2872 return entries;
2873 }
2874
trace_total_entries(struct trace_array * tr)2875 unsigned long trace_total_entries(struct trace_array *tr)
2876 {
2877 unsigned long total, entries;
2878
2879 if (!tr)
2880 tr = &global_trace;
2881
2882 get_total_entries(&tr->array_buffer, &total, &entries);
2883
2884 return entries;
2885 }
2886
/* Print the column legend used by the latency-format trace output. */
static void print_lat_help_header(struct seq_file *m)
{
	seq_puts(m, "# _------=> CPU# \n"
		    "# / _-----=> irqs-off/BH-disabled\n"
		    "# | / _----=> need-resched \n"
		    "# || / _---=> hardirq/softirq \n"
		    "# ||| / _--=> preempt-depth \n"
		    "# |||| / _-=> migrate-disable \n"
		    "# ||||| / delay \n"
		    "# cmd pid |||||| time | caller \n"
		    "# \\ / |||||| \\ | / \n");
}
2899
/* Print the buffer fill summary line shared by the help headers. */
static void print_event_info(struct array_buffer *buf, struct seq_file *m)
{
	unsigned long total;
	unsigned long entries;

	get_total_entries(buf, &total, &entries);
	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n",
		   entries, total, num_online_cpus());
	seq_puts(m, "#\n");
}
2910
/* Print the basic (non-irq-info) column header, optionally with TGID. */
static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
				   unsigned int flags)
{
	bool tgid = flags & TRACE_ITER(RECORD_TGID);

	print_event_info(buf, m);

	seq_printf(m, "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? " TGID " : "");
	seq_printf(m, "# | | %s | | |\n", tgid ? " | " : "");
}
2921
/*
 * Print the column header including the irq/preempt info legend.
 * The legend is shifted right by @prec columns when TGID is shown.
 */
static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
				       unsigned int flags)
{
	bool tgid = flags & TRACE_ITER(RECORD_TGID);
	static const char space[] = " ";
	int prec = tgid ? 12 : 2;

	print_event_info(buf, m);

	seq_printf(m, "# %.*s _-----=> irqs-off/BH-disabled\n", prec, space);
	seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space);
	seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space);
	seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space);
	seq_printf(m, "# %.*s||| / _-=> migrate-disable\n", prec, space);
	seq_printf(m, "# %.*s|||| / delay\n", prec, space);
	seq_printf(m, "# TASK-PID %.*s CPU# ||||| TIMESTAMP FUNCTION\n", prec, " TGID ");
	seq_printf(m, "# | | %.*s | ||||| | |\n", prec, " | ");
}
2940
/*
 * print_trace_header - print the verbose latency-trace banner
 *
 * Emits the tracer name, kernel release, latency, entry counts,
 * preemption model, the task that hit the max latency, and (when
 * recorded) where the critical section started and ended.
 */
void
print_trace_header(struct seq_file *m, struct trace_iterator *iter)
{
	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
	struct array_buffer *buf = iter->array_buffer;
	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
	struct tracer *type = iter->trace;
	unsigned long entries;
	unsigned long total;
	const char *name = type->name;

	get_total_entries(buf, &total, &entries);

	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
		   name, init_utsname()->release);
	seq_puts(m, "# -----------------------------------"
		    "---------------------------------\n");
	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
		      " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
		   nsecs_to_usecs(data->saved_latency),
		   entries,
		   total,
		   buf->cpu,
		   preempt_model_str(),
		   /* These are reserved for later use */
		   0, 0, 0, 0);
#ifdef CONFIG_SMP
	seq_printf(m, " #P:%d)\n", num_online_cpus());
#else
	seq_puts(m, ")\n");
#endif
	seq_puts(m, "# -----------------\n");
	seq_printf(m, "# | task: %.16s-%d "
		      "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
		   data->comm, data->pid,
		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
		   data->policy, data->rt_priority);
	seq_puts(m, "# -----------------\n");

	/* Only shown when a critical section was captured */
	if (data->critical_start) {
		seq_puts(m, "# => started at: ");
		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
		trace_print_seq(m, &iter->seq);
		seq_puts(m, "\n# => ended at: ");
		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
		trace_print_seq(m, &iter->seq);
		seq_puts(m, "\n#\n");
	}

	seq_puts(m, "#\n");
}
2992
/*
 * Emit a "CPU N buffer started" annotation the first time output is
 * produced for a given CPU, when the ANNOTATE trace flag is set.
 * The iter->started cpumask remembers which CPUs were already noted.
 */
static void test_cpu_buff_start(struct trace_iterator *iter)
{
	struct trace_seq *s = &iter->seq;
	struct trace_array *tr = iter->tr;

	if (!(tr->trace_flags & TRACE_ITER(ANNOTATE)))
		return;

	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
		return;

	/* Already announced this CPU */
	if (cpumask_available(iter->started) &&
	    cpumask_test_cpu(iter->cpu, iter->started))
		return;

	/* Skipped entries mean this CPU's buffer start is not meaningful */
	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
		return;

	if (cpumask_available(iter->started))
		cpumask_set_cpu(iter->cpu, iter->started);

	/* Don't print started cpu buffer for the first entry of the trace */
	if (iter->idx > 1)
		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
				 iter->cpu);
}
3019
#ifdef CONFIG_FTRACE_SYSCALLS
/* True if @event is a raw syscall enter/exit event (matched by its print funcs) */
static bool is_syscall_event(struct trace_event *event)
{
	return (event->funcs == &enter_syscall_print_funcs) ||
	       (event->funcs == &exit_syscall_print_funcs);

}
#define syscall_buf_size CONFIG_TRACE_SYSCALL_BUF_SIZE_DEFAULT
#else
/* Syscall tracing not built in: nothing is ever a syscall event */
static inline bool is_syscall_event(struct trace_event *event)
{
	return false;
}
#define syscall_buf_size 0
#endif /* CONFIG_FTRACE_SYSCALLS */
3035
/*
 * Print the current entry in the default (human readable) format:
 * optional context columns, then the event's own trace() output,
 * falling back to field-by-field printing where print_fmt is unsafe.
 */
static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
{
	struct trace_array *tr = iter->tr;
	struct trace_seq *s = &iter->seq;
	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
	struct trace_entry *entry;
	struct trace_event *event;

	entry = iter->ent;

	test_cpu_buff_start(iter);

	event = ftrace_find_event(entry->type);

	if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) {
		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
			trace_print_lat_context(iter);
		else
			trace_print_context(iter);
	}

	if (trace_seq_has_overflowed(s))
		return TRACE_TYPE_PARTIAL_LINE;

	if (event) {
		if (tr->trace_flags & TRACE_ITER(FIELDS))
			return print_event_fields(iter, event);
		/*
		 * For TRACE_EVENT() events, the print_fmt is not
		 * safe to use if the array has delta offsets
		 * Force printing via the fields.
		 */
		if ((tr->text_delta)) {
			/* ftrace and system call events are still OK */
			if ((event->type > __TRACE_LAST_TYPE) &&
			    !is_syscall_event(event))
				return print_event_fields(iter, event);
		}
		return event->funcs->trace(iter, sym_flags, event);
	}

	trace_seq_printf(s, "Unknown type %d\n", entry->type);

	return trace_handle_return(s);
}
3081
/*
 * Print the current entry in "raw" mode: a "<pid> <cpu> <ts> " prefix
 * (when context-info is enabled) followed by the event's raw() output.
 */
static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
{
	struct trace_array *tr = iter->tr;
	struct trace_seq *s = &iter->seq;
	struct trace_entry *entry;
	struct trace_event *event;

	entry = iter->ent;

	if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO))
		trace_seq_printf(s, "%d %d %llu ",
				 entry->pid, iter->cpu, iter->ts);

	if (trace_seq_has_overflowed(s))
		return TRACE_TYPE_PARTIAL_LINE;

	event = ftrace_find_event(entry->type);
	if (event)
		return event->funcs->raw(iter, 0, event);

	/* Unknown event type: emit the type number with a '?' */
	trace_seq_printf(s, "%d ?\n", entry->type);

	return trace_handle_return(s);
}
3106
/*
 * Print the current entry in "hex" mode: pid/cpu/timestamp as hex fields
 * (when context-info is enabled), the event's hex() output, then a newline.
 */
static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
{
	struct trace_array *tr = iter->tr;
	struct trace_seq *s = &iter->seq;
	unsigned char newline = '\n';
	struct trace_entry *entry;
	struct trace_event *event;

	entry = iter->ent;

	if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) {
		SEQ_PUT_HEX_FIELD(s, entry->pid);
		SEQ_PUT_HEX_FIELD(s, iter->cpu);
		SEQ_PUT_HEX_FIELD(s, iter->ts);
		if (trace_seq_has_overflowed(s))
			return TRACE_TYPE_PARTIAL_LINE;
	}

	event = ftrace_find_event(entry->type);
	if (event) {
		enum print_line_t ret = event->funcs->hex(iter, 0, event);
		/* Propagate partial/unhandled results without the newline */
		if (ret != TRACE_TYPE_HANDLED)
			return ret;
	}

	SEQ_PUT_FIELD(s, newline);

	return trace_handle_return(s);
}
3136
/*
 * Print the current entry in "bin" mode: pid/cpu/timestamp written as raw
 * binary fields (when context-info is enabled), then the event's binary()
 * output. Unknown event types are silently treated as handled.
 */
static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
{
	struct trace_array *tr = iter->tr;
	struct trace_seq *s = &iter->seq;
	struct trace_entry *entry;
	struct trace_event *event;

	entry = iter->ent;

	if (tr->trace_flags & TRACE_ITER(CONTEXT_INFO)) {
		SEQ_PUT_FIELD(s, entry->pid);
		SEQ_PUT_FIELD(s, iter->cpu);
		SEQ_PUT_FIELD(s, iter->ts);
		if (trace_seq_has_overflowed(s))
			return TRACE_TYPE_PARTIAL_LINE;
	}

	event = ftrace_find_event(entry->type);
	return event ? event->funcs->binary(iter, 0, event) :
		TRACE_TYPE_HANDLED;
}
3158
trace_empty(struct trace_iterator * iter)3159 int trace_empty(struct trace_iterator *iter)
3160 {
3161 struct ring_buffer_iter *buf_iter;
3162 int cpu;
3163
3164 /* If we are looking at one CPU buffer, only check that one */
3165 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3166 cpu = iter->cpu_file;
3167 buf_iter = trace_buffer_iter(iter, cpu);
3168 if (buf_iter) {
3169 if (!ring_buffer_iter_empty(buf_iter))
3170 return 0;
3171 } else {
3172 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
3173 return 0;
3174 }
3175 return 1;
3176 }
3177
3178 for_each_tracing_cpu(cpu) {
3179 buf_iter = trace_buffer_iter(iter, cpu);
3180 if (buf_iter) {
3181 if (!ring_buffer_iter_empty(buf_iter))
3182 return 0;
3183 } else {
3184 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
3185 return 0;
3186 }
3187 }
3188
3189 return 1;
3190 }
3191
3192 /* Called with trace_event_read_lock() held. */
/*
 * Print one line of trace output, dispatching to the tracer's own
 * print_line() if provided, then to the printk-msgonly shortcuts, then
 * to the bin/hex/raw/default formatters based on trace_flags.
 * Called with trace_event_read_lock() held.
 */
enum print_line_t print_trace_line(struct trace_iterator *iter)
{
	struct trace_array *tr = iter->tr;
	unsigned long trace_flags = tr->trace_flags;
	enum print_line_t ret;

	/* Report dropped events before the entry itself */
	if (iter->lost_events) {
		/* (unsigned long)-1 means the count itself is unknown */
		if (iter->lost_events == (unsigned long)-1)
			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
					 iter->cpu);
		else
			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
					 iter->cpu, iter->lost_events);
		if (trace_seq_has_overflowed(&iter->seq))
			return TRACE_TYPE_PARTIAL_LINE;
	}

	/* The tracer gets first crack at formatting the line */
	if (iter->trace && iter->trace->print_line) {
		ret = iter->trace->print_line(iter);
		if (ret != TRACE_TYPE_UNHANDLED)
			return ret;
	}

	if (iter->ent->type == TRACE_BPUTS &&
			trace_flags & TRACE_ITER(PRINTK) &&
			trace_flags & TRACE_ITER(PRINTK_MSGONLY))
		return trace_print_bputs_msg_only(iter);

	if (iter->ent->type == TRACE_BPRINT &&
			trace_flags & TRACE_ITER(PRINTK) &&
			trace_flags & TRACE_ITER(PRINTK_MSGONLY))
		return trace_print_bprintk_msg_only(iter);

	if (iter->ent->type == TRACE_PRINT &&
			trace_flags & TRACE_ITER(PRINTK) &&
			trace_flags & TRACE_ITER(PRINTK_MSGONLY))
		return trace_print_printk_msg_only(iter);

	if (trace_flags & TRACE_ITER(BIN))
		return print_bin_fmt(iter);

	if (trace_flags & TRACE_ITER(HEX))
		return print_hex_fmt(iter);

	if (trace_flags & TRACE_ITER(RAW))
		return print_raw_fmt(iter);

	return print_trace_fmt(iter);
}
3242
/* Emit the latency-format header (and per-trace header in lat mode). */
void trace_latency_header(struct seq_file *m)
{
	struct trace_iterator *iter = m->private;
	struct trace_array *tr = iter->tr;

	/* print nothing if the buffers are empty */
	if (trace_empty(iter))
		return;

	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
		print_trace_header(m, iter);

	if (!(tr->trace_flags & TRACE_ITER(VERBOSE)))
		print_lat_help_header(m);
}
3258
/*
 * Emit the default trace file header: latency-style when the iterator is
 * in lat format, otherwise the function help header (irq variant when
 * the irq-info option is on). Suppressed entirely without context-info.
 */
void trace_default_header(struct seq_file *m)
{
	struct trace_iterator *iter = m->private;
	struct trace_array *tr = iter->tr;
	unsigned long trace_flags = tr->trace_flags;

	if (!(trace_flags & TRACE_ITER(CONTEXT_INFO)))
		return;

	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
		/* print nothing if the buffers are empty */
		if (trace_empty(iter))
			return;
		print_trace_header(m, iter);
		if (!(trace_flags & TRACE_ITER(VERBOSE)))
			print_lat_help_header(m);
	} else {
		if (!(trace_flags & TRACE_ITER(VERBOSE))) {
			if (trace_flags & TRACE_ITER(IRQ_INFO))
				print_func_help_header_irq(iter->array_buffer,
							   m, trace_flags);
			else
				print_func_help_header(iter->array_buffer, m,
						       trace_flags);
		}
	}
}
3286
/* Warn in the trace output if function tracing was permanently disabled. */
static void test_ftrace_alive(struct seq_file *m)
{
	if (!ftrace_is_dead())
		return;
	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
		    "#          MAY BE MISSING FUNCTION EVENTS\n");
}
3294
/*
 * seq_file ->show() for the trace file: prints the header when there is
 * no current entry, flushes leftover output from a previous overflow,
 * or formats the current entry via print_trace_line().
 */
static int s_show(struct seq_file *m, void *v)
{
	struct trace_iterator *iter = v;
	int ret;

	if (iter->ent == NULL) {
		/* Start of output: tracer name, liveness warning, headers */
		if (iter->tr) {
			seq_printf(m, "# tracer: %s\n", iter->trace->name);
			seq_puts(m, "#\n");
			test_ftrace_alive(m);
		}
		if (iter->snapshot && trace_empty(iter))
			print_snapshot_help(m, iter);
		else if (iter->trace && iter->trace->print_header)
			iter->trace->print_header(m);
		else
			trace_default_header(m);

	} else if (iter->leftover) {
		/*
		 * If we filled the seq_file buffer earlier, we
		 * want to just show it now.
		 */
		ret = trace_print_seq(m, &iter->seq);

		/* ret should this time be zero, but you never know */
		iter->leftover = ret;

	} else {
		ret = print_trace_line(iter);
		if (ret == TRACE_TYPE_PARTIAL_LINE) {
			/* Entry didn't fit in the seq buffer; reset and mark it */
			iter->seq.full = 0;
			trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
		}
		ret = trace_print_seq(m, &iter->seq);
		/*
		 * If we overflow the seq_file buffer, then it will
		 * ask us for this data again at start up.
		 * Use that instead.
		 * ret is 0 if seq_file write succeeded.
		 * -1 otherwise.
		 */
		iter->leftover = ret;
	}

	return 0;
}
3342
/* seq_file operations backing the "trace" file iteration. */
static const struct seq_operations tracer_seq_ops = {
	.start		= s_start,
	.next		= s_next,
	.stop		= s_stop,
	.show		= s_show,
};
3349
/*
 * Note, as iter itself can be allocated and freed in different
 * ways, this function is only used to free its content, and not
 * the iterator itself. The only requirement to all the allocations
 * is that it must zero all fields (kzalloc), as freeing works with
 * either allocated content or NULL.
 */
static void free_trace_iter_content(struct trace_iterator *iter)
{
	/* The fmt is either NULL, allocated or points to static_fmt_buf */
	if (iter->fmt != static_fmt_buf)
		kfree(iter->fmt);

	kfree(iter->temp);
	kfree(iter->buffer_iter);
	mutex_destroy(&iter->mutex);
	free_cpumask_var(iter->started);
}
3368
/*
 * Allocate and initialize a trace iterator for reading the "trace" file.
 * Sets up per-CPU ring buffer iterators, selects the snapshot buffer when
 * appropriate, and optionally pauses tracing while the file is open.
 * Returns the iterator or an ERR_PTR on failure.
 */
struct trace_iterator *
__tracing_open(struct inode *inode, struct file *file, bool snapshot)
{
	struct trace_array *tr = inode->i_private;
	struct trace_iterator *iter;
	int cpu;

	if (tracing_disabled)
		return ERR_PTR(-ENODEV);

	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
	if (!iter)
		return ERR_PTR(-ENOMEM);

	iter->buffer_iter = kzalloc_objs(*iter->buffer_iter, nr_cpu_ids);
	if (!iter->buffer_iter)
		goto release;

	/*
	 * trace_find_next_entry() may need to save off iter->ent.
	 * It will place it into the iter->temp buffer. As most
	 * events are less than 128, allocate a buffer of that size.
	 * If one is greater, then trace_find_next_entry() will
	 * allocate a new buffer to adjust for the bigger iter->ent.
	 * It's not critical if it fails to get allocated here.
	 */
	iter->temp = kmalloc(128, GFP_KERNEL);
	if (iter->temp)
		iter->temp_size = 128;

	/*
	 * trace_event_printf() may need to modify given format
	 * string to replace %p with %px so that it shows real address
	 * instead of hash value. However, that is only for the event
	 * tracing, other tracer may not need. Defer the allocation
	 * until it is needed.
	 */
	iter->fmt = NULL;
	iter->fmt_size = 0;

	mutex_lock(&trace_types_lock);
	iter->trace = tr->current_trace;

	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
		goto fail;

	iter->tr = tr;

#ifdef CONFIG_TRACER_SNAPSHOT
	/* Currently only the top directory has a snapshot */
	if (tr->current_trace->print_max || snapshot)
		iter->array_buffer = &tr->snapshot_buffer;
	else
#endif
		iter->array_buffer = &tr->array_buffer;
	iter->snapshot = snapshot;
	iter->pos = -1;
	iter->cpu_file = tracing_get_cpu(inode);
	mutex_init(&iter->mutex);

	/* Notify the tracer early; before we stop tracing. */
	if (iter->trace->open)
		iter->trace->open(iter);

	/* Annotate start of buffers if we had overruns */
	if (ring_buffer_overruns(iter->array_buffer->buffer))
		iter->iter_flags |= TRACE_FILE_ANNOTATE;

	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
	if (trace_clocks[tr->clock_id].in_ns)
		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;

	/*
	 * If pause-on-trace is enabled, then stop the trace while
	 * dumping, unless this is the "snapshot" file
	 */
	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER(PAUSE_ON_TRACE))) {
		iter->iter_flags |= TRACE_FILE_PAUSE;
		tracing_stop_tr(tr);
	}

	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
		for_each_tracing_cpu(cpu) {
			iter->buffer_iter[cpu] =
				ring_buffer_read_start(iter->array_buffer->buffer,
						       cpu, GFP_KERNEL);
			tracing_iter_reset(iter, cpu);
		}
	} else {
		cpu = iter->cpu_file;
		iter->buffer_iter[cpu] =
			ring_buffer_read_start(iter->array_buffer->buffer,
					       cpu, GFP_KERNEL);
		tracing_iter_reset(iter, cpu);
	}

	mutex_unlock(&trace_types_lock);

	return iter;

 fail:
	mutex_unlock(&trace_types_lock);
	free_trace_iter_content(iter);
 release:
	seq_release_private(inode, file);
	return ERR_PTR(-ENOMEM);
}
3476
/*
 * Generic open that only checks global tracing availability (passes NULL
 * so no trace_array reference is taken) and stashes the inode private data.
 */
int tracing_open_generic(struct inode *inode, struct file *filp)
{
	int ret;

	ret = tracing_check_open_get_tr(NULL);
	if (ret)
		return ret;

	filp->private_data = inode->i_private;
	return 0;
}
3488
3489 /*
3490 * Open and update trace_array ref count.
3491 * Must have the current trace_array passed to it.
3492 */
/*
 * Open and update trace_array ref count.
 * Must have the current trace_array passed to it.
 * Fails with -EACCES on write opens of a read-only trace array.
 */
int tracing_open_generic_tr(struct inode *inode, struct file *filp)
{
	struct trace_array *tr = inode->i_private;
	int ret;

	ret = tracing_check_open_get_tr(tr);
	if (ret)
		return ret;

	if ((filp->f_mode & FMODE_WRITE) && trace_array_is_readonly(tr)) {
		trace_array_put(tr);
		return -EACCES;
	}

	filp->private_data = inode->i_private;

	return 0;
}
3511
3512 /*
3513 * The private pointer of the inode is the trace_event_file.
3514 * Update the tr ref count associated to it.
3515 */
tracing_open_file_tr(struct inode * inode,struct file * filp)3516 int tracing_open_file_tr(struct inode *inode, struct file *filp)
3517 {
3518 struct trace_event_file *file = inode->i_private;
3519 int ret;
3520
3521 ret = tracing_check_open_get_tr(file->tr);
3522 if (ret)
3523 return ret;
3524
3525 guard(mutex)(&event_mutex);
3526
3527 /* Fail if the file is marked for removal */
3528 if (file->flags & EVENT_FILE_FL_FREED) {
3529 trace_array_put(file->tr);
3530 return -ENODEV;
3531 } else {
3532 event_file_get(file);
3533 }
3534
3535 return 0;
3536 }
3537
/* Drop the references taken by tracing_open_file_tr(). */
int tracing_release_file_tr(struct inode *inode, struct file *filp)
{
	struct trace_event_file *file = inode->i_private;

	trace_array_put(file->tr);
	event_file_put(file);

	return 0;
}
3547
/* Release for single_open() files opened via tracing_open_file_tr(). */
int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
{
	tracing_release_file_tr(inode, filp);
	return single_release(inode, filp);
}
3553
/*
 * Release for the "trace" file: tear down the per-CPU ring buffer
 * iterators, notify the tracer, restart tracing if it was paused by
 * open, and free the iterator contents.
 */
int tracing_release(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;
	struct seq_file *m = file->private_data;
	struct trace_iterator *iter;
	int cpu;

	if (!(file->f_mode & FMODE_READ)) {
		trace_array_put(tr);
		return 0;
	}

	/* Writes do not use seq_file */
	iter = m->private;
	mutex_lock(&trace_types_lock);

	for_each_tracing_cpu(cpu) {
		if (iter->buffer_iter[cpu])
			ring_buffer_read_finish(iter->buffer_iter[cpu]);
	}

	if (iter->trace && iter->trace->close)
		iter->trace->close(iter);

	if (iter->iter_flags & TRACE_FILE_PAUSE)
		/* reenable tracing if it was previously enabled */
		tracing_start_tr(tr);

	__trace_array_put(tr);

	mutex_unlock(&trace_types_lock);

	free_trace_iter_content(iter);
	seq_release_private(inode, file);

	return 0;
}
3591
/* Release that only drops the trace_array reference taken at open. */
int tracing_release_generic_tr(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;

	trace_array_put(tr);
	return 0;
}
3599
/* Release for single_open() files: drop the tr reference, then seq cleanup. */
static int tracing_single_release_tr(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;

	trace_array_put(tr);

	return single_release(inode, file);
}
3608
3609 static bool update_last_data_if_empty(struct trace_array *tr);
3610
/*
 * Open for the "trace" file. An O_TRUNC write open erases the buffer
 * (one CPU or all, per the inode); a read open builds a trace iterator
 * and applies the latency format flag when that option is set.
 */
static int tracing_open(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;
	struct trace_iterator *iter;
	int ret;

	ret = tracing_check_open_get_tr(tr);
	if (ret)
		return ret;

	/* If this file was open for write, then erase contents */
	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
		int cpu = tracing_get_cpu(inode);
		struct array_buffer *trace_buf = &tr->array_buffer;

#ifdef CONFIG_TRACER_MAX_TRACE
		if (tr->current_trace->print_max)
			trace_buf = &tr->snapshot_buffer;
#endif

		if (cpu == RING_BUFFER_ALL_CPUS)
			tracing_reset_online_cpus(trace_buf);
		else
			tracing_reset_cpu(trace_buf, cpu);

		update_last_data_if_empty(tr);
	}

	if (file->f_mode & FMODE_READ) {
		iter = __tracing_open(inode, file, false);
		if (IS_ERR(iter))
			ret = PTR_ERR(iter);
		else if (tr->trace_flags & TRACE_ITER(LATENCY_FMT))
			iter->iter_flags |= TRACE_FILE_LAT_FMT;
	}

	if (ret < 0)
		trace_array_put(tr);

	return ret;
}
3652
3653 /*
3654 * Some tracers are not suitable for instance buffers.
3655 * A tracer is always available for the global array (toplevel)
3656 * or if it explicitly states that it is.
3657 */
/* Return true if tracer @t may be used on trace array @tr. */
static bool
trace_ok_for_array(struct tracer *t, struct trace_array *tr)
{
	/* arrays with mapped buffer range do not have snapshots */
	if (tr->range_addr_start && tracer_uses_snapshot(t))
		return false;
	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
}
3666
3667 /* Find the next tracer that this trace array may use */
/* Walk the tracer list from @t, returning the first tracer usable by @tr. */
static struct tracer *
get_tracer_for_array(struct trace_array *tr, struct tracer *t)
{
	while (t && !trace_ok_for_array(t, tr))
		t = t->next;

	return t;
}
3676
/* seq_file ->next() for available_tracers: advance to the next usable tracer. */
static void *
t_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct trace_array *tr = m->private;
	struct tracer *t = v;

	(*pos)++;

	if (t)
		t = get_tracer_for_array(tr, t->next);

	return t;
}
3690
/*
 * seq_file ->start() for available_tracers: take trace_types_lock
 * (released in t_stop()) and seek to the tracer at position *pos.
 */
static void *t_start(struct seq_file *m, loff_t *pos)
{
	struct trace_array *tr = m->private;
	struct tracer *t;
	loff_t l = 0;

	mutex_lock(&trace_types_lock);

	t = get_tracer_for_array(tr, trace_types);
	for (; t && l < *pos; t = t_next(m, t, &l))
		;

	return t;
}
3705
/* seq_file ->stop(): drop the lock taken in t_start(). */
static void t_stop(struct seq_file *m, void *p)
{
	mutex_unlock(&trace_types_lock);
}
3710
t_show(struct seq_file * m,void * v)3711 static int t_show(struct seq_file *m, void *v)
3712 {
3713 struct tracer *t = v;
3714
3715 if (!t)
3716 return 0;
3717
3718 seq_puts(m, t->name);
3719 if (t->next)
3720 seq_putc(m, ' ');
3721 else
3722 seq_putc(m, '\n');
3723
3724 return 0;
3725 }
3726
/* seq_file operations backing the "available_tracers" file. */
static const struct seq_operations show_traces_seq_ops = {
	.start		= t_start,
	.next		= t_next,
	.stop		= t_stop,
	.show		= t_show,
};
3733
/* Open "available_tracers": take a tr reference and start the seq iteration. */
static int show_traces_open(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;
	struct seq_file *m;
	int ret;

	ret = tracing_check_open_get_tr(tr);
	if (ret)
		return ret;

	ret = seq_open(file, &show_traces_seq_ops);
	if (ret) {
		trace_array_put(tr);
		return ret;
	}

	m = file->private_data;
	m->private = tr;

	return 0;
}
3755
/* Release for seq_open() files: drop the tr reference, then seq cleanup. */
static int tracing_seq_release(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;

	trace_array_put(tr);
	return seq_release(inode, file);
}
3763
/* Accept and discard writes (the "trace" file erase happens at open). */
static ssize_t
tracing_write_stub(struct file *filp, const char __user *ubuf,
		   size_t count, loff_t *ppos)
{
	return count;
}
3770
/*
 * lseek for tracing files: seq_lseek for read opens; write-only opens
 * always reset the position to 0.
 */
loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
{
	int ret;

	if (file->f_mode & FMODE_READ)
		ret = seq_lseek(file, offset, whence);
	else
		file->f_pos = ret = 0;

	return ret;
}
3782
/* File operations for the "trace" file. */
static const struct file_operations tracing_fops = {
	.open		= tracing_open,
	.read		= seq_read,
	.read_iter	= seq_read_iter,
	.splice_read	= copy_splice_read,
	.write		= tracing_write_stub,
	.llseek		= tracing_lseek,
	.release	= tracing_release,
};
3792
/* File operations for the "available_tracers" file. */
static const struct file_operations show_traces_fops = {
	.open		= show_traces_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= tracing_seq_release,
};
3799
/*
 * Read the "tracing_cpumask" file: format the current cpumask into a
 * kmalloc'd string (sized by a probe snprintf) and copy it to user space.
 */
static ssize_t
tracing_cpumask_read(struct file *filp, char __user *ubuf,
		     size_t count, loff_t *ppos)
{
	struct trace_array *tr = file_inode(filp)->i_private;
	char *mask_str __free(kfree) = NULL;
	int len;

	/* First pass computes the required buffer length */
	len = snprintf(NULL, 0, "%*pb\n",
		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
	mask_str = kmalloc(len, GFP_KERNEL);
	if (!mask_str)
		return -ENOMEM;

	len = snprintf(mask_str, len, "%*pb\n",
		       cpumask_pr_args(tr->tracing_cpumask));
	if (len >= count)
		return -EINVAL;

	return simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
}
3821
/*
 * Install a new tracing cpumask on @tr: disable recording on CPUs being
 * removed from the mask and enable it on CPUs being added, under
 * tr->max_lock with interrupts off, then copy the new mask in.
 */
int tracing_set_cpumask(struct trace_array *tr,
			cpumask_var_t tracing_cpumask_new)
{
	int cpu;

	if (!tr)
		return -EINVAL;

	local_irq_disable();
	arch_spin_lock(&tr->max_lock);
	for_each_tracing_cpu(cpu) {
		/*
		 * Increase/decrease the disabled counter if we are
		 * about to flip a bit in the cpumask:
		 */
		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
#ifdef CONFIG_TRACER_SNAPSHOT
			ring_buffer_record_disable_cpu(tr->snapshot_buffer.buffer, cpu);
#endif
		}
		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
#ifdef CONFIG_TRACER_SNAPSHOT
			ring_buffer_record_enable_cpu(tr->snapshot_buffer.buffer, cpu);
#endif
		}
	}
	arch_spin_unlock(&tr->max_lock);
	local_irq_enable();

	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);

	return 0;
}
3859
3860 static ssize_t
tracing_cpumask_write(struct file * filp,const char __user * ubuf,size_t count,loff_t * ppos)3861 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3862 size_t count, loff_t *ppos)
3863 {
3864 struct trace_array *tr = file_inode(filp)->i_private;
3865 cpumask_var_t tracing_cpumask_new;
3866 int err;
3867
3868 if (count == 0 || count > KMALLOC_MAX_SIZE)
3869 return -EINVAL;
3870
3871 if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3872 return -ENOMEM;
3873
3874 err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3875 if (err)
3876 goto err_free;
3877
3878 err = tracing_set_cpumask(tr, tracing_cpumask_new);
3879 if (err)
3880 goto err_free;
3881
3882 free_cpumask_var(tracing_cpumask_new);
3883
3884 return count;
3885
3886 err_free:
3887 free_cpumask_var(tracing_cpumask_new);
3888
3889 return err;
3890 }
3891
/* File operations for the "tracing_cpumask" file. */
static const struct file_operations tracing_cpumask_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_cpumask_read,
	.write		= tracing_cpumask_write,
	.release	= tracing_release_generic_tr,
	.llseek		= generic_file_llseek,
};
3899
/*
 * Show the "trace_options" file: one line per global option ("name" or
 * "noname"), followed by the current tracer's private options if any.
 */
static int tracing_trace_options_show(struct seq_file *m, void *v)
{
	struct tracer_opt *trace_opts;
	struct trace_array *tr = m->private;
	struct tracer_flags *flags;
	u32 tracer_flags;
	int i;

	guard(mutex)(&trace_types_lock);

	for (i = 0; trace_options[i]; i++) {
		if (tr->trace_flags & (1ULL << i))
			seq_printf(m, "%s\n", trace_options[i]);
		else
			seq_printf(m, "no%s\n", trace_options[i]);
	}

	/* Tracer-specific options, if the current tracer has any */
	flags = tr->current_trace_flags;
	if (!flags || !flags->opts)
		return 0;

	tracer_flags = flags->val;
	trace_opts = flags->opts;

	for (i = 0; trace_opts[i].name; i++) {
		if (tracer_flags & trace_opts[i].bit)
			seq_printf(m, "%s\n", trace_opts[i].name);
		else
			seq_printf(m, "no%s\n", trace_opts[i].name);
	}

	return 0;
}
3933
/*
 * Apply one tracer-private option bit. The tracer's set_flag() callback
 * may veto the change by returning non-zero.
 */
static int __set_tracer_option(struct trace_array *tr,
			       struct tracer_flags *tracer_flags,
			       struct tracer_opt *opts, int neg)
{
	struct tracer *trace = tracer_flags->trace;
	int ret = 0;

	if (trace->set_flag)
		ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
	if (ret)
		return ret;

	if (neg)
		tracer_flags->val &= ~opts->bit;
	else
		tracer_flags->val |= opts->bit;
	return 0;
}
3952
3953 /* Try to assign a tracer specific option */
/*
 * Try to assign a tracer specific option. Looks @cmp up in the current
 * tracer's option table; returns -EINVAL when the name is unknown.
 */
static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
{
	struct tracer_flags *tracer_flags = tr->current_trace_flags;
	struct tracer_opt *opts = NULL;
	int i;

	if (!tracer_flags || !tracer_flags->opts)
		return 0;

	for (i = 0; tracer_flags->opts[i].name; i++) {
		opts = &tracer_flags->opts[i];

		if (strcmp(cmp, opts->name) == 0)
			return __set_tracer_option(tr, tracer_flags, opts, neg);
	}

	return -EINVAL;
}
3972
3973 /* Some tracers require overwrite to stay enabled */
/*
 * Some tracers require overwrite to stay enabled: reject clearing the
 * overwrite flag while such a tracer is enabled.
 */
int trace_keep_overwrite(struct tracer *tracer, u64 mask, int set)
{
	if (tracer->enabled && (mask & TRACE_ITER(OVERWRITE)) && !set)
		return -1;

	return 0;
}
3981
/*
 * Set or clear one global trace option bit on @tr and perform the
 * side effects that option requires (cmdline/tgid recording, fork
 * following, overwrite mode, printk redirection, ...).
 */
int set_tracer_flag(struct trace_array *tr, u64 mask, int enabled)
{
	/* These options coordinate with state protected by event_mutex */
	switch (mask) {
	case TRACE_ITER(RECORD_TGID):
	case TRACE_ITER(RECORD_CMD):
	case TRACE_ITER(TRACE_PRINTK):
	case TRACE_ITER(COPY_MARKER):
		lockdep_assert_held(&event_mutex);
	}

	/* do nothing if flag is already set */
	if (!!(tr->trace_flags & mask) == !!enabled)
		return 0;

	/* Give the tracer a chance to approve the change */
	if (tr->current_trace->flag_changed)
		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
			return -EINVAL;

	/* Options with special update paths, handled before the flag flip */
	switch (mask) {
	case TRACE_ITER(TRACE_PRINTK):
		if (enabled) {
			update_printk_trace(tr);
		} else {
			/*
			 * The global_trace cannot clear this.
			 * It's flag only gets cleared if another instance sets it.
			 */
			if (printk_trace == &global_trace)
				return -EINVAL;
			/*
			 * An instance must always have it set.
			 * by default, that's the global_trace instance.
			 */
			if (printk_trace == tr)
				update_printk_trace(&global_trace);
		}
		break;

	case TRACE_ITER(COPY_MARKER):
		update_marker_trace(tr, enabled);
		/* update_marker_trace updates the tr->trace_flags */
		return 0;
	}

	if (enabled)
		tr->trace_flags |= mask;
	else
		tr->trace_flags &= ~mask;

	/* Per-option side effects after the flag has been updated */
	switch (mask) {
	case TRACE_ITER(RECORD_CMD):
		trace_event_enable_cmd_record(enabled);
		break;

	case TRACE_ITER(RECORD_TGID):

		if (trace_alloc_tgid_map() < 0) {
			/* Roll the flag back if the tgid map can't be allocated */
			tr->trace_flags &= ~TRACE_ITER(RECORD_TGID);
			return -ENOMEM;
		}

		trace_event_enable_tgid_record(enabled);
		break;

	case TRACE_ITER(EVENT_FORK):
		trace_event_follow_fork(tr, enabled);
		break;

	case TRACE_ITER(FUNC_FORK):
		ftrace_pid_follow_fork(tr, enabled);
		break;

	case TRACE_ITER(OVERWRITE):
		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
#ifdef CONFIG_TRACER_SNAPSHOT
		ring_buffer_change_overwrite(tr->snapshot_buffer.buffer, enabled);
#endif
		break;

	case TRACE_ITER(PRINTK):
		trace_printk_start_stop_comm(enabled);
		trace_printk_control(enabled);
		break;

#if defined(CONFIG_FUNCTION_PROFILER) && defined(CONFIG_FUNCTION_GRAPH_TRACER)
	case TRACE_GRAPH_GRAPH_TIME:
		ftrace_graph_graph_time_control(enabled);
		break;
#endif
	}

	return 0;
}
4076
/*
 * Parse one option token (optionally prefixed with "no") and apply it as
 * a global trace flag or, failing that, as a tracer-specific option.
 * The string is stripped in place and the first trailing separator is
 * restored afterwards so callers can re-use the buffer.
 */
int trace_set_options(struct trace_array *tr, char *option)
{
	char *cmp;
	int neg = 0;
	int ret;
	size_t orig_len = strlen(option);
	int len;

	cmp = strstrip(option);

	/* A "no" prefix means clear the option */
	len = str_has_prefix(cmp, "no");
	if (len)
		neg = 1;

	cmp += len;

	mutex_lock(&event_mutex);
	mutex_lock(&trace_types_lock);

	ret = match_string(trace_options, -1, cmp);
	/* If no option could be set, test the specific tracer options */
	if (ret < 0)
		ret = set_tracer_option(tr, cmp, neg);
	else
		ret = set_tracer_flag(tr, 1ULL << ret, !neg);

	mutex_unlock(&trace_types_lock);
	mutex_unlock(&event_mutex);

	/*
	 * If the first trailing whitespace is replaced with '\0' by strstrip,
	 * turn it back into a space.
	 */
	if (orig_len > strlen(option))
		option[strlen(option)] = ' ';

	return ret;
}
4115
/* Apply the comma-separated options from the trace_options= boot parameter. */
static void __init apply_trace_boot_options(void)
{
	char *buf = trace_boot_options_buf;
	char *option;

	while (true) {
		option = strsep(&buf, ",");

		if (!option)
			break;

		if (*option)
			trace_set_options(&global_trace, option);

		/* Put back the comma to allow this to be called again */
		if (buf)
			*(buf - 1) = ',';
	}
}
4135
/*
 * Write to the "trace_options" file: copy the user string (max 63 chars)
 * and apply it via trace_set_options().
 */
static ssize_t
tracing_trace_options_write(struct file *filp, const char __user *ubuf,
			    size_t cnt, loff_t *ppos)
{
	struct seq_file *m = filp->private_data;
	struct trace_array *tr = m->private;
	char buf[64];
	int ret;

	if (cnt >= sizeof(buf))
		return -EINVAL;

	if (copy_from_user(buf, ubuf, cnt))
		return -EFAULT;

	buf[cnt] = 0;

	ret = trace_set_options(tr, buf);
	if (ret < 0)
		return ret;

	*ppos += cnt;

	return cnt;
}
4161
/* Open "trace_options": take a tr reference and set up single_open(). */
static int tracing_trace_options_open(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;
	int ret;

	ret = tracing_check_open_get_tr(tr);
	if (ret)
		return ret;

	ret = single_open(file, tracing_trace_options_show, inode->i_private);
	if (ret < 0)
		trace_array_put(tr);

	return ret;
}
4177
/* File operations for the "trace_options" file. */
static const struct file_operations tracing_iter_fops = {
	.open		= tracing_trace_options_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= tracing_single_release_tr,
	.write		= tracing_trace_options_write,
};
4185
4186 static const char readme_msg[] =
4187 "tracing mini-HOWTO:\n\n"
4188 "By default tracefs removes all OTH file permission bits.\n"
4189 "When mounting tracefs an optional group id can be specified\n"
4190 "which adds the group to every directory and file in tracefs:\n\n"
4191 "\t e.g. mount -t tracefs [-o [gid=<gid>]] nodev /sys/kernel/tracing\n\n"
4192 "# echo 0 > tracing_on : quick way to disable tracing\n"
4193 "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4194 " Important files:\n"
4195 " trace\t\t\t- The static contents of the buffer\n"
4196 "\t\t\t To clear the buffer write into this file: echo > trace\n"
4197 " trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4198 " current_tracer\t- function and latency tracers\n"
4199 " available_tracers\t- list of configured tracers for current_tracer\n"
4200 " error_log\t- error log for failed commands (that support it)\n"
4201 " buffer_size_kb\t- view and modify size of per cpu buffer\n"
4202 " buffer_total_size_kb - view total size of all cpu buffers\n\n"
4203 " trace_clock\t\t- change the clock used to order events\n"
4204 " local: Per cpu clock but may not be synced across CPUs\n"
4205 " global: Synced across CPUs but slows tracing down.\n"
4206 " counter: Not a clock, but just an increment\n"
4207 " uptime: Jiffy counter from time of boot\n"
4208 " perf: Same clock that perf events use\n"
4209 #ifdef CONFIG_X86_64
4210 " x86-tsc: TSC cycle counter\n"
4211 #endif
4212 "\n timestamp_mode\t- view the mode used to timestamp events\n"
4213 " delta: Delta difference against a buffer-wide timestamp\n"
4214 " absolute: Absolute (standalone) timestamp\n"
4215 "\n trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
4216 "\n trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
4217 " tracing_cpumask\t- Limit which CPUs to trace\n"
4218 " instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4219 "\t\t\t Remove sub-buffer with rmdir\n"
4220 " trace_options\t\t- Set format or modify how tracing happens\n"
4221 "\t\t\t Disable an option by prefixing 'no' to the\n"
4222 "\t\t\t option name\n"
4223 " saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4224 #ifdef CONFIG_DYNAMIC_FTRACE
4225 "\n available_filter_functions - list of functions that can be filtered on\n"
4226 " set_ftrace_filter\t- echo function name in here to only trace these\n"
4227 "\t\t\t functions\n"
4228 "\t accepts: func_full_name or glob-matching-pattern\n"
4229 "\t modules: Can select a group via module\n"
4230 "\t Format: :mod:<module-name>\n"
4231 "\t example: echo :mod:ext3 > set_ftrace_filter\n"
4232 "\t triggers: a command to perform when function is hit\n"
4233 "\t Format: <function>:<trigger>[:count]\n"
4234 "\t trigger: traceon, traceoff\n"
4235 "\t\t enable_event:<system>:<event>\n"
4236 "\t\t disable_event:<system>:<event>\n"
4237 #ifdef CONFIG_STACKTRACE
4238 "\t\t stacktrace\n"
4239 #endif
4240 #ifdef CONFIG_TRACER_SNAPSHOT
4241 "\t\t snapshot\n"
4242 #endif
4243 "\t\t dump\n"
4244 "\t\t cpudump\n"
4245 "\t example: echo do_fault:traceoff > set_ftrace_filter\n"
4246 "\t echo do_trap:traceoff:3 > set_ftrace_filter\n"
4247 "\t The first one will disable tracing every time do_fault is hit\n"
4248 "\t The second will disable tracing at most 3 times when do_trap is hit\n"
4249 "\t The first time do trap is hit and it disables tracing, the\n"
4250 "\t counter will decrement to 2. If tracing is already disabled,\n"
4251 "\t the counter will not decrement. It only decrements when the\n"
4252 "\t trigger did work\n"
4253 "\t To remove trigger without count:\n"
4254 "\t echo '!<function>:<trigger> > set_ftrace_filter\n"
4255 "\t To remove trigger with a count:\n"
4256 "\t echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
4257 " set_ftrace_notrace\t- echo function name in here to never trace.\n"
4258 "\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4259 "\t modules: Can select a group via module command :mod:\n"
4260 "\t Does not accept triggers\n"
4261 #endif /* CONFIG_DYNAMIC_FTRACE */
4262 #ifdef CONFIG_FUNCTION_TRACER
4263 " set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4264 "\t\t (function)\n"
4265 " set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
4266 "\t\t (function)\n"
4267 #endif
4268 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4269 " set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4270 " set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4271 " max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4272 #endif
4273 #ifdef CONFIG_TRACER_SNAPSHOT
4274 "\n snapshot\t\t- Like 'trace' but shows the content of the static\n"
4275 "\t\t\t snapshot buffer. Read the contents for more\n"
4276 "\t\t\t information\n"
4277 #endif
4278 #ifdef CONFIG_STACK_TRACER
4279 " stack_trace\t\t- Shows the max stack trace when active\n"
4280 " stack_max_size\t- Shows current max stack size that was traced\n"
4281 "\t\t\t Write into this file to reset the max size (trigger a\n"
4282 "\t\t\t new trace)\n"
4283 #ifdef CONFIG_DYNAMIC_FTRACE
4284 " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4285 "\t\t\t traces\n"
4286 #endif
4287 #endif /* CONFIG_STACK_TRACER */
4288 #ifdef CONFIG_DYNAMIC_EVENTS
4289 " dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
4290 "\t\t\t Write into this file to define/undefine new trace events.\n"
4291 #endif
4292 #ifdef CONFIG_KPROBE_EVENTS
4293 " kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
4294 "\t\t\t Write into this file to define/undefine new trace events.\n"
4295 #endif
4296 #ifdef CONFIG_UPROBE_EVENTS
4297 " uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
4298 "\t\t\t Write into this file to define/undefine new trace events.\n"
4299 #endif
4300 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
4301 defined(CONFIG_FPROBE_EVENTS)
4302 "\t accepts: event-definitions (one definition per line)\n"
4303 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4304 "\t Format: p[:[<group>/][<event>]] <place> [<args>]\n"
4305 "\t r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
4306 #endif
4307 #ifdef CONFIG_FPROBE_EVENTS
4308 "\t f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
4309 "\t t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
4310 #endif
4311 #ifdef CONFIG_HIST_TRIGGERS
4312 "\t s:[synthetic/]<event> <field> [<field>]\n"
4313 #endif
4314 "\t e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
4315 "\t -:[<group>/][<event>]\n"
4316 #ifdef CONFIG_KPROBE_EVENTS
4317 "\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4318 "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
4319 #endif
4320 #ifdef CONFIG_UPROBE_EVENTS
4321 " place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
4322 #endif
4323 "\t args: <name>=fetcharg[:type]\n"
4324 "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
4325 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
4326 "\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
4327 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
4328 "\t <argname>[->field[->field|.field...]],\n"
4329 #endif
4330 #else
4331 "\t $stack<index>, $stack, $retval, $comm,\n"
4332 #endif
4333 "\t +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
4334 "\t kernel return probes support: $retval, $arg<N>, $comm\n"
4335 "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
4336 "\t b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
4337 "\t symstr, %pd/%pD, <type>\\[<array-size>\\]\n"
4338 #ifdef CONFIG_HIST_TRIGGERS
4339 "\t field: <stype> <name>;\n"
4340 "\t stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
4341 "\t [unsigned] char/int/long\n"
4342 #endif
4343 "\t efield: For event probes ('e' types), the field is on of the fields\n"
4344 "\t of the <attached-group>/<attached-event>.\n"
4345 #endif
4346 " set_event\t\t- Enables events by name written into it\n"
4347 "\t\t\t Can enable module events via: :mod:<module>\n"
4348 " events/\t\t- Directory containing all trace event subsystems:\n"
4349 " enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4350 " events/<system>/\t- Directory containing all trace events for <system>:\n"
4351 " enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4352 "\t\t\t events\n"
4353 " filter\t\t- If set, only events passing filter are traced\n"
4354 " events/<system>/<event>/\t- Directory containing control files for\n"
4355 "\t\t\t <event>:\n"
4356 " enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4357 " filter\t\t- If set, only events passing filter are traced\n"
4358 " trigger\t\t- If set, a command to perform when event is hit\n"
4359 "\t Format: <trigger>[:count][if <filter>]\n"
4360 "\t trigger: traceon, traceoff\n"
4361 "\t enable_event:<system>:<event>\n"
4362 "\t disable_event:<system>:<event>\n"
4363 #ifdef CONFIG_HIST_TRIGGERS
4364 "\t enable_hist:<system>:<event>\n"
4365 "\t disable_hist:<system>:<event>\n"
4366 #endif
4367 #ifdef CONFIG_STACKTRACE
4368 "\t\t stacktrace\n"
4369 #endif
4370 #ifdef CONFIG_TRACER_SNAPSHOT
4371 "\t\t snapshot\n"
4372 #endif
4373 #ifdef CONFIG_HIST_TRIGGERS
4374 "\t\t hist (see below)\n"
4375 #endif
4376 "\t example: echo traceoff > events/block/block_unplug/trigger\n"
4377 "\t echo traceoff:3 > events/block/block_unplug/trigger\n"
4378 "\t echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4379 "\t events/block/block_unplug/trigger\n"
4380 "\t The first disables tracing every time block_unplug is hit.\n"
4381 "\t The second disables tracing the first 3 times block_unplug is hit.\n"
4382 "\t The third enables the kmalloc event the first 3 times block_unplug\n"
4383 "\t is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
4384 "\t Like function triggers, the counter is only decremented if it\n"
4385 "\t enabled or disabled tracing.\n"
4386 "\t To remove a trigger without a count:\n"
4387 "\t echo '!<trigger> > <system>/<event>/trigger\n"
4388 "\t To remove a trigger with a count:\n"
4389 "\t echo '!<trigger>:0 > <system>/<event>/trigger\n"
4390 "\t Filters can be ignored when removing a trigger.\n"
4391 #ifdef CONFIG_HIST_TRIGGERS
4392 " hist trigger\t- If set, event hits are aggregated into a hash table\n"
4393 "\t Format: hist:keys=<field1[,field2,...]>\n"
4394 "\t [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
4395 "\t [:values=<field1[,field2,...]>]\n"
4396 "\t [:sort=<field1[,field2,...]>]\n"
4397 "\t [:size=#entries]\n"
4398 "\t [:pause][:continue][:clear]\n"
4399 "\t [:name=histname1]\n"
4400 "\t [:nohitcount]\n"
4401 "\t [:<handler>.<action>]\n"
4402 "\t [if <filter>]\n\n"
4403 "\t Note, special fields can be used as well:\n"
4404 "\t common_timestamp - to record current timestamp\n"
4405 "\t common_cpu - to record the CPU the event happened on\n"
4406 "\n"
4407 "\t A hist trigger variable can be:\n"
4408 "\t - a reference to a field e.g. x=current_timestamp,\n"
4409 "\t - a reference to another variable e.g. y=$x,\n"
4410 "\t - a numeric literal: e.g. ms_per_sec=1000,\n"
4411 "\t - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
4412 "\n"
4413 "\t hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
4414 "\t multiplication(*) and division(/) operators. An operand can be either a\n"
4415 "\t variable reference, field or numeric literal.\n"
4416 "\n"
4417 "\t When a matching event is hit, an entry is added to a hash\n"
4418 "\t table using the key(s) and value(s) named, and the value of a\n"
4419 "\t sum called 'hitcount' is incremented. Keys and values\n"
4420 "\t correspond to fields in the event's format description. Keys\n"
4421 "\t can be any field, or the special string 'common_stacktrace'.\n"
4422 "\t Compound keys consisting of up to two fields can be specified\n"
4423 "\t by the 'keys' keyword. Values must correspond to numeric\n"
4424 "\t fields. Sort keys consisting of up to two fields can be\n"
4425 "\t specified using the 'sort' keyword. The sort direction can\n"
4426 "\t be modified by appending '.descending' or '.ascending' to a\n"
4427 "\t sort field. The 'size' parameter can be used to specify more\n"
4428 "\t or fewer than the default 2048 entries for the hashtable size.\n"
4429 "\t If a hist trigger is given a name using the 'name' parameter,\n"
4430 "\t its histogram data will be shared with other triggers of the\n"
4431 "\t same name, and trigger hits will update this common data.\n\n"
4432 "\t Reading the 'hist' file for the event will dump the hash\n"
4433 "\t table in its entirety to stdout. If there are multiple hist\n"
4434 "\t triggers attached to an event, there will be a table for each\n"
4435 "\t trigger in the output. The table displayed for a named\n"
4436 "\t trigger will be the same as any other instance having the\n"
4437 "\t same name. The default format used to display a given field\n"
4438 "\t can be modified by appending any of the following modifiers\n"
4439 "\t to the field name, as applicable:\n\n"
4440 "\t .hex display a number as a hex value\n"
4441 "\t .sym display an address as a symbol\n"
4442 "\t .sym-offset display an address as a symbol and offset\n"
4443 "\t .execname display a common_pid as a program name\n"
4444 "\t .syscall display a syscall id as a syscall name\n"
4445 "\t .log2 display log2 value rather than raw number\n"
4446 "\t .buckets=size display values in groups of size rather than raw number\n"
4447 "\t .usecs display a common_timestamp in microseconds\n"
4448 "\t .percent display a number of percentage value\n"
4449 "\t .graph display a bar-graph of a value\n\n"
4450 "\t The 'pause' parameter can be used to pause an existing hist\n"
4451 "\t trigger or to start a hist trigger but not log any events\n"
4452 "\t until told to do so. 'continue' can be used to start or\n"
4453 "\t restart a paused hist trigger.\n\n"
4454 "\t The 'clear' parameter will clear the contents of a running\n"
4455 "\t hist trigger and leave its current paused/active state\n"
4456 "\t unchanged.\n\n"
4457 "\t The 'nohitcount' (or NOHC) parameter will suppress display of\n"
4458 "\t raw hitcount in the histogram.\n\n"
4459 "\t The enable_hist and disable_hist triggers can be used to\n"
4460 "\t have one event conditionally start and stop another event's\n"
4461 "\t already-attached hist trigger. The syntax is analogous to\n"
4462 "\t the enable_event and disable_event triggers.\n\n"
4463 "\t Hist trigger handlers and actions are executed whenever a\n"
4464 "\t a histogram entry is added or updated. They take the form:\n\n"
4465 "\t <handler>.<action>\n\n"
4466 "\t The available handlers are:\n\n"
4467 "\t onmatch(matching.event) - invoke on addition or update\n"
4468 "\t onmax(var) - invoke if var exceeds current max\n"
4469 "\t onchange(var) - invoke action if var changes\n\n"
4470 "\t The available actions are:\n\n"
4471 "\t trace(<synthetic_event>,param list) - generate synthetic event\n"
4472 "\t save(field,...) - save current event fields\n"
4473 #ifdef CONFIG_TRACER_SNAPSHOT
4474 "\t snapshot() - snapshot the trace buffer\n\n"
4475 #endif
4476 #ifdef CONFIG_SYNTH_EVENTS
4477 " events/synthetic_events\t- Create/append/remove/show synthetic events\n"
4478 "\t Write into this file to define/undefine new synthetic events.\n"
4479 "\t example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
4480 #endif
4481 #endif
4482 ;
4483
4484 static ssize_t
tracing_readme_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)4485 tracing_readme_read(struct file *filp, char __user *ubuf,
4486 size_t cnt, loff_t *ppos)
4487 {
4488 return simple_read_from_buffer(ubuf, cnt, ppos,
4489 readme_msg, strlen(readme_msg));
4490 }
4491
/* File operations for the read-only "README" tracefs file */
static const struct file_operations tracing_readme_fops = {
	.open = tracing_open_generic,
	.read = tracing_readme_read,
	.llseek = generic_file_llseek,
};
4497
4498 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
4499 static union trace_eval_map_item *
update_eval_map(union trace_eval_map_item * ptr)4500 update_eval_map(union trace_eval_map_item *ptr)
4501 {
4502 if (!ptr->map.eval_string) {
4503 if (ptr->tail.next) {
4504 ptr = ptr->tail.next;
4505 /* Set ptr to the next real item (skip head) */
4506 ptr++;
4507 } else
4508 return NULL;
4509 }
4510 return ptr;
4511 }
4512
/* seq_file ->next: advance to the next real eval map entry */
static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
{
	union trace_eval_map_item *item = v;

	(*pos)++;

	/*
	 * Paranoid! If the cursor already points at the end, do not
	 * increment past it.  This really should never happen.
	 */
	item = update_eval_map(item);
	if (WARN_ON_ONCE(!item))
		return NULL;

	item++;
	return update_eval_map(item);
}
4531
/*
 * seq_file ->start: take the eval map lock (released in
 * eval_map_stop()) and walk forward to position *pos.
 */
static void *eval_map_start(struct seq_file *m, loff_t *pos)
{
	union trace_eval_map_item *item;
	loff_t l = 0;

	mutex_lock(&trace_eval_mutex);

	item = trace_eval_maps;
	/* Skip the head item of the first array */
	if (item)
		item++;

	while (item && l < *pos)
		item = eval_map_next(m, item, &l);

	return item;
}
4549
/* seq_file ->stop: drop the lock taken in eval_map_start() */
static void eval_map_stop(struct seq_file *m, void *v)
{
	mutex_unlock(&trace_eval_mutex);
}
4554
eval_map_show(struct seq_file * m,void * v)4555 static int eval_map_show(struct seq_file *m, void *v)
4556 {
4557 union trace_eval_map_item *ptr = v;
4558
4559 seq_printf(m, "%s %ld (%s)\n",
4560 ptr->map.eval_string, ptr->map.eval_value,
4561 ptr->map.system);
4562
4563 return 0;
4564 }
4565
/* seq_file iterator over the global eval map list */
static const struct seq_operations tracing_eval_map_seq_ops = {
	.start = eval_map_start,
	.next = eval_map_next,
	.stop = eval_map_stop,
	.show = eval_map_show,
};
4572
tracing_eval_map_open(struct inode * inode,struct file * filp)4573 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
4574 {
4575 int ret;
4576
4577 ret = tracing_check_open_get_tr(NULL);
4578 if (ret)
4579 return ret;
4580
4581 return seq_open(filp, &tracing_eval_map_seq_ops);
4582 }
4583
/* File operations for the read-only "eval_map" tracefs file */
static const struct file_operations tracing_eval_map_fops = {
	.open = tracing_eval_map_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release,
};
4590
/*
 * Given the head item of an eval map array, return its tail item
 * (the head records the array length; the tail follows the entries).
 */
static inline union trace_eval_map_item *
trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
{
	/* Return tail of array given the head */
	return ptr + ptr->head.length + 1;
}
4597
/*
 * Append the eval maps of @mod (or the core kernel when @mod is NULL
 * -- TODO confirm NULL is used for builtin maps at the call sites) to
 * the global trace_eval_maps list shown by the "eval_map" file.
 *
 * The maps are copied by value into a freshly allocated array framed
 * by a head item (module + length) and a zeroed tail item linking to
 * the next array in the list.
 */
static void
trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
			   int len)
{
	struct trace_eval_map **stop;
	struct trace_eval_map **map;
	union trace_eval_map_item *map_array;
	union trace_eval_map_item *ptr;

	stop = start + len;

	/*
	 * The trace_eval_maps contains the map plus a head and tail item,
	 * where the head holds the module and length of array, and the
	 * tail holds a pointer to the next list.
	 */
	map_array = kmalloc_objs(*map_array, len + 2);
	if (!map_array) {
		pr_warn("Unable to allocate trace eval mapping\n");
		return;
	}

	guard(mutex)(&trace_eval_mutex);

	if (!trace_eval_maps)
		trace_eval_maps = map_array;
	else {
		/* Follow the tail links to find the last array in the list */
		ptr = trace_eval_maps;
		for (;;) {
			ptr = trace_eval_jmp_to_tail(ptr);
			if (!ptr->tail.next)
				break;
			ptr = ptr->tail.next;

		}
		ptr->tail.next = map_array;
	}
	/* The head item records the owning module and the entry count */
	map_array->head.mod = mod;
	map_array->head.length = len;
	map_array++;

	/* Copy each map entry by value into the new array */
	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
		map_array->map = **map;
		map_array++;
	}
	/* Zeroed tail item terminates the array (tail.next == NULL) */
	memset(map_array, 0, sizeof(*map_array));
}
4645
/* Create the read-only "eval_map" file in the tracefs directory */
static void trace_create_eval_file(struct dentry *d_tracer)
{
	trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
			  NULL, &tracing_eval_map_fops);
}
4651
4652 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
/* Eval map file support not configured: provide no-op stubs */
static inline void trace_create_eval_file(struct dentry *d_tracer) { }
static inline void trace_insert_eval_map_file(struct module *mod,
			      struct trace_eval_map **start, int len) { }
4656 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
4657
4658 static void
trace_event_update_with_eval_map(struct module * mod,struct trace_eval_map ** start,int len)4659 trace_event_update_with_eval_map(struct module *mod,
4660 struct trace_eval_map **start,
4661 int len)
4662 {
4663 struct trace_eval_map **map;
4664
4665 /* Always run sanitizer only if btf_type_tag attr exists. */
4666 if (len <= 0) {
4667 if (!(IS_ENABLED(CONFIG_DEBUG_INFO_BTF) &&
4668 IS_ENABLED(CONFIG_PAHOLE_HAS_BTF_TAG) &&
4669 __has_attribute(btf_type_tag)))
4670 return;
4671 }
4672
4673 map = start;
4674
4675 trace_event_update_all(map, len);
4676
4677 if (len <= 0)
4678 return;
4679
4680 trace_insert_eval_map_file(mod, start, len);
4681 }
4682
4683 static ssize_t
tracing_set_trace_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)4684 tracing_set_trace_read(struct file *filp, char __user *ubuf,
4685 size_t cnt, loff_t *ppos)
4686 {
4687 struct trace_array *tr = filp->private_data;
4688 char buf[MAX_TRACER_SIZE+2];
4689 int r;
4690
4691 scoped_guard(mutex, &trace_types_lock) {
4692 r = sprintf(buf, "%s\n", tr->current_trace->name);
4693 }
4694
4695 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4696 }
4697
/*
 * Initialize tracer @t on trace array @tr: reset the ring buffer,
 * refresh last-boot data if the buffer is empty, then invoke the
 * tracer's init callback and return its result.
 */
int tracer_init(struct tracer *t, struct trace_array *tr)
{
	tracing_reset_online_cpus(&tr->array_buffer);
	update_last_data_if_empty(tr);
	return t->init(tr);
}
4704
/* Record @val as the per-CPU entry count for every tracing CPU of @buf */
void trace_set_buffer_entries(struct array_buffer *buf, unsigned long val)
{
	int cpu;

	for_each_tracing_cpu(cpu)
		per_cpu_ptr(buf->data, cpu)->entries = val;
}
4712
update_buffer_entries(struct array_buffer * buf,int cpu)4713 static void update_buffer_entries(struct array_buffer *buf, int cpu)
4714 {
4715 if (cpu == RING_BUFFER_ALL_CPUS) {
4716 trace_set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
4717 } else {
4718 per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
4719 }
4720 }
4721
/*
 * Resize the ring buffer of @tr (and, when allocated, its snapshot
 * buffer) to @size for @cpu (or all CPUs with RING_BUFFER_ALL_CPUS).
 * Tracing is stopped for the duration of the resize.  Returns 0 on
 * success or a negative error code from ring_buffer_resize().
 */
static int __tracing_resize_ring_buffer(struct trace_array *tr,
					unsigned long size, int cpu)
{
	int ret;

	/*
	 * If kernel or user changes the size of the ring buffer
	 * we use the size that was given, and we can forget about
	 * expanding it later.
	 */
	trace_set_ring_buffer_expanded(tr);

	/* May be called before buffers are initialized */
	if (!tr->array_buffer.buffer)
		return 0;

	/* Do not allow tracing while resizing ring buffer */
	tracing_stop_tr(tr);

	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
	if (ret < 0)
		goto out_start;

#ifdef CONFIG_TRACER_SNAPSHOT
	if (!tr->allocated_snapshot)
		goto out;

	/* Keep the snapshot buffer the same size as the main buffer */
	ret = ring_buffer_resize(tr->snapshot_buffer.buffer, size, cpu);
	if (ret < 0) {
		/* Try to shrink the main buffer back to its recorded size */
		int r = resize_buffer_duplicate_size(&tr->array_buffer,
						     &tr->array_buffer, cpu);
		if (r < 0) {
			/*
			 * AARGH! We are left with different
			 * size max buffer!!!!
			 * The max buffer is our "snapshot" buffer.
			 * When a tracer needs a snapshot (one of the
			 * latency tracers), it swaps the max buffer
			 * with the saved snap shot. We succeeded to
			 * update the size of the main buffer, but failed to
			 * update the size of the max buffer. But when we tried
			 * to reset the main buffer to the original size, we
			 * failed there too. This is very unlikely to
			 * happen, but if it does, warn and kill all
			 * tracing.
			 */
			WARN_ON(1);
			tracing_disabled = 1;
		}
		goto out_start;
	}

	update_buffer_entries(&tr->snapshot_buffer, cpu);

 out:
#endif /* CONFIG_TRACER_SNAPSHOT */

	update_buffer_entries(&tr->array_buffer, cpu);
 out_start:
	tracing_start_tr(tr);
	return ret;
}
4784
tracing_resize_ring_buffer(struct trace_array * tr,unsigned long size,int cpu_id)4785 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
4786 unsigned long size, int cpu_id)
4787 {
4788 guard(mutex)(&trace_types_lock);
4789
4790 if (cpu_id != RING_BUFFER_ALL_CPUS) {
4791 /* make sure, this cpu is enabled in the mask */
4792 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask))
4793 return -EINVAL;
4794 }
4795
4796 return __tracing_resize_ring_buffer(tr, size, cpu_id);
4797 }
4798
/* One module recorded in the persistent ring buffer scratch area */
struct trace_mod_entry {
	unsigned long mod_addr;			/* module text base address */
	char mod_name[MODULE_NAME_LEN];		/* module name */
};
4803
/*
 * Scratch area kept with the persistent ring buffer, used to map
 * kernel and module addresses from a previous boot onto this one.
 */
struct trace_scratch {
	unsigned int clock_id;			/* trace clock in use */
	unsigned long text_addr;		/* _text address at save time */
	unsigned long nr_entries;		/* entries[] in use */
	struct trace_mod_entry entries[];	/* must be sorted by mod_addr */
};
4810
/* Serializes updates of the module entries in the scratch area */
static DEFINE_MUTEX(scratch_mutex);
4812
cmp_mod_entry(const void * key,const void * pivot)4813 static int cmp_mod_entry(const void *key, const void *pivot)
4814 {
4815 unsigned long addr = (unsigned long)key;
4816 const struct trace_mod_entry *ent = pivot;
4817
4818 if (addr < ent[0].mod_addr)
4819 return -1;
4820
4821 return addr >= ent[1].mod_addr;
4822 }
4823
/**
 * trace_adjust_address() - Adjust prev boot address to current address.
 * @tr: Persistent ring buffer's trace_array.
 * @addr: Address in @tr which is adjusted.
 *
 * Return: the current-boot address corresponding to @addr as recorded
 * in a previous boot, or @addr unchanged when no adjustment applies
 * (no last-boot data, or the address cannot be mapped).
 */
unsigned long trace_adjust_address(struct trace_array *tr, unsigned long addr)
{
	struct trace_module_delta *module_delta;
	struct trace_scratch *tscratch;
	struct trace_mod_entry *entry;
	unsigned long raddr;
	int idx = 0, nr_entries;

	/* If we don't have last boot delta, return the address */
	if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
		return addr;

	/* tr->module_delta must be protected by rcu. */
	guard(rcu)();
	tscratch = tr->scratch;
	/* if there is no tscrach, module_delta must be NULL. */
	module_delta = READ_ONCE(tr->module_delta);
	if (!module_delta || !tscratch->nr_entries ||
	    tscratch->entries[0].mod_addr > addr) {
		/*
		 * Not a module address: apply the core text delta, but
		 * only keep the result if it still lands in kernel
		 * text/data/rodata.
		 */
		raddr = addr + tr->text_delta;
		return __is_kernel(raddr) || is_kernel_core_data(raddr) ||
			is_kernel_rodata(raddr) ? raddr : addr;
	}

	/* Note that entries must be sorted. */
	nr_entries = tscratch->nr_entries;
	if (nr_entries == 1 ||
	    tscratch->entries[nr_entries - 1].mod_addr < addr)
		idx = nr_entries - 1;
	else {
		/* Binary search for the module range containing addr */
		entry = __inline_bsearch((void *)addr,
				tscratch->entries,
				nr_entries - 1,
				sizeof(tscratch->entries[0]),
				cmp_mod_entry);
		if (entry)
			idx = entry - tscratch->entries;
	}

	return addr + module_delta->delta[idx];
}
4870
4871 #ifdef CONFIG_MODULES
/*
 * module_for_each_mod() callback: append @mod's name and text base
 * address to the scratch area of the trace_array passed in @data.
 * Returns -1 to stop iterating when there is no scratch area or no
 * room left for another entry, 0 otherwise.
 */
static int save_mod(struct module *mod, void *data)
{
	struct trace_array *tr = data;
	struct trace_scratch *tscratch;
	struct trace_mod_entry *entry;
	unsigned int size;

	tscratch = tr->scratch;
	if (!tscratch)
		return -1;
	size = tr->scratch_size;

	/* Make sure one more entry still fits in the scratch area */
	if (struct_size(tscratch, entries, tscratch->nr_entries + 1) > size)
		return -1;

	entry = &tscratch->entries[tscratch->nr_entries];

	tscratch->nr_entries++;

	entry->mod_addr = (unsigned long)mod->mem[MOD_TEXT].base;
	strscpy(entry->mod_name, mod->name);

	return 0;
}
4896 #else
/* !CONFIG_MODULES: no modules to record */
static int save_mod(struct module *mod, void *data)
{
	return 0;
}
4901 #endif
4902
update_last_data(struct trace_array * tr)4903 static void update_last_data(struct trace_array *tr)
4904 {
4905 struct trace_module_delta *module_delta;
4906 struct trace_scratch *tscratch;
4907
4908 if (!(tr->flags & TRACE_ARRAY_FL_BOOT))
4909 return;
4910
4911 if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
4912 return;
4913
4914 /* Only if the buffer has previous boot data clear and update it. */
4915 tr->flags &= ~TRACE_ARRAY_FL_LAST_BOOT;
4916
4917 /* If this is a backup instance, mark it for autoremove. */
4918 if (tr->flags & TRACE_ARRAY_FL_VMALLOC)
4919 tr->free_on_close = true;
4920
4921 /* Reset the module list and reload them */
4922 if (tr->scratch) {
4923 struct trace_scratch *tscratch = tr->scratch;
4924
4925 tscratch->clock_id = tr->clock_id;
4926 memset(tscratch->entries, 0,
4927 flex_array_size(tscratch, entries, tscratch->nr_entries));
4928 tscratch->nr_entries = 0;
4929
4930 guard(mutex)(&scratch_mutex);
4931 module_for_each_mod(save_mod, tr);
4932 }
4933
4934 /*
4935 * Need to clear all CPU buffers as there cannot be events
4936 * from the previous boot mixed with events with this boot
4937 * as that will cause a confusing trace. Need to clear all
4938 * CPU buffers, even for those that may currently be offline.
4939 */
4940 tracing_reset_all_cpus(&tr->array_buffer);
4941
4942 /* Using current data now */
4943 tr->text_delta = 0;
4944
4945 if (!tr->scratch)
4946 return;
4947
4948 tscratch = tr->scratch;
4949 module_delta = READ_ONCE(tr->module_delta);
4950 WRITE_ONCE(tr->module_delta, NULL);
4951 kfree_rcu(module_delta, rcu);
4952
4953 /* Set the persistent ring buffer meta data to this address */
4954 tscratch->text_addr = (unsigned long)_text;
4955 }
4956
4957 /**
4958 * tracing_update_buffers - used by tracing facility to expand ring buffers
4959 * @tr: The tracing instance
4960 *
4961 * To save on memory when the tracing is never used on a system with it
4962 * configured in. The ring buffers are set to a minimum size. But once
4963 * a user starts to use the tracing facility, then they need to grow
4964 * to their default size.
4965 *
4966 * This function is to be called when a tracer is about to be used.
4967 */
tracing_update_buffers(struct trace_array * tr)4968 int tracing_update_buffers(struct trace_array *tr)
4969 {
4970 int ret = 0;
4971
4972 if (!tr)
4973 tr = &global_trace;
4974
4975 guard(mutex)(&trace_types_lock);
4976
4977 update_last_data(tr);
4978
4979 if (!tr->ring_buffer_expanded)
4980 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
4981 RING_BUFFER_ALL_CPUS);
4982 return ret;
4983 }
4984
/*
 * Used to clear out the tracer before deletion of an instance.
 * Must have trace_types_lock held.
 */
static void tracing_set_nop(struct trace_array *tr)
{
	/* Already the nop tracer: nothing to do */
	if (tr->current_trace == &nop_trace)
		return;

	tr->current_trace->enabled--;

	/* Let the outgoing tracer clean up after itself */
	if (tr->current_trace->reset)
		tr->current_trace->reset(tr);

	tr->current_trace = &nop_trace;
	tr->current_trace_flags = nop_trace.flags;
}
5002
/*
 * NOTE(review): presumably set once trace options are changed by the
 * user so later tracer switches re-apply them -- confirm at the
 * locations that read/write this flag (not visible in this chunk).
 */
static bool tracer_options_updated;
5004
/*
 * tracing_set_tracer - switch the current tracer of a trace instance
 * @tr: the trace instance to update
 * @buf: the name of the tracer to switch to
 *
 * Looks up @buf in @tr's registered tracer list, tears down the current
 * tracer, and installs the new one. Handles expanding the ring buffer on
 * first use and arming/disarming the snapshot buffer when switching to or
 * from a tracer that uses it.
 *
 * Returns 0 on success, negative errno on failure.
 */
int tracing_set_tracer(struct trace_array *tr, const char *buf)
{
	struct tracer *trace = NULL;
	struct tracers *t;
	bool had_max_tr;
	int ret;

	guard(mutex)(&trace_types_lock);

	update_last_data(tr);

	/* Buffers start at a minimal size; expand on first real use. */
	if (!tr->ring_buffer_expanded) {
		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
						RING_BUFFER_ALL_CPUS);
		if (ret < 0)
			return ret;
		ret = 0;
	}

	/* Find the requested tracer by name in this instance's list. */
	list_for_each_entry(t, &tr->tracers, list) {
		if (strcmp(t->tracer->name, buf) == 0) {
			trace = t->tracer;
			break;
		}
	}
	if (!trace)
		return -EINVAL;

	/* Already the current tracer: nothing to do. */
	if (trace == tr->current_trace)
		return 0;

#ifdef CONFIG_TRACER_SNAPSHOT
	/* A pending conditional snapshot blocks switching to a snapshot tracer. */
	if (tracer_uses_snapshot(trace)) {
		local_irq_disable();
		arch_spin_lock(&tr->max_lock);
		ret = tr->cond_snapshot ? -EBUSY : 0;
		arch_spin_unlock(&tr->max_lock);
		local_irq_enable();
		if (ret)
			return ret;
	}
#endif
	/* Some tracers won't work on kernel command line */
	if (system_state < SYSTEM_RUNNING && trace->noboot) {
		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
			trace->name);
		return -EINVAL;
	}

	/* Some tracers are only allowed for the top level buffer */
	if (!trace_ok_for_array(trace, tr))
		return -EINVAL;

	/* If trace pipe files are being read, we can't change the tracer */
	if (tr->trace_ref)
		return -EBUSY;

	trace_branch_disable();

	/* Tear down the current tracer before installing the new one. */
	tr->current_trace->enabled--;

	if (tr->current_trace->reset)
		tr->current_trace->reset(tr);

	had_max_tr = tracer_uses_snapshot(tr->current_trace);

	/* Current trace needs to be nop_trace before synchronize_rcu */
	tr->current_trace = &nop_trace;
	tr->current_trace_flags = nop_trace.flags;

	if (had_max_tr && !tracer_uses_snapshot(trace)) {
		/*
		 * We need to make sure that the update_max_tr sees that
		 * current_trace changed to nop_trace to keep it from
		 * swapping the buffers after we resize it.
		 * The update_max_tr is called from interrupts disabled
		 * so a synchronized_sched() is sufficient.
		 */
		synchronize_rcu();
		free_snapshot(tr);
		tracing_disarm_snapshot(tr);
	}

	if (!had_max_tr && tracer_uses_snapshot(trace)) {
		ret = tracing_arm_snapshot_locked(tr);
		if (ret)
			return ret;
	}

	/* Use the per-instance flags if present, else the tracer's defaults. */
	tr->current_trace_flags = t->flags ? : t->tracer->flags;

	if (trace->init) {
		ret = tracer_init(trace, tr);
		if (ret) {
			/* On init failure, stay on nop_trace with its flags. */
			if (tracer_uses_snapshot(trace))
				tracing_disarm_snapshot(tr);
			tr->current_trace_flags = nop_trace.flags;
			return ret;
		}
	}

	tr->current_trace = trace;
	tr->current_trace->enabled++;
	trace_branch_enable(tr);

	return 0;
}
5112
5113 static ssize_t
tracing_set_trace_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)5114 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5115 size_t cnt, loff_t *ppos)
5116 {
5117 struct trace_array *tr = filp->private_data;
5118 char buf[MAX_TRACER_SIZE+1];
5119 char *name;
5120 size_t ret;
5121 int err;
5122
5123 ret = cnt;
5124
5125 if (cnt > MAX_TRACER_SIZE)
5126 cnt = MAX_TRACER_SIZE;
5127
5128 if (copy_from_user(buf, ubuf, cnt))
5129 return -EFAULT;
5130
5131 buf[cnt] = 0;
5132
5133 name = strim(buf);
5134
5135 err = tracing_set_tracer(tr, name);
5136 if (err)
5137 return err;
5138
5139 *ppos += ret;
5140
5141 return ret;
5142 }
5143
tracing_nsecs_read(unsigned long * ptr,char __user * ubuf,size_t cnt,loff_t * ppos)5144 ssize_t tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5145 size_t cnt, loff_t *ppos)
5146 {
5147 char buf[64];
5148 int r;
5149
5150 r = snprintf(buf, sizeof(buf), "%ld\n",
5151 *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5152 if (r > sizeof(buf))
5153 r = sizeof(buf);
5154 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5155 }
5156
tracing_nsecs_write(unsigned long * ptr,const char __user * ubuf,size_t cnt,loff_t * ppos)5157 ssize_t tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5158 size_t cnt, loff_t *ppos)
5159 {
5160 unsigned long val;
5161 int ret;
5162
5163 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5164 if (ret)
5165 return ret;
5166
5167 *ptr = val * 1000;
5168
5169 return cnt;
5170 }
5171
/* "tracing_thresh" read handler: report the global threshold in microseconds. */
static ssize_t
tracing_thresh_read(struct file *filp, char __user *ubuf,
		    size_t cnt, loff_t *ppos)
{
	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
}
5178
5179 static ssize_t
tracing_thresh_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)5180 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5181 size_t cnt, loff_t *ppos)
5182 {
5183 struct trace_array *tr = filp->private_data;
5184 int ret;
5185
5186 guard(mutex)(&trace_types_lock);
5187 ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5188 if (ret < 0)
5189 return ret;
5190
5191 if (tr->current_trace->update_thresh) {
5192 ret = tr->current_trace->update_thresh(tr);
5193 if (ret < 0)
5194 return ret;
5195 }
5196
5197 return cnt;
5198 }
5199
open_pipe_on_cpu(struct trace_array * tr,int cpu)5200 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
5201 {
5202 if (cpu == RING_BUFFER_ALL_CPUS) {
5203 if (cpumask_empty(tr->pipe_cpumask)) {
5204 cpumask_setall(tr->pipe_cpumask);
5205 return 0;
5206 }
5207 } else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
5208 cpumask_set_cpu(cpu, tr->pipe_cpumask);
5209 return 0;
5210 }
5211 return -EBUSY;
5212 }
5213
close_pipe_on_cpu(struct trace_array * tr,int cpu)5214 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
5215 {
5216 if (cpu == RING_BUFFER_ALL_CPUS) {
5217 WARN_ON(!cpumask_full(tr->pipe_cpumask));
5218 cpumask_clear(tr->pipe_cpumask);
5219 } else {
5220 WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
5221 cpumask_clear_cpu(cpu, tr->pipe_cpumask);
5222 }
5223 }
5224
/*
 * Open handler for trace_pipe: allocate and initialize a trace iterator,
 * claim the requested CPU(s), and bump the instance's pipe reference
 * count. Errors unwind in reverse order of acquisition.
 */
static int tracing_open_pipe(struct inode *inode, struct file *filp)
{
	struct trace_array *tr = inode->i_private;
	struct trace_iterator *iter;
	int cpu;
	int ret;

	ret = tracing_check_open_get_tr(tr);
	if (ret)
		return ret;

	guard(mutex)(&trace_types_lock);
	cpu = tracing_get_cpu(inode);
	ret = open_pipe_on_cpu(tr, cpu);
	if (ret)
		goto fail_pipe_on_cpu;

	/* create a buffer to store the information to pass to userspace */
	iter = kzalloc_obj(*iter);
	if (!iter) {
		ret = -ENOMEM;
		goto fail_alloc_iter;
	}

	trace_seq_init(&iter->seq);
	iter->trace = tr->current_trace;

	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
		ret = -ENOMEM;
		goto fail;
	}

	/* trace pipe does not show start of buffer */
	cpumask_setall(iter->started);

	if (tr->trace_flags & TRACE_ITER(LATENCY_FMT))
		iter->iter_flags |= TRACE_FILE_LAT_FMT;

	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
	if (trace_clocks[tr->clock_id].in_ns)
		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;

	iter->tr = tr;
	iter->array_buffer = &tr->array_buffer;
	iter->cpu_file = cpu;
	mutex_init(&iter->mutex);
	filp->private_data = iter;

	/* Give the tracer a chance to set up per-open state. */
	if (iter->trace->pipe_open)
		iter->trace->pipe_open(iter);

	nonseekable_open(inode, filp);

	/* Non-zero trace_ref blocks tracer switching (see tracing_set_tracer). */
	tr->trace_ref++;

	return ret;

fail:
	kfree(iter);
fail_alloc_iter:
	close_pipe_on_cpu(tr, cpu);
fail_pipe_on_cpu:
	__trace_array_put(tr);
	return ret;
}
5290
tracing_release_pipe(struct inode * inode,struct file * file)5291 static int tracing_release_pipe(struct inode *inode, struct file *file)
5292 {
5293 struct trace_iterator *iter = file->private_data;
5294 struct trace_array *tr = inode->i_private;
5295
5296 scoped_guard(mutex, &trace_types_lock) {
5297 tr->trace_ref--;
5298
5299 if (iter->trace->pipe_close)
5300 iter->trace->pipe_close(iter);
5301 close_pipe_on_cpu(tr, iter->cpu_file);
5302 }
5303
5304 free_trace_iter_content(iter);
5305 kfree(iter);
5306
5307 trace_array_put(tr);
5308
5309 return 0;
5310 }
5311
5312 static __poll_t
trace_poll(struct trace_iterator * iter,struct file * filp,poll_table * poll_table)5313 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5314 {
5315 struct trace_array *tr = iter->tr;
5316
5317 /* Iterators are static, they should be filled or empty */
5318 if (trace_buffer_iter(iter, iter->cpu_file))
5319 return EPOLLIN | EPOLLRDNORM;
5320
5321 if (tr->trace_flags & TRACE_ITER(BLOCK))
5322 /*
5323 * Always select as readable when in blocking mode
5324 */
5325 return EPOLLIN | EPOLLRDNORM;
5326 else
5327 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
5328 filp, poll_table, iter->tr->buffer_percent);
5329 }
5330
5331 static __poll_t
tracing_poll_pipe(struct file * filp,poll_table * poll_table)5332 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5333 {
5334 struct trace_iterator *iter = filp->private_data;
5335
5336 return trace_poll(iter, filp, poll_table);
5337 }
5338
/* Must be called with iter->mutex held. */
static int tracing_wait_pipe(struct file *filp)
{
	struct trace_iterator *iter = filp->private_data;
	int ret;

	/* Block until the buffer has something to read (or EOF/error). */
	while (trace_empty(iter)) {

		if ((filp->f_flags & O_NONBLOCK)) {
			return -EAGAIN;
		}

		/*
		 * We block until we read something and tracing is disabled.
		 * We still block if tracing is disabled, but we have never
		 * read anything. This allows a user to cat this file, and
		 * then enable tracing. But after we have read something,
		 * we give an EOF when tracing is again disabled.
		 *
		 * iter->pos will be 0 if we haven't read anything.
		 */
		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
			break;

		/* Drop iter->mutex while sleeping so readers/writers can progress. */
		mutex_unlock(&iter->mutex);

		ret = wait_on_pipe(iter, 0);

		mutex_lock(&iter->mutex);

		if (ret)
			return ret;
	}

	/* Positive return: data available (or EOF condition hit above). */
	return 1;
}
5375
update_last_data_if_empty(struct trace_array * tr)5376 static bool update_last_data_if_empty(struct trace_array *tr)
5377 {
5378 if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
5379 return false;
5380
5381 if (!ring_buffer_empty(tr->array_buffer.buffer))
5382 return false;
5383
5384 /*
5385 * If the buffer contains the last boot data and all per-cpu
5386 * buffers are empty, reset it from the kernel side.
5387 */
5388 update_last_data(tr);
5389 return true;
5390 }
5391
/*
 * Consumer reader.
 */
static ssize_t
tracing_read_pipe(struct file *filp, char __user *ubuf,
		  size_t cnt, loff_t *ppos)
{
	struct trace_iterator *iter = filp->private_data;
	ssize_t sret;

	/*
	 * Avoid more than one consumer on a single file descriptor
	 * This is just a matter of traces coherency, the ring buffer itself
	 * is protected.
	 */
	guard(mutex)(&iter->mutex);

	/* return any leftover data */
	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
	if (sret != -EBUSY)
		return sret;

	trace_seq_init(&iter->seq);

	/* Let the tracer intercept the read if it implements its own. */
	if (iter->trace->read) {
		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
		if (sret)
			return sret;
	}

waitagain:
	/* A drained last-boot buffer reads as EOF after being reclaimed. */
	if (update_last_data_if_empty(iter->tr))
		return 0;

	sret = tracing_wait_pipe(filp);
	if (sret <= 0)
		return sret;

	/* stop when tracing is finished */
	if (trace_empty(iter))
		return 0;

	if (cnt >= TRACE_SEQ_BUFFER_SIZE)
		cnt = TRACE_SEQ_BUFFER_SIZE - 1;

	/* reset all but tr, trace, and overruns */
	trace_iterator_reset(iter);
	cpumask_clear(iter->started);
	trace_seq_init(&iter->seq);

	trace_event_read_lock();
	trace_access_lock(iter->cpu_file);
	/* Format and consume entries until cnt bytes are ready for the user. */
	while (trace_find_next_entry_inc(iter) != NULL) {
		enum print_line_t ret;
		int save_len = iter->seq.seq.len;

		ret = print_trace_line(iter);
		if (ret == TRACE_TYPE_PARTIAL_LINE) {
			/*
			 * If one print_trace_line() fills entire trace_seq in one shot,
			 * trace_seq_to_user() will returns -EBUSY because save_len == 0,
			 * In this case, we need to consume it, otherwise, loop will peek
			 * this event next time, resulting in an infinite loop.
			 */
			if (save_len == 0) {
				iter->seq.full = 0;
				trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
				trace_consume(iter);
				break;
			}

			/* In other cases, don't print partial lines */
			iter->seq.seq.len = save_len;
			break;
		}
		if (ret != TRACE_TYPE_NO_CONSUME)
			trace_consume(iter);

		if (trace_seq_used(&iter->seq) >= cnt)
			break;

		/*
		 * Setting the full flag means we reached the trace_seq buffer
		 * size and we should leave by partial output condition above.
		 * One of the trace_seq_* functions is not used properly.
		 */
		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
			  iter->ent->type);
	}
	trace_access_unlock(iter->cpu_file);
	trace_event_read_unlock();

	/* Now copy what we have to the user */
	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
	if (iter->seq.readpos >= trace_seq_used(&iter->seq))
		trace_seq_init(&iter->seq);

	/*
	 * If there was nothing to send to user, in spite of consuming trace
	 * entries, go back to wait for more entries.
	 */
	if (sret == -EBUSY)
		goto waitagain;

	return sret;
}
5498
/* splice callback: free one page allocated in tracing_splice_read_pipe(). */
static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
				     unsigned int idx)
{
	__free_page(spd->pages[idx]);
}
5504
/*
 * Fill iter->seq with as many formatted trace lines as fit in one page
 * without exceeding @rem bytes. Returns the remaining byte budget; a
 * return of 0 tells the caller to stop filling pages.
 */
static size_t
tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
{
	size_t count;
	int save_len;
	int ret;

	/* Seq buffer is page-sized, exactly what we need. */
	for (;;) {
		/* Remember the length so a partial line can be rolled back. */
		save_len = iter->seq.seq.len;
		ret = print_trace_line(iter);

		if (trace_seq_has_overflowed(&iter->seq)) {
			iter->seq.seq.len = save_len;
			break;
		}

		/*
		 * This should not be hit, because it should only
		 * be set if the iter->seq overflowed. But check it
		 * anyway to be safe.
		 */
		if (ret == TRACE_TYPE_PARTIAL_LINE) {
			iter->seq.seq.len = save_len;
			break;
		}

		count = trace_seq_used(&iter->seq) - save_len;
		if (rem < count) {
			rem = 0;
			iter->seq.seq.len = save_len;
			break;
		}

		if (ret != TRACE_TYPE_NO_CONSUME)
			trace_consume(iter);
		rem -= count;
		if (!trace_find_next_entry_inc(iter)) {
			/* No more entries: signal the caller to stop. */
			rem = 0;
			iter->ent = NULL;
			break;
		}
	}

	return rem;
}
5551
/*
 * splice_read handler for trace_pipe: format trace entries into freshly
 * allocated pages and hand them to the pipe, consuming the entries.
 */
static ssize_t tracing_splice_read_pipe(struct file *filp,
					loff_t *ppos,
					struct pipe_inode_info *pipe,
					size_t len,
					unsigned int flags)
{
	struct page *pages_def[PIPE_DEF_BUFFERS];
	struct partial_page partial_def[PIPE_DEF_BUFFERS];
	struct trace_iterator *iter = filp->private_data;
	struct splice_pipe_desc spd = {
		.pages = pages_def,
		.partial = partial_def,
		.nr_pages = 0, /* This gets updated below. */
		.nr_pages_max = PIPE_DEF_BUFFERS,
		.ops = &default_pipe_buf_ops,
		.spd_release = tracing_spd_release_pipe,
	};
	ssize_t ret;
	size_t rem;
	unsigned int i;

	if (splice_grow_spd(pipe, &spd))
		return -ENOMEM;

	mutex_lock(&iter->mutex);

	/* Let the tracer intercept the splice if it implements its own. */
	if (iter->trace->splice_read) {
		ret = iter->trace->splice_read(iter, filp,
					       ppos, pipe, len, flags);
		if (ret)
			goto out_err;
	}

	ret = tracing_wait_pipe(filp);
	if (ret <= 0)
		goto out_err;

	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
		ret = -EFAULT;
		goto out_err;
	}

	trace_event_read_lock();
	trace_access_lock(iter->cpu_file);

	/* Fill as many pages as possible. */
	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
		spd.pages[i] = alloc_page(GFP_KERNEL);
		if (!spd.pages[i])
			break;

		rem = tracing_fill_pipe_page(rem, iter);

		/* Copy the data into the page, so we can start over. */
		ret = trace_seq_to_buffer(&iter->seq,
					  page_address(spd.pages[i]),
					  min((size_t)trace_seq_used(&iter->seq),
					      (size_t)PAGE_SIZE));
		if (ret < 0) {
			__free_page(spd.pages[i]);
			break;
		}
		spd.partial[i].offset = 0;
		spd.partial[i].len = ret;

		trace_seq_init(&iter->seq);
	}

	trace_access_unlock(iter->cpu_file);
	trace_event_read_unlock();
	mutex_unlock(&iter->mutex);

	spd.nr_pages = i;

	if (i)
		ret = splice_to_pipe(pipe, &spd);
	else
		ret = 0;
out:
	splice_shrink_spd(&spd);
	return ret;

out_err:
	mutex_unlock(&iter->mutex);
	goto out;
}
5638
5639 static ssize_t
tracing_syscall_buf_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)5640 tracing_syscall_buf_read(struct file *filp, char __user *ubuf,
5641 size_t cnt, loff_t *ppos)
5642 {
5643 struct inode *inode = file_inode(filp);
5644 struct trace_array *tr = inode->i_private;
5645 char buf[64];
5646 int r;
5647
5648 r = snprintf(buf, 64, "%d\n", tr->syscall_buf_sz);
5649
5650 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5651 }
5652
5653 static ssize_t
tracing_syscall_buf_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)5654 tracing_syscall_buf_write(struct file *filp, const char __user *ubuf,
5655 size_t cnt, loff_t *ppos)
5656 {
5657 struct inode *inode = file_inode(filp);
5658 struct trace_array *tr = inode->i_private;
5659 unsigned long val;
5660 int ret;
5661
5662 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5663 if (ret)
5664 return ret;
5665
5666 if (val > SYSCALL_FAULT_USER_MAX)
5667 val = SYSCALL_FAULT_USER_MAX;
5668
5669 tr->syscall_buf_sz = val;
5670
5671 *ppos += cnt;
5672
5673 return cnt;
5674 }
5675
5676 static ssize_t
tracing_entries_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)5677 tracing_entries_read(struct file *filp, char __user *ubuf,
5678 size_t cnt, loff_t *ppos)
5679 {
5680 struct inode *inode = file_inode(filp);
5681 struct trace_array *tr = inode->i_private;
5682 int cpu = tracing_get_cpu(inode);
5683 char buf[64];
5684 int r = 0;
5685 ssize_t ret;
5686
5687 mutex_lock(&trace_types_lock);
5688
5689 if (cpu == RING_BUFFER_ALL_CPUS) {
5690 int cpu, buf_size_same;
5691 unsigned long size;
5692
5693 size = 0;
5694 buf_size_same = 1;
5695 /* check if all cpu sizes are same */
5696 for_each_tracing_cpu(cpu) {
5697 /* fill in the size from first enabled cpu */
5698 if (size == 0)
5699 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
5700 if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
5701 buf_size_same = 0;
5702 break;
5703 }
5704 }
5705
5706 if (buf_size_same) {
5707 if (!tr->ring_buffer_expanded)
5708 r = sprintf(buf, "%lu (expanded: %lu)\n",
5709 size >> 10,
5710 trace_buf_size >> 10);
5711 else
5712 r = sprintf(buf, "%lu\n", size >> 10);
5713 } else
5714 r = sprintf(buf, "X\n");
5715 } else
5716 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
5717
5718 mutex_unlock(&trace_types_lock);
5719
5720 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5721 return ret;
5722 }
5723
5724 static ssize_t
tracing_entries_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)5725 tracing_entries_write(struct file *filp, const char __user *ubuf,
5726 size_t cnt, loff_t *ppos)
5727 {
5728 struct inode *inode = file_inode(filp);
5729 struct trace_array *tr = inode->i_private;
5730 unsigned long val;
5731 int ret;
5732
5733 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5734 if (ret)
5735 return ret;
5736
5737 /* must have at least 1 entry */
5738 if (!val)
5739 return -EINVAL;
5740
5741 /* value is in KB */
5742 val <<= 10;
5743 ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
5744 if (ret < 0)
5745 return ret;
5746
5747 *ppos += cnt;
5748
5749 return cnt;
5750 }
5751
5752 static ssize_t
tracing_total_entries_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)5753 tracing_total_entries_read(struct file *filp, char __user *ubuf,
5754 size_t cnt, loff_t *ppos)
5755 {
5756 struct trace_array *tr = filp->private_data;
5757 char buf[64];
5758 int r, cpu;
5759 unsigned long size = 0, expanded_size = 0;
5760
5761 mutex_lock(&trace_types_lock);
5762 for_each_tracing_cpu(cpu) {
5763 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
5764 if (!tr->ring_buffer_expanded)
5765 expanded_size += trace_buf_size >> 10;
5766 }
5767 if (tr->ring_buffer_expanded)
5768 r = sprintf(buf, "%lu\n", size);
5769 else
5770 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
5771 mutex_unlock(&trace_types_lock);
5772
5773 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5774 }
5775
/* Sentinel *pos value 0 maps to this header token in the seq_file walk. */
#define LAST_BOOT_HEADER ((void *)1)

/*
 * seq_file next: position 0 yields the header, positions 1..n yield the
 * saved module entries from the last boot's scratch area.
 */
static void *l_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct trace_array *tr = m->private;
	struct trace_scratch *tscratch = tr->scratch;
	unsigned int index = *pos;

	(*pos)++;

	if (*pos == 1)
		return LAST_BOOT_HEADER;

	/* Only show offsets of the last boot data */
	if (!tscratch || !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
		return NULL;

	/* *pos 0 is for the header, 1 is for the first module */
	index--;

	if (index >= tscratch->nr_entries)
		return NULL;

	return &tscratch->entries[index];
}
5801
/* seq_file start: take scratch_mutex (dropped in l_stop) and emit first item. */
static void *l_start(struct seq_file *m, loff_t *pos)
{
	mutex_lock(&scratch_mutex);

	return l_next(m, NULL, pos);
}
5808
/* seq_file stop: release the mutex taken in l_start(). */
static void l_stop(struct seq_file *m, void *p)
{
	mutex_unlock(&scratch_mutex);
}
5813
show_last_boot_header(struct seq_file * m,struct trace_array * tr)5814 static void show_last_boot_header(struct seq_file *m, struct trace_array *tr)
5815 {
5816 struct trace_scratch *tscratch = tr->scratch;
5817
5818 /*
5819 * Do not leak KASLR address. This only shows the KASLR address of
5820 * the last boot. When the ring buffer is started, the LAST_BOOT
5821 * flag gets cleared, and this should only report "current".
5822 * Otherwise it shows the KASLR address from the previous boot which
5823 * should not be the same as the current boot.
5824 */
5825 if (tscratch && (tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
5826 seq_printf(m, "%lx\t[kernel]\n", tscratch->text_addr);
5827 else
5828 seq_puts(m, "# Current\n");
5829 }
5830
l_show(struct seq_file * m,void * v)5831 static int l_show(struct seq_file *m, void *v)
5832 {
5833 struct trace_array *tr = m->private;
5834 struct trace_mod_entry *entry = v;
5835
5836 if (v == LAST_BOOT_HEADER) {
5837 show_last_boot_header(m, tr);
5838 return 0;
5839 }
5840
5841 seq_printf(m, "%lx\t%s\n", entry->mod_addr, entry->mod_name);
5842 return 0;
5843 }
5844
/* seq_file operations for the last_boot_info file. */
static const struct seq_operations last_boot_seq_ops = {
	.start = l_start,
	.next = l_next,
	.stop = l_stop,
	.show = l_show,
};
5851
tracing_last_boot_open(struct inode * inode,struct file * file)5852 static int tracing_last_boot_open(struct inode *inode, struct file *file)
5853 {
5854 struct trace_array *tr = inode->i_private;
5855 struct seq_file *m;
5856 int ret;
5857
5858 ret = tracing_check_open_get_tr(tr);
5859 if (ret)
5860 return ret;
5861
5862 ret = seq_open(file, &last_boot_seq_ops);
5863 if (ret) {
5864 trace_array_put(tr);
5865 return ret;
5866 }
5867
5868 m = file->private_data;
5869 m->private = tr;
5870
5871 return 0;
5872 }
5873
tracing_buffer_meta_open(struct inode * inode,struct file * filp)5874 static int tracing_buffer_meta_open(struct inode *inode, struct file *filp)
5875 {
5876 struct trace_array *tr = inode->i_private;
5877 int cpu = tracing_get_cpu(inode);
5878 int ret;
5879
5880 ret = tracing_check_open_get_tr(tr);
5881 if (ret)
5882 return ret;
5883
5884 ret = ring_buffer_meta_seq_init(filp, tr->array_buffer.buffer, cpu);
5885 if (ret < 0)
5886 __trace_array_put(tr);
5887 return ret;
5888 }
5889
5890 static ssize_t
tracing_free_buffer_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)5891 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
5892 size_t cnt, loff_t *ppos)
5893 {
5894 /*
5895 * There is no need to read what the user has written, this function
5896 * is just to make sure that there is no error when "echo" is used
5897 */
5898
5899 *ppos += cnt;
5900
5901 return cnt;
5902 }
5903
5904 static int
tracing_free_buffer_release(struct inode * inode,struct file * filp)5905 tracing_free_buffer_release(struct inode *inode, struct file *filp)
5906 {
5907 struct trace_array *tr = inode->i_private;
5908
5909 /* disable tracing ? */
5910 if (tr->trace_flags & TRACE_ITER(STOP_ON_FREE))
5911 tracer_tracing_off(tr);
5912 /* resize the ring buffer to 0 */
5913 tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
5914
5915 trace_array_put(tr);
5916
5917 return 0;
5918 }
5919
#define TRACE_MARKER_MAX_SIZE 4096

/*
 * Write a user-supplied marker of @cnt bytes at @buf into @tr's ring
 * buffer as a TRACE_PRINT event, recording @ip as the event's ip.
 * Runs any triggers attached to the trace_marker event file. Returns
 * the number of bytes written or -EBADF if the buffer refuses the event.
 */
static ssize_t write_marker_to_buffer(struct trace_array *tr, const char *buf,
				      size_t cnt, unsigned long ip)
{
	struct ring_buffer_event *event;
	enum event_trigger_type tt = ETT_NONE;
	struct trace_buffer *buffer;
	struct print_entry *entry;
	int meta_size;
	ssize_t written;
	size_t size;

	meta_size = sizeof(*entry) + 2; /* add '\0' and possible '\n' */
 again:
	size = cnt + meta_size;

	buffer = tr->array_buffer.buffer;
	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
					    tracing_gen_ctx());
	if (unlikely(!event)) {
		/*
		 * If the size was greater than what was allowed, then
		 * make it smaller and try again.
		 */
		if (size > ring_buffer_max_event_size(buffer)) {
			cnt = ring_buffer_max_event_size(buffer) - meta_size;
			/* The above should only happen once */
			if (WARN_ON_ONCE(cnt + meta_size == size))
				return -EBADF;
			goto again;
		}

		/* Ring buffer disabled, return as if not open for write */
		return -EBADF;
	}

	entry = ring_buffer_event_data(event);
	entry->ip = ip;
	memcpy(&entry->buf, buf, cnt);
	written = cnt;

	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
		/* do not add \n before testing triggers, but add \0 */
		entry->buf[cnt] = '\0';
		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
	}

	/* Ensure the stored text ends with a newline and is NUL terminated. */
	if (entry->buf[cnt - 1] != '\n') {
		entry->buf[cnt] = '\n';
		entry->buf[cnt + 1] = '\0';
	} else
		entry->buf[cnt] = '\0';

	if (static_branch_unlikely(&trace_marker_exports_enabled))
		ftrace_exports(event, TRACE_EXPORT_MARKER);
	__buffer_unlock_commit(buffer, event);

	/* Triggers that fired above are completed after the commit. */
	if (tt)
		event_triggers_post_call(tr->trace_marker_file, tt);

	return written;
}
5983
/* One per-CPU scratch buffer for copying data in from user space. */
struct trace_user_buf {
	char *buf;
};

/* Serializes allocation, ref counting, and teardown of user buffers. */
static DEFINE_MUTEX(trace_user_buffer_mutex);
/* NOTE(review): global descriptor; its users are outside this chunk — confirm. */
static struct trace_user_buf_info *trace_user_buffer;
5990
/**
 * trace_user_fault_destroy - free up allocated memory of a trace user buffer
 * @tinfo: The descriptor to free up
 *
 * Frees any data allocated in the trace info descriptor.
 */
void trace_user_fault_destroy(struct trace_user_buf_info *tinfo)
{
	char *buf;
	int cpu;

	/* Tolerate a NULL or never-enabled descriptor. */
	if (!tinfo || !tinfo->tbuf)
		return;

	/* kfree(NULL) is a no-op, so partially allocated sets are fine. */
	for_each_possible_cpu(cpu) {
		buf = per_cpu_ptr(tinfo->tbuf, cpu)->buf;
		kfree(buf);
	}
	free_percpu(tinfo->tbuf);
}
6011
/*
 * Allocate the per-CPU scratch buffers for @tinfo, each @size bytes on
 * its CPU's node. On failure the descriptor remains safe to hand to
 * trace_user_fault_destroy(). Caller holds trace_user_buffer_mutex.
 */
static int user_fault_buffer_enable(struct trace_user_buf_info *tinfo, size_t size)
{
	int cpu;

	lockdep_assert_held(&trace_user_buffer_mutex);

	tinfo->tbuf = alloc_percpu(struct trace_user_buf);
	if (!tinfo->tbuf)
		return -ENOMEM;

	tinfo->ref = 1;
	tinfo->size = size;

	/* Start every slot as NULL so a partial failure frees cleanly. */
	for_each_possible_cpu(cpu)
		per_cpu_ptr(tinfo->tbuf, cpu)->buf = NULL;

	for_each_possible_cpu(cpu) {
		char *buf = kmalloc_node(size, GFP_KERNEL, cpu_to_node(cpu));

		if (!buf)
			return -ENOMEM;
		per_cpu_ptr(tinfo->tbuf, cpu)->buf = buf;
	}

	return 0;
}
6041
/* For internal use. Free and reinitialize */
static void user_buffer_free(struct trace_user_buf_info **tinfo)
{
	lockdep_assert_held(&trace_user_buffer_mutex);

	/* Release the per-CPU buffers, then the descriptor itself. */
	trace_user_fault_destroy(*tinfo);
	kfree(*tinfo);
	*tinfo = NULL;
}
6051
6052 /* For internal use. Initialize and allocate */
user_buffer_init(struct trace_user_buf_info ** tinfo,size_t size)6053 static int user_buffer_init(struct trace_user_buf_info **tinfo, size_t size)
6054 {
6055 bool alloc = false;
6056 int ret;
6057
6058 lockdep_assert_held(&trace_user_buffer_mutex);
6059
6060 if (!*tinfo) {
6061 alloc = true;
6062 *tinfo = kzalloc_obj(**tinfo);
6063 if (!*tinfo)
6064 return -ENOMEM;
6065 }
6066
6067 ret = user_fault_buffer_enable(*tinfo, size);
6068 if (ret < 0 && alloc)
6069 user_buffer_free(tinfo);
6070
6071 return ret;
6072 }
6073
/* For internal use, dereference and free if necessary */
static void user_buffer_put(struct trace_user_buf_info **tinfo)
{
	guard(mutex)(&trace_user_buffer_mutex);

	if (WARN_ON_ONCE(!*tinfo || !(*tinfo)->ref))
		return;

	/* Free only when the last reference is dropped. */
	if (--(*tinfo)->ref)
		return;

	user_buffer_free(tinfo);
}
6087
/**
 * trace_user_fault_init - Allocate or reference a per CPU buffer
 * @tinfo: A pointer to the trace buffer descriptor
 * @size: The size to allocate each per CPU buffer
 *
 * Create a per CPU buffer that can be used to copy from user space
 * in a task context. When calling trace_user_fault_read(), preemption
 * must be disabled, and it will enable preemption and copy user
 * space data to the buffer. If any schedule switches occur, it will
 * retry until it succeeds without a schedule switch knowing the buffer
 * is still valid.
 *
 * Returns 0 on success, negative on failure.
 */
int trace_user_fault_init(struct trace_user_buf_info *tinfo, size_t size)
{
	int ret;

	if (!tinfo)
		return -EINVAL;

	guard(mutex)(&trace_user_buffer_mutex);

	/* @tinfo is non-NULL here, so user_buffer_init() will not allocate it. */
	ret = user_buffer_init(&tinfo, size);
	if (ret < 0)
		trace_user_fault_destroy(tinfo);

	return ret;
}
6117
6118 /**
6119 * trace_user_fault_get - up the ref count for the user buffer
6120 * @tinfo: A pointer to a pointer to the trace buffer descriptor
6121 *
6122 * Ups the ref count of the trace buffer.
6123 *
6124 * Returns the new ref count.
6125 */
trace_user_fault_get(struct trace_user_buf_info * tinfo)6126 int trace_user_fault_get(struct trace_user_buf_info *tinfo)
6127 {
6128 if (!tinfo)
6129 return -1;
6130
6131 guard(mutex)(&trace_user_buffer_mutex);
6132
6133 tinfo->ref++;
6134 return tinfo->ref;
6135 }
6136
6137 /**
6138 * trace_user_fault_put - dereference a per cpu trace buffer
6139 * @tinfo: The @tinfo that was passed to trace_user_fault_get()
6140 *
6141 * Decrement the ref count of @tinfo.
6142 *
6143 * Returns the new refcount (negative on error).
6144 */
trace_user_fault_put(struct trace_user_buf_info * tinfo)6145 int trace_user_fault_put(struct trace_user_buf_info *tinfo)
6146 {
6147 guard(mutex)(&trace_user_buffer_mutex);
6148
6149 if (WARN_ON_ONCE(!tinfo || !tinfo->ref))
6150 return -1;
6151
6152 --tinfo->ref;
6153 return tinfo->ref;
6154 }
6155
6156 /**
6157 * trace_user_fault_read - Read user space into a per CPU buffer
6158 * @tinfo: The @tinfo allocated by trace_user_fault_get()
6159 * @ptr: The user space pointer to read
6160 * @size: The size of user space to read.
6161 * @copy_func: Optional function to use to copy from user space
6162 * @data: Data to pass to copy_func if it was supplied
6163 *
6164 * Preemption must be disabled when this is called, and must not
6165 * be enabled while using the returned buffer.
6166 * This does the copying from user space into a per CPU buffer.
6167 *
6168 * The @size must not be greater than the size passed in to
6169 * trace_user_fault_init().
6170 *
6171 * If @copy_func is NULL, trace_user_fault_read() will use copy_from_user(),
6172 * otherwise it will call @copy_func. It will call @copy_func with:
6173 *
6174 * buffer: the per CPU buffer of the @tinfo.
6175 * ptr: The pointer @ptr to user space to read
6176 * size: The @size of the ptr to read
6177 * data: The @data parameter
6178 *
6179 * It is expected that @copy_func will return 0 on success and non zero
6180 * if there was a fault.
6181 *
6182 * Returns a pointer to the buffer with the content read from @ptr.
6183 * Preemption must remain disabled while the caller accesses the
6184 * buffer returned by this function.
6185 * Returns NULL if there was a fault, or the size passed in is
6186 * greater than the size passed to trace_user_fault_init().
6187 */
trace_user_fault_read(struct trace_user_buf_info * tinfo,const char __user * ptr,size_t size,trace_user_buf_copy copy_func,void * data)6188 char *trace_user_fault_read(struct trace_user_buf_info *tinfo,
6189 const char __user *ptr, size_t size,
6190 trace_user_buf_copy copy_func, void *data)
6191 {
6192 int cpu = smp_processor_id();
6193 char *buffer = per_cpu_ptr(tinfo->tbuf, cpu)->buf;
6194 unsigned int cnt;
6195 int trys = 0;
6196 int ret;
6197
6198 lockdep_assert_preemption_disabled();
6199
6200 /*
6201 * It's up to the caller to not try to copy more than it said
6202 * it would.
6203 */
6204 if (size > tinfo->size)
6205 return NULL;
6206
6207 /*
6208 * This acts similar to a seqcount. The per CPU context switches are
6209 * recorded, migration is disabled and preemption is enabled. The
6210 * read of the user space memory is copied into the per CPU buffer.
6211 * Preemption is disabled again, and if the per CPU context switches count
6212 * is still the same, it means the buffer has not been corrupted.
6213 * If the count is different, it is assumed the buffer is corrupted
6214 * and reading must be tried again.
6215 */
6216
6217 do {
6218 /*
6219 * It is possible that something is trying to migrate this
6220 * task. What happens then, is when preemption is enabled,
6221 * the migration thread will preempt this task, try to
6222 * migrate it, fail, then let it run again. That will
6223 * cause this to loop again and never succeed.
6224 * On failures, enabled and disable preemption with
6225 * migration enabled, to allow the migration thread to
6226 * migrate this task.
6227 */
6228 if (trys) {
6229 preempt_enable_notrace();
6230 preempt_disable_notrace();
6231 cpu = smp_processor_id();
6232 buffer = per_cpu_ptr(tinfo->tbuf, cpu)->buf;
6233 }
6234
6235 /*
6236 * If for some reason, copy_from_user() always causes a context
6237 * switch, this would then cause an infinite loop.
6238 * If this task is preempted by another user space task, it
6239 * will cause this task to try again. But just in case something
6240 * changes where the copying from user space causes another task
6241 * to run, prevent this from going into an infinite loop.
6242 * 100 tries should be plenty.
6243 */
6244 if (WARN_ONCE(trys++ > 100, "Error: Too many tries to read user space"))
6245 return NULL;
6246
6247 /* Read the current CPU context switch counter */
6248 cnt = nr_context_switches_cpu(cpu);
6249
6250 /*
6251 * Preemption is going to be enabled, but this task must
6252 * remain on this CPU.
6253 */
6254 migrate_disable();
6255
6256 /*
6257 * Now preemption is being enabled and another task can come in
6258 * and use the same buffer and corrupt our data.
6259 */
6260 preempt_enable_notrace();
6261
6262 /* Make sure preemption is enabled here */
6263 lockdep_assert_preemption_enabled();
6264
6265 if (copy_func) {
6266 ret = copy_func(buffer, ptr, size, data);
6267 } else {
6268 ret = __copy_from_user(buffer, ptr, size);
6269 }
6270
6271 preempt_disable_notrace();
6272 migrate_enable();
6273
6274 /* if it faulted, no need to test if the buffer was corrupted */
6275 if (ret)
6276 return NULL;
6277
6278 /*
6279 * Preemption is disabled again, now check the per CPU context
6280 * switch counter. If it doesn't match, then another user space
6281 * process may have schedule in and corrupted our buffer. In that
6282 * case the copying must be retried.
6283 */
6284 } while (nr_context_switches_cpu(cpu) != cnt);
6285
6286 return buffer;
6287 }
6288
/*
 * Write handler for the trace_marker file: copies the user string into
 * a per CPU bounce buffer and injects it as a print event into the
 * instance's ring buffer (or all instances sharing the global marker).
 */
static ssize_t
tracing_mark_write(struct file *filp, const char __user *ubuf,
		   size_t cnt, loff_t *fpos)
{
	struct trace_array *tr = filp->private_data;
	ssize_t written = -ENODEV;
	unsigned long ip;
	char *buf;

	if (unlikely(tracing_disabled))
		return -EINVAL;

	if (!(tr->trace_flags & TRACE_ITER(MARKERS)))
		return -EINVAL;

	/* Reject sizes that would go negative when treated as ssize_t */
	if ((ssize_t)cnt < 0)
		return -EINVAL;

	/* Silently truncate oversized writes rather than failing them */
	if (cnt > TRACE_MARKER_MAX_SIZE)
		cnt = TRACE_MARKER_MAX_SIZE;

	/* Must have preemption disabled while having access to the buffer */
	guard(preempt_notrace)();

	buf = trace_user_fault_read(trace_user_buffer, ubuf, cnt, NULL, NULL);
	if (!buf)
		return -EFAULT;

	/* The selftests expect this function to be the IP address */
	ip = _THIS_IP_;

	/* The global trace_marker can go to multiple instances */
	if (tr == &global_trace) {
		guard(rcu)();
		/* Stop on the first instance that fails to take the event */
		list_for_each_entry_rcu(tr, &marker_copies, marker_list) {
			written = write_marker_to_buffer(tr, buf, cnt, ip);
			if (written < 0)
				break;
		}
	} else {
		written = write_marker_to_buffer(tr, buf, cnt, ip);
	}

	return written;
}
6334
/*
 * Reserve a TRACE_RAW_DATA event on @tr's ring buffer and copy @cnt bytes
 * of @buf (a binary id followed by payload) into it.
 *
 * Returns the number of bytes written, -EINVAL if the event would exceed
 * the ring buffer's maximum event size, or -EBADF if the buffer is disabled.
 */
static ssize_t write_raw_marker_to_buffer(struct trace_array *tr,
					  const char *buf, size_t cnt)
{
	struct ring_buffer_event *event;
	struct trace_buffer *buffer;
	struct raw_data_entry *entry;
	ssize_t written;
	size_t size;

	/* cnt includes both the entry->id and the data behind it. */
	size = struct_offset(entry, id) + cnt;

	buffer = tr->array_buffer.buffer;

	if (size > ring_buffer_max_event_size(buffer))
		return -EINVAL;

	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
					    tracing_gen_ctx());
	if (!event)
		/* Ring buffer disabled, return as if not open for write */
		return -EBADF;

	entry = ring_buffer_event_data(event);
	/*
	 * The copy starts at entry->id and deliberately runs past that
	 * member into the trailing payload, which the reservation above
	 * sized for; unsafe_memcpy() documents why this is safe.
	 */
	unsafe_memcpy(&entry->id, buf, cnt,
		      "id and content already reserved on ring buffer"
		      "'buf' includes the 'id' and the data."
		      "'entry' was allocated with cnt from 'id'.");
	written = cnt;

	__buffer_unlock_commit(buffer, event);

	return written;
}
6369
/*
 * Write handler for the trace_marker_raw file: like tracing_mark_write()
 * but the payload is binary (a tag id plus data) and is written
 * all-or-nothing -- no truncation.
 */
static ssize_t
tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
		       size_t cnt, loff_t *fpos)
{
	struct trace_array *tr = filp->private_data;
	ssize_t written = -ENODEV;
	char *buf;

	if (unlikely(tracing_disabled))
		return -EINVAL;

	if (!(tr->trace_flags & TRACE_ITER(MARKERS)))
		return -EINVAL;

	/* The marker must at least have a tag id */
	if (cnt < sizeof(unsigned int))
		return -EINVAL;

	/* raw write is all or nothing */
	if (cnt > TRACE_MARKER_MAX_SIZE)
		return -EINVAL;

	/* Must have preemption disabled while having access to the buffer */
	guard(preempt_notrace)();

	buf = trace_user_fault_read(trace_user_buffer, ubuf, cnt, NULL, NULL);
	if (!buf)
		return -EFAULT;

	/* The global trace_marker_raw can go to multiple instances */
	if (tr == &global_trace) {
		guard(rcu)();
		/* Stop on the first instance that fails to take the event */
		list_for_each_entry_rcu(tr, &marker_copies, marker_list) {
			written = write_raw_marker_to_buffer(tr, buf, cnt);
			if (written < 0)
				break;
		}
	} else {
		written = write_raw_marker_to_buffer(tr, buf, cnt);
	}

	return written;
}
6413
/*
 * Open handler shared by trace_marker and trace_marker_raw: creates the
 * shared per CPU user-copy buffer on first open, otherwise takes a
 * reference on it. The reference is dropped again if the generic open
 * fails, keeping the buffer's ref count balanced.
 */
static int tracing_mark_open(struct inode *inode, struct file *filp)
{
	int ret;

	scoped_guard(mutex, &trace_user_buffer_mutex) {
		if (!trace_user_buffer) {
			/* First opener allocates the per CPU buffers */
			ret = user_buffer_init(&trace_user_buffer, TRACE_MARKER_MAX_SIZE);
			if (ret < 0)
				return ret;
		} else {
			trace_user_buffer->ref++;
		}
	}

	stream_open(inode, filp);
	ret = tracing_open_generic_tr(inode, filp);
	if (ret < 0)
		/* Drop the reference (or free) taken above */
		user_buffer_put(&trace_user_buffer);
	return ret;
}
6434
/* Release handler: drop the user-copy buffer ref taken at open time */
static int tracing_mark_release(struct inode *inode, struct file *file)
{
	user_buffer_put(&trace_user_buffer);
	return tracing_release_generic_tr(inode, file);
}
6440
tracing_clock_show(struct seq_file * m,void * v)6441 static int tracing_clock_show(struct seq_file *m, void *v)
6442 {
6443 struct trace_array *tr = m->private;
6444 int i;
6445
6446 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6447 seq_printf(m,
6448 "%s%s%s%s", i ? " " : "",
6449 i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6450 i == tr->clock_id ? "]" : "");
6451 seq_putc(m, '\n');
6452
6453 return 0;
6454 }
6455
/*
 * Switch @tr to the trace clock named @clockstr. Resets the (per CPU)
 * buffers because timestamps from different clocks are not comparable.
 *
 * Returns 0 on success or -EINVAL if @clockstr names no known clock.
 */
int tracing_set_clock(struct trace_array *tr, const char *clockstr)
{
	int i;

	/* Look the name up in the global clock table */
	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
		if (strcmp(trace_clocks[i].name, clockstr) == 0)
			break;
	}
	if (i == ARRAY_SIZE(trace_clocks))
		return -EINVAL;

	guard(mutex)(&trace_types_lock);

	tr->clock_id = i;

	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);

	/*
	 * New clock may not be consistent with the previous clock.
	 * Reset the buffer so that it doesn't have incomparable timestamps.
	 */
	tracing_reset_online_cpus(&tr->array_buffer);

#ifdef CONFIG_TRACER_SNAPSHOT
	/* Keep the snapshot buffer's clock (and contents) in sync too */
	if (tr->snapshot_buffer.buffer)
		ring_buffer_set_clock(tr->snapshot_buffer.buffer, trace_clocks[i].func);
	tracing_reset_online_cpus(&tr->snapshot_buffer);
#endif
	update_last_data_if_empty(tr);

	/* Record the clock in the persistent scratch area, if any */
	if (tr->scratch && !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) {
		struct trace_scratch *tscratch = tr->scratch;

		tscratch->clock_id = i;
	}

	return 0;
}
6494
tracing_clock_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * fpos)6495 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6496 size_t cnt, loff_t *fpos)
6497 {
6498 struct seq_file *m = filp->private_data;
6499 struct trace_array *tr = m->private;
6500 char buf[64];
6501 const char *clockstr;
6502 int ret;
6503
6504 if (cnt >= sizeof(buf))
6505 return -EINVAL;
6506
6507 if (copy_from_user(buf, ubuf, cnt))
6508 return -EFAULT;
6509
6510 buf[cnt] = 0;
6511
6512 clockstr = strstrip(buf);
6513
6514 ret = tracing_set_clock(tr, clockstr);
6515 if (ret)
6516 return ret;
6517
6518 *fpos += cnt;
6519
6520 return cnt;
6521 }
6522
tracing_clock_open(struct inode * inode,struct file * file)6523 static int tracing_clock_open(struct inode *inode, struct file *file)
6524 {
6525 struct trace_array *tr = inode->i_private;
6526 int ret;
6527
6528 ret = tracing_check_open_get_tr(tr);
6529 if (ret)
6530 return ret;
6531
6532 if ((file->f_mode & FMODE_WRITE) && trace_array_is_readonly(tr)) {
6533 trace_array_put(tr);
6534 return -EACCES;
6535 }
6536
6537 ret = single_open(file, tracing_clock_show, inode->i_private);
6538 if (ret < 0)
6539 trace_array_put(tr);
6540
6541 return ret;
6542 }
6543
tracing_time_stamp_mode_show(struct seq_file * m,void * v)6544 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6545 {
6546 struct trace_array *tr = m->private;
6547
6548 guard(mutex)(&trace_types_lock);
6549
6550 if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
6551 seq_puts(m, "delta [absolute]\n");
6552 else
6553 seq_puts(m, "[delta] absolute\n");
6554
6555 return 0;
6556 }
6557
tracing_time_stamp_mode_open(struct inode * inode,struct file * file)6558 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6559 {
6560 struct trace_array *tr = inode->i_private;
6561 int ret;
6562
6563 ret = tracing_check_open_get_tr(tr);
6564 if (ret)
6565 return ret;
6566
6567 ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6568 if (ret < 0)
6569 trace_array_put(tr);
6570
6571 return ret;
6572 }
6573
tracing_event_time_stamp(struct trace_buffer * buffer,struct ring_buffer_event * rbe)6574 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
6575 {
6576 if (rbe == this_cpu_read(trace_buffered_event))
6577 return ring_buffer_time_stamp(buffer);
6578
6579 return ring_buffer_event_time_stamp(buffer, rbe);
6580 }
6581
/* tracing_thresh: latency threshold used by the latency tracers */
static const struct file_operations tracing_thresh_fops = {
	.open		= tracing_open_generic,
	.read		= tracing_thresh_read,
	.write		= tracing_thresh_write,
	.llseek		= generic_file_llseek,
};

/* current_tracer: select/report the active tracer for an instance */
static const struct file_operations set_tracer_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_set_trace_read,
	.write		= tracing_set_trace_write,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};

/* trace_pipe: consuming, blocking read of trace data */
static const struct file_operations tracing_pipe_fops = {
	.open		= tracing_open_pipe,
	.poll		= tracing_poll_pipe,
	.read		= tracing_read_pipe,
	.splice_read	= tracing_splice_read_pipe,
	.release	= tracing_release_pipe,
};

/* buffer_size_kb: per CPU ring buffer size */
static const struct file_operations tracing_entries_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_entries_read,
	.write		= tracing_entries_write,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};

/* syscall_user_buf_size: syscall user-space buffer size */
static const struct file_operations tracing_syscall_buf_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_syscall_buf_read,
	.write		= tracing_syscall_buf_write,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};

/* buffer_meta: read-only view of the persistent buffer meta data */
static const struct file_operations tracing_buffer_meta_fops = {
	.open		= tracing_buffer_meta_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= tracing_seq_release,
};

/* buffer_total_size_kb: combined size of all per CPU buffers */
static const struct file_operations tracing_total_entries_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_total_entries_read,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};

/* free_buffer: writing here frees the instance's ring buffer pages */
static const struct file_operations tracing_free_buffer_fops = {
	.open		= tracing_open_generic_tr,
	.write		= tracing_free_buffer_write,
	.release	= tracing_free_buffer_release,
};

/* trace_marker: inject text messages from user space into the trace */
static const struct file_operations tracing_mark_fops = {
	.open		= tracing_mark_open,
	.write		= tracing_mark_write,
	.release	= tracing_mark_release,
};

/* trace_marker_raw: inject binary (id + payload) markers */
static const struct file_operations tracing_mark_raw_fops = {
	.open		= tracing_mark_open,
	.write		= tracing_mark_raw_write,
	.release	= tracing_mark_release,
};

/* trace_clock: select the clock used to timestamp events */
static const struct file_operations trace_clock_fops = {
	.open		= tracing_clock_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= tracing_single_release_tr,
	.write		= tracing_clock_write,
};

/* timestamp_mode: read-only delta/absolute timestamp mode report */
static const struct file_operations trace_time_stamp_mode_fops = {
	.open		= tracing_time_stamp_mode_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= tracing_single_release_tr,
};

/* last_boot_info: data carried over from the previous boot */
static const struct file_operations last_boot_fops = {
	.open		= tracing_last_boot_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= tracing_seq_release,
};
6674
/*
 * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
 * @filp: The active open file structure
 * @ubuf: The userspace provided buffer containing the value to write
 * @cnt: The number of bytes in @ubuf
 * @ppos: The current "file" position
 *
 * This function implements the write interface for a struct trace_min_max_param.
 * The filp->private_data must point to a trace_min_max_param structure that
 * defines where to write the value, the min and the max acceptable values,
 * and a lock to protect the write.
 */
static ssize_t
trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
{
	struct trace_min_max_param *param = filp->private_data;
	u64 val;
	int err;

	if (!param)
		return -EFAULT;

	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
	if (err)
		return err;

	/* The lock is optional; parameters without one are lockless */
	if (param->lock)
		mutex_lock(param->lock);

	/* min/max bounds are also optional (NULL means unbounded) */
	if (param->min && val < *param->min)
		err = -EINVAL;

	if (param->max && val > *param->max)
		err = -EINVAL;

	/* Only store the value while holding the lock, and only if valid */
	if (!err)
		*param->val = val;

	if (param->lock)
		mutex_unlock(param->lock);

	if (err)
		return err;

	return cnt;
}
6721
6722 /*
6723 * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
6724 * @filp: The active open file structure
6725 * @ubuf: The userspace provided buffer to read value into
6726 * @cnt: The maximum number of bytes to read
6727 * @ppos: The current "file" position
6728 *
6729 * This function implements the read interface for a struct trace_min_max_param.
6730 * The filp->private_data must point to a trace_min_max_param struct with valid
6731 * data.
6732 */
6733 static ssize_t
trace_min_max_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6734 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
6735 {
6736 struct trace_min_max_param *param = filp->private_data;
6737 char buf[U64_STR_SIZE];
6738 int len;
6739 u64 val;
6740
6741 if (!param)
6742 return -EFAULT;
6743
6744 val = *param->val;
6745
6746 if (cnt > sizeof(buf))
6747 cnt = sizeof(buf);
6748
6749 len = snprintf(buf, sizeof(buf), "%llu\n", val);
6750
6751 return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
6752 }
6753
/* Generic bounded-u64 file ops; private_data is a trace_min_max_param */
const struct file_operations trace_min_max_fops = {
	.open		= tracing_open_generic,
	.read		= trace_min_max_read,
	.write		= trace_min_max_write,
};
6759
/* Maximum number of entries kept in tracing/error_log before recycling */
#define TRACING_LOG_ERRS_MAX	8
/* Maximum length of the location string for one error */
#define TRACING_LOG_LOC_MAX	128

/* Prefix printed in front of the offending command line */
#define CMD_PREFIX "  Command: "

struct err_info {
	const char	**errs;	/* ptr to loc-specific array of err strings */
	u8		type;	/* index into errs -> specific err string */
	u16		pos;	/* caret position */
	u64		ts;	/* local_clock() timestamp of the error */
};

struct tracing_log_err {
	struct list_head	list;
	struct err_info		info;
	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
	char			*cmd; /* what caused err */
};

/* Protects every trace_array's err_log list and entry count */
static DEFINE_MUTEX(tracing_err_log_lock);
6780
alloc_tracing_log_err(int len)6781 static struct tracing_log_err *alloc_tracing_log_err(int len)
6782 {
6783 struct tracing_log_err *err;
6784
6785 err = kzalloc_obj(*err);
6786 if (!err)
6787 return ERR_PTR(-ENOMEM);
6788
6789 err->cmd = kzalloc(len, GFP_KERNEL);
6790 if (!err->cmd) {
6791 kfree(err);
6792 return ERR_PTR(-ENOMEM);
6793 }
6794
6795 return err;
6796 }
6797
/* Free an error-log entry together with the command string it owns */
static void free_tracing_log_err(struct tracing_log_err *err)
{
	kfree(err->cmd);
	kfree(err);
}
6803
/*
 * Get an error-log entry for @tr with room for a @len byte command.
 * Allocates a new entry while fewer than TRACING_LOG_ERRS_MAX exist;
 * after that the oldest entry is recycled. Must be called with
 * tracing_err_log_lock held. Returns ERR_PTR(-ENOMEM) on failure.
 */
static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
						   int len)
{
	struct tracing_log_err *err;
	char *cmd;

	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
		err = alloc_tracing_log_err(len);
		/*
		 * PTR_ERR() on a valid pointer just yields its value, which
		 * cannot equal -ENOMEM, so this counts only real entries.
		 */
		if (PTR_ERR(err) != -ENOMEM)
			tr->n_err_log_entries++;

		return err;
	}
	/* At capacity: allocate a fresh command buffer first, ... */
	cmd = kzalloc(len, GFP_KERNEL);
	if (!cmd)
		return ERR_PTR(-ENOMEM);
	/* ... then recycle the oldest entry, swapping in the new buffer */
	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
	kfree(err->cmd);
	err->cmd = cmd;
	list_del(&err->list);

	return err;
}
6827
/**
 * err_pos - find the position of a string within a command for error careting
 * @cmd: The tracing command that caused the error
 * @str: The string to position the caret at within @cmd
 *
 * Finds the position of the first occurrence of @str within @cmd. The
 * return value can be passed to tracing_log_err() for caret placement
 * within @cmd.
 *
 * Returns the index within @cmd of the first occurrence of @str or 0
 * if @str was not found.
 */
unsigned int err_pos(char *cmd, const char *str)
{
	char *hit;

	/* An empty command has nothing to caret into */
	if (WARN_ON(!strlen(cmd)))
		return 0;

	hit = strstr(cmd, str);
	return hit ? hit - cmd : 0;
}
6853
/**
 * tracing_log_err - write an error to the tracing error log
 * @tr: The associated trace array for the error (NULL for top level array)
 * @loc: A string describing where the error occurred
 * @cmd: The tracing command that caused the error
 * @errs: The array of loc-specific static error strings
 * @type: The index into errs[], which produces the specific static err string
 * @pos: The position the caret should be placed in the cmd
 *
 * Writes an error into tracing/error_log of the form:
 *
 * <loc>: error: <text>
 *   Command: <cmd>
 *              ^
 *
 * tracing/error_log is a small log file containing the last
 * TRACING_LOG_ERRS_MAX errors (8). Memory for errors isn't allocated
 * unless there has been a tracing error, and the error log can be
 * cleared and have its memory freed by writing the empty string in
 * truncation mode to it i.e. echo > tracing/error_log.
 *
 * NOTE: the @errs array along with the @type param are used to
 * produce a static error string - this string is not copied and saved
 * when the error is logged - only a pointer to it is saved. See
 * existing callers for examples of how static strings are typically
 * defined for use with tracing_log_err().
 */
void tracing_log_err(struct trace_array *tr,
		     const char *loc, const char *cmd,
		     const char **errs, u8 type, u16 pos)
{
	struct tracing_log_err *err;
	int len = 0;

	if (!tr)
		tr = &global_trace;

	/* Room for prefix, two newlines, the command, and a NUL */
	len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;

	guard(mutex)(&tracing_err_log_lock);

	err = get_tracing_log_err(tr, len);
	/* Silently drop the error if no entry could be obtained */
	if (PTR_ERR(err) == -ENOMEM)
		return;

	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
	snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);

	err->info.errs = errs;
	err->info.type = type;
	err->info.pos = pos;
	err->info.ts = local_clock();

	/* Newest errors go on the tail; oldest are recycled from the head */
	list_add_tail(&err->list, &tr->err_log);
}
6909
clear_tracing_err_log(struct trace_array * tr)6910 static void clear_tracing_err_log(struct trace_array *tr)
6911 {
6912 struct tracing_log_err *err, *next;
6913
6914 guard(mutex)(&tracing_err_log_lock);
6915
6916 list_for_each_entry_safe(err, next, &tr->err_log, list) {
6917 list_del(&err->list);
6918 free_tracing_log_err(err);
6919 }
6920
6921 tr->n_err_log_entries = 0;
6922 }
6923
/*
 * seq_file start: takes tracing_err_log_lock, which stays held across
 * the whole iteration and is released in tracing_err_log_seq_stop().
 */
static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
{
	struct trace_array *tr = m->private;

	mutex_lock(&tracing_err_log_lock);

	return seq_list_start(&tr->err_log, *pos);
}
6932
/* seq_file next: advance to the next error-log entry (lock already held) */
static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct trace_array *tr = m->private;

	return seq_list_next(v, &tr->err_log, pos);
}
6939
/* seq_file stop: drop the lock taken in tracing_err_log_seq_start() */
static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
{
	mutex_unlock(&tracing_err_log_lock);
}
6944
/*
 * Emit the caret line: spaces past the CMD_PREFIX, then @pos more
 * spaces to reach the error column, then "^".
 */
static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
{
	unsigned int i, pad = sizeof(CMD_PREFIX) - 1 + pos;

	for (i = 0; i < pad; i++)
		seq_putc(m, ' ');
	seq_puts(m, "^\n");
}
6955
/*
 * seq_file show: print one error-log entry as
 *   [sec.usec] <loc>: error: <text>
 *     Command: <cmd>
 *                ^
 */
static int tracing_err_log_seq_show(struct seq_file *m, void *v)
{
	struct tracing_log_err *err = v;

	if (err) {
		const char *err_text = err->info.errs[err->info.type];
		u64 sec = err->info.ts;
		u32 nsec;

		/* do_div() divides sec in place and returns the remainder */
		nsec = do_div(sec, NSEC_PER_SEC);
		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
			   err->loc, err_text);
		seq_printf(m, "%s", err->cmd);
		tracing_err_log_show_pos(m, err->info.pos);
	}

	return 0;
}
6974
/* seq_file iterator over a trace_array's error log */
static const struct seq_operations tracing_err_log_seq_ops = {
	.start	= tracing_err_log_seq_start,
	.next	= tracing_err_log_seq_next,
	.stop	= tracing_err_log_seq_stop,
	.show	= tracing_err_log_seq_show
};
6981
/*
 * Open handler for tracing/error_log. Opening with O_TRUNC for write
 * clears (and frees) the log; opening for read sets up the seq_file
 * iterator. The instance reference is dropped on seq_open failure.
 */
static int tracing_err_log_open(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;
	int ret = 0;

	ret = tracing_check_open_get_tr(tr);
	if (ret)
		return ret;

	/* If this file was opened for write, then erase contents */
	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
		clear_tracing_err_log(tr);

	if (file->f_mode & FMODE_READ) {
		ret = seq_open(file, &tracing_err_log_seq_ops);
		if (!ret) {
			struct seq_file *m = file->private_data;
			m->private = tr;
		} else {
			trace_array_put(tr);
		}
	}
	return ret;
}
7006
/*
 * Writes to error_log are accepted but discarded; the only meaningful
 * write-side operation is clearing via O_TRUNC, handled at open time.
 */
static ssize_t tracing_err_log_write(struct file *file,
				     const char __user *buffer,
				     size_t count, loff_t *ppos)
{
	return count;
}
7013
/*
 * Release handler for error_log: drop the instance reference and, if
 * the file was readable, tear down the seq_file set up at open.
 */
static int tracing_err_log_release(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;

	trace_array_put(tr);

	if (file->f_mode & FMODE_READ)
		seq_release(inode, file);

	return 0;
}
7025
/* tracing/error_log: read via seq_file, write is a no-op (see above) */
static const struct file_operations tracing_err_log_fops = {
	.open		= tracing_err_log_open,
	.write		= tracing_err_log_write,
	.read		= seq_read,
	.llseek		= tracing_lseek,
	.release	= tracing_err_log_release,
};
7033
/*
 * Open handler for per_cpu/.../trace_pipe_raw: allocates an
 * ftrace_buffer_info that tracks the iterator state and the spare
 * read page, and pins both the instance and its trace buffer.
 */
int tracing_buffers_open(struct inode *inode, struct file *filp)
{
	struct trace_array *tr = inode->i_private;
	struct ftrace_buffer_info *info;
	int ret;

	ret = tracing_check_open_get_tr(tr);
	if (ret)
		return ret;

	info = kvzalloc_obj(*info);
	if (!info) {
		trace_array_put(tr);
		return -ENOMEM;
	}

	/* trace_types_lock protects current_trace and trace_ref */
	mutex_lock(&trace_types_lock);

	info->iter.tr = tr;
	info->iter.cpu_file = tracing_get_cpu(inode);
	info->iter.trace = tr->current_trace;
	info->iter.array_buffer = &tr->array_buffer;
	info->spare = NULL;
	/* Force reading ring buffer for first read */
	info->read = (unsigned int)-1;

	filp->private_data = info;

	/* Count this reader against buffer resizing/freeing */
	tr->trace_ref++;

	mutex_unlock(&trace_types_lock);

	ret = nonseekable_open(inode, filp);
	if (ret < 0)
		trace_array_put(tr);

	return ret;
}
7072
7073 static __poll_t
tracing_buffers_poll(struct file * filp,poll_table * poll_table)7074 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7075 {
7076 struct ftrace_buffer_info *info = filp->private_data;
7077 struct trace_iterator *iter = &info->iter;
7078
7079 return trace_poll(iter, filp, poll_table);
7080 }
7081
/*
 * Read handler for trace_pipe_raw: hands out whole ring-buffer sub-buffer
 * pages via a cached "spare" page. Leftover bytes from a previously read
 * page are served first; otherwise a fresh page is pulled from the ring
 * buffer, blocking (unless O_NONBLOCK) while the buffer is empty.
 */
ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
			     size_t count, loff_t *ppos)
{
	struct ftrace_buffer_info *info = filp->private_data;
	struct trace_iterator *iter = &info->iter;
	void *trace_data;
	int page_size;
	ssize_t ret = 0;
	ssize_t size;

	if (!count)
		return 0;

	/* A tracer that owns the snapshot buffer cannot be read this way */
	if (iter->snapshot && tracer_uses_snapshot(iter->tr->current_trace))
		return -EBUSY;

	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);

	/* Make sure the spare matches the current sub buffer size */
	if (info->spare) {
		if (page_size != info->spare_size) {
			ring_buffer_free_read_page(iter->array_buffer->buffer,
						   info->spare_cpu, info->spare);
			info->spare = NULL;
		}
	}

	/* (Re)allocate the spare page the ring buffer will swap into */
	if (!info->spare) {
		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
							  iter->cpu_file);
		if (IS_ERR(info->spare)) {
			ret = PTR_ERR(info->spare);
			info->spare = NULL;
		} else {
			info->spare_cpu = iter->cpu_file;
			info->spare_size = page_size;
		}
	}
	if (!info->spare)
		return ret;

	/* Do we have previous read data to read? */
	if (info->read < page_size)
		goto read;

 again:
	trace_access_lock(iter->cpu_file);
	ret = ring_buffer_read_page(iter->array_buffer->buffer,
				    info->spare,
				    count,
				    iter->cpu_file, 0);
	trace_access_unlock(iter->cpu_file);

	if (ret < 0) {
		/* Nothing to read: wait for data unless non-blocking/closed */
		if (trace_empty(iter) && !iter->closed) {
			if (update_last_data_if_empty(iter->tr))
				return 0;

			if ((filp->f_flags & O_NONBLOCK))
				return -EAGAIN;

			ret = wait_on_pipe(iter, 0);
			if (ret)
				return ret;

			goto again;
		}
		return 0;
	}

	/* A fresh page was swapped in; start serving it from offset 0 */
	info->read = 0;
 read:
	size = page_size - info->read;
	if (size > count)
		size = count;
	trace_data = ring_buffer_read_page_data(info->spare);
	ret = copy_to_user(ubuf, trace_data + info->read, size);
	/* copy_to_user() returns the number of bytes NOT copied */
	if (ret == size)
		return -EFAULT;

	size -= ret;

	*ppos += size;
	info->read += size;

	return size;
}
7169
/*
 * Flush handler (called on close()): mark the iterator closed and wake
 * any readers blocked in wait_on_pipe() so they can notice and return.
 */
static int tracing_buffers_flush(struct file *file, fl_owner_t id)
{
	struct ftrace_buffer_info *info = file->private_data;
	struct trace_iterator *iter = &info->iter;

	iter->closed = true;
	/* Make sure the waiters see the new wait_index */
	(void)atomic_fetch_inc_release(&iter->wait_index);

	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);

	return 0;
}
7183
/*
 * Release handler for trace_pipe_raw: drop the reader count and instance
 * reference taken at open, return the spare page to the ring buffer,
 * and free the per-open state. All under trace_types_lock.
 */
int tracing_buffers_release(struct inode *inode, struct file *file)
{
	struct ftrace_buffer_info *info = file->private_data;
	struct trace_iterator *iter = &info->iter;

	guard(mutex)(&trace_types_lock);

	iter->tr->trace_ref--;

	__trace_array_put(iter->tr);

	if (info->spare)
		ring_buffer_free_read_page(iter->array_buffer->buffer,
					   info->spare_cpu, info->spare);
	/* info was allocated with kvzalloc_obj() in tracing_buffers_open() */
	kvfree(info);

	return 0;
}
7202
/*
 * Reference-counted handle on a ring-buffer read page handed out to
 * splice().  The page is returned to the ring buffer only when the last
 * holder (pipe buffer or splice descriptor) drops its reference.
 */
struct buffer_ref {
	struct trace_buffer *buffer;	/* ring buffer the page belongs to */
	void *page;			/* page from ring_buffer_alloc_read_page() */
	int cpu;			/* cpu the page was allocated for */
	refcount_t refcount;		/* holders of this page */
};
7209
buffer_ref_release(struct buffer_ref * ref)7210 static void buffer_ref_release(struct buffer_ref *ref)
7211 {
7212 if (!refcount_dec_and_test(&ref->refcount))
7213 return;
7214 ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7215 kfree(ref);
7216 }
7217
buffer_pipe_buf_release(struct pipe_inode_info * pipe,struct pipe_buffer * buf)7218 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7219 struct pipe_buffer *buf)
7220 {
7221 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7222
7223 buffer_ref_release(ref);
7224 buf->private = 0;
7225 }
7226
buffer_pipe_buf_get(struct pipe_inode_info * pipe,struct pipe_buffer * buf)7227 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7228 struct pipe_buffer *buf)
7229 {
7230 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7231
7232 if (refcount_read(&ref->refcount) > INT_MAX/2)
7233 return false;
7234
7235 refcount_inc(&ref->refcount);
7236 return true;
7237 }
7238
/*
 * Pipe buffer operations for a buffer: ->get takes an extra reference
 * on the underlying buffer_ref, ->release drops one.
 */
static const struct pipe_buf_operations buffer_pipe_buf_ops = {
	.release = buffer_pipe_buf_release,
	.get = buffer_pipe_buf_get,
};
7244
7245 /*
7246 * Callback from splice_to_pipe(), if we need to release some pages
7247 * at the end of the spd in case we error'ed out in filling the pipe.
7248 */
buffer_spd_release(struct splice_pipe_desc * spd,unsigned int i)7249 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7250 {
7251 struct buffer_ref *ref =
7252 (struct buffer_ref *)spd->partial[i].private;
7253
7254 buffer_ref_release(ref);
7255 spd->partial[i].private = 0;
7256 }
7257
/*
 * splice() handler for the per-cpu trace_pipe_raw files: hand whole
 * ring-buffer sub-buffer pages to the pipe without copying.  Each page
 * is wrapped in a buffer_ref so it is returned to the ring buffer only
 * once the last pipe reader releases it.
 */
ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
				    struct pipe_inode_info *pipe, size_t len,
				    unsigned int flags)
{
	struct ftrace_buffer_info *info = file->private_data;
	struct trace_iterator *iter = &info->iter;
	struct partial_page partial_def[PIPE_DEF_BUFFERS];
	struct page *pages_def[PIPE_DEF_BUFFERS];
	struct splice_pipe_desc spd = {
		.pages = pages_def,
		.partial = partial_def,
		.nr_pages_max = PIPE_DEF_BUFFERS,
		.ops = &buffer_pipe_buf_ops,
		.spd_release = buffer_spd_release,
	};
	struct buffer_ref *ref;
	bool woken = false;
	int page_size;
	int entries, i;
	ssize_t ret = 0;

	/* Snapshot-using tracers swap buffers underneath us; refuse */
	if (iter->snapshot && tracer_uses_snapshot(iter->tr->current_trace))
		return -EBUSY;

	/* Only whole, aligned sub-buffer pages can be spliced */
	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
	if (*ppos & (page_size - 1))
		return -EINVAL;

	/* Round the request down to a multiple of the sub-buffer size */
	if (len & (page_size - 1)) {
		if (len < page_size)
			return -EINVAL;
		len &= (~(page_size - 1));
	}

	if (splice_grow_spd(pipe, &spd))
		return -ENOMEM;

again:
	trace_access_lock(iter->cpu_file);
	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);

	/* Fill the splice descriptor one sub-buffer page at a time */
	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) {
		struct page *page;
		int r;

		ref = kzalloc_obj(*ref);
		if (!ref) {
			ret = -ENOMEM;
			break;
		}

		refcount_set(&ref->refcount, 1);
		ref->buffer = iter->array_buffer->buffer;
		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
		if (IS_ERR(ref->page)) {
			ret = PTR_ERR(ref->page);
			ref->page = NULL;
			kfree(ref);
			break;
		}
		ref->cpu = iter->cpu_file;

		/* full page reads only (last argument == 1) */
		r = ring_buffer_read_page(ref->buffer, ref->page,
					  len, iter->cpu_file, 1);
		if (r < 0) {
			ring_buffer_free_read_page(ref->buffer, ref->cpu,
						   ref->page);
			kfree(ref);
			break;
		}

		page = virt_to_page(ring_buffer_read_page_data(ref->page));

		spd.pages[i] = page;
		spd.partial[i].len = page_size;
		spd.partial[i].offset = 0;
		spd.partial[i].private = (unsigned long)ref;
		spd.nr_pages++;
		*ppos += page_size;

		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
	}

	trace_access_unlock(iter->cpu_file);
	spd.nr_pages = i;

	/* did we read anything? */
	if (!spd.nr_pages) {

		/* An error from the loop above takes precedence */
		if (ret)
			goto out;

		/* Already waited once; don't block again */
		if (woken)
			goto out;

		ret = -EAGAIN;
		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
			goto out;

		ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
		if (ret)
			goto out;

		/* No need to wait after waking up when tracing is off */
		if (!tracer_tracing_is_on(iter->tr))
			goto out;

		/* Iterate one more time to collect any new data then exit */
		woken = true;

		goto again;
	}

	ret = splice_to_pipe(pipe, &spd);
out:
	splice_shrink_spd(&spd);

	return ret;
}
7377
/*
 * ioctl() for the per-cpu buffer files.
 *
 * TRACE_MMAP_IOCTL_GET_READER advances the mmap()ed reader page,
 * optionally blocking (when the file is not O_NONBLOCK) until the
 * buffer fill level reaches tr->buffer_percent.
 * cmd == 0 wakes up all tasks waiting on this buffer.
 * Any other cmd returns -ENOTTY.
 */
static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	struct ftrace_buffer_info *info = file->private_data;
	struct trace_iterator *iter = &info->iter;
	int err;

	if (cmd == TRACE_MMAP_IOCTL_GET_READER) {
		if (!(file->f_flags & O_NONBLOCK)) {
			err = ring_buffer_wait(iter->array_buffer->buffer,
					       iter->cpu_file,
					       iter->tr->buffer_percent,
					       NULL, NULL);
			if (err)
				return err;
		}

		return ring_buffer_map_get_reader(iter->array_buffer->buffer,
						  iter->cpu_file);
	} else if (cmd) {
		return -ENOTTY;
	}

	/*
	 * An ioctl call with cmd 0 to the ring buffer file will wake up all
	 * waiters
	 */
	guard(mutex)(&trace_types_lock);

	/* Make sure the waiters see the new wait_index */
	(void)atomic_fetch_inc_release(&iter->wait_index);

	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);

	return 0;
}
7413
7414 /*
7415 * This is called when a VMA is duplicated (e.g., on fork()) to increment
7416 * the user_mapped counter without remapping pages.
7417 */
tracing_buffers_mmap_open(struct vm_area_struct * vma)7418 static void tracing_buffers_mmap_open(struct vm_area_struct *vma)
7419 {
7420 struct ftrace_buffer_info *info = vma->vm_file->private_data;
7421 struct trace_iterator *iter = &info->iter;
7422
7423 ring_buffer_map_dup(iter->array_buffer->buffer, iter->cpu_file);
7424 }
7425
/*
 * VMA close: unmap the per-cpu ring buffer and release the snapshot-map
 * hold taken in tracing_buffers_mmap().
 */
static void tracing_buffers_mmap_close(struct vm_area_struct *vma)
{
	struct ftrace_buffer_info *info = vma->vm_file->private_data;
	struct trace_iterator *iter = &info->iter;

	WARN_ON(ring_buffer_unmap(iter->array_buffer->buffer, iter->cpu_file));
	put_snapshot_map(iter->tr);
}
7434
/* ->may_split handler: always refuse to split the mapping. */
static int tracing_buffers_may_split(struct vm_area_struct *vma, unsigned long addr)
{
	/*
	 * Trace buffer mappings require the complete buffer including
	 * the meta page. Partial mappings are not supported.
	 */
	return -EINVAL;
}
7443
/* VMA operations for user-space mappings of the per-cpu ring buffer */
static const struct vm_operations_struct tracing_buffers_vmops = {
	.open		= tracing_buffers_mmap_open,
	.close		= tracing_buffers_mmap_close,
	.may_split	= tracing_buffers_may_split,
};
7449
/*
 * mmap() a per-cpu ring buffer into user space.  Not supported for
 * memory-mapped (boot) or vmalloc'd backup trace buffers.
 */
static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct ftrace_buffer_info *info = filp->private_data;
	struct trace_iterator *iter = &info->iter;
	int ret = 0;

	/* A memmap'ed and backup buffers are not supported for user space mmap */
	if (iter->tr->flags & (TRACE_ARRAY_FL_MEMMAP | TRACE_ARRAY_FL_VMALLOC))
		return -ENODEV;

	ret = get_snapshot_map(iter->tr);
	if (ret)
		return ret;

	ret = ring_buffer_map(iter->array_buffer->buffer, iter->cpu_file, vma);
	if (ret)
		put_snapshot_map(iter->tr);

	/*
	 * NOTE(review): vm_ops is assigned even when ring_buffer_map()
	 * failed; confirm the caller discards the VMA on error.
	 */
	vma->vm_ops = &tracing_buffers_vmops;

	return ret;
}
7472
/* File operations for the per-cpu "trace_pipe_raw" files */
static const struct file_operations tracing_buffers_fops = {
	.open		= tracing_buffers_open,
	.read		= tracing_buffers_read,
	.poll		= tracing_buffers_poll,
	.release	= tracing_buffers_release,
	.flush		= tracing_buffers_flush,
	.splice_read	= tracing_buffers_splice_read,
	.unlocked_ioctl	= tracing_buffers_ioctl,
	.mmap		= tracing_buffers_mmap,
};
7483
/*
 * Read handler for the per-cpu "stats" file: format ring-buffer
 * statistics for one cpu (entries, overruns, byte count, timestamps,
 * dropped and read events) into a trace_seq and copy it to user space.
 */
static ssize_t
tracing_stats_read(struct file *filp, char __user *ubuf,
		   size_t count, loff_t *ppos)
{
	struct inode *inode = file_inode(filp);
	struct trace_array *tr = inode->i_private;
	struct array_buffer *trace_buf = &tr->array_buffer;
	int cpu = tracing_get_cpu(inode);
	struct trace_seq *s;
	unsigned long cnt;
	unsigned long long t;
	unsigned long usec_rem;

	s = kmalloc_obj(*s);
	if (!s)
		return -ENOMEM;

	trace_seq_init(s);

	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
	trace_seq_printf(s, "entries: %ld\n", cnt);

	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
	trace_seq_printf(s, "overrun: %ld\n", cnt);

	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
	trace_seq_printf(s, "commit overrun: %ld\n", cnt);

	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
	trace_seq_printf(s, "bytes: %ld\n", cnt);

	if (trace_clocks[tr->clock_id].in_ns) {
		/* local or global for trace_clock */
		/* do_div() splits the usec value into seconds + remainder */
		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
		usec_rem = do_div(t, USEC_PER_SEC);
		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
				 t, usec_rem);

		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
		usec_rem = do_div(t, USEC_PER_SEC);
		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
	} else {
		/* counter or tsc mode for trace_clock */
		trace_seq_printf(s, "oldest event ts: %llu\n",
				 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));

		trace_seq_printf(s, "now ts: %llu\n",
				 ring_buffer_time_stamp(trace_buf->buffer));
	}

	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
	trace_seq_printf(s, "dropped events: %ld\n", cnt);

	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
	trace_seq_printf(s, "read events: %ld\n", cnt);

	count = simple_read_from_buffer(ubuf, count, ppos,
					s->buffer, trace_seq_used(s));

	kfree(s);

	return count;
}
7547
/* File operations for the per-cpu "stats" files */
static const struct file_operations tracing_stats_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_stats_read,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};
7554
7555 #ifdef CONFIG_DYNAMIC_FTRACE
7556
7557 static ssize_t
tracing_read_dyn_info(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)7558 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7559 size_t cnt, loff_t *ppos)
7560 {
7561 ssize_t ret;
7562 char *buf;
7563 int r;
7564
7565 /* 512 should be plenty to hold the amount needed */
7566 #define DYN_INFO_BUF_SIZE 512
7567
7568 buf = kmalloc(DYN_INFO_BUF_SIZE, GFP_KERNEL);
7569 if (!buf)
7570 return -ENOMEM;
7571
7572 r = scnprintf(buf, DYN_INFO_BUF_SIZE,
7573 "%ld pages:%ld groups: %ld\n"
7574 "ftrace boot update time = %llu (ns)\n"
7575 "ftrace module total update time = %llu (ns)\n",
7576 ftrace_update_tot_cnt,
7577 ftrace_number_of_pages,
7578 ftrace_number_of_groups,
7579 ftrace_update_time,
7580 ftrace_total_mod_time);
7581
7582 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7583 kfree(buf);
7584 return ret;
7585 }
7586
/* File operations for "dyn_ftrace_total_info" */
static const struct file_operations tracing_dyn_info_fops = {
	.open		= tracing_open_generic,
	.read		= tracing_read_dyn_info,
	.llseek		= generic_file_llseek,
};
7592 #endif /* CONFIG_DYNAMIC_FTRACE */
7593
tracing_get_dentry(struct trace_array * tr)7594 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7595 {
7596 /* Top directory uses NULL as the parent */
7597 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7598 return NULL;
7599
7600 if (WARN_ON(!tr->dir))
7601 return ERR_PTR(-ENODEV);
7602
7603 /* All sub buffers have a descriptor */
7604 return tr->dir;
7605 }
7606
tracing_dentry_percpu(struct trace_array * tr,int cpu)7607 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7608 {
7609 struct dentry *d_tracer;
7610
7611 if (tr->percpu_dir)
7612 return tr->percpu_dir;
7613
7614 d_tracer = tracing_get_dentry(tr);
7615 if (IS_ERR(d_tracer))
7616 return NULL;
7617
7618 tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7619
7620 MEM_FAIL(!tr->percpu_dir,
7621 "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7622
7623 return tr->percpu_dir;
7624 }
7625
7626 struct dentry *
trace_create_cpu_file(const char * name,umode_t mode,struct dentry * parent,void * data,long cpu,const struct file_operations * fops)7627 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7628 void *data, long cpu, const struct file_operations *fops)
7629 {
7630 struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7631
7632 if (ret) /* See tracing_get_cpu() */
7633 d_inode(ret)->i_cdev = (void *)(cpu + 1);
7634 return ret;
7635 }
7636
/*
 * Create the per-cpu tracefs files for @tr under "per_cpu/cpu<N>/":
 * trace_pipe, trace, trace_pipe_raw, stats, buffer_size_kb, and either
 * buffer_meta (range/persistent buffers) or the snapshot files.
 */
static void
tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
{
	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
	struct dentry *d_cpu;
	char cpu_dir[30]; /* 30 characters should be more than enough */

	if (!d_percpu)
		return;

	snprintf(cpu_dir, 30, "cpu%ld", cpu);
	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
	if (!d_cpu) {
		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
		return;
	}

	/* per cpu trace_pipe */
	trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
			      tr, cpu, &tracing_pipe_fops);

	/* per cpu trace */
	trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
			      tr, cpu, &tracing_fops);

	trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
			      tr, cpu, &tracing_buffers_fops);

	trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
			      tr, cpu, &tracing_stats_fops);

	trace_create_cpu_file("buffer_size_kb", TRACE_MODE_WRITE, d_cpu,
			      tr, cpu, &tracing_entries_fops);

	/* range buffers expose their meta page instead of snapshots */
	if (tr->range_addr_start)
		trace_create_cpu_file("buffer_meta", TRACE_MODE_READ, d_cpu,
				      tr, cpu, &tracing_buffer_meta_fops);
#ifdef CONFIG_TRACER_SNAPSHOT
	if (!tr->range_addr_start) {
		trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
				      tr, cpu, &snapshot_fops);

		trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
				      tr, cpu, &snapshot_raw_fops);
	}
#endif
}
7684
7685 #ifdef CONFIG_FTRACE_SELFTEST
7686 /* Let selftest have access to static functions in this file */
7687 #include "trace_selftest.c"
7688 #endif
7689
7690 static ssize_t
trace_options_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)7691 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7692 loff_t *ppos)
7693 {
7694 struct trace_option_dentry *topt = filp->private_data;
7695 char *buf;
7696
7697 if (topt->flags->val & topt->opt->bit)
7698 buf = "1\n";
7699 else
7700 buf = "0\n";
7701
7702 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7703 }
7704
7705 static ssize_t
trace_options_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)7706 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7707 loff_t *ppos)
7708 {
7709 struct trace_option_dentry *topt = filp->private_data;
7710 unsigned long val;
7711 int ret;
7712
7713 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7714 if (ret)
7715 return ret;
7716
7717 if (val != 0 && val != 1)
7718 return -EINVAL;
7719
7720 if (!!(topt->flags->val & topt->opt->bit) != val) {
7721 guard(mutex)(&trace_types_lock);
7722 ret = __set_tracer_option(topt->tr, topt->flags,
7723 topt->opt, !val);
7724 if (ret)
7725 return ret;
7726 }
7727
7728 *ppos += cnt;
7729
7730 return cnt;
7731 }
7732
tracing_open_options(struct inode * inode,struct file * filp)7733 static int tracing_open_options(struct inode *inode, struct file *filp)
7734 {
7735 struct trace_option_dentry *topt = inode->i_private;
7736 int ret;
7737
7738 ret = tracing_check_open_get_tr(topt->tr);
7739 if (ret)
7740 return ret;
7741
7742 filp->private_data = inode->i_private;
7743 return 0;
7744 }
7745
tracing_release_options(struct inode * inode,struct file * file)7746 static int tracing_release_options(struct inode *inode, struct file *file)
7747 {
7748 struct trace_option_dentry *topt = file->private_data;
7749
7750 trace_array_put(topt->tr);
7751 return 0;
7752 }
7753
/* File operations for tracer-specific option files (options/<opt>) */
static const struct file_operations trace_options_fops = {
	.open = tracing_open_options,
	.read = trace_options_read,
	.write = trace_options_write,
	.llseek	= generic_file_llseek,
	.release = tracing_release_options,
};
7761
7762 /*
7763 * In order to pass in both the trace_array descriptor as well as the index
7764 * to the flag that the trace option file represents, the trace_array
7765 * has a character array of trace_flags_index[], which holds the index
7766 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
7767 * The address of this character array is passed to the flag option file
7768 * read/write callbacks.
7769 *
7770 * In order to extract both the index and the trace_array descriptor,
7771 * get_tr_index() uses the following algorithm.
7772 *
7773 * idx = *ptr;
7774 *
7775 * As the pointer itself contains the address of the index (remember
7776 * index[1] == 1).
7777 *
7778 * Then to get the trace_array descriptor, by subtracting that index
7779 * from the ptr, we get to the start of the index itself.
7780 *
7781 * ptr - idx == &index[0]
7782 *
7783 * Then a simple container_of() from that pointer gets us to the
7784 * trace_array descriptor.
7785 */
get_tr_index(void * data,struct trace_array ** ptr,unsigned int * pindex)7786 static void get_tr_index(void *data, struct trace_array **ptr,
7787 unsigned int *pindex)
7788 {
7789 *pindex = *(unsigned char *)data;
7790
7791 *ptr = container_of(data - *pindex, struct trace_array,
7792 trace_flags_index);
7793 }
7794
7795 static ssize_t
trace_options_core_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)7796 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
7797 loff_t *ppos)
7798 {
7799 void *tr_index = filp->private_data;
7800 struct trace_array *tr;
7801 unsigned int index;
7802 char *buf;
7803
7804 get_tr_index(tr_index, &tr, &index);
7805
7806 if (tr->trace_flags & (1ULL << index))
7807 buf = "1\n";
7808 else
7809 buf = "0\n";
7810
7811 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7812 }
7813
/*
 * Write handler for a core trace option file: parse a 0/1 value and set
 * or clear the corresponding trace_flags bit via set_tracer_flag().
 * event_mutex is taken before trace_types_lock.
 */
static ssize_t
trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
			 loff_t *ppos)
{
	void *tr_index = filp->private_data;
	struct trace_array *tr;
	unsigned int index;
	unsigned long val;
	int ret;

	get_tr_index(tr_index, &tr, &index);

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	if (val != 0 && val != 1)
		return -EINVAL;

	mutex_lock(&event_mutex);
	mutex_lock(&trace_types_lock);
	ret = set_tracer_flag(tr, 1ULL << index, val);
	mutex_unlock(&trace_types_lock);
	mutex_unlock(&event_mutex);

	if (ret < 0)
		return ret;

	*ppos += cnt;

	return cnt;
}
7846
/* File operations for core trace option files (options/<flag>) */
static const struct file_operations trace_options_core_fops = {
	.open = tracing_open_generic,
	.read = trace_options_core_read,
	.write = trace_options_core_write,
	.llseek = generic_file_llseek,
};
7853
trace_create_file(const char * name,umode_t mode,struct dentry * parent,void * data,const struct file_operations * fops)7854 struct dentry *trace_create_file(const char *name,
7855 umode_t mode,
7856 struct dentry *parent,
7857 void *data,
7858 const struct file_operations *fops)
7859 {
7860 struct dentry *ret;
7861
7862 ret = tracefs_create_file(name, mode, parent, data, fops);
7863 if (!ret)
7864 pr_warn("Could not create tracefs '%s' entry\n", name);
7865
7866 return ret;
7867 }
7868
7869
trace_options_init_dentry(struct trace_array * tr)7870 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7871 {
7872 struct dentry *d_tracer;
7873
7874 if (tr->options)
7875 return tr->options;
7876
7877 d_tracer = tracing_get_dentry(tr);
7878 if (IS_ERR(d_tracer))
7879 return NULL;
7880
7881 tr->options = tracefs_create_dir("options", d_tracer);
7882 if (!tr->options) {
7883 pr_warn("Could not create tracefs directory 'options'\n");
7884 return NULL;
7885 }
7886
7887 return tr->options;
7888 }
7889
7890 static void
create_trace_option_file(struct trace_array * tr,struct trace_option_dentry * topt,struct tracer_flags * flags,struct tracer_opt * opt)7891 create_trace_option_file(struct trace_array *tr,
7892 struct trace_option_dentry *topt,
7893 struct tracer_flags *flags,
7894 struct tracer_opt *opt)
7895 {
7896 struct dentry *t_options;
7897
7898 t_options = trace_options_init_dentry(tr);
7899 if (!t_options)
7900 return;
7901
7902 topt->flags = flags;
7903 topt->opt = opt;
7904 topt->tr = tr;
7905
7906 topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
7907 t_options, topt, &trace_options_fops);
7908 }
7909
/*
 * Create an option file for every flag of @flags and record the set on
 * tr->topts, keyed by @tracer, so it can be torn down later.
 *
 * Returns 0 on success or when there is nothing to create (individual
 * file-creation failures are only reported via MEM_FAIL); -ENOMEM if
 * the tr->topts tracking array cannot be grown.
 */
static int
create_trace_option_files(struct trace_array *tr, struct tracer *tracer,
			  struct tracer_flags *flags)
{
	struct trace_option_dentry *topts;
	struct trace_options *tr_topts;
	struct tracer_opt *opts;
	int cnt;

	if (!flags || !flags->opts)
		return 0;

	opts = flags->opts;

	/* Count the NULL-terminated option array */
	for (cnt = 0; opts[cnt].name; cnt++)
		;

	/* One extra zeroed slot terminates the array */
	topts = kzalloc_objs(*topts, cnt + 1);
	if (!topts)
		return 0;

	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
			    GFP_KERNEL);
	if (!tr_topts) {
		kfree(topts);
		return -ENOMEM;
	}

	tr->topts = tr_topts;
	tr->topts[tr->nr_topts].tracer = tracer;
	tr->topts[tr->nr_topts].topts = topts;
	tr->nr_topts++;

	for (cnt = 0; opts[cnt].name; cnt++) {
		create_trace_option_file(tr, &topts[cnt], flags,
					 &opts[cnt]);
		MEM_FAIL(topts[cnt].entry == NULL,
			 "Failed to create trace option: %s",
			 opts[cnt].name);
	}
	return 0;
}
7952
get_global_flags_val(struct tracer * tracer)7953 static int get_global_flags_val(struct tracer *tracer)
7954 {
7955 struct tracers *t;
7956
7957 list_for_each_entry(t, &global_trace.tracers, list) {
7958 if (t->tracer != tracer)
7959 continue;
7960 if (!t->flags)
7961 return -1;
7962 return t->flags->val;
7963 }
7964 return -1;
7965 }
7966
add_tracer_options(struct trace_array * tr,struct tracers * t)7967 static int add_tracer_options(struct trace_array *tr, struct tracers *t)
7968 {
7969 struct tracer *tracer = t->tracer;
7970 struct tracer_flags *flags = t->flags ?: tracer->flags;
7971
7972 if (!flags)
7973 return 0;
7974
7975 /* Only add tracer options after update_tracer_options finish */
7976 if (!tracer_options_updated)
7977 return 0;
7978
7979 return create_trace_option_files(tr, tracer, flags);
7980 }
7981
add_tracer(struct trace_array * tr,struct tracer * tracer)7982 static int add_tracer(struct trace_array *tr, struct tracer *tracer)
7983 {
7984 struct tracer_flags *flags;
7985 struct tracers *t;
7986 int ret;
7987
7988 /* Only enable if the directory has been created already. */
7989 if (!tr->dir && !(tr->flags & TRACE_ARRAY_FL_GLOBAL))
7990 return 0;
7991
7992 /*
7993 * If this is an instance, only create flags for tracers
7994 * the instance may have.
7995 */
7996 if (!trace_ok_for_array(tracer, tr))
7997 return 0;
7998
7999 t = kmalloc_obj(*t);
8000 if (!t)
8001 return -ENOMEM;
8002
8003 t->tracer = tracer;
8004 t->flags = NULL;
8005 list_add(&t->list, &tr->tracers);
8006
8007 flags = tracer->flags;
8008 if (!flags) {
8009 if (!tracer->default_flags)
8010 return 0;
8011
8012 /*
8013 * If the tracer defines default flags, it means the flags are
8014 * per trace instance.
8015 */
8016 flags = kmalloc_obj(*flags);
8017 if (!flags)
8018 return -ENOMEM;
8019
8020 *flags = *tracer->default_flags;
8021 flags->trace = tracer;
8022
8023 t->flags = flags;
8024
8025 /* If this is an instance, inherit the global_trace flags */
8026 if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL)) {
8027 int val = get_global_flags_val(tracer);
8028 if (!WARN_ON_ONCE(val < 0))
8029 flags->val = val;
8030 }
8031 }
8032
8033 ret = add_tracer_options(tr, t);
8034 if (ret < 0) {
8035 list_del(&t->list);
8036 kfree(t->flags);
8037 kfree(t);
8038 }
8039
8040 return ret;
8041 }
8042
8043 static struct dentry *
create_trace_option_core_file(struct trace_array * tr,const char * option,long index)8044 create_trace_option_core_file(struct trace_array *tr,
8045 const char *option, long index)
8046 {
8047 struct dentry *t_options;
8048
8049 t_options = trace_options_init_dentry(tr);
8050 if (!t_options)
8051 return NULL;
8052
8053 return trace_create_file(option, TRACE_MODE_WRITE, t_options,
8054 (void *)&tr->trace_flags_index[index],
8055 &trace_options_core_fops);
8056 }
8057
create_trace_options_dir(struct trace_array * tr)8058 static void create_trace_options_dir(struct trace_array *tr)
8059 {
8060 struct dentry *t_options;
8061 bool top_level = tr == &global_trace;
8062 int i;
8063
8064 t_options = trace_options_init_dentry(tr);
8065 if (!t_options)
8066 return;
8067
8068 for (i = 0; trace_options[i]; i++) {
8069 if (top_level ||
8070 !((1ULL << i) & TOP_LEVEL_TRACE_FLAGS)) {
8071 create_trace_option_core_file(tr, trace_options[i], i);
8072 }
8073 }
8074 }
8075
8076 static ssize_t
rb_simple_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)8077 rb_simple_read(struct file *filp, char __user *ubuf,
8078 size_t cnt, loff_t *ppos)
8079 {
8080 struct trace_array *tr = filp->private_data;
8081 char buf[64];
8082 int r;
8083
8084 r = tracer_tracing_is_on(tr);
8085 r = sprintf(buf, "%d\n", r);
8086
8087 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8088 }
8089
/*
 * Write handler for "tracing_on": a non-zero value enables the ring
 * buffer and calls the current tracer's ->start, zero disables it,
 * calls ->stop and wakes any blocked readers.  Writing the current
 * state is a no-op.
 */
static ssize_t
rb_simple_write(struct file *filp, const char __user *ubuf,
		size_t cnt, loff_t *ppos)
{
	struct trace_array *tr = filp->private_data;
	struct trace_buffer *buffer = tr->array_buffer.buffer;
	unsigned long val;
	int ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	if (buffer) {
		/* Serialize against tracer changes */
		guard(mutex)(&trace_types_lock);
		if (!!val == tracer_tracing_is_on(tr)) {
			val = 0; /* do nothing */
		} else if (val) {
			tracer_tracing_on(tr);
			if (tr->current_trace->start)
				tr->current_trace->start(tr);
		} else {
			tracer_tracing_off(tr);
			if (tr->current_trace->stop)
				tr->current_trace->stop(tr);
			/* Wake up any waiters */
			ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
		}
	}

	(*ppos)++;

	return cnt;
}
8124
/* File operations for "tracing_on" */
static const struct file_operations rb_simple_fops = {
	.open		= tracing_open_generic_tr,
	.read		= rb_simple_read,
	.write		= rb_simple_write,
	.release	= tracing_release_generic_tr,
	.llseek		= default_llseek,
};
8132
8133 static ssize_t
buffer_percent_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)8134 buffer_percent_read(struct file *filp, char __user *ubuf,
8135 size_t cnt, loff_t *ppos)
8136 {
8137 struct trace_array *tr = filp->private_data;
8138 char buf[64];
8139 int r;
8140
8141 r = tr->buffer_percent;
8142 r = sprintf(buf, "%d\n", r);
8143
8144 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8145 }
8146
8147 static ssize_t
buffer_percent_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)8148 buffer_percent_write(struct file *filp, const char __user *ubuf,
8149 size_t cnt, loff_t *ppos)
8150 {
8151 struct trace_array *tr = filp->private_data;
8152 unsigned long val;
8153 int ret;
8154
8155 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8156 if (ret)
8157 return ret;
8158
8159 if (val > 100)
8160 return -EINVAL;
8161
8162 tr->buffer_percent = val;
8163
8164 (*ppos)++;
8165
8166 return cnt;
8167 }
8168
/* File operations for "buffer_percent" */
static const struct file_operations buffer_percent_fops = {
	.open		= tracing_open_generic_tr,
	.read		= buffer_percent_read,
	.write		= buffer_percent_write,
	.release	= tracing_release_generic_tr,
	.llseek		= default_llseek,
};
8176
8177 static ssize_t
buffer_subbuf_size_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)8178 buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
8179 {
8180 struct trace_array *tr = filp->private_data;
8181 size_t size;
8182 char buf[64];
8183 int order;
8184 int r;
8185
8186 order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
8187 size = (PAGE_SIZE << order) / 1024;
8188
8189 r = sprintf(buf, "%zd\n", size);
8190
8191 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8192 }
8193
/*
 * Write handler for "buffer_subbuf_size_kb": set the ring-buffer
 * sub-buffer size.  The value is given in KB and rounded up to a
 * power-of-two number of system pages (order 0-7, i.e. 1-128 pages).
 * Tracing is stopped while the order changes; if a snapshot buffer is
 * allocated its order is changed too, rolling the main buffer back on
 * failure.
 */
static ssize_t
buffer_subbuf_size_write(struct file *filp, const char __user *ubuf,
			 size_t cnt, loff_t *ppos)
{
	struct trace_array *tr = filp->private_data;
	unsigned long val;
	int old_order;
	int order;
	int pages;
	int ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	val *= 1024; /* value passed in is in KB */

	/* Round up to a power-of-two page count */
	pages = DIV_ROUND_UP(val, PAGE_SIZE);
	order = fls(pages - 1);

	/* limit between 1 and 128 system pages */
	if (order < 0 || order > 7)
		return -EINVAL;

	/* Do not allow tracing while changing the order of the ring buffer */
	tracing_stop_tr(tr);

	old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
	if (old_order == order)
		goto out;

	ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order);
	if (ret)
		goto out;

#ifdef CONFIG_TRACER_SNAPSHOT

	if (!tr->allocated_snapshot)
		goto out_max;

	/* Keep the snapshot buffer's order in sync with the main buffer */
	ret = ring_buffer_subbuf_order_set(tr->snapshot_buffer.buffer, order);
	if (ret) {
		/* Put back the old order */
		cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order);
		if (WARN_ON_ONCE(cnt)) {
			/*
			 * AARGH! We are left with different orders!
			 * The max buffer is our "snapshot" buffer.
			 * When a tracer needs a snapshot (one of the
			 * latency tracers), it swaps the max buffer
			 * with the saved snap shot. We succeeded to
			 * update the order of the main buffer, but failed to
			 * update the order of the max buffer. But when we tried
			 * to reset the main buffer to the original size, we
			 * failed there too. This is very unlikely to
			 * happen, but if it does, warn and kill all
			 * tracing.
			 */
			tracing_disabled = 1;
		}
		goto out;
	}
out_max:
#endif
	/* Success: advance ppos so the write is seen as consumed */
	(*ppos)++;
out:
	if (ret)
		cnt = ret;
	tracing_start_tr(tr);
	return cnt;
}
8265
/* File operations for the per-instance "buffer_subbuf_size_kb" file. */
static const struct file_operations buffer_subbuf_size_fops = {
	.open = tracing_open_generic_tr,
	.read = buffer_subbuf_size_read,
	.write = buffer_subbuf_size_write,
	.release = tracing_release_generic_tr,
	.llseek = default_llseek,
};
8273
/* The tracefs "instances" directory; NULL until create_trace_instances() runs. */
static struct dentry *trace_instance_dir;

/* Forward declaration: populates an instance's tracefs directory with files. */
static void
init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8278
8279 #ifdef CONFIG_MODULES
make_mod_delta(struct module * mod,void * data)8280 static int make_mod_delta(struct module *mod, void *data)
8281 {
8282 struct trace_module_delta *module_delta;
8283 struct trace_scratch *tscratch;
8284 struct trace_mod_entry *entry;
8285 struct trace_array *tr = data;
8286 int i;
8287
8288 tscratch = tr->scratch;
8289 module_delta = READ_ONCE(tr->module_delta);
8290 for (i = 0; i < tscratch->nr_entries; i++) {
8291 entry = &tscratch->entries[i];
8292 if (strcmp(mod->name, entry->mod_name))
8293 continue;
8294 if (mod->state == MODULE_STATE_GOING)
8295 module_delta->delta[i] = 0;
8296 else
8297 module_delta->delta[i] = (unsigned long)mod->mem[MOD_TEXT].base
8298 - entry->mod_addr;
8299 break;
8300 }
8301 return 0;
8302 }
8303 #else
/* !CONFIG_MODULES stub: no modules exist, so there is no delta to record. */
static int make_mod_delta(struct module *mod, void *data)
{
	return 0;
}
8308 #endif
8309
mod_addr_comp(const void * a,const void * b,const void * data)8310 static int mod_addr_comp(const void *a, const void *b, const void *data)
8311 {
8312 const struct trace_mod_entry *e1 = a;
8313 const struct trace_mod_entry *e2 = b;
8314
8315 return e1->mod_addr > e2->mod_addr ? 1 : -1;
8316 }
8317
/*
 * Validate and adopt a persistent scratch area left over from a previous
 * boot.  Sets the text delta for the kernel, sanity-checks the recorded
 * module names, sorts the module entries by address, allocates the
 * per-module delta array, and restores the previous boot's trace clock.
 * On any validation failure the whole scratch area is wiped.
 */
static void setup_trace_scratch(struct trace_array *tr,
				struct trace_scratch *tscratch, unsigned int size)
{
	struct trace_module_delta *module_delta;
	struct trace_mod_entry *entry;
	int i, nr_entries;

	if (!tscratch)
		return;

	tr->scratch = tscratch;
	tr->scratch_size = size;

	/* Delta between this boot's kernel text and the previous boot's. */
	if (tscratch->text_addr)
		tr->text_delta = (unsigned long)_text - tscratch->text_addr;

	/* Entry count must fit within the scratch area, or it is corrupt. */
	if (struct_size(tscratch, entries, tscratch->nr_entries) > size)
		goto reset;

	/* Check if each module name is a valid string */
	for (i = 0; i < tscratch->nr_entries; i++) {
		int n;

		entry = &tscratch->entries[i];

		for (n = 0; n < MODULE_NAME_LEN; n++) {
			if (entry->mod_name[n] == '\0')
				break;
			if (!isprint(entry->mod_name[n]))
				goto reset;
		}
		/* Name not NUL-terminated within MODULE_NAME_LEN: corrupt. */
		if (n == MODULE_NAME_LEN)
			goto reset;
	}

	/* Sort the entries so that we can find appropriate module from address. */
	nr_entries = tscratch->nr_entries;
	sort_r(tscratch->entries, nr_entries, sizeof(struct trace_mod_entry),
	       mod_addr_comp, NULL, NULL);

	if (IS_ENABLED(CONFIG_MODULES)) {
		module_delta = kzalloc_flex(*module_delta, delta, nr_entries);
		if (!module_delta) {
			pr_info("module_delta allocation failed. Not able to decode module address.");
			goto reset;
		}
		init_rcu_head(&module_delta->rcu);
	} else
		module_delta = NULL;
	WRITE_ONCE(tr->module_delta, module_delta);

	/* Scan modules to make text delta for modules. */
	module_for_each_mod(make_mod_delta, tr);

	/* Set trace_clock as the same of the previous boot. */
	if (tscratch->clock_id != tr->clock_id) {
		if (tscratch->clock_id >= ARRAY_SIZE(trace_clocks) ||
		    tracing_set_clock(tr, trace_clocks[tscratch->clock_id].name) < 0) {
			pr_info("the previous trace_clock info is not valid.");
			goto reset;
		}
	}
	return;
 reset:
	/* Invalid trace modules */
	memset(tscratch, 0, size);
}
8385
/*
 * Allocate the ring buffer and per-CPU data for @buf.  When the trace
 * array has a boot-reserved memory range, the buffer is mapped onto that
 * range and the persistent scratch area (module/text deltas from the
 * previous boot) is set up.  Returns 0 on success, -ENOMEM on failure.
 */
int allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
{
	enum ring_buffer_flags rb_flags;
	struct trace_scratch *tscratch;
	unsigned int scratch_size = 0;

	rb_flags = tr->trace_flags & TRACE_ITER(OVERWRITE) ? RB_FL_OVERWRITE : 0;

	buf->tr = tr;

	if (tr->range_addr_start && tr->range_addr_size) {
		/* Add scratch buffer to handle 128 modules */
		buf->buffer = ring_buffer_alloc_range(size, rb_flags, 0,
						      tr->range_addr_start,
						      tr->range_addr_size,
						      struct_size(tscratch, entries, 128));

		tscratch = ring_buffer_meta_scratch(buf->buffer, &scratch_size);
		setup_trace_scratch(tr, tscratch, scratch_size);

		/*
		 * This is basically the same as a mapped buffer,
		 * with the same restrictions.
		 */
		tr->mapped++;
	} else {
		buf->buffer = ring_buffer_alloc(size, rb_flags);
	}
	if (!buf->buffer)
		return -ENOMEM;

	buf->data = alloc_percpu(struct trace_array_cpu);
	if (!buf->data) {
		ring_buffer_free(buf->buffer);
		buf->buffer = NULL;
		return -ENOMEM;
	}

	/* Allocate the first page for all buffers */
	/*
	 * NOTE(review): this reads tr->array_buffer rather than @buf —
	 * presumably intentional (entries are mirrored from the main
	 * buffer even when @buf is the snapshot buffer), but worth
	 * confirming against the callers.
	 */
	trace_set_buffer_entries(&tr->array_buffer,
				 ring_buffer_size(tr->array_buffer.buffer, 0));

	return 0;
}
8430
free_trace_buffer(struct array_buffer * buf)8431 static void free_trace_buffer(struct array_buffer *buf)
8432 {
8433 if (buf->buffer) {
8434 ring_buffer_free(buf->buffer);
8435 buf->buffer = NULL;
8436 free_percpu(buf->data);
8437 buf->data = NULL;
8438 }
8439 }
8440
allocate_trace_buffers(struct trace_array * tr,unsigned long size)8441 static int allocate_trace_buffers(struct trace_array *tr, unsigned long size)
8442 {
8443 int ret;
8444
8445 ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
8446 if (ret)
8447 return ret;
8448
8449 ret = trace_allocate_snapshot(tr, size);
8450 if (MEM_FAIL(ret, "Failed to allocate trace buffer\n"))
8451 free_trace_buffer(&tr->array_buffer);
8452
8453 return ret;
8454 }
8455
free_trace_buffers(struct trace_array * tr)8456 static void free_trace_buffers(struct trace_array *tr)
8457 {
8458 if (!tr)
8459 return;
8460
8461 free_trace_buffer(&tr->array_buffer);
8462 kfree(tr->module_delta);
8463
8464 #ifdef CONFIG_TRACER_SNAPSHOT
8465 free_trace_buffer(&tr->snapshot_buffer);
8466 #endif
8467 }
8468
init_trace_flags_index(struct trace_array * tr)8469 static void init_trace_flags_index(struct trace_array *tr)
8470 {
8471 int i;
8472
8473 /* Used by the trace options files */
8474 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8475 tr->trace_flags_index[i] = i;
8476 }
8477
__update_tracer(struct trace_array * tr)8478 static int __update_tracer(struct trace_array *tr)
8479 {
8480 struct tracer *t;
8481 int ret = 0;
8482
8483 for (t = trace_types; t && !ret; t = t->next)
8484 ret = add_tracer(tr, t);
8485
8486 return ret;
8487 }
8488
__update_tracer_options(struct trace_array * tr)8489 static __init int __update_tracer_options(struct trace_array *tr)
8490 {
8491 struct tracers *t;
8492 int ret = 0;
8493
8494 list_for_each_entry(t, &tr->tracers, list) {
8495 ret = add_tracer_options(tr, t);
8496 if (ret < 0)
8497 break;
8498 }
8499
8500 return ret;
8501 }
8502
/*
 * Create tracer option files for every registered trace array.  Also marks
 * tracer_options_updated so later-registered tracers get options directly.
 */
static __init void update_tracer_options(void)
{
	struct trace_array *tr;

	guard(mutex)(&trace_types_lock);
	tracer_options_updated = true;
	list_for_each_entry(tr, &ftrace_trace_arrays, list)
		__update_tracer_options(tr);
}
8512
8513 /* Must have trace_types_lock held */
trace_array_find(const char * instance)8514 struct trace_array *trace_array_find(const char *instance)
8515 {
8516 struct trace_array *tr, *found = NULL;
8517
8518 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8519 if (tr->name && strcmp(tr->name, instance) == 0) {
8520 found = tr;
8521 break;
8522 }
8523 }
8524
8525 return found;
8526 }
8527
trace_array_find_get(const char * instance)8528 struct trace_array *trace_array_find_get(const char *instance)
8529 {
8530 struct trace_array *tr;
8531
8532 guard(mutex)(&trace_types_lock);
8533 tr = trace_array_find(instance);
8534 if (tr && __trace_array_get(tr) < 0)
8535 tr = NULL;
8536
8537 return tr;
8538 }
8539
/*
 * Create the tracefs directory for instance @tr and populate it with the
 * event files, the standard instance files, and all registered tracers.
 * Each step unwinds the previous ones on failure.  Returns 0 or -errno.
 */
static int trace_array_create_dir(struct trace_array *tr)
{
	int ret;

	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
	if (!tr->dir)
		return -EINVAL;

	ret = event_trace_add_tracer(tr->dir, tr);
	if (ret) {
		tracefs_remove(tr->dir);
		return ret;
	}

	init_tracer_tracefs(tr, tr->dir);
	ret = __update_tracer(tr);
	if (ret) {
		/* Undo the event files and the directory itself. */
		event_trace_del_tracer(tr);
		tracefs_remove(tr->dir);
		return ret;
	}
	return 0;
}
8563
/*
 * Allocate and register a new trace instance.
 *
 * @name:             instance name (copied)
 * @systems:          comma-separated event systems to expose, NULL for all
 * @range_addr_start: start of a boot-reserved memory range, 0 if none
 * @range_addr_size:  size of that range, 0 if none
 *
 * Returns the new trace_array with one reference held, or an ERR_PTR.
 * Callers must hold trace_types_lock (the instance is added to
 * ftrace_trace_arrays here).
 */
static struct trace_array *
trace_array_create_systems(const char *name, const char *systems,
			   unsigned long range_addr_start,
			   unsigned long range_addr_size)
{
	struct trace_array *tr;
	int ret;

	ret = -ENOMEM;
	tr = kzalloc_obj(*tr);
	if (!tr)
		return ERR_PTR(ret);

	tr->name = kstrdup(name, GFP_KERNEL);
	if (!tr->name)
		goto out_free_tr;

	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
		goto out_free_tr;

	if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
		goto out_free_tr;

	if (systems) {
		tr->system_names = kstrdup_const(systems, GFP_KERNEL);
		if (!tr->system_names)
			goto out_free_tr;
	}

	/* Only for boot up memory mapped ring buffers */
	tr->range_addr_start = range_addr_start;
	tr->range_addr_size = range_addr_size;

	/* Inherit the top-level instance's flags, minus the per-instance ones. */
	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;

	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);

	raw_spin_lock_init(&tr->start_lock);

	tr->syscall_buf_sz = global_trace.syscall_buf_sz;

	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
#ifdef CONFIG_TRACER_SNAPSHOT
	spin_lock_init(&tr->snapshot_trigger_lock);
#endif
	/* New instances start with the no-op tracer. */
	tr->current_trace = &nop_trace;
	tr->current_trace_flags = nop_trace.flags;

	INIT_LIST_HEAD(&tr->systems);
	INIT_LIST_HEAD(&tr->events);
	INIT_LIST_HEAD(&tr->hist_vars);
	INIT_LIST_HEAD(&tr->err_log);
	INIT_LIST_HEAD(&tr->tracers);
	INIT_LIST_HEAD(&tr->marker_list);

#ifdef CONFIG_MODULES
	INIT_LIST_HEAD(&tr->mod_events);
#endif

	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
		goto out_free_tr;

	/* The ring buffer is expanded by default */
	trace_set_ring_buffer_expanded(tr);

	if (ftrace_allocate_ftrace_ops(tr) < 0)
		goto out_free_tr;

	trace_array_init_autoremove(tr);

	ftrace_init_trace_array(tr);

	init_trace_flags_index(tr);

	if (trace_instance_dir) {
		ret = trace_array_create_dir(tr);
		if (ret)
			goto out_free_tr;
	} else
		/* tracefs not ready yet; events are added early instead. */
		__trace_early_add_events(tr);

	list_add(&tr->list, &ftrace_trace_arrays);

	tr->ref++;

	return tr;

 out_free_tr:
	ftrace_free_ftrace_ops(tr);
	free_trace_buffers(tr);
	free_cpumask_var(tr->pipe_cpumask);
	free_cpumask_var(tr->tracing_cpumask);
	kfree_const(tr->system_names);
	kfree(tr->range_name);
	kfree(tr->name);
	kfree(tr);

	return ERR_PTR(ret);
}
8663
/* Create an instance with all event systems and no boot-mapped range. */
static struct trace_array *trace_array_create(const char *name)
{
	return trace_array_create_systems(name, NULL, 0, 0);
}
8668
instance_mkdir(const char * name)8669 static int instance_mkdir(const char *name)
8670 {
8671 struct trace_array *tr;
8672 int ret;
8673
8674 guard(mutex)(&event_mutex);
8675 guard(mutex)(&trace_types_lock);
8676
8677 ret = -EEXIST;
8678 if (trace_array_find(name))
8679 return -EEXIST;
8680
8681 tr = trace_array_create(name);
8682
8683 ret = PTR_ERR_OR_ZERO(tr);
8684
8685 return ret;
8686 }
8687
8688 #ifdef CONFIG_MMU
/*
 * Map a physical range @start of @size bytes into a fresh vmalloc-space
 * region with execute disabled.  Returns the new virtual start address,
 * or 0 on failure (so callers can fall back to other mappings).
 */
static u64 map_pages(unsigned long start, unsigned long size)
{
	unsigned long vmap_start, vmap_end;
	struct vm_struct *area;
	int ret;

	area = get_vm_area(size, VM_IOREMAP);
	if (!area)
		return 0;

	vmap_start = (unsigned long) area->addr;
	vmap_end = vmap_start + size;

	/* pgprot_nx(): the mapped buffer must never be executable. */
	ret = vmap_page_range(vmap_start, vmap_end,
			      start, pgprot_nx(PAGE_KERNEL));
	if (ret < 0) {
		free_vm_area(area);
		return 0;
	}

	return (u64)vmap_start;
}
8711 #else
/* !CONFIG_MMU stub: no vmalloc space to map into; report failure. */
static inline u64 map_pages(unsigned long start, unsigned long size)
{
	return 0;
}
8716 #endif
8717
8718 /**
8719 * trace_array_get_by_name - Create/Lookup a trace array, given its name.
8720 * @name: The name of the trace array to be looked up/created.
8721 * @systems: A list of systems to create event directories for (NULL for all)
8722 *
8723 * Returns pointer to trace array with given name.
8724 * NULL, if it cannot be created.
8725 *
8726 * NOTE: This function increments the reference counter associated with the
8727 * trace array returned. This makes sure it cannot be freed while in use.
8728 * Use trace_array_put() once the trace array is no longer needed.
8729 * If the trace_array is to be freed, trace_array_destroy() needs to
8730 * be called after the trace_array_put(), or simply let user space delete
8731 * it from the tracefs instances directory. But until the
8732 * trace_array_put() is called, user space can not delete it.
8733 *
8734 */
struct trace_array *trace_array_get_by_name(const char *name, const char *systems)
{
	struct trace_array *tr;

	guard(mutex)(&event_mutex);
	guard(mutex)(&trace_types_lock);

	/* Reuse an existing instance of the same name if one is registered. */
	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
		if (tr->name && strcmp(tr->name, name) == 0) {
			/* if this fails, @tr is going to be removed. */
			if (__trace_array_get(tr) < 0)
				tr = NULL;
			return tr;
		}
	}

	/* Not found: create it (creation itself holds one reference). */
	tr = trace_array_create_systems(name, systems, 0, 0);

	if (IS_ERR(tr))
		tr = NULL;
	else
		tr->ref++;

	return tr;
}
EXPORT_SYMBOL_GPL(trace_array_get_by_name);
8761
/*
 * Tear down and free instance @tr.  Caller must hold event_mutex and
 * trace_types_lock.  Fails with -EBUSY while the instance still has
 * external references or active trace users.  The teardown order below
 * is deliberate: flags and probes are cleared before files and buffers
 * are released.
 */
static int __remove_instance(struct trace_array *tr)
{
	int i;

	/* Reference counter for a newly created trace array = 1. */
	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
		return -EBUSY;

	list_del(&tr->list);

	/* Redirect trace_printk() back to the top-level instance if needed. */
	if (printk_trace == tr)
		update_printk_trace(&global_trace);

	/* Must be done before disabling all the flags */
	if (update_marker_trace(tr, 0))
		synchronize_rcu();

	/* Disable all the flags that were enabled coming in */
	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
		if ((1ULL << i) & ZEROED_TRACE_FLAGS)
			set_tracer_flag(tr, 1ULL << i, 0);
	}

	trace_array_cancel_autoremove(tr);
	tracing_set_nop(tr);
	clear_ftrace_function_probes(tr);
	event_trace_del_tracer(tr);
	ftrace_clear_pids(tr);
	ftrace_destroy_function_files(tr);
	tracefs_remove(tr->dir);
	free_percpu(tr->last_func_repeats);
	free_trace_buffers(tr);
	clear_tracing_err_log(tr);
	free_tracers(tr);

	/* Release boot-reserved or vmalloc'ed backing memory, if any. */
	if (tr->range_name) {
		reserve_mem_release_by_name(tr->range_name);
		kfree(tr->range_name);
	}
	if (tr->flags & TRACE_ARRAY_FL_VMALLOC)
		vfree((void *)tr->range_addr_start);

	for (i = 0; i < tr->nr_topts; i++) {
		kfree(tr->topts[i].topts);
	}
	kfree(tr->topts);

	free_cpumask_var(tr->pipe_cpumask);
	free_cpumask_var(tr->tracing_cpumask);
	kfree_const(tr->system_names);
	kfree(tr->name);
	kfree(tr);

	return 0;
}
8817
trace_array_destroy(struct trace_array * this_tr)8818 int trace_array_destroy(struct trace_array *this_tr)
8819 {
8820 struct trace_array *tr;
8821
8822 if (!this_tr)
8823 return -EINVAL;
8824
8825 guard(mutex)(&event_mutex);
8826 guard(mutex)(&trace_types_lock);
8827
8828
8829 /* Making sure trace array exists before destroying it. */
8830 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8831 if (tr == this_tr)
8832 return __remove_instance(tr);
8833 }
8834
8835 return -ENODEV;
8836 }
8837 EXPORT_SYMBOL_GPL(trace_array_destroy);
8838
instance_rmdir(const char * name)8839 static int instance_rmdir(const char *name)
8840 {
8841 struct trace_array *tr;
8842
8843 guard(mutex)(&event_mutex);
8844 guard(mutex)(&trace_types_lock);
8845
8846 tr = trace_array_find(name);
8847 if (!tr)
8848 return -ENODEV;
8849
8850 return __remove_instance(tr);
8851 }
8852
/*
 * Create the tracefs "instances" directory and populate directories for
 * any instances that were created before tracefs was ready (e.g. boot
 * instances).  Failures are reported via MEM_FAIL and abort the scan.
 */
static __init void create_trace_instances(struct dentry *d_tracer)
{
	struct trace_array *tr;

	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
							 instance_mkdir,
							 instance_rmdir);
	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
		return;

	guard(mutex)(&event_mutex);
	guard(mutex)(&trace_types_lock);

	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
		/* The unnamed entry is the top-level instance; skip it. */
		if (!tr->name)
			continue;
		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
			     "Failed to create instance directory\n"))
			return;
	}
}
8874
/*
 * Populate @d_tracer with the standard control files for instance @tr.
 * A read-only instance (e.g. one mapped from a previous boot) only gets
 * the viewing files; all write/control files are skipped for it.
 */
static void
init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
{
	umode_t writable_mode = TRACE_MODE_WRITE;
	int cpu;

	if (trace_array_is_readonly(tr))
		writable_mode = TRACE_MODE_READ;

	trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
			tr, &show_traces_fops);

	trace_create_file("current_tracer", writable_mode, d_tracer,
			tr, &set_tracer_fops);

	trace_create_file("tracing_cpumask", writable_mode, d_tracer,
			  tr, &tracing_cpumask_fops);

	/* Options are used for changing print-format even for readonly instance. */
	trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
			  tr, &tracing_iter_fops);

	trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
			  tr, &tracing_fops);

	trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
			  tr, &tracing_pipe_fops);

	trace_create_file("buffer_size_kb", writable_mode, d_tracer,
			  tr, &tracing_entries_fops);

	trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
			  tr, &tracing_total_entries_fops);

	trace_create_file("trace_clock", writable_mode, d_tracer, tr,
			  &trace_clock_fops);

	trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
			  &trace_time_stamp_mode_fops);

	/* Default: wake readers once the buffer is half full. */
	tr->buffer_percent = 50;

	trace_create_file("buffer_subbuf_size_kb", writable_mode, d_tracer,
			  tr, &buffer_subbuf_size_fops);

	create_trace_options_dir(tr);

	if (tr->range_addr_start)
		trace_create_file("last_boot_info", TRACE_MODE_READ, d_tracer,
				  tr, &last_boot_fops);

	for_each_tracing_cpu(cpu)
		tracing_init_tracefs_percpu(tr, cpu);

	/* Read-only instance has above files only. */
	if (trace_array_is_readonly(tr))
		return;

	trace_create_file("free_buffer", 0200, d_tracer,
			  tr, &tracing_free_buffer_fops);

	trace_create_file("trace_marker", 0220, d_tracer,
			  tr, &tracing_mark_fops);

	tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");

	trace_create_file("trace_marker_raw", 0220, d_tracer,
			  tr, &tracing_mark_raw_fops);

	trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
			tr, &buffer_percent_fops);

	trace_create_file("syscall_user_buf_size", TRACE_MODE_WRITE, d_tracer,
			  tr, &tracing_syscall_buf_fops);

	trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
			  tr, &rb_simple_fops);

	trace_create_maxlat_file(tr, d_tracer);

	if (ftrace_create_function_files(tr, d_tracer))
		MEM_FAIL(1, "Could not allocate function filter files");

#ifdef CONFIG_TRACER_SNAPSHOT
	/* A boot-mapped (persistent) buffer has no snapshot support. */
	if (!tr->range_addr_start)
		trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
				  tr, &snapshot_fops);
#endif

	trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
			  tr, &tracing_err_log_fops);

	ftrace_init_tracefs(tr, d_tracer);
}
8969
8970 #ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED
/*
 * Automount callback: mounts a tracefs submount at debugfs/tracing so
 * legacy tools that expect the old debugfs path keep working.  Returns
 * the vfsmount, an ERR_PTR, or NULL if tracefs is not registered.
 */
static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
{
	struct vfsmount *mnt;
	struct file_system_type *type;
	struct fs_context *fc;
	int ret;

	/*
	 * To maintain backward compatibility for tools that mount
	 * debugfs to get to the tracing facility, tracefs is automatically
	 * mounted to the debugfs/tracing directory.
	 */
	type = get_fs_type("tracefs");
	if (!type)
		return NULL;

	fc = fs_context_for_submount(type, mntpt);
	put_filesystem(type);
	if (IS_ERR(fc))
		return ERR_CAST(fc);

	pr_warn("NOTICE: Automounting of tracing to debugfs is deprecated and will be removed in 2030\n");

	ret = vfs_parse_fs_string(fc, "source", "tracefs");
	if (!ret)
		mnt = fc_mount(fc);
	else
		mnt = ERR_PTR(ret);

	put_fs_context(fc);
	return mnt;
}
9003 #endif
9004
9005 /**
9006 * tracing_init_dentry - initialize top level trace array
9007 *
9008 * This is called when creating files or directories in the tracing
9009 * directory. It is called via fs_initcall() by any of the boot up code
9010 * and expects to return the dentry of the top level tracing directory.
9011 */
int tracing_init_dentry(void)
{
	struct trace_array *tr = &global_trace;

	/* Lockdown can forbid tracefs entirely. */
	if (security_locked_down(LOCKDOWN_TRACEFS)) {
		pr_warn("Tracing disabled due to lockdown\n");
		return -EPERM;
	}

	/* The top level trace array uses NULL as parent */
	if (tr->dir)
		return 0;

	if (WARN_ON(!tracefs_initialized()))
		return -ENODEV;

#ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED
	/*
	 * As there may still be users that expect the tracing
	 * files to exist in debugfs/tracing, we must automount
	 * the tracefs file system there, so older tools still
	 * work with the newer kernel.
	 */
	tr->dir = debugfs_create_automount("tracing", NULL,
					   trace_automount, NULL);
#endif

	return 0;
}
9041
/* Linker-provided bounds of the built-in trace eval map section. */
extern struct trace_eval_map *__start_ftrace_eval_maps[];
extern struct trace_eval_map *__stop_ftrace_eval_maps[];

/* Workqueue and work items that offload boot-time tracing initialization. */
struct workqueue_struct *trace_init_wq __initdata;
static struct work_struct eval_map_work __initdata;
static struct work_struct tracerfs_init_work __initdata;
9048
eval_map_work_func(struct work_struct * work)9049 static void __init eval_map_work_func(struct work_struct *work)
9050 {
9051 int len;
9052
9053 len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9054 trace_event_update_with_eval_map(NULL, __start_ftrace_eval_maps, len);
9055 }
9056
/*
 * Kick off eval-map registration on a workqueue so boot is not delayed.
 * Falls back to doing the work synchronously if the workqueue cannot be
 * allocated.
 */
static int __init trace_eval_init(void)
{
	INIT_WORK(&eval_map_work, eval_map_work_func);

	trace_init_wq = alloc_workqueue("trace_init_wq", WQ_UNBOUND, 0);
	if (!trace_init_wq) {
		pr_err("Unable to allocate trace_init_wq\n");
		/* Do work here */
		eval_map_work_func(&eval_map_work);
		return -ENOMEM;
	}

	queue_work(trace_init_wq, &eval_map_work);
	return 0;
}

subsys_initcall(trace_eval_init);
9074
/*
 * Late initcall: wait for all deferred tracing init work to finish by
 * destroying the workqueue (destroy drains pending work first).
 */
static int __init trace_eval_sync(void)
{
	/* Make sure the eval map updates are finished */
	if (trace_init_wq)
		destroy_workqueue(trace_init_wq);
	return 0;
}

late_initcall_sync(trace_eval_sync);
9084
9085
9086 #ifdef CONFIG_MODULES
9087
module_exists(const char * module)9088 bool module_exists(const char *module)
9089 {
9090 /* All modules have the symbol __this_module */
9091 static const char this_mod[] = "__this_module";
9092 char modname[MODULE_NAME_LEN + sizeof(this_mod) + 2];
9093 unsigned long val;
9094 int n;
9095
9096 n = snprintf(modname, sizeof(modname), "%s:%s", module, this_mod);
9097
9098 if (n > sizeof(modname) - 1)
9099 return false;
9100
9101 val = module_kallsyms_lookup_name(modname);
9102 return val != 0;
9103 }
9104
/*
 * Register a loading module's eval maps with the event subsystem.
 * Skipped entirely for modules with a disqualifying taint.
 */
static void trace_module_add_evals(struct module *mod)
{
	/*
	 * Modules with bad taint do not have events created, do
	 * not bother with enums either.
	 */
	if (trace_module_has_bad_taint(mod))
		return;

	/* Even if no trace_evals, this need to sanitize field types. */
	trace_event_update_with_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
}
9117
9118 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
/*
 * Unlink and free the eval map chunk belonging to an unloading module.
 * Walks the singly-linked trace_eval_maps list, keeping @last pointing at
 * the link that references the current chunk so it can be spliced out.
 */
static void trace_module_remove_evals(struct module *mod)
{
	union trace_eval_map_item *map;
	union trace_eval_map_item **last = &trace_eval_maps;

	if (!mod->num_trace_evals)
		return;

	guard(mutex)(&trace_eval_mutex);

	map = trace_eval_maps;

	while (map) {
		if (map->head.mod == mod)
			break;
		/* Skip over this module's entries to its tail link. */
		map = trace_eval_jmp_to_tail(map);
		last = &map->tail.next;
		map = map->tail.next;
	}
	if (!map)
		return;

	/* Splice the found chunk out of the list and free it. */
	*last = trace_eval_jmp_to_tail(map)->tail.next;
	kfree(map);
}
9144 #else
/* Stub when CONFIG_TRACE_EVAL_MAP_FILE is not set: nothing to remove. */
static inline void trace_module_remove_evals(struct module *mod) { }
9146 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9147
/*
 * Keep persistent (boot-mapped) trace arrays in sync with module events:
 * record a newly loaded module's address in a live boot instance, or
 * refresh the address delta for an instance carrying last-boot data.
 */
static void trace_module_record(struct module *mod, bool add)
{
	struct trace_array *tr;
	unsigned long flags;

	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
		flags = tr->flags & (TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT);
		/* Update any persistent trace array that has already been started */
		if (flags == TRACE_ARRAY_FL_BOOT && add) {
			guard(mutex)(&scratch_mutex);
			save_mod(mod, tr);
		} else if (flags & TRACE_ARRAY_FL_LAST_BOOT) {
			/* Update delta if the module loaded in previous boot */
			make_mod_delta(mod, tr);
		}
	}
}
9165
trace_module_notify(struct notifier_block * self,unsigned long val,void * data)9166 static int trace_module_notify(struct notifier_block *self,
9167 unsigned long val, void *data)
9168 {
9169 struct module *mod = data;
9170
9171 switch (val) {
9172 case MODULE_STATE_COMING:
9173 trace_module_add_evals(mod);
9174 trace_module_record(mod, true);
9175 break;
9176 case MODULE_STATE_GOING:
9177 trace_module_remove_evals(mod);
9178 trace_module_record(mod, false);
9179 break;
9180 }
9181
9182 return NOTIFY_OK;
9183 }
9184
/* Module state notifier registered from tracer_init_tracefs_work_func(). */
static struct notifier_block trace_module_nb = {
	.notifier_call = trace_module_notify,
	.priority = 0,
};
9189 #endif /* CONFIG_MODULES */
9190
/*
 * Deferred tracefs setup: creates the top-level tracing files, registers
 * the module notifier, creates the instances directory, and finally
 * creates the tracer option files.  Run from trace_init_wq when it
 * exists, synchronously otherwise.
 */
static __init void tracer_init_tracefs_work_func(struct work_struct *work)
{

	event_trace_init();

	init_tracer_tracefs(&global_trace, NULL);
	ftrace_init_tracefs_toplevel(&global_trace, NULL);

	trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
			&global_trace, &tracing_thresh_fops);

	trace_create_file("README", TRACE_MODE_READ, NULL,
			NULL, &tracing_readme_fops);

	trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
			NULL, &tracing_saved_cmdlines_fops);

	trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
			  NULL, &tracing_saved_cmdlines_size_fops);

	trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
			NULL, &tracing_saved_tgids_fops);

	trace_create_eval_file(NULL);

#ifdef CONFIG_MODULES
	register_module_notifier(&trace_module_nb);
#endif

#ifdef CONFIG_DYNAMIC_FTRACE
	trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
			NULL, &tracing_dyn_info_fops);
#endif

	create_trace_instances(NULL);

	update_tracer_options();
}
9229
/*
 * fs_initcall entry point: set up the tracing directory, offloading the
 * bulk of the file creation to trace_init_wq when available.  Always
 * returns 0 so boot continues even if tracing setup fails.
 */
static __init int tracer_init_tracefs(void)
{
	int ret;

	trace_access_lock_init();

	ret = tracing_init_dentry();
	if (ret)
		return 0;

	if (trace_init_wq) {
		INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
		queue_work(trace_init_wq, &tracerfs_init_work);
	} else {
		tracer_init_tracefs_work_func(NULL);
	}

	if (rv_init_interface())
		pr_err("RV: Error while creating the RV interface\n");

	return 0;
}

fs_initcall(tracer_init_tracefs);
9254
static int trace_die_panic_handler(struct notifier_block *self,
				unsigned long ev, void *unused);

/* Panic notifier: dumps the trace buffer on panic (see handler below). */
static struct notifier_block trace_panic_notifier = {
	.notifier_call = trace_die_panic_handler,
	.priority = INT_MAX - 1,
};

/* Die notifier: same handler, triggered only on DIE_OOPS events. */
static struct notifier_block trace_die_notifier = {
	.notifier_call = trace_die_panic_handler,
	.priority = INT_MAX - 1,
};
9267
9268 /*
9269 * The idea is to execute the following die/panic callback early, in order
9270 * to avoid showing irrelevant information in the trace (like other panic
9271 * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
9272 * warnings get disabled (to prevent potential log flooding).
9273 */
/*
 * Shared die/panic callback: dump the trace buffers when the user has
 * enabled ftrace_dump_on_oops.  For the die chain, only DIE_OOPS events
 * trigger a dump.  Always returns NOTIFY_DONE so other notifiers run.
 */
static int trace_die_panic_handler(struct notifier_block *self,
				unsigned long ev, void *unused)
{
	if (!ftrace_dump_on_oops_enabled())
		return NOTIFY_DONE;

	/* The die notifier requires DIE_OOPS to trigger */
	if (self == &trace_die_notifier && ev != DIE_OOPS)
		return NOTIFY_DONE;

	ftrace_dump(DUMP_PARAM);

	return NOTIFY_DONE;
}
9288
9289 /*
9290 * printk is set to max of 1024, we really don't need it that big.
9291 * Nothing should be printing 1000 characters anyway.
9292 */
9293 #define TRACE_MAX_PRINT 1000
9294
9295 /*
9296 * Define here KERN_TRACE so that we have one place to modify
9297 * it if we decide to change what log level the ftrace dump
9298 * should be at.
9299 */
9300 #define KERN_TRACE KERN_EMERG
9301
/*
 * Emit the contents of @s to the console at KERN_TRACE level, clamping the
 * length to TRACE_MAX_PRINT and to the seq buffer size, then reset @s for
 * reuse.  Used by the ftrace dump path.
 */
void
trace_printk_seq(struct trace_seq *s)
{
	/* Probably should print a warning here. */
	if (s->seq.len >= TRACE_MAX_PRINT)
		s->seq.len = TRACE_MAX_PRINT;

	/*
	 * More paranoid code. Although the buffer size is set to
	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
	 * an extra layer of protection.
	 */
	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
		s->seq.len = s->seq.size - 1;

	/* should be zero ended, but we are paranoid. */
	s->buffer[s->seq.len] = 0;

	printk(KERN_TRACE "%s", s->buffer);

	trace_seq_init(s);
}
9324
/*
 * Initialize @iter to walk @tr's main buffer across all CPUs, using the
 * preallocated static temp/fmt buffers (this path must not allocate —
 * it is used from the dump-on-oops path).
 */
static void trace_init_iter(struct trace_iterator *iter, struct trace_array *tr)
{
	iter->tr = tr;
	iter->trace = iter->tr->current_trace;
	iter->cpu_file = RING_BUFFER_ALL_CPUS;
	iter->array_buffer = &tr->array_buffer;

	if (iter->trace && iter->trace->open)
		iter->trace->open(iter);

	/* Annotate start of buffers if we had overruns */
	if (ring_buffer_overruns(iter->array_buffer->buffer))
		iter->iter_flags |= TRACE_FILE_ANNOTATE;

	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
	if (trace_clocks[iter->tr->clock_id].in_ns)
		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;

	/* Can not use kmalloc for iter.temp and iter.fmt */
	iter->temp = static_temp_buf;
	iter->temp_size = STATIC_TEMP_BUF_SIZE;
	iter->fmt = static_fmt_buf;
	iter->fmt_size = STATIC_FMT_BUF_SIZE;
}
9349
/*
 * trace_init_global_iter - initialize @iter to read the global trace buffer
 * @iter: the iterator to set up
 *
 * Convenience wrapper around trace_init_iter() for the top-level
 * (global_trace) instance.
 */
void trace_init_global_iter(struct trace_iterator *iter)
{
	trace_init_iter(iter, &global_trace);
}
9354
/*
 * ftrace_dump_one - print the contents of one trace array to the console
 * @tr:        the trace array to dump
 * @dump_mode: DUMP_ORIG to dump only the current CPU's buffer,
 *             anything else dumps all CPUs
 *
 * Runs with interrupts disabled and tracing turned off for the duration.
 * Uses a static iterator because this can be called from oops/panic
 * context where stack space is scarce; callers are expected to
 * serialize (ftrace_dump() does so via its dump_running counter).
 */
static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_mode)
{
	/* use static because iter can be a bit big for the stack */
	static struct trace_iterator iter;
	unsigned int old_userobj;
	unsigned long flags;
	int cnt = 0;

	/*
	 * Always turn off tracing when we dump.
	 * We don't need to show trace output of what happens
	 * between multiple crashes.
	 *
	 * If the user does a sysrq-z, then they can re-enable
	 * tracing with echo 1 > tracing_on.
	 */
	tracer_tracing_off(tr);

	local_irq_save(flags);

	/* Simulate the iterator */
	trace_init_iter(&iter, tr);

	/* While dumping, do not allow the buffer to be enabled */
	tracer_tracing_disable(tr);

	/* Save the SYM_USEROBJ flag so it can be restored afterwards. */
	old_userobj = tr->trace_flags & TRACE_ITER(SYM_USEROBJ);

	/* don't look at user memory in panic mode */
	tr->trace_flags &= ~TRACE_ITER(SYM_USEROBJ);

	if (dump_mode == DUMP_ORIG)
		iter.cpu_file = raw_smp_processor_id();
	else
		iter.cpu_file = RING_BUFFER_ALL_CPUS;

	if (tr == &global_trace)
		printk(KERN_TRACE "Dumping ftrace buffer:\n");
	else
		printk(KERN_TRACE "Dumping ftrace instance %s buffer:\n", tr->name);

	/* Did function tracer already get disabled? */
	if (ftrace_is_dead()) {
		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
		printk("# MAY BE MISSING FUNCTION EVENTS\n");
	}

	/*
	 * We need to stop all tracing on all CPUS to read
	 * the next buffer. This is a bit expensive, but is
	 * not done often. We print all that we can read,
	 * and then release the locks again.
	 */

	while (!trace_empty(&iter)) {

		/* Print the separator once, before the first entry. */
		if (!cnt)
			printk(KERN_TRACE "---------------------------------\n");

		cnt++;

		trace_iterator_reset(&iter);
		iter.iter_flags |= TRACE_FILE_LAT_FMT;

		if (trace_find_next_entry_inc(&iter) != NULL) {
			int ret;

			ret = print_trace_line(&iter);
			if (ret != TRACE_TYPE_NO_CONSUME)
				trace_consume(&iter);

			trace_printk_seq(&iter.seq);
		}
		/* Dumping can take a while; keep the NMI watchdog quiet. */
		touch_nmi_watchdog();
	}

	if (!cnt)
		printk(KERN_TRACE " (ftrace buffer empty)\n");
	else
		printk(KERN_TRACE "---------------------------------\n");

	/* Restore the user-symbol flag cleared above. */
	tr->trace_flags |= old_userobj;

	tracer_tracing_enable(tr);
	local_irq_restore(flags);
}
9441
/*
 * ftrace_dump_by_param - dump buffers selected by "ftrace_dump_on_oops"
 *
 * Parses the comma-separated ftrace_dump_on_oops string.  The first
 * token may be a legacy global mode: "0" (skip), "1" (dump all CPUs of
 * the global buffer), or "2"/"orig_cpu" (dump only the current CPU).
 * Every other token names an instance, optionally as "name=2" or
 * "name=orig_cpu" to restrict that instance's dump to the current CPU.
 */
static void ftrace_dump_by_param(void)
{
	bool first_param = true;
	char dump_param[MAX_TRACER_SIZE];
	char *buf, *token, *inst_name;
	struct trace_array *tr;

	/* Parse a copy: strsep() modifies the string it walks. */
	strscpy(dump_param, ftrace_dump_on_oops, MAX_TRACER_SIZE);
	buf = dump_param;

	while ((token = strsep(&buf, ",")) != NULL) {
		if (first_param) {
			first_param = false;
			if (!strcmp("0", token))
				continue;
			else if (!strcmp("1", token)) {
				ftrace_dump_one(&global_trace, DUMP_ALL);
				continue;
			}
			else if (!strcmp("2", token) ||
			      !strcmp("orig_cpu", token)) {
				ftrace_dump_one(&global_trace, DUMP_ORIG);
				continue;
			}
		}

		/* Split "instance=mode"; token is NULL if no '=' present. */
		inst_name = strsep(&token, "=");
		tr = trace_array_find(inst_name);
		if (!tr) {
			printk(KERN_TRACE "Instance %s not found\n", inst_name);
			continue;
		}

		if (token && (!strcmp("2", token) ||
			      !strcmp("orig_cpu", token)))
			ftrace_dump_one(tr, DUMP_ORIG);
		else
			ftrace_dump_one(tr, DUMP_ALL);
	}
}
9482
ftrace_dump(enum ftrace_dump_mode oops_dump_mode)9483 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9484 {
9485 static atomic_t dump_running;
9486
9487 /* Only allow one dump user at a time. */
9488 if (atomic_inc_return(&dump_running) != 1) {
9489 atomic_dec(&dump_running);
9490 return;
9491 }
9492
9493 switch (oops_dump_mode) {
9494 case DUMP_ALL:
9495 ftrace_dump_one(&global_trace, DUMP_ALL);
9496 break;
9497 case DUMP_ORIG:
9498 ftrace_dump_one(&global_trace, DUMP_ORIG);
9499 break;
9500 case DUMP_PARAM:
9501 ftrace_dump_by_param();
9502 break;
9503 case DUMP_NONE:
9504 break;
9505 default:
9506 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9507 ftrace_dump_one(&global_trace, DUMP_ALL);
9508 }
9509
9510 atomic_dec(&dump_running);
9511 }
9512 EXPORT_SYMBOL_GPL(ftrace_dump);
9513
9514 #define WRITE_BUFSIZE 4096
9515
/*
 * trace_parse_run_command - run a callback for each command line written
 * @file:     the file being written to (unused here, kept for the fops API)
 * @buffer:   user-space buffer containing newline-separated commands
 * @count:    number of bytes in @buffer
 * @ppos:     file position (unused)
 * @createfn: callback invoked once per parsed line
 *
 * Copies the user data in WRITE_BUFSIZE-sized chunks, splits it into
 * '\n'-terminated lines, strips everything after a '#' (comments), and
 * passes each line to @createfn.  Returns the number of bytes consumed,
 * or a negative errno (-ENOMEM, -EFAULT, -EINVAL, or @createfn's error).
 */
ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
				size_t count, loff_t *ppos,
				int (*createfn)(const char *))
{
	char *kbuf __free(kfree) = NULL;
	char *buf, *tmp;
	int ret = 0;
	size_t done = 0;
	size_t size;

	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
	if (!kbuf)
		return -ENOMEM;

	while (done < count) {
		size = count - done;

		/* Leave room for the terminating NUL. */
		if (size >= WRITE_BUFSIZE)
			size = WRITE_BUFSIZE - 1;

		if (copy_from_user(kbuf, buffer + done, size))
			return -EFAULT;

		kbuf[size] = '\0';
		buf = kbuf;
		do {
			tmp = strchr(buf, '\n');
			if (tmp) {
				*tmp = '\0';
				size = tmp - buf + 1;
			} else {
				size = strlen(buf);
				if (done + size < count) {
					/*
					 * Partial line with more data pending:
					 * re-read it from the start of kbuf on
					 * the next outer iteration, unless it
					 * already fills the whole buffer.
					 */
					if (buf != kbuf)
						break;
					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
					pr_warn("Line length is too long: Should be less than %d\n",
						WRITE_BUFSIZE - 2);
					return -EINVAL;
				}
			}
			done += size;

			/* Remove comments */
			tmp = strchr(buf, '#');

			if (tmp)
				*tmp = '\0';

			ret = createfn(buf);
			if (ret)
				return ret;
			buf += size;

		} while (done < count);
	}
	return done;
}
9574
/*
 * backup_instance_area - copy a boot-mapped instance's buffer into vmalloc memory
 * @backup: name of the boot-mapped instance to copy
 * @addr:   out: virtual address of the allocated copy
 * @size:   out: size of the copied region
 *
 * Looks up the instance, verifies it is boot mapped (TRACE_ARRAY_FL_BOOT),
 * and duplicates its range into freshly vzalloc()'d memory.  On success
 * the caller owns the allocation (to be released with vfree() — the
 * caller marks it TRACE_ARRAY_FL_VMALLOC).  Returns 0 or a negative
 * errno; the instance reference is dropped on every path.
 */
__init static int backup_instance_area(const char *backup,
				       unsigned long *addr, phys_addr_t *size)
{
	struct trace_array *backup_tr;
	void *allocated_vaddr = NULL;

	backup_tr = trace_array_get_by_name(backup, NULL);
	if (!backup_tr) {
		pr_warn("Tracing: Instance %s is not found.\n", backup);
		return -ENOENT;
	}

	if (!(backup_tr->flags & TRACE_ARRAY_FL_BOOT)) {
		pr_warn("Tracing: Instance %s is not boot mapped.\n", backup);
		trace_array_put(backup_tr);
		return -EINVAL;
	}

	*size = backup_tr->range_addr_size;

	allocated_vaddr = vzalloc(*size);
	if (!allocated_vaddr) {
		pr_warn("Tracing: Failed to allocate memory for copying instance %s (size 0x%lx)\n",
			backup, (unsigned long)*size);
		trace_array_put(backup_tr);
		return -ENOMEM;
	}

	memcpy(allocated_vaddr,
	       (void *)backup_tr->range_addr_start, (size_t)*size);
	*addr = (unsigned long)allocated_vaddr;

	trace_array_put(backup_tr);
	return 0;
}
9610
/*
 * enable_instances - create trace instances requested on the kernel command line
 *
 * Parses boot_instance_info, a tab-separated list of instance specs of
 * the form (as read by the code below):
 *
 *   name[=backup][^flag[^flag...]][@start:size | @reserve-name],event,...
 *
 * where the flags are "traceoff" and "printk"/"traceprintk"/
 * "trace_printk".  An "@start:size" address creates a memmap'd buffer;
 * "@name" looks up a reserve_mem region; "=backup" copies an existing
 * boot-mapped instance.  Events listed after the first comma are saved
 * and enabled in a second pass, after every instance has been created.
 */
__init static void enable_instances(void)
{
	struct trace_array *tr;
	bool memmap_area = false;
	char *curr_str;
	char *name;
	char *str;
	char *tok;

	/* A tab is always appended */
	boot_instance_info[boot_instance_index - 1] = '\0';
	str = boot_instance_info;

	while ((curr_str = strsep(&str, "\t"))) {
		phys_addr_t start = 0;
		phys_addr_t size = 0;
		unsigned long addr = 0;
		bool traceprintk = false;
		bool traceoff = false;
		char *flag_delim;
		char *addr_delim;
		char *rname __free(kfree) = NULL;
		char *backup;

		/* First comma-token is the instance spec; the rest are events. */
		tok = strsep(&curr_str, ",");

		/* "name=backup" copies an existing boot-mapped instance. */
		name = strsep(&tok, "=");
		backup = tok;

		flag_delim = strchr(name, '^');
		addr_delim = strchr(name, '@');

		if (addr_delim)
			*addr_delim++ = '\0';

		if (flag_delim)
			*flag_delim++ = '\0';

		if (backup) {
			if (backup_instance_area(backup, &addr, &size) < 0)
				continue;
		}

		if (flag_delim) {
			char *flag;

			while ((flag = strsep(&flag_delim, "^"))) {
				if (strcmp(flag, "traceoff") == 0) {
					traceoff = true;
				} else if ((strcmp(flag, "printk") == 0) ||
					   (strcmp(flag, "traceprintk") == 0) ||
					   (strcmp(flag, "trace_printk") == 0)) {
					traceprintk = true;
				} else {
					pr_info("Tracing: Invalid instance flag '%s' for %s\n",
						flag, name);
				}
			}
		}

		tok = addr_delim;
		if (tok && isdigit(*tok)) {
			/* Numeric "@start:size" spec: map physical memory. */
			start = memparse(tok, &tok);
			if (!start) {
				pr_warn("Tracing: Invalid boot instance address for %s\n",
					name);
				continue;
			}
			if (*tok != ':') {
				pr_warn("Tracing: No size specified for instance %s\n", name);
				continue;
			}
			tok++;
			size = memparse(tok, &tok);
			if (!size) {
				pr_warn("Tracing: Invalid boot instance size for %s\n",
					name);
				continue;
			}
			memmap_area = true;
		} else if (tok) {
			/* Otherwise "@name" refers to a reserve_mem region. */
			if (!reserve_mem_find_by_name(tok, &start, &size)) {
				start = 0;
				pr_warn("Failed to map boot instance %s to %s\n", name, tok);
				continue;
			}
			rname = kstrdup(tok, GFP_KERNEL);
		}

		if (start) {
			/* Start and size must be page aligned */
			if (start & ~PAGE_MASK) {
				pr_warn("Tracing: mapping start addr %pa is not page aligned\n", &start);
				continue;
			}
			if (size & ~PAGE_MASK) {
				pr_warn("Tracing: mapping size %pa is not page aligned\n", &size);
				continue;
			}

			if (memmap_area)
				addr = map_pages(start, size);
			else
				addr = (unsigned long)phys_to_virt(start);
			if (addr) {
				pr_info("Tracing: mapped boot instance %s at physical memory %pa of size 0x%lx\n",
					name, &start, (unsigned long)size);
			} else {
				pr_warn("Tracing: Failed to map boot instance %s\n", name);
				continue;
			}
		} else {
			/* Only non mapped buffers have snapshot buffers */
			do_allocate_snapshot(name);
		}

		tr = trace_array_create_systems(name, NULL, addr, size);
		if (IS_ERR(tr)) {
			pr_warn("Tracing: Failed to create instance buffer %s\n", curr_str);
			continue;
		}

		if (traceoff)
			tracer_tracing_off(tr);

		if (traceprintk)
			update_printk_trace(tr);

		/*
		 * memmap'd buffers can not be freed.
		 */
		if (memmap_area) {
			tr->flags |= TRACE_ARRAY_FL_MEMMAP;
			tr->ref++;
		}

		/*
		 * Backup buffers can be freed but need vfree().
		 */
		if (backup) {
			tr->flags |= TRACE_ARRAY_FL_VMALLOC | TRACE_ARRAY_FL_RDONLY;
			trace_array_start_autoremove();
		}

		if (start || backup) {
			tr->flags |= TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT;
			/* Transfer ownership of the region name to the instance. */
			tr->range_name = no_free_ptr(rname);
		}

		/*
		 * Save the events to start and enable them after all boot instances
		 * have been created.
		 */
		tr->boot_events = curr_str;
	}

	/* Enable the events after all boot instances have been created */
	list_for_each_entry(tr, &ftrace_trace_arrays, list) {

		if (!tr->boot_events || !(*tr->boot_events)) {
			tr->boot_events = NULL;
			continue;
		}

		curr_str = tr->boot_events;

		/* Clear the instance if this is a persistent buffer */
		if (tr->flags & TRACE_ARRAY_FL_LAST_BOOT)
			update_last_data(tr);

		while ((tok = strsep(&curr_str, ","))) {
			early_enable_events(tr, tok, true);
		}
		tr->boot_events = NULL;
	}
}
9787
/*
 * tracer_alloc_buffers - allocate and initialize the global trace array
 *
 * Boot-time setup of the top-level tracing instance: allocates the CPU
 * masks, ring buffers, saved-cmdlines and trigger buffers, installs the
 * nop tracer, registers the panic/die notifiers, and links global_trace
 * into ftrace_trace_arrays.  On failure every resource acquired so far
 * is released via the goto-cleanup chain at the bottom.  Returns 0 or a
 * negative errno.
 */
__init static int tracer_alloc_buffers(void)
{
	unsigned long ring_buf_size;
	int ret = -ENOMEM;


	if (security_locked_down(LOCKDOWN_TRACEFS)) {
		pr_warn("Tracing disabled due to lockdown\n");
		return -EPERM;
	}

	/*
	 * Make sure we don't accidentally add more trace options
	 * than we have bits for.
	 */
	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);

	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
		return -ENOMEM;

	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
		goto out_free_buffer_mask;

	/* Only allocate trace_printk buffers if a trace_printk exists */
	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
		/* Must be called before global_trace.buffer is allocated */
		trace_printk_init_buffers();

	/* To save memory, keep the ring buffer size to its minimum */
	if (global_trace.ring_buffer_expanded)
		ring_buf_size = trace_buf_size;
	else
		ring_buf_size = 1;

	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);

	raw_spin_lock_init(&global_trace.start_lock);

	/*
	 * The prepare callbacks allocates some memory for the ring buffer. We
	 * don't free the buffer if the CPU goes down. If we were to free
	 * the buffer, then the user would lose any trace that was in the
	 * buffer. The memory will be removed once the "instance" is removed.
	 */
	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
				      "trace/RB:prepare", trace_rb_cpu_prepare,
				      NULL);
	if (ret < 0)
		goto out_free_cpumask;
	/* Used for event triggers */
	ret = -ENOMEM;
	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
	if (!temp_buffer)
		goto out_rm_hp_state;

	if (trace_create_savedcmd() < 0)
		goto out_free_temp_buffer;

	if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
		goto out_free_savedcmd;

	/* TODO: make the number of buffers hot pluggable with CPUS */
	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
		goto out_free_pipe_cpumask;
	}
	if (global_trace.buffer_disabled)
		tracing_off();

	if (trace_boot_clock) {
		ret = tracing_set_clock(&global_trace, trace_boot_clock);
		if (ret < 0)
			pr_warn("Trace clock %s not defined, going back to default\n",
				trace_boot_clock);
	}

	/*
	 * register_tracer() might reference current_trace, so it
	 * needs to be set before we register anything. This is
	 * just a bootstrap of current_trace anyway.
	 */
	global_trace.current_trace = &nop_trace;
	global_trace.current_trace_flags = nop_trace.flags;

	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
#ifdef CONFIG_TRACER_SNAPSHOT
	spin_lock_init(&global_trace.snapshot_trigger_lock);
#endif
	ftrace_init_global_array_ops(&global_trace);

#ifdef CONFIG_MODULES
	INIT_LIST_HEAD(&global_trace.mod_events);
#endif

	init_trace_flags_index(&global_trace);

	INIT_LIST_HEAD(&global_trace.tracers);

	/* All seems OK, enable tracing */
	tracing_disabled = 0;

	/* Dump the buffers on panic/oops (see trace_die_panic_handler()). */
	atomic_notifier_chain_register(&panic_notifier_list,
				       &trace_panic_notifier);

	register_die_notifier(&trace_die_notifier);

	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;

	global_trace.syscall_buf_sz = syscall_buf_size;

	INIT_LIST_HEAD(&global_trace.systems);
	INIT_LIST_HEAD(&global_trace.events);
	INIT_LIST_HEAD(&global_trace.hist_vars);
	INIT_LIST_HEAD(&global_trace.err_log);
	list_add(&global_trace.marker_list, &marker_copies);
	list_add(&global_trace.list, &ftrace_trace_arrays);

	register_tracer(&nop_trace);

	/* Function tracing may start here (via kernel command line) */
	init_function_trace();

	apply_trace_boot_options();

	register_snapshot_cmd();

	return 0;

out_free_pipe_cpumask:
	free_cpumask_var(global_trace.pipe_cpumask);
out_free_savedcmd:
	trace_free_saved_cmdlines_buffer();
out_free_temp_buffer:
	ring_buffer_free(temp_buffer);
out_rm_hp_state:
	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
out_free_cpumask:
	free_cpumask_var(global_trace.tracing_cpumask);
out_free_buffer_mask:
	free_cpumask_var(tracing_buffer_mask);
	return ret;
}
9931
9932 #ifdef CONFIG_FUNCTION_TRACER
9933 /* Used to set module cached ftrace filtering at boot up */
/*
 * trace_get_global_array - return the top-level trace array
 *
 * Accessor for the static global_trace, used by the function tracer for
 * module cached filtering at boot (see the comment above this #ifdef).
 */
struct trace_array *trace_get_global_array(void)
{
	return &global_trace;
}
9938 #endif
9939
/*
 * early_trace_init - earliest tracing setup
 *
 * Sets up the tracepoint_printk iterator (if requested on the command
 * line), allocates the trace buffers, and initializes the event output
 * functions.  On iterator allocation failure, tracepoint_printk is
 * simply disabled rather than failing boot.
 */
void __init early_trace_init(void)
{
	if (tracepoint_printk) {
		tracepoint_print_iter = kzalloc_obj(*tracepoint_print_iter);
		if (MEM_FAIL(!tracepoint_print_iter,
			     "Failed to allocate trace iterator\n"))
			tracepoint_printk = 0;
		else
			static_key_enable(&tracepoint_printk_key.key);
	}
	tracer_alloc_buffers();

	init_events();
}
9954
/*
 * trace_init - second-stage tracing setup
 *
 * Initializes the trace event subsystem, then creates any instances
 * requested on the kernel command line (boot_instance_index is non-zero
 * only when instance specs were parsed at boot).
 */
void __init trace_init(void)
{
	trace_event_init();

	if (boot_instance_index)
		enable_instances();
}
9962
/*
 * clear_boot_tracer - forget an unregistered boot-requested tracer
 *
 * Reaching here with default_bootup_tracer still set means no tracer of
 * that name registered during boot; warn and clear the pointer.
 */
__init static void clear_boot_tracer(void)
{
	/*
	 * The default tracer at boot buffer is an init section.
	 * This function is called in lateinit. If we did not
	 * find the boot tracer, then clear it out, to prevent
	 * later registration from accessing the buffer that is
	 * about to be freed.
	 */
	if (!default_bootup_tracer)
		return;

	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
	       default_bootup_tracer);
	default_bootup_tracer = NULL;
}
9979
9980 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
/*
 * tracing_set_default_clock - fall back to the "global" trace clock
 *
 * If the scheduler clock turned out to be unstable and no clock was
 * chosen on the command line, switch the default from the (possibly
 * skewed per-CPU) local clock to "global".  Skipped under lockdown.
 */
__init static void tracing_set_default_clock(void)
{
	/* sched_clock_stable() is determined in late_initcall */
	if (!trace_boot_clock && !sched_clock_stable()) {
		if (security_locked_down(LOCKDOWN_TRACEFS)) {
			pr_warn("Can not set tracing clock due to lockdown\n");
			return;
		}

		printk(KERN_WARNING
		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
		       "If you want to keep using the local clock, then add:\n"
		       "  \"trace_clock=local\"\n"
		       "on the kernel command line\n");
		tracing_set_clock(&global_trace, "global");
	}
}
9998 #else
tracing_set_default_clock(void)9999 static inline void tracing_set_default_clock(void) { }
10000 #endif
10001
/*
 * late_trace_init - final boot-time tracing fixups (late_initcall_sync)
 *
 * Stops tracepoint_printk output if it was only wanted during boot,
 * honors the "traceoff_after_boot" option, picks the default trace
 * clock, and clears any boot tracer that never registered.
 */
__init static int late_trace_init(void)
{
	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
		static_key_disable(&tracepoint_printk_key.key);
		tracepoint_printk = 0;
	}

	if (traceoff_after_boot)
		tracing_off();

	tracing_set_default_clock();
	clear_boot_tracer();
	return 0;
}
10016
10017 late_initcall_sync(late_trace_init);
10018