1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * ring buffer based function tracer
4 *
5 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7 *
8 * Originally taken from the RT patch by:
9 * Arnaldo Carvalho de Melo <acme@redhat.com>
10 *
11 * Based on code from the latency_tracer, that is:
12 * Copyright (C) 2004-2006 Ingo Molnar
13 * Copyright (C) 2004 Nadia Yvette Chambers
14 */
15 #include <linux/ring_buffer.h>
16 #include <linux/utsname.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/cleanup.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/panic_notifier.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52 #include <linux/sort.h>
53 #include <linux/io.h> /* vmap_page_range() */
54 #include <linux/fs_context.h>
55
56 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
57
58 #include "trace.h"
59 #include "trace_output.h"
60
61 #ifdef CONFIG_FTRACE_STARTUP_TEST
62 /*
63 * We need to change this state when a selftest is running.
64 * A selftest will look into the ring-buffer to count the
65 * entries inserted during the selftest, although some concurrent
66 * insertions into the ring-buffer, such as trace_printk, could occur
67 * at the same time, giving false positive or negative results.
68 */
69 static bool __read_mostly tracing_selftest_running;
70
71 /*
72 * If boot-time tracing including tracers/events via kernel cmdline
73 * is running, we do not want to run SELFTEST.
74 */
75 bool __read_mostly tracing_selftest_disabled;
76
77 void __init disable_tracing_selftest(const char *reason)
78 {
79 if (!tracing_selftest_disabled) {
80 tracing_selftest_disabled = true;
81 pr_info("Ftrace startup test is disabled due to %s\n", reason);
82 }
83 }
84 #else
85 #define tracing_selftest_running 0
86 #define tracing_selftest_disabled 0
87 #endif
88
89 /* Pipe tracepoints to printk */
90 static struct trace_iterator *tracepoint_print_iter;
91 int tracepoint_printk;
92 static bool tracepoint_printk_stop_on_boot __initdata;
93 static bool traceoff_after_boot __initdata;
94 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
95
96 /* For tracers that don't implement custom flags */
97 static struct tracer_opt dummy_tracer_opt[] = {
98 { }
99 };
100
101 static int
102 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
103 {
104 return 0;
105 }
106
107 /*
108 * To prevent the comm cache from being overwritten when no
109 * tracing is active, only save the comm when a trace event
110 * occurred.
111 */
112 DEFINE_PER_CPU(bool, trace_taskinfo_save);
113
114 /*
115 * Kill all tracing for good (never come back).
116 * It is initialized to 1 but will turn to zero if the initialization
117 * of the tracer is successful. But that is the only place that sets
118 * this back to zero.
119 */
120 static int tracing_disabled = 1;
121
122 cpumask_var_t __read_mostly tracing_buffer_mask;
123
124 #define MAX_TRACER_SIZE 100
125 /*
126 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
127 *
128 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
129 * is set, then ftrace_dump is called. This will output the contents
130 * of the ftrace buffers to the console. This is very useful for
131 * capturing traces that lead to crashes and outputting them to a
132 * serial console.
133 *
134 * It is off by default, but you can enable it by either specifying
135 * "ftrace_dump_on_oops" on the kernel command line, or setting
136 * /proc/sys/kernel/ftrace_dump_on_oops
137 * Set 1 if you want to dump buffers of all CPUs
138 * Set 2 if you want to dump the buffer of the CPU that triggered the oops
139 * Set instance name if you want to dump the specific trace instance
140 * Multiple instance dump is also supported, and instances are separated
141 * by commas.
142 */
143 /* Set to string format zero to disable by default */
144 char ftrace_dump_on_oops[MAX_TRACER_SIZE] = "0";
145
146 /* When set, tracing will stop when a WARN*() is hit */
147 static int __disable_trace_on_warning;
148
149 int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
150 void *buffer, size_t *lenp, loff_t *ppos);
151 static const struct ctl_table trace_sysctl_table[] = {
152 {
153 .procname = "ftrace_dump_on_oops",
154 .data = &ftrace_dump_on_oops,
155 .maxlen = MAX_TRACER_SIZE,
156 .mode = 0644,
157 .proc_handler = proc_dostring,
158 },
159 {
160 .procname = "traceoff_on_warning",
161 .data = &__disable_trace_on_warning,
162 .maxlen = sizeof(__disable_trace_on_warning),
163 .mode = 0644,
164 .proc_handler = proc_dointvec,
165 },
166 {
167 .procname = "tracepoint_printk",
168 .data = &tracepoint_printk,
169 .maxlen = sizeof(tracepoint_printk),
170 .mode = 0644,
171 .proc_handler = tracepoint_printk_sysctl,
172 },
173 };
174
175 static int __init init_trace_sysctls(void)
176 {
177 register_sysctl_init("kernel", trace_sysctl_table);
178 return 0;
179 }
180 subsys_initcall(init_trace_sysctls);
181
182 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
183 /* Map of enums to their values, for "eval_map" file */
184 struct trace_eval_map_head {
185 struct module *mod;
186 unsigned long length;
187 };
188
189 union trace_eval_map_item;
190
191 struct trace_eval_map_tail {
192 /*
193 * "end" is first and points to NULL as it must be different
194 * than "mod" or "eval_string"
195 */
196 union trace_eval_map_item *next;
197 const char *end; /* points to NULL */
198 };
199
200 static DEFINE_MUTEX(trace_eval_mutex);
201
202 /*
203 * The trace_eval_maps are saved in an array with two extra elements,
204 * one at the beginning, and one at the end. The beginning item contains
205 * the count of the saved maps (head.length), and the module they
206 * belong to if not built in (head.mod). The ending item contains a
207 * pointer to the next array of saved eval_map items.
208 */
209 union trace_eval_map_item {
210 struct trace_eval_map map;
211 struct trace_eval_map_head head;
212 struct trace_eval_map_tail tail;
213 };
214
215 static union trace_eval_map_item *trace_eval_maps;
216 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
217
218 int tracing_set_tracer(struct trace_array *tr, const char *buf);
219 static void ftrace_trace_userstack(struct trace_array *tr,
220 struct trace_buffer *buffer,
221 unsigned int trace_ctx);
222
223 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
224 static char *default_bootup_tracer;
225
226 static bool allocate_snapshot;
227 static bool snapshot_at_boot;
228
229 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
230 static int boot_instance_index;
231
232 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
233 static int boot_snapshot_index;
234
235 static int __init set_cmdline_ftrace(char *str)
236 {
237 strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
238 default_bootup_tracer = bootup_tracer_buf;
239 /* We are using ftrace early, expand it */
240 trace_set_ring_buffer_expanded(NULL);
241 return 1;
242 }
243 __setup("ftrace=", set_cmdline_ftrace);
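/*
 * Command-line example (illustrative, not part of this file): booting with
 *
 *	ftrace=function
 *
 * selects the "function" tracer as the boot-up tracer and, as the code above
 * shows, expands the ring buffer to its configured size early.
 */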
244
245 int ftrace_dump_on_oops_enabled(void)
246 {
247 if (!strcmp("0", ftrace_dump_on_oops))
248 return 0;
249 else
250 return 1;
251 }
252
253 static int __init set_ftrace_dump_on_oops(char *str)
254 {
255 if (!*str) {
256 strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
257 return 1;
258 }
259
260 if (*str == ',') {
261 strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
262 strscpy(ftrace_dump_on_oops + 1, str, MAX_TRACER_SIZE - 1);
263 return 1;
264 }
265
266 if (*str++ == '=') {
267 strscpy(ftrace_dump_on_oops, str, MAX_TRACER_SIZE);
268 return 1;
269 }
270
271 return 0;
272 }
273 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
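/*
 * Command-line examples (illustrative; "foo" and "bar" are hypothetical
 * trace instance names), following the parsing in set_ftrace_dump_on_oops()
 * above:
 *
 *	ftrace_dump_on_oops		dump the buffers of all CPUs (same as "1")
 *	ftrace_dump_on_oops=2		dump only the CPU that triggered the oops
 *	ftrace_dump_on_oops=foo,bar	dump the "foo" and "bar" trace instances
 */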
274
275 static int __init stop_trace_on_warning(char *str)
276 {
277 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
278 __disable_trace_on_warning = 1;
279 return 1;
280 }
281 __setup("traceoff_on_warning", stop_trace_on_warning);
282
283 static int __init boot_alloc_snapshot(char *str)
284 {
285 char *slot = boot_snapshot_info + boot_snapshot_index;
286 int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
287 int ret;
288
289 if (str[0] == '=') {
290 str++;
291 if (strlen(str) >= left)
292 return -1;
293
294 ret = snprintf(slot, left, "%s\t", str);
295 boot_snapshot_index += ret;
296 } else {
297 allocate_snapshot = true;
298 /* We also need the main ring buffer expanded */
299 trace_set_ring_buffer_expanded(NULL);
300 }
301 return 1;
302 }
303 __setup("alloc_snapshot", boot_alloc_snapshot);
304
305
306 static int __init boot_snapshot(char *str)
307 {
308 snapshot_at_boot = true;
309 boot_alloc_snapshot(str);
310 return 1;
311 }
312 __setup("ftrace_boot_snapshot", boot_snapshot);
313
314
315 static int __init boot_instance(char *str)
316 {
317 char *slot = boot_instance_info + boot_instance_index;
318 int left = sizeof(boot_instance_info) - boot_instance_index;
319 int ret;
320
321 if (strlen(str) >= left)
322 return -1;
323
324 ret = snprintf(slot, left, "%s\t", str);
325 boot_instance_index += ret;
326
327 return 1;
328 }
329 __setup("trace_instance=", boot_instance);
330
331
332 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
333
334 static int __init set_trace_boot_options(char *str)
335 {
336 strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
337 return 1;
338 }
339 __setup("trace_options=", set_trace_boot_options);
340
341 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
342 static char *trace_boot_clock __initdata;
343
344 static int __init set_trace_boot_clock(char *str)
345 {
346 strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
347 trace_boot_clock = trace_boot_clock_buf;
348 return 1;
349 }
350 __setup("trace_clock=", set_trace_boot_clock);
351
352 static int __init set_tracepoint_printk(char *str)
353 {
354 /* Ignore the "tp_printk_stop_on_boot" param */
355 if (*str == '_')
356 return 0;
357
358 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
359 tracepoint_printk = 1;
360 return 1;
361 }
362 __setup("tp_printk", set_tracepoint_printk);
363
364 static int __init set_tracepoint_printk_stop(char *str)
365 {
366 tracepoint_printk_stop_on_boot = true;
367 return 1;
368 }
369 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
370
371 static int __init set_traceoff_after_boot(char *str)
372 {
373 traceoff_after_boot = true;
374 return 1;
375 }
376 __setup("traceoff_after_boot", set_traceoff_after_boot);
377
378 unsigned long long ns2usecs(u64 nsec)
379 {
380 nsec += 500;
381 do_div(nsec, 1000);
382 return nsec;
383 }
384
385 static void
386 trace_process_export(struct trace_export *export,
387 struct ring_buffer_event *event, int flag)
388 {
389 struct trace_entry *entry;
390 unsigned int size = 0;
391
392 if (export->flags & flag) {
393 entry = ring_buffer_event_data(event);
394 size = ring_buffer_event_length(event);
395 export->write(export, entry, size);
396 }
397 }
398
399 static DEFINE_MUTEX(ftrace_export_lock);
400
401 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
402
403 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
404 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
405 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
406
407 static inline void ftrace_exports_enable(struct trace_export *export)
408 {
409 if (export->flags & TRACE_EXPORT_FUNCTION)
410 static_branch_inc(&trace_function_exports_enabled);
411
412 if (export->flags & TRACE_EXPORT_EVENT)
413 static_branch_inc(&trace_event_exports_enabled);
414
415 if (export->flags & TRACE_EXPORT_MARKER)
416 static_branch_inc(&trace_marker_exports_enabled);
417 }
418
419 static inline void ftrace_exports_disable(struct trace_export *export)
420 {
421 if (export->flags & TRACE_EXPORT_FUNCTION)
422 static_branch_dec(&trace_function_exports_enabled);
423
424 if (export->flags & TRACE_EXPORT_EVENT)
425 static_branch_dec(&trace_event_exports_enabled);
426
427 if (export->flags & TRACE_EXPORT_MARKER)
428 static_branch_dec(&trace_marker_exports_enabled);
429 }
430
431 static void ftrace_exports(struct ring_buffer_event *event, int flag)
432 {
433 struct trace_export *export;
434
435 guard(preempt_notrace)();
436
437 export = rcu_dereference_raw_check(ftrace_exports_list);
438 while (export) {
439 trace_process_export(export, event, flag);
440 export = rcu_dereference_raw_check(export->next);
441 }
442 }
443
444 static inline void
445 add_trace_export(struct trace_export **list, struct trace_export *export)
446 {
447 rcu_assign_pointer(export->next, *list);
448 /*
449 * We are adding the export to the list, but another
450 * CPU might be walking that list. We need to make sure
451 * the export->next pointer is valid before another CPU sees
452 * the export pointer included in the list.
453 */
454 rcu_assign_pointer(*list, export);
455 }
456
457 static inline int
458 rm_trace_export(struct trace_export **list, struct trace_export *export)
459 {
460 struct trace_export **p;
461
462 for (p = list; *p != NULL; p = &(*p)->next)
463 if (*p == export)
464 break;
465
466 if (*p != export)
467 return -1;
468
469 rcu_assign_pointer(*p, (*p)->next);
470
471 return 0;
472 }
473
474 static inline void
475 add_ftrace_export(struct trace_export **list, struct trace_export *export)
476 {
477 ftrace_exports_enable(export);
478
479 add_trace_export(list, export);
480 }
481
482 static inline int
483 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
484 {
485 int ret;
486
487 ret = rm_trace_export(list, export);
488 ftrace_exports_disable(export);
489
490 return ret;
491 }
492
493 int register_ftrace_export(struct trace_export *export)
494 {
495 if (WARN_ON_ONCE(!export->write))
496 return -1;
497
498 guard(mutex)(&ftrace_export_lock);
499
500 add_ftrace_export(&ftrace_exports_list, export);
501
502 return 0;
503 }
504 EXPORT_SYMBOL_GPL(register_ftrace_export);
505
506 int unregister_ftrace_export(struct trace_export *export)
507 {
508 guard(mutex)(&ftrace_export_lock);
509 return rm_ftrace_export(&ftrace_exports_list, export);
510 }
511 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
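/*
 * Usage sketch (illustrative, not part of this file): a client can mirror
 * trace data to its own sink by registering a trace_export. The ->write()
 * callback is invoked as in trace_process_export() above, i.e. with the raw
 * trace entry and its length, from tracing context, so it must be fast and
 * must not sleep. my_sink_push() is a hypothetical helper.
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int size)
 *	{
 *		my_sink_push(entry, size);
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *		.flags	= TRACE_EXPORT_FUNCTION | TRACE_EXPORT_EVENT,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */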
512
513 /* trace_flags holds trace_options default values */
514 #define TRACE_DEFAULT_FLAGS \
515 (FUNCTION_DEFAULT_FLAGS | \
516 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | \
517 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | \
518 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | \
519 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | \
520 TRACE_ITER_HASH_PTR | TRACE_ITER_TRACE_PRINTK | \
521 TRACE_ITER_COPY_MARKER)
522
523 /* trace_options that are only supported by global_trace */
524 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK | \
525 TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
526
527 /* trace_flags that are default zero for instances */
528 #define ZEROED_TRACE_FLAGS \
529 (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK | TRACE_ITER_TRACE_PRINTK | \
530 TRACE_ITER_COPY_MARKER)
531
532 /*
533 * The global_trace is the descriptor that holds the top-level tracing
534 * buffers for the live tracing.
535 */
536 static struct trace_array global_trace = {
537 .trace_flags = TRACE_DEFAULT_FLAGS,
538 };
539
540 static struct trace_array *printk_trace = &global_trace;
541
542 /* List of trace_arrays interested in the top level trace_marker */
543 static LIST_HEAD(marker_copies);
544
545 static __always_inline bool printk_binsafe(struct trace_array *tr)
546 {
547 /*
548 * The binary format of trace_printk can cause a crash if used
549 * with a buffer from another boot. Force the use of the
550 * non-binary version of trace_printk if the trace_printk
551 * buffer is a boot mapped ring buffer.
552 */
553 return !(tr->flags & TRACE_ARRAY_FL_BOOT);
554 }
555
556 static void update_printk_trace(struct trace_array *tr)
557 {
558 if (printk_trace == tr)
559 return;
560
561 printk_trace->trace_flags &= ~TRACE_ITER_TRACE_PRINTK;
562 printk_trace = tr;
563 tr->trace_flags |= TRACE_ITER_TRACE_PRINTK;
564 }
565
566 /* Returns true if the status of tr changed */
567 static bool update_marker_trace(struct trace_array *tr, int enabled)
568 {
569 lockdep_assert_held(&event_mutex);
570
571 if (enabled) {
572 if (!list_empty(&tr->marker_list))
573 return false;
574
575 list_add_rcu(&tr->marker_list, &marker_copies);
576 tr->trace_flags |= TRACE_ITER_COPY_MARKER;
577 return true;
578 }
579
580 if (list_empty(&tr->marker_list))
581 return false;
582
583 list_del_init(&tr->marker_list);
584 tr->trace_flags &= ~TRACE_ITER_COPY_MARKER;
585 return true;
586 }
587
588 void trace_set_ring_buffer_expanded(struct trace_array *tr)
589 {
590 if (!tr)
591 tr = &global_trace;
592 tr->ring_buffer_expanded = true;
593 }
594
595 LIST_HEAD(ftrace_trace_arrays);
596
597 int trace_array_get(struct trace_array *this_tr)
598 {
599 struct trace_array *tr;
600
601 guard(mutex)(&trace_types_lock);
602 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
603 if (tr == this_tr) {
604 tr->ref++;
605 return 0;
606 }
607 }
608
609 return -ENODEV;
610 }
611
612 static void __trace_array_put(struct trace_array *this_tr)
613 {
614 WARN_ON(!this_tr->ref);
615 this_tr->ref--;
616 }
617
618 /**
619 * trace_array_put - Decrement the reference counter for this trace array.
620 * @this_tr : pointer to the trace array
621 *
622 * NOTE: Use this when we no longer need the trace array returned by
623 * trace_array_get_by_name(). This ensures the trace array can be later
624 * destroyed.
625 *
626 */
627 void trace_array_put(struct trace_array *this_tr)
628 {
629 if (!this_tr)
630 return;
631
632 guard(mutex)(&trace_types_lock);
633 __trace_array_put(this_tr);
634 }
635 EXPORT_SYMBOL_GPL(trace_array_put);
636
637 int tracing_check_open_get_tr(struct trace_array *tr)
638 {
639 int ret;
640
641 ret = security_locked_down(LOCKDOWN_TRACEFS);
642 if (ret)
643 return ret;
644
645 if (tracing_disabled)
646 return -ENODEV;
647
648 if (tr && trace_array_get(tr) < 0)
649 return -ENODEV;
650
651 return 0;
652 }
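/*
 * Typical pattern (illustrative; my_open/my_release are hypothetical): a
 * tracefs ->open() handler uses tracing_check_open_get_tr() to check
 * lockdown and tracing state and to take a reference on the trace_array,
 * and the matching ->release() drops that reference with trace_array_put().
 *
 *	static int my_open(struct inode *inode, struct file *filp)
 *	{
 *		struct trace_array *tr = inode->i_private;
 *		int ret;
 *
 *		ret = tracing_check_open_get_tr(tr);
 *		if (ret)
 *			return ret;
 *		filp->private_data = tr;
 *		return 0;
 *	}
 *
 *	static int my_release(struct inode *inode, struct file *filp)
 *	{
 *		trace_array_put(filp->private_data);
 *		return 0;
 *	}
 */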
653
654 /**
655 * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
656 * @filtered_pids: The list of pids to check
657 * @search_pid: The PID to find in @filtered_pids
658 *
659 * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
660 */
661 bool
662 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
663 {
664 return trace_pid_list_is_set(filtered_pids, search_pid);
665 }
666
667 /**
668 * trace_ignore_this_task - should a task be ignored for tracing
669 * @filtered_pids: The list of pids to check
670 * @filtered_no_pids: The list of pids not to be traced
671 * @task: The task that should be ignored if not filtered
672 *
673 * Checks if @task should be traced or not from @filtered_pids.
674 * Returns true if @task should *NOT* be traced.
675 * Returns false if @task should be traced.
676 */
677 bool
678 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
679 struct trace_pid_list *filtered_no_pids,
680 struct task_struct *task)
681 {
682 /*
683 * If filtered_no_pids is not empty, and the task's pid is listed
684 * in filtered_no_pids, then return true.
685 * Otherwise, if filtered_pids is empty, that means we can
686 * trace all tasks. If it has content, then only trace pids
687 * within filtered_pids.
688 */
689
690 return (filtered_pids &&
691 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
692 (filtered_no_pids &&
693 trace_find_filtered_pid(filtered_no_pids, task->pid));
694 }
695
696 /**
697 * trace_filter_add_remove_task - Add or remove a task from a pid_list
698 * @pid_list: The list to modify
699 * @self: The current task for fork or NULL for exit
700 * @task: The task to add or remove
701 *
702 * If adding a task, if @self is defined, the task is only added if @self
703 * is also included in @pid_list. This happens on fork and tasks should
704 * only be added when the parent is listed. If @self is NULL, then the
705 * @task pid will be removed from the list, which would happen on exit
706 * of a task.
707 */
708 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
709 struct task_struct *self,
710 struct task_struct *task)
711 {
712 if (!pid_list)
713 return;
714
715 /* For forks, we only add if the forking task is listed */
716 if (self) {
717 if (!trace_find_filtered_pid(pid_list, self->pid))
718 return;
719 }
720
721 /* "self" is set for forks, and NULL for exits */
722 if (self)
723 trace_pid_list_set(pid_list, task->pid);
724 else
725 trace_pid_list_clear(pid_list, task->pid);
726 }
727
728 /**
729 * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
730 * @pid_list: The pid list to show
731 * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
732 * @pos: The position of the file
733 *
734 * This is used by the seq_file "next" operation to iterate the pids
735 * listed in a trace_pid_list structure.
736 *
737 * Returns the pid+1 as we want to display pid of zero, but NULL would
738 * stop the iteration.
739 */
740 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
741 {
742 long pid = (unsigned long)v;
743 unsigned int next;
744
745 (*pos)++;
746
747 /* pid already is +1 of the actual previous bit */
748 if (trace_pid_list_next(pid_list, pid, &next) < 0)
749 return NULL;
750
751 pid = next;
752
753 /* Return pid + 1 to allow zero to be represented */
754 return (void *)(pid + 1);
755 }
756
757 /**
758 * trace_pid_start - Used for seq_file to start reading pid lists
759 * @pid_list: The pid list to show
760 * @pos: The position of the file
761 *
762 * This is used by seq_file "start" operation to start the iteration
763 * of listing pids.
764 *
765 * Returns the pid+1 as we want to display pid of zero, but NULL would
766 * stop the iteration.
767 */
768 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
769 {
770 unsigned long pid;
771 unsigned int first;
772 loff_t l = 0;
773
774 if (trace_pid_list_first(pid_list, &first) < 0)
775 return NULL;
776
777 pid = first;
778
779 /* Return pid + 1 so that zero can be the exit value */
780 for (pid++; pid && l < *pos;
781 pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
782 ;
783 return (void *)pid;
784 }
785
786 /**
787 * trace_pid_show - show the current pid in seq_file processing
788 * @m: The seq_file structure to write into
789 * @v: A void pointer of the pid (+1) value to display
790 *
791 * Can be directly used by seq_file operations to display the current
792 * pid value.
793 */
794 int trace_pid_show(struct seq_file *m, void *v)
795 {
796 unsigned long pid = (unsigned long)v - 1;
797
798 seq_printf(m, "%lu\n", pid);
799 return 0;
800 }
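/*
 * Sketch (illustrative; my_pid_list and the p_*() wrappers are
 * hypothetical): how the three helpers above plug into seq_file iteration.
 * Real users wrap start/next so they can first pick the pid_list off the
 * trace_array under the proper locking.
 *
 *	static void *p_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return trace_pid_start(my_pid_list, pos);
 *	}
 *
 *	static void *p_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(my_pid_list, v, pos);
 *	}
 *
 *	static void p_stop(struct seq_file *m, void *v)
 *	{
 *	}
 *
 *	static const struct seq_operations my_pid_seq_ops = {
 *		.start	= p_start,
 *		.next	= p_next,
 *		.stop	= p_stop,
 *		.show	= trace_pid_show,
 *	};
 */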
801
802 /* 128 should be much more than enough */
803 #define PID_BUF_SIZE 127
804
805 int trace_pid_write(struct trace_pid_list *filtered_pids,
806 struct trace_pid_list **new_pid_list,
807 const char __user *ubuf, size_t cnt)
808 {
809 struct trace_pid_list *pid_list;
810 struct trace_parser parser;
811 unsigned long val;
812 int nr_pids = 0;
813 ssize_t read = 0;
814 ssize_t ret;
815 loff_t pos;
816 pid_t pid;
817
818 if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
819 return -ENOMEM;
820
821 /*
822 * Always recreate a new array. The write is an all or nothing
823 * operation. Always create a new array when adding new pids by
824 * the user. If the operation fails, then the current list is
825 * not modified.
826 */
827 pid_list = trace_pid_list_alloc();
828 if (!pid_list) {
829 trace_parser_put(&parser);
830 return -ENOMEM;
831 }
832
833 if (filtered_pids) {
834 /* copy the current bits to the new max */
835 ret = trace_pid_list_first(filtered_pids, &pid);
836 while (!ret) {
837 trace_pid_list_set(pid_list, pid);
838 ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
839 nr_pids++;
840 }
841 }
842
843 ret = 0;
844 while (cnt > 0) {
845
846 pos = 0;
847
848 ret = trace_get_user(&parser, ubuf, cnt, &pos);
849 if (ret < 0)
850 break;
851
852 read += ret;
853 ubuf += ret;
854 cnt -= ret;
855
856 if (!trace_parser_loaded(&parser))
857 break;
858
859 ret = -EINVAL;
860 if (kstrtoul(parser.buffer, 0, &val))
861 break;
862
863 pid = (pid_t)val;
864
865 if (trace_pid_list_set(pid_list, pid) < 0) {
866 ret = -1;
867 break;
868 }
869 nr_pids++;
870
871 trace_parser_clear(&parser);
872 ret = 0;
873 }
874 trace_parser_put(&parser);
875
876 if (ret < 0) {
877 trace_pid_list_free(pid_list);
878 return ret;
879 }
880
881 if (!nr_pids) {
882 /* Cleared the list of pids */
883 trace_pid_list_free(pid_list);
884 pid_list = NULL;
885 }
886
887 *new_pid_list = pid_list;
888
889 return read;
890 }
891
892 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
893 {
894 u64 ts;
895
896 /* Early boot up does not have a buffer yet */
897 if (!buf->buffer)
898 return trace_clock_local();
899
900 ts = ring_buffer_time_stamp(buf->buffer);
901 ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
902
903 return ts;
904 }
905
906 u64 ftrace_now(int cpu)
907 {
908 return buffer_ftrace_now(&global_trace.array_buffer, cpu);
909 }
910
911 /**
912 * tracing_is_enabled - Show if global_trace has been enabled
913 *
914 * Shows if the global trace has been enabled or not. It uses the
915 * mirror flag "buffer_disabled", which is meant for fast paths such as
916 * the irqsoff tracer. But it may be inaccurate due to races. If you
917 * need to know the accurate state, use tracing_is_on() which is a little
918 * slower, but accurate.
919 */
920 int tracing_is_enabled(void)
921 {
922 /*
923 * For quick access (irqsoff uses this in fast path), just
924 * return the mirror variable of the state of the ring buffer.
925 * It's a little racy, but we don't really care.
926 */
927 return !global_trace.buffer_disabled;
928 }
929
930 /*
931 * trace_buf_size is the size in bytes that is allocated
932 * for a buffer. Note, the number of bytes is always rounded
933 * to page size.
934 *
935 * This number is purposely set to a low number of 16384.
936 * If a dump on oops happens, it is much appreciated not to
937 * have to wait for all that output. In any case, this is
938 * configurable at both boot time and run time.
939 */
940 #define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */
941
942 static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
943
944 /* trace_types holds a link list of available tracers. */
945 static struct tracer *trace_types __read_mostly;
946
947 /*
948 * trace_types_lock is used to protect the trace_types list.
949 */
950 DEFINE_MUTEX(trace_types_lock);
951
952 /*
953 * serialize access to the ring buffer
954 *
955 * The ring buffer serializes readers, but that is only low-level protection.
956 * The validity of the events (returned by ring_buffer_peek(), etc.)
957 * is not protected by the ring buffer.
958 *
959 * The content of events may become garbage if we allow another process to
960 * consume these events concurrently:
961 * A) the page of the consumed events may become a normal page
962 * (not a reader page) in the ring buffer, and this page will be rewritten
963 * by the events producer.
964 * B) the page of the consumed events may become a page for splice_read,
965 * and this page will be returned to the system.
966 *
967 * These primitives allow multiple processes to access different cpu ring
968 * buffers concurrently.
969 *
970 * These primitives don't distinguish read-only and read-consume access.
971 * Multiple read-only accesses are also serialized.
972 */
973
974 #ifdef CONFIG_SMP
975 static DECLARE_RWSEM(all_cpu_access_lock);
976 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
977
978 static inline void trace_access_lock(int cpu)
979 {
980 if (cpu == RING_BUFFER_ALL_CPUS) {
981 /* gain it for accessing the whole ring buffer. */
982 down_write(&all_cpu_access_lock);
983 } else {
984 /* gain it for accessing a cpu ring buffer. */
985
986 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
987 down_read(&all_cpu_access_lock);
988
989 /* Secondly block other access to this @cpu ring buffer. */
990 mutex_lock(&per_cpu(cpu_access_lock, cpu));
991 }
992 }
993
994 static inline void trace_access_unlock(int cpu)
995 {
996 if (cpu == RING_BUFFER_ALL_CPUS) {
997 up_write(&all_cpu_access_lock);
998 } else {
999 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
1000 up_read(&all_cpu_access_lock);
1001 }
1002 }
1003
1004 static inline void trace_access_lock_init(void)
1005 {
1006 int cpu;
1007
1008 for_each_possible_cpu(cpu)
1009 mutex_init(&per_cpu(cpu_access_lock, cpu));
1010 }
1011
1012 #else
1013
1014 static DEFINE_MUTEX(access_lock);
1015
1016 static inline void trace_access_lock(int cpu)
1017 {
1018 (void)cpu;
1019 mutex_lock(&access_lock);
1020 }
1021
1022 static inline void trace_access_unlock(int cpu)
1023 {
1024 (void)cpu;
1025 mutex_unlock(&access_lock);
1026 }
1027
1028 static inline void trace_access_lock_init(void)
1029 {
1030 }
1031
1032 #endif
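/*
 * Typical usage (illustrative): a reader brackets its ring-buffer access
 * with these helpers, passing RING_BUFFER_ALL_CPUS when it touches every
 * per-cpu buffer and a specific CPU number otherwise.
 *
 *	trace_access_lock(cpu);
 *	... peek at or consume events from that CPU's ring buffer ...
 *	trace_access_unlock(cpu);
 */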
1033
1034 #ifdef CONFIG_STACKTRACE
1035 static void __ftrace_trace_stack(struct trace_array *tr,
1036 struct trace_buffer *buffer,
1037 unsigned int trace_ctx,
1038 int skip, struct pt_regs *regs);
1039 static inline void ftrace_trace_stack(struct trace_array *tr,
1040 struct trace_buffer *buffer,
1041 unsigned int trace_ctx,
1042 int skip, struct pt_regs *regs);
1043
1044 #else
1045 static inline void __ftrace_trace_stack(struct trace_array *tr,
1046 struct trace_buffer *buffer,
1047 unsigned int trace_ctx,
1048 int skip, struct pt_regs *regs)
1049 {
1050 }
1051 static inline void ftrace_trace_stack(struct trace_array *tr,
1052 struct trace_buffer *buffer,
1053 unsigned long trace_ctx,
1054 int skip, struct pt_regs *regs)
1055 {
1056 }
1057
1058 #endif
1059
1060 static __always_inline void
1061 trace_event_setup(struct ring_buffer_event *event,
1062 int type, unsigned int trace_ctx)
1063 {
1064 struct trace_entry *ent = ring_buffer_event_data(event);
1065
1066 tracing_generic_entry_update(ent, type, trace_ctx);
1067 }
1068
1069 static __always_inline struct ring_buffer_event *
1070 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
1071 int type,
1072 unsigned long len,
1073 unsigned int trace_ctx)
1074 {
1075 struct ring_buffer_event *event;
1076
1077 event = ring_buffer_lock_reserve(buffer, len);
1078 if (event != NULL)
1079 trace_event_setup(event, type, trace_ctx);
1080
1081 return event;
1082 }
1083
1084 void tracer_tracing_on(struct trace_array *tr)
1085 {
1086 if (tr->array_buffer.buffer)
1087 ring_buffer_record_on(tr->array_buffer.buffer);
1088 /*
1089 * This flag is looked at when buffers haven't been allocated
1090 * yet, or by some tracers (like irqsoff), that just want to
1091 * know if the ring buffer has been disabled, but it can handle
1092 * races where it gets disabled while we still do a record.
1093 * As the check is in the fast path of the tracers, it is more
1094 * important to be fast than accurate.
1095 */
1096 tr->buffer_disabled = 0;
1097 }
1098
1099 /**
1100 * tracing_on - enable tracing buffers
1101 *
1102 * This function enables tracing buffers that may have been
1103 * disabled with tracing_off.
1104 */
1105 void tracing_on(void)
1106 {
1107 tracer_tracing_on(&global_trace);
1108 }
1109 EXPORT_SYMBOL_GPL(tracing_on);
1110
1111
1112 static __always_inline void
1113 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1114 {
1115 __this_cpu_write(trace_taskinfo_save, true);
1116
1117 /* If this is the temp buffer, we need to commit fully */
1118 if (this_cpu_read(trace_buffered_event) == event) {
1119 /* Length is in event->array[0] */
1120 ring_buffer_write(buffer, event->array[0], &event->array[1]);
1121 /* Release the temp buffer */
1122 this_cpu_dec(trace_buffered_event_cnt);
1123 /* ring_buffer_unlock_commit() enables preemption */
1124 preempt_enable_notrace();
1125 } else
1126 ring_buffer_unlock_commit(buffer);
1127 }
1128
1129 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1130 const char *str, int size)
1131 {
1132 struct ring_buffer_event *event;
1133 struct trace_buffer *buffer;
1134 struct print_entry *entry;
1135 unsigned int trace_ctx;
1136 int alloc;
1137
1138 if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1139 return 0;
1140
1141 if (unlikely(tracing_selftest_running && tr == &global_trace))
1142 return 0;
1143
1144 if (unlikely(tracing_disabled))
1145 return 0;
1146
1147 alloc = sizeof(*entry) + size + 2; /* possible \n added */
1148
1149 trace_ctx = tracing_gen_ctx();
1150 buffer = tr->array_buffer.buffer;
1151 guard(ring_buffer_nest)(buffer);
1152 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1153 trace_ctx);
1154 if (!event)
1155 return 0;
1156
1157 entry = ring_buffer_event_data(event);
1158 entry->ip = ip;
1159
1160 memcpy(&entry->buf, str, size);
1161
1162 /* Add a newline if necessary */
1163 if (entry->buf[size - 1] != '\n') {
1164 entry->buf[size] = '\n';
1165 entry->buf[size + 1] = '\0';
1166 } else
1167 entry->buf[size] = '\0';
1168
1169 __buffer_unlock_commit(buffer, event);
1170 ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1171 return size;
1172 }
1173 EXPORT_SYMBOL_GPL(__trace_array_puts);
1174
1175 /**
1176 * __trace_puts - write a constant string into the trace buffer.
1177 * @ip: The address of the caller
1178 * @str: The constant string to write
1179 * @size: The size of the string.
1180 */
1181 int __trace_puts(unsigned long ip, const char *str, int size)
1182 {
1183 return __trace_array_puts(printk_trace, ip, str, size);
1184 }
1185 EXPORT_SYMBOL_GPL(__trace_puts);
1186
1187 /**
1188 * __trace_bputs - write the pointer to a constant string into trace buffer
1189 * @ip: The address of the caller
1190 * @str: The constant string to write to the buffer to
1191 */
1192 int __trace_bputs(unsigned long ip, const char *str)
1193 {
1194 struct trace_array *tr = READ_ONCE(printk_trace);
1195 struct ring_buffer_event *event;
1196 struct trace_buffer *buffer;
1197 struct bputs_entry *entry;
1198 unsigned int trace_ctx;
1199 int size = sizeof(struct bputs_entry);
1200
1201 if (!printk_binsafe(tr))
1202 return __trace_puts(ip, str, strlen(str));
1203
1204 if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1205 return 0;
1206
1207 if (unlikely(tracing_selftest_running || tracing_disabled))
1208 return 0;
1209
1210 trace_ctx = tracing_gen_ctx();
1211 buffer = tr->array_buffer.buffer;
1212
1213 guard(ring_buffer_nest)(buffer);
1214 event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1215 trace_ctx);
1216 if (!event)
1217 return 0;
1218
1219 entry = ring_buffer_event_data(event);
1220 entry->ip = ip;
1221 entry->str = str;
1222
1223 __buffer_unlock_commit(buffer, event);
1224 ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1225
1226 return 1;
1227 }
1228 EXPORT_SYMBOL_GPL(__trace_bputs);
1229
1230 #ifdef CONFIG_TRACER_SNAPSHOT
1231 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1232 void *cond_data)
1233 {
1234 struct tracer *tracer = tr->current_trace;
1235 unsigned long flags;
1236
1237 if (in_nmi()) {
1238 trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1239 trace_array_puts(tr, "*** snapshot is being ignored ***\n");
1240 return;
1241 }
1242
1243 if (!tr->allocated_snapshot) {
1244 trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1245 trace_array_puts(tr, "*** stopping trace here! ***\n");
1246 tracer_tracing_off(tr);
1247 return;
1248 }
1249
1250 /* Note, snapshot can not be used when the tracer uses it */
1251 if (tracer->use_max_tr) {
1252 trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1253 trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1254 return;
1255 }
1256
1257 if (tr->mapped) {
1258 trace_array_puts(tr, "*** BUFFER MEMORY MAPPED ***\n");
1259 trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1260 return;
1261 }
1262
1263 local_irq_save(flags);
1264 update_max_tr(tr, current, smp_processor_id(), cond_data);
1265 local_irq_restore(flags);
1266 }
1267
1268 void tracing_snapshot_instance(struct trace_array *tr)
1269 {
1270 tracing_snapshot_instance_cond(tr, NULL);
1271 }
1272
1273 /**
1274 * tracing_snapshot - take a snapshot of the current buffer.
1275 *
1276 * This causes a swap between the snapshot buffer and the current live
1277 * tracing buffer. You can use this to take snapshots of the live
1278 * trace when some condition is triggered, but continue to trace.
1279 *
1280 * Note, make sure to allocate the snapshot with either
1281 * a tracing_snapshot_alloc(), or by doing it manually
1282 * with: echo 1 > /sys/kernel/tracing/snapshot
1283 *
1284 * If the snapshot buffer is not allocated, it will stop tracing.
1285 * Basically making a permanent snapshot.
1286 */
1287 void tracing_snapshot(void)
1288 {
1289 struct trace_array *tr = &global_trace;
1290
1291 tracing_snapshot_instance(tr);
1292 }
1293 EXPORT_SYMBOL_GPL(tracing_snapshot);
1294
1295 /**
1296 * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1297 * @tr: The tracing instance to snapshot
1298 * @cond_data: The data to be tested conditionally, and possibly saved
1299 *
1300 * This is the same as tracing_snapshot() except that the snapshot is
1301 * conditional - the snapshot will only happen if the
1302 * cond_snapshot.update() implementation receiving the cond_data
1303 * returns true, which means that the trace array's cond_snapshot
1304 * update() operation used the cond_data to determine whether the
1305 * snapshot should be taken, and if it was, presumably saved it along
1306 * with the snapshot.
1307 */
1308 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1309 {
1310 tracing_snapshot_instance_cond(tr, cond_data);
1311 }
1312 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1313
1314 /**
1315 * tracing_cond_snapshot_data - get the user data associated with a snapshot
1316 * @tr: The tracing instance
1317 *
1318 * When the user enables a conditional snapshot using
1319 * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1320 * with the snapshot. This accessor is used to retrieve it.
1321 *
1322 * Should not be called from cond_snapshot.update(), since it takes
1323 * the tr->max_lock lock, which the code calling
1324 * cond_snapshot.update() has already done.
1325 *
1326 * Returns the cond_data associated with the trace array's snapshot.
1327 */
1328 void *tracing_cond_snapshot_data(struct trace_array *tr)
1329 {
1330 void *cond_data = NULL;
1331
1332 local_irq_disable();
1333 arch_spin_lock(&tr->max_lock);
1334
1335 if (tr->cond_snapshot)
1336 cond_data = tr->cond_snapshot->cond_data;
1337
1338 arch_spin_unlock(&tr->max_lock);
1339 local_irq_enable();
1340
1341 return cond_data;
1342 }
1343 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1344
1345 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1346 struct array_buffer *size_buf, int cpu_id);
1347 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1348
1349 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1350 {
1351 int order;
1352 int ret;
1353
1354 if (!tr->allocated_snapshot) {
1355
1356 /* Make the snapshot buffer have the same order as main buffer */
1357 order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
1358 ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
1359 if (ret < 0)
1360 return ret;
1361
1362 /* allocate spare buffer */
1363 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1364 &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1365 if (ret < 0)
1366 return ret;
1367
1368 tr->allocated_snapshot = true;
1369 }
1370
1371 return 0;
1372 }
1373
1374 static void free_snapshot(struct trace_array *tr)
1375 {
1376 /*
1377 * We don't free the ring buffer; instead, we resize it, because
1378 * the max_tr ring buffer has some state (e.g. ring->clock) and
1379 * we want to preserve it.
1380 */
1381 ring_buffer_subbuf_order_set(tr->max_buffer.buffer, 0);
1382 ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1383 set_buffer_entries(&tr->max_buffer, 1);
1384 tracing_reset_online_cpus(&tr->max_buffer);
1385 tr->allocated_snapshot = false;
1386 }
1387
1388 static int tracing_arm_snapshot_locked(struct trace_array *tr)
1389 {
1390 int ret;
1391
1392 lockdep_assert_held(&trace_types_lock);
1393
1394 spin_lock(&tr->snapshot_trigger_lock);
1395 if (tr->snapshot == UINT_MAX || tr->mapped) {
1396 spin_unlock(&tr->snapshot_trigger_lock);
1397 return -EBUSY;
1398 }
1399
1400 tr->snapshot++;
1401 spin_unlock(&tr->snapshot_trigger_lock);
1402
1403 ret = tracing_alloc_snapshot_instance(tr);
1404 if (ret) {
1405 spin_lock(&tr->snapshot_trigger_lock);
1406 tr->snapshot--;
1407 spin_unlock(&tr->snapshot_trigger_lock);
1408 }
1409
1410 return ret;
1411 }
1412
1413 int tracing_arm_snapshot(struct trace_array *tr)
1414 {
1415 guard(mutex)(&trace_types_lock);
1416 return tracing_arm_snapshot_locked(tr);
1417 }
1418
1419 void tracing_disarm_snapshot(struct trace_array *tr)
1420 {
1421 spin_lock(&tr->snapshot_trigger_lock);
1422 if (!WARN_ON(!tr->snapshot))
1423 tr->snapshot--;
1424 spin_unlock(&tr->snapshot_trigger_lock);
1425 }
1426
1427 /**
1428 * tracing_alloc_snapshot - allocate snapshot buffer.
1429 *
1430 * This only allocates the snapshot buffer if it isn't already
1431 * allocated - it doesn't also take a snapshot.
1432 *
1433 * This is meant to be used in cases where the snapshot buffer needs
1434 * to be set up for events that can't sleep but need to be able to
1435 * trigger a snapshot.
1436 */
1437 int tracing_alloc_snapshot(void)
1438 {
1439 struct trace_array *tr = &global_trace;
1440 int ret;
1441
1442 ret = tracing_alloc_snapshot_instance(tr);
1443 WARN_ON(ret < 0);
1444
1445 return ret;
1446 }
1447 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1448
1449 /**
1450 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1451 *
1452 * This is similar to tracing_snapshot(), but it will allocate the
1453 * snapshot buffer if it isn't already allocated. Use this only
1454 * where it is safe to sleep, as the allocation may sleep.
1455 *
1456 * This causes a swap between the snapshot buffer and the current live
1457 * tracing buffer. You can use this to take snapshots of the live
1458 * trace when some condition is triggered, but continue to trace.
1459 */
1460 void tracing_snapshot_alloc(void)
1461 {
1462 int ret;
1463
1464 ret = tracing_alloc_snapshot();
1465 if (ret < 0)
1466 return;
1467
1468 tracing_snapshot();
1469 }
1470 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
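/*
 * Usage sketch (illustrative; saw_interesting_condition() is a hypothetical
 * predicate): a debugging hook that freezes the trace the first time some
 * condition of interest is hit. tracing_snapshot_alloc() may sleep, so it
 * must be called from a context where sleeping is allowed.
 *
 *	if (saw_interesting_condition())
 *		tracing_snapshot_alloc();
 */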
1471
1472 /**
1473 * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1474 * @tr: The tracing instance
1475 * @cond_data: User data to associate with the snapshot
1476 * @update: Implementation of the cond_snapshot update function
1477 *
1478 * Check whether the conditional snapshot for the given instance has
1479 * already been enabled, or if the current tracer is already using a
1480 * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1481 * save the cond_data and update function inside.
1482 *
1483 * Returns 0 if successful, error otherwise.
1484 */
1485 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1486 cond_update_fn_t update)
1487 {
1488 struct cond_snapshot *cond_snapshot __free(kfree) =
1489 kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1490 int ret;
1491
1492 if (!cond_snapshot)
1493 return -ENOMEM;
1494
1495 cond_snapshot->cond_data = cond_data;
1496 cond_snapshot->update = update;
1497
1498 guard(mutex)(&trace_types_lock);
1499
1500 if (tr->current_trace->use_max_tr)
1501 return -EBUSY;
1502
1503 /*
1504 * The cond_snapshot can only change to NULL without the
1505 * trace_types_lock. We don't care if we race with it going
1506 * to NULL, but we want to make sure that it's not set to
1507 * something other than NULL when we get here, which we can
1508 * do safely with only holding the trace_types_lock and not
1509 * having to take the max_lock.
1510 */
1511 if (tr->cond_snapshot)
1512 return -EBUSY;
1513
1514 ret = tracing_arm_snapshot_locked(tr);
1515 if (ret)
1516 return ret;
1517
1518 local_irq_disable();
1519 arch_spin_lock(&tr->max_lock);
1520 tr->cond_snapshot = no_free_ptr(cond_snapshot);
1521 arch_spin_unlock(&tr->max_lock);
1522 local_irq_enable();
1523
1524 return 0;
1525 }
1526 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
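/*
 * Usage sketch (illustrative; my_state and my_update() are hypothetical,
 * and the callback signature is assumed from the cond_update_fn_t typedef):
 * pair private cond_data with an update() callback, then request
 * conditional snapshots. update() decides whether the swap actually
 * happens; the saved cond_data can later be read back with
 * tracing_cond_snapshot_data().
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		struct my_state *s = cond_data;
 *
 *		return s->hit_count > s->threshold;
 *	}
 *
 *	tracing_snapshot_cond_enable(tr, &my_state, my_update);
 *	...
 *	tracing_snapshot_cond(tr, &my_state);
 *	...
 *	tracing_snapshot_cond_disable(tr);
 */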
1527
1528 /**
1529 * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1530 * @tr: The tracing instance
1531 *
1532 * Check whether the conditional snapshot for the given instance is
1533 * enabled; if so, free the cond_snapshot associated with it,
1534 * otherwise return -EINVAL.
1535 *
1536 * Returns 0 if successful, error otherwise.
1537 */
1538 int tracing_snapshot_cond_disable(struct trace_array *tr)
1539 {
1540 int ret = 0;
1541
1542 local_irq_disable();
1543 arch_spin_lock(&tr->max_lock);
1544
1545 if (!tr->cond_snapshot)
1546 ret = -EINVAL;
1547 else {
1548 kfree(tr->cond_snapshot);
1549 tr->cond_snapshot = NULL;
1550 }
1551
1552 arch_spin_unlock(&tr->max_lock);
1553 local_irq_enable();
1554
1555 tracing_disarm_snapshot(tr);
1556
1557 return ret;
1558 }
1559 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1560 #else
1561 void tracing_snapshot(void)
1562 {
1563 WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1564 }
1565 EXPORT_SYMBOL_GPL(tracing_snapshot);
1566 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1567 {
1568 WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1569 }
1570 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1571 int tracing_alloc_snapshot(void)
1572 {
1573 WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1574 return -ENODEV;
1575 }
1576 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1577 void tracing_snapshot_alloc(void)
1578 {
1579 /* Give warning */
1580 tracing_snapshot();
1581 }
1582 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1583 void *tracing_cond_snapshot_data(struct trace_array *tr)
1584 {
1585 return NULL;
1586 }
1587 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1588 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1589 {
1590 return -ENODEV;
1591 }
1592 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1593 int tracing_snapshot_cond_disable(struct trace_array *tr)
1594 {
1595 return false;
1596 }
1597 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1598 #define free_snapshot(tr) do { } while (0)
1599 #define tracing_arm_snapshot_locked(tr) ({ -EBUSY; })
1600 #endif /* CONFIG_TRACER_SNAPSHOT */
1601
1602 void tracer_tracing_off(struct trace_array *tr)
1603 {
1604 if (tr->array_buffer.buffer)
1605 ring_buffer_record_off(tr->array_buffer.buffer);
1606 /*
1607 * This flag is looked at when buffers haven't been allocated
1608 * yet, or by some tracers (like irqsoff), that just want to
1609 * know if the ring buffer has been disabled, but it can handle
1610 * races where it gets disabled while we still do a record.
1611 * As the check is in the fast path of the tracers, it is more
1612 * important to be fast than accurate.
1613 */
1614 tr->buffer_disabled = 1;
1615 }
1616
1617 /**
1618 * tracer_tracing_disable() - temporarily disable writes to the buffer
1619 * @tr: The trace array whose buffer is to be disabled
1620 *
1621 * Expects tracer_tracing_enable() to re-enable tracing.
1622 * The difference between this and tracer_tracing_off() is that this
1623 * is a counter and can nest, whereas tracer_tracing_off() can
1624 * be called multiple times and a single tracer_tracing_on() will
1625 * enable it.
1626 */
1627 void tracer_tracing_disable(struct trace_array *tr)
1628 {
1629 if (WARN_ON_ONCE(!tr->array_buffer.buffer))
1630 return;
1631
1632 ring_buffer_record_disable(tr->array_buffer.buffer);
1633 }
1634
1635 /**
1636 * tracer_tracing_enable() - counter part of tracer_tracing_disable()
1637 * @tr: The trace array that had tracer_tracing_disable() called on it
1638 *
1639 * This is called after tracer_tracing_disable() has been called on @tr,
1640 * when it's safe to re-enable tracing.
1641 */
1642 void tracer_tracing_enable(struct trace_array *tr)
1643 {
1644 if (WARN_ON_ONCE(!tr->array_buffer.buffer))
1645 return;
1646
1647 ring_buffer_record_enable(tr->array_buffer.buffer);
1648 }
1649
1650 /**
1651 * tracing_off - turn off tracing buffers
1652 *
1653 * This function stops the tracing buffers from recording data.
1654 * It does not disable any overhead the tracers themselves may
1655 * be causing. This function simply causes all recording to
1656 * the ring buffers to fail.
1657 */
1658 void tracing_off(void)
1659 {
1660 tracer_tracing_off(&global_trace);
1661 }
1662 EXPORT_SYMBOL_GPL(tracing_off);
1663
1664 void disable_trace_on_warning(void)
1665 {
1666 if (__disable_trace_on_warning) {
1667 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1668 "Disabling tracing due to warning\n");
1669 tracing_off();
1670 }
1671 }
1672
1673 /**
1674 * tracer_tracing_is_on - show real state of ring buffer enabled
1675 * @tr : the trace array to know if ring buffer is enabled
1676 *
1677 * Shows real state of the ring buffer if it is enabled or not.
1678 */
1679 bool tracer_tracing_is_on(struct trace_array *tr)
1680 {
1681 if (tr->array_buffer.buffer)
1682 return ring_buffer_record_is_set_on(tr->array_buffer.buffer);
1683 return !tr->buffer_disabled;
1684 }
1685
1686 /**
1687 * tracing_is_on - show state of ring buffers enabled
1688 */
1689 int tracing_is_on(void)
1690 {
1691 return tracer_tracing_is_on(&global_trace);
1692 }
1693 EXPORT_SYMBOL_GPL(tracing_is_on);
1694
1695 static int __init set_buf_size(char *str)
1696 {
1697 unsigned long buf_size;
1698
1699 if (!str)
1700 return 0;
1701 buf_size = memparse(str, &str);
1702 /*
1703 * nr_entries can not be zero and the startup
1704 * tests require some buffer space. Therefore
1705 * ensure we have at least 4096 bytes of buffer.
1706 */
1707 trace_buf_size = max(4096UL, buf_size);
1708 return 1;
1709 }
1710 __setup("trace_buf_size=", set_buf_size);
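/*
 * Command-line example (illustrative): memparse() above accepts the usual
 * K/M/G suffixes, so e.g.
 *
 *	trace_buf_size=16M
 *
 * requests a 16 MiB buffer per CPU (values below 4096 bytes are raised to
 * that minimum).
 */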
1711
1712 static int __init set_tracing_thresh(char *str)
1713 {
1714 unsigned long threshold;
1715 int ret;
1716
1717 if (!str)
1718 return 0;
1719 ret = kstrtoul(str, 0, &threshold);
1720 if (ret < 0)
1721 return 0;
1722 tracing_thresh = threshold * 1000;
1723 return 1;
1724 }
1725 __setup("tracing_thresh=", set_tracing_thresh);
1726
1727 unsigned long nsecs_to_usecs(unsigned long nsecs)
1728 {
1729 return nsecs / 1000;
1730 }
1731
1732 /*
1733 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1734 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1735 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1736 * of strings in the order that the evals (enum) were defined.
1737 */
1738 #undef C
1739 #define C(a, b) b
1740
1741 /* These must match the bit positions in trace_iterator_flags */
1742 static const char *trace_options[] = {
1743 TRACE_FLAGS
1744 NULL
1745 };
1746
1747 static struct {
1748 u64 (*func)(void);
1749 const char *name;
1750 int in_ns; /* is this clock in nanoseconds? */
1751 } trace_clocks[] = {
1752 { trace_clock_local, "local", 1 },
1753 { trace_clock_global, "global", 1 },
1754 { trace_clock_counter, "counter", 0 },
1755 { trace_clock_jiffies, "uptime", 0 },
1756 { trace_clock, "perf", 1 },
1757 { ktime_get_mono_fast_ns, "mono", 1 },
1758 { ktime_get_raw_fast_ns, "mono_raw", 1 },
1759 { ktime_get_boot_fast_ns, "boot", 1 },
1760 { ktime_get_tai_fast_ns, "tai", 1 },
1761 ARCH_TRACE_CLOCKS
1762 };
1763
1764 bool trace_clock_in_ns(struct trace_array *tr)
1765 {
1766 if (trace_clocks[tr->clock_id].in_ns)
1767 return true;
1768
1769 return false;
1770 }
1771
1772 /*
1773 * trace_parser_get_init - gets the buffer for trace parser
1774 */
1775 int trace_parser_get_init(struct trace_parser *parser, int size)
1776 {
1777 memset(parser, 0, sizeof(*parser));
1778
1779 parser->buffer = kmalloc(size, GFP_KERNEL);
1780 if (!parser->buffer)
1781 return 1;
1782
1783 parser->size = size;
1784 return 0;
1785 }
1786
1787 /*
1788 * trace_parser_put - frees the buffer for trace parser
1789 */
1790 void trace_parser_put(struct trace_parser *parser)
1791 {
1792 kfree(parser->buffer);
1793 parser->buffer = NULL;
1794 }
1795
1796 /*
1797 * trace_get_user - reads the user input string separated by space
1798 * (matched by isspace(ch))
1799 *
1800 * For each string found the 'struct trace_parser' is updated,
1801 * and the function returns.
1802 *
1803 * Returns number of bytes read.
1804 *
1805 * See kernel/trace/trace.h for 'struct trace_parser' details.
1806 */
1807 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1808 size_t cnt, loff_t *ppos)
1809 {
1810 char ch;
1811 size_t read = 0;
1812 ssize_t ret;
1813
1814 if (!*ppos)
1815 trace_parser_clear(parser);
1816
1817 ret = get_user(ch, ubuf++);
1818 if (ret)
1819 return ret;
1820
1821 read++;
1822 cnt--;
1823
1824 /*
1825 * The parser is not finished with the last write,
1826 * continue reading the user input without skipping spaces.
1827 */
1828 if (!parser->cont) {
1829 /* skip white space */
1830 while (cnt && isspace(ch)) {
1831 ret = get_user(ch, ubuf++);
1832 if (ret)
1833 return ret;
1834 read++;
1835 cnt--;
1836 }
1837
1838 parser->idx = 0;
1839
1840 /* only spaces were written */
1841 if (isspace(ch) || !ch) {
1842 *ppos += read;
1843 return read;
1844 }
1845 }
1846
1847 /* read the non-space input */
1848 while (cnt && !isspace(ch) && ch) {
1849 if (parser->idx < parser->size - 1)
1850 parser->buffer[parser->idx++] = ch;
1851 else
1852 return -EINVAL;
1853
1854 ret = get_user(ch, ubuf++);
1855 if (ret)
1856 return ret;
1857 read++;
1858 cnt--;
1859 }
1860
1861 /* We either got finished input or we have to wait for another call. */
1862 if (isspace(ch) || !ch) {
1863 parser->buffer[parser->idx] = 0;
1864 parser->cont = false;
1865 } else if (parser->idx < parser->size - 1) {
1866 parser->cont = true;
1867 parser->buffer[parser->idx++] = ch;
1868 /* Make sure the parsed string always terminates with '\0'. */
1869 parser->buffer[parser->idx] = 0;
1870 } else {
1871 return -EINVAL;
1872 }
1873
1874 *ppos += read;
1875 return read;
1876 }
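
/*
 * Illustrative example: writing "schedule do_exit" to a file that uses
 * trace_get_user() (such as set_ftrace_filter) yields one call per
 * whitespace-separated token. If user space splits a token across two
 * write() calls ("sched" then "ule "), parser->cont keeps the partial
 * token and the second call completes it before it is consumed.
 */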
1877
1878 /* TODO add a seq_buf_to_buffer() */
1879 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1880 {
1881 int len;
1882
1883 if (trace_seq_used(s) <= s->readpos)
1884 return -EBUSY;
1885
1886 len = trace_seq_used(s) - s->readpos;
1887 if (cnt > len)
1888 cnt = len;
1889 memcpy(buf, s->buffer + s->readpos, cnt);
1890
1891 s->readpos += cnt;
1892 return cnt;
1893 }
1894
1895 unsigned long __read_mostly tracing_thresh;
1896
1897 #ifdef CONFIG_TRACER_MAX_TRACE
1898 static const struct file_operations tracing_max_lat_fops;
1899
1900 #ifdef LATENCY_FS_NOTIFY
1901
1902 static struct workqueue_struct *fsnotify_wq;
1903
1904 static void latency_fsnotify_workfn(struct work_struct *work)
1905 {
1906 struct trace_array *tr = container_of(work, struct trace_array,
1907 fsnotify_work);
1908 fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1909 }
1910
1911 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1912 {
1913 struct trace_array *tr = container_of(iwork, struct trace_array,
1914 fsnotify_irqwork);
1915 queue_work(fsnotify_wq, &tr->fsnotify_work);
1916 }
1917
1918 static void trace_create_maxlat_file(struct trace_array *tr,
1919 struct dentry *d_tracer)
1920 {
1921 INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1922 init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1923 tr->d_max_latency = trace_create_file("tracing_max_latency",
1924 TRACE_MODE_WRITE,
1925 d_tracer, tr,
1926 &tracing_max_lat_fops);
1927 }
1928
1929 __init static int latency_fsnotify_init(void)
1930 {
1931 fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1932 WQ_UNBOUND | WQ_HIGHPRI, 0);
1933 if (!fsnotify_wq) {
1934 pr_err("Unable to allocate tr_max_lat_wq\n");
1935 return -ENOMEM;
1936 }
1937 return 0;
1938 }
1939
1940 late_initcall_sync(latency_fsnotify_init);
1941
1942 void latency_fsnotify(struct trace_array *tr)
1943 {
1944 if (!fsnotify_wq)
1945 return;
1946 /*
1947 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1948 * possible that we are called from __schedule() or do_idle(), which
1949 * could cause a deadlock.
1950 */
1951 irq_work_queue(&tr->fsnotify_irqwork);
1952 }
1953
1954 #else /* !LATENCY_FS_NOTIFY */
1955
1956 #define trace_create_maxlat_file(tr, d_tracer) \
1957 trace_create_file("tracing_max_latency", TRACE_MODE_WRITE, \
1958 d_tracer, tr, &tracing_max_lat_fops)
1959
1960 #endif
1961
1962 /*
1963 * Copy the new maximum trace into the separate maximum-trace
1964 * structure. (this way the maximum trace is permanently saved,
1965 * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1966 */
1967 static void
1968 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1969 {
1970 struct array_buffer *trace_buf = &tr->array_buffer;
1971 struct array_buffer *max_buf = &tr->max_buffer;
1972 struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1973 struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1974
1975 max_buf->cpu = cpu;
1976 max_buf->time_start = data->preempt_timestamp;
1977
1978 max_data->saved_latency = tr->max_latency;
1979 max_data->critical_start = data->critical_start;
1980 max_data->critical_end = data->critical_end;
1981
1982 strscpy(max_data->comm, tsk->comm);
1983 max_data->pid = tsk->pid;
1984 /*
1985 * If tsk == current, then use current_uid(), as that does not use
1986 * RCU. The irq tracer can be called out of RCU scope.
1987 */
1988 if (tsk == current)
1989 max_data->uid = current_uid();
1990 else
1991 max_data->uid = task_uid(tsk);
1992
1993 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1994 max_data->policy = tsk->policy;
1995 max_data->rt_priority = tsk->rt_priority;
1996
1997 /* record this tasks comm */
1998 tracing_record_cmdline(tsk);
1999 latency_fsnotify(tr);
2000 }
2001
2002 /**
2003 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
2004 * @tr: tracer
2005 * @tsk: the task with the latency
2006 * @cpu: The cpu that initiated the trace.
2007 * @cond_data: User data associated with a conditional snapshot
2008 *
2009 * Flip the buffers between the @tr and the max_tr and record information
2010 * about which task was the cause of this latency.
2011 */
2012 void
2013 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
2014 void *cond_data)
2015 {
2016 if (tr->stop_count)
2017 return;
2018
2019 WARN_ON_ONCE(!irqs_disabled());
2020
2021 if (!tr->allocated_snapshot) {
2022 /* Only the nop tracer should hit this when disabling */
2023 WARN_ON_ONCE(tr->current_trace != &nop_trace);
2024 return;
2025 }
2026
2027 arch_spin_lock(&tr->max_lock);
2028
2029 /* Inherit the recordable setting from array_buffer */
2030 if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
2031 ring_buffer_record_on(tr->max_buffer.buffer);
2032 else
2033 ring_buffer_record_off(tr->max_buffer.buffer);
2034
2035 #ifdef CONFIG_TRACER_SNAPSHOT
2036 if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
2037 arch_spin_unlock(&tr->max_lock);
2038 return;
2039 }
2040 #endif
2041 swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
2042
2043 __update_max_tr(tr, tsk, cpu);
2044
2045 arch_spin_unlock(&tr->max_lock);
2046
2047 /* Any waiters on the old snapshot buffer need to wake up */
2048 ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
2049 }
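
/*
 * Minimal sketch (assumptions: a latency tracer measuring a window with
 * hypothetical t_start/now variables) of how update_max_tr() is meant to
 * be used when a new maximum latency is observed:
 *
 *	delta = now - t_start;
 *	if (delta > tr->max_latency) {
 *		tr->max_latency = delta;
 *		update_max_tr(tr, current, smp_processor_id(), NULL);
 *	}
 */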
2050
2051 /**
2052 * update_max_tr_single - only copy one trace over, and reset the rest
2053 * @tr: tracer
2054 * @tsk: task with the latency
2055 * @cpu: the cpu of the buffer to copy.
2056 *
2057 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
2058 */
2059 void
2060 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
2061 {
2062 int ret;
2063
2064 if (tr->stop_count)
2065 return;
2066
2067 WARN_ON_ONCE(!irqs_disabled());
2068 if (!tr->allocated_snapshot) {
2069 /* Only the nop tracer should hit this when disabling */
2070 WARN_ON_ONCE(tr->current_trace != &nop_trace);
2071 return;
2072 }
2073
2074 arch_spin_lock(&tr->max_lock);
2075
2076 ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
2077
2078 if (ret == -EBUSY) {
2079 /*
2080 * We failed to swap the buffer due to a commit taking
2081 * place on this CPU. We fail to record, but we reset
2082 * the max trace buffer (no one writes directly to it)
2083 * and flag that it failed.
2084 * The swap can also fail if a buffer resize is in progress.
2085 */
2086 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
2087 "Failed to swap buffers due to commit or resize in progress\n");
2088 }
2089
2090 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
2091
2092 __update_max_tr(tr, tsk, cpu);
2093 arch_spin_unlock(&tr->max_lock);
2094 }
2095
2096 #endif /* CONFIG_TRACER_MAX_TRACE */
2097
2098 struct pipe_wait {
2099 struct trace_iterator *iter;
2100 int wait_index;
2101 };
2102
2103 static bool wait_pipe_cond(void *data)
2104 {
2105 struct pipe_wait *pwait = data;
2106 struct trace_iterator *iter = pwait->iter;
2107
2108 if (atomic_read_acquire(&iter->wait_index) != pwait->wait_index)
2109 return true;
2110
2111 return iter->closed;
2112 }
2113
2114 static int wait_on_pipe(struct trace_iterator *iter, int full)
2115 {
2116 struct pipe_wait pwait;
2117 int ret;
2118
2119 /* Iterators are static, they should be filled or empty */
2120 if (trace_buffer_iter(iter, iter->cpu_file))
2121 return 0;
2122
2123 pwait.wait_index = atomic_read_acquire(&iter->wait_index);
2124 pwait.iter = iter;
2125
2126 ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full,
2127 wait_pipe_cond, &pwait);
2128
2129 #ifdef CONFIG_TRACER_MAX_TRACE
2130 /*
2131 * Make sure this is still the snapshot buffer, as if a snapshot were
2132 * to happen, this would now be the main buffer.
2133 */
2134 if (iter->snapshot)
2135 iter->array_buffer = &iter->tr->max_buffer;
2136 #endif
2137 return ret;
2138 }
2139
2140 #ifdef CONFIG_FTRACE_STARTUP_TEST
2141 static bool selftests_can_run;
2142
2143 struct trace_selftests {
2144 struct list_head list;
2145 struct tracer *type;
2146 };
2147
2148 static LIST_HEAD(postponed_selftests);
2149
2150 static int save_selftest(struct tracer *type)
2151 {
2152 struct trace_selftests *selftest;
2153
2154 selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
2155 if (!selftest)
2156 return -ENOMEM;
2157
2158 selftest->type = type;
2159 list_add(&selftest->list, &postponed_selftests);
2160 return 0;
2161 }
2162
2163 static int run_tracer_selftest(struct tracer *type)
2164 {
2165 struct trace_array *tr = &global_trace;
2166 struct tracer *saved_tracer = tr->current_trace;
2167 int ret;
2168
2169 if (!type->selftest || tracing_selftest_disabled)
2170 return 0;
2171
2172 /*
2173 * If a tracer registers early in boot up (before scheduling is
2174 * initialized and such), then do not run its selftests yet.
2175 * Instead, run it a little later in the boot process.
2176 */
2177 if (!selftests_can_run)
2178 return save_selftest(type);
2179
2180 if (!tracing_is_on()) {
2181 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
2182 type->name);
2183 return 0;
2184 }
2185
2186 /*
2187 * Run a selftest on this tracer.
2188 * Here we reset the trace buffer, and set the current
2189 * tracer to be this tracer. The tracer can then run some
2190 * internal tracing to verify that everything is in order.
2191 * If we fail, we do not register this tracer.
2192 */
2193 tracing_reset_online_cpus(&tr->array_buffer);
2194
2195 tr->current_trace = type;
2196
2197 #ifdef CONFIG_TRACER_MAX_TRACE
2198 if (type->use_max_tr) {
2199 /* If we expanded the buffers, make sure the max is expanded too */
2200 if (tr->ring_buffer_expanded)
2201 ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2202 RING_BUFFER_ALL_CPUS);
2203 tr->allocated_snapshot = true;
2204 }
2205 #endif
2206
2207 /* the test is responsible for initializing and enabling */
2208 pr_info("Testing tracer %s: ", type->name);
2209 ret = type->selftest(type, tr);
2210 /* the test is responsible for resetting too */
2211 tr->current_trace = saved_tracer;
2212 if (ret) {
2213 printk(KERN_CONT "FAILED!\n");
2214 /* Add the warning after printing 'FAILED' */
2215 WARN_ON(1);
2216 return -1;
2217 }
2218 /* Only reset on passing, to avoid touching corrupted buffers */
2219 tracing_reset_online_cpus(&tr->array_buffer);
2220
2221 #ifdef CONFIG_TRACER_MAX_TRACE
2222 if (type->use_max_tr) {
2223 tr->allocated_snapshot = false;
2224
2225 /* Shrink the max buffer again */
2226 if (tr->ring_buffer_expanded)
2227 ring_buffer_resize(tr->max_buffer.buffer, 1,
2228 RING_BUFFER_ALL_CPUS);
2229 }
2230 #endif
2231
2232 printk(KERN_CONT "PASSED\n");
2233 return 0;
2234 }
2235
2236 static int do_run_tracer_selftest(struct tracer *type)
2237 {
2238 int ret;
2239
2240 /*
2241 * Tests can take a long time, especially if they are run one after the
2242 * other, as does happen during bootup when all the tracers are
2243 * registered. This could cause the soft lockup watchdog to trigger.
2244 */
2245 cond_resched();
2246
2247 tracing_selftest_running = true;
2248 ret = run_tracer_selftest(type);
2249 tracing_selftest_running = false;
2250
2251 return ret;
2252 }
2253
2254 static __init int init_trace_selftests(void)
2255 {
2256 struct trace_selftests *p, *n;
2257 struct tracer *t, **last;
2258 int ret;
2259
2260 selftests_can_run = true;
2261
2262 guard(mutex)(&trace_types_lock);
2263
2264 if (list_empty(&postponed_selftests))
2265 return 0;
2266
2267 pr_info("Running postponed tracer tests:\n");
2268
2269 tracing_selftest_running = true;
2270 list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2271 /* This loop can take minutes when sanitizers are enabled, so
2272 * let's make sure we allow RCU processing.
2273 */
2274 cond_resched();
2275 ret = run_tracer_selftest(p->type);
2276 /* If the test fails, then warn and remove from available_tracers */
2277 if (ret < 0) {
2278 WARN(1, "tracer: %s failed selftest, disabling\n",
2279 p->type->name);
2280 last = &trace_types;
2281 for (t = trace_types; t; t = t->next) {
2282 if (t == p->type) {
2283 *last = t->next;
2284 break;
2285 }
2286 last = &t->next;
2287 }
2288 }
2289 list_del(&p->list);
2290 kfree(p);
2291 }
2292 tracing_selftest_running = false;
2293
2294 return 0;
2295 }
2296 core_initcall(init_trace_selftests);
2297 #else
2298 static inline int do_run_tracer_selftest(struct tracer *type)
2299 {
2300 return 0;
2301 }
2302 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2303
2304 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2305
2306 static void __init apply_trace_boot_options(void);
2307
2308 /**
2309 * register_tracer - register a tracer with the ftrace system.
2310 * @type: the plugin for the tracer
2311 *
2312 * Register a new plugin tracer.
2313 */
2314 int __init register_tracer(struct tracer *type)
2315 {
2316 struct tracer *t;
2317 int ret = 0;
2318
2319 if (!type->name) {
2320 pr_info("Tracer must have a name\n");
2321 return -1;
2322 }
2323
2324 if (strlen(type->name) >= MAX_TRACER_SIZE) {
2325 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2326 return -1;
2327 }
2328
2329 if (security_locked_down(LOCKDOWN_TRACEFS)) {
2330 pr_warn("Can not register tracer %s due to lockdown\n",
2331 type->name);
2332 return -EPERM;
2333 }
2334
2335 mutex_lock(&trace_types_lock);
2336
2337 for (t = trace_types; t; t = t->next) {
2338 if (strcmp(type->name, t->name) == 0) {
2339 /* already found */
2340 pr_info("Tracer %s already registered\n",
2341 type->name);
2342 ret = -1;
2343 goto out;
2344 }
2345 }
2346
2347 if (!type->set_flag)
2348 type->set_flag = &dummy_set_flag;
2349 if (!type->flags) {
2350 /* Allocate a dummy tracer_flags */
2351 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2352 if (!type->flags) {
2353 ret = -ENOMEM;
2354 goto out;
2355 }
2356 type->flags->val = 0;
2357 type->flags->opts = dummy_tracer_opt;
2358 } else
2359 if (!type->flags->opts)
2360 type->flags->opts = dummy_tracer_opt;
2361
2362 /* store the tracer for __set_tracer_option */
2363 type->flags->trace = type;
2364
2365 ret = do_run_tracer_selftest(type);
2366 if (ret < 0)
2367 goto out;
2368
2369 type->next = trace_types;
2370 trace_types = type;
2371 add_tracer_options(&global_trace, type);
2372
2373 out:
2374 mutex_unlock(&trace_types_lock);
2375
2376 if (ret || !default_bootup_tracer)
2377 return ret;
2378
2379 if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2380 return 0;
2381
2382 printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2383 /* Do we want this tracer to start on bootup? */
2384 tracing_set_tracer(&global_trace, type->name);
2385 default_bootup_tracer = NULL;
2386
2387 apply_trace_boot_options();
2388
2389 /* Disable other selftests, since running this tracer will break them. */
2390 disable_tracing_selftest("running a tracer");
2391
2392 return 0;
2393 }
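
/*
 * Minimal sketch (not a real tracer) of what a plugin registered through
 * register_tracer() looks like; "mytracer" and its callbacks are
 * assumptions for illustration only, and the registration must run from
 * __init code:
 *
 *	static int mytracer_init(struct trace_array *tr)
 *	{
 *		return 0;
 *	}
 *
 *	static void mytracer_reset(struct trace_array *tr)
 *	{
 *	}
 *
 *	static struct tracer mytracer __read_mostly = {
 *		.name	= "mytracer",
 *		.init	= mytracer_init,
 *		.reset	= mytracer_reset,
 *	};
 *
 *	static __init int mytracer_register(void)
 *	{
 *		return register_tracer(&mytracer);
 *	}
 */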
2394
2395 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2396 {
2397 struct trace_buffer *buffer = buf->buffer;
2398
2399 if (!buffer)
2400 return;
2401
2402 ring_buffer_record_disable(buffer);
2403
2404 /* Make sure all commits have finished */
2405 synchronize_rcu();
2406 ring_buffer_reset_cpu(buffer, cpu);
2407
2408 ring_buffer_record_enable(buffer);
2409 }
2410
2411 void tracing_reset_online_cpus(struct array_buffer *buf)
2412 {
2413 struct trace_buffer *buffer = buf->buffer;
2414
2415 if (!buffer)
2416 return;
2417
2418 ring_buffer_record_disable(buffer);
2419
2420 /* Make sure all commits have finished */
2421 synchronize_rcu();
2422
2423 buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2424
2425 ring_buffer_reset_online_cpus(buffer);
2426
2427 ring_buffer_record_enable(buffer);
2428 }
2429
2430 static void tracing_reset_all_cpus(struct array_buffer *buf)
2431 {
2432 struct trace_buffer *buffer = buf->buffer;
2433
2434 if (!buffer)
2435 return;
2436
2437 ring_buffer_record_disable(buffer);
2438
2439 /* Make sure all commits have finished */
2440 synchronize_rcu();
2441
2442 buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2443
2444 ring_buffer_reset(buffer);
2445
2446 ring_buffer_record_enable(buffer);
2447 }
2448
2449 /* Must have trace_types_lock held */
2450 void tracing_reset_all_online_cpus_unlocked(void)
2451 {
2452 struct trace_array *tr;
2453
2454 lockdep_assert_held(&trace_types_lock);
2455
2456 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2457 if (!tr->clear_trace)
2458 continue;
2459 tr->clear_trace = false;
2460 tracing_reset_online_cpus(&tr->array_buffer);
2461 #ifdef CONFIG_TRACER_MAX_TRACE
2462 tracing_reset_online_cpus(&tr->max_buffer);
2463 #endif
2464 }
2465 }
2466
2467 void tracing_reset_all_online_cpus(void)
2468 {
2469 guard(mutex)(&trace_types_lock);
2470 tracing_reset_all_online_cpus_unlocked();
2471 }
2472
2473 int is_tracing_stopped(void)
2474 {
2475 return global_trace.stop_count;
2476 }
2477
2478 static void tracing_start_tr(struct trace_array *tr)
2479 {
2480 struct trace_buffer *buffer;
2481
2482 if (tracing_disabled)
2483 return;
2484
2485 guard(raw_spinlock_irqsave)(&tr->start_lock);
2486 if (--tr->stop_count) {
2487 if (WARN_ON_ONCE(tr->stop_count < 0)) {
2488 /* Someone screwed up their debugging */
2489 tr->stop_count = 0;
2490 }
2491 return;
2492 }
2493
2494 /* Prevent the buffers from switching */
2495 arch_spin_lock(&tr->max_lock);
2496
2497 buffer = tr->array_buffer.buffer;
2498 if (buffer)
2499 ring_buffer_record_enable(buffer);
2500
2501 #ifdef CONFIG_TRACER_MAX_TRACE
2502 buffer = tr->max_buffer.buffer;
2503 if (buffer)
2504 ring_buffer_record_enable(buffer);
2505 #endif
2506
2507 arch_spin_unlock(&tr->max_lock);
2508 }
2509
2510 /**
2511 * tracing_start - quick start of the tracer
2512 *
2513 * If tracing is enabled but was stopped by tracing_stop,
2514 * this will start the tracer back up.
2515 */
2516 void tracing_start(void)
2517
2518 {
2519 return tracing_start_tr(&global_trace);
2520 }
2521
2522 static void tracing_stop_tr(struct trace_array *tr)
2523 {
2524 struct trace_buffer *buffer;
2525
2526 guard(raw_spinlock_irqsave)(&tr->start_lock);
2527 if (tr->stop_count++)
2528 return;
2529
2530 /* Prevent the buffers from switching */
2531 arch_spin_lock(&tr->max_lock);
2532
2533 buffer = tr->array_buffer.buffer;
2534 if (buffer)
2535 ring_buffer_record_disable(buffer);
2536
2537 #ifdef CONFIG_TRACER_MAX_TRACE
2538 buffer = tr->max_buffer.buffer;
2539 if (buffer)
2540 ring_buffer_record_disable(buffer);
2541 #endif
2542
2543 arch_spin_unlock(&tr->max_lock);
2544 }
2545
2546 /**
2547 * tracing_stop - quick stop of the tracer
2548 *
2549 * Lightweight way to stop tracing. Use in conjunction with
2550 * tracing_start.
2551 */
2552 void tracing_stop(void)
2553 {
2554 return tracing_stop_tr(&global_trace);
2555 }
2556
2557 /*
2558 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2559 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2560 * simplifies those functions and keeps them in sync.
2561 */
2562 enum print_line_t trace_handle_return(struct trace_seq *s)
2563 {
2564 return trace_seq_has_overflowed(s) ?
2565 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2566 }
2567 EXPORT_SYMBOL_GPL(trace_handle_return);
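
/*
 * Illustrative sketch: a trace event output callback (the ->trace()
 * handler of a struct trace_event_functions) typically ends with
 * trace_handle_return(). The event handler below is hypothetical:
 *
 *	static enum print_line_t my_event_trace(struct trace_iterator *iter,
 *						int flags,
 *						struct trace_event *event)
 *	{
 *		struct trace_seq *s = &iter->seq;
 *
 *		trace_seq_printf(s, "my event fired\n");
 *		return trace_handle_return(s);
 *	}
 */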
2568
2569 static unsigned short migration_disable_value(void)
2570 {
2571 #if defined(CONFIG_SMP)
2572 return current->migration_disabled;
2573 #else
2574 return 0;
2575 #endif
2576 }
2577
2578 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2579 {
2580 unsigned int trace_flags = irqs_status;
2581 unsigned int pc;
2582
2583 pc = preempt_count();
2584
2585 if (pc & NMI_MASK)
2586 trace_flags |= TRACE_FLAG_NMI;
2587 if (pc & HARDIRQ_MASK)
2588 trace_flags |= TRACE_FLAG_HARDIRQ;
2589 if (in_serving_softirq())
2590 trace_flags |= TRACE_FLAG_SOFTIRQ;
2591 if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2592 trace_flags |= TRACE_FLAG_BH_OFF;
2593
2594 if (tif_need_resched())
2595 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2596 if (test_preempt_need_resched())
2597 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2598 if (IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY) && tif_test_bit(TIF_NEED_RESCHED_LAZY))
2599 trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY;
2600 return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2601 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2602 }
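
/*
 * Layout of the context word returned above (see the shifts in
 * tracing_gen_ctx_irq_test()): bits 0-3 carry the preempt count (capped
 * at 15), bits 4-7 the migration-disable depth (also capped), and bits
 * 16 and up the TRACE_FLAG_* bits derived from irqs_status and
 * preempt_count(); bits 8-15 are currently unused.
 */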
2603
2604 struct ring_buffer_event *
2605 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2606 int type,
2607 unsigned long len,
2608 unsigned int trace_ctx)
2609 {
2610 return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2611 }
2612
2613 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2614 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2615 static int trace_buffered_event_ref;
2616
2617 /**
2618 * trace_buffered_event_enable - enable buffering events
2619 *
2620 * When events are being filtered, it is quicker to use a temporary
2621 * buffer to write the event data into if there's a likely chance
2622 * that it will not be committed. The discard of the ring buffer
2623 * is not as fast as committing, and is much slower than copying
2624 * a commit.
2625 *
2626 * When an event is to be filtered, allocate per-CPU buffers to
2627 * write the event data into. If the event is then filtered and
2628 * discarded, it is simply dropped; otherwise, the entire data is
2629 * committed in one shot.
2630 */
2631 void trace_buffered_event_enable(void)
2632 {
2633 struct ring_buffer_event *event;
2634 struct page *page;
2635 int cpu;
2636
2637 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2638
2639 if (trace_buffered_event_ref++)
2640 return;
2641
2642 for_each_tracing_cpu(cpu) {
2643 page = alloc_pages_node(cpu_to_node(cpu),
2644 GFP_KERNEL | __GFP_NORETRY, 0);
2645 /* This is just an optimization and can handle failures */
2646 if (!page) {
2647 pr_err("Failed to allocate event buffer\n");
2648 break;
2649 }
2650
2651 event = page_address(page);
2652 memset(event, 0, sizeof(*event));
2653
2654 per_cpu(trace_buffered_event, cpu) = event;
2655
2656 scoped_guard(preempt,) {
2657 if (cpu == smp_processor_id() &&
2658 __this_cpu_read(trace_buffered_event) !=
2659 per_cpu(trace_buffered_event, cpu))
2660 WARN_ON_ONCE(1);
2661 }
2662 }
2663 }
2664
2665 static void enable_trace_buffered_event(void *data)
2666 {
2667 this_cpu_dec(trace_buffered_event_cnt);
2668 }
2669
2670 static void disable_trace_buffered_event(void *data)
2671 {
2672 this_cpu_inc(trace_buffered_event_cnt);
2673 }
2674
2675 /**
2676 * trace_buffered_event_disable - disable buffering events
2677 *
2678 * When a filter is removed, it is faster to not use the buffered
2679 * events, and to commit directly into the ring buffer. Free up
2680 * the temp buffers when there are no more users. This requires
2681 * special synchronization with current events.
2682 */
2683 void trace_buffered_event_disable(void)
2684 {
2685 int cpu;
2686
2687 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2688
2689 if (WARN_ON_ONCE(!trace_buffered_event_ref))
2690 return;
2691
2692 if (--trace_buffered_event_ref)
2693 return;
2694
2695 /* For each CPU, set the buffer as used. */
2696 on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2697 NULL, true);
2698
2699 /* Wait for all current users to finish */
2700 synchronize_rcu();
2701
2702 for_each_tracing_cpu(cpu) {
2703 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2704 per_cpu(trace_buffered_event, cpu) = NULL;
2705 }
2706
2707 /*
2708 * Wait for all CPUs that may have started checking whether they can use
2709 * their event buffer only after the previous synchronize_rcu() call and
2710 * that still read a valid pointer from trace_buffered_event. They must
2711 * not see the trace_buffered_event_cnt counter cleared again, or they
2712 * could wrongly decide to use the pointed-to buffer, which is now freed.
2713 */
2714 synchronize_rcu();
2715
2716 /* For each CPU, relinquish the buffer */
2717 on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2718 true);
2719 }
2720
2721 static struct trace_buffer *temp_buffer;
2722
2723 struct ring_buffer_event *
2724 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2725 struct trace_event_file *trace_file,
2726 int type, unsigned long len,
2727 unsigned int trace_ctx)
2728 {
2729 struct ring_buffer_event *entry;
2730 struct trace_array *tr = trace_file->tr;
2731 int val;
2732
2733 *current_rb = tr->array_buffer.buffer;
2734
2735 if (!tr->no_filter_buffering_ref &&
2736 (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2737 preempt_disable_notrace();
2738 /*
2739 * Filtering is on, so try to use the per cpu buffer first.
2740 * This buffer will simulate a ring_buffer_event,
2741 * where the type_len is zero and the array[0] will
2742 * hold the full length.
2743 * (see include/linux/ring_buffer.h for details on
2744 * how the ring_buffer_event is structured).
2745 *
2746 * Using a temp buffer during filtering and copying it
2747 * on a matched filter is quicker than writing directly
2748 * into the ring buffer and then discarding it when
2749 * it doesn't match. That is because the discard
2750 * requires several atomic operations to get right.
2751 * Copying on a match and doing nothing on a failed match
2752 * is still quicker than skipping the copy on a match but
2753 * having to discard from the ring buffer on a failed match.
2754 */
2755 if ((entry = __this_cpu_read(trace_buffered_event))) {
2756 int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2757
2758 val = this_cpu_inc_return(trace_buffered_event_cnt);
2759
2760 /*
2761 * Preemption is disabled, but interrupts and NMIs
2762 * can still come in now. If that happens after
2763 * the above increment, then it will have to go
2764 * back to the old method of allocating the event
2765 * on the ring buffer, and if the filter fails, it
2766 * will have to call ring_buffer_discard_commit()
2767 * to remove it.
2768 *
2769 * Need to also check the unlikely case that the
2770 * length is bigger than the temp buffer size.
2771 * If that happens, then the reserve is pretty much
2772 * guaranteed to fail, as the ring buffer currently
2773 * only allows events less than a page. But that may
2774 * change in the future, so let the ring buffer reserve
2775 * handle the failure in that case.
2776 */
2777 if (val == 1 && likely(len <= max_len)) {
2778 trace_event_setup(entry, type, trace_ctx);
2779 entry->array[0] = len;
2780 /* Return with preemption disabled */
2781 return entry;
2782 }
2783 this_cpu_dec(trace_buffered_event_cnt);
2784 }
2785 /* __trace_buffer_lock_reserve() disables preemption */
2786 preempt_enable_notrace();
2787 }
2788
2789 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2790 trace_ctx);
2791 /*
2792 * If tracing is off, but we have triggers enabled
2793 * we still need to look at the event data. Use the temp_buffer
2794 * to store the trace event for the trigger to use. It's recursive
2795 * safe and will not be recorded anywhere.
2796 */
2797 if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2798 *current_rb = temp_buffer;
2799 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2800 trace_ctx);
2801 }
2802 return entry;
2803 }
2804 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2805
2806 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2807 static DEFINE_MUTEX(tracepoint_printk_mutex);
2808
2809 static void output_printk(struct trace_event_buffer *fbuffer)
2810 {
2811 struct trace_event_call *event_call;
2812 struct trace_event_file *file;
2813 struct trace_event *event;
2814 unsigned long flags;
2815 struct trace_iterator *iter = tracepoint_print_iter;
2816
2817 /* We should never get here if iter is NULL */
2818 if (WARN_ON_ONCE(!iter))
2819 return;
2820
2821 event_call = fbuffer->trace_file->event_call;
2822 if (!event_call || !event_call->event.funcs ||
2823 !event_call->event.funcs->trace)
2824 return;
2825
2826 file = fbuffer->trace_file;
2827 if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2828 (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2829 !filter_match_preds(file->filter, fbuffer->entry)))
2830 return;
2831
2832 event = &fbuffer->trace_file->event_call->event;
2833
2834 raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2835 trace_seq_init(&iter->seq);
2836 iter->ent = fbuffer->entry;
2837 event_call->event.funcs->trace(iter, 0, event);
2838 trace_seq_putc(&iter->seq, 0);
2839 printk("%s", iter->seq.buffer);
2840
2841 raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2842 }
2843
2844 int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
2845 void *buffer, size_t *lenp,
2846 loff_t *ppos)
2847 {
2848 int save_tracepoint_printk;
2849 int ret;
2850
2851 guard(mutex)(&tracepoint_printk_mutex);
2852 save_tracepoint_printk = tracepoint_printk;
2853
2854 ret = proc_dointvec(table, write, buffer, lenp, ppos);
2855
2856 /*
2857 * This will force exiting early, as tracepoint_printk
2858 * is always zero when tracepoint_print_iter is not allocated
2859 */
2860 if (!tracepoint_print_iter)
2861 tracepoint_printk = 0;
2862
2863 if (save_tracepoint_printk == tracepoint_printk)
2864 return ret;
2865
2866 if (tracepoint_printk)
2867 static_key_enable(&tracepoint_printk_key.key);
2868 else
2869 static_key_disable(&tracepoint_printk_key.key);
2870
2871 return ret;
2872 }
2873
2874 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2875 {
2876 enum event_trigger_type tt = ETT_NONE;
2877 struct trace_event_file *file = fbuffer->trace_file;
2878
2879 if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2880 fbuffer->entry, &tt))
2881 goto discard;
2882
2883 if (static_key_false(&tracepoint_printk_key.key))
2884 output_printk(fbuffer);
2885
2886 if (static_branch_unlikely(&trace_event_exports_enabled))
2887 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2888
2889 trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2890 fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2891
2892 discard:
2893 if (tt)
2894 event_triggers_post_call(file, tt);
2895
2896 }
2897 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2898
2899 /*
2900 * Skip 3:
2901 *
2902 * trace_buffer_unlock_commit_regs()
2903 * trace_event_buffer_commit()
2904 * trace_event_raw_event_xxx()
2905 */
2906 # define STACK_SKIP 3
2907
2908 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2909 struct trace_buffer *buffer,
2910 struct ring_buffer_event *event,
2911 unsigned int trace_ctx,
2912 struct pt_regs *regs)
2913 {
2914 __buffer_unlock_commit(buffer, event);
2915
2916 /*
2917 * If regs is not set, then skip the necessary functions.
2918 * Note, we can still get here via blktrace, wakeup tracer
2919 * and mmiotrace, but that's ok if they lose a function or
2920 * two. They are not that meaningful.
2921 */
2922 ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2923 ftrace_trace_userstack(tr, buffer, trace_ctx);
2924 }
2925
2926 /*
2927 * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2928 */
2929 void
2930 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2931 struct ring_buffer_event *event)
2932 {
2933 __buffer_unlock_commit(buffer, event);
2934 }
2935
2936 void
2937 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2938 parent_ip, unsigned int trace_ctx, struct ftrace_regs *fregs)
2939 {
2940 struct trace_buffer *buffer = tr->array_buffer.buffer;
2941 struct ring_buffer_event *event;
2942 struct ftrace_entry *entry;
2943 int size = sizeof(*entry);
2944
2945 size += FTRACE_REGS_MAX_ARGS * !!fregs * sizeof(long);
2946
2947 event = __trace_buffer_lock_reserve(buffer, TRACE_FN, size,
2948 trace_ctx);
2949 if (!event)
2950 return;
2951 entry = ring_buffer_event_data(event);
2952 entry->ip = ip;
2953 entry->parent_ip = parent_ip;
2954
2955 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
2956 if (fregs) {
2957 for (int i = 0; i < FTRACE_REGS_MAX_ARGS; i++)
2958 entry->args[i] = ftrace_regs_get_argument(fregs, i);
2959 }
2960 #endif
2961
2962 if (static_branch_unlikely(&trace_function_exports_enabled))
2963 ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2964 __buffer_unlock_commit(buffer, event);
2965 }
2966
2967 #ifdef CONFIG_STACKTRACE
2968
2969 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2970 #define FTRACE_KSTACK_NESTING 4
2971
2972 #define FTRACE_KSTACK_ENTRIES (SZ_4K / FTRACE_KSTACK_NESTING)
2973
2974 struct ftrace_stack {
2975 unsigned long calls[FTRACE_KSTACK_ENTRIES];
2976 };
2977
2978
2979 struct ftrace_stacks {
2980 struct ftrace_stack stacks[FTRACE_KSTACK_NESTING];
2981 };
2982
2983 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2984 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2985
2986 static void __ftrace_trace_stack(struct trace_array *tr,
2987 struct trace_buffer *buffer,
2988 unsigned int trace_ctx,
2989 int skip, struct pt_regs *regs)
2990 {
2991 struct ring_buffer_event *event;
2992 unsigned int size, nr_entries;
2993 struct ftrace_stack *fstack;
2994 struct stack_entry *entry;
2995 int stackidx;
2996
2997 /*
2998 * Add one, for this function and the call to save_stack_trace().
2999 * If regs is set, then these functions will not be in the way.
3000 */
3001 #ifndef CONFIG_UNWINDER_ORC
3002 if (!regs)
3003 skip++;
3004 #endif
3005
3006 guard(preempt_notrace)();
3007
3008 stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3009
3010 /* This should never happen. If it does, yell once and skip */
3011 if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3012 goto out;
3013
3014 /*
3015 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3016 * interrupt will either see the value pre increment or post
3017 * increment. If the interrupt happens pre increment it will have
3018 * restored the counter when it returns. We just need a barrier to
3019 * keep gcc from moving things around.
3020 */
3021 barrier();
3022
3023 fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3024 size = ARRAY_SIZE(fstack->calls);
3025
3026 if (regs) {
3027 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3028 size, skip);
3029 } else {
3030 nr_entries = stack_trace_save(fstack->calls, size, skip);
3031 }
3032
3033 #ifdef CONFIG_DYNAMIC_FTRACE
3034 /* Mark entry of stack trace as trampoline code */
3035 if (tr->ops && tr->ops->trampoline) {
3036 unsigned long tramp_start = tr->ops->trampoline;
3037 unsigned long tramp_end = tramp_start + tr->ops->trampoline_size;
3038 unsigned long *calls = fstack->calls;
3039
3040 for (int i = 0; i < nr_entries; i++) {
3041 if (calls[i] >= tramp_start && calls[i] < tramp_end)
3042 calls[i] = FTRACE_TRAMPOLINE_MARKER;
3043 }
3044 }
3045 #endif
3046
3047 event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3048 struct_size(entry, caller, nr_entries),
3049 trace_ctx);
3050 if (!event)
3051 goto out;
3052 entry = ring_buffer_event_data(event);
3053
3054 entry->size = nr_entries;
3055 memcpy(&entry->caller, fstack->calls,
3056 flex_array_size(entry, caller, nr_entries));
3057
3058 __buffer_unlock_commit(buffer, event);
3059
3060 out:
3061 /* Again, don't let gcc optimize things here */
3062 barrier();
3063 __this_cpu_dec(ftrace_stack_reserve);
3064 }
3065
3066 static inline void ftrace_trace_stack(struct trace_array *tr,
3067 struct trace_buffer *buffer,
3068 unsigned int trace_ctx,
3069 int skip, struct pt_regs *regs)
3070 {
3071 if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3072 return;
3073
3074 __ftrace_trace_stack(tr, buffer, trace_ctx, skip, regs);
3075 }
3076
3077 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3078 int skip)
3079 {
3080 struct trace_buffer *buffer = tr->array_buffer.buffer;
3081
3082 if (rcu_is_watching()) {
3083 __ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
3084 return;
3085 }
3086
3087 if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3088 return;
3089
3090 /*
3091 * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3092 * but if the above rcu_is_watching() failed, then the NMI
3093 * triggered someplace critical, and ct_irq_enter() should
3094 * not be called from NMI.
3095 */
3096 if (unlikely(in_nmi()))
3097 return;
3098
3099 ct_irq_enter_irqson();
3100 __ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
3101 ct_irq_exit_irqson();
3102 }
3103
3104 /**
3105 * trace_dump_stack - record a stack back trace in the trace buffer
3106 * @skip: Number of functions to skip (helper handlers)
3107 */
3108 void trace_dump_stack(int skip)
3109 {
3110 if (tracing_disabled || tracing_selftest_running)
3111 return;
3112
3113 #ifndef CONFIG_UNWINDER_ORC
3114 /* Skip 1 to skip this function. */
3115 skip++;
3116 #endif
3117 __ftrace_trace_stack(printk_trace, printk_trace->array_buffer.buffer,
3118 tracing_gen_ctx(), skip, NULL);
3119 }
3120 EXPORT_SYMBOL_GPL(trace_dump_stack);
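
/*
 * Illustrative example: sprinkling
 *
 *	trace_dump_stack(0);
 *
 * into a suspect code path records the kernel backtrace of every caller
 * that reaches it into the trace buffer, without enabling a full
 * stack-tracing option.
 */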
3121
3122 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3123 static DEFINE_PER_CPU(int, user_stack_count);
3124
3125 static void
3126 ftrace_trace_userstack(struct trace_array *tr,
3127 struct trace_buffer *buffer, unsigned int trace_ctx)
3128 {
3129 struct ring_buffer_event *event;
3130 struct userstack_entry *entry;
3131
3132 if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3133 return;
3134
3135 /*
3136 * NMIs cannot handle page faults, even with fixups.
3137 * Saving the user stack can (and often does) fault.
3138 */
3139 if (unlikely(in_nmi()))
3140 return;
3141
3142 /*
3143 * prevent recursion, since the user stack tracing may
3144 * trigger other kernel events.
3145 */
3146 guard(preempt)();
3147 if (__this_cpu_read(user_stack_count))
3148 return;
3149
3150 __this_cpu_inc(user_stack_count);
3151
3152 event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3153 sizeof(*entry), trace_ctx);
3154 if (!event)
3155 goto out_drop_count;
3156 entry = ring_buffer_event_data(event);
3157
3158 entry->tgid = current->tgid;
3159 memset(&entry->caller, 0, sizeof(entry->caller));
3160
3161 stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3162 __buffer_unlock_commit(buffer, event);
3163
3164 out_drop_count:
3165 __this_cpu_dec(user_stack_count);
3166 }
3167 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3168 static void ftrace_trace_userstack(struct trace_array *tr,
3169 struct trace_buffer *buffer,
3170 unsigned int trace_ctx)
3171 {
3172 }
3173 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3174
3175 #endif /* CONFIG_STACKTRACE */
3176
3177 static inline void
3178 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3179 unsigned long long delta)
3180 {
3181 entry->bottom_delta_ts = delta & U32_MAX;
3182 entry->top_delta_ts = (delta >> 32);
3183 }
3184
3185 void trace_last_func_repeats(struct trace_array *tr,
3186 struct trace_func_repeats *last_info,
3187 unsigned int trace_ctx)
3188 {
3189 struct trace_buffer *buffer = tr->array_buffer.buffer;
3190 struct func_repeats_entry *entry;
3191 struct ring_buffer_event *event;
3192 u64 delta;
3193
3194 event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3195 sizeof(*entry), trace_ctx);
3196 if (!event)
3197 return;
3198
3199 delta = ring_buffer_event_time_stamp(buffer, event) -
3200 last_info->ts_last_call;
3201
3202 entry = ring_buffer_event_data(event);
3203 entry->ip = last_info->ip;
3204 entry->parent_ip = last_info->parent_ip;
3205 entry->count = last_info->count;
3206 func_repeats_set_delta_ts(entry, delta);
3207
3208 __buffer_unlock_commit(buffer, event);
3209 }
3210
3211 /* created for use with alloc_percpu */
3212 struct trace_buffer_struct {
3213 int nesting;
3214 char buffer[4][TRACE_BUF_SIZE];
3215 };
3216
3217 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3218
3219 /*
3220 * This allows for lockless recording. If we're nested too deeply, then
3221 * this returns NULL.
3222 */
3223 static char *get_trace_buf(void)
3224 {
3225 struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3226
3227 if (!trace_percpu_buffer || buffer->nesting >= 4)
3228 return NULL;
3229
3230 buffer->nesting++;
3231
3232 /* Interrupts must see nesting incremented before we use the buffer */
3233 barrier();
3234 return &buffer->buffer[buffer->nesting - 1][0];
3235 }
3236
3237 static void put_trace_buf(void)
3238 {
3239 /* Don't let the decrement of nesting leak before this */
3240 barrier();
3241 this_cpu_dec(trace_percpu_buffer->nesting);
3242 }
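
/*
 * Usage pattern of the two helpers above (as trace_vbprintk() and friends
 * do below); the formatting step is elided:
 *
 *	tbuffer = get_trace_buf();
 *	if (!tbuffer)
 *		goto out;	// nested more than 4 deep, drop the message
 *	// ... format up to TRACE_BUF_SIZE bytes into tbuffer ...
 *	put_trace_buf();
 */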
3243
3244 static int alloc_percpu_trace_buffer(void)
3245 {
3246 struct trace_buffer_struct __percpu *buffers;
3247
3248 if (trace_percpu_buffer)
3249 return 0;
3250
3251 buffers = alloc_percpu(struct trace_buffer_struct);
3252 if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3253 return -ENOMEM;
3254
3255 trace_percpu_buffer = buffers;
3256 return 0;
3257 }
3258
3259 static int buffers_allocated;
3260
3261 void trace_printk_init_buffers(void)
3262 {
3263 if (buffers_allocated)
3264 return;
3265
3266 if (alloc_percpu_trace_buffer())
3267 return;
3268
3269 /* trace_printk() is for debug use only. Don't use it in production. */
3270
3271 pr_warn("\n");
3272 pr_warn("**********************************************************\n");
3273 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
3274 pr_warn("** **\n");
3275 pr_warn("** trace_printk() being used. Allocating extra memory. **\n");
3276 pr_warn("** **\n");
3277 pr_warn("** This means that this is a DEBUG kernel and it is **\n");
3278 pr_warn("** unsafe for production use. **\n");
3279 pr_warn("** **\n");
3280 pr_warn("** If you see this message and you are not debugging **\n");
3281 pr_warn("** the kernel, report this immediately to your vendor! **\n");
3282 pr_warn("** **\n");
3283 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
3284 pr_warn("**********************************************************\n");
3285
3286 /* Expand the buffers to set size */
3287 tracing_update_buffers(&global_trace);
3288
3289 buffers_allocated = 1;
3290
3291 /*
3292 * trace_printk_init_buffers() can be called by modules.
3293 * If that happens, then we need to start cmdline recording
3294 * directly here. If the global_trace.buffer is already
3295 * allocated here, then this was called by module code.
3296 */
3297 if (global_trace.array_buffer.buffer)
3298 tracing_start_cmdline_record();
3299 }
3300 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3301
3302 void trace_printk_start_comm(void)
3303 {
3304 /* Start tracing comms if trace printk is set */
3305 if (!buffers_allocated)
3306 return;
3307 tracing_start_cmdline_record();
3308 }
3309
3310 static void trace_printk_start_stop_comm(int enabled)
3311 {
3312 if (!buffers_allocated)
3313 return;
3314
3315 if (enabled)
3316 tracing_start_cmdline_record();
3317 else
3318 tracing_stop_cmdline_record();
3319 }
3320
3321 /**
3322 * trace_vbprintk - write binary msg to tracing buffer
3323 * @ip: The address of the caller
3324 * @fmt: The string format to write to the buffer
3325 * @args: Arguments for @fmt
3326 */
3327 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3328 {
3329 struct ring_buffer_event *event;
3330 struct trace_buffer *buffer;
3331 struct trace_array *tr = READ_ONCE(printk_trace);
3332 struct bprint_entry *entry;
3333 unsigned int trace_ctx;
3334 char *tbuffer;
3335 int len = 0, size;
3336
3337 if (!printk_binsafe(tr))
3338 return trace_vprintk(ip, fmt, args);
3339
3340 if (unlikely(tracing_selftest_running || tracing_disabled))
3341 return 0;
3342
3343 /* Don't pollute graph traces with trace_vprintk internals */
3344 pause_graph_tracing();
3345
3346 trace_ctx = tracing_gen_ctx();
3347 guard(preempt_notrace)();
3348
3349 tbuffer = get_trace_buf();
3350 if (!tbuffer) {
3351 len = 0;
3352 goto out_nobuffer;
3353 }
3354
3355 len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3356
3357 if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3358 goto out_put;
3359
3360 size = sizeof(*entry) + sizeof(u32) * len;
3361 buffer = tr->array_buffer.buffer;
3362 scoped_guard(ring_buffer_nest, buffer) {
3363 event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3364 trace_ctx);
3365 if (!event)
3366 goto out_put;
3367 entry = ring_buffer_event_data(event);
3368 entry->ip = ip;
3369 entry->fmt = fmt;
3370
3371 memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3372 __buffer_unlock_commit(buffer, event);
3373 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3374 }
3375 out_put:
3376 put_trace_buf();
3377
3378 out_nobuffer:
3379 unpause_graph_tracing();
3380
3381 return len;
3382 }
3383 EXPORT_SYMBOL_GPL(trace_vbprintk);
3384
3385 static __printf(3, 0)
3386 int __trace_array_vprintk(struct trace_buffer *buffer,
3387 unsigned long ip, const char *fmt, va_list args)
3388 {
3389 struct ring_buffer_event *event;
3390 int len = 0, size;
3391 struct print_entry *entry;
3392 unsigned int trace_ctx;
3393 char *tbuffer;
3394
3395 if (tracing_disabled)
3396 return 0;
3397
3398 /* Don't pollute graph traces with trace_vprintk internals */
3399 pause_graph_tracing();
3400
3401 trace_ctx = tracing_gen_ctx();
3402 guard(preempt_notrace)();
3403
3404
3405 tbuffer = get_trace_buf();
3406 if (!tbuffer) {
3407 len = 0;
3408 goto out_nobuffer;
3409 }
3410
3411 len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3412
3413 size = sizeof(*entry) + len + 1;
3414 scoped_guard(ring_buffer_nest, buffer) {
3415 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3416 trace_ctx);
3417 if (!event)
3418 goto out;
3419 entry = ring_buffer_event_data(event);
3420 entry->ip = ip;
3421
3422 memcpy(&entry->buf, tbuffer, len + 1);
3423 __buffer_unlock_commit(buffer, event);
3424 ftrace_trace_stack(printk_trace, buffer, trace_ctx, 6, NULL);
3425 }
3426 out:
3427 put_trace_buf();
3428
3429 out_nobuffer:
3430 unpause_graph_tracing();
3431
3432 return len;
3433 }
3434
3435 int trace_array_vprintk(struct trace_array *tr,
3436 unsigned long ip, const char *fmt, va_list args)
3437 {
3438 if (tracing_selftest_running && tr == &global_trace)
3439 return 0;
3440
3441 return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3442 }
3443
3444 /**
3445 * trace_array_printk - Print a message to a specific instance
3446 * @tr: The instance trace_array descriptor
3447 * @ip: The instruction pointer that this is called from.
3448 * @fmt: The format to print (printf format)
3449 *
3450 * If a subsystem sets up its own instance, they have the right to
3451 * printk strings into their tracing instance buffer using this
3452 * function. Note, this function will not write into the top level
3453 * buffer (use trace_printk() for that), as the top level buffer
3454 * should only contain events that can be individually disabled.
3455 * trace_printk() is only for debugging a kernel and should never
3456 * be incorporated into normal use.
3457 *
3458 * trace_array_printk() can be used, as it will not add noise to the
3459 * top level tracing buffer.
3460 *
3461 * Note, trace_array_init_printk() must be called on @tr before this
3462 * can be used.
3463 */
3464 int trace_array_printk(struct trace_array *tr,
3465 unsigned long ip, const char *fmt, ...)
3466 {
3467 int ret;
3468 va_list ap;
3469
3470 if (!tr)
3471 return -ENOENT;
3472
3473 /* This is only allowed for created instances */
3474 if (tr == &global_trace)
3475 return 0;
3476
3477 if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3478 return 0;
3479
3480 va_start(ap, fmt);
3481 ret = trace_array_vprintk(tr, ip, fmt, ap);
3482 va_end(ap);
3483 return ret;
3484 }
3485 EXPORT_SYMBOL_GPL(trace_array_printk);
3486
3487 /**
3488 * trace_array_init_printk - Initialize buffers for trace_array_printk()
3489 * @tr: The trace array to initialize the buffers for
3490 *
3491 * As trace_array_printk() only writes into instances, they are OK to
3492 * have in the kernel (unlike trace_printk()). This needs to be called
3493 * before trace_array_printk() can be used on a trace_array.
3494 */
3495 int trace_array_init_printk(struct trace_array *tr)
3496 {
3497 if (!tr)
3498 return -ENOENT;
3499
3500 /* This is only allowed for created instances */
3501 if (tr == &global_trace)
3502 return -EINVAL;
3503
3504 return alloc_percpu_trace_buffer();
3505 }
3506 EXPORT_SYMBOL_GPL(trace_array_init_printk);
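
/*
 * Minimal sketch of the instance printk API described above. The
 * instance name "my_subsys" is an assumption, and the sketch presumes
 * the two-argument trace_array_get_by_name():
 *
 *	static int __init my_subsys_trace_init(void)
 *	{
 *		struct trace_array *tr;
 *
 *		tr = trace_array_get_by_name("my_subsys", NULL);
 *		if (!tr)
 *			return -ENOMEM;
 *		if (trace_array_init_printk(tr))
 *			return -ENOMEM;
 *		trace_array_printk(tr, _THIS_IP_, "hello from %s\n", "my_subsys");
 *		return 0;
 *	}
 */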
3507
3508 int trace_array_printk_buf(struct trace_buffer *buffer,
3509 unsigned long ip, const char *fmt, ...)
3510 {
3511 int ret;
3512 va_list ap;
3513
3514 if (!(printk_trace->trace_flags & TRACE_ITER_PRINTK))
3515 return 0;
3516
3517 va_start(ap, fmt);
3518 ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3519 va_end(ap);
3520 return ret;
3521 }
3522
3523 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3524 {
3525 return trace_array_vprintk(printk_trace, ip, fmt, args);
3526 }
3527 EXPORT_SYMBOL_GPL(trace_vprintk);
3528
3529 static void trace_iterator_increment(struct trace_iterator *iter)
3530 {
3531 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3532
3533 iter->idx++;
3534 if (buf_iter)
3535 ring_buffer_iter_advance(buf_iter);
3536 }
3537
3538 static struct trace_entry *
3539 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3540 unsigned long *lost_events)
3541 {
3542 struct ring_buffer_event *event;
3543 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3544
3545 if (buf_iter) {
3546 event = ring_buffer_iter_peek(buf_iter, ts);
3547 if (lost_events)
3548 *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3549 (unsigned long)-1 : 0;
3550 } else {
3551 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3552 lost_events);
3553 }
3554
3555 if (event) {
3556 iter->ent_size = ring_buffer_event_length(event);
3557 return ring_buffer_event_data(event);
3558 }
3559 iter->ent_size = 0;
3560 return NULL;
3561 }
3562
3563 static struct trace_entry *
3564 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3565 unsigned long *missing_events, u64 *ent_ts)
3566 {
3567 struct trace_buffer *buffer = iter->array_buffer->buffer;
3568 struct trace_entry *ent, *next = NULL;
3569 unsigned long lost_events = 0, next_lost = 0;
3570 int cpu_file = iter->cpu_file;
3571 u64 next_ts = 0, ts;
3572 int next_cpu = -1;
3573 int next_size = 0;
3574 int cpu;
3575
3576 /*
3577 * If we are in a per_cpu trace file, don't bother iterating over
3578 * all CPUs; just peek directly at that one.
3579 */
3580 if (cpu_file > RING_BUFFER_ALL_CPUS) {
3581 if (ring_buffer_empty_cpu(buffer, cpu_file))
3582 return NULL;
3583 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3584 if (ent_cpu)
3585 *ent_cpu = cpu_file;
3586
3587 return ent;
3588 }
3589
3590 for_each_tracing_cpu(cpu) {
3591
3592 if (ring_buffer_empty_cpu(buffer, cpu))
3593 continue;
3594
3595 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3596
3597 /*
3598 * Pick the entry with the smallest timestamp:
3599 */
3600 if (ent && (!next || ts < next_ts)) {
3601 next = ent;
3602 next_cpu = cpu;
3603 next_ts = ts;
3604 next_lost = lost_events;
3605 next_size = iter->ent_size;
3606 }
3607 }
3608
3609 iter->ent_size = next_size;
3610
3611 if (ent_cpu)
3612 *ent_cpu = next_cpu;
3613
3614 if (ent_ts)
3615 *ent_ts = next_ts;
3616
3617 if (missing_events)
3618 *missing_events = next_lost;
3619
3620 return next;
3621 }
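
/*
 * Worked example (illustrative): if the next pending entries have
 * timestamps cpu0=110us and cpu1=105us while cpu2 is empty, the loop
 * above returns cpu1's entry with *ent_ts set to 105us. Repeated calls
 * therefore yield one stream ordered by timestamp across all CPUs.
 */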
3622
3623 #define STATIC_FMT_BUF_SIZE 128
3624 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3625
3626 char *trace_iter_expand_format(struct trace_iterator *iter)
3627 {
3628 char *tmp;
3629
3630 /*
3631 * iter->tr is NULL when used with tp_printk, which makes
3632 * this get called where it is not safe to call krealloc().
3633 */
3634 if (!iter->tr || iter->fmt == static_fmt_buf)
3635 return NULL;
3636
3637 tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3638 GFP_KERNEL);
3639 if (tmp) {
3640 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3641 iter->fmt = tmp;
3642 }
3643
3644 return tmp;
3645 }
3646
3647 /* Returns true if the string is safe to dereference from an event */
3648 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3649 {
3650 unsigned long addr = (unsigned long)str;
3651 struct trace_event *trace_event;
3652 struct trace_event_call *event;
3653
3654 /* OK if part of the event data */
3655 if ((addr >= (unsigned long)iter->ent) &&
3656 (addr < (unsigned long)iter->ent + iter->ent_size))
3657 return true;
3658
3659 /* OK if part of the temp seq buffer */
3660 if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3661 (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE))
3662 return true;
3663
3664 /* Core rodata can not be freed */
3665 if (is_kernel_rodata(addr))
3666 return true;
3667
3668 if (trace_is_tracepoint_string(str))
3669 return true;
3670
3671 /*
3672 * Now this could be a module event, referencing core module
3673 * data, which is OK.
3674 */
3675 if (!iter->ent)
3676 return false;
3677
3678 trace_event = ftrace_find_event(iter->ent->type);
3679 if (!trace_event)
3680 return false;
3681
3682 event = container_of(trace_event, struct trace_event_call, event);
3683 if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3684 return false;
3685
3686 /* Would rather have rodata, but this will suffice */
3687 if (within_module_core(addr, event->module))
3688 return true;
3689
3690 return false;
3691 }
3692
3693 /**
3694 * ignore_event - Check dereferenced fields while writing to the seq buffer
3695 * @iter: The iterator that holds the seq buffer and the event being printed
3696 *
3697 * At boot up, test_event_printk() will flag any event that dereferences
3698 * a string with "%s" that does not exist in the ring buffer. It may still
3699 * be valid, as the string may point to a static string in the kernel
3700 * rodata that never gets freed. But if the string pointer is pointing
3701 * to something that was allocated, there's a chance that it can be freed
3702 * by the time the user reads the trace. This would cause a bad memory
3703 * access by the kernel and possibly crash the system.
3704 *
3705 * This function will check if the event has any fields flagged as needing
3706 * to be checked at runtime and perform those checks.
3707 *
3708 * If it is found that a field is unsafe, it will write into the @iter->seq
3709 * a message stating what was found to be unsafe.
3710 *
3711 * @return: true if the event is unsafe and should be ignored,
3712 * false otherwise.
3713 */
3714 bool ignore_event(struct trace_iterator *iter)
3715 {
3716 struct ftrace_event_field *field;
3717 struct trace_event *trace_event;
3718 struct trace_event_call *event;
3719 struct list_head *head;
3720 struct trace_seq *seq;
3721 const void *ptr;
3722
3723 trace_event = ftrace_find_event(iter->ent->type);
3724
3725 seq = &iter->seq;
3726
3727 if (!trace_event) {
3728 trace_seq_printf(seq, "EVENT ID %d NOT FOUND?\n", iter->ent->type);
3729 return true;
3730 }
3731
3732 event = container_of(trace_event, struct trace_event_call, event);
3733 if (!(event->flags & TRACE_EVENT_FL_TEST_STR))
3734 return false;
3735
3736 head = trace_get_fields(event);
3737 if (!head) {
3738 trace_seq_printf(seq, "FIELDS FOR EVENT '%s' NOT FOUND?\n",
3739 trace_event_name(event));
3740 return true;
3741 }
3742
3743 /* Offsets are from the iter->ent that points to the raw event */
3744 ptr = iter->ent;
3745
3746 list_for_each_entry(field, head, link) {
3747 const char *str;
3748 bool good;
3749
3750 if (!field->needs_test)
3751 continue;
3752
3753 str = *(const char **)(ptr + field->offset);
3754
3755 good = trace_safe_str(iter, str);
3756
3757 /*
3758 * If you hit this warning, it is likely that the
3759 * trace event in question used %s on a string that
3760 * was saved at the time of the event, but may not be
3761 * around when the trace is read. Use __string(),
3762 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3763 * instead. See samples/trace_events/trace-events-sample.h
3764 * for reference.
3765 */
3766 if (WARN_ONCE(!good, "event '%s' has unsafe pointer field '%s'",
3767 trace_event_name(event), field->name)) {
3768 trace_seq_printf(seq, "EVENT %s: HAS UNSAFE POINTER FIELD '%s'\n",
3769 trace_event_name(event), field->name);
3770 return true;
3771 }
3772 }
3773 return false;
3774 }
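
/*
 * Sketch of the safe pattern referred to above: copy the string into the
 * event itself instead of recording a raw pointer. The event and field
 * names are made up; note that older kernels spell the assignment as
 * __assign_str(name, src) while newer ones take only the field name.
 *
 *	TRACE_EVENT(sample_str,
 *		TP_PROTO(const char *name),
 *		TP_ARGS(name),
 *		TP_STRUCT__entry(
 *			__string(name, name)
 *		),
 *		TP_fast_assign(
 *			__assign_str(name);
 *		),
 *		TP_printk("name=%s", __get_str(name))
 *	);
 */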
3775
3776 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3777 {
3778 const char *p, *new_fmt;
3779 char *q;
3780
3781 if (WARN_ON_ONCE(!fmt))
3782 return fmt;
3783
3784 if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3785 return fmt;
3786
3787 p = fmt;
3788 new_fmt = q = iter->fmt;
3789 while (*p) {
3790 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3791 if (!trace_iter_expand_format(iter))
3792 return fmt;
3793
3794 q += iter->fmt - new_fmt;
3795 new_fmt = iter->fmt;
3796 }
3797
3798 *q++ = *p++;
3799
3800 /* Replace %p with %px */
3801 if (p[-1] == '%') {
3802 if (p[0] == '%') {
3803 *q++ = *p++;
3804 } else if (p[0] == 'p' && !isalnum(p[1])) {
3805 *q++ = *p++;
3806 *q++ = 'x';
3807 }
3808 }
3809 }
3810 *q = '\0';
3811
3812 return new_fmt;
3813 }
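
/*
 * For example (sketch): with the hash-ptr option cleared on an instance,
 * a print format of
 *	"comm=%s ptr=%p"
 * is handed back by trace_event_format() as
 *	"comm=%s ptr=%px"
 * so the real address is printed, while a literal "%%p" passes through
 * unchanged.
 */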
3814
3815 #define STATIC_TEMP_BUF_SIZE 128
3816 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3817
3818 /* Find the next real entry, without updating the iterator itself */
3819 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3820 int *ent_cpu, u64 *ent_ts)
3821 {
3822 /* __find_next_entry will reset ent_size */
3823 int ent_size = iter->ent_size;
3824 struct trace_entry *entry;
3825
3826 /*
3827 * If called from ftrace_dump(), then the iter->temp buffer
3828 * will be the static_temp_buf and not created from kmalloc.
3829 * If the entry size is greater than the buffer, we can
3830 * not save it. Just return NULL in that case. This is only
3831 * used to add markers when two consecutive events' time
3832 * stamps have a large delta. See trace_print_lat_context()
3833 */
3834 if (iter->temp == static_temp_buf &&
3835 STATIC_TEMP_BUF_SIZE < ent_size)
3836 return NULL;
3837
3838 /*
3839 * The __find_next_entry() may call peek_next_entry(), which may
3840 * call ring_buffer_peek() that may make the contents of iter->ent
3841 * undefined. Need to copy iter->ent now.
3842 */
3843 if (iter->ent && iter->ent != iter->temp) {
3844 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3845 !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3846 void *temp;
3847 temp = kmalloc(iter->ent_size, GFP_KERNEL);
3848 if (!temp)
3849 return NULL;
3850 kfree(iter->temp);
3851 iter->temp = temp;
3852 iter->temp_size = iter->ent_size;
3853 }
3854 memcpy(iter->temp, iter->ent, iter->ent_size);
3855 iter->ent = iter->temp;
3856 }
3857 entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3858 /* Put back the original ent_size */
3859 iter->ent_size = ent_size;
3860
3861 return entry;
3862 }
3863
3864 /* Find the next real entry, and increment the iterator to the next entry */
3865 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3866 {
3867 iter->ent = __find_next_entry(iter, &iter->cpu,
3868 &iter->lost_events, &iter->ts);
3869
3870 if (iter->ent)
3871 trace_iterator_increment(iter);
3872
3873 return iter->ent ? iter : NULL;
3874 }
3875
3876 static void trace_consume(struct trace_iterator *iter)
3877 {
3878 ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3879 &iter->lost_events);
3880 }
3881
3882 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3883 {
3884 struct trace_iterator *iter = m->private;
3885 int i = (int)*pos;
3886 void *ent;
3887
3888 WARN_ON_ONCE(iter->leftover);
3889
3890 (*pos)++;
3891
3892 /* can't go backwards */
3893 if (iter->idx > i)
3894 return NULL;
3895
3896 if (iter->idx < 0)
3897 ent = trace_find_next_entry_inc(iter);
3898 else
3899 ent = iter;
3900
3901 while (ent && iter->idx < i)
3902 ent = trace_find_next_entry_inc(iter);
3903
3904 iter->pos = *pos;
3905
3906 return ent;
3907 }
3908
3909 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3910 {
3911 struct ring_buffer_iter *buf_iter;
3912 unsigned long entries = 0;
3913 u64 ts;
3914
3915 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3916
3917 buf_iter = trace_buffer_iter(iter, cpu);
3918 if (!buf_iter)
3919 return;
3920
3921 ring_buffer_iter_reset(buf_iter);
3922
3923 /*
3924 * With the max latency tracers, it is possible that a reset
3925 * never took place on a cpu. This is evident when the
3926 * timestamp is before the start of the buffer.
3927 */
3928 while (ring_buffer_iter_peek(buf_iter, &ts)) {
3929 if (ts >= iter->array_buffer->time_start)
3930 break;
3931 entries++;
3932 ring_buffer_iter_advance(buf_iter);
3933 /* This could be a big loop */
3934 cond_resched();
3935 }
3936
3937 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3938 }
3939
3940 /*
3941 * The current tracer is copied to avoid holding a global lock
3942 * all around.
3943 */
3944 static void *s_start(struct seq_file *m, loff_t *pos)
3945 {
3946 struct trace_iterator *iter = m->private;
3947 struct trace_array *tr = iter->tr;
3948 int cpu_file = iter->cpu_file;
3949 void *p = NULL;
3950 loff_t l = 0;
3951 int cpu;
3952
3953 mutex_lock(&trace_types_lock);
3954 if (unlikely(tr->current_trace != iter->trace)) {
3955 /* Close iter->trace before switching to the new current tracer */
3956 if (iter->trace->close)
3957 iter->trace->close(iter);
3958 iter->trace = tr->current_trace;
3959 /* Reopen the new current tracer */
3960 if (iter->trace->open)
3961 iter->trace->open(iter);
3962 }
3963 mutex_unlock(&trace_types_lock);
3964
3965 #ifdef CONFIG_TRACER_MAX_TRACE
3966 if (iter->snapshot && iter->trace->use_max_tr)
3967 return ERR_PTR(-EBUSY);
3968 #endif
3969
3970 if (*pos != iter->pos) {
3971 iter->ent = NULL;
3972 iter->cpu = 0;
3973 iter->idx = -1;
3974
3975 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3976 for_each_tracing_cpu(cpu)
3977 tracing_iter_reset(iter, cpu);
3978 } else
3979 tracing_iter_reset(iter, cpu_file);
3980
3981 iter->leftover = 0;
3982 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3983 ;
3984
3985 } else {
3986 /*
3987 * If we overflowed the seq_file before, then we want
3988 * to just reuse the trace_seq buffer again.
3989 */
3990 if (iter->leftover)
3991 p = iter;
3992 else {
3993 l = *pos - 1;
3994 p = s_next(m, p, &l);
3995 }
3996 }
3997
3998 trace_event_read_lock();
3999 trace_access_lock(cpu_file);
4000 return p;
4001 }
4002
4003 static void s_stop(struct seq_file *m, void *p)
4004 {
4005 struct trace_iterator *iter = m->private;
4006
4007 #ifdef CONFIG_TRACER_MAX_TRACE
4008 if (iter->snapshot && iter->trace->use_max_tr)
4009 return;
4010 #endif
4011
4012 trace_access_unlock(iter->cpu_file);
4013 trace_event_read_unlock();
4014 }
4015
4016 static void
4017 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4018 unsigned long *entries, int cpu)
4019 {
4020 unsigned long count;
4021
4022 count = ring_buffer_entries_cpu(buf->buffer, cpu);
4023 /*
4024 * If this buffer has skipped entries, then we hold all
4025 * entries for the trace and we need to ignore the
4026 * ones before the time stamp.
4027 */
4028 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4029 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4030 /* total is the same as the entries */
4031 *total = count;
4032 } else
4033 *total = count +
4034 ring_buffer_overrun_cpu(buf->buffer, cpu);
4035 *entries = count;
4036 }
4037
4038 static void
4039 get_total_entries(struct array_buffer *buf,
4040 unsigned long *total, unsigned long *entries)
4041 {
4042 unsigned long t, e;
4043 int cpu;
4044
4045 *total = 0;
4046 *entries = 0;
4047
4048 for_each_tracing_cpu(cpu) {
4049 get_total_entries_cpu(buf, &t, &e, cpu);
4050 *total += t;
4051 *entries += e;
4052 }
4053 }
4054
4055 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4056 {
4057 unsigned long total, entries;
4058
4059 if (!tr)
4060 tr = &global_trace;
4061
4062 get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4063
4064 return entries;
4065 }
4066
4067 unsigned long trace_total_entries(struct trace_array *tr)
4068 {
4069 unsigned long total, entries;
4070
4071 if (!tr)
4072 tr = &global_trace;
4073
4074 get_total_entries(&tr->array_buffer, &total, &entries);
4075
4076 return entries;
4077 }
4078
4079 static void print_lat_help_header(struct seq_file *m)
4080 {
4081 seq_puts(m, "# _------=> CPU# \n"
4082 "# / _-----=> irqs-off/BH-disabled\n"
4083 "# | / _----=> need-resched \n"
4084 "# || / _---=> hardirq/softirq \n"
4085 "# ||| / _--=> preempt-depth \n"
4086 "# |||| / _-=> migrate-disable \n"
4087 "# ||||| / delay \n"
4088 "# cmd pid |||||| time | caller \n"
4089 "# \\ / |||||| \\ | / \n");
4090 }
4091
4092 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4093 {
4094 unsigned long total;
4095 unsigned long entries;
4096
4097 get_total_entries(buf, &total, &entries);
4098 seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n",
4099 entries, total, num_online_cpus());
4100 seq_puts(m, "#\n");
4101 }
4102
4103 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4104 unsigned int flags)
4105 {
4106 bool tgid = flags & TRACE_ITER_RECORD_TGID;
4107
4108 print_event_info(buf, m);
4109
4110 seq_printf(m, "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? " TGID " : "");
4111 seq_printf(m, "# | | %s | | |\n", tgid ? " | " : "");
4112 }
4113
4114 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4115 unsigned int flags)
4116 {
4117 bool tgid = flags & TRACE_ITER_RECORD_TGID;
4118 static const char space[] = " ";
4119 int prec = tgid ? 12 : 2;
4120
4121 print_event_info(buf, m);
4122
4123 seq_printf(m, "# %.*s _-----=> irqs-off/BH-disabled\n", prec, space);
4124 seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space);
4125 seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space);
4126 seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space);
4127 seq_printf(m, "# %.*s||| / _-=> migrate-disable\n", prec, space);
4128 seq_printf(m, "# %.*s|||| / delay\n", prec, space);
4129 seq_printf(m, "# TASK-PID %.*s CPU# ||||| TIMESTAMP FUNCTION\n", prec, " TGID ");
4130 seq_printf(m, "# | | %.*s | ||||| | |\n", prec, " | ");
4131 }
4132
4133 void
4134 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4135 {
4136 unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4137 struct array_buffer *buf = iter->array_buffer;
4138 struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4139 struct tracer *type = iter->trace;
4140 unsigned long entries;
4141 unsigned long total;
4142 const char *name = type->name;
4143
4144 get_total_entries(buf, &total, &entries);
4145
4146 seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4147 name, init_utsname()->release);
4148 seq_puts(m, "# -----------------------------------"
4149 "---------------------------------\n");
4150 seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4151 " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4152 nsecs_to_usecs(data->saved_latency),
4153 entries,
4154 total,
4155 buf->cpu,
4156 preempt_model_str(),
4157 /* These are reserved for later use */
4158 0, 0, 0, 0);
4159 #ifdef CONFIG_SMP
4160 seq_printf(m, " #P:%d)\n", num_online_cpus());
4161 #else
4162 seq_puts(m, ")\n");
4163 #endif
4164 seq_puts(m, "# -----------------\n");
4165 seq_printf(m, "# | task: %.16s-%d "
4166 "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4167 data->comm, data->pid,
4168 from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4169 data->policy, data->rt_priority);
4170 seq_puts(m, "# -----------------\n");
4171
4172 if (data->critical_start) {
4173 seq_puts(m, "# => started at: ");
4174 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4175 trace_print_seq(m, &iter->seq);
4176 seq_puts(m, "\n# => ended at: ");
4177 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4178 trace_print_seq(m, &iter->seq);
4179 seq_puts(m, "\n#\n");
4180 }
4181
4182 seq_puts(m, "#\n");
4183 }
4184
4185 static void test_cpu_buff_start(struct trace_iterator *iter)
4186 {
4187 struct trace_seq *s = &iter->seq;
4188 struct trace_array *tr = iter->tr;
4189
4190 if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4191 return;
4192
4193 if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4194 return;
4195
4196 if (cpumask_available(iter->started) &&
4197 cpumask_test_cpu(iter->cpu, iter->started))
4198 return;
4199
4200 if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4201 return;
4202
4203 if (cpumask_available(iter->started))
4204 cpumask_set_cpu(iter->cpu, iter->started);
4205
4206 /* Don't print started cpu buffer for the first entry of the trace */
4207 if (iter->idx > 1)
4208 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4209 iter->cpu);
4210 }
4211
4212 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4213 {
4214 struct trace_array *tr = iter->tr;
4215 struct trace_seq *s = &iter->seq;
4216 unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4217 struct trace_entry *entry;
4218 struct trace_event *event;
4219
4220 entry = iter->ent;
4221
4222 test_cpu_buff_start(iter);
4223
4224 event = ftrace_find_event(entry->type);
4225
4226 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4227 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4228 trace_print_lat_context(iter);
4229 else
4230 trace_print_context(iter);
4231 }
4232
4233 if (trace_seq_has_overflowed(s))
4234 return TRACE_TYPE_PARTIAL_LINE;
4235
4236 if (event) {
4237 if (tr->trace_flags & TRACE_ITER_FIELDS)
4238 return print_event_fields(iter, event);
4239 /*
4240 * For TRACE_EVENT() events, the print_fmt is not
4241 * safe to use if the array has delta offsets.
4242 * Force printing via the fields.
4243 */
4244 if ((tr->text_delta) &&
4245 event->type > __TRACE_LAST_TYPE)
4246 return print_event_fields(iter, event);
4247
4248 return event->funcs->trace(iter, sym_flags, event);
4249 }
4250
4251 trace_seq_printf(s, "Unknown type %d\n", entry->type);
4252
4253 return trace_handle_return(s);
4254 }
4255
4256 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4257 {
4258 struct trace_array *tr = iter->tr;
4259 struct trace_seq *s = &iter->seq;
4260 struct trace_entry *entry;
4261 struct trace_event *event;
4262
4263 entry = iter->ent;
4264
4265 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4266 trace_seq_printf(s, "%d %d %llu ",
4267 entry->pid, iter->cpu, iter->ts);
4268
4269 if (trace_seq_has_overflowed(s))
4270 return TRACE_TYPE_PARTIAL_LINE;
4271
4272 event = ftrace_find_event(entry->type);
4273 if (event)
4274 return event->funcs->raw(iter, 0, event);
4275
4276 trace_seq_printf(s, "%d ?\n", entry->type);
4277
4278 return trace_handle_return(s);
4279 }
4280
4281 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4282 {
4283 struct trace_array *tr = iter->tr;
4284 struct trace_seq *s = &iter->seq;
4285 unsigned char newline = '\n';
4286 struct trace_entry *entry;
4287 struct trace_event *event;
4288
4289 entry = iter->ent;
4290
4291 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4292 SEQ_PUT_HEX_FIELD(s, entry->pid);
4293 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4294 SEQ_PUT_HEX_FIELD(s, iter->ts);
4295 if (trace_seq_has_overflowed(s))
4296 return TRACE_TYPE_PARTIAL_LINE;
4297 }
4298
4299 event = ftrace_find_event(entry->type);
4300 if (event) {
4301 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4302 if (ret != TRACE_TYPE_HANDLED)
4303 return ret;
4304 }
4305
4306 SEQ_PUT_FIELD(s, newline);
4307
4308 return trace_handle_return(s);
4309 }
4310
4311 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4312 {
4313 struct trace_array *tr = iter->tr;
4314 struct trace_seq *s = &iter->seq;
4315 struct trace_entry *entry;
4316 struct trace_event *event;
4317
4318 entry = iter->ent;
4319
4320 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4321 SEQ_PUT_FIELD(s, entry->pid);
4322 SEQ_PUT_FIELD(s, iter->cpu);
4323 SEQ_PUT_FIELD(s, iter->ts);
4324 if (trace_seq_has_overflowed(s))
4325 return TRACE_TYPE_PARTIAL_LINE;
4326 }
4327
4328 event = ftrace_find_event(entry->type);
4329 return event ? event->funcs->binary(iter, 0, event) :
4330 TRACE_TYPE_HANDLED;
4331 }
4332
4333 int trace_empty(struct trace_iterator *iter)
4334 {
4335 struct ring_buffer_iter *buf_iter;
4336 int cpu;
4337
4338 /* If we are looking at one CPU buffer, only check that one */
4339 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4340 cpu = iter->cpu_file;
4341 buf_iter = trace_buffer_iter(iter, cpu);
4342 if (buf_iter) {
4343 if (!ring_buffer_iter_empty(buf_iter))
4344 return 0;
4345 } else {
4346 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4347 return 0;
4348 }
4349 return 1;
4350 }
4351
4352 for_each_tracing_cpu(cpu) {
4353 buf_iter = trace_buffer_iter(iter, cpu);
4354 if (buf_iter) {
4355 if (!ring_buffer_iter_empty(buf_iter))
4356 return 0;
4357 } else {
4358 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4359 return 0;
4360 }
4361 }
4362
4363 return 1;
4364 }
4365
4366 /* Called with trace_event_read_lock() held. */
4367 enum print_line_t print_trace_line(struct trace_iterator *iter)
4368 {
4369 struct trace_array *tr = iter->tr;
4370 unsigned long trace_flags = tr->trace_flags;
4371 enum print_line_t ret;
4372
4373 if (iter->lost_events) {
4374 if (iter->lost_events == (unsigned long)-1)
4375 trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4376 iter->cpu);
4377 else
4378 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4379 iter->cpu, iter->lost_events);
4380 if (trace_seq_has_overflowed(&iter->seq))
4381 return TRACE_TYPE_PARTIAL_LINE;
4382 }
4383
4384 if (iter->trace && iter->trace->print_line) {
4385 ret = iter->trace->print_line(iter);
4386 if (ret != TRACE_TYPE_UNHANDLED)
4387 return ret;
4388 }
4389
4390 if (iter->ent->type == TRACE_BPUTS &&
4391 trace_flags & TRACE_ITER_PRINTK &&
4392 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4393 return trace_print_bputs_msg_only(iter);
4394
4395 if (iter->ent->type == TRACE_BPRINT &&
4396 trace_flags & TRACE_ITER_PRINTK &&
4397 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4398 return trace_print_bprintk_msg_only(iter);
4399
4400 if (iter->ent->type == TRACE_PRINT &&
4401 trace_flags & TRACE_ITER_PRINTK &&
4402 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4403 return trace_print_printk_msg_only(iter);
4404
4405 if (trace_flags & TRACE_ITER_BIN)
4406 return print_bin_fmt(iter);
4407
4408 if (trace_flags & TRACE_ITER_HEX)
4409 return print_hex_fmt(iter);
4410
4411 if (trace_flags & TRACE_ITER_RAW)
4412 return print_raw_fmt(iter);
4413
4414 return print_trace_fmt(iter);
4415 }
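
/*
 * Usage note (illustrative): the fallbacks above map to the tracefs
 * option files, e.g.
 *
 *	echo 1 > /sys/kernel/tracing/options/raw
 *
 * switches readers of the "trace" file to print_raw_fmt(), and the "hex"
 * and "bin" options select the other two formats the same way.
 */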
4416
4417 void trace_latency_header(struct seq_file *m)
4418 {
4419 struct trace_iterator *iter = m->private;
4420 struct trace_array *tr = iter->tr;
4421
4422 /* print nothing if the buffers are empty */
4423 if (trace_empty(iter))
4424 return;
4425
4426 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4427 print_trace_header(m, iter);
4428
4429 if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4430 print_lat_help_header(m);
4431 }
4432
4433 void trace_default_header(struct seq_file *m)
4434 {
4435 struct trace_iterator *iter = m->private;
4436 struct trace_array *tr = iter->tr;
4437 unsigned long trace_flags = tr->trace_flags;
4438
4439 if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4440 return;
4441
4442 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4443 /* print nothing if the buffers are empty */
4444 if (trace_empty(iter))
4445 return;
4446 print_trace_header(m, iter);
4447 if (!(trace_flags & TRACE_ITER_VERBOSE))
4448 print_lat_help_header(m);
4449 } else {
4450 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4451 if (trace_flags & TRACE_ITER_IRQ_INFO)
4452 print_func_help_header_irq(iter->array_buffer,
4453 m, trace_flags);
4454 else
4455 print_func_help_header(iter->array_buffer, m,
4456 trace_flags);
4457 }
4458 }
4459 }
4460
4461 static void test_ftrace_alive(struct seq_file *m)
4462 {
4463 if (!ftrace_is_dead())
4464 return;
4465 seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4466 "# MAY BE MISSING FUNCTION EVENTS\n");
4467 }
4468
4469 #ifdef CONFIG_TRACER_MAX_TRACE
4470 static void show_snapshot_main_help(struct seq_file *m)
4471 {
4472 seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4473 "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4474 "# Takes a snapshot of the main buffer.\n"
4475 "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4476 "# (Doesn't have to be '2'; works with any number that\n"
4477 "# is not a '0' or '1')\n");
4478 }
4479
4480 static void show_snapshot_percpu_help(struct seq_file *m)
4481 {
4482 seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4483 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4484 seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4485 "# Takes a snapshot of the main buffer for this cpu.\n");
4486 #else
4487 seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4488 "# Must use main snapshot file to allocate.\n");
4489 #endif
4490 seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4491 "# (Doesn't have to be '2'; works with any number that\n"
4492 "# is not a '0' or '1')\n");
4493 }
4494
4495 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4496 {
4497 if (iter->tr->allocated_snapshot)
4498 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4499 else
4500 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4501
4502 seq_puts(m, "# Snapshot commands:\n");
4503 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4504 show_snapshot_main_help(m);
4505 else
4506 show_snapshot_percpu_help(m);
4507 }
4508 #else
4509 /* Should never be called */
4510 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4511 #endif
4512
4513 static int s_show(struct seq_file *m, void *v)
4514 {
4515 struct trace_iterator *iter = v;
4516 int ret;
4517
4518 if (iter->ent == NULL) {
4519 if (iter->tr) {
4520 seq_printf(m, "# tracer: %s\n", iter->trace->name);
4521 seq_puts(m, "#\n");
4522 test_ftrace_alive(m);
4523 }
4524 if (iter->snapshot && trace_empty(iter))
4525 print_snapshot_help(m, iter);
4526 else if (iter->trace && iter->trace->print_header)
4527 iter->trace->print_header(m);
4528 else
4529 trace_default_header(m);
4530
4531 } else if (iter->leftover) {
4532 /*
4533 * If we filled the seq_file buffer earlier, we
4534 * want to just show it now.
4535 */
4536 ret = trace_print_seq(m, &iter->seq);
4537
4538 /* ret should this time be zero, but you never know */
4539 iter->leftover = ret;
4540
4541 } else {
4542 ret = print_trace_line(iter);
4543 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4544 iter->seq.full = 0;
4545 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
4546 }
4547 ret = trace_print_seq(m, &iter->seq);
4548 /*
4549 * If we overflow the seq_file buffer, then it will
4550 * ask us for this data again at start up.
4551 * Use that instead.
4552 * ret is 0 if seq_file write succeeded.
4553 * -1 otherwise.
4554 */
4555 iter->leftover = ret;
4556 }
4557
4558 return 0;
4559 }
4560
4561 /*
4562 * Should be used after trace_array_get(), trace_types_lock
4563 * ensures that i_cdev was already initialized.
4564 */
4565 static inline int tracing_get_cpu(struct inode *inode)
4566 {
4567 if (inode->i_cdev) /* See trace_create_cpu_file() */
4568 return (long)inode->i_cdev - 1;
4569 return RING_BUFFER_ALL_CPUS;
4570 }
4571
4572 static const struct seq_operations tracer_seq_ops = {
4573 .start = s_start,
4574 .next = s_next,
4575 .stop = s_stop,
4576 .show = s_show,
4577 };
4578
4579 /*
4580 * Note, as iter itself can be allocated and freed in different
4581 * ways, this function is only used to free its content, and not
4582 * the iterator itself. The only requirement for all the allocations
4583 * is that they must zero all fields (kzalloc), as freeing works with
4584 * either allocated content or NULL.
4585 */
4586 static void free_trace_iter_content(struct trace_iterator *iter)
4587 {
4588 /* The fmt is either NULL, allocated or points to static_fmt_buf */
4589 if (iter->fmt != static_fmt_buf)
4590 kfree(iter->fmt);
4591
4592 kfree(iter->temp);
4593 kfree(iter->buffer_iter);
4594 mutex_destroy(&iter->mutex);
4595 free_cpumask_var(iter->started);
4596 }
4597
4598 static struct trace_iterator *
4599 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4600 {
4601 struct trace_array *tr = inode->i_private;
4602 struct trace_iterator *iter;
4603 int cpu;
4604
4605 if (tracing_disabled)
4606 return ERR_PTR(-ENODEV);
4607
4608 iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4609 if (!iter)
4610 return ERR_PTR(-ENOMEM);
4611
4612 iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4613 GFP_KERNEL);
4614 if (!iter->buffer_iter)
4615 goto release;
4616
4617 /*
4618 * trace_find_next_entry() may need to save off iter->ent.
4619 * It will place it into the iter->temp buffer. As most
4620 * events are less than 128, allocate a buffer of that size.
4621 * If one is greater, then trace_find_next_entry() will
4622 * allocate a new buffer to adjust for the bigger iter->ent.
4623 * It's not critical if it fails to get allocated here.
4624 */
4625 iter->temp = kmalloc(128, GFP_KERNEL);
4626 if (iter->temp)
4627 iter->temp_size = 128;
4628
4629 /*
4630 * trace_event_printf() may need to modify the given format
4631 * string to replace %p with %px so that it shows the real address
4632 * instead of a hashed value. However, that is only needed for event
4633 * tracing; other tracers may not need it. Defer the allocation
4634 * until it is needed.
4635 */
4636 iter->fmt = NULL;
4637 iter->fmt_size = 0;
4638
4639 mutex_lock(&trace_types_lock);
4640 iter->trace = tr->current_trace;
4641
4642 if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4643 goto fail;
4644
4645 iter->tr = tr;
4646
4647 #ifdef CONFIG_TRACER_MAX_TRACE
4648 /* Currently only the top directory has a snapshot */
4649 if (tr->current_trace->print_max || snapshot)
4650 iter->array_buffer = &tr->max_buffer;
4651 else
4652 #endif
4653 iter->array_buffer = &tr->array_buffer;
4654 iter->snapshot = snapshot;
4655 iter->pos = -1;
4656 iter->cpu_file = tracing_get_cpu(inode);
4657 mutex_init(&iter->mutex);
4658
4659 /* Notify the tracer early; before we stop tracing. */
4660 if (iter->trace->open)
4661 iter->trace->open(iter);
4662
4663 /* Annotate start of buffers if we had overruns */
4664 if (ring_buffer_overruns(iter->array_buffer->buffer))
4665 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4666
4667 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4668 if (trace_clocks[tr->clock_id].in_ns)
4669 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4670
4671 /*
4672 * If pause-on-trace is enabled, then stop the trace while
4673 * dumping, unless this is the "snapshot" file
4674 */
4675 if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4676 tracing_stop_tr(tr);
4677
4678 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4679 for_each_tracing_cpu(cpu) {
4680 iter->buffer_iter[cpu] =
4681 ring_buffer_read_start(iter->array_buffer->buffer,
4682 cpu, GFP_KERNEL);
4683 tracing_iter_reset(iter, cpu);
4684 }
4685 } else {
4686 cpu = iter->cpu_file;
4687 iter->buffer_iter[cpu] =
4688 ring_buffer_read_start(iter->array_buffer->buffer,
4689 cpu, GFP_KERNEL);
4690 tracing_iter_reset(iter, cpu);
4691 }
4692
4693 mutex_unlock(&trace_types_lock);
4694
4695 return iter;
4696
4697 fail:
4698 mutex_unlock(&trace_types_lock);
4699 free_trace_iter_content(iter);
4700 release:
4701 seq_release_private(inode, file);
4702 return ERR_PTR(-ENOMEM);
4703 }
4704
4705 int tracing_open_generic(struct inode *inode, struct file *filp)
4706 {
4707 int ret;
4708
4709 ret = tracing_check_open_get_tr(NULL);
4710 if (ret)
4711 return ret;
4712
4713 filp->private_data = inode->i_private;
4714 return 0;
4715 }
4716
4717 bool tracing_is_disabled(void)
4718 {
4719 return (tracing_disabled) ? true : false;
4720 }
4721
4722 /*
4723 * Open and update trace_array ref count.
4724 * Must have the current trace_array passed to it.
4725 */
4726 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4727 {
4728 struct trace_array *tr = inode->i_private;
4729 int ret;
4730
4731 ret = tracing_check_open_get_tr(tr);
4732 if (ret)
4733 return ret;
4734
4735 filp->private_data = inode->i_private;
4736
4737 return 0;
4738 }
4739
4740 /*
4741 * The private pointer of the inode is the trace_event_file.
4742 * Update the tr ref count associated to it.
4743 */
4744 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4745 {
4746 struct trace_event_file *file = inode->i_private;
4747 int ret;
4748
4749 ret = tracing_check_open_get_tr(file->tr);
4750 if (ret)
4751 return ret;
4752
4753 guard(mutex)(&event_mutex);
4754
4755 /* Fail if the file is marked for removal */
4756 if (file->flags & EVENT_FILE_FL_FREED) {
4757 trace_array_put(file->tr);
4758 return -ENODEV;
4759 } else {
4760 event_file_get(file);
4761 }
4762
4763 filp->private_data = inode->i_private;
4764
4765 return 0;
4766 }
4767
4768 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4769 {
4770 struct trace_event_file *file = inode->i_private;
4771
4772 trace_array_put(file->tr);
4773 event_file_put(file);
4774
4775 return 0;
4776 }
4777
4778 int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
4779 {
4780 tracing_release_file_tr(inode, filp);
4781 return single_release(inode, filp);
4782 }
4783
4784 static int tracing_mark_open(struct inode *inode, struct file *filp)
4785 {
4786 stream_open(inode, filp);
4787 return tracing_open_generic_tr(inode, filp);
4788 }
4789
4790 static int tracing_release(struct inode *inode, struct file *file)
4791 {
4792 struct trace_array *tr = inode->i_private;
4793 struct seq_file *m = file->private_data;
4794 struct trace_iterator *iter;
4795 int cpu;
4796
4797 if (!(file->f_mode & FMODE_READ)) {
4798 trace_array_put(tr);
4799 return 0;
4800 }
4801
4802 /* Writes do not use seq_file */
4803 iter = m->private;
4804 mutex_lock(&trace_types_lock);
4805
4806 for_each_tracing_cpu(cpu) {
4807 if (iter->buffer_iter[cpu])
4808 ring_buffer_read_finish(iter->buffer_iter[cpu]);
4809 }
4810
4811 if (iter->trace && iter->trace->close)
4812 iter->trace->close(iter);
4813
4814 if (!iter->snapshot && tr->stop_count)
4815 /* reenable tracing if it was previously enabled */
4816 tracing_start_tr(tr);
4817
4818 __trace_array_put(tr);
4819
4820 mutex_unlock(&trace_types_lock);
4821
4822 free_trace_iter_content(iter);
4823 seq_release_private(inode, file);
4824
4825 return 0;
4826 }
4827
4828 int tracing_release_generic_tr(struct inode *inode, struct file *file)
4829 {
4830 struct trace_array *tr = inode->i_private;
4831
4832 trace_array_put(tr);
4833 return 0;
4834 }
4835
4836 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4837 {
4838 struct trace_array *tr = inode->i_private;
4839
4840 trace_array_put(tr);
4841
4842 return single_release(inode, file);
4843 }
4844
4845 static int tracing_open(struct inode *inode, struct file *file)
4846 {
4847 struct trace_array *tr = inode->i_private;
4848 struct trace_iterator *iter;
4849 int ret;
4850
4851 ret = tracing_check_open_get_tr(tr);
4852 if (ret)
4853 return ret;
4854
4855 /* If this file was open for write, then erase contents */
4856 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4857 int cpu = tracing_get_cpu(inode);
4858 struct array_buffer *trace_buf = &tr->array_buffer;
4859
4860 #ifdef CONFIG_TRACER_MAX_TRACE
4861 if (tr->current_trace->print_max)
4862 trace_buf = &tr->max_buffer;
4863 #endif
4864
4865 if (cpu == RING_BUFFER_ALL_CPUS)
4866 tracing_reset_online_cpus(trace_buf);
4867 else
4868 tracing_reset_cpu(trace_buf, cpu);
4869 }
4870
4871 if (file->f_mode & FMODE_READ) {
4872 iter = __tracing_open(inode, file, false);
4873 if (IS_ERR(iter))
4874 ret = PTR_ERR(iter);
4875 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4876 iter->iter_flags |= TRACE_FILE_LAT_FMT;
4877 }
4878
4879 if (ret < 0)
4880 trace_array_put(tr);
4881
4882 return ret;
4883 }
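
/*
 * Illustrative example: because an open with O_TRUNC resets the buffer,
 * the usual shell idiom
 *
 *	echo > /sys/kernel/tracing/trace
 *
 * clears the trace, and truncating a per_cpu/cpuN/trace file clears only
 * that CPU's buffer.
 */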
4884
4885 /*
4886 * Some tracers are not suitable for instance buffers.
4887 * A tracer is always available for the global array (toplevel)
4888 * or if it explicitly states that it is.
4889 */
4890 static bool
4891 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4892 {
4893 #ifdef CONFIG_TRACER_SNAPSHOT
4894 /* arrays with mapped buffer range do not have snapshots */
4895 if (tr->range_addr_start && t->use_max_tr)
4896 return false;
4897 #endif
4898 return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4899 }
4900
4901 /* Find the next tracer that this trace array may use */
4902 static struct tracer *
4903 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4904 {
4905 while (t && !trace_ok_for_array(t, tr))
4906 t = t->next;
4907
4908 return t;
4909 }
4910
4911 static void *
4912 t_next(struct seq_file *m, void *v, loff_t *pos)
4913 {
4914 struct trace_array *tr = m->private;
4915 struct tracer *t = v;
4916
4917 (*pos)++;
4918
4919 if (t)
4920 t = get_tracer_for_array(tr, t->next);
4921
4922 return t;
4923 }
4924
4925 static void *t_start(struct seq_file *m, loff_t *pos)
4926 {
4927 struct trace_array *tr = m->private;
4928 struct tracer *t;
4929 loff_t l = 0;
4930
4931 mutex_lock(&trace_types_lock);
4932
4933 t = get_tracer_for_array(tr, trace_types);
4934 for (; t && l < *pos; t = t_next(m, t, &l))
4935 ;
4936
4937 return t;
4938 }
4939
4940 static void t_stop(struct seq_file *m, void *p)
4941 {
4942 mutex_unlock(&trace_types_lock);
4943 }
4944
4945 static int t_show(struct seq_file *m, void *v)
4946 {
4947 struct tracer *t = v;
4948
4949 if (!t)
4950 return 0;
4951
4952 seq_puts(m, t->name);
4953 if (t->next)
4954 seq_putc(m, ' ');
4955 else
4956 seq_putc(m, '\n');
4957
4958 return 0;
4959 }
4960
4961 static const struct seq_operations show_traces_seq_ops = {
4962 .start = t_start,
4963 .next = t_next,
4964 .stop = t_stop,
4965 .show = t_show,
4966 };
4967
4968 static int show_traces_open(struct inode *inode, struct file *file)
4969 {
4970 struct trace_array *tr = inode->i_private;
4971 struct seq_file *m;
4972 int ret;
4973
4974 ret = tracing_check_open_get_tr(tr);
4975 if (ret)
4976 return ret;
4977
4978 ret = seq_open(file, &show_traces_seq_ops);
4979 if (ret) {
4980 trace_array_put(tr);
4981 return ret;
4982 }
4983
4984 m = file->private_data;
4985 m->private = tr;
4986
4987 return 0;
4988 }
4989
4990 static int tracing_seq_release(struct inode *inode, struct file *file)
4991 {
4992 struct trace_array *tr = inode->i_private;
4993
4994 trace_array_put(tr);
4995 return seq_release(inode, file);
4996 }
4997
4998 static ssize_t
4999 tracing_write_stub(struct file *filp, const char __user *ubuf,
5000 size_t count, loff_t *ppos)
5001 {
5002 return count;
5003 }
5004
5005 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5006 {
5007 int ret;
5008
5009 if (file->f_mode & FMODE_READ)
5010 ret = seq_lseek(file, offset, whence);
5011 else
5012 file->f_pos = ret = 0;
5013
5014 return ret;
5015 }
5016
5017 static const struct file_operations tracing_fops = {
5018 .open = tracing_open,
5019 .read = seq_read,
5020 .read_iter = seq_read_iter,
5021 .splice_read = copy_splice_read,
5022 .write = tracing_write_stub,
5023 .llseek = tracing_lseek,
5024 .release = tracing_release,
5025 };
5026
5027 static const struct file_operations show_traces_fops = {
5028 .open = show_traces_open,
5029 .read = seq_read,
5030 .llseek = seq_lseek,
5031 .release = tracing_seq_release,
5032 };
5033
5034 static ssize_t
5035 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5036 size_t count, loff_t *ppos)
5037 {
5038 struct trace_array *tr = file_inode(filp)->i_private;
5039 char *mask_str __free(kfree) = NULL;
5040 int len;
5041
5042 len = snprintf(NULL, 0, "%*pb\n",
5043 cpumask_pr_args(tr->tracing_cpumask)) + 1;
5044 mask_str = kmalloc(len, GFP_KERNEL);
5045 if (!mask_str)
5046 return -ENOMEM;
5047
5048 len = snprintf(mask_str, len, "%*pb\n",
5049 cpumask_pr_args(tr->tracing_cpumask));
5050 if (len >= count)
5051 return -EINVAL;
5052
5053 return simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5054 }
5055
5056 int tracing_set_cpumask(struct trace_array *tr,
5057 cpumask_var_t tracing_cpumask_new)
5058 {
5059 int cpu;
5060
5061 if (!tr)
5062 return -EINVAL;
5063
5064 local_irq_disable();
5065 arch_spin_lock(&tr->max_lock);
5066 for_each_tracing_cpu(cpu) {
5067 /*
5068 * Increase/decrease the disabled counter if we are
5069 * about to flip a bit in the cpumask:
5070 */
5071 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5072 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5073 ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5074 #ifdef CONFIG_TRACER_MAX_TRACE
5075 ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5076 #endif
5077 }
5078 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5079 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5080 ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5081 #ifdef CONFIG_TRACER_MAX_TRACE
5082 ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5083 #endif
5084 }
5085 }
5086 arch_spin_unlock(&tr->max_lock);
5087 local_irq_enable();
5088
5089 cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5090
5091 return 0;
5092 }
5093
5094 static ssize_t
5095 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5096 size_t count, loff_t *ppos)
5097 {
5098 struct trace_array *tr = file_inode(filp)->i_private;
5099 cpumask_var_t tracing_cpumask_new;
5100 int err;
5101
5102 if (count == 0 || count > KMALLOC_MAX_SIZE)
5103 return -EINVAL;
5104
5105 if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5106 return -ENOMEM;
5107
5108 err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5109 if (err)
5110 goto err_free;
5111
5112 err = tracing_set_cpumask(tr, tracing_cpumask_new);
5113 if (err)
5114 goto err_free;
5115
5116 free_cpumask_var(tracing_cpumask_new);
5117
5118 return count;
5119
5120 err_free:
5121 free_cpumask_var(tracing_cpumask_new);
5122
5123 return err;
5124 }
5125
5126 static const struct file_operations tracing_cpumask_fops = {
5127 .open = tracing_open_generic_tr,
5128 .read = tracing_cpumask_read,
5129 .write = tracing_cpumask_write,
5130 .release = tracing_release_generic_tr,
5131 .llseek = generic_file_llseek,
5132 };
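
/*
 * Example (sketch) of the interface implemented above: the file takes a
 * hex cpumask, so
 *
 *	echo 3 > /sys/kernel/tracing/tracing_cpumask
 *
 * limits recording to CPUs 0 and 1, while a read returns the current
 * mask in the same "%*pb" format.
 */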
5133
5134 static int tracing_trace_options_show(struct seq_file *m, void *v)
5135 {
5136 struct tracer_opt *trace_opts;
5137 struct trace_array *tr = m->private;
5138 u32 tracer_flags;
5139 int i;
5140
5141 guard(mutex)(&trace_types_lock);
5142
5143 tracer_flags = tr->current_trace->flags->val;
5144 trace_opts = tr->current_trace->flags->opts;
5145
5146 for (i = 0; trace_options[i]; i++) {
5147 if (tr->trace_flags & (1 << i))
5148 seq_printf(m, "%s\n", trace_options[i]);
5149 else
5150 seq_printf(m, "no%s\n", trace_options[i]);
5151 }
5152
5153 for (i = 0; trace_opts[i].name; i++) {
5154 if (tracer_flags & trace_opts[i].bit)
5155 seq_printf(m, "%s\n", trace_opts[i].name);
5156 else
5157 seq_printf(m, "no%s\n", trace_opts[i].name);
5158 }
5159
5160 return 0;
5161 }
5162
5163 static int __set_tracer_option(struct trace_array *tr,
5164 struct tracer_flags *tracer_flags,
5165 struct tracer_opt *opts, int neg)
5166 {
5167 struct tracer *trace = tracer_flags->trace;
5168 int ret;
5169
5170 ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5171 if (ret)
5172 return ret;
5173
5174 if (neg)
5175 tracer_flags->val &= ~opts->bit;
5176 else
5177 tracer_flags->val |= opts->bit;
5178 return 0;
5179 }
5180
5181 /* Try to assign a tracer specific option */
5182 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5183 {
5184 struct tracer *trace = tr->current_trace;
5185 struct tracer_flags *tracer_flags = trace->flags;
5186 struct tracer_opt *opts = NULL;
5187 int i;
5188
5189 for (i = 0; tracer_flags->opts[i].name; i++) {
5190 opts = &tracer_flags->opts[i];
5191
5192 if (strcmp(cmp, opts->name) == 0)
5193 return __set_tracer_option(tr, trace->flags, opts, neg);
5194 }
5195
5196 return -EINVAL;
5197 }
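
/*
 * Illustrative example: tracer-specific options are reached through the
 * same trace_options/options interface as the core flags. With the
 * function_graph tracer active, something like
 *
 *	echo nofuncgraph-overhead > /sys/kernel/tracing/trace_options
 *
 * ends up here and clears that tracer's "funcgraph-overhead" bit via
 * __set_tracer_option().
 */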
5198
5199 /* Some tracers require overwrite to stay enabled */
5200 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5201 {
5202 if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5203 return -1;
5204
5205 return 0;
5206 }
5207
5208 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5209 {
5210 if ((mask == TRACE_ITER_RECORD_TGID) ||
5211 (mask == TRACE_ITER_RECORD_CMD) ||
5212 (mask == TRACE_ITER_TRACE_PRINTK) ||
5213 (mask == TRACE_ITER_COPY_MARKER))
5214 lockdep_assert_held(&event_mutex);
5215
5216 /* do nothing if flag is already set */
5217 if (!!(tr->trace_flags & mask) == !!enabled)
5218 return 0;
5219
5220 /* Give the tracer a chance to approve the change */
5221 if (tr->current_trace->flag_changed)
5222 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5223 return -EINVAL;
5224
5225 if (mask == TRACE_ITER_TRACE_PRINTK) {
5226 if (enabled) {
5227 update_printk_trace(tr);
5228 } else {
5229 /*
5230 * The global_trace cannot clear this.
5231 * Its flag only gets cleared if another instance sets it.
5232 */
5233 if (printk_trace == &global_trace)
5234 return -EINVAL;
5235 /*
5236 * An instance must always have it set.
5237 * By default, that's the global_trace instance.
5238 */
5239 if (printk_trace == tr)
5240 update_printk_trace(&global_trace);
5241 }
5242 }
5243
5244 if (mask == TRACE_ITER_COPY_MARKER)
5245 update_marker_trace(tr, enabled);
5246
5247 if (enabled)
5248 tr->trace_flags |= mask;
5249 else
5250 tr->trace_flags &= ~mask;
5251
5252 if (mask == TRACE_ITER_RECORD_CMD)
5253 trace_event_enable_cmd_record(enabled);
5254
5255 if (mask == TRACE_ITER_RECORD_TGID) {
5256
5257 if (trace_alloc_tgid_map() < 0) {
5258 tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5259 return -ENOMEM;
5260 }
5261
5262 trace_event_enable_tgid_record(enabled);
5263 }
5264
5265 if (mask == TRACE_ITER_EVENT_FORK)
5266 trace_event_follow_fork(tr, enabled);
5267
5268 if (mask == TRACE_ITER_FUNC_FORK)
5269 ftrace_pid_follow_fork(tr, enabled);
5270
5271 if (mask == TRACE_ITER_OVERWRITE) {
5272 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5273 #ifdef CONFIG_TRACER_MAX_TRACE
5274 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5275 #endif
5276 }
5277
5278 if (mask == TRACE_ITER_PRINTK) {
5279 trace_printk_start_stop_comm(enabled);
5280 trace_printk_control(enabled);
5281 }
5282
5283 return 0;
5284 }
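
/*
 * Example (sketch): a write such as
 *
 *	echo 0 > /sys/kernel/tracing/options/overwrite
 *
 * reaches here with mask == TRACE_ITER_OVERWRITE and enabled == 0. The
 * current tracer may veto the change through ->flag_changed() (see
 * trace_keep_overwrite() above); otherwise the ring buffers are switched
 * to "stop when full" mode.
 */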
5285
5286 int trace_set_options(struct trace_array *tr, char *option)
5287 {
5288 char *cmp;
5289 int neg = 0;
5290 int ret;
5291 size_t orig_len = strlen(option);
5292 int len;
5293
5294 cmp = strstrip(option);
5295
5296 len = str_has_prefix(cmp, "no");
5297 if (len)
5298 neg = 1;
5299
5300 cmp += len;
5301
5302 mutex_lock(&event_mutex);
5303 mutex_lock(&trace_types_lock);
5304
5305 ret = match_string(trace_options, -1, cmp);
5306 /* If no option could be set, test the specific tracer options */
5307 if (ret < 0)
5308 ret = set_tracer_option(tr, cmp, neg);
5309 else
5310 ret = set_tracer_flag(tr, 1 << ret, !neg);
5311
5312 mutex_unlock(&trace_types_lock);
5313 mutex_unlock(&event_mutex);
5314
5315 /*
5316 * If the first trailing whitespace is replaced with '\0' by strstrip,
5317 * turn it back into a space.
5318 */
5319 if (orig_len > strlen(option))
5320 option[strlen(option)] = ' ';
5321
5322 return ret;
5323 }
5324
5325 static void __init apply_trace_boot_options(void)
5326 {
5327 char *buf = trace_boot_options_buf;
5328 char *option;
5329
5330 while (true) {
5331 option = strsep(&buf, ",");
5332
5333 if (!option)
5334 break;
5335
5336 if (*option)
5337 trace_set_options(&global_trace, option);
5338
5339 /* Put back the comma to allow this to be called again */
5340 if (buf)
5341 *(buf - 1) = ',';
5342 }
5343 }
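
/*
 * Example (illustrative): the buffer parsed above comes from the
 * "trace_options=" kernel command line parameter, e.g.
 *
 *	trace_options=sym-addr,nooverwrite
 *
 * which applies each comma-separated option to the global instance at
 * boot, just as a write to the trace_options file would.
 */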
5344
5345 static ssize_t
5346 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5347 size_t cnt, loff_t *ppos)
5348 {
5349 struct seq_file *m = filp->private_data;
5350 struct trace_array *tr = m->private;
5351 char buf[64];
5352 int ret;
5353
5354 if (cnt >= sizeof(buf))
5355 return -EINVAL;
5356
5357 if (copy_from_user(buf, ubuf, cnt))
5358 return -EFAULT;
5359
5360 buf[cnt] = 0;
5361
5362 ret = trace_set_options(tr, buf);
5363 if (ret < 0)
5364 return ret;
5365
5366 *ppos += cnt;
5367
5368 return cnt;
5369 }
5370
5371 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5372 {
5373 struct trace_array *tr = inode->i_private;
5374 int ret;
5375
5376 ret = tracing_check_open_get_tr(tr);
5377 if (ret)
5378 return ret;
5379
5380 ret = single_open(file, tracing_trace_options_show, inode->i_private);
5381 if (ret < 0)
5382 trace_array_put(tr);
5383
5384 return ret;
5385 }
5386
5387 static const struct file_operations tracing_iter_fops = {
5388 .open = tracing_trace_options_open,
5389 .read = seq_read,
5390 .llseek = seq_lseek,
5391 .release = tracing_single_release_tr,
5392 .write = tracing_trace_options_write,
5393 };
5394
5395 static const char readme_msg[] =
5396 "tracing mini-HOWTO:\n\n"
5397 "By default tracefs removes all OTH file permission bits.\n"
5398 "When mounting tracefs an optional group id can be specified\n"
5399 "which adds the group to every directory and file in tracefs:\n\n"
5400 "\t e.g. mount -t tracefs [-o [gid=<gid>]] nodev /sys/kernel/tracing\n\n"
5401 "# echo 0 > tracing_on : quick way to disable tracing\n"
5402 "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5403 " Important files:\n"
5404 " trace\t\t\t- The static contents of the buffer\n"
5405 "\t\t\t To clear the buffer write into this file: echo > trace\n"
5406 " trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5407 " current_tracer\t- function and latency tracers\n"
5408 " available_tracers\t- list of configured tracers for current_tracer\n"
5409 " error_log\t- error log for failed commands (that support it)\n"
5410 " buffer_size_kb\t- view and modify size of per cpu buffer\n"
5411 " buffer_total_size_kb - view total size of all cpu buffers\n\n"
5412 " trace_clock\t\t- change the clock used to order events\n"
5413 " local: Per cpu clock but may not be synced across CPUs\n"
5414 " global: Synced across CPUs but slows tracing down.\n"
5415 " counter: Not a clock, but just an increment\n"
5416 " uptime: Jiffy counter from time of boot\n"
5417 " perf: Same clock that perf events use\n"
5418 #ifdef CONFIG_X86_64
5419 " x86-tsc: TSC cycle counter\n"
5420 #endif
5421 "\n timestamp_mode\t- view the mode used to timestamp events\n"
5422 " delta: Delta difference against a buffer-wide timestamp\n"
5423 " absolute: Absolute (standalone) timestamp\n"
5424 "\n trace_marker\t\t- Writing into this file writes into the kernel buffer\n"
5425 "\n trace_marker_raw\t\t- Writing into this file writes binary data into the kernel buffer\n"
5426 " tracing_cpumask\t- Limit which CPUs to trace\n"
5427 " instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5428 "\t\t\t Remove sub-buffer with rmdir\n"
5429 " trace_options\t\t- Set format or modify how tracing happens\n"
5430 "\t\t\t Disable an option by prefixing 'no' to the\n"
5431 "\t\t\t option name\n"
5432 " saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5433 #ifdef CONFIG_DYNAMIC_FTRACE
5434 "\n available_filter_functions - list of functions that can be filtered on\n"
5435 " set_ftrace_filter\t- echo function name in here to only trace these\n"
5436 "\t\t\t functions\n"
5437 "\t accepts: func_full_name or glob-matching-pattern\n"
5438 "\t modules: Can select a group via module\n"
5439 "\t Format: :mod:<module-name>\n"
5440 "\t example: echo :mod:ext3 > set_ftrace_filter\n"
5441 "\t triggers: a command to perform when function is hit\n"
5442 "\t Format: <function>:<trigger>[:count]\n"
5443 "\t trigger: traceon, traceoff\n"
5444 "\t\t enable_event:<system>:<event>\n"
5445 "\t\t disable_event:<system>:<event>\n"
5446 #ifdef CONFIG_STACKTRACE
5447 "\t\t stacktrace\n"
5448 #endif
5449 #ifdef CONFIG_TRACER_SNAPSHOT
5450 "\t\t snapshot\n"
5451 #endif
5452 "\t\t dump\n"
5453 "\t\t cpudump\n"
5454 "\t example: echo do_fault:traceoff > set_ftrace_filter\n"
5455 "\t echo do_trap:traceoff:3 > set_ftrace_filter\n"
5456 "\t The first one will disable tracing every time do_fault is hit\n"
5457 "\t The second will disable tracing at most 3 times when do_trap is hit\n"
5458 "\t The first time do trap is hit and it disables tracing, the\n"
5459 "\t counter will decrement to 2. If tracing is already disabled,\n"
5460 "\t the counter will not decrement. It only decrements when the\n"
5461 "\t trigger did work\n"
5462 "\t To remove trigger without count:\n"
5463 "\t echo '!<function>:<trigger> > set_ftrace_filter\n"
5464 "\t To remove trigger with a count:\n"
5465 "\t echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5466 " set_ftrace_notrace\t- echo function name in here to never trace.\n"
5467 "\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5468 "\t modules: Can select a group via module command :mod:\n"
5469 "\t Does not accept triggers\n"
5470 #endif /* CONFIG_DYNAMIC_FTRACE */
5471 #ifdef CONFIG_FUNCTION_TRACER
5472 " set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5473 "\t\t (function)\n"
5474 " set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5475 "\t\t (function)\n"
5476 #endif
5477 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5478 " set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5479 " set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5480 " max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5481 #endif
5482 #ifdef CONFIG_TRACER_SNAPSHOT
5483 "\n snapshot\t\t- Like 'trace' but shows the content of the static\n"
5484 "\t\t\t snapshot buffer. Read the contents for more\n"
5485 "\t\t\t information\n"
5486 #endif
5487 #ifdef CONFIG_STACK_TRACER
5488 " stack_trace\t\t- Shows the max stack trace when active\n"
5489 " stack_max_size\t- Shows current max stack size that was traced\n"
5490 "\t\t\t Write into this file to reset the max size (trigger a\n"
5491 "\t\t\t new trace)\n"
5492 #ifdef CONFIG_DYNAMIC_FTRACE
5493 " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5494 "\t\t\t traces\n"
5495 #endif
5496 #endif /* CONFIG_STACK_TRACER */
5497 #ifdef CONFIG_DYNAMIC_EVENTS
5498 " dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5499 "\t\t\t Write into this file to define/undefine new trace events.\n"
5500 #endif
5501 #ifdef CONFIG_KPROBE_EVENTS
5502 " kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5503 "\t\t\t Write into this file to define/undefine new trace events.\n"
5504 #endif
5505 #ifdef CONFIG_UPROBE_EVENTS
5506 " uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5507 "\t\t\t Write into this file to define/undefine new trace events.\n"
5508 #endif
5509 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
5510 defined(CONFIG_FPROBE_EVENTS)
5511 "\t accepts: event-definitions (one definition per line)\n"
5512 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5513 "\t Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5514 "\t r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5515 #endif
5516 #ifdef CONFIG_FPROBE_EVENTS
5517 "\t f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
5518 "\t t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
5519 #endif
5520 #ifdef CONFIG_HIST_TRIGGERS
5521 "\t s:[synthetic/]<event> <field> [<field>]\n"
5522 #endif
5523 "\t e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5524 "\t -:[<group>/][<event>]\n"
5525 #ifdef CONFIG_KPROBE_EVENTS
5526 "\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5527 "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5528 #endif
5529 #ifdef CONFIG_UPROBE_EVENTS
5530 " place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5531 #endif
5532 "\t args: <name>=fetcharg[:type]\n"
5533 "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5534 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5535 "\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5536 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
5537 "\t <argname>[->field[->field|.field...]],\n"
5538 #endif
5539 #else
5540 "\t $stack<index>, $stack, $retval, $comm,\n"
5541 #endif
5542 "\t +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5543 "\t kernel return probes support: $retval, $arg<N>, $comm\n"
5544 "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
5545 "\t b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5546 "\t symstr, %pd/%pD, <type>\\[<array-size>\\]\n"
5547 #ifdef CONFIG_HIST_TRIGGERS
5548 "\t field: <stype> <name>;\n"
5549 "\t stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5550 "\t [unsigned] char/int/long\n"
5551 #endif
5552 "\t efield: For event probes ('e' types), the field is on of the fields\n"
5553 "\t of the <attached-group>/<attached-event>.\n"
5554 #endif
5555 " set_event\t\t- Enables events by name written into it\n"
5556 "\t\t\t Can enable module events via: :mod:<module>\n"
5557 " events/\t\t- Directory containing all trace event subsystems:\n"
5558 " enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5559 " events/<system>/\t- Directory containing all trace events for <system>:\n"
5560 " enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5561 "\t\t\t events\n"
5562 " filter\t\t- If set, only events passing filter are traced\n"
5563 " events/<system>/<event>/\t- Directory containing control files for\n"
5564 "\t\t\t <event>:\n"
5565 " enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5566 " filter\t\t- If set, only events passing filter are traced\n"
5567 " trigger\t\t- If set, a command to perform when event is hit\n"
5568 "\t Format: <trigger>[:count][if <filter>]\n"
5569 "\t trigger: traceon, traceoff\n"
5570 "\t enable_event:<system>:<event>\n"
5571 "\t disable_event:<system>:<event>\n"
5572 #ifdef CONFIG_HIST_TRIGGERS
5573 "\t enable_hist:<system>:<event>\n"
5574 "\t disable_hist:<system>:<event>\n"
5575 #endif
5576 #ifdef CONFIG_STACKTRACE
5577 "\t\t stacktrace\n"
5578 #endif
5579 #ifdef CONFIG_TRACER_SNAPSHOT
5580 "\t\t snapshot\n"
5581 #endif
5582 #ifdef CONFIG_HIST_TRIGGERS
5583 "\t\t hist (see below)\n"
5584 #endif
5585 "\t example: echo traceoff > events/block/block_unplug/trigger\n"
5586 "\t echo traceoff:3 > events/block/block_unplug/trigger\n"
5587 "\t echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5588 "\t events/block/block_unplug/trigger\n"
5589 "\t The first disables tracing every time block_unplug is hit.\n"
5590 "\t The second disables tracing the first 3 times block_unplug is hit.\n"
5591 "\t The third enables the kmalloc event the first 3 times block_unplug\n"
5592 "\t is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5593 "\t Like function triggers, the counter is only decremented if it\n"
5594 "\t enabled or disabled tracing.\n"
5595 "\t To remove a trigger without a count:\n"
5596 "\t echo '!<trigger> > <system>/<event>/trigger\n"
5597 "\t To remove a trigger with a count:\n"
5598 "\t echo '!<trigger>:0 > <system>/<event>/trigger\n"
5599 "\t Filters can be ignored when removing a trigger.\n"
5600 #ifdef CONFIG_HIST_TRIGGERS
5601 " hist trigger\t- If set, event hits are aggregated into a hash table\n"
5602 "\t Format: hist:keys=<field1[,field2,...]>\n"
5603 "\t [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5604 "\t [:values=<field1[,field2,...]>]\n"
5605 "\t [:sort=<field1[,field2,...]>]\n"
5606 "\t [:size=#entries]\n"
5607 "\t [:pause][:continue][:clear]\n"
5608 "\t [:name=histname1]\n"
5609 "\t [:nohitcount]\n"
5610 "\t [:<handler>.<action>]\n"
5611 "\t [if <filter>]\n\n"
5612 "\t Note, special fields can be used as well:\n"
5613 "\t common_timestamp - to record current timestamp\n"
5614 "\t common_cpu - to record the CPU the event happened on\n"
5615 "\n"
5616 "\t A hist trigger variable can be:\n"
5617 "\t - a reference to a field e.g. x=current_timestamp,\n"
5618 "\t - a reference to another variable e.g. y=$x,\n"
5619 "\t - a numeric literal: e.g. ms_per_sec=1000,\n"
5620 "\t - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5621 "\n"
5622 "\t hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5623 "\t multiplication(*) and division(/) operators. An operand can be either a\n"
5624 "\t variable reference, field or numeric literal.\n"
5625 "\n"
5626 "\t When a matching event is hit, an entry is added to a hash\n"
5627 "\t table using the key(s) and value(s) named, and the value of a\n"
5628 "\t sum called 'hitcount' is incremented. Keys and values\n"
5629 "\t correspond to fields in the event's format description. Keys\n"
5630 "\t can be any field, or the special string 'common_stacktrace'.\n"
5631 "\t Compound keys consisting of up to two fields can be specified\n"
5632 "\t by the 'keys' keyword. Values must correspond to numeric\n"
5633 "\t fields. Sort keys consisting of up to two fields can be\n"
5634 "\t specified using the 'sort' keyword. The sort direction can\n"
5635 "\t be modified by appending '.descending' or '.ascending' to a\n"
5636 "\t sort field. The 'size' parameter can be used to specify more\n"
5637 "\t or fewer than the default 2048 entries for the hashtable size.\n"
5638 "\t If a hist trigger is given a name using the 'name' parameter,\n"
5639 "\t its histogram data will be shared with other triggers of the\n"
5640 "\t same name, and trigger hits will update this common data.\n\n"
5641 "\t Reading the 'hist' file for the event will dump the hash\n"
5642 "\t table in its entirety to stdout. If there are multiple hist\n"
5643 "\t triggers attached to an event, there will be a table for each\n"
5644 "\t trigger in the output. The table displayed for a named\n"
5645 "\t trigger will be the same as any other instance having the\n"
5646 "\t same name. The default format used to display a given field\n"
5647 "\t can be modified by appending any of the following modifiers\n"
5648 "\t to the field name, as applicable:\n\n"
5649 "\t .hex display a number as a hex value\n"
5650 "\t .sym display an address as a symbol\n"
5651 "\t .sym-offset display an address as a symbol and offset\n"
5652 "\t .execname display a common_pid as a program name\n"
5653 "\t .syscall display a syscall id as a syscall name\n"
5654 "\t .log2 display log2 value rather than raw number\n"
5655 "\t .buckets=size display values in groups of size rather than raw number\n"
5656 "\t .usecs display a common_timestamp in microseconds\n"
5657 "\t .percent display a number of percentage value\n"
5658 "\t .graph display a bar-graph of a value\n\n"
5659 "\t The 'pause' parameter can be used to pause an existing hist\n"
5660 "\t trigger or to start a hist trigger but not log any events\n"
5661 "\t until told to do so. 'continue' can be used to start or\n"
5662 "\t restart a paused hist trigger.\n\n"
5663 "\t The 'clear' parameter will clear the contents of a running\n"
5664 "\t hist trigger and leave its current paused/active state\n"
5665 "\t unchanged.\n\n"
5666 "\t The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5667 "\t raw hitcount in the histogram.\n\n"
5668 "\t The enable_hist and disable_hist triggers can be used to\n"
5669 "\t have one event conditionally start and stop another event's\n"
5670 "\t already-attached hist trigger. The syntax is analogous to\n"
5671 "\t the enable_event and disable_event triggers.\n\n"
5672 "\t Hist trigger handlers and actions are executed whenever a\n"
5673 "\t a histogram entry is added or updated. They take the form:\n\n"
5674 "\t <handler>.<action>\n\n"
5675 "\t The available handlers are:\n\n"
5676 "\t onmatch(matching.event) - invoke on addition or update\n"
5677 "\t onmax(var) - invoke if var exceeds current max\n"
5678 "\t onchange(var) - invoke action if var changes\n\n"
5679 "\t The available actions are:\n\n"
5680 "\t trace(<synthetic_event>,param list) - generate synthetic event\n"
5681 "\t save(field,...) - save current event fields\n"
5682 #ifdef CONFIG_TRACER_SNAPSHOT
5683 "\t snapshot() - snapshot the trace buffer\n\n"
5684 #endif
5685 #ifdef CONFIG_SYNTH_EVENTS
5686 " events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5687 "\t Write into this file to define/undefine new synthetic events.\n"
5688 "\t example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5689 #endif
5690 #endif
5691 ;
5692
5693 static ssize_t
5694 tracing_readme_read(struct file *filp, char __user *ubuf,
5695 size_t cnt, loff_t *ppos)
5696 {
5697 return simple_read_from_buffer(ubuf, cnt, ppos,
5698 readme_msg, strlen(readme_msg));
5699 }
5700
5701 static const struct file_operations tracing_readme_fops = {
5702 .open = tracing_open_generic,
5703 .read = tracing_readme_read,
5704 .llseek = generic_file_llseek,
5705 };
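/*
 * The readme_msg text above is exposed read-only through tracefs
 * (typically as /sys/kernel/tracing/README), so the whole mini-HOWTO
 * can be dumped with a plain read:
 *
 *	# cat /sys/kernel/tracing/README
 */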
5706
5707 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5708 static union trace_eval_map_item *
5709 update_eval_map(union trace_eval_map_item *ptr)
5710 {
5711 if (!ptr->map.eval_string) {
5712 if (ptr->tail.next) {
5713 ptr = ptr->tail.next;
5714 /* Set ptr to the next real item (skip head) */
5715 ptr++;
5716 } else
5717 return NULL;
5718 }
5719 return ptr;
5720 }
5721
5722 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5723 {
5724 union trace_eval_map_item *ptr = v;
5725
5726 /*
5727 * Paranoid! If ptr points to end, we don't want to increment past it.
5728 * This really should never happen.
5729 */
5730 (*pos)++;
5731 ptr = update_eval_map(ptr);
5732 if (WARN_ON_ONCE(!ptr))
5733 return NULL;
5734
5735 ptr++;
5736 ptr = update_eval_map(ptr);
5737
5738 return ptr;
5739 }
5740
5741 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5742 {
5743 union trace_eval_map_item *v;
5744 loff_t l = 0;
5745
5746 mutex_lock(&trace_eval_mutex);
5747
5748 v = trace_eval_maps;
5749 if (v)
5750 v++;
5751
5752 while (v && l < *pos) {
5753 v = eval_map_next(m, v, &l);
5754 }
5755
5756 return v;
5757 }
5758
5759 static void eval_map_stop(struct seq_file *m, void *v)
5760 {
5761 mutex_unlock(&trace_eval_mutex);
5762 }
5763
5764 static int eval_map_show(struct seq_file *m, void *v)
5765 {
5766 union trace_eval_map_item *ptr = v;
5767
5768 seq_printf(m, "%s %ld (%s)\n",
5769 ptr->map.eval_string, ptr->map.eval_value,
5770 ptr->map.system);
5771
5772 return 0;
5773 }
5774
5775 static const struct seq_operations tracing_eval_map_seq_ops = {
5776 .start = eval_map_start,
5777 .next = eval_map_next,
5778 .stop = eval_map_stop,
5779 .show = eval_map_show,
5780 };
5781
5782 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5783 {
5784 int ret;
5785
5786 ret = tracing_check_open_get_tr(NULL);
5787 if (ret)
5788 return ret;
5789
5790 return seq_open(filp, &tracing_eval_map_seq_ops);
5791 }
5792
5793 static const struct file_operations tracing_eval_map_fops = {
5794 .open = tracing_eval_map_open,
5795 .read = seq_read,
5796 .llseek = seq_lseek,
5797 .release = seq_release,
5798 };
5799
5800 static inline union trace_eval_map_item *
5801 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5802 {
5803 /* Return tail of array given the head */
5804 return ptr + ptr->head.length + 1;
5805 }
5806
5807 static void
5808 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5809 int len)
5810 {
5811 struct trace_eval_map **stop;
5812 struct trace_eval_map **map;
5813 union trace_eval_map_item *map_array;
5814 union trace_eval_map_item *ptr;
5815
5816 stop = start + len;
5817
5818 /*
5819 * The trace_eval_maps contains the map plus a head and tail item,
5820 * where the head holds the module and length of array, and the
5821 * tail holds a pointer to the next list.
5822 */
5823 map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5824 if (!map_array) {
5825 pr_warn("Unable to allocate trace eval mapping\n");
5826 return;
5827 }
5828
5829 guard(mutex)(&trace_eval_mutex);
5830
5831 if (!trace_eval_maps)
5832 trace_eval_maps = map_array;
5833 else {
5834 ptr = trace_eval_maps;
5835 for (;;) {
5836 ptr = trace_eval_jmp_to_tail(ptr);
5837 if (!ptr->tail.next)
5838 break;
5839 ptr = ptr->tail.next;
5840
5841 }
5842 ptr->tail.next = map_array;
5843 }
5844 map_array->head.mod = mod;
5845 map_array->head.length = len;
5846 map_array++;
5847
5848 for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5849 map_array->map = **map;
5850 map_array++;
5851 }
5852 memset(map_array, 0, sizeof(*map_array));
5853 }
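/*
 * Layout of one map_array allocation built above: a head item holding
 * the module and length, the copied eval maps, and a zeroed tail item
 * whose ->tail.next links to the next array:
 *
 *	[ head: mod, length ][ map 0 ] ... [ map len-1 ][ tail: next ]
 *
 * trace_eval_jmp_to_tail() steps from the head to the tail by adding
 * length + 1, which is how these arrays are chained into a list.
 */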
5854
5855 static void trace_create_eval_file(struct dentry *d_tracer)
5856 {
5857 trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
5858 NULL, &tracing_eval_map_fops);
5859 }
5860
5861 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5862 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5863 static inline void trace_insert_eval_map_file(struct module *mod,
5864 struct trace_eval_map **start, int len) { }
5865 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5866
5867 static void
5868 trace_event_update_with_eval_map(struct module *mod,
5869 struct trace_eval_map **start,
5870 int len)
5871 {
5872 struct trace_eval_map **map;
5873
5874 /* With no eval maps, only run the sanitizer if the btf_type_tag attr exists. */
5875 if (len <= 0) {
5876 if (!(IS_ENABLED(CONFIG_DEBUG_INFO_BTF) &&
5877 IS_ENABLED(CONFIG_PAHOLE_HAS_BTF_TAG) &&
5878 __has_attribute(btf_type_tag)))
5879 return;
5880 }
5881
5882 map = start;
5883
5884 trace_event_update_all(map, len);
5885
5886 if (len <= 0)
5887 return;
5888
5889 trace_insert_eval_map_file(mod, start, len);
5890 }
5891
5892 static ssize_t
5893 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5894 size_t cnt, loff_t *ppos)
5895 {
5896 struct trace_array *tr = filp->private_data;
5897 char buf[MAX_TRACER_SIZE+2];
5898 int r;
5899
5900 scoped_guard(mutex, &trace_types_lock) {
5901 r = sprintf(buf, "%s\n", tr->current_trace->name);
5902 }
5903
5904 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5905 }
5906
5907 int tracer_init(struct tracer *t, struct trace_array *tr)
5908 {
5909 tracing_reset_online_cpus(&tr->array_buffer);
5910 return t->init(tr);
5911 }
5912
5913 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5914 {
5915 int cpu;
5916
5917 for_each_tracing_cpu(cpu)
5918 per_cpu_ptr(buf->data, cpu)->entries = val;
5919 }
5920
5921 static void update_buffer_entries(struct array_buffer *buf, int cpu)
5922 {
5923 if (cpu == RING_BUFFER_ALL_CPUS) {
5924 set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
5925 } else {
5926 per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
5927 }
5928 }
5929
5930 #ifdef CONFIG_TRACER_MAX_TRACE
5931 /* resize @trace_buf's buffer to the size of @size_buf's entries */
5932 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5933 struct array_buffer *size_buf, int cpu_id)
5934 {
5935 int cpu, ret = 0;
5936
5937 if (cpu_id == RING_BUFFER_ALL_CPUS) {
5938 for_each_tracing_cpu(cpu) {
5939 ret = ring_buffer_resize(trace_buf->buffer,
5940 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5941 if (ret < 0)
5942 break;
5943 per_cpu_ptr(trace_buf->data, cpu)->entries =
5944 per_cpu_ptr(size_buf->data, cpu)->entries;
5945 }
5946 } else {
5947 ret = ring_buffer_resize(trace_buf->buffer,
5948 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5949 if (ret == 0)
5950 per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5951 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5952 }
5953
5954 return ret;
5955 }
5956 #endif /* CONFIG_TRACER_MAX_TRACE */
5957
5958 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5959 unsigned long size, int cpu)
5960 {
5961 int ret;
5962
5963 /*
5964 * If kernel or user changes the size of the ring buffer
5965 * we use the size that was given, and we can forget about
5966 * expanding it later.
5967 */
5968 trace_set_ring_buffer_expanded(tr);
5969
5970 /* May be called before buffers are initialized */
5971 if (!tr->array_buffer.buffer)
5972 return 0;
5973
5974 /* Do not allow tracing while resizing ring buffer */
5975 tracing_stop_tr(tr);
5976
5977 ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
5978 if (ret < 0)
5979 goto out_start;
5980
5981 #ifdef CONFIG_TRACER_MAX_TRACE
5982 if (!tr->allocated_snapshot)
5983 goto out;
5984
5985 ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5986 if (ret < 0) {
5987 int r = resize_buffer_duplicate_size(&tr->array_buffer,
5988 &tr->array_buffer, cpu);
5989 if (r < 0) {
5990 /*
5991 * AARGH! We are left with different
5992 * size max buffer!!!!
5993 * The max buffer is our "snapshot" buffer.
5994 * When a tracer needs a snapshot (one of the
5995 * latency tracers), it swaps the max buffer
5996 * with the saved snapshot. We succeeded in updating
5997 * the size of the main buffer, but failed to
5998 * update the size of the max buffer. But when we tried
5999 * to reset the main buffer to the original size, we
6000 * failed there too. This is very unlikely to
6001 * happen, but if it does, warn and kill all
6002 * tracing.
6003 */
6004 WARN_ON(1);
6005 tracing_disabled = 1;
6006 }
6007 goto out_start;
6008 }
6009
6010 update_buffer_entries(&tr->max_buffer, cpu);
6011
6012 out:
6013 #endif /* CONFIG_TRACER_MAX_TRACE */
6014
6015 update_buffer_entries(&tr->array_buffer, cpu);
6016 out_start:
6017 tracing_start_tr(tr);
6018 return ret;
6019 }
6020
6021 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6022 unsigned long size, int cpu_id)
6023 {
6024 guard(mutex)(&trace_types_lock);
6025
6026 if (cpu_id != RING_BUFFER_ALL_CPUS) {
6027 /* make sure, this cpu is enabled in the mask */
6028 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask))
6029 return -EINVAL;
6030 }
6031
6032 return __tracing_resize_ring_buffer(tr, size, cpu_id);
6033 }
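/*
 * This is the backend for resizes requested through tracefs. For
 * example (the value is in KB and applies per CPU):
 *
 *	# echo 4096 > /sys/kernel/tracing/buffer_size_kb
 *
 * reaches this function via tracing_entries_write() below with
 * cpu_id set to RING_BUFFER_ALL_CPUS.
 */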
6034
6035 struct trace_mod_entry {
6036 unsigned long mod_addr;
6037 char mod_name[MODULE_NAME_LEN];
6038 };
6039
6040 struct trace_scratch {
6041 unsigned int clock_id;
6042 unsigned long text_addr;
6043 unsigned long nr_entries;
6044 struct trace_mod_entry entries[];
6045 };
6046
6047 static DEFINE_MUTEX(scratch_mutex);
6048
6049 static int cmp_mod_entry(const void *key, const void *pivot)
6050 {
6051 unsigned long addr = (unsigned long)key;
6052 const struct trace_mod_entry *ent = pivot;
6053
6054 if (addr >= ent[0].mod_addr && addr < ent[1].mod_addr)
6055 return 0;
6056 else
6057 return addr - ent->mod_addr;
6058 }
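/*
 * Comparator note: the bsearch "key" is a raw address and the pivot is
 * a pair of adjacent entries sorted by mod_addr. An entry matches when
 * the address falls in [ent[0].mod_addr, ent[1].mod_addr), which is why
 * trace_adjust_address() below searches only nr_entries - 1 items and
 * handles the last entry separately.
 */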
6059
6060 /**
6061 * trace_adjust_address() - Adjust prev boot address to current address.
6062 * @tr: Persistent ring buffer's trace_array.
6063 * @addr: Address in @tr which is adjusted.
6064 */
6065 unsigned long trace_adjust_address(struct trace_array *tr, unsigned long addr)
6066 {
6067 struct trace_module_delta *module_delta;
6068 struct trace_scratch *tscratch;
6069 struct trace_mod_entry *entry;
6070 unsigned long raddr;
6071 int idx = 0, nr_entries;
6072
6073 /* If we don't have last boot delta, return the address */
6074 if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6075 return addr;
6076
6077 /* tr->module_delta must be protected by rcu. */
6078 guard(rcu)();
6079 tscratch = tr->scratch;
6080 /* If there is no tscratch, module_delta must be NULL. */
6081 module_delta = READ_ONCE(tr->module_delta);
6082 if (!module_delta || !tscratch->nr_entries ||
6083 tscratch->entries[0].mod_addr > addr) {
6084 raddr = addr + tr->text_delta;
6085 return __is_kernel(raddr) || is_kernel_core_data(raddr) ||
6086 is_kernel_rodata(raddr) ? raddr : addr;
6087 }
6088
6089 /* Note that entries must be sorted. */
6090 nr_entries = tscratch->nr_entries;
6091 if (nr_entries == 1 ||
6092 tscratch->entries[nr_entries - 1].mod_addr < addr)
6093 idx = nr_entries - 1;
6094 else {
6095 entry = __inline_bsearch((void *)addr,
6096 tscratch->entries,
6097 nr_entries - 1,
6098 sizeof(tscratch->entries[0]),
6099 cmp_mod_entry);
6100 if (entry)
6101 idx = entry - tscratch->entries;
6102 }
6103
6104 return addr + module_delta->delta[idx];
6105 }
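/*
 * In short: for a persistent (previous boot) buffer, an address that
 * was recorded during the last boot is translated to this boot's layout
 * by adding either the kernel text delta or the matching module's
 * delta; without last boot data the address is returned unchanged.
 */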
6106
6107 #ifdef CONFIG_MODULES
6108 static int save_mod(struct module *mod, void *data)
6109 {
6110 struct trace_array *tr = data;
6111 struct trace_scratch *tscratch;
6112 struct trace_mod_entry *entry;
6113 unsigned int size;
6114
6115 tscratch = tr->scratch;
6116 if (!tscratch)
6117 return -1;
6118 size = tr->scratch_size;
6119
6120 if (struct_size(tscratch, entries, tscratch->nr_entries + 1) > size)
6121 return -1;
6122
6123 entry = &tscratch->entries[tscratch->nr_entries];
6124
6125 tscratch->nr_entries++;
6126
6127 entry->mod_addr = (unsigned long)mod->mem[MOD_TEXT].base;
6128 strscpy(entry->mod_name, mod->name);
6129
6130 return 0;
6131 }
6132 #else
6133 static int save_mod(struct module *mod, void *data)
6134 {
6135 return 0;
6136 }
6137 #endif
6138
6139 static void update_last_data(struct trace_array *tr)
6140 {
6141 struct trace_module_delta *module_delta;
6142 struct trace_scratch *tscratch;
6143
6144 if (!(tr->flags & TRACE_ARRAY_FL_BOOT))
6145 return;
6146
6147 if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6148 return;
6149
6150 /* Only if the buffer has previous boot data, clear and update it. */
6151 tr->flags &= ~TRACE_ARRAY_FL_LAST_BOOT;
6152
6153 /* Reset the module list and reload them */
6154 if (tr->scratch) {
6155 struct trace_scratch *tscratch = tr->scratch;
6156
6157 tscratch->clock_id = tr->clock_id;
6158 memset(tscratch->entries, 0,
6159 flex_array_size(tscratch, entries, tscratch->nr_entries));
6160 tscratch->nr_entries = 0;
6161
6162 guard(mutex)(&scratch_mutex);
6163 module_for_each_mod(save_mod, tr);
6164 }
6165
6166 /*
6167 * Need to clear all CPU buffers as there cannot be events
6168 * from the previous boot mixed with events from this boot,
6169 * as that would cause a confusing trace. Need to clear all
6170 * CPU buffers, even for those that may currently be offline.
6171 */
6172 tracing_reset_all_cpus(&tr->array_buffer);
6173
6174 /* Using current data now */
6175 tr->text_delta = 0;
6176
6177 if (!tr->scratch)
6178 return;
6179
6180 tscratch = tr->scratch;
6181 module_delta = READ_ONCE(tr->module_delta);
6182 WRITE_ONCE(tr->module_delta, NULL);
6183 kfree_rcu(module_delta, rcu);
6184
6185 /* Set the persistent ring buffer meta data to this address */
6186 tscratch->text_addr = (unsigned long)_text;
6187 }
6188
6189 /**
6190 * tracing_update_buffers - used by tracing facility to expand ring buffers
6191 * @tr: The tracing instance
6192 *
6193 * To save memory when tracing is never used on a system that has it
6194 * configured in, the ring buffers are set to a minimum size. But once
6195 * a user starts to use the tracing facility, they need to grow
6196 * to their default size.
6197 *
6198 * This function is to be called when a tracer is about to be used.
6199 */
6200 int tracing_update_buffers(struct trace_array *tr)
6201 {
6202 int ret = 0;
6203
6204 guard(mutex)(&trace_types_lock);
6205
6206 update_last_data(tr);
6207
6208 if (!tr->ring_buffer_expanded)
6209 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6210 RING_BUFFER_ALL_CPUS);
6211 return ret;
6212 }
6213
6214 struct trace_option_dentry;
6215
6216 static void
6217 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6218
6219 /*
6220 * Used to clear out the tracer before deletion of an instance.
6221 * Must have trace_types_lock held.
6222 */
6223 static void tracing_set_nop(struct trace_array *tr)
6224 {
6225 if (tr->current_trace == &nop_trace)
6226 return;
6227
6228 tr->current_trace->enabled--;
6229
6230 if (tr->current_trace->reset)
6231 tr->current_trace->reset(tr);
6232
6233 tr->current_trace = &nop_trace;
6234 }
6235
6236 static bool tracer_options_updated;
6237
6238 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6239 {
6240 /* Only enable if the directory has been created already. */
6241 if (!tr->dir && !(tr->flags & TRACE_ARRAY_FL_GLOBAL))
6242 return;
6243
6244 /* Only create trace option files after update_tracer_options finish */
6245 if (!tracer_options_updated)
6246 return;
6247
6248 create_trace_option_files(tr, t);
6249 }
6250
6251 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6252 {
6253 struct tracer *t;
6254 #ifdef CONFIG_TRACER_MAX_TRACE
6255 bool had_max_tr;
6256 #endif
6257 int ret;
6258
6259 guard(mutex)(&trace_types_lock);
6260
6261 update_last_data(tr);
6262
6263 if (!tr->ring_buffer_expanded) {
6264 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6265 RING_BUFFER_ALL_CPUS);
6266 if (ret < 0)
6267 return ret;
6268 ret = 0;
6269 }
6270
6271 for (t = trace_types; t; t = t->next) {
6272 if (strcmp(t->name, buf) == 0)
6273 break;
6274 }
6275 if (!t)
6276 return -EINVAL;
6277
6278 if (t == tr->current_trace)
6279 return 0;
6280
6281 #ifdef CONFIG_TRACER_SNAPSHOT
6282 if (t->use_max_tr) {
6283 local_irq_disable();
6284 arch_spin_lock(&tr->max_lock);
6285 ret = tr->cond_snapshot ? -EBUSY : 0;
6286 arch_spin_unlock(&tr->max_lock);
6287 local_irq_enable();
6288 if (ret)
6289 return ret;
6290 }
6291 #endif
6292 /* Some tracers won't work on kernel command line */
6293 if (system_state < SYSTEM_RUNNING && t->noboot) {
6294 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6295 t->name);
6296 return -EINVAL;
6297 }
6298
6299 /* Some tracers are only allowed for the top level buffer */
6300 if (!trace_ok_for_array(t, tr))
6301 return -EINVAL;
6302
6303 /* If trace pipe files are being read, we can't change the tracer */
6304 if (tr->trace_ref)
6305 return -EBUSY;
6306
6307 trace_branch_disable();
6308
6309 tr->current_trace->enabled--;
6310
6311 if (tr->current_trace->reset)
6312 tr->current_trace->reset(tr);
6313
6314 #ifdef CONFIG_TRACER_MAX_TRACE
6315 had_max_tr = tr->current_trace->use_max_tr;
6316
6317 /* Current trace needs to be nop_trace before synchronize_rcu */
6318 tr->current_trace = &nop_trace;
6319
6320 if (had_max_tr && !t->use_max_tr) {
6321 /*
6322 * We need to make sure that the update_max_tr sees that
6323 * current_trace changed to nop_trace to keep it from
6324 * swapping the buffers after we resize it.
6325 * The update_max_tr is called with interrupts disabled,
6326 * so a synchronize_rcu() is sufficient.
6327 */
6328 synchronize_rcu();
6329 free_snapshot(tr);
6330 tracing_disarm_snapshot(tr);
6331 }
6332
6333 if (!had_max_tr && t->use_max_tr) {
6334 ret = tracing_arm_snapshot_locked(tr);
6335 if (ret)
6336 return ret;
6337 }
6338 #else
6339 tr->current_trace = &nop_trace;
6340 #endif
6341
6342 if (t->init) {
6343 ret = tracer_init(t, tr);
6344 if (ret) {
6345 #ifdef CONFIG_TRACER_MAX_TRACE
6346 if (t->use_max_tr)
6347 tracing_disarm_snapshot(tr);
6348 #endif
6349 return ret;
6350 }
6351 }
6352
6353 tr->current_trace = t;
6354 tr->current_trace->enabled++;
6355 trace_branch_enable(tr);
6356
6357 return 0;
6358 }
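/*
 * Example: tracers are switched by writing a name listed in
 * available_tracers into current_tracer:
 *
 *	# echo function_graph > /sys/kernel/tracing/current_tracer
 *	# echo nop > /sys/kernel/tracing/current_tracer
 *
 * Both end up here via tracing_set_trace_write() below.
 */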
6359
6360 static ssize_t
6361 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6362 size_t cnt, loff_t *ppos)
6363 {
6364 struct trace_array *tr = filp->private_data;
6365 char buf[MAX_TRACER_SIZE+1];
6366 char *name;
6367 size_t ret;
6368 int err;
6369
6370 ret = cnt;
6371
6372 if (cnt > MAX_TRACER_SIZE)
6373 cnt = MAX_TRACER_SIZE;
6374
6375 if (copy_from_user(buf, ubuf, cnt))
6376 return -EFAULT;
6377
6378 buf[cnt] = 0;
6379
6380 name = strim(buf);
6381
6382 err = tracing_set_tracer(tr, name);
6383 if (err)
6384 return err;
6385
6386 *ppos += ret;
6387
6388 return ret;
6389 }
6390
6391 static ssize_t
6392 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6393 size_t cnt, loff_t *ppos)
6394 {
6395 char buf[64];
6396 int r;
6397
6398 r = snprintf(buf, sizeof(buf), "%ld\n",
6399 *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6400 if (r > sizeof(buf))
6401 r = sizeof(buf);
6402 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6403 }
6404
6405 static ssize_t
6406 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6407 size_t cnt, loff_t *ppos)
6408 {
6409 unsigned long val;
6410 int ret;
6411
6412 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6413 if (ret)
6414 return ret;
6415
6416 *ptr = val * 1000;
6417
6418 return cnt;
6419 }
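/*
 * Units for the two helpers above: the backing variables are kept in
 * nanoseconds, but the files present and accept microseconds. Reads
 * convert with nsecs_to_usecs() (printing -1 when the value is unset)
 * and writes multiply the user value by 1000.
 */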
6420
6421 static ssize_t
6422 tracing_thresh_read(struct file *filp, char __user *ubuf,
6423 size_t cnt, loff_t *ppos)
6424 {
6425 return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6426 }
6427
6428 static ssize_t
6429 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6430 size_t cnt, loff_t *ppos)
6431 {
6432 struct trace_array *tr = filp->private_data;
6433 int ret;
6434
6435 guard(mutex)(&trace_types_lock);
6436 ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6437 if (ret < 0)
6438 return ret;
6439
6440 if (tr->current_trace->update_thresh) {
6441 ret = tr->current_trace->update_thresh(tr);
6442 if (ret < 0)
6443 return ret;
6444 }
6445
6446 return cnt;
6447 }
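/*
 * Example: set a 100 microsecond threshold for tracers that honor
 * tracing_thresh (the value written is in microseconds, as described
 * for the helpers above):
 *
 *	# echo 100 > /sys/kernel/tracing/tracing_thresh
 */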
6448
6449 #ifdef CONFIG_TRACER_MAX_TRACE
6450
6451 static ssize_t
6452 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6453 size_t cnt, loff_t *ppos)
6454 {
6455 struct trace_array *tr = filp->private_data;
6456
6457 return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6458 }
6459
6460 static ssize_t
6461 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6462 size_t cnt, loff_t *ppos)
6463 {
6464 struct trace_array *tr = filp->private_data;
6465
6466 return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6467 }
6468
6469 #endif
6470
6471 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6472 {
6473 if (cpu == RING_BUFFER_ALL_CPUS) {
6474 if (cpumask_empty(tr->pipe_cpumask)) {
6475 cpumask_setall(tr->pipe_cpumask);
6476 return 0;
6477 }
6478 } else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6479 cpumask_set_cpu(cpu, tr->pipe_cpumask);
6480 return 0;
6481 }
6482 return -EBUSY;
6483 }
6484
6485 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6486 {
6487 if (cpu == RING_BUFFER_ALL_CPUS) {
6488 WARN_ON(!cpumask_full(tr->pipe_cpumask));
6489 cpumask_clear(tr->pipe_cpumask);
6490 } else {
6491 WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6492 cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6493 }
6494 }
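/*
 * open_pipe_on_cpu()/close_pipe_on_cpu() give trace_pipe readers
 * exclusive access: either a single reader owns all CPUs (the top
 * level trace_pipe) or individual readers own individual CPUs, and
 * any conflicting open fails with -EBUSY.
 */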
6495
6496 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6497 {
6498 struct trace_array *tr = inode->i_private;
6499 struct trace_iterator *iter;
6500 int cpu;
6501 int ret;
6502
6503 ret = tracing_check_open_get_tr(tr);
6504 if (ret)
6505 return ret;
6506
6507 guard(mutex)(&trace_types_lock);
6508 cpu = tracing_get_cpu(inode);
6509 ret = open_pipe_on_cpu(tr, cpu);
6510 if (ret)
6511 goto fail_pipe_on_cpu;
6512
6513 /* create a buffer to store the information to pass to userspace */
6514 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6515 if (!iter) {
6516 ret = -ENOMEM;
6517 goto fail_alloc_iter;
6518 }
6519
6520 trace_seq_init(&iter->seq);
6521 iter->trace = tr->current_trace;
6522
6523 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6524 ret = -ENOMEM;
6525 goto fail;
6526 }
6527
6528 /* trace pipe does not show start of buffer */
6529 cpumask_setall(iter->started);
6530
6531 if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6532 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6533
6534 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6535 if (trace_clocks[tr->clock_id].in_ns)
6536 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6537
6538 iter->tr = tr;
6539 iter->array_buffer = &tr->array_buffer;
6540 iter->cpu_file = cpu;
6541 mutex_init(&iter->mutex);
6542 filp->private_data = iter;
6543
6544 if (iter->trace->pipe_open)
6545 iter->trace->pipe_open(iter);
6546
6547 nonseekable_open(inode, filp);
6548
6549 tr->trace_ref++;
6550
6551 return ret;
6552
6553 fail:
6554 kfree(iter);
6555 fail_alloc_iter:
6556 close_pipe_on_cpu(tr, cpu);
6557 fail_pipe_on_cpu:
6558 __trace_array_put(tr);
6559 return ret;
6560 }
6561
6562 static int tracing_release_pipe(struct inode *inode, struct file *file)
6563 {
6564 struct trace_iterator *iter = file->private_data;
6565 struct trace_array *tr = inode->i_private;
6566
6567 scoped_guard(mutex, &trace_types_lock) {
6568 tr->trace_ref--;
6569
6570 if (iter->trace->pipe_close)
6571 iter->trace->pipe_close(iter);
6572 close_pipe_on_cpu(tr, iter->cpu_file);
6573 }
6574
6575 free_trace_iter_content(iter);
6576 kfree(iter);
6577
6578 trace_array_put(tr);
6579
6580 return 0;
6581 }
6582
6583 static __poll_t
6584 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6585 {
6586 struct trace_array *tr = iter->tr;
6587
6588 /* Iterators are static, they should be filled or empty */
6589 if (trace_buffer_iter(iter, iter->cpu_file))
6590 return EPOLLIN | EPOLLRDNORM;
6591
6592 if (tr->trace_flags & TRACE_ITER_BLOCK)
6593 /*
6594 * Always select as readable when in blocking mode
6595 */
6596 return EPOLLIN | EPOLLRDNORM;
6597 else
6598 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6599 filp, poll_table, iter->tr->buffer_percent);
6600 }
6601
6602 static __poll_t
6603 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6604 {
6605 struct trace_iterator *iter = filp->private_data;
6606
6607 return trace_poll(iter, filp, poll_table);
6608 }
6609
6610 /* Must be called with iter->mutex held. */
6611 static int tracing_wait_pipe(struct file *filp)
6612 {
6613 struct trace_iterator *iter = filp->private_data;
6614 int ret;
6615
6616 while (trace_empty(iter)) {
6617
6618 if ((filp->f_flags & O_NONBLOCK)) {
6619 return -EAGAIN;
6620 }
6621
6622 /*
6623 * We block until we read something and tracing is disabled.
6624 * We still block if tracing is disabled, but we have never
6625 * read anything. This allows a user to cat this file, and
6626 * then enable tracing. But after we have read something,
6627 * we give an EOF when tracing is again disabled.
6628 *
6629 * iter->pos will be 0 if we haven't read anything.
6630 */
6631 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6632 break;
6633
6634 mutex_unlock(&iter->mutex);
6635
6636 ret = wait_on_pipe(iter, 0);
6637
6638 mutex_lock(&iter->mutex);
6639
6640 if (ret)
6641 return ret;
6642 }
6643
6644 return 1;
6645 }
6646
6647 static bool update_last_data_if_empty(struct trace_array *tr)
6648 {
6649 if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6650 return false;
6651
6652 if (!ring_buffer_empty(tr->array_buffer.buffer))
6653 return false;
6654
6655 /*
6656 * If the buffer contains the last boot data and all per-cpu
6657 * buffers are empty, reset it from the kernel side.
6658 */
6659 update_last_data(tr);
6660 return true;
6661 }
6662
6663 /*
6664 * Consumer reader.
6665 */
6666 static ssize_t
6667 tracing_read_pipe(struct file *filp, char __user *ubuf,
6668 size_t cnt, loff_t *ppos)
6669 {
6670 struct trace_iterator *iter = filp->private_data;
6671 ssize_t sret;
6672
6673 /*
6674 * Avoid more than one consumer on a single file descriptor.
6675 * This is just a matter of trace coherency; the ring buffer itself
6676 * is protected.
6677 */
6678 guard(mutex)(&iter->mutex);
6679
6680 /* return any leftover data */
6681 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6682 if (sret != -EBUSY)
6683 return sret;
6684
6685 trace_seq_init(&iter->seq);
6686
6687 if (iter->trace->read) {
6688 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6689 if (sret)
6690 return sret;
6691 }
6692
6693 waitagain:
6694 if (update_last_data_if_empty(iter->tr))
6695 return 0;
6696
6697 sret = tracing_wait_pipe(filp);
6698 if (sret <= 0)
6699 return sret;
6700
6701 /* stop when tracing is finished */
6702 if (trace_empty(iter))
6703 return 0;
6704
6705 if (cnt >= TRACE_SEQ_BUFFER_SIZE)
6706 cnt = TRACE_SEQ_BUFFER_SIZE - 1;
6707
6708 /* reset all but tr, trace, and overruns */
6709 trace_iterator_reset(iter);
6710 cpumask_clear(iter->started);
6711 trace_seq_init(&iter->seq);
6712
6713 trace_event_read_lock();
6714 trace_access_lock(iter->cpu_file);
6715 while (trace_find_next_entry_inc(iter) != NULL) {
6716 enum print_line_t ret;
6717 int save_len = iter->seq.seq.len;
6718
6719 ret = print_trace_line(iter);
6720 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6721 /*
6722 * If one print_trace_line() fills the entire trace_seq in one shot,
6723 * trace_seq_to_user() will return -EBUSY because save_len == 0.
6724 * In this case, we need to consume it, otherwise the loop will peek
6725 * this event again next time, resulting in an infinite loop.
6726 */
6727 if (save_len == 0) {
6728 iter->seq.full = 0;
6729 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6730 trace_consume(iter);
6731 break;
6732 }
6733
6734 /* In other cases, don't print partial lines */
6735 iter->seq.seq.len = save_len;
6736 break;
6737 }
6738 if (ret != TRACE_TYPE_NO_CONSUME)
6739 trace_consume(iter);
6740
6741 if (trace_seq_used(&iter->seq) >= cnt)
6742 break;
6743
6744 /*
6745 * Setting the full flag means we reached the trace_seq buffer
6746 * size and we should have left via the partial-output condition above.
6747 * One of the trace_seq_* functions is not used properly.
6748 */
6749 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6750 iter->ent->type);
6751 }
6752 trace_access_unlock(iter->cpu_file);
6753 trace_event_read_unlock();
6754
6755 /* Now copy what we have to the user */
6756 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6757 if (iter->seq.readpos >= trace_seq_used(&iter->seq))
6758 trace_seq_init(&iter->seq);
6759
6760 /*
6761 * If there was nothing to send to user, in spite of consuming trace
6762 * entries, go back to wait for more entries.
6763 */
6764 if (sret == -EBUSY)
6765 goto waitagain;
6766
6767 return sret;
6768 }
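/*
 * Example: trace_pipe is a consuming read, so unlike the trace file
 * the data is removed from the ring buffer as it is read, and the
 * read blocks (unless O_NONBLOCK) until new events arrive:
 *
 *	# cat /sys/kernel/tracing/trace_pipe
 */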
6769
6770 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6771 unsigned int idx)
6772 {
6773 __free_page(spd->pages[idx]);
6774 }
6775
6776 static size_t
6777 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6778 {
6779 size_t count;
6780 int save_len;
6781 int ret;
6782
6783 /* Seq buffer is page-sized, exactly what we need. */
6784 for (;;) {
6785 save_len = iter->seq.seq.len;
6786 ret = print_trace_line(iter);
6787
6788 if (trace_seq_has_overflowed(&iter->seq)) {
6789 iter->seq.seq.len = save_len;
6790 break;
6791 }
6792
6793 /*
6794 * This should not be hit, because it should only
6795 * be set if the iter->seq overflowed. But check it
6796 * anyway to be safe.
6797 */
6798 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6799 iter->seq.seq.len = save_len;
6800 break;
6801 }
6802
6803 count = trace_seq_used(&iter->seq) - save_len;
6804 if (rem < count) {
6805 rem = 0;
6806 iter->seq.seq.len = save_len;
6807 break;
6808 }
6809
6810 if (ret != TRACE_TYPE_NO_CONSUME)
6811 trace_consume(iter);
6812 rem -= count;
6813 if (!trace_find_next_entry_inc(iter)) {
6814 rem = 0;
6815 iter->ent = NULL;
6816 break;
6817 }
6818 }
6819
6820 return rem;
6821 }
6822
6823 static ssize_t tracing_splice_read_pipe(struct file *filp,
6824 loff_t *ppos,
6825 struct pipe_inode_info *pipe,
6826 size_t len,
6827 unsigned int flags)
6828 {
6829 struct page *pages_def[PIPE_DEF_BUFFERS];
6830 struct partial_page partial_def[PIPE_DEF_BUFFERS];
6831 struct trace_iterator *iter = filp->private_data;
6832 struct splice_pipe_desc spd = {
6833 .pages = pages_def,
6834 .partial = partial_def,
6835 .nr_pages = 0, /* This gets updated below. */
6836 .nr_pages_max = PIPE_DEF_BUFFERS,
6837 .ops = &default_pipe_buf_ops,
6838 .spd_release = tracing_spd_release_pipe,
6839 };
6840 ssize_t ret;
6841 size_t rem;
6842 unsigned int i;
6843
6844 if (splice_grow_spd(pipe, &spd))
6845 return -ENOMEM;
6846
6847 mutex_lock(&iter->mutex);
6848
6849 if (iter->trace->splice_read) {
6850 ret = iter->trace->splice_read(iter, filp,
6851 ppos, pipe, len, flags);
6852 if (ret)
6853 goto out_err;
6854 }
6855
6856 ret = tracing_wait_pipe(filp);
6857 if (ret <= 0)
6858 goto out_err;
6859
6860 if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6861 ret = -EFAULT;
6862 goto out_err;
6863 }
6864
6865 trace_event_read_lock();
6866 trace_access_lock(iter->cpu_file);
6867
6868 /* Fill as many pages as possible. */
6869 for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6870 spd.pages[i] = alloc_page(GFP_KERNEL);
6871 if (!spd.pages[i])
6872 break;
6873
6874 rem = tracing_fill_pipe_page(rem, iter);
6875
6876 /* Copy the data into the page, so we can start over. */
6877 ret = trace_seq_to_buffer(&iter->seq,
6878 page_address(spd.pages[i]),
6879 min((size_t)trace_seq_used(&iter->seq),
6880 (size_t)PAGE_SIZE));
6881 if (ret < 0) {
6882 __free_page(spd.pages[i]);
6883 break;
6884 }
6885 spd.partial[i].offset = 0;
6886 spd.partial[i].len = ret;
6887
6888 trace_seq_init(&iter->seq);
6889 }
6890
6891 trace_access_unlock(iter->cpu_file);
6892 trace_event_read_unlock();
6893 mutex_unlock(&iter->mutex);
6894
6895 spd.nr_pages = i;
6896
6897 if (i)
6898 ret = splice_to_pipe(pipe, &spd);
6899 else
6900 ret = 0;
6901 out:
6902 splice_shrink_spd(&spd);
6903 return ret;
6904
6905 out_err:
6906 mutex_unlock(&iter->mutex);
6907 goto out;
6908 }
6909
6910 static ssize_t
6911 tracing_entries_read(struct file *filp, char __user *ubuf,
6912 size_t cnt, loff_t *ppos)
6913 {
6914 struct inode *inode = file_inode(filp);
6915 struct trace_array *tr = inode->i_private;
6916 int cpu = tracing_get_cpu(inode);
6917 char buf[64];
6918 int r = 0;
6919 ssize_t ret;
6920
6921 mutex_lock(&trace_types_lock);
6922
6923 if (cpu == RING_BUFFER_ALL_CPUS) {
6924 int cpu, buf_size_same;
6925 unsigned long size;
6926
6927 size = 0;
6928 buf_size_same = 1;
6929 /* check if all cpu sizes are same */
6930 for_each_tracing_cpu(cpu) {
6931 /* fill in the size from first enabled cpu */
6932 if (size == 0)
6933 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6934 if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6935 buf_size_same = 0;
6936 break;
6937 }
6938 }
6939
6940 if (buf_size_same) {
6941 if (!tr->ring_buffer_expanded)
6942 r = sprintf(buf, "%lu (expanded: %lu)\n",
6943 size >> 10,
6944 trace_buf_size >> 10);
6945 else
6946 r = sprintf(buf, "%lu\n", size >> 10);
6947 } else
6948 r = sprintf(buf, "X\n");
6949 } else
6950 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6951
6952 mutex_unlock(&trace_types_lock);
6953
6954 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6955 return ret;
6956 }
6957
6958 static ssize_t
6959 tracing_entries_write(struct file *filp, const char __user *ubuf,
6960 size_t cnt, loff_t *ppos)
6961 {
6962 struct inode *inode = file_inode(filp);
6963 struct trace_array *tr = inode->i_private;
6964 unsigned long val;
6965 int ret;
6966
6967 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6968 if (ret)
6969 return ret;
6970
6971 /* must have at least 1 entry */
6972 if (!val)
6973 return -EINVAL;
6974
6975 /* value is in KB */
6976 val <<= 10;
6977 ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6978 if (ret < 0)
6979 return ret;
6980
6981 *ppos += cnt;
6982
6983 return cnt;
6984 }
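/*
 * The value written is interpreted in KB and must be non-zero. Which
 * buffers are resized depends on the file being written (the per_cpu
 * path below assumes the usual tracefs layout):
 *
 *	# echo 2048 > /sys/kernel/tracing/buffer_size_kb
 *	# echo 1024 > /sys/kernel/tracing/per_cpu/cpu1/buffer_size_kb
 */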
6985
6986 static ssize_t
6987 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6988 size_t cnt, loff_t *ppos)
6989 {
6990 struct trace_array *tr = filp->private_data;
6991 char buf[64];
6992 int r, cpu;
6993 unsigned long size = 0, expanded_size = 0;
6994
6995 mutex_lock(&trace_types_lock);
6996 for_each_tracing_cpu(cpu) {
6997 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6998 if (!tr->ring_buffer_expanded)
6999 expanded_size += trace_buf_size >> 10;
7000 }
7001 if (tr->ring_buffer_expanded)
7002 r = sprintf(buf, "%lu\n", size);
7003 else
7004 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7005 mutex_unlock(&trace_types_lock);
7006
7007 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7008 }
7009
7010 #define LAST_BOOT_HEADER ((void *)1)
7011
7012 static void *l_next(struct seq_file *m, void *v, loff_t *pos)
7013 {
7014 struct trace_array *tr = m->private;
7015 struct trace_scratch *tscratch = tr->scratch;
7016 unsigned int index = *pos;
7017
7018 (*pos)++;
7019
7020 if (*pos == 1)
7021 return LAST_BOOT_HEADER;
7022
7023 /* Only show offsets of the last boot data */
7024 if (!tscratch || !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
7025 return NULL;
7026
7027 /* *pos 0 is for the header, 1 is for the first module */
7028 index--;
7029
7030 if (index >= tscratch->nr_entries)
7031 return NULL;
7032
7033 return &tscratch->entries[index];
7034 }
7035
7036 static void *l_start(struct seq_file *m, loff_t *pos)
7037 {
7038 mutex_lock(&scratch_mutex);
7039
7040 return l_next(m, NULL, pos);
7041 }
7042
7043 static void l_stop(struct seq_file *m, void *p)
7044 {
7045 mutex_unlock(&scratch_mutex);
7046 }
7047
7048 static void show_last_boot_header(struct seq_file *m, struct trace_array *tr)
7049 {
7050 struct trace_scratch *tscratch = tr->scratch;
7051
7052 /*
7053 * Do not leak KASLR address. This only shows the KASLR address of
7054 * the last boot. When the ring buffer is started, the LAST_BOOT
7055 * flag gets cleared, and this should only report "current".
7056 * Otherwise it shows the KASLR address from the previous boot which
7057 * should not be the same as the current boot.
7058 */
7059 if (tscratch && (tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
7060 seq_printf(m, "%lx\t[kernel]\n", tscratch->text_addr);
7061 else
7062 seq_puts(m, "# Current\n");
7063 }
7064
7065 static int l_show(struct seq_file *m, void *v)
7066 {
7067 struct trace_array *tr = m->private;
7068 struct trace_mod_entry *entry = v;
7069
7070 if (v == LAST_BOOT_HEADER) {
7071 show_last_boot_header(m, tr);
7072 return 0;
7073 }
7074
7075 seq_printf(m, "%lx\t%s\n", entry->mod_addr, entry->mod_name);
7076 return 0;
7077 }
7078
7079 static const struct seq_operations last_boot_seq_ops = {
7080 .start = l_start,
7081 .next = l_next,
7082 .stop = l_stop,
7083 .show = l_show,
7084 };
7085
7086 static int tracing_last_boot_open(struct inode *inode, struct file *file)
7087 {
7088 struct trace_array *tr = inode->i_private;
7089 struct seq_file *m;
7090 int ret;
7091
7092 ret = tracing_check_open_get_tr(tr);
7093 if (ret)
7094 return ret;
7095
7096 ret = seq_open(file, &last_boot_seq_ops);
7097 if (ret) {
7098 trace_array_put(tr);
7099 return ret;
7100 }
7101
7102 m = file->private_data;
7103 m->private = tr;
7104
7105 return 0;
7106 }
7107
7108 static int tracing_buffer_meta_open(struct inode *inode, struct file *filp)
7109 {
7110 struct trace_array *tr = inode->i_private;
7111 int cpu = tracing_get_cpu(inode);
7112 int ret;
7113
7114 ret = tracing_check_open_get_tr(tr);
7115 if (ret)
7116 return ret;
7117
7118 ret = ring_buffer_meta_seq_init(filp, tr->array_buffer.buffer, cpu);
7119 if (ret < 0)
7120 __trace_array_put(tr);
7121 return ret;
7122 }
7123
7124 static ssize_t
7125 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7126 size_t cnt, loff_t *ppos)
7127 {
7128 /*
7129 * There is no need to read what the user has written; this function
7130 * just makes sure that there is no error when "echo" is used.
7131 */
7132
7133 *ppos += cnt;
7134
7135 return cnt;
7136 }
7137
7138 static int
7139 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7140 {
7141 struct trace_array *tr = inode->i_private;
7142
7143 /* disable tracing ? */
7144 if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7145 tracer_tracing_off(tr);
7146 /* resize the ring buffer to 0 */
7147 tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7148
7149 trace_array_put(tr);
7150
7151 return 0;
7152 }
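/*
 * The two callbacks above implement the free_buffer file: the write is
 * a no-op so that "echo > free_buffer" succeeds, and the real work
 * happens on release (close), which turns tracing off if the
 * STOP_ON_FREE trace option is set and then shrinks the ring buffer
 * to zero.
 */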
7153
7154 #define TRACE_MARKER_MAX_SIZE 4096
7155
7156 static ssize_t write_marker_to_buffer(struct trace_array *tr, const char __user *ubuf,
7157 size_t cnt, unsigned long ip)
7158 {
7159 struct ring_buffer_event *event;
7160 enum event_trigger_type tt = ETT_NONE;
7161 struct trace_buffer *buffer;
7162 struct print_entry *entry;
7163 int meta_size;
7164 ssize_t written;
7165 size_t size;
7166 int len;
7167
7168 /* Used in tracing_mark_raw_write() as well */
7169 #define FAULTED_STR "<faulted>"
7170 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7171
7172 meta_size = sizeof(*entry) + 2; /* add '\0' and possible '\n' */
7173 again:
7174 size = cnt + meta_size;
7175
7176 /* If less than "<faulted>", then make sure we can still add that */
7177 if (cnt < FAULTED_SIZE)
7178 size += FAULTED_SIZE - cnt;
7179
7180 buffer = tr->array_buffer.buffer;
7181 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7182 tracing_gen_ctx());
7183 if (unlikely(!event)) {
7184 /*
7185 * If the size was greater than what was allowed, then
7186 * make it smaller and try again.
7187 */
7188 if (size > ring_buffer_max_event_size(buffer)) {
7189 /* a cnt smaller than FAULTED_SIZE should never make size bigger than max */
7190 if (WARN_ON_ONCE(cnt < FAULTED_SIZE))
7191 return -EBADF;
7192 cnt = ring_buffer_max_event_size(buffer) - meta_size;
7193 /* The above should only happen once */
7194 if (WARN_ON_ONCE(cnt + meta_size == size))
7195 return -EBADF;
7196 goto again;
7197 }
7198
7199 /* Ring buffer disabled, return as if not open for write */
7200 return -EBADF;
7201 }
7202
7203 entry = ring_buffer_event_data(event);
7204 entry->ip = ip;
7205
7206 len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7207 if (len) {
7208 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7209 cnt = FAULTED_SIZE;
7210 written = -EFAULT;
7211 } else
7212 written = cnt;
7213
7214 if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7215 /* do not add \n before testing triggers, but add \0 */
7216 entry->buf[cnt] = '\0';
7217 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7218 }
7219
7220 if (entry->buf[cnt - 1] != '\n') {
7221 entry->buf[cnt] = '\n';
7222 entry->buf[cnt + 1] = '\0';
7223 } else
7224 entry->buf[cnt] = '\0';
7225
7226 if (static_branch_unlikely(&trace_marker_exports_enabled))
7227 ftrace_exports(event, TRACE_EXPORT_MARKER);
7228 __buffer_unlock_commit(buffer, event);
7229
7230 if (tt)
7231 event_triggers_post_call(tr->trace_marker_file, tt);
7232
7233 return written;
7234 }
7235
7236 static ssize_t
7237 tracing_mark_write(struct file *filp, const char __user *ubuf,
7238 size_t cnt, loff_t *fpos)
7239 {
7240 struct trace_array *tr = filp->private_data;
7241 ssize_t written = -ENODEV;
7242 unsigned long ip;
7243
7244 if (tracing_disabled)
7245 return -EINVAL;
7246
7247 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7248 return -EINVAL;
7249
7250 if ((ssize_t)cnt < 0)
7251 return -EINVAL;
7252
7253 if (cnt > TRACE_MARKER_MAX_SIZE)
7254 cnt = TRACE_MARKER_MAX_SIZE;
7255
7256 /* The selftests expect this function to be the IP address */
7257 ip = _THIS_IP_;
7258
7259 /* The global trace_marker can go to multiple instances */
7260 if (tr == &global_trace) {
7261 guard(rcu)();
7262 list_for_each_entry_rcu(tr, &marker_copies, marker_list) {
7263 written = write_marker_to_buffer(tr, ubuf, cnt, ip);
7264 if (written < 0)
7265 break;
7266 }
7267 } else {
7268 written = write_marker_to_buffer(tr, ubuf, cnt, ip);
7269 }
7270
7271 return written;
7272 }
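
/*
 * A minimal user-space sketch (illustrative only; tracefs is assumed
 * to be mounted at /sys/kernel/tracing) of exercising the path above
 * through the trace_marker file:
 */
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

int trace_marker_demo(void)
{
	const char msg[] = "hello from user space";
	int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);

	if (fd < 0)
		return -1;

	/* One write() becomes one TRACE_PRINT event; a '\n' is appended if missing */
	if (write(fd, msg, strlen(msg)) < 0) {
		close(fd);
		return -1;
	}

	return close(fd);
}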
7273
7274 static ssize_t write_raw_marker_to_buffer(struct trace_array *tr,
7275 const char __user *ubuf, size_t cnt)
7276 {
7277 struct ring_buffer_event *event;
7278 struct trace_buffer *buffer;
7279 struct raw_data_entry *entry;
7280 ssize_t written;
7281 int size;
7282 int len;
7283
7284 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7285
7286 size = sizeof(*entry) + cnt;
7287 if (cnt < FAULT_SIZE_ID)
7288 size += FAULT_SIZE_ID - cnt;
7289
7290 buffer = tr->array_buffer.buffer;
7291
7292 if (size > ring_buffer_max_event_size(buffer))
7293 return -EINVAL;
7294
7295 event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7296 tracing_gen_ctx());
7297 if (!event)
7298 /* Ring buffer disabled, return as if not open for write */
7299 return -EBADF;
7300
7301 entry = ring_buffer_event_data(event);
7302
7303 len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7304 if (len) {
7305 entry->id = -1;
7306 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7307 written = -EFAULT;
7308 } else
7309 written = cnt;
7310
7311 __buffer_unlock_commit(buffer, event);
7312
7313 return written;
7314 }
7315
7316 static ssize_t
7317 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7318 size_t cnt, loff_t *fpos)
7319 {
7320 struct trace_array *tr = filp->private_data;
7321 ssize_t written = -ENODEV;
7322
7323 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7324
7325 if (tracing_disabled)
7326 return -EINVAL;
7327
7328 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7329 return -EINVAL;
7330
7331 /* The marker must at least have a tag id */
7332 if (cnt < sizeof(unsigned int))
7333 return -EINVAL;
7334
7335 /* The global trace_marker_raw can go to multiple instances */
7336 if (tr == &global_trace) {
7337 guard(rcu)();
7338 list_for_each_entry_rcu(tr, &marker_copies, marker_list) {
7339 written = write_raw_marker_to_buffer(tr, ubuf, cnt);
7340 if (written < 0)
7341 break;
7342 }
7343 } else {
7344 written = write_raw_marker_to_buffer(tr, ubuf, cnt);
7345 }
7346
7347 return written;
7348 }
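
/*
 * The raw variant above requires the write to start with an integer
 * tag id, optionally followed by payload bytes. A rough user-space
 * sketch (illustrative only; the tag value and payload are arbitrary):
 */
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

struct raw_marker_msg {
	unsigned int id;	/* copied into raw_data_entry::id */
	char payload[32];	/* opaque data following the id */
};

int trace_marker_raw_demo(void)
{
	struct raw_marker_msg msg = { .id = 0x1234 };
	int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);

	if (fd < 0)
		return -1;

	strcpy(msg.payload, "raw payload");
	if (write(fd, &msg, sizeof(msg.id) + strlen(msg.payload)) < 0) {
		close(fd);
		return -1;
	}

	return close(fd);
}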
7349
7350 static int tracing_clock_show(struct seq_file *m, void *v)
7351 {
7352 struct trace_array *tr = m->private;
7353 int i;
7354
7355 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7356 seq_printf(m,
7357 "%s%s%s%s", i ? " " : "",
7358 i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7359 i == tr->clock_id ? "]" : "");
7360 seq_putc(m, '\n');
7361
7362 return 0;
7363 }
7364
7365 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7366 {
7367 int i;
7368
7369 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7370 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7371 break;
7372 }
7373 if (i == ARRAY_SIZE(trace_clocks))
7374 return -EINVAL;
7375
7376 guard(mutex)(&trace_types_lock);
7377
7378 tr->clock_id = i;
7379
7380 ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7381
7382 /*
7383 * New clock may not be consistent with the previous clock.
7384 * Reset the buffer so that it doesn't have incomparable timestamps.
7385 */
7386 tracing_reset_online_cpus(&tr->array_buffer);
7387
7388 #ifdef CONFIG_TRACER_MAX_TRACE
7389 if (tr->max_buffer.buffer)
7390 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7391 tracing_reset_online_cpus(&tr->max_buffer);
7392 #endif
7393
7394 if (tr->scratch && !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) {
7395 struct trace_scratch *tscratch = tr->scratch;
7396
7397 tscratch->clock_id = i;
7398 }
7399
7400 return 0;
7401 }
7402
7403 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7404 size_t cnt, loff_t *fpos)
7405 {
7406 struct seq_file *m = filp->private_data;
7407 struct trace_array *tr = m->private;
7408 char buf[64];
7409 const char *clockstr;
7410 int ret;
7411
7412 if (cnt >= sizeof(buf))
7413 return -EINVAL;
7414
7415 if (copy_from_user(buf, ubuf, cnt))
7416 return -EFAULT;
7417
7418 buf[cnt] = 0;
7419
7420 clockstr = strstrip(buf);
7421
7422 ret = tracing_set_clock(tr, clockstr);
7423 if (ret)
7424 return ret;
7425
7426 *fpos += cnt;
7427
7428 return cnt;
7429 }
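
/*
 * Usage sketch (illustrative): reading the trace_clock file lists the
 * available clocks with the current selection in brackets; writing a
 * clock name (e.g. "echo mono > trace_clock") switches to it, which,
 * per tracing_set_clock() above, also resets the ring buffer.
 */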
7430
7431 static int tracing_clock_open(struct inode *inode, struct file *file)
7432 {
7433 struct trace_array *tr = inode->i_private;
7434 int ret;
7435
7436 ret = tracing_check_open_get_tr(tr);
7437 if (ret)
7438 return ret;
7439
7440 ret = single_open(file, tracing_clock_show, inode->i_private);
7441 if (ret < 0)
7442 trace_array_put(tr);
7443
7444 return ret;
7445 }
7446
7447 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7448 {
7449 struct trace_array *tr = m->private;
7450
7451 guard(mutex)(&trace_types_lock);
7452
7453 if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7454 seq_puts(m, "delta [absolute]\n");
7455 else
7456 seq_puts(m, "[delta] absolute\n");
7457
7458 return 0;
7459 }
7460
7461 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7462 {
7463 struct trace_array *tr = inode->i_private;
7464 int ret;
7465
7466 ret = tracing_check_open_get_tr(tr);
7467 if (ret)
7468 return ret;
7469
7470 ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7471 if (ret < 0)
7472 trace_array_put(tr);
7473
7474 return ret;
7475 }
7476
7477 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7478 {
7479 if (rbe == this_cpu_read(trace_buffered_event))
7480 return ring_buffer_time_stamp(buffer);
7481
7482 return ring_buffer_event_time_stamp(buffer, rbe);
7483 }
7484
7485 /*
7486 * Enable or disable use of the per-CPU trace_buffered_event when possible.
7487 */
7488 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7489 {
7490 guard(mutex)(&trace_types_lock);
7491
7492 if (set && tr->no_filter_buffering_ref++)
7493 return 0;
7494
7495 if (!set) {
7496 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref))
7497 return -EINVAL;
7498
7499 --tr->no_filter_buffering_ref;
7500 }
7501
7502 return 0;
7503 }
7504
7505 struct ftrace_buffer_info {
7506 struct trace_iterator iter;
7507 void *spare;
7508 unsigned int spare_cpu;
7509 unsigned int spare_size;
7510 unsigned int read;
7511 };
7512
7513 #ifdef CONFIG_TRACER_SNAPSHOT
7514 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7515 {
7516 struct trace_array *tr = inode->i_private;
7517 struct trace_iterator *iter;
7518 struct seq_file *m;
7519 int ret;
7520
7521 ret = tracing_check_open_get_tr(tr);
7522 if (ret)
7523 return ret;
7524
7525 if (file->f_mode & FMODE_READ) {
7526 iter = __tracing_open(inode, file, true);
7527 if (IS_ERR(iter))
7528 ret = PTR_ERR(iter);
7529 } else {
7530 /* Writes still need the seq_file to hold the private data */
7531 ret = -ENOMEM;
7532 m = kzalloc(sizeof(*m), GFP_KERNEL);
7533 if (!m)
7534 goto out;
7535 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7536 if (!iter) {
7537 kfree(m);
7538 goto out;
7539 }
7540 ret = 0;
7541
7542 iter->tr = tr;
7543 iter->array_buffer = &tr->max_buffer;
7544 iter->cpu_file = tracing_get_cpu(inode);
7545 m->private = iter;
7546 file->private_data = m;
7547 }
7548 out:
7549 if (ret < 0)
7550 trace_array_put(tr);
7551
7552 return ret;
7553 }
7554
7555 static void tracing_swap_cpu_buffer(void *tr)
7556 {
7557 update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7558 }
7559
7560 static ssize_t
7561 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7562 loff_t *ppos)
7563 {
7564 struct seq_file *m = filp->private_data;
7565 struct trace_iterator *iter = m->private;
7566 struct trace_array *tr = iter->tr;
7567 unsigned long val;
7568 int ret;
7569
7570 ret = tracing_update_buffers(tr);
7571 if (ret < 0)
7572 return ret;
7573
7574 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7575 if (ret)
7576 return ret;
7577
7578 guard(mutex)(&trace_types_lock);
7579
7580 if (tr->current_trace->use_max_tr)
7581 return -EBUSY;
7582
7583 local_irq_disable();
7584 arch_spin_lock(&tr->max_lock);
7585 if (tr->cond_snapshot)
7586 ret = -EBUSY;
7587 arch_spin_unlock(&tr->max_lock);
7588 local_irq_enable();
7589 if (ret)
7590 return ret;
7591
7592 switch (val) {
7593 case 0:
7594 if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
7595 return -EINVAL;
7596 if (tr->allocated_snapshot)
7597 free_snapshot(tr);
7598 break;
7599 case 1:
7600 /* Only allow per-cpu swap if the ring buffer supports it */
7601 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7602 if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
7603 return -EINVAL;
7604 #endif
7605 if (tr->allocated_snapshot)
7606 ret = resize_buffer_duplicate_size(&tr->max_buffer,
7607 &tr->array_buffer, iter->cpu_file);
7608
7609 ret = tracing_arm_snapshot_locked(tr);
7610 if (ret)
7611 return ret;
7612
7613 /* Now, we're going to swap */
7614 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7615 local_irq_disable();
7616 update_max_tr(tr, current, smp_processor_id(), NULL);
7617 local_irq_enable();
7618 } else {
7619 smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7620 (void *)tr, 1);
7621 }
7622 tracing_disarm_snapshot(tr);
7623 break;
7624 default:
7625 if (tr->allocated_snapshot) {
7626 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7627 tracing_reset_online_cpus(&tr->max_buffer);
7628 else
7629 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7630 }
7631 break;
7632 }
7633
7634 if (ret >= 0) {
7635 *ppos += cnt;
7636 ret = cnt;
7637 }
7638
7639 return ret;
7640 }
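
/*
 * Usage sketch (illustrative), matching the switch above when writing
 * to the top-level snapshot file:
 *
 *	# echo 1 > snapshot	- swap the live buffer into the snapshot
 *	# cat snapshot		- read the snapshotted data
 *	# echo 2 > snapshot	- any other value just clears the snapshot
 *	# echo 0 > snapshot	- free the snapshot buffer
 */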
7641
7642 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7643 {
7644 struct seq_file *m = file->private_data;
7645 int ret;
7646
7647 ret = tracing_release(inode, file);
7648
7649 if (file->f_mode & FMODE_READ)
7650 return ret;
7651
7652 /* If write only, the seq_file is just a stub */
7653 if (m)
7654 kfree(m->private);
7655 kfree(m);
7656
7657 return 0;
7658 }
7659
7660 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7661 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7662 size_t count, loff_t *ppos);
7663 static int tracing_buffers_release(struct inode *inode, struct file *file);
7664 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7665 struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7666
7667 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7668 {
7669 struct ftrace_buffer_info *info;
7670 int ret;
7671
7672 /* The following checks for tracefs lockdown */
7673 ret = tracing_buffers_open(inode, filp);
7674 if (ret < 0)
7675 return ret;
7676
7677 info = filp->private_data;
7678
7679 if (info->iter.trace->use_max_tr) {
7680 tracing_buffers_release(inode, filp);
7681 return -EBUSY;
7682 }
7683
7684 info->iter.snapshot = true;
7685 info->iter.array_buffer = &info->iter.tr->max_buffer;
7686
7687 return ret;
7688 }
7689
7690 #endif /* CONFIG_TRACER_SNAPSHOT */
7691
7692
7693 static const struct file_operations tracing_thresh_fops = {
7694 .open = tracing_open_generic,
7695 .read = tracing_thresh_read,
7696 .write = tracing_thresh_write,
7697 .llseek = generic_file_llseek,
7698 };
7699
7700 #ifdef CONFIG_TRACER_MAX_TRACE
7701 static const struct file_operations tracing_max_lat_fops = {
7702 .open = tracing_open_generic_tr,
7703 .read = tracing_max_lat_read,
7704 .write = tracing_max_lat_write,
7705 .llseek = generic_file_llseek,
7706 .release = tracing_release_generic_tr,
7707 };
7708 #endif
7709
7710 static const struct file_operations set_tracer_fops = {
7711 .open = tracing_open_generic_tr,
7712 .read = tracing_set_trace_read,
7713 .write = tracing_set_trace_write,
7714 .llseek = generic_file_llseek,
7715 .release = tracing_release_generic_tr,
7716 };
7717
7718 static const struct file_operations tracing_pipe_fops = {
7719 .open = tracing_open_pipe,
7720 .poll = tracing_poll_pipe,
7721 .read = tracing_read_pipe,
7722 .splice_read = tracing_splice_read_pipe,
7723 .release = tracing_release_pipe,
7724 };
7725
7726 static const struct file_operations tracing_entries_fops = {
7727 .open = tracing_open_generic_tr,
7728 .read = tracing_entries_read,
7729 .write = tracing_entries_write,
7730 .llseek = generic_file_llseek,
7731 .release = tracing_release_generic_tr,
7732 };
7733
7734 static const struct file_operations tracing_buffer_meta_fops = {
7735 .open = tracing_buffer_meta_open,
7736 .read = seq_read,
7737 .llseek = seq_lseek,
7738 .release = tracing_seq_release,
7739 };
7740
7741 static const struct file_operations tracing_total_entries_fops = {
7742 .open = tracing_open_generic_tr,
7743 .read = tracing_total_entries_read,
7744 .llseek = generic_file_llseek,
7745 .release = tracing_release_generic_tr,
7746 };
7747
7748 static const struct file_operations tracing_free_buffer_fops = {
7749 .open = tracing_open_generic_tr,
7750 .write = tracing_free_buffer_write,
7751 .release = tracing_free_buffer_release,
7752 };
7753
7754 static const struct file_operations tracing_mark_fops = {
7755 .open = tracing_mark_open,
7756 .write = tracing_mark_write,
7757 .release = tracing_release_generic_tr,
7758 };
7759
7760 static const struct file_operations tracing_mark_raw_fops = {
7761 .open = tracing_mark_open,
7762 .write = tracing_mark_raw_write,
7763 .release = tracing_release_generic_tr,
7764 };
7765
7766 static const struct file_operations trace_clock_fops = {
7767 .open = tracing_clock_open,
7768 .read = seq_read,
7769 .llseek = seq_lseek,
7770 .release = tracing_single_release_tr,
7771 .write = tracing_clock_write,
7772 };
7773
7774 static const struct file_operations trace_time_stamp_mode_fops = {
7775 .open = tracing_time_stamp_mode_open,
7776 .read = seq_read,
7777 .llseek = seq_lseek,
7778 .release = tracing_single_release_tr,
7779 };
7780
7781 static const struct file_operations last_boot_fops = {
7782 .open = tracing_last_boot_open,
7783 .read = seq_read,
7784 .llseek = seq_lseek,
7785 .release = tracing_seq_release,
7786 };
7787
7788 #ifdef CONFIG_TRACER_SNAPSHOT
7789 static const struct file_operations snapshot_fops = {
7790 .open = tracing_snapshot_open,
7791 .read = seq_read,
7792 .write = tracing_snapshot_write,
7793 .llseek = tracing_lseek,
7794 .release = tracing_snapshot_release,
7795 };
7796
7797 static const struct file_operations snapshot_raw_fops = {
7798 .open = snapshot_raw_open,
7799 .read = tracing_buffers_read,
7800 .release = tracing_buffers_release,
7801 .splice_read = tracing_buffers_splice_read,
7802 };
7803
7804 #endif /* CONFIG_TRACER_SNAPSHOT */
7805
7806 /*
7807 * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7808 * @filp: The active open file structure
7809 * @ubuf: The userspace provided buffer holding the value to write
7810 * @cnt: The maximum number of bytes to write
7811 * @ppos: The current "file" position
7812 *
7813 * This function implements the write interface for a struct trace_min_max_param.
7814 * The filp->private_data must point to a trace_min_max_param structure that
7815 * defines where to write the value, the min and the max acceptable values,
7816 * and a lock to protect the write.
7817 */
7818 static ssize_t
7819 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7820 {
7821 struct trace_min_max_param *param = filp->private_data;
7822 u64 val;
7823 int err;
7824
7825 if (!param)
7826 return -EFAULT;
7827
7828 err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7829 if (err)
7830 return err;
7831
7832 if (param->lock)
7833 mutex_lock(param->lock);
7834
7835 if (param->min && val < *param->min)
7836 err = -EINVAL;
7837
7838 if (param->max && val > *param->max)
7839 err = -EINVAL;
7840
7841 if (!err)
7842 *param->val = val;
7843
7844 if (param->lock)
7845 mutex_unlock(param->lock);
7846
7847 if (err)
7848 return err;
7849
7850 return cnt;
7851 }
7852
7853 /*
7854 * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7855 * @filp: The active open file structure
7856 * @ubuf: The userspace provided buffer to read value into
7857 * @cnt: The maximum number of bytes to read
7858 * @ppos: The current "file" position
7859 *
7860 * This function implements the read interface for a struct trace_min_max_param.
7861 * The filp->private_data must point to a trace_min_max_param struct with valid
7862 * data.
7863 */
7864 static ssize_t
7865 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7866 {
7867 struct trace_min_max_param *param = filp->private_data;
7868 char buf[U64_STR_SIZE];
7869 int len;
7870 u64 val;
7871
7872 if (!param)
7873 return -EFAULT;
7874
7875 val = *param->val;
7876
7877 if (cnt > sizeof(buf))
7878 cnt = sizeof(buf);
7879
7880 len = snprintf(buf, sizeof(buf), "%llu\n", val);
7881
7882 return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7883 }
7884
7885 const struct file_operations trace_min_max_fops = {
7886 .open = tracing_open_generic,
7887 .read = trace_min_max_read,
7888 .write = trace_min_max_write,
7889 };
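
/*
 * A minimal sketch (illustrative, not taken from this file) of wiring
 * up a bounded u64 control file with the helpers above; the names
 * my_val/my_min/my_max/my_lock are assumptions made for the example.
 */
static u64 my_val;
static u64 my_min = 1;
static u64 my_max = 100;
static DEFINE_MUTEX(my_lock);

static struct trace_min_max_param my_param = {
	.lock	= &my_lock,
	.val	= &my_val,
	.min	= &my_min,
	.max	= &my_max,
};

/*
 * The file would then be created with something like:
 *	trace_create_file("my_knob", TRACE_MODE_WRITE, parent, &my_param,
 *			  &trace_min_max_fops);
 */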
7890
7891 #define TRACING_LOG_ERRS_MAX 8
7892 #define TRACING_LOG_LOC_MAX 128
7893
7894 #define CMD_PREFIX " Command: "
7895
7896 struct err_info {
7897 const char **errs; /* ptr to loc-specific array of err strings */
7898 u8 type; /* index into errs -> specific err string */
7899 u16 pos; /* caret position */
7900 u64 ts;
7901 };
7902
7903 struct tracing_log_err {
7904 struct list_head list;
7905 struct err_info info;
7906 char loc[TRACING_LOG_LOC_MAX]; /* err location */
7907 char *cmd; /* what caused err */
7908 };
7909
7910 static DEFINE_MUTEX(tracing_err_log_lock);
7911
7912 static struct tracing_log_err *alloc_tracing_log_err(int len)
7913 {
7914 struct tracing_log_err *err;
7915
7916 err = kzalloc(sizeof(*err), GFP_KERNEL);
7917 if (!err)
7918 return ERR_PTR(-ENOMEM);
7919
7920 err->cmd = kzalloc(len, GFP_KERNEL);
7921 if (!err->cmd) {
7922 kfree(err);
7923 return ERR_PTR(-ENOMEM);
7924 }
7925
7926 return err;
7927 }
7928
7929 static void free_tracing_log_err(struct tracing_log_err *err)
7930 {
7931 kfree(err->cmd);
7932 kfree(err);
7933 }
7934
7935 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
7936 int len)
7937 {
7938 struct tracing_log_err *err;
7939 char *cmd;
7940
7941 if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7942 err = alloc_tracing_log_err(len);
7943 if (PTR_ERR(err) != -ENOMEM)
7944 tr->n_err_log_entries++;
7945
7946 return err;
7947 }
7948 cmd = kzalloc(len, GFP_KERNEL);
7949 if (!cmd)
7950 return ERR_PTR(-ENOMEM);
7951 err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7952 kfree(err->cmd);
7953 err->cmd = cmd;
7954 list_del(&err->list);
7955
7956 return err;
7957 }
7958
7959 /**
7960 * err_pos - find the position of a string within a command for error careting
7961 * @cmd: The tracing command that caused the error
7962 * @str: The string to position the caret at within @cmd
7963 *
7964 * Finds the position of the first occurrence of @str within @cmd. The
7965 * return value can be passed to tracing_log_err() for caret placement
7966 * within @cmd.
7967 *
7968 * Returns the index within @cmd of the first occurrence of @str or 0
7969 * if @str was not found.
7970 */
7971 unsigned int err_pos(char *cmd, const char *str)
7972 {
7973 char *found;
7974
7975 if (WARN_ON(!strlen(cmd)))
7976 return 0;
7977
7978 found = strstr(cmd, str);
7979 if (found)
7980 return found - cmd;
7981
7982 return 0;
7983 }
7984
7985 /**
7986 * tracing_log_err - write an error to the tracing error log
7987 * @tr: The associated trace array for the error (NULL for top level array)
7988 * @loc: A string describing where the error occurred
7989 * @cmd: The tracing command that caused the error
7990 * @errs: The array of loc-specific static error strings
7991 * @type: The index into errs[], which produces the specific static err string
7992 * @pos: The position the caret should be placed in the cmd
7993 *
7994 * Writes an error into tracing/error_log of the form:
7995 *
7996 * <loc>: error: <text>
7997 * Command: <cmd>
7998 * ^
7999 *
8000 * tracing/error_log is a small log file containing the last
8001 * TRACING_LOG_ERRS_MAX errors (8). Memory for errors isn't allocated
8002 * unless there has been a tracing error, and the error log can be
8003 * cleared and have its memory freed by writing the empty string in
8004 * truncation mode to it i.e. echo > tracing/error_log.
8005 *
8006 * NOTE: the @errs array along with the @type param are used to
8007 * produce a static error string - this string is not copied and saved
8008 * when the error is logged - only a pointer to it is saved. See
8009 * existing callers for examples of how static strings are typically
8010 * defined for use with tracing_log_err().
8011 */
8012 void tracing_log_err(struct trace_array *tr,
8013 const char *loc, const char *cmd,
8014 const char **errs, u8 type, u16 pos)
8015 {
8016 struct tracing_log_err *err;
8017 int len = 0;
8018
8019 if (!tr)
8020 tr = &global_trace;
8021
8022 len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
8023
8024 guard(mutex)(&tracing_err_log_lock);
8025
8026 err = get_tracing_log_err(tr, len);
8027 if (PTR_ERR(err) == -ENOMEM)
8028 return;
8029
8030 snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
8031 snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
8032
8033 err->info.errs = errs;
8034 err->info.type = type;
8035 err->info.pos = pos;
8036 err->info.ts = local_clock();
8037
8038 list_add_tail(&err->list, &tr->err_log);
8039 }
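
/*
 * A rough caller sketch (illustrative only; the location name and
 * error strings are made up): errors are described by a static string
 * table indexed by @type, and err_pos() supplies the caret position
 * within the offending command.
 */
static const char *my_cmd_errs[] = {
	"Unknown keyword",
	"Missing argument",
};

static void my_cmd_report_error(struct trace_array *tr, char *cmd,
				const char *bad_token)
{
	tracing_log_err(tr, "my_cmd", cmd, my_cmd_errs,
			0 /* "Unknown keyword" */, err_pos(cmd, bad_token));
}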
8040
8041 static void clear_tracing_err_log(struct trace_array *tr)
8042 {
8043 struct tracing_log_err *err, *next;
8044
8045 guard(mutex)(&tracing_err_log_lock);
8046
8047 list_for_each_entry_safe(err, next, &tr->err_log, list) {
8048 list_del(&err->list);
8049 free_tracing_log_err(err);
8050 }
8051
8052 tr->n_err_log_entries = 0;
8053 }
8054
8055 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
8056 {
8057 struct trace_array *tr = m->private;
8058
8059 mutex_lock(&tracing_err_log_lock);
8060
8061 return seq_list_start(&tr->err_log, *pos);
8062 }
8063
8064 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
8065 {
8066 struct trace_array *tr = m->private;
8067
8068 return seq_list_next(v, &tr->err_log, pos);
8069 }
8070
8071 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
8072 {
8073 mutex_unlock(&tracing_err_log_lock);
8074 }
8075
8076 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
8077 {
8078 u16 i;
8079
8080 for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
8081 seq_putc(m, ' ');
8082 for (i = 0; i < pos; i++)
8083 seq_putc(m, ' ');
8084 seq_puts(m, "^\n");
8085 }
8086
8087 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
8088 {
8089 struct tracing_log_err *err = v;
8090
8091 if (err) {
8092 const char *err_text = err->info.errs[err->info.type];
8093 u64 sec = err->info.ts;
8094 u32 nsec;
8095
8096 nsec = do_div(sec, NSEC_PER_SEC);
8097 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
8098 err->loc, err_text);
8099 seq_printf(m, "%s", err->cmd);
8100 tracing_err_log_show_pos(m, err->info.pos);
8101 }
8102
8103 return 0;
8104 }
8105
8106 static const struct seq_operations tracing_err_log_seq_ops = {
8107 .start = tracing_err_log_seq_start,
8108 .next = tracing_err_log_seq_next,
8109 .stop = tracing_err_log_seq_stop,
8110 .show = tracing_err_log_seq_show
8111 };
8112
8113 static int tracing_err_log_open(struct inode *inode, struct file *file)
8114 {
8115 struct trace_array *tr = inode->i_private;
8116 int ret = 0;
8117
8118 ret = tracing_check_open_get_tr(tr);
8119 if (ret)
8120 return ret;
8121
8122 /* If this file was opened for write, then erase contents */
8123 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8124 clear_tracing_err_log(tr);
8125
8126 if (file->f_mode & FMODE_READ) {
8127 ret = seq_open(file, &tracing_err_log_seq_ops);
8128 if (!ret) {
8129 struct seq_file *m = file->private_data;
8130 m->private = tr;
8131 } else {
8132 trace_array_put(tr);
8133 }
8134 }
8135 return ret;
8136 }
8137
8138 static ssize_t tracing_err_log_write(struct file *file,
8139 const char __user *buffer,
8140 size_t count, loff_t *ppos)
8141 {
8142 return count;
8143 }
8144
8145 static int tracing_err_log_release(struct inode *inode, struct file *file)
8146 {
8147 struct trace_array *tr = inode->i_private;
8148
8149 trace_array_put(tr);
8150
8151 if (file->f_mode & FMODE_READ)
8152 seq_release(inode, file);
8153
8154 return 0;
8155 }
8156
8157 static const struct file_operations tracing_err_log_fops = {
8158 .open = tracing_err_log_open,
8159 .write = tracing_err_log_write,
8160 .read = seq_read,
8161 .llseek = tracing_lseek,
8162 .release = tracing_err_log_release,
8163 };
8164
8165 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8166 {
8167 struct trace_array *tr = inode->i_private;
8168 struct ftrace_buffer_info *info;
8169 int ret;
8170
8171 ret = tracing_check_open_get_tr(tr);
8172 if (ret)
8173 return ret;
8174
8175 info = kvzalloc(sizeof(*info), GFP_KERNEL);
8176 if (!info) {
8177 trace_array_put(tr);
8178 return -ENOMEM;
8179 }
8180
8181 mutex_lock(&trace_types_lock);
8182
8183 info->iter.tr = tr;
8184 info->iter.cpu_file = tracing_get_cpu(inode);
8185 info->iter.trace = tr->current_trace;
8186 info->iter.array_buffer = &tr->array_buffer;
8187 info->spare = NULL;
8188 /* Force reading ring buffer for first read */
8189 info->read = (unsigned int)-1;
8190
8191 filp->private_data = info;
8192
8193 tr->trace_ref++;
8194
8195 mutex_unlock(&trace_types_lock);
8196
8197 ret = nonseekable_open(inode, filp);
8198 if (ret < 0)
8199 trace_array_put(tr);
8200
8201 return ret;
8202 }
8203
8204 static __poll_t
8205 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8206 {
8207 struct ftrace_buffer_info *info = filp->private_data;
8208 struct trace_iterator *iter = &info->iter;
8209
8210 return trace_poll(iter, filp, poll_table);
8211 }
8212
8213 static ssize_t
8214 tracing_buffers_read(struct file *filp, char __user *ubuf,
8215 size_t count, loff_t *ppos)
8216 {
8217 struct ftrace_buffer_info *info = filp->private_data;
8218 struct trace_iterator *iter = &info->iter;
8219 void *trace_data;
8220 int page_size;
8221 ssize_t ret = 0;
8222 ssize_t size;
8223
8224 if (!count)
8225 return 0;
8226
8227 #ifdef CONFIG_TRACER_MAX_TRACE
8228 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8229 return -EBUSY;
8230 #endif
8231
8232 page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8233
8234 /* Make sure the spare matches the current sub buffer size */
8235 if (info->spare) {
8236 if (page_size != info->spare_size) {
8237 ring_buffer_free_read_page(iter->array_buffer->buffer,
8238 info->spare_cpu, info->spare);
8239 info->spare = NULL;
8240 }
8241 }
8242
8243 if (!info->spare) {
8244 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8245 iter->cpu_file);
8246 if (IS_ERR(info->spare)) {
8247 ret = PTR_ERR(info->spare);
8248 info->spare = NULL;
8249 } else {
8250 info->spare_cpu = iter->cpu_file;
8251 info->spare_size = page_size;
8252 }
8253 }
8254 if (!info->spare)
8255 return ret;
8256
8257 /* Do we have previous read data to read? */
8258 if (info->read < page_size)
8259 goto read;
8260
8261 again:
8262 trace_access_lock(iter->cpu_file);
8263 ret = ring_buffer_read_page(iter->array_buffer->buffer,
8264 info->spare,
8265 count,
8266 iter->cpu_file, 0);
8267 trace_access_unlock(iter->cpu_file);
8268
8269 if (ret < 0) {
8270 if (trace_empty(iter) && !iter->closed) {
8271 if (update_last_data_if_empty(iter->tr))
8272 return 0;
8273
8274 if ((filp->f_flags & O_NONBLOCK))
8275 return -EAGAIN;
8276
8277 ret = wait_on_pipe(iter, 0);
8278 if (ret)
8279 return ret;
8280
8281 goto again;
8282 }
8283 return 0;
8284 }
8285
8286 info->read = 0;
8287 read:
8288 size = page_size - info->read;
8289 if (size > count)
8290 size = count;
8291 trace_data = ring_buffer_read_page_data(info->spare);
8292 ret = copy_to_user(ubuf, trace_data + info->read, size);
8293 if (ret == size)
8294 return -EFAULT;
8295
8296 size -= ret;
8297
8298 *ppos += size;
8299 info->read += size;
8300
8301 return size;
8302 }
8303
8304 static int tracing_buffers_flush(struct file *file, fl_owner_t id)
8305 {
8306 struct ftrace_buffer_info *info = file->private_data;
8307 struct trace_iterator *iter = &info->iter;
8308
8309 iter->closed = true;
8310 /* Make sure the waiters see the new wait_index */
8311 (void)atomic_fetch_inc_release(&iter->wait_index);
8312
8313 ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8314
8315 return 0;
8316 }
8317
8318 static int tracing_buffers_release(struct inode *inode, struct file *file)
8319 {
8320 struct ftrace_buffer_info *info = file->private_data;
8321 struct trace_iterator *iter = &info->iter;
8322
8323 guard(mutex)(&trace_types_lock);
8324
8325 iter->tr->trace_ref--;
8326
8327 __trace_array_put(iter->tr);
8328
8329 if (info->spare)
8330 ring_buffer_free_read_page(iter->array_buffer->buffer,
8331 info->spare_cpu, info->spare);
8332 kvfree(info);
8333
8334 return 0;
8335 }
8336
8337 struct buffer_ref {
8338 struct trace_buffer *buffer;
8339 void *page;
8340 int cpu;
8341 refcount_t refcount;
8342 };
8343
8344 static void buffer_ref_release(struct buffer_ref *ref)
8345 {
8346 if (!refcount_dec_and_test(&ref->refcount))
8347 return;
8348 ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8349 kfree(ref);
8350 }
8351
8352 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8353 struct pipe_buffer *buf)
8354 {
8355 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8356
8357 buffer_ref_release(ref);
8358 buf->private = 0;
8359 }
8360
8361 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8362 struct pipe_buffer *buf)
8363 {
8364 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8365
8366 if (refcount_read(&ref->refcount) > INT_MAX/2)
8367 return false;
8368
8369 refcount_inc(&ref->refcount);
8370 return true;
8371 }
8372
8373 /* Pipe buffer operations for a buffer. */
8374 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8375 .release = buffer_pipe_buf_release,
8376 .get = buffer_pipe_buf_get,
8377 };
8378
8379 /*
8380 * Callback from splice_to_pipe(), if we need to release some pages
8381 * at the end of the spd in case we errored out in filling the pipe.
8382 */
8383 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8384 {
8385 struct buffer_ref *ref =
8386 (struct buffer_ref *)spd->partial[i].private;
8387
8388 buffer_ref_release(ref);
8389 spd->partial[i].private = 0;
8390 }
8391
8392 static ssize_t
8393 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8394 struct pipe_inode_info *pipe, size_t len,
8395 unsigned int flags)
8396 {
8397 struct ftrace_buffer_info *info = file->private_data;
8398 struct trace_iterator *iter = &info->iter;
8399 struct partial_page partial_def[PIPE_DEF_BUFFERS];
8400 struct page *pages_def[PIPE_DEF_BUFFERS];
8401 struct splice_pipe_desc spd = {
8402 .pages = pages_def,
8403 .partial = partial_def,
8404 .nr_pages_max = PIPE_DEF_BUFFERS,
8405 .ops = &buffer_pipe_buf_ops,
8406 .spd_release = buffer_spd_release,
8407 };
8408 struct buffer_ref *ref;
8409 bool woken = false;
8410 int page_size;
8411 int entries, i;
8412 ssize_t ret = 0;
8413
8414 #ifdef CONFIG_TRACER_MAX_TRACE
8415 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8416 return -EBUSY;
8417 #endif
8418
8419 page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8420 if (*ppos & (page_size - 1))
8421 return -EINVAL;
8422
8423 if (len & (page_size - 1)) {
8424 if (len < page_size)
8425 return -EINVAL;
8426 len &= (~(page_size - 1));
8427 }
8428
8429 if (splice_grow_spd(pipe, &spd))
8430 return -ENOMEM;
8431
8432 again:
8433 trace_access_lock(iter->cpu_file);
8434 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8435
8436 for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) {
8437 struct page *page;
8438 int r;
8439
8440 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8441 if (!ref) {
8442 ret = -ENOMEM;
8443 break;
8444 }
8445
8446 refcount_set(&ref->refcount, 1);
8447 ref->buffer = iter->array_buffer->buffer;
8448 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8449 if (IS_ERR(ref->page)) {
8450 ret = PTR_ERR(ref->page);
8451 ref->page = NULL;
8452 kfree(ref);
8453 break;
8454 }
8455 ref->cpu = iter->cpu_file;
8456
8457 r = ring_buffer_read_page(ref->buffer, ref->page,
8458 len, iter->cpu_file, 1);
8459 if (r < 0) {
8460 ring_buffer_free_read_page(ref->buffer, ref->cpu,
8461 ref->page);
8462 kfree(ref);
8463 break;
8464 }
8465
8466 page = virt_to_page(ring_buffer_read_page_data(ref->page));
8467
8468 spd.pages[i] = page;
8469 spd.partial[i].len = page_size;
8470 spd.partial[i].offset = 0;
8471 spd.partial[i].private = (unsigned long)ref;
8472 spd.nr_pages++;
8473 *ppos += page_size;
8474
8475 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8476 }
8477
8478 trace_access_unlock(iter->cpu_file);
8479 spd.nr_pages = i;
8480
8481 /* did we read anything? */
8482 if (!spd.nr_pages) {
8483
8484 if (ret)
8485 goto out;
8486
8487 if (woken)
8488 goto out;
8489
8490 ret = -EAGAIN;
8491 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8492 goto out;
8493
8494 ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
8495 if (ret)
8496 goto out;
8497
8498 /* No need to wait after waking up when tracing is off */
8499 if (!tracer_tracing_is_on(iter->tr))
8500 goto out;
8501
8502 /* Iterate one more time to collect any new data then exit */
8503 woken = true;
8504
8505 goto again;
8506 }
8507
8508 ret = splice_to_pipe(pipe, &spd);
8509 out:
8510 splice_shrink_spd(&spd);
8511
8512 return ret;
8513 }
8514
8515 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8516 {
8517 struct ftrace_buffer_info *info = file->private_data;
8518 struct trace_iterator *iter = &info->iter;
8519 int err;
8520
8521 if (cmd == TRACE_MMAP_IOCTL_GET_READER) {
8522 if (!(file->f_flags & O_NONBLOCK)) {
8523 err = ring_buffer_wait(iter->array_buffer->buffer,
8524 iter->cpu_file,
8525 iter->tr->buffer_percent,
8526 NULL, NULL);
8527 if (err)
8528 return err;
8529 }
8530
8531 return ring_buffer_map_get_reader(iter->array_buffer->buffer,
8532 iter->cpu_file);
8533 } else if (cmd) {
8534 return -ENOTTY;
8535 }
8536
8537 /*
8538 * An ioctl call with cmd 0 to the ring buffer file will wake up all
8539 * waiters
8540 */
8541 guard(mutex)(&trace_types_lock);
8542
8543 /* Make sure the waiters see the new wait_index */
8544 (void)atomic_fetch_inc_release(&iter->wait_index);
8545
8546 ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8547
8548 return 0;
8549 }
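
/*
 * A rough user-space sketch (illustrative only; error handling and the
 * mmap() setup that tracing_buffers_mmap() below expects are omitted,
 * and the uapi header name is an assumption) of issuing the reader
 * ioctl handled above on a per-CPU trace_pipe_raw file:
 */
#include <fcntl.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/trace_mmap.h>	/* assumed to provide TRACE_MMAP_IOCTL_GET_READER */

int advance_reader_cpu0(void)
{
	int fd = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw", O_RDONLY);
	int ret;

	if (fd < 0)
		return -1;

	/* Without O_NONBLOCK this may wait until buffer_percent is satisfied */
	ret = ioctl(fd, TRACE_MMAP_IOCTL_GET_READER);

	close(fd);
	return ret;
}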
8550
8551 #ifdef CONFIG_TRACER_MAX_TRACE
8552 static int get_snapshot_map(struct trace_array *tr)
8553 {
8554 int err = 0;
8555
8556 /*
8557 * Called with mmap_lock held. lockdep would be unhappy if we were to
8558 * take trace_types_lock now. Instead use the specific
8559 * snapshot_trigger_lock.
8560 */
8561 spin_lock(&tr->snapshot_trigger_lock);
8562
8563 if (tr->snapshot || tr->mapped == UINT_MAX)
8564 err = -EBUSY;
8565 else
8566 tr->mapped++;
8567
8568 spin_unlock(&tr->snapshot_trigger_lock);
8569
8570 /* Wait for update_max_tr() to observe iter->tr->mapped */
8571 if (tr->mapped == 1)
8572 synchronize_rcu();
8573
8574 return err;
8575
8576 }
8577 static void put_snapshot_map(struct trace_array *tr)
8578 {
8579 spin_lock(&tr->snapshot_trigger_lock);
8580 if (!WARN_ON(!tr->mapped))
8581 tr->mapped--;
8582 spin_unlock(&tr->snapshot_trigger_lock);
8583 }
8584 #else
8585 static inline int get_snapshot_map(struct trace_array *tr) { return 0; }
8586 static inline void put_snapshot_map(struct trace_array *tr) { }
8587 #endif
8588
8589 static void tracing_buffers_mmap_close(struct vm_area_struct *vma)
8590 {
8591 struct ftrace_buffer_info *info = vma->vm_file->private_data;
8592 struct trace_iterator *iter = &info->iter;
8593
8594 WARN_ON(ring_buffer_unmap(iter->array_buffer->buffer, iter->cpu_file));
8595 put_snapshot_map(iter->tr);
8596 }
8597
8598 static const struct vm_operations_struct tracing_buffers_vmops = {
8599 .close = tracing_buffers_mmap_close,
8600 };
8601
8602 static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma)
8603 {
8604 struct ftrace_buffer_info *info = filp->private_data;
8605 struct trace_iterator *iter = &info->iter;
8606 int ret = 0;
8607
8608 /* A memmap'ed buffer is not supported for user space mmap */
8609 if (iter->tr->flags & TRACE_ARRAY_FL_MEMMAP)
8610 return -ENODEV;
8611
8612 ret = get_snapshot_map(iter->tr);
8613 if (ret)
8614 return ret;
8615
8616 ret = ring_buffer_map(iter->array_buffer->buffer, iter->cpu_file, vma);
8617 if (ret)
8618 put_snapshot_map(iter->tr);
8619
8620 vma->vm_ops = &tracing_buffers_vmops;
8621
8622 return ret;
8623 }
8624
8625 static const struct file_operations tracing_buffers_fops = {
8626 .open = tracing_buffers_open,
8627 .read = tracing_buffers_read,
8628 .poll = tracing_buffers_poll,
8629 .release = tracing_buffers_release,
8630 .flush = tracing_buffers_flush,
8631 .splice_read = tracing_buffers_splice_read,
8632 .unlocked_ioctl = tracing_buffers_ioctl,
8633 .mmap = tracing_buffers_mmap,
8634 };
8635
8636 static ssize_t
8637 tracing_stats_read(struct file *filp, char __user *ubuf,
8638 size_t count, loff_t *ppos)
8639 {
8640 struct inode *inode = file_inode(filp);
8641 struct trace_array *tr = inode->i_private;
8642 struct array_buffer *trace_buf = &tr->array_buffer;
8643 int cpu = tracing_get_cpu(inode);
8644 struct trace_seq *s;
8645 unsigned long cnt;
8646 unsigned long long t;
8647 unsigned long usec_rem;
8648
8649 s = kmalloc(sizeof(*s), GFP_KERNEL);
8650 if (!s)
8651 return -ENOMEM;
8652
8653 trace_seq_init(s);
8654
8655 cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8656 trace_seq_printf(s, "entries: %ld\n", cnt);
8657
8658 cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8659 trace_seq_printf(s, "overrun: %ld\n", cnt);
8660
8661 cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8662 trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8663
8664 cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8665 trace_seq_printf(s, "bytes: %ld\n", cnt);
8666
8667 if (trace_clocks[tr->clock_id].in_ns) {
8668 /* local or global for trace_clock */
8669 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8670 usec_rem = do_div(t, USEC_PER_SEC);
8671 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8672 t, usec_rem);
8673
8674 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8675 usec_rem = do_div(t, USEC_PER_SEC);
8676 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8677 } else {
8678 /* counter or tsc mode for trace_clock */
8679 trace_seq_printf(s, "oldest event ts: %llu\n",
8680 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8681
8682 trace_seq_printf(s, "now ts: %llu\n",
8683 ring_buffer_time_stamp(trace_buf->buffer));
8684 }
8685
8686 cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8687 trace_seq_printf(s, "dropped events: %ld\n", cnt);
8688
8689 cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8690 trace_seq_printf(s, "read events: %ld\n", cnt);
8691
8692 count = simple_read_from_buffer(ubuf, count, ppos,
8693 s->buffer, trace_seq_used(s));
8694
8695 kfree(s);
8696
8697 return count;
8698 }
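
/*
 * For reference, the per_cpu/cpuN/stats output assembled above looks
 * roughly like this (values illustrative):
 *
 *	entries: 129
 *	overrun: 0
 *	commit overrun: 0
 *	bytes: 5472
 *	oldest event ts:  2491.002371
 *	now ts:  2504.851323
 *	dropped events: 0
 *	read events: 0
 */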
8699
8700 static const struct file_operations tracing_stats_fops = {
8701 .open = tracing_open_generic_tr,
8702 .read = tracing_stats_read,
8703 .llseek = generic_file_llseek,
8704 .release = tracing_release_generic_tr,
8705 };
8706
8707 #ifdef CONFIG_DYNAMIC_FTRACE
8708
8709 static ssize_t
8710 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8711 size_t cnt, loff_t *ppos)
8712 {
8713 ssize_t ret;
8714 char *buf;
8715 int r;
8716
8717 /* 512 should be plenty to hold the amount needed */
8718 #define DYN_INFO_BUF_SIZE 512
8719
8720 buf = kmalloc(DYN_INFO_BUF_SIZE, GFP_KERNEL);
8721 if (!buf)
8722 return -ENOMEM;
8723
8724 r = scnprintf(buf, DYN_INFO_BUF_SIZE,
8725 "%ld pages:%ld groups: %ld\n"
8726 "ftrace boot update time = %llu (ns)\n"
8727 "ftrace module total update time = %llu (ns)\n",
8728 ftrace_update_tot_cnt,
8729 ftrace_number_of_pages,
8730 ftrace_number_of_groups,
8731 ftrace_update_time,
8732 ftrace_total_mod_time);
8733
8734 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8735 kfree(buf);
8736 return ret;
8737 }
8738
8739 static const struct file_operations tracing_dyn_info_fops = {
8740 .open = tracing_open_generic,
8741 .read = tracing_read_dyn_info,
8742 .llseek = generic_file_llseek,
8743 };
8744 #endif /* CONFIG_DYNAMIC_FTRACE */
8745
8746 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8747 static void
8748 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8749 struct trace_array *tr, struct ftrace_probe_ops *ops,
8750 void *data)
8751 {
8752 tracing_snapshot_instance(tr);
8753 }
8754
8755 static void
8756 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8757 struct trace_array *tr, struct ftrace_probe_ops *ops,
8758 void *data)
8759 {
8760 struct ftrace_func_mapper *mapper = data;
8761 long *count = NULL;
8762
8763 if (mapper)
8764 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8765
8766 if (count) {
8767
8768 if (*count <= 0)
8769 return;
8770
8771 (*count)--;
8772 }
8773
8774 tracing_snapshot_instance(tr);
8775 }
8776
8777 static int
8778 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8779 struct ftrace_probe_ops *ops, void *data)
8780 {
8781 struct ftrace_func_mapper *mapper = data;
8782 long *count = NULL;
8783
8784 seq_printf(m, "%ps:", (void *)ip);
8785
8786 seq_puts(m, "snapshot");
8787
8788 if (mapper)
8789 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8790
8791 if (count)
8792 seq_printf(m, ":count=%ld\n", *count);
8793 else
8794 seq_puts(m, ":unlimited\n");
8795
8796 return 0;
8797 }
8798
8799 static int
8800 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8801 unsigned long ip, void *init_data, void **data)
8802 {
8803 struct ftrace_func_mapper *mapper = *data;
8804
8805 if (!mapper) {
8806 mapper = allocate_ftrace_func_mapper();
8807 if (!mapper)
8808 return -ENOMEM;
8809 *data = mapper;
8810 }
8811
8812 return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8813 }
8814
8815 static void
8816 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8817 unsigned long ip, void *data)
8818 {
8819 struct ftrace_func_mapper *mapper = data;
8820
8821 if (!ip) {
8822 if (!mapper)
8823 return;
8824 free_ftrace_func_mapper(mapper, NULL);
8825 return;
8826 }
8827
8828 ftrace_func_mapper_remove_ip(mapper, ip);
8829 }
8830
8831 static struct ftrace_probe_ops snapshot_probe_ops = {
8832 .func = ftrace_snapshot,
8833 .print = ftrace_snapshot_print,
8834 };
8835
8836 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8837 .func = ftrace_count_snapshot,
8838 .print = ftrace_snapshot_print,
8839 .init = ftrace_snapshot_init,
8840 .free = ftrace_snapshot_free,
8841 };
8842
8843 static int
8844 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8845 char *glob, char *cmd, char *param, int enable)
8846 {
8847 struct ftrace_probe_ops *ops;
8848 void *count = (void *)-1;
8849 char *number;
8850 int ret;
8851
8852 if (!tr)
8853 return -ENODEV;
8854
8855 /* hash funcs only work with set_ftrace_filter */
8856 if (!enable)
8857 return -EINVAL;
8858
8859 ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8860
8861 if (glob[0] == '!') {
8862 ret = unregister_ftrace_function_probe_func(glob+1, tr, ops);
8863 if (!ret)
8864 tracing_disarm_snapshot(tr);
8865
8866 return ret;
8867 }
8868
8869 if (!param)
8870 goto out_reg;
8871
8872 number = strsep(&param, ":");
8873
8874 if (!strlen(number))
8875 goto out_reg;
8876
8877 /*
8878 * We use the callback data field (which is a pointer)
8879 * as our counter.
8880 */
8881 ret = kstrtoul(number, 0, (unsigned long *)&count);
8882 if (ret)
8883 return ret;
8884
8885 out_reg:
8886 ret = tracing_arm_snapshot(tr);
8887 if (ret < 0)
8888 return ret;
8889
8890 ret = register_ftrace_function_probe(glob, tr, ops, count);
8891 if (ret < 0)
8892 tracing_disarm_snapshot(tr);
8893
8894 return ret < 0 ? ret : 0;
8895 }
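
/*
 * Usage sketch (illustrative) of the "snapshot" command registered
 * below, driven through set_ftrace_filter:
 *
 *	# echo 'kfree:snapshot' > set_ftrace_filter	- snapshot on every hit
 *	# echo 'kfree:snapshot:5' > set_ftrace_filter	- only the first 5 hits
 *	# echo '!kfree:snapshot' > set_ftrace_filter	- remove the probe
 */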
8896
8897 static struct ftrace_func_command ftrace_snapshot_cmd = {
8898 .name = "snapshot",
8899 .func = ftrace_trace_snapshot_callback,
8900 };
8901
8902 static __init int register_snapshot_cmd(void)
8903 {
8904 return register_ftrace_command(&ftrace_snapshot_cmd);
8905 }
8906 #else
8907 static inline __init int register_snapshot_cmd(void) { return 0; }
8908 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8909
8910 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8911 {
8912 /* Top directory uses NULL as the parent */
8913 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8914 return NULL;
8915
8916 if (WARN_ON(!tr->dir))
8917 return ERR_PTR(-ENODEV);
8918
8919 /* All sub buffers have a descriptor */
8920 return tr->dir;
8921 }
8922
8923 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8924 {
8925 struct dentry *d_tracer;
8926
8927 if (tr->percpu_dir)
8928 return tr->percpu_dir;
8929
8930 d_tracer = tracing_get_dentry(tr);
8931 if (IS_ERR(d_tracer))
8932 return NULL;
8933
8934 tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8935
8936 MEM_FAIL(!tr->percpu_dir,
8937 "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8938
8939 return tr->percpu_dir;
8940 }
8941
8942 static struct dentry *
8943 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8944 void *data, long cpu, const struct file_operations *fops)
8945 {
8946 struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8947
8948 if (ret) /* See tracing_get_cpu() */
8949 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8950 return ret;
8951 }
8952
8953 static void
8954 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8955 {
8956 struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8957 struct dentry *d_cpu;
8958 char cpu_dir[30]; /* 30 characters should be more than enough */
8959
8960 if (!d_percpu)
8961 return;
8962
8963 snprintf(cpu_dir, 30, "cpu%ld", cpu);
8964 d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8965 if (!d_cpu) {
8966 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8967 return;
8968 }
8969
8970 /* per cpu trace_pipe */
8971 trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8972 tr, cpu, &tracing_pipe_fops);
8973
8974 /* per cpu trace */
8975 trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8976 tr, cpu, &tracing_fops);
8977
8978 trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8979 tr, cpu, &tracing_buffers_fops);
8980
8981 trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8982 tr, cpu, &tracing_stats_fops);
8983
8984 trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8985 tr, cpu, &tracing_entries_fops);
8986
8987 if (tr->range_addr_start)
8988 trace_create_cpu_file("buffer_meta", TRACE_MODE_READ, d_cpu,
8989 tr, cpu, &tracing_buffer_meta_fops);
8990 #ifdef CONFIG_TRACER_SNAPSHOT
8991 if (!tr->range_addr_start) {
8992 trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8993 tr, cpu, &snapshot_fops);
8994
8995 trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8996 tr, cpu, &snapshot_raw_fops);
8997 }
8998 #endif
8999 }
9000
9001 #ifdef CONFIG_FTRACE_SELFTEST
9002 /* Let selftest have access to static functions in this file */
9003 #include "trace_selftest.c"
9004 #endif
9005
9006 static ssize_t
9007 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
9008 loff_t *ppos)
9009 {
9010 struct trace_option_dentry *topt = filp->private_data;
9011 char *buf;
9012
9013 if (topt->flags->val & topt->opt->bit)
9014 buf = "1\n";
9015 else
9016 buf = "0\n";
9017
9018 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9019 }
9020
9021 static ssize_t
9022 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
9023 loff_t *ppos)
9024 {
9025 struct trace_option_dentry *topt = filp->private_data;
9026 unsigned long val;
9027 int ret;
9028
9029 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9030 if (ret)
9031 return ret;
9032
9033 if (val != 0 && val != 1)
9034 return -EINVAL;
9035
9036 if (!!(topt->flags->val & topt->opt->bit) != val) {
9037 guard(mutex)(&trace_types_lock);
9038 ret = __set_tracer_option(topt->tr, topt->flags,
9039 topt->opt, !val);
9040 if (ret)
9041 return ret;
9042 }
9043
9044 *ppos += cnt;
9045
9046 return cnt;
9047 }
9048
9049 static int tracing_open_options(struct inode *inode, struct file *filp)
9050 {
9051 struct trace_option_dentry *topt = inode->i_private;
9052 int ret;
9053
9054 ret = tracing_check_open_get_tr(topt->tr);
9055 if (ret)
9056 return ret;
9057
9058 filp->private_data = inode->i_private;
9059 return 0;
9060 }
9061
9062 static int tracing_release_options(struct inode *inode, struct file *file)
9063 {
9064 struct trace_option_dentry *topt = file->private_data;
9065
9066 trace_array_put(topt->tr);
9067 return 0;
9068 }
9069
9070 static const struct file_operations trace_options_fops = {
9071 .open = tracing_open_options,
9072 .read = trace_options_read,
9073 .write = trace_options_write,
9074 .llseek = generic_file_llseek,
9075 .release = tracing_release_options,
9076 };
9077
9078 /*
9079 * In order to pass in both the trace_array descriptor and the index of
9080 * the flag that the trace option file represents, the trace_array
9081 * has a character array of trace_flags_index[], which holds the index
9082 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
9083 * The address of this character array is passed to the flag option file
9084 * read/write callbacks.
9085 *
9086 * In order to extract both the index and the trace_array descriptor,
9087 * get_tr_index() uses the following algorithm.
9088 *
9089 * idx = *ptr;
9090 *
9091 * As the pointer itself contains the address of the index (remember
9092 * index[1] == 1).
9093 *
9094 * Then, to get the trace_array descriptor, subtracting that index
9095 * from the ptr takes us to the start of the index array itself.
9096 *
9097 * ptr - idx == &index[0]
9098 *
9099 * Then a simple container_of() from that pointer gets us to the
9100 * trace_array descriptor.
9101 */
9102 static void get_tr_index(void *data, struct trace_array **ptr,
9103 unsigned int *pindex)
9104 {
9105 *pindex = *(unsigned char *)data;
9106
9107 *ptr = container_of(data - *pindex, struct trace_array,
9108 trace_flags_index);
9109 }
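/*
 * Illustrative walk-through (editor's example, not from the original source):
 * if data points at trace_flags_index[3], then *pindex is set to 3 and
 * data - 3 is &trace_flags_index[0], so container_of() recovers the
 * enclosing trace_array.
 */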
9110
9111 static ssize_t
9112 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
9113 loff_t *ppos)
9114 {
9115 void *tr_index = filp->private_data;
9116 struct trace_array *tr;
9117 unsigned int index;
9118 char *buf;
9119
9120 get_tr_index(tr_index, &tr, &index);
9121
9122 if (tr->trace_flags & (1 << index))
9123 buf = "1\n";
9124 else
9125 buf = "0\n";
9126
9127 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9128 }
9129
9130 static ssize_t
9131 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
9132 loff_t *ppos)
9133 {
9134 void *tr_index = filp->private_data;
9135 struct trace_array *tr;
9136 unsigned int index;
9137 unsigned long val;
9138 int ret;
9139
9140 get_tr_index(tr_index, &tr, &index);
9141
9142 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9143 if (ret)
9144 return ret;
9145
9146 if (val != 0 && val != 1)
9147 return -EINVAL;
9148
9149 mutex_lock(&event_mutex);
9150 mutex_lock(&trace_types_lock);
9151 ret = set_tracer_flag(tr, 1 << index, val);
9152 mutex_unlock(&trace_types_lock);
9153 mutex_unlock(&event_mutex);
9154
9155 if (ret < 0)
9156 return ret;
9157
9158 *ppos += cnt;
9159
9160 return cnt;
9161 }
9162
9163 static const struct file_operations trace_options_core_fops = {
9164 .open = tracing_open_generic,
9165 .read = trace_options_core_read,
9166 .write = trace_options_core_write,
9167 .llseek = generic_file_llseek,
9168 };
9169
9170 struct dentry *trace_create_file(const char *name,
9171 umode_t mode,
9172 struct dentry *parent,
9173 void *data,
9174 const struct file_operations *fops)
9175 {
9176 struct dentry *ret;
9177
9178 ret = tracefs_create_file(name, mode, parent, data, fops);
9179 if (!ret)
9180 pr_warn("Could not create tracefs '%s' entry\n", name);
9181
9182 return ret;
9183 }
9184
9185
9186 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
9187 {
9188 struct dentry *d_tracer;
9189
9190 if (tr->options)
9191 return tr->options;
9192
9193 d_tracer = tracing_get_dentry(tr);
9194 if (IS_ERR(d_tracer))
9195 return NULL;
9196
9197 tr->options = tracefs_create_dir("options", d_tracer);
9198 if (!tr->options) {
9199 pr_warn("Could not create tracefs directory 'options'\n");
9200 return NULL;
9201 }
9202
9203 return tr->options;
9204 }
9205
9206 static void
9207 create_trace_option_file(struct trace_array *tr,
9208 struct trace_option_dentry *topt,
9209 struct tracer_flags *flags,
9210 struct tracer_opt *opt)
9211 {
9212 struct dentry *t_options;
9213
9214 t_options = trace_options_init_dentry(tr);
9215 if (!t_options)
9216 return;
9217
9218 topt->flags = flags;
9219 topt->opt = opt;
9220 topt->tr = tr;
9221
9222 topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
9223 t_options, topt, &trace_options_fops);
9224
9225 }
9226
9227 static void
9228 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
9229 {
9230 struct trace_option_dentry *topts;
9231 struct trace_options *tr_topts;
9232 struct tracer_flags *flags;
9233 struct tracer_opt *opts;
9234 int cnt;
9235 int i;
9236
9237 if (!tracer)
9238 return;
9239
9240 flags = tracer->flags;
9241
9242 if (!flags || !flags->opts)
9243 return;
9244
9245 /*
9246 * If this is an instance, only create flags for tracers
9247 * the instance may have.
9248 */
9249 if (!trace_ok_for_array(tracer, tr))
9250 return;
9251
9252 for (i = 0; i < tr->nr_topts; i++) {
9253 /* Make sure there are no duplicate flags. */
9254 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
9255 return;
9256 }
9257
9258 opts = flags->opts;
9259
9260 for (cnt = 0; opts[cnt].name; cnt++)
9261 ;
9262
9263 topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9264 if (!topts)
9265 return;
9266
9267 tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9268 GFP_KERNEL);
9269 if (!tr_topts) {
9270 kfree(topts);
9271 return;
9272 }
9273
9274 tr->topts = tr_topts;
9275 tr->topts[tr->nr_topts].tracer = tracer;
9276 tr->topts[tr->nr_topts].topts = topts;
9277 tr->nr_topts++;
9278
9279 for (cnt = 0; opts[cnt].name; cnt++) {
9280 create_trace_option_file(tr, &topts[cnt], flags,
9281 &opts[cnt]);
9282 MEM_FAIL(topts[cnt].entry == NULL,
9283 "Failed to create trace option: %s",
9284 opts[cnt].name);
9285 }
9286 }
9287
9288 static struct dentry *
9289 create_trace_option_core_file(struct trace_array *tr,
9290 const char *option, long index)
9291 {
9292 struct dentry *t_options;
9293
9294 t_options = trace_options_init_dentry(tr);
9295 if (!t_options)
9296 return NULL;
9297
9298 return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9299 (void *)&tr->trace_flags_index[index],
9300 &trace_options_core_fops);
9301 }
9302
9303 static void create_trace_options_dir(struct trace_array *tr)
9304 {
9305 struct dentry *t_options;
9306 bool top_level = tr == &global_trace;
9307 int i;
9308
9309 t_options = trace_options_init_dentry(tr);
9310 if (!t_options)
9311 return;
9312
9313 for (i = 0; trace_options[i]; i++) {
9314 if (top_level ||
9315 !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9316 create_trace_option_core_file(tr, trace_options[i], i);
9317 }
9318 }
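/*
 * Editor's note (clarification, not in the original source): the loop above
 * creates every core option file for the top-level trace array, while
 * instances skip the flags marked in TOP_LEVEL_TRACE_FLAGS, which are only
 * meaningful for the global trace array.
 */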
9319
9320 static ssize_t
9321 rb_simple_read(struct file *filp, char __user *ubuf,
9322 size_t cnt, loff_t *ppos)
9323 {
9324 struct trace_array *tr = filp->private_data;
9325 char buf[64];
9326 int r;
9327
9328 r = tracer_tracing_is_on(tr);
9329 r = sprintf(buf, "%d\n", r);
9330
9331 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9332 }
9333
9334 static ssize_t
9335 rb_simple_write(struct file *filp, const char __user *ubuf,
9336 size_t cnt, loff_t *ppos)
9337 {
9338 struct trace_array *tr = filp->private_data;
9339 struct trace_buffer *buffer = tr->array_buffer.buffer;
9340 unsigned long val;
9341 int ret;
9342
9343 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9344 if (ret)
9345 return ret;
9346
9347 if (buffer) {
9348 guard(mutex)(&trace_types_lock);
9349 if (!!val == tracer_tracing_is_on(tr)) {
9350 val = 0; /* do nothing */
9351 } else if (val) {
9352 tracer_tracing_on(tr);
9353 if (tr->current_trace->start)
9354 tr->current_trace->start(tr);
9355 } else {
9356 tracer_tracing_off(tr);
9357 if (tr->current_trace->stop)
9358 tr->current_trace->stop(tr);
9359 /* Wake up any waiters */
9360 ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9361 }
9362 }
9363
9364 (*ppos)++;
9365
9366 return cnt;
9367 }
9368
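/*
 * These handlers back the per-instance "tracing_on" file created in
 * init_tracer_tracefs() below. For example (illustrative),
 * 'echo 0 > tracing_on' reaches rb_simple_write() with val == 0, which
 * turns the ring buffer off, calls the current tracer's ->stop() callback
 * and wakes any readers blocked on the buffer.
 */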
9369 static const struct file_operations rb_simple_fops = {
9370 .open = tracing_open_generic_tr,
9371 .read = rb_simple_read,
9372 .write = rb_simple_write,
9373 .release = tracing_release_generic_tr,
9374 .llseek = default_llseek,
9375 };
9376
9377 static ssize_t
9378 buffer_percent_read(struct file *filp, char __user *ubuf,
9379 size_t cnt, loff_t *ppos)
9380 {
9381 struct trace_array *tr = filp->private_data;
9382 char buf[64];
9383 int r;
9384
9385 r = tr->buffer_percent;
9386 r = sprintf(buf, "%d\n", r);
9387
9388 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9389 }
9390
9391 static ssize_t
9392 buffer_percent_write(struct file *filp, const char __user *ubuf,
9393 size_t cnt, loff_t *ppos)
9394 {
9395 struct trace_array *tr = filp->private_data;
9396 unsigned long val;
9397 int ret;
9398
9399 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9400 if (ret)
9401 return ret;
9402
9403 if (val > 100)
9404 return -EINVAL;
9405
9406 tr->buffer_percent = val;
9407
9408 (*ppos)++;
9409
9410 return cnt;
9411 }
9412
9413 static const struct file_operations buffer_percent_fops = {
9414 .open = tracing_open_generic_tr,
9415 .read = buffer_percent_read,
9416 .write = buffer_percent_write,
9417 .release = tracing_release_generic_tr,
9418 .llseek = default_llseek,
9419 };
9420
9421 static ssize_t
9422 buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
9423 {
9424 struct trace_array *tr = filp->private_data;
9425 size_t size;
9426 char buf[64];
9427 int order;
9428 int r;
9429
9430 order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9431 size = (PAGE_SIZE << order) / 1024;
9432
9433 r = sprintf(buf, "%zd\n", size);
9434
9435 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9436 }
9437
9438 static ssize_t
9439 buffer_subbuf_size_write(struct file *filp, const char __user *ubuf,
9440 size_t cnt, loff_t *ppos)
9441 {
9442 struct trace_array *tr = filp->private_data;
9443 unsigned long val;
9444 int old_order;
9445 int order;
9446 int pages;
9447 int ret;
9448
9449 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9450 if (ret)
9451 return ret;
9452
9453 val *= 1024; /* value passed in is in KB */
9454
9455 pages = DIV_ROUND_UP(val, PAGE_SIZE);
9456 order = fls(pages - 1);
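/*
 * Example (illustrative, assuming 4K system pages): writing "8" gives
 * val = 8192, pages = 2, order = 1 (an 8K sub-buffer); writing "5" also
 * rounds up to pages = 2, order = 1.
 */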
9457
9458 /* limit between 1 and 128 system pages */
9459 if (order < 0 || order > 7)
9460 return -EINVAL;
9461
9462 /* Do not allow tracing while changing the order of the ring buffer */
9463 tracing_stop_tr(tr);
9464
9465 old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9466 if (old_order == order)
9467 goto out;
9468
9469 ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order);
9470 if (ret)
9471 goto out;
9472
9473 #ifdef CONFIG_TRACER_MAX_TRACE
9474
9475 if (!tr->allocated_snapshot)
9476 goto out_max;
9477
9478 ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
9479 if (ret) {
9480 /* Put back the old order */
9481 cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order);
9482 if (WARN_ON_ONCE(cnt)) {
9483 /*
9484 * AARGH! We are left with different orders!
9485 * The max buffer is our "snapshot" buffer.
9486 * When a tracer needs a snapshot (one of the
9487 * latency tracers), it swaps the max buffer
9488 * with the saved snapshot. We succeeded in
9489 * updating the order of the main buffer, but failed to
9490 * update the order of the max buffer. But when we tried
9491 * to reset the main buffer to the original size, we
9492 * failed there too. This is very unlikely to
9493 * happen, but if it does, warn and kill all
9494 * tracing.
9495 */
9496 tracing_disabled = 1;
9497 }
9498 goto out;
9499 }
9500 out_max:
9501 #endif
9502 (*ppos)++;
9503 out:
9504 if (ret)
9505 cnt = ret;
9506 tracing_start_tr(tr);
9507 return cnt;
9508 }
9509
9510 static const struct file_operations buffer_subbuf_size_fops = {
9511 .open = tracing_open_generic_tr,
9512 .read = buffer_subbuf_size_read,
9513 .write = buffer_subbuf_size_write,
9514 .release = tracing_release_generic_tr,
9515 .llseek = default_llseek,
9516 };
9517
9518 static struct dentry *trace_instance_dir;
9519
9520 static void
9521 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9522
9523 #ifdef CONFIG_MODULES
9524 static int make_mod_delta(struct module *mod, void *data)
9525 {
9526 struct trace_module_delta *module_delta;
9527 struct trace_scratch *tscratch;
9528 struct trace_mod_entry *entry;
9529 struct trace_array *tr = data;
9530 int i;
9531
9532 tscratch = tr->scratch;
9533 module_delta = READ_ONCE(tr->module_delta);
9534 for (i = 0; i < tscratch->nr_entries; i++) {
9535 entry = &tscratch->entries[i];
9536 if (strcmp(mod->name, entry->mod_name))
9537 continue;
9538 if (mod->state == MODULE_STATE_GOING)
9539 module_delta->delta[i] = 0;
9540 else
9541 module_delta->delta[i] = (unsigned long)mod->mem[MOD_TEXT].base
9542 - entry->mod_addr;
9543 break;
9544 }
9545 return 0;
9546 }
9547 #else
9548 static int make_mod_delta(struct module *mod, void *data)
9549 {
9550 return 0;
9551 }
9552 #endif
9553
9554 static int mod_addr_comp(const void *a, const void *b, const void *data)
9555 {
9556 const struct trace_mod_entry *e1 = a;
9557 const struct trace_mod_entry *e2 = b;
9558
9559 return e1->mod_addr > e2->mod_addr ? 1 : -1;
9560 }
9561
9562 static void setup_trace_scratch(struct trace_array *tr,
9563 struct trace_scratch *tscratch, unsigned int size)
9564 {
9565 struct trace_module_delta *module_delta;
9566 struct trace_mod_entry *entry;
9567 int i, nr_entries;
9568
9569 if (!tscratch)
9570 return;
9571
9572 tr->scratch = tscratch;
9573 tr->scratch_size = size;
9574
9575 if (tscratch->text_addr)
9576 tr->text_delta = (unsigned long)_text - tscratch->text_addr;
9577
9578 if (struct_size(tscratch, entries, tscratch->nr_entries) > size)
9579 goto reset;
9580
9581 /* Check if each module name is a valid string */
9582 for (i = 0; i < tscratch->nr_entries; i++) {
9583 int n;
9584
9585 entry = &tscratch->entries[i];
9586
9587 for (n = 0; n < MODULE_NAME_LEN; n++) {
9588 if (entry->mod_name[n] == '\0')
9589 break;
9590 if (!isprint(entry->mod_name[n]))
9591 goto reset;
9592 }
9593 if (n == MODULE_NAME_LEN)
9594 goto reset;
9595 }
9596
9597 /* Sort the entries so that we can find appropriate module from address. */
9598 nr_entries = tscratch->nr_entries;
9599 sort_r(tscratch->entries, nr_entries, sizeof(struct trace_mod_entry),
9600 mod_addr_comp, NULL, NULL);
9601
9602 if (IS_ENABLED(CONFIG_MODULES)) {
9603 module_delta = kzalloc(struct_size(module_delta, delta, nr_entries), GFP_KERNEL);
9604 if (!module_delta) {
9605 pr_info("module_delta allocation failed. Not able to decode module address.");
9606 goto reset;
9607 }
9608 init_rcu_head(&module_delta->rcu);
9609 } else
9610 module_delta = NULL;
9611 WRITE_ONCE(tr->module_delta, module_delta);
9612
9613 /* Scan modules to make text delta for modules. */
9614 module_for_each_mod(make_mod_delta, tr);
9615
9616 /* Set trace_clock as the same of the previous boot. */
9617 if (tscratch->clock_id != tr->clock_id) {
9618 if (tscratch->clock_id >= ARRAY_SIZE(trace_clocks) ||
9619 tracing_set_clock(tr, trace_clocks[tscratch->clock_id].name) < 0) {
9620 pr_info("the previous trace_clock info is not valid.");
9621 goto reset;
9622 }
9623 }
9624 return;
9625 reset:
9626 /* Invalid trace modules */
9627 memset(tscratch, 0, size);
9628 }
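/*
 * Editor's summary of the validation above: the scratch area persisted from
 * the previous boot is only trusted if its size, module name strings and
 * clock id all check out; the module entries are then sorted by address and
 * a per-module text delta table is built. Any inconsistency wipes the
 * scratch area so stale data is never decoded.
 */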
9629
9630 static int
9631 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9632 {
9633 enum ring_buffer_flags rb_flags;
9634 struct trace_scratch *tscratch;
9635 unsigned int scratch_size = 0;
9636
9637 rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9638
9639 buf->tr = tr;
9640
9641 if (tr->range_addr_start && tr->range_addr_size) {
9642 /* Add scratch buffer to handle 128 modules */
9643 buf->buffer = ring_buffer_alloc_range(size, rb_flags, 0,
9644 tr->range_addr_start,
9645 tr->range_addr_size,
9646 struct_size(tscratch, entries, 128));
9647
9648 tscratch = ring_buffer_meta_scratch(buf->buffer, &scratch_size);
9649 setup_trace_scratch(tr, tscratch, scratch_size);
9650
9651 /*
9652 * This is basically the same as a mapped buffer,
9653 * with the same restrictions.
9654 */
9655 tr->mapped++;
9656 } else {
9657 buf->buffer = ring_buffer_alloc(size, rb_flags);
9658 }
9659 if (!buf->buffer)
9660 return -ENOMEM;
9661
9662 buf->data = alloc_percpu(struct trace_array_cpu);
9663 if (!buf->data) {
9664 ring_buffer_free(buf->buffer);
9665 buf->buffer = NULL;
9666 return -ENOMEM;
9667 }
9668
9669 /* Allocate the first page for all buffers */
9670 set_buffer_entries(&tr->array_buffer,
9671 ring_buffer_size(tr->array_buffer.buffer, 0));
9672
9673 return 0;
9674 }
9675
9676 static void free_trace_buffer(struct array_buffer *buf)
9677 {
9678 if (buf->buffer) {
9679 ring_buffer_free(buf->buffer);
9680 buf->buffer = NULL;
9681 free_percpu(buf->data);
9682 buf->data = NULL;
9683 }
9684 }
9685
9686 static int allocate_trace_buffers(struct trace_array *tr, int size)
9687 {
9688 int ret;
9689
9690 ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9691 if (ret)
9692 return ret;
9693
9694 #ifdef CONFIG_TRACER_MAX_TRACE
9695 /* Memory mapped (fixed range) buffer trace arrays do not have snapshot buffers */
9696 if (tr->range_addr_start)
9697 return 0;
9698
9699 ret = allocate_trace_buffer(tr, &tr->max_buffer,
9700 allocate_snapshot ? size : 1);
9701 if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9702 free_trace_buffer(&tr->array_buffer);
9703 return -ENOMEM;
9704 }
9705 tr->allocated_snapshot = allocate_snapshot;
9706
9707 allocate_snapshot = false;
9708 #endif
9709
9710 return 0;
9711 }
9712
9713 static void free_trace_buffers(struct trace_array *tr)
9714 {
9715 if (!tr)
9716 return;
9717
9718 free_trace_buffer(&tr->array_buffer);
9719 kfree(tr->module_delta);
9720
9721 #ifdef CONFIG_TRACER_MAX_TRACE
9722 free_trace_buffer(&tr->max_buffer);
9723 #endif
9724 }
9725
9726 static void init_trace_flags_index(struct trace_array *tr)
9727 {
9728 int i;
9729
9730 /* Used by the trace options files */
9731 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9732 tr->trace_flags_index[i] = i;
9733 }
9734
9735 static void __update_tracer_options(struct trace_array *tr)
9736 {
9737 struct tracer *t;
9738
9739 for (t = trace_types; t; t = t->next)
9740 add_tracer_options(tr, t);
9741 }
9742
9743 static void update_tracer_options(struct trace_array *tr)
9744 {
9745 guard(mutex)(&trace_types_lock);
9746 tracer_options_updated = true;
9747 __update_tracer_options(tr);
9748 }
9749
9750 /* Must have trace_types_lock held */
9751 struct trace_array *trace_array_find(const char *instance)
9752 {
9753 struct trace_array *tr, *found = NULL;
9754
9755 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9756 if (tr->name && strcmp(tr->name, instance) == 0) {
9757 found = tr;
9758 break;
9759 }
9760 }
9761
9762 return found;
9763 }
9764
9765 struct trace_array *trace_array_find_get(const char *instance)
9766 {
9767 struct trace_array *tr;
9768
9769 guard(mutex)(&trace_types_lock);
9770 tr = trace_array_find(instance);
9771 if (tr)
9772 tr->ref++;
9773
9774 return tr;
9775 }
9776
9777 static int trace_array_create_dir(struct trace_array *tr)
9778 {
9779 int ret;
9780
9781 tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9782 if (!tr->dir)
9783 return -EINVAL;
9784
9785 ret = event_trace_add_tracer(tr->dir, tr);
9786 if (ret) {
9787 tracefs_remove(tr->dir);
9788 return ret;
9789 }
9790
9791 init_tracer_tracefs(tr, tr->dir);
9792 __update_tracer_options(tr);
9793
9794 return ret;
9795 }
9796
9797 static struct trace_array *
9798 trace_array_create_systems(const char *name, const char *systems,
9799 unsigned long range_addr_start,
9800 unsigned long range_addr_size)
9801 {
9802 struct trace_array *tr;
9803 int ret;
9804
9805 ret = -ENOMEM;
9806 tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9807 if (!tr)
9808 return ERR_PTR(ret);
9809
9810 tr->name = kstrdup(name, GFP_KERNEL);
9811 if (!tr->name)
9812 goto out_free_tr;
9813
9814 if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9815 goto out_free_tr;
9816
9817 if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
9818 goto out_free_tr;
9819
9820 if (systems) {
9821 tr->system_names = kstrdup_const(systems, GFP_KERNEL);
9822 if (!tr->system_names)
9823 goto out_free_tr;
9824 }
9825
9826 /* Only for boot up memory mapped ring buffers */
9827 tr->range_addr_start = range_addr_start;
9828 tr->range_addr_size = range_addr_size;
9829
9830 tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9831
9832 cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9833
9834 raw_spin_lock_init(&tr->start_lock);
9835
9836 tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9837 #ifdef CONFIG_TRACER_MAX_TRACE
9838 spin_lock_init(&tr->snapshot_trigger_lock);
9839 #endif
9840 tr->current_trace = &nop_trace;
9841
9842 INIT_LIST_HEAD(&tr->systems);
9843 INIT_LIST_HEAD(&tr->events);
9844 INIT_LIST_HEAD(&tr->hist_vars);
9845 INIT_LIST_HEAD(&tr->err_log);
9846 INIT_LIST_HEAD(&tr->marker_list);
9847
9848 #ifdef CONFIG_MODULES
9849 INIT_LIST_HEAD(&tr->mod_events);
9850 #endif
9851
9852 if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9853 goto out_free_tr;
9854
9855 /* The ring buffer is expanded by default */
9856 trace_set_ring_buffer_expanded(tr);
9857
9858 if (ftrace_allocate_ftrace_ops(tr) < 0)
9859 goto out_free_tr;
9860
9861 ftrace_init_trace_array(tr);
9862
9863 init_trace_flags_index(tr);
9864
9865 if (trace_instance_dir) {
9866 ret = trace_array_create_dir(tr);
9867 if (ret)
9868 goto out_free_tr;
9869 } else
9870 __trace_early_add_events(tr);
9871
9872 list_add(&tr->list, &ftrace_trace_arrays);
9873
9874 tr->ref++;
9875
9876 return tr;
9877
9878 out_free_tr:
9879 ftrace_free_ftrace_ops(tr);
9880 free_trace_buffers(tr);
9881 free_cpumask_var(tr->pipe_cpumask);
9882 free_cpumask_var(tr->tracing_cpumask);
9883 kfree_const(tr->system_names);
9884 kfree(tr->range_name);
9885 kfree(tr->name);
9886 kfree(tr);
9887
9888 return ERR_PTR(ret);
9889 }
9890
9891 static struct trace_array *trace_array_create(const char *name)
9892 {
9893 return trace_array_create_systems(name, NULL, 0, 0);
9894 }
9895
9896 static int instance_mkdir(const char *name)
9897 {
9898 struct trace_array *tr;
9899 int ret;
9900
9901 guard(mutex)(&event_mutex);
9902 guard(mutex)(&trace_types_lock);
9903
9904 ret = -EEXIST;
9905 if (trace_array_find(name))
9906 return -EEXIST;
9907
9908 tr = trace_array_create(name);
9909
9910 ret = PTR_ERR_OR_ZERO(tr);
9911
9912 return ret;
9913 }
9914
9915 #ifdef CONFIG_MMU
9916 static u64 map_pages(unsigned long start, unsigned long size)
9917 {
9918 unsigned long vmap_start, vmap_end;
9919 struct vm_struct *area;
9920 int ret;
9921
9922 area = get_vm_area(size, VM_IOREMAP);
9923 if (!area)
9924 return 0;
9925
9926 vmap_start = (unsigned long) area->addr;
9927 vmap_end = vmap_start + size;
9928
9929 ret = vmap_page_range(vmap_start, vmap_end,
9930 start, pgprot_nx(PAGE_KERNEL));
9931 if (ret < 0) {
9932 free_vm_area(area);
9933 return 0;
9934 }
9935
9936 return (u64)vmap_start;
9937 }
9938 #else
9939 static inline u64 map_pages(unsigned long start, unsigned long size)
9940 {
9941 return 0;
9942 }
9943 #endif
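/*
 * Editor's note (illustrative): on MMU systems map_pages() remaps a raw
 * physical range into the kernel's vmalloc space (VM_IOREMAP, non-executable
 * kernel protections) and returns the new virtual start address, or 0 on
 * failure. enable_instances() below uses it for boot instances given an
 * explicit start:size physical range.
 */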
9944
9945 /**
9946 * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9947 * @name: The name of the trace array to be looked up/created.
9948 * @systems: A list of systems to create event directories for (NULL for all)
9949 *
9950 * Returns pointer to trace array with given name.
9951 * NULL, if it cannot be created.
9952 *
9953 * NOTE: This function increments the reference counter associated with the
9954 * trace array returned. This makes sure it cannot be freed while in use.
9955 * Use trace_array_put() once the trace array is no longer needed.
9956 * If the trace_array is to be freed, trace_array_destroy() needs to
9957 * be called after the trace_array_put(), or simply let user space delete
9958 * it from the tracefs instances directory. But until the
9959 * trace_array_put() is called, user space can not delete it.
9960 *
9961 */
9962 struct trace_array *trace_array_get_by_name(const char *name, const char *systems)
9963 {
9964 struct trace_array *tr;
9965
9966 guard(mutex)(&event_mutex);
9967 guard(mutex)(&trace_types_lock);
9968
9969 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9970 if (tr->name && strcmp(tr->name, name) == 0) {
9971 tr->ref++;
9972 return tr;
9973 }
9974 }
9975
9976 tr = trace_array_create_systems(name, systems, 0, 0);
9977
9978 if (IS_ERR(tr))
9979 tr = NULL;
9980 else
9981 tr->ref++;
9982
9983 return tr;
9984 }
9985 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
9986
9987 static int __remove_instance(struct trace_array *tr)
9988 {
9989 int i;
9990
9991 /* Reference counter for a newly created trace array = 1. */
9992 if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9993 return -EBUSY;
9994
9995 list_del(&tr->list);
9996
9997 /* Disable all the flags that were enabled coming in */
9998 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9999 if ((1 << i) & ZEROED_TRACE_FLAGS)
10000 set_tracer_flag(tr, 1 << i, 0);
10001 }
10002
10003 if (printk_trace == tr)
10004 update_printk_trace(&global_trace);
10005
10006 if (update_marker_trace(tr, 0))
10007 synchronize_rcu();
10008
10009 tracing_set_nop(tr);
10010 clear_ftrace_function_probes(tr);
10011 event_trace_del_tracer(tr);
10012 ftrace_clear_pids(tr);
10013 ftrace_destroy_function_files(tr);
10014 tracefs_remove(tr->dir);
10015 free_percpu(tr->last_func_repeats);
10016 free_trace_buffers(tr);
10017 clear_tracing_err_log(tr);
10018
10019 if (tr->range_name) {
10020 reserve_mem_release_by_name(tr->range_name);
10021 kfree(tr->range_name);
10022 }
10023
10024 for (i = 0; i < tr->nr_topts; i++) {
10025 kfree(tr->topts[i].topts);
10026 }
10027 kfree(tr->topts);
10028
10029 free_cpumask_var(tr->pipe_cpumask);
10030 free_cpumask_var(tr->tracing_cpumask);
10031 kfree_const(tr->system_names);
10032 kfree(tr->name);
10033 kfree(tr);
10034
10035 return 0;
10036 }
10037
10038 int trace_array_destroy(struct trace_array *this_tr)
10039 {
10040 struct trace_array *tr;
10041
10042 if (!this_tr)
10043 return -EINVAL;
10044
10045 guard(mutex)(&event_mutex);
10046 guard(mutex)(&trace_types_lock);
10047
10048
10049 /* Making sure trace array exists before destroying it. */
10050 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10051 if (tr == this_tr)
10052 return __remove_instance(tr);
10053 }
10054
10055 return -ENODEV;
10056 }
10057 EXPORT_SYMBOL_GPL(trace_array_destroy);
10058
10059 static int instance_rmdir(const char *name)
10060 {
10061 struct trace_array *tr;
10062
10063 guard(mutex)(&event_mutex);
10064 guard(mutex)(&trace_types_lock);
10065
10066 tr = trace_array_find(name);
10067 if (!tr)
10068 return -ENODEV;
10069
10070 return __remove_instance(tr);
10071 }
10072
10073 static __init void create_trace_instances(struct dentry *d_tracer)
10074 {
10075 struct trace_array *tr;
10076
10077 trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
10078 instance_mkdir,
10079 instance_rmdir);
10080 if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
10081 return;
10082
10083 guard(mutex)(&event_mutex);
10084 guard(mutex)(&trace_types_lock);
10085
10086 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10087 if (!tr->name)
10088 continue;
10089 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
10090 "Failed to create instance directory\n"))
10091 return;
10092 }
10093 }
10094
10095 static void
10096 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
10097 {
10098 int cpu;
10099
10100 trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
10101 tr, &show_traces_fops);
10102
10103 trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
10104 tr, &set_tracer_fops);
10105
10106 trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
10107 tr, &tracing_cpumask_fops);
10108
10109 trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
10110 tr, &tracing_iter_fops);
10111
10112 trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
10113 tr, &tracing_fops);
10114
10115 trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
10116 tr, &tracing_pipe_fops);
10117
10118 trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
10119 tr, &tracing_entries_fops);
10120
10121 trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
10122 tr, &tracing_total_entries_fops);
10123
10124 trace_create_file("free_buffer", 0200, d_tracer,
10125 tr, &tracing_free_buffer_fops);
10126
10127 trace_create_file("trace_marker", 0220, d_tracer,
10128 tr, &tracing_mark_fops);
10129
10130 tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");
10131
10132 trace_create_file("trace_marker_raw", 0220, d_tracer,
10133 tr, &tracing_mark_raw_fops);
10134
10135 trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
10136 &trace_clock_fops);
10137
10138 trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
10139 tr, &rb_simple_fops);
10140
10141 trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
10142 &trace_time_stamp_mode_fops);
10143
10144 tr->buffer_percent = 50;
10145
10146 trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
10147 tr, &buffer_percent_fops);
10148
10149 trace_create_file("buffer_subbuf_size_kb", TRACE_MODE_WRITE, d_tracer,
10150 tr, &buffer_subbuf_size_fops);
10151
10152 create_trace_options_dir(tr);
10153
10154 #ifdef CONFIG_TRACER_MAX_TRACE
10155 trace_create_maxlat_file(tr, d_tracer);
10156 #endif
10157
10158 if (ftrace_create_function_files(tr, d_tracer))
10159 MEM_FAIL(1, "Could not allocate function filter files");
10160
10161 if (tr->range_addr_start) {
10162 trace_create_file("last_boot_info", TRACE_MODE_READ, d_tracer,
10163 tr, &last_boot_fops);
10164 #ifdef CONFIG_TRACER_SNAPSHOT
10165 } else {
10166 trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
10167 tr, &snapshot_fops);
10168 #endif
10169 }
10170
10171 trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
10172 tr, &tracing_err_log_fops);
10173
10174 for_each_tracing_cpu(cpu)
10175 tracing_init_tracefs_percpu(tr, cpu);
10176
10177 ftrace_init_tracefs(tr, d_tracer);
10178 }
10179
10180 #ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED
10181 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
10182 {
10183 struct vfsmount *mnt;
10184 struct file_system_type *type;
10185 struct fs_context *fc;
10186 int ret;
10187
10188 /*
10189 * To maintain backward compatibility for tools that mount
10190 * debugfs to get to the tracing facility, tracefs is automatically
10191 * mounted to the debugfs/tracing directory.
10192 */
10193 type = get_fs_type("tracefs");
10194 if (!type)
10195 return NULL;
10196
10197 fc = fs_context_for_submount(type, mntpt);
10198 put_filesystem(type);
10199 if (IS_ERR(fc))
10200 return ERR_CAST(fc);
10201
10202 pr_warn("NOTICE: Automounting of tracing to debugfs is deprecated and will be removed in 2030\n");
10203
10204 ret = vfs_parse_fs_string(fc, "source",
10205 "tracefs", strlen("tracefs"));
10206 if (!ret)
10207 mnt = fc_mount(fc);
10208 else
10209 mnt = ERR_PTR(ret);
10210
10211 put_fs_context(fc);
10212 return mnt;
10213 }
10214 #endif
10215
10216 /**
10217 * tracing_init_dentry - initialize top level trace array
10218 *
10219 * This is called when creating files or directories in the tracing
10220 * directory. It is called via fs_initcall() by any of the boot up code
10221 * and expects to return the dentry of the top level tracing directory.
10222 */
10223 int tracing_init_dentry(void)
10224 {
10225 struct trace_array *tr = &global_trace;
10226
10227 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10228 pr_warn("Tracing disabled due to lockdown\n");
10229 return -EPERM;
10230 }
10231
10232 /* The top level trace array uses NULL as parent */
10233 if (tr->dir)
10234 return 0;
10235
10236 if (WARN_ON(!tracefs_initialized()))
10237 return -ENODEV;
10238
10239 #ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED
10240 /*
10241 * As there may still be users that expect the tracing
10242 * files to exist in debugfs/tracing, we must automount
10243 * the tracefs file system there, so older tools still
10244 * work with the newer kernel.
10245 */
10246 tr->dir = debugfs_create_automount("tracing", NULL,
10247 trace_automount, NULL);
10248 #endif
10249
10250 return 0;
10251 }
10252
10253 extern struct trace_eval_map *__start_ftrace_eval_maps[];
10254 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
10255
10256 static struct workqueue_struct *eval_map_wq __initdata;
10257 static struct work_struct eval_map_work __initdata;
10258 static struct work_struct tracerfs_init_work __initdata;
10259
10260 static void __init eval_map_work_func(struct work_struct *work)
10261 {
10262 int len;
10263
10264 len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
10265 trace_event_update_with_eval_map(NULL, __start_ftrace_eval_maps, len);
10266 }
10267
10268 static int __init trace_eval_init(void)
10269 {
10270 INIT_WORK(&eval_map_work, eval_map_work_func);
10271
10272 eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
10273 if (!eval_map_wq) {
10274 pr_err("Unable to allocate eval_map_wq\n");
10275 /* Do work here */
10276 eval_map_work_func(&eval_map_work);
10277 return -ENOMEM;
10278 }
10279
10280 queue_work(eval_map_wq, &eval_map_work);
10281 return 0;
10282 }
10283
10284 subsys_initcall(trace_eval_init);
10285
10286 static int __init trace_eval_sync(void)
10287 {
10288 /* Make sure the eval map updates are finished */
10289 if (eval_map_wq)
10290 destroy_workqueue(eval_map_wq);
10291 return 0;
10292 }
10293
10294 late_initcall_sync(trace_eval_sync);
10295
10296
10297 #ifdef CONFIG_MODULES
10298
10299 bool module_exists(const char *module)
10300 {
10301 /* All modules have the symbol __this_module */
10302 static const char this_mod[] = "__this_module";
10303 char modname[MODULE_NAME_LEN + sizeof(this_mod) + 2];
10304 unsigned long val;
10305 int n;
10306
10307 n = snprintf(modname, sizeof(modname), "%s:%s", module, this_mod);
10308
10309 if (n > sizeof(modname) - 1)
10310 return false;
10311
10312 val = module_kallsyms_lookup_name(modname);
10313 return val != 0;
10314 }
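/*
 * Example (illustrative module name): every loaded module exposes a
 * "<name>:__this_module" symbol, so module_exists("kvm") simply asks
 * kallsyms whether "kvm:__this_module" resolves to a non-zero address.
 */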
10315
10316 static void trace_module_add_evals(struct module *mod)
10317 {
10318 /*
10319 * Modules with bad taint do not have events created, do
10320 * not bother with enums either.
10321 */
10322 if (trace_module_has_bad_taint(mod))
10323 return;
10324
10325 /* Even if there are no trace_evals, this needs to sanitize field types. */
10326 trace_event_update_with_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
10327 }
10328
10329 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
10330 static void trace_module_remove_evals(struct module *mod)
10331 {
10332 union trace_eval_map_item *map;
10333 union trace_eval_map_item **last = &trace_eval_maps;
10334
10335 if (!mod->num_trace_evals)
10336 return;
10337
10338 guard(mutex)(&trace_eval_mutex);
10339
10340 map = trace_eval_maps;
10341
10342 while (map) {
10343 if (map->head.mod == mod)
10344 break;
10345 map = trace_eval_jmp_to_tail(map);
10346 last = &map->tail.next;
10347 map = map->tail.next;
10348 }
10349 if (!map)
10350 return;
10351
10352 *last = trace_eval_jmp_to_tail(map)->tail.next;
10353 kfree(map);
10354 }
10355 #else
10356 static inline void trace_module_remove_evals(struct module *mod) { }
10357 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
10358
10359 static void trace_module_record(struct module *mod, bool add)
10360 {
10361 struct trace_array *tr;
10362 unsigned long flags;
10363
10364 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10365 flags = tr->flags & (TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT);
10366 /* Update any persistent trace array that has already been started */
10367 if (flags == TRACE_ARRAY_FL_BOOT && add) {
10368 guard(mutex)(&scratch_mutex);
10369 save_mod(mod, tr);
10370 } else if (flags & TRACE_ARRAY_FL_LAST_BOOT) {
10371 /* Update delta if the module loaded in previous boot */
10372 make_mod_delta(mod, tr);
10373 }
10374 }
10375 }
10376
10377 static int trace_module_notify(struct notifier_block *self,
10378 unsigned long val, void *data)
10379 {
10380 struct module *mod = data;
10381
10382 switch (val) {
10383 case MODULE_STATE_COMING:
10384 trace_module_add_evals(mod);
10385 trace_module_record(mod, true);
10386 break;
10387 case MODULE_STATE_GOING:
10388 trace_module_remove_evals(mod);
10389 trace_module_record(mod, false);
10390 break;
10391 }
10392
10393 return NOTIFY_OK;
10394 }
10395
10396 static struct notifier_block trace_module_nb = {
10397 .notifier_call = trace_module_notify,
10398 .priority = 0,
10399 };
10400 #endif /* CONFIG_MODULES */
10401
10402 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
10403 {
10404
10405 event_trace_init();
10406
10407 init_tracer_tracefs(&global_trace, NULL);
10408 ftrace_init_tracefs_toplevel(&global_trace, NULL);
10409
10410 trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
10411 &global_trace, &tracing_thresh_fops);
10412
10413 trace_create_file("README", TRACE_MODE_READ, NULL,
10414 NULL, &tracing_readme_fops);
10415
10416 trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
10417 NULL, &tracing_saved_cmdlines_fops);
10418
10419 trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
10420 NULL, &tracing_saved_cmdlines_size_fops);
10421
10422 trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
10423 NULL, &tracing_saved_tgids_fops);
10424
10425 trace_create_eval_file(NULL);
10426
10427 #ifdef CONFIG_MODULES
10428 register_module_notifier(&trace_module_nb);
10429 #endif
10430
10431 #ifdef CONFIG_DYNAMIC_FTRACE
10432 trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
10433 NULL, &tracing_dyn_info_fops);
10434 #endif
10435
10436 create_trace_instances(NULL);
10437
10438 update_tracer_options(&global_trace);
10439 }
10440
10441 static __init int tracer_init_tracefs(void)
10442 {
10443 int ret;
10444
10445 trace_access_lock_init();
10446
10447 ret = tracing_init_dentry();
10448 if (ret)
10449 return 0;
10450
10451 if (eval_map_wq) {
10452 INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
10453 queue_work(eval_map_wq, &tracerfs_init_work);
10454 } else {
10455 tracer_init_tracefs_work_func(NULL);
10456 }
10457
10458 rv_init_interface();
10459
10460 return 0;
10461 }
10462
10463 fs_initcall(tracer_init_tracefs);
10464
10465 static int trace_die_panic_handler(struct notifier_block *self,
10466 unsigned long ev, void *unused);
10467
10468 static struct notifier_block trace_panic_notifier = {
10469 .notifier_call = trace_die_panic_handler,
10470 .priority = INT_MAX - 1,
10471 };
10472
10473 static struct notifier_block trace_die_notifier = {
10474 .notifier_call = trace_die_panic_handler,
10475 .priority = INT_MAX - 1,
10476 };
10477
10478 /*
10479 * The idea is to execute the following die/panic callback early, in order
10480 * to avoid showing irrelevant information in the trace (like other panic
10481 * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
10482 * warnings get disabled (to prevent potential log flooding).
10483 */
10484 static int trace_die_panic_handler(struct notifier_block *self,
10485 unsigned long ev, void *unused)
10486 {
10487 if (!ftrace_dump_on_oops_enabled())
10488 return NOTIFY_DONE;
10489
10490 /* The die notifier requires DIE_OOPS to trigger */
10491 if (self == &trace_die_notifier && ev != DIE_OOPS)
10492 return NOTIFY_DONE;
10493
10494 ftrace_dump(DUMP_PARAM);
10495
10496 return NOTIFY_DONE;
10497 }
10498
10499 /*
10500 * printk is set to a max of 1024; we really don't need it that big.
10501 * Nothing should be printing 1000 characters anyway.
10502 */
10503 #define TRACE_MAX_PRINT 1000
10504
10505 /*
10506 * Define here KERN_TRACE so that we have one place to modify
10507 * it if we decide to change what log level the ftrace dump
10508 * should be at.
10509 */
10510 #define KERN_TRACE KERN_EMERG
10511
10512 void
10513 trace_printk_seq(struct trace_seq *s)
10514 {
10515 /* Probably should print a warning here. */
10516 if (s->seq.len >= TRACE_MAX_PRINT)
10517 s->seq.len = TRACE_MAX_PRINT;
10518
10519 /*
10520 * More paranoid code. Although the buffer size is set to
10521 * PAGE_SIZE and TRACE_MAX_PRINT is 1000, this is just
10522 * an extra layer of protection.
10523 */
10524 if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
10525 s->seq.len = s->seq.size - 1;
10526
10527 /* should be zero terminated, but we are paranoid. */
10528 s->buffer[s->seq.len] = 0;
10529
10530 printk(KERN_TRACE "%s", s->buffer);
10531
10532 trace_seq_init(s);
10533 }
10534
10535 static void trace_init_iter(struct trace_iterator *iter, struct trace_array *tr)
10536 {
10537 iter->tr = tr;
10538 iter->trace = iter->tr->current_trace;
10539 iter->cpu_file = RING_BUFFER_ALL_CPUS;
10540 iter->array_buffer = &tr->array_buffer;
10541
10542 if (iter->trace && iter->trace->open)
10543 iter->trace->open(iter);
10544
10545 /* Annotate start of buffers if we had overruns */
10546 if (ring_buffer_overruns(iter->array_buffer->buffer))
10547 iter->iter_flags |= TRACE_FILE_ANNOTATE;
10548
10549 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
10550 if (trace_clocks[iter->tr->clock_id].in_ns)
10551 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10552
10553 /* Can not use kmalloc for iter.temp and iter.fmt */
10554 iter->temp = static_temp_buf;
10555 iter->temp_size = STATIC_TEMP_BUF_SIZE;
10556 iter->fmt = static_fmt_buf;
10557 iter->fmt_size = STATIC_FMT_BUF_SIZE;
10558 }
10559
10560 void trace_init_global_iter(struct trace_iterator *iter)
10561 {
10562 trace_init_iter(iter, &global_trace);
10563 }
10564
10565 static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_mode)
10566 {
10567 /* use static because iter can be a bit big for the stack */
10568 static struct trace_iterator iter;
10569 unsigned int old_userobj;
10570 unsigned long flags;
10571 int cnt = 0;
10572
10573 /*
10574 * Always turn off tracing when we dump.
10575 * We don't need to show trace output of what happens
10576 * between multiple crashes.
10577 *
10578 * If the user does a sysrq-z, then they can re-enable
10579 * tracing with echo 1 > tracing_on.
10580 */
10581 tracer_tracing_off(tr);
10582
10583 local_irq_save(flags);
10584
10585 /* Simulate the iterator */
10586 trace_init_iter(&iter, tr);
10587
10588 /* While dumping, do not allow the buffer to be enabled */
10589 tracer_tracing_disable(tr);
10590
10591 old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
10592
10593 /* don't look at user memory in panic mode */
10594 tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
10595
10596 if (dump_mode == DUMP_ORIG)
10597 iter.cpu_file = raw_smp_processor_id();
10598 else
10599 iter.cpu_file = RING_BUFFER_ALL_CPUS;
10600
10601 if (tr == &global_trace)
10602 printk(KERN_TRACE "Dumping ftrace buffer:\n");
10603 else
10604 printk(KERN_TRACE "Dumping ftrace instance %s buffer:\n", tr->name);
10605
10606 /* Did function tracer already get disabled? */
10607 if (ftrace_is_dead()) {
10608 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10609 printk("# MAY BE MISSING FUNCTION EVENTS\n");
10610 }
10611
10612 /*
10613 * We need to stop all tracing on all CPUS to read
10614 * the next buffer. This is a bit expensive, but is
10615 * not done often. We fill in all that we can read,
10616 * and then release the locks again.
10617 */
10618
10619 while (!trace_empty(&iter)) {
10620
10621 if (!cnt)
10622 printk(KERN_TRACE "---------------------------------\n");
10623
10624 cnt++;
10625
10626 trace_iterator_reset(&iter);
10627 iter.iter_flags |= TRACE_FILE_LAT_FMT;
10628
10629 if (trace_find_next_entry_inc(&iter) != NULL) {
10630 int ret;
10631
10632 ret = print_trace_line(&iter);
10633 if (ret != TRACE_TYPE_NO_CONSUME)
10634 trace_consume(&iter);
10635 }
10636 touch_nmi_watchdog();
10637
10638 trace_printk_seq(&iter.seq);
10639 }
10640
10641 if (!cnt)
10642 printk(KERN_TRACE " (ftrace buffer empty)\n");
10643 else
10644 printk(KERN_TRACE "---------------------------------\n");
10645
10646 tr->trace_flags |= old_userobj;
10647
10648 tracer_tracing_enable(tr);
10649 local_irq_restore(flags);
10650 }
10651
10652 static void ftrace_dump_by_param(void)
10653 {
10654 bool first_param = true;
10655 char dump_param[MAX_TRACER_SIZE];
10656 char *buf, *token, *inst_name;
10657 struct trace_array *tr;
10658
10659 strscpy(dump_param, ftrace_dump_on_oops, MAX_TRACER_SIZE);
10660 buf = dump_param;
10661
10662 while ((token = strsep(&buf, ",")) != NULL) {
10663 if (first_param) {
10664 first_param = false;
10665 if (!strcmp("0", token))
10666 continue;
10667 else if (!strcmp("1", token)) {
10668 ftrace_dump_one(&global_trace, DUMP_ALL);
10669 continue;
10670 }
10671 else if (!strcmp("2", token) ||
10672 !strcmp("orig_cpu", token)) {
10673 ftrace_dump_one(&global_trace, DUMP_ORIG);
10674 continue;
10675 }
10676 }
10677
10678 inst_name = strsep(&token, "=");
10679 tr = trace_array_find(inst_name);
10680 if (!tr) {
10681 printk(KERN_TRACE "Instance %s not found\n", inst_name);
10682 continue;
10683 }
10684
10685 if (token && (!strcmp("2", token) ||
10686 !strcmp("orig_cpu", token)))
10687 ftrace_dump_one(tr, DUMP_ORIG);
10688 else
10689 ftrace_dump_one(tr, DUMP_ALL);
10690 }
10691 }
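/*
 * Illustrative example of the ftrace_dump_on_oops string parsed above
 * (instance names are hypothetical): "1,foo,bar=orig_cpu" dumps the global
 * buffer on all CPUs, instance "foo" on all CPUs, and instance "bar" only
 * on the CPU that triggered the oops.
 */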
10692
10693 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10694 {
10695 static atomic_t dump_running;
10696
10697 /* Only allow one dump user at a time. */
10698 if (atomic_inc_return(&dump_running) != 1) {
10699 atomic_dec(&dump_running);
10700 return;
10701 }
10702
10703 switch (oops_dump_mode) {
10704 case DUMP_ALL:
10705 ftrace_dump_one(&global_trace, DUMP_ALL);
10706 break;
10707 case DUMP_ORIG:
10708 ftrace_dump_one(&global_trace, DUMP_ORIG);
10709 break;
10710 case DUMP_PARAM:
10711 ftrace_dump_by_param();
10712 break;
10713 case DUMP_NONE:
10714 break;
10715 default:
10716 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10717 ftrace_dump_one(&global_trace, DUMP_ALL);
10718 }
10719
10720 atomic_dec(&dump_running);
10721 }
10722 EXPORT_SYMBOL_GPL(ftrace_dump);
10723
10724 #define WRITE_BUFSIZE 4096
10725
10726 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10727 size_t count, loff_t *ppos,
10728 int (*createfn)(const char *))
10729 {
10730 char *kbuf __free(kfree) = NULL;
10731 char *buf, *tmp;
10732 int ret = 0;
10733 size_t done = 0;
10734 size_t size;
10735
10736 kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10737 if (!kbuf)
10738 return -ENOMEM;
10739
10740 while (done < count) {
10741 size = count - done;
10742
10743 if (size >= WRITE_BUFSIZE)
10744 size = WRITE_BUFSIZE - 1;
10745
10746 if (copy_from_user(kbuf, buffer + done, size))
10747 return -EFAULT;
10748
10749 kbuf[size] = '\0';
10750 buf = kbuf;
10751 do {
10752 tmp = strchr(buf, '\n');
10753 if (tmp) {
10754 *tmp = '\0';
10755 size = tmp - buf + 1;
10756 } else {
10757 size = strlen(buf);
10758 if (done + size < count) {
10759 if (buf != kbuf)
10760 break;
10761 /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10762 pr_warn("Line length is too long: Should be less than %d\n",
10763 WRITE_BUFSIZE - 2);
10764 return -EINVAL;
10765 }
10766 }
10767 done += size;
10768
10769 /* Remove comments */
10770 tmp = strchr(buf, '#');
10771
10772 if (tmp)
10773 *tmp = '\0';
10774
10775 ret = createfn(buf);
10776 if (ret)
10777 return ret;
10778 buf += size;
10779
10780 } while (done < count);
10781 }
10782 return done;
10783 }
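/*
 * Behavioural sketch (editor's note): the loop above copies user input in
 * WRITE_BUFSIZE chunks, splits it on newlines, strips anything after a '#'
 * as a comment, and hands each resulting line to createfn(); a single line
 * longer than WRITE_BUFSIZE - 2 is rejected with -EINVAL.
 */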
10784
10785 #ifdef CONFIG_TRACER_MAX_TRACE
10786 __init static bool tr_needs_alloc_snapshot(const char *name)
10787 {
10788 char *test;
10789 int len = strlen(name);
10790 bool ret;
10791
10792 if (!boot_snapshot_index)
10793 return false;
10794
10795 if (strncmp(name, boot_snapshot_info, len) == 0 &&
10796 boot_snapshot_info[len] == '\t')
10797 return true;
10798
10799 test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10800 if (!test)
10801 return false;
10802
10803 sprintf(test, "\t%s\t", name);
10804 ret = strstr(boot_snapshot_info, test) == NULL;
10805 kfree(test);
10806 return ret;
10807 }
10808
10809 __init static void do_allocate_snapshot(const char *name)
10810 {
10811 if (!tr_needs_alloc_snapshot(name))
10812 return;
10813
10814 /*
10815 * When allocate_snapshot is set, the next call to
10816 * allocate_trace_buffers() (called by trace_array_get_by_name())
10817 * will allocate the snapshot buffer. That will also clear
10818 * this flag.
10819 */
10820 allocate_snapshot = true;
10821 }
10822 #else
10823 static inline void do_allocate_snapshot(const char *name) { }
10824 #endif
10825
10826 __init static void enable_instances(void)
10827 {
10828 struct trace_array *tr;
10829 bool memmap_area = false;
10830 char *curr_str;
10831 char *name;
10832 char *str;
10833 char *tok;
10834
10835 /* A tab is always appended */
10836 boot_instance_info[boot_instance_index - 1] = '\0';
10837 str = boot_instance_info;
10838
10839 while ((curr_str = strsep(&str, "\t"))) {
10840 phys_addr_t start = 0;
10841 phys_addr_t size = 0;
10842 unsigned long addr = 0;
10843 bool traceprintk = false;
10844 bool traceoff = false;
10845 char *flag_delim;
10846 char *addr_delim;
10847 char *rname __free(kfree) = NULL;
10848
10849 tok = strsep(&curr_str, ",");
10850
10851 flag_delim = strchr(tok, '^');
10852 addr_delim = strchr(tok, '@');
10853
10854 if (addr_delim)
10855 *addr_delim++ = '\0';
10856
10857 if (flag_delim)
10858 *flag_delim++ = '\0';
10859
10860 name = tok;
10861
10862 if (flag_delim) {
10863 char *flag;
10864
10865 while ((flag = strsep(&flag_delim, "^"))) {
10866 if (strcmp(flag, "traceoff") == 0) {
10867 traceoff = true;
10868 } else if ((strcmp(flag, "printk") == 0) ||
10869 (strcmp(flag, "traceprintk") == 0) ||
10870 (strcmp(flag, "trace_printk") == 0)) {
10871 traceprintk = true;
10872 } else {
10873 pr_info("Tracing: Invalid instance flag '%s' for %s\n",
10874 flag, name);
10875 }
10876 }
10877 }
10878
10879 tok = addr_delim;
10880 if (tok && isdigit(*tok)) {
10881 start = memparse(tok, &tok);
10882 if (!start) {
10883 pr_warn("Tracing: Invalid boot instance address for %s\n",
10884 name);
10885 continue;
10886 }
10887 if (*tok != ':') {
10888 pr_warn("Tracing: No size specified for instance %s\n", name);
10889 continue;
10890 }
10891 tok++;
10892 size = memparse(tok, &tok);
10893 if (!size) {
10894 pr_warn("Tracing: Invalid boot instance size for %s\n",
10895 name);
10896 continue;
10897 }
10898 memmap_area = true;
10899 } else if (tok) {
10900 if (!reserve_mem_find_by_name(tok, &start, &size)) {
10901 start = 0;
10902 pr_warn("Failed to map boot instance %s to %s\n", name, tok);
10903 continue;
10904 }
10905 rname = kstrdup(tok, GFP_KERNEL);
10906 }
10907
10908 if (start) {
10909 /* Start and size must be page aligned */
10910 if (start & ~PAGE_MASK) {
10911 pr_warn("Tracing: mapping start addr %pa is not page aligned\n", &start);
10912 continue;
10913 }
10914 if (size & ~PAGE_MASK) {
10915 pr_warn("Tracing: mapping size %pa is not page aligned\n", &size);
10916 continue;
10917 }
10918
10919 if (memmap_area)
10920 addr = map_pages(start, size);
10921 else
10922 addr = (unsigned long)phys_to_virt(start);
10923 if (addr) {
10924 pr_info("Tracing: mapped boot instance %s at physical memory %pa of size 0x%lx\n",
10925 name, &start, (unsigned long)size);
10926 } else {
10927 pr_warn("Tracing: Failed to map boot instance %s\n", name);
10928 continue;
10929 }
10930 } else {
10931 /* Only non-mapped buffers have snapshot buffers */
10932 if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
10933 do_allocate_snapshot(name);
10934 }
10935
10936 tr = trace_array_create_systems(name, NULL, addr, size);
10937 if (IS_ERR(tr)) {
10938 pr_warn("Tracing: Failed to create instance buffer %s\n", curr_str);
10939 continue;
10940 }
10941
10942 if (traceoff)
10943 tracer_tracing_off(tr);
10944
10945 if (traceprintk)
10946 update_printk_trace(tr);
10947
10948 /*
10949 * memmap'd buffers can not be freed.
10950 */
10951 if (memmap_area) {
10952 tr->flags |= TRACE_ARRAY_FL_MEMMAP;
10953 tr->ref++;
10954 }
10955
10956 if (start) {
10957 tr->flags |= TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT;
10958 tr->range_name = no_free_ptr(rname);
10959 }
10960
10961 while ((tok = strsep(&curr_str, ","))) {
10962 early_enable_events(tr, tok, true);
10963 }
10964 }
10965 }
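/*
 * Illustrative boot instance string handled above (the instance name,
 * address and event are made up):
 *   foo^traceoff^traceprintk@0x1000000:2M,sched:sched_switch
 * creates instance "foo" backed by the 2M physical range at 0x1000000,
 * starts it with tracing off, routes trace_printk() into it, and enables
 * the sched:sched_switch event in it.
 */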
10966
10967 __init static int tracer_alloc_buffers(void)
10968 {
10969 int ring_buf_size;
10970 int ret = -ENOMEM;
10971
10972
10973 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10974 pr_warn("Tracing disabled due to lockdown\n");
10975 return -EPERM;
10976 }
10977
10978 /*
10979 * Make sure we don't accidentally add more trace options
10980 * than we have bits for.
10981 */
10982 BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10983
10984 if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10985 return -ENOMEM;
10986
10987 if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10988 goto out_free_buffer_mask;
10989
10990 /* Only allocate trace_printk buffers if a trace_printk exists */
10991 if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10992 /* Must be called before global_trace.buffer is allocated */
10993 trace_printk_init_buffers();
10994
10995 /* To save memory, keep the ring buffer size to its minimum */
10996 if (global_trace.ring_buffer_expanded)
10997 ring_buf_size = trace_buf_size;
10998 else
10999 ring_buf_size = 1;
11000
11001 cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
11002 cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
11003
11004 raw_spin_lock_init(&global_trace.start_lock);
11005
11006 /*
11007 * The prepare callback allocates some memory for the ring buffer. We
11008 * don't free the buffer if the CPU goes down. If we were to free
11009 * the buffer, then the user would lose any trace that was in the
11010 * buffer. The memory will be removed once the "instance" is removed.
11011 */
11012 ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
11013 "trace/RB:prepare", trace_rb_cpu_prepare,
11014 NULL);
11015 if (ret < 0)
11016 goto out_free_cpumask;
11017 /* Used for event triggers: if tracing is off but triggers are enabled,
 * events are staged in this buffer so the triggers can still see them. */
11018 ret = -ENOMEM;
11019 temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
11020 if (!temp_buffer)
11021 goto out_rm_hp_state;
11022
11023 if (trace_create_savedcmd() < 0)
11024 goto out_free_temp_buffer;
11025
11026 if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
11027 goto out_free_savedcmd;
11028
11029 /* TODO: make the number of buffers hot pluggable with CPUs */
11030 if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
11031 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
11032 goto out_free_pipe_cpumask;
11033 }
11034 if (global_trace.buffer_disabled)
11035 tracing_off();
11036
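/* Honor a trace clock selected on the kernel command line, if any */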
11037 if (trace_boot_clock) {
11038 ret = tracing_set_clock(&global_trace, trace_boot_clock);
11039 if (ret < 0)
11040 pr_warn("Trace clock %s not defined, going back to default\n",
11041 trace_boot_clock);
11042 }
11043
11044 /*
11045 * register_tracer() might reference current_trace, so it
11046 * needs to be set before we register anything. This is
11047 * just a bootstrap of current_trace anyway.
11048 */
11049 global_trace.current_trace = &nop_trace;
11050
11051 global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
11052 #ifdef CONFIG_TRACER_MAX_TRACE
11053 spin_lock_init(&global_trace.snapshot_trigger_lock);
11054 #endif
11055 ftrace_init_global_array_ops(&global_trace);
11056
11057 #ifdef CONFIG_MODULES
11058 INIT_LIST_HEAD(&global_trace.mod_events);
11059 #endif
11060
11061 init_trace_flags_index(&global_trace);
11062
11063 register_tracer(&nop_trace);
11064
11065 /* Function tracing may start here (via kernel command line) */
11066 init_function_trace();
11067
11068 /* All seems OK, enable tracing */
11069 tracing_disabled = 0;
11070
11071 atomic_notifier_chain_register(&panic_notifier_list,
11072 &trace_panic_notifier);
11073
11074 register_die_notifier(&trace_die_notifier);
11075
11076 global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
11077
11078 INIT_LIST_HEAD(&global_trace.systems);
11079 INIT_LIST_HEAD(&global_trace.events);
11080 INIT_LIST_HEAD(&global_trace.hist_vars);
11081 INIT_LIST_HEAD(&global_trace.err_log);
11082 list_add(&global_trace.marker_list, &marker_copies);
11083 list_add(&global_trace.list, &ftrace_trace_arrays);
11084
11085 apply_trace_boot_options();
11086
11087 register_snapshot_cmd();
11088
11089 return 0;
11090
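/* Error unwinding: release resources in the reverse order they were set up */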
11091 out_free_pipe_cpumask:
11092 free_cpumask_var(global_trace.pipe_cpumask);
11093 out_free_savedcmd:
11094 trace_free_saved_cmdlines_buffer();
11095 out_free_temp_buffer:
11096 ring_buffer_free(temp_buffer);
11097 out_rm_hp_state:
11098 cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
11099 out_free_cpumask:
11100 free_cpumask_var(global_trace.tracing_cpumask);
11101 out_free_buffer_mask:
11102 free_cpumask_var(tracing_buffer_mask);
11103 return ret;
11104 }
11105
11106 #ifdef CONFIG_FUNCTION_TRACER
11107 /* Used to set module cached ftrace filtering at boot up */
11108 __init struct trace_array *trace_get_global_array(void)
11109 {
11110 return &global_trace;
11111 }
11112 #endif
11113
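/*
 * If a boot snapshot was requested on the kernel command line, take a
 * snapshot of every instance that has a snapshot buffer allocated, once
 * boot-time tracing has run.
 */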
11114 void __init ftrace_boot_snapshot(void)
11115 {
11116 #ifdef CONFIG_TRACER_MAX_TRACE
11117 struct trace_array *tr;
11118
11119 if (!snapshot_at_boot)
11120 return;
11121
11122 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
11123 if (!tr->allocated_snapshot)
11124 continue;
11125
11126 tracing_snapshot_instance(tr);
11127 trace_array_puts(tr, "** Boot snapshot taken **\n");
11128 }
11129 #endif
11130 }
11131
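/*
 * Called early during boot, before the full tracing infrastructure is up, so
 * that ring buffers exist for any tracing requested on the kernel command
 * line.
 */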
11132 void __init early_trace_init(void)
11133 {
11134 if (tracepoint_printk) {
11135 tracepoint_print_iter =
11136 kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
11137 if (MEM_FAIL(!tracepoint_print_iter,
11138 "Failed to allocate trace iterator\n"))
11139 tracepoint_printk = 0;
11140 else
11141 static_key_enable(&tracepoint_printk_key.key);
11142 }
11143 tracer_alloc_buffers();
11144
11145 init_events();
11146 }
11147
11148 void __init trace_init(void)
11149 {
11150 trace_event_init();
11151
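/*
 * Instances requested on the command line are only created once the event
 * infrastructure above has been initialized.
 */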
11152 if (boot_instance_index)
11153 enable_instances();
11154 }
11155
11156 __init static void clear_boot_tracer(void)
11157 {
11158 /*
11159 * The default bootup tracer name lives in an init section.
11160 * This function runs from a late initcall; if the requested boot
11161 * tracer was never registered, clear the pointer so that a later
11162 * registration does not access init memory that is about to be
11163 * freed.
11164 */
11165 if (!default_bootup_tracer)
11166 return;
11167
11168 printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
11169 default_bootup_tracer);
11170 default_bootup_tracer = NULL;
11171 }
11172
11173 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
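/*
 * The default "local" trace clock is the fast per-CPU sched_clock(). If that
 * clock turns out to be unstable across CPUs, events from different CPUs may
 * appear out of order, so switch the default to the slower but ordered
 * "global" clock.
 */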
11174 __init static void tracing_set_default_clock(void)
11175 {
11176 /* sched_clock_stable() is determined in late_initcall */
11177 if (!trace_boot_clock && !sched_clock_stable()) {
11178 if (security_locked_down(LOCKDOWN_TRACEFS)) {
11179 pr_warn("Can not set tracing clock due to lockdown\n");
11180 return;
11181 }
11182
11183 printk(KERN_WARNING
11184 "Unstable clock detected, switching default tracing clock to \"global\"\n"
11185 "If you want to keep using the local clock, then add:\n"
11186 " \"trace_clock=local\"\n"
11187 "on the kernel command line\n");
11188 tracing_set_clock(&global_trace, "global");
11189 }
11190 }
11191 #else
11192 static inline void tracing_set_default_clock(void) { }
11193 #endif
11194
11195 __init static int late_trace_init(void)
11196 {
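/* If requested on the command line, stop piping tracepoints to printk now that boot has finished */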
11197 if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
11198 static_key_disable(&tracepoint_printk_key.key);
11199 tracepoint_printk = 0;
11200 }
11201
11202 if (traceoff_after_boot)
11203 tracing_off();
11204
11205 tracing_set_default_clock();
11206 clear_boot_tracer();
11207 return 0;
11208 }
11209
11210 late_initcall_sync(late_trace_init);
11211