1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * ring buffer based function tracer
4 *
5 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7 *
8 * Originally taken from the RT patch by:
9 * Arnaldo Carvalho de Melo <acme@redhat.com>
10 *
11 * Based on code from the latency_tracer, that is:
12 * Copyright (C) 2004-2006 Ingo Molnar
13 * Copyright (C) 2004 Nadia Yvette Chambers
14 */
15 #include <linux/ring_buffer.h>
16 #include <linux/utsname.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/cleanup.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/panic_notifier.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52 #include <linux/sort.h>
53 #include <linux/io.h> /* vmap_page_range() */
54 #include <linux/fs_context.h>
55
56 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
57
58 #include "trace.h"
59 #include "trace_output.h"
60
61 #ifdef CONFIG_FTRACE_STARTUP_TEST
62 /*
63 * We need to change this state when a selftest is running.
64 * A selftest will look into the ring-buffer to count the
65 * entries inserted during the selftest, although concurrent
66 * insertions into the ring-buffer, such as trace_printk(), could occur
67 * at the same time, giving false positive or negative results.
68 */
69 static bool __read_mostly tracing_selftest_running;
70
71 /*
72 * If boot-time tracing including tracers/events via kernel cmdline
73 * is running, we do not want to run SELFTEST.
74 */
75 bool __read_mostly tracing_selftest_disabled;
76
77 void __init disable_tracing_selftest(const char *reason)
78 {
79 if (!tracing_selftest_disabled) {
80 tracing_selftest_disabled = true;
81 pr_info("Ftrace startup test is disabled due to %s\n", reason);
82 }
83 }
84 #else
85 #define tracing_selftest_running 0
86 #define tracing_selftest_disabled 0
87 #endif
88
89 /* Pipe tracepoints to printk */
90 static struct trace_iterator *tracepoint_print_iter;
91 int tracepoint_printk;
92 static bool tracepoint_printk_stop_on_boot __initdata;
93 static bool traceoff_after_boot __initdata;
94 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
95
96 /* For tracers that don't implement custom flags */
97 static struct tracer_opt dummy_tracer_opt[] = {
98 { }
99 };
100
101 static int
102 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
103 {
104 return 0;
105 }
106
107 /*
108 * To prevent the comm cache from being overwritten when no
109 * tracing is active, only save the comm when a trace event
110 * occurred.
111 */
112 DEFINE_PER_CPU(bool, trace_taskinfo_save);
113
114 /*
115 * Kill all tracing for good (never come back).
116 * It is initialized to 1 but will turn to zero if the initialization
117 * of the tracer is successful. But that is the only place that sets
118 * this back to zero.
119 */
120 static int tracing_disabled = 1;
121
122 cpumask_var_t __read_mostly tracing_buffer_mask;
123
124 #define MAX_TRACER_SIZE 100
125 /*
126 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
127 *
128 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
129 * is set, then ftrace_dump is called. This will output the contents
130 * of the ftrace buffers to the console. This is very useful for
131 * capturing traces that lead to crashes and outputting them to a
132 * serial console.
133 *
134 * It is off by default, but you can enable it either by specifying
135 * "ftrace_dump_on_oops" on the kernel command line, or by setting
136 * /proc/sys/kernel/ftrace_dump_on_oops
137 * Set 1 if you want to dump buffers of all CPUs
138 * Set 2 if you want to dump the buffer of the CPU that triggered oops
139 * Set instance name if you want to dump the specific trace instance
140 * Multiple instance dump is also supported, and instances are separated
141 * by commas.
142 */
143 /* Set to the string "0" to disable dumping by default */
144 char ftrace_dump_on_oops[MAX_TRACER_SIZE] = "0";
145
146 /* When set, tracing will stop when a WARN*() is hit */
147 static int __disable_trace_on_warning;
148
149 int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
150 void *buffer, size_t *lenp, loff_t *ppos);
151 static const struct ctl_table trace_sysctl_table[] = {
152 {
153 .procname = "ftrace_dump_on_oops",
154 .data = &ftrace_dump_on_oops,
155 .maxlen = MAX_TRACER_SIZE,
156 .mode = 0644,
157 .proc_handler = proc_dostring,
158 },
159 {
160 .procname = "traceoff_on_warning",
161 .data = &__disable_trace_on_warning,
162 .maxlen = sizeof(__disable_trace_on_warning),
163 .mode = 0644,
164 .proc_handler = proc_dointvec,
165 },
166 {
167 .procname = "tracepoint_printk",
168 .data = &tracepoint_printk,
169 .maxlen = sizeof(tracepoint_printk),
170 .mode = 0644,
171 .proc_handler = tracepoint_printk_sysctl,
172 },
173 };
174
175 static int __init init_trace_sysctls(void)
176 {
177 register_sysctl_init("kernel", trace_sysctl_table);
178 return 0;
179 }
180 subsys_initcall(init_trace_sysctls);
181
182 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
183 /* Map of enums to their values, for "eval_map" file */
184 struct trace_eval_map_head {
185 struct module *mod;
186 unsigned long length;
187 };
188
189 union trace_eval_map_item;
190
191 struct trace_eval_map_tail {
192 /*
193 * "end" is first and points to NULL as it must be different
194 * from "mod" or "eval_string"
195 */
196 union trace_eval_map_item *next;
197 const char *end; /* points to NULL */
198 };
199
200 static DEFINE_MUTEX(trace_eval_mutex);
201
202 /*
203 * The trace_eval_maps are saved in an array with two extra elements,
204 * one at the beginning, and one at the end. The beginning item contains
205 * the count of the saved maps (head.length), and the module they
206 * belong to if not built in (head.mod). The ending item contains a
207 * pointer to the next array of saved eval_map items.
208 */
209 union trace_eval_map_item {
210 struct trace_eval_map map;
211 struct trace_eval_map_head head;
212 struct trace_eval_map_tail tail;
213 };
214
215 static union trace_eval_map_item *trace_eval_maps;
216 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
217
218 int tracing_set_tracer(struct trace_array *tr, const char *buf);
219 static void ftrace_trace_userstack(struct trace_array *tr,
220 struct trace_buffer *buffer,
221 unsigned int trace_ctx);
222
223 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
224 static char *default_bootup_tracer;
225
226 static bool allocate_snapshot;
227 static bool snapshot_at_boot;
228
229 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
230 static int boot_instance_index;
231
232 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
233 static int boot_snapshot_index;
234
235 static int __init set_cmdline_ftrace(char *str)
236 {
237 strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
238 default_bootup_tracer = bootup_tracer_buf;
239 /* We are using ftrace early, expand it */
240 trace_set_ring_buffer_expanded(NULL);
241 return 1;
242 }
243 __setup("ftrace=", set_cmdline_ftrace);
244
245 int ftrace_dump_on_oops_enabled(void)
246 {
247 if (!strcmp("0", ftrace_dump_on_oops))
248 return 0;
249 else
250 return 1;
251 }
252
253 static int __init set_ftrace_dump_on_oops(char *str)
254 {
255 if (!*str) {
256 strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
257 return 1;
258 }
259
260 if (*str == ',') {
261 strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
262 strscpy(ftrace_dump_on_oops + 1, str, MAX_TRACER_SIZE - 1);
263 return 1;
264 }
265
266 if (*str++ == '=') {
267 strscpy(ftrace_dump_on_oops, str, MAX_TRACER_SIZE);
268 return 1;
269 }
270
271 return 0;
272 }
273 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
274
275 static int __init stop_trace_on_warning(char *str)
276 {
277 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
278 __disable_trace_on_warning = 1;
279 return 1;
280 }
281 __setup("traceoff_on_warning", stop_trace_on_warning);
282
283 static int __init boot_alloc_snapshot(char *str)
284 {
285 char *slot = boot_snapshot_info + boot_snapshot_index;
286 int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
287 int ret;
288
289 if (str[0] == '=') {
290 str++;
291 if (strlen(str) >= left)
292 return -1;
293
294 ret = snprintf(slot, left, "%s\t", str);
295 boot_snapshot_index += ret;
296 } else {
297 allocate_snapshot = true;
298 /* We also need the main ring buffer expanded */
299 trace_set_ring_buffer_expanded(NULL);
300 }
301 return 1;
302 }
303 __setup("alloc_snapshot", boot_alloc_snapshot);
304
305
306 static int __init boot_snapshot(char *str)
307 {
308 snapshot_at_boot = true;
309 boot_alloc_snapshot(str);
310 return 1;
311 }
312 __setup("ftrace_boot_snapshot", boot_snapshot);
313
314
315 static int __init boot_instance(char *str)
316 {
317 char *slot = boot_instance_info + boot_instance_index;
318 int left = sizeof(boot_instance_info) - boot_instance_index;
319 int ret;
320
321 if (strlen(str) >= left)
322 return -1;
323
324 ret = snprintf(slot, left, "%s\t", str);
325 boot_instance_index += ret;
326
327 return 1;
328 }
329 __setup("trace_instance=", boot_instance);
330
331
332 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
333
334 static int __init set_trace_boot_options(char *str)
335 {
336 strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
337 return 1;
338 }
339 __setup("trace_options=", set_trace_boot_options);
340
341 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
342 static char *trace_boot_clock __initdata;
343
344 static int __init set_trace_boot_clock(char *str)
345 {
346 strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
347 trace_boot_clock = trace_boot_clock_buf;
348 return 1;
349 }
350 __setup("trace_clock=", set_trace_boot_clock);
351
352 static int __init set_tracepoint_printk(char *str)
353 {
354 /* Ignore the "tp_printk_stop_on_boot" param */
355 if (*str == '_')
356 return 0;
357
358 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
359 tracepoint_printk = 1;
360 return 1;
361 }
362 __setup("tp_printk", set_tracepoint_printk);
363
364 static int __init set_tracepoint_printk_stop(char *str)
365 {
366 tracepoint_printk_stop_on_boot = true;
367 return 1;
368 }
369 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
370
371 static int __init set_traceoff_after_boot(char *str)
372 {
373 traceoff_after_boot = true;
374 return 1;
375 }
376 __setup("traceoff_after_boot", set_traceoff_after_boot);
377
378 unsigned long long ns2usecs(u64 nsec)
379 {
380 nsec += 500;
381 do_div(nsec, 1000);
382 return nsec;
383 }
384
385 static void
386 trace_process_export(struct trace_export *export,
387 struct ring_buffer_event *event, int flag)
388 {
389 struct trace_entry *entry;
390 unsigned int size = 0;
391
392 if (export->flags & flag) {
393 entry = ring_buffer_event_data(event);
394 size = ring_buffer_event_length(event);
395 export->write(export, entry, size);
396 }
397 }
398
399 static DEFINE_MUTEX(ftrace_export_lock);
400
401 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
402
403 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
404 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
405 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
406
407 static inline void ftrace_exports_enable(struct trace_export *export)
408 {
409 if (export->flags & TRACE_EXPORT_FUNCTION)
410 static_branch_inc(&trace_function_exports_enabled);
411
412 if (export->flags & TRACE_EXPORT_EVENT)
413 static_branch_inc(&trace_event_exports_enabled);
414
415 if (export->flags & TRACE_EXPORT_MARKER)
416 static_branch_inc(&trace_marker_exports_enabled);
417 }
418
419 static inline void ftrace_exports_disable(struct trace_export *export)
420 {
421 if (export->flags & TRACE_EXPORT_FUNCTION)
422 static_branch_dec(&trace_function_exports_enabled);
423
424 if (export->flags & TRACE_EXPORT_EVENT)
425 static_branch_dec(&trace_event_exports_enabled);
426
427 if (export->flags & TRACE_EXPORT_MARKER)
428 static_branch_dec(&trace_marker_exports_enabled);
429 }
430
431 static void ftrace_exports(struct ring_buffer_event *event, int flag)
432 {
433 struct trace_export *export;
434
435 guard(preempt_notrace)();
436
437 export = rcu_dereference_raw_check(ftrace_exports_list);
438 while (export) {
439 trace_process_export(export, event, flag);
440 export = rcu_dereference_raw_check(export->next);
441 }
442 }
443
444 static inline void
445 add_trace_export(struct trace_export **list, struct trace_export *export)
446 {
447 rcu_assign_pointer(export->next, *list);
448 /*
449 * We are entering export into the list but another
450 * CPU might be walking that list. We need to make sure
451 * the export->next pointer is valid before another CPU sees
452 * the export pointer included into the list.
453 */
454 rcu_assign_pointer(*list, export);
455 }
456
457 static inline int
458 rm_trace_export(struct trace_export **list, struct trace_export *export)
459 {
460 struct trace_export **p;
461
462 for (p = list; *p != NULL; p = &(*p)->next)
463 if (*p == export)
464 break;
465
466 if (*p != export)
467 return -1;
468
469 rcu_assign_pointer(*p, (*p)->next);
470
471 return 0;
472 }
473
474 static inline void
475 add_ftrace_export(struct trace_export **list, struct trace_export *export)
476 {
477 ftrace_exports_enable(export);
478
479 add_trace_export(list, export);
480 }
481
482 static inline int
483 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
484 {
485 int ret;
486
487 ret = rm_trace_export(list, export);
488 ftrace_exports_disable(export);
489
490 return ret;
491 }
492
493 int register_ftrace_export(struct trace_export *export)
494 {
495 if (WARN_ON_ONCE(!export->write))
496 return -1;
497
498 guard(mutex)(&ftrace_export_lock);
499
500 add_ftrace_export(&ftrace_exports_list, export);
501
502 return 0;
503 }
504 EXPORT_SYMBOL_GPL(register_ftrace_export);
505
506 int unregister_ftrace_export(struct trace_export *export)
507 {
508 guard(mutex)(&ftrace_export_lock);
509 return rm_ftrace_export(&ftrace_exports_list, export);
510 }
511 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
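/*
 * Sketch of a client of the export API above; my_export_write() and
 * my_export are hypothetical names and only show the expected shape of
 * the callback, which receives the raw trace entry and its length from
 * trace_process_export():
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int len)
 *	{
 *		(forward the entry to some out-of-band transport)
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write = my_export_write,
 *		.flags = TRACE_EXPORT_FUNCTION | TRACE_EXPORT_EVENT,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */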
512
513 /* trace_flags holds trace_options default values */
514 #define TRACE_DEFAULT_FLAGS \
515 (FUNCTION_DEFAULT_FLAGS | \
516 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | \
517 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | \
518 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | \
519 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | \
520 TRACE_ITER_HASH_PTR | TRACE_ITER_TRACE_PRINTK | \
521 TRACE_ITER_COPY_MARKER)
522
523 /* trace_options that are only supported by global_trace */
524 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK | \
525 TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
526
527 /* trace_flags that are default zero for instances */
528 #define ZEROED_TRACE_FLAGS \
529 (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK | TRACE_ITER_TRACE_PRINTK | \
530 TRACE_ITER_COPY_MARKER)
531
532 /*
533 * The global_trace is the descriptor that holds the top-level tracing
534 * buffers for the live tracing.
535 */
536 static struct trace_array global_trace = {
537 .trace_flags = TRACE_DEFAULT_FLAGS,
538 };
539
540 static struct trace_array *printk_trace = &global_trace;
541
542 /* List of trace_arrays interested in the top level trace_marker */
543 static LIST_HEAD(marker_copies);
544
545 static __always_inline bool printk_binsafe(struct trace_array *tr)
546 {
547 /*
548 * The binary format of trace_printk() can cause a crash if used
549 * by a buffer from another boot. Force the use of the
550 * non-binary version of trace_printk() if the trace_printk
551 * buffer is a boot-mapped ring buffer.
552 */
553 return !(tr->flags & TRACE_ARRAY_FL_BOOT);
554 }
555
556 static void update_printk_trace(struct trace_array *tr)
557 {
558 if (printk_trace == tr)
559 return;
560
561 printk_trace->trace_flags &= ~TRACE_ITER_TRACE_PRINTK;
562 printk_trace = tr;
563 tr->trace_flags |= TRACE_ITER_TRACE_PRINTK;
564 }
565
566 /* Returns true if the status of tr changed */
567 static bool update_marker_trace(struct trace_array *tr, int enabled)
568 {
569 lockdep_assert_held(&event_mutex);
570
571 if (enabled) {
572 if (!list_empty(&tr->marker_list))
573 return false;
574
575 list_add_rcu(&tr->marker_list, &marker_copies);
576 tr->trace_flags |= TRACE_ITER_COPY_MARKER;
577 return true;
578 }
579
580 if (list_empty(&tr->marker_list))
581 return false;
582
583 list_del_init(&tr->marker_list);
584 tr->trace_flags &= ~TRACE_ITER_COPY_MARKER;
585 return true;
586 }
587
588 void trace_set_ring_buffer_expanded(struct trace_array *tr)
589 {
590 if (!tr)
591 tr = &global_trace;
592 tr->ring_buffer_expanded = true;
593 }
594
595 LIST_HEAD(ftrace_trace_arrays);
596
597 int trace_array_get(struct trace_array *this_tr)
598 {
599 struct trace_array *tr;
600
601 guard(mutex)(&trace_types_lock);
602 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
603 if (tr == this_tr) {
604 tr->ref++;
605 return 0;
606 }
607 }
608
609 return -ENODEV;
610 }
611
612 static void __trace_array_put(struct trace_array *this_tr)
613 {
614 WARN_ON(!this_tr->ref);
615 this_tr->ref--;
616 }
617
618 /**
619 * trace_array_put - Decrement the reference counter for this trace array.
620 * @this_tr : pointer to the trace array
621 *
622 * NOTE: Use this when we no longer need the trace array returned by
623 * trace_array_get_by_name(). This ensures the trace array can be later
624 * destroyed.
625 *
626 */
627 void trace_array_put(struct trace_array *this_tr)
628 {
629 if (!this_tr)
630 return;
631
632 guard(mutex)(&trace_types_lock);
633 __trace_array_put(this_tr);
634 }
635 EXPORT_SYMBOL_GPL(trace_array_put);
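/*
 * Typical get/put pairing as described above, as a sketch only. The
 * instance name "my_instance" is hypothetical and the exact prototype of
 * trace_array_get_by_name() should be taken from <linux/trace.h>:
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_instance", NULL);
 *	if (tr) {
 *		trace_array_printk(tr, _THIS_IP_, "hello\n");
 *		trace_array_put(tr);
 *	}
 */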
636
637 int tracing_check_open_get_tr(struct trace_array *tr)
638 {
639 int ret;
640
641 ret = security_locked_down(LOCKDOWN_TRACEFS);
642 if (ret)
643 return ret;
644
645 if (tracing_disabled)
646 return -ENODEV;
647
648 if (tr && trace_array_get(tr) < 0)
649 return -ENODEV;
650
651 return 0;
652 }
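/*
 * Sketch of the usual open() pattern built on the helper above (the
 * function name is hypothetical; real users of this helper in this file
 * follow the same shape): take the reference on open, drop it with
 * trace_array_put() on release.
 *
 *	static int example_open(struct inode *inode, struct file *filp)
 *	{
 *		struct trace_array *tr = inode->i_private;
 *		int ret;
 *
 *		ret = tracing_check_open_get_tr(tr);
 *		if (ret)
 *			return ret;
 *
 *		filp->private_data = tr;
 *		return 0;
 *	}
 */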
653
654 /**
655 * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
656 * @filtered_pids: The list of pids to check
657 * @search_pid: The PID to find in @filtered_pids
658 *
659 * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
660 */
661 bool
662 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
663 {
664 return trace_pid_list_is_set(filtered_pids, search_pid);
665 }
666
667 /**
668 * trace_ignore_this_task - should a task be ignored for tracing
669 * @filtered_pids: The list of pids to check
670 * @filtered_no_pids: The list of pids not to be traced
671 * @task: The task that should be ignored if not filtered
672 *
673 * Checks if @task should be traced or not from @filtered_pids.
674 * Returns true if @task should *NOT* be traced.
675 * Returns false if @task should be traced.
676 */
677 bool
678 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
679 struct trace_pid_list *filtered_no_pids,
680 struct task_struct *task)
681 {
682 /*
683 * If filtered_no_pids is not empty, and the task's pid is listed
684 * in filtered_no_pids, then return true.
685 * Otherwise, if filtered_pids is empty, that means we can
686 * trace all tasks. If it has content, then only trace pids
687 * within filtered_pids.
688 */
689
690 return (filtered_pids &&
691 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
692 (filtered_no_pids &&
693 trace_find_filtered_pid(filtered_no_pids, task->pid));
694 }
695
696 /**
697 * trace_filter_add_remove_task - Add or remove a task from a pid_list
698 * @pid_list: The list to modify
699 * @self: The current task for fork or NULL for exit
700 * @task: The task to add or remove
701 *
702 * If adding a task, if @self is defined, the task is only added if @self
703 * is also included in @pid_list. This happens on fork and tasks should
704 * only be added when the parent is listed. If @self is NULL, then the
705 * @task pid will be removed from the list, which would happen on exit
706 * of a task.
707 */
708 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
709 struct task_struct *self,
710 struct task_struct *task)
711 {
712 if (!pid_list)
713 return;
714
715 /* For forks, we only add if the forking task is listed */
716 if (self) {
717 if (!trace_find_filtered_pid(pid_list, self->pid))
718 return;
719 }
720
721 /* "self" is set for forks, and NULL for exits */
722 if (self)
723 trace_pid_list_set(pid_list, task->pid);
724 else
725 trace_pid_list_clear(pid_list, task->pid);
726 }
727
728 /**
729 * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
730 * @pid_list: The pid list to show
731 * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
732 * @pos: The position of the file
733 *
734 * This is used by the seq_file "next" operation to iterate the pids
735 * listed in a trace_pid_list structure.
736 *
737 * Returns the pid+1 as we want to display pid of zero, but NULL would
738 * stop the iteration.
739 */
740 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
741 {
742 long pid = (unsigned long)v;
743 unsigned int next;
744
745 (*pos)++;
746
747 /* pid already is +1 of the actual previous bit */
748 if (trace_pid_list_next(pid_list, pid, &next) < 0)
749 return NULL;
750
751 pid = next;
752
753 /* Return pid + 1 to allow zero to be represented */
754 return (void *)(pid + 1);
755 }
756
757 /**
758 * trace_pid_start - Used for seq_file to start reading pid lists
759 * @pid_list: The pid list to show
760 * @pos: The position of the file
761 *
762 * This is used by seq_file "start" operation to start the iteration
763 * of listing pids.
764 *
765 * Returns the pid+1 as we want to display pid of zero, but NULL would
766 * stop the iteration.
767 */
768 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
769 {
770 unsigned long pid;
771 unsigned int first;
772 loff_t l = 0;
773
774 if (trace_pid_list_first(pid_list, &first) < 0)
775 return NULL;
776
777 pid = first;
778
779 /* Return pid + 1 so that zero can be the exit value */
780 for (pid++; pid && l < *pos;
781 pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
782 ;
783 return (void *)pid;
784 }
785
786 /**
787 * trace_pid_show - show the current pid in seq_file processing
788 * @m: The seq_file structure to write into
789 * @v: A void pointer of the pid (+1) value to display
790 *
791 * Can be directly used by seq_file operations to display the current
792 * pid value.
793 */
794 int trace_pid_show(struct seq_file *m, void *v)
795 {
796 unsigned long pid = (unsigned long)v - 1;
797
798 seq_printf(m, "%lu\n", pid);
799 return 0;
800 }
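/*
 * Sketch of how the helpers above plug into a seq_file interface; the
 * example_pid_* wrapper names are hypothetical, and real users wrap
 * start/next so they can pass their own pid_list while using
 * trace_pid_show() directly:
 *
 *	static const struct seq_operations example_pid_sops = {
 *		.start	= example_pid_start,	(wraps trace_pid_start())
 *		.next	= example_pid_next,	(wraps trace_pid_next())
 *		.stop	= example_pid_stop,
 *		.show	= trace_pid_show,
 *	};
 */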
801
802 /* 128 should be much more than enough */
803 #define PID_BUF_SIZE 127
804
805 int trace_pid_write(struct trace_pid_list *filtered_pids,
806 struct trace_pid_list **new_pid_list,
807 const char __user *ubuf, size_t cnt)
808 {
809 struct trace_pid_list *pid_list;
810 struct trace_parser parser;
811 unsigned long val;
812 int nr_pids = 0;
813 ssize_t read = 0;
814 ssize_t ret;
815 loff_t pos;
816 pid_t pid;
817
818 if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
819 return -ENOMEM;
820
821 /*
822 * Always recreate a new array. The write is an all or nothing
823 * operation. Always create a new array when adding new pids by
824 * the user. If the operation fails, then the current list is
825 * not modified.
826 */
827 pid_list = trace_pid_list_alloc();
828 if (!pid_list) {
829 trace_parser_put(&parser);
830 return -ENOMEM;
831 }
832
833 if (filtered_pids) {
834 /* copy the current bits to the new max */
835 ret = trace_pid_list_first(filtered_pids, &pid);
836 while (!ret) {
837 trace_pid_list_set(pid_list, pid);
838 ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
839 nr_pids++;
840 }
841 }
842
843 ret = 0;
844 while (cnt > 0) {
845
846 pos = 0;
847
848 ret = trace_get_user(&parser, ubuf, cnt, &pos);
849 if (ret < 0)
850 break;
851
852 read += ret;
853 ubuf += ret;
854 cnt -= ret;
855
856 if (!trace_parser_loaded(&parser))
857 break;
858
859 ret = -EINVAL;
860 if (kstrtoul(parser.buffer, 0, &val))
861 break;
862
863 pid = (pid_t)val;
864
865 if (trace_pid_list_set(pid_list, pid) < 0) {
866 ret = -1;
867 break;
868 }
869 nr_pids++;
870
871 trace_parser_clear(&parser);
872 ret = 0;
873 }
874 trace_parser_put(&parser);
875
876 if (ret < 0) {
877 trace_pid_list_free(pid_list);
878 return ret;
879 }
880
881 if (!nr_pids) {
882 /* Cleared the list of pids */
883 trace_pid_list_free(pid_list);
884 pid_list = NULL;
885 }
886
887 *new_pid_list = pid_list;
888
889 return read;
890 }
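/*
 * From user space, the function above backs writes such as the following
 * (a sketch; set_event_pid is one of the files that use this helper):
 *
 *	echo 123 456 > /sys/kernel/tracing/set_event_pid	filter two pids
 *	echo 789 >> /sys/kernel/tracing/set_event_pid		append a pid
 *	echo > /sys/kernel/tracing/set_event_pid		clear the filter
 *
 * An empty write leaves nr_pids at zero, which frees the new list and
 * clears the filter, as handled at the end of the function.
 */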
891
892 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
893 {
894 u64 ts;
895
896 /* Early boot up does not have a buffer yet */
897 if (!buf->buffer)
898 return trace_clock_local();
899
900 ts = ring_buffer_time_stamp(buf->buffer);
901 ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
902
903 return ts;
904 }
905
906 u64 ftrace_now(int cpu)
907 {
908 return buffer_ftrace_now(&global_trace.array_buffer, cpu);
909 }
910
911 /**
912 * tracing_is_enabled - Show if global_trace has been enabled
913 *
914 * Shows if the global trace has been enabled or not. It uses the
915 * mirror flag "buffer_disabled" to be used in fast paths such as for
916 * the irqsoff tracer. But it may be inaccurate due to races. If you
917 * need to know the accurate state, use tracing_is_on() which is a little
918 * slower, but accurate.
919 */
920 int tracing_is_enabled(void)
921 {
922 /*
923 * For quick access (irqsoff uses this in fast path), just
924 * return the mirror variable of the state of the ring buffer.
925 * It's a little racy, but we don't really care.
926 */
927 return !global_trace.buffer_disabled;
928 }
929
930 /*
931 * trace_buf_size is the size in bytes that is allocated
932 * for a buffer. Note, the number of bytes is always rounded
933 * to page size.
934 *
935 * This number is purposely set to a low number of 16384.
936 * If the dump on oops happens, it will be much appreciated
937 * to not have to wait for all that output. Anyway this can be
938 * boot time and run time configurable.
939 */
940 #define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */
941
942 static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
943
944 /* trace_types holds a link list of available tracers. */
945 static struct tracer *trace_types __read_mostly;
946
947 /*
948 * trace_types_lock is used to protect the trace_types list.
949 */
950 DEFINE_MUTEX(trace_types_lock);
951
952 /*
953 * serialize the access of the ring buffer
954 *
955 * The ring buffer serializes readers, but that is only low level protection.
956 * The validity of the events (returned by ring_buffer_peek(), etc.)
957 * is not protected by the ring buffer.
958 *
959 * The content of events may become garbage if we allow other processes to
960 * consume these events concurrently:
961 * A) the page of the consumed events may become a normal page
962 * (not a reader page) in the ring buffer, and this page will be
963 * rewritten by the events producer.
964 * B) The page of the consumed events may become a page for splice_read,
965 * and this page will be returned to the system.
966 *
967 * These primitives allow multiple processes to access different cpu ring
968 * buffers concurrently.
969 *
970 * These primitives don't distinguish read-only and read-consume access.
971 * Multiple read-only accesses are also serialized.
972 */
973
974 #ifdef CONFIG_SMP
975 static DECLARE_RWSEM(all_cpu_access_lock);
976 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
977
978 static inline void trace_access_lock(int cpu)
979 {
980 if (cpu == RING_BUFFER_ALL_CPUS) {
981 /* gain it for accessing the whole ring buffer. */
982 down_write(&all_cpu_access_lock);
983 } else {
984 /* gain it for accessing a cpu ring buffer. */
985
986 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
987 down_read(&all_cpu_access_lock);
988
989 /* Secondly block other access to this @cpu ring buffer. */
990 mutex_lock(&per_cpu(cpu_access_lock, cpu));
991 }
992 }
993
994 static inline void trace_access_unlock(int cpu)
995 {
996 if (cpu == RING_BUFFER_ALL_CPUS) {
997 up_write(&all_cpu_access_lock);
998 } else {
999 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
1000 up_read(&all_cpu_access_lock);
1001 }
1002 }
1003
1004 static inline void trace_access_lock_init(void)
1005 {
1006 int cpu;
1007
1008 for_each_possible_cpu(cpu)
1009 mutex_init(&per_cpu(cpu_access_lock, cpu));
1010 }
1011
1012 #else
1013
1014 static DEFINE_MUTEX(access_lock);
1015
1016 static inline void trace_access_lock(int cpu)
1017 {
1018 (void)cpu;
1019 mutex_lock(&access_lock);
1020 }
1021
1022 static inline void trace_access_unlock(int cpu)
1023 {
1024 (void)cpu;
1025 mutex_unlock(&access_lock);
1026 }
1027
1028 static inline void trace_access_lock_init(void)
1029 {
1030 }
1031
1032 #endif
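/*
 * Sketch of the reader-side pattern these primitives serialize (the
 * consuming call in the middle is illustrative; see the comment block
 * above for what is being protected):
 *
 *	trace_access_lock(cpu);
 *	(consume or splice events from the per-cpu buffer, e.g. via
 *	 ring_buffer_consume() or ring_buffer_read_page())
 *	trace_access_unlock(cpu);
 *
 * Passing RING_BUFFER_ALL_CPUS takes the access lock for all cpu buffers
 * at once.
 */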
1033
1034 #ifdef CONFIG_STACKTRACE
1035 static void __ftrace_trace_stack(struct trace_array *tr,
1036 struct trace_buffer *buffer,
1037 unsigned int trace_ctx,
1038 int skip, struct pt_regs *regs);
1039 static inline void ftrace_trace_stack(struct trace_array *tr,
1040 struct trace_buffer *buffer,
1041 unsigned int trace_ctx,
1042 int skip, struct pt_regs *regs);
1043
1044 #else
1045 static inline void __ftrace_trace_stack(struct trace_array *tr,
1046 struct trace_buffer *buffer,
1047 unsigned int trace_ctx,
1048 int skip, struct pt_regs *regs)
1049 {
1050 }
1051 static inline void ftrace_trace_stack(struct trace_array *tr,
1052 struct trace_buffer *buffer,
1053 unsigned long trace_ctx,
1054 int skip, struct pt_regs *regs)
1055 {
1056 }
1057
1058 #endif
1059
1060 static __always_inline void
1061 trace_event_setup(struct ring_buffer_event *event,
1062 int type, unsigned int trace_ctx)
1063 {
1064 struct trace_entry *ent = ring_buffer_event_data(event);
1065
1066 tracing_generic_entry_update(ent, type, trace_ctx);
1067 }
1068
1069 static __always_inline struct ring_buffer_event *
1070 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
1071 int type,
1072 unsigned long len,
1073 unsigned int trace_ctx)
1074 {
1075 struct ring_buffer_event *event;
1076
1077 event = ring_buffer_lock_reserve(buffer, len);
1078 if (event != NULL)
1079 trace_event_setup(event, type, trace_ctx);
1080
1081 return event;
1082 }
1083
1084 void tracer_tracing_on(struct trace_array *tr)
1085 {
1086 if (tr->array_buffer.buffer)
1087 ring_buffer_record_on(tr->array_buffer.buffer);
1088 /*
1089 * This flag is looked at when buffers haven't been allocated
1090 * yet, or by some tracers (like irqsoff) that just want to
1091 * know if the ring buffer has been disabled, but can handle
1092 * races where it gets disabled while we still do a record.
1093 * As the check is in the fast path of the tracers, it is more
1094 * important to be fast than accurate.
1095 */
1096 tr->buffer_disabled = 0;
1097 }
1098
1099 /**
1100 * tracing_on - enable tracing buffers
1101 *
1102 * This function enables tracing buffers that may have been
1103 * disabled with tracing_off.
1104 */
1105 void tracing_on(void)
1106 {
1107 tracer_tracing_on(&global_trace);
1108 }
1109 EXPORT_SYMBOL_GPL(tracing_on);
1110
1111
1112 static __always_inline void
1113 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1114 {
1115 __this_cpu_write(trace_taskinfo_save, true);
1116
1117 /* If this is the temp buffer, we need to commit fully */
1118 if (this_cpu_read(trace_buffered_event) == event) {
1119 /* Length is in event->array[0] */
1120 ring_buffer_write(buffer, event->array[0], &event->array[1]);
1121 /* Release the temp buffer */
1122 this_cpu_dec(trace_buffered_event_cnt);
1123 /* ring_buffer_unlock_commit() enables preemption */
1124 preempt_enable_notrace();
1125 } else
1126 ring_buffer_unlock_commit(buffer);
1127 }
1128
1129 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1130 const char *str, int size)
1131 {
1132 struct ring_buffer_event *event;
1133 struct trace_buffer *buffer;
1134 struct print_entry *entry;
1135 unsigned int trace_ctx;
1136 int alloc;
1137
1138 if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1139 return 0;
1140
1141 if (unlikely(tracing_selftest_running && tr == &global_trace))
1142 return 0;
1143
1144 if (unlikely(tracing_disabled))
1145 return 0;
1146
1147 alloc = sizeof(*entry) + size + 2; /* possible \n added */
1148
1149 trace_ctx = tracing_gen_ctx();
1150 buffer = tr->array_buffer.buffer;
1151 guard(ring_buffer_nest)(buffer);
1152 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1153 trace_ctx);
1154 if (!event)
1155 return 0;
1156
1157 entry = ring_buffer_event_data(event);
1158 entry->ip = ip;
1159
1160 memcpy(&entry->buf, str, size);
1161
1162 /* Add a newline if necessary */
1163 if (entry->buf[size - 1] != '\n') {
1164 entry->buf[size] = '\n';
1165 entry->buf[size + 1] = '\0';
1166 } else
1167 entry->buf[size] = '\0';
1168
1169 __buffer_unlock_commit(buffer, event);
1170 ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1171 return size;
1172 }
1173 EXPORT_SYMBOL_GPL(__trace_array_puts);
1174
1175 /**
1176 * __trace_puts - write a constant string into the trace buffer.
1177 * @ip: The address of the caller
1178 * @str: The constant string to write
1179 * @size: The size of the string.
1180 */
1181 int __trace_puts(unsigned long ip, const char *str, int size)
1182 {
1183 return __trace_array_puts(printk_trace, ip, str, size);
1184 }
1185 EXPORT_SYMBOL_GPL(__trace_puts);
1186
1187 /**
1188 * __trace_bputs - write the pointer to a constant string into trace buffer
1189 * @ip: The address of the caller
1190 * @str: The constant string to write to the buffer to
1191 */
1192 int __trace_bputs(unsigned long ip, const char *str)
1193 {
1194 struct trace_array *tr = READ_ONCE(printk_trace);
1195 struct ring_buffer_event *event;
1196 struct trace_buffer *buffer;
1197 struct bputs_entry *entry;
1198 unsigned int trace_ctx;
1199 int size = sizeof(struct bputs_entry);
1200
1201 if (!printk_binsafe(tr))
1202 return __trace_puts(ip, str, strlen(str));
1203
1204 if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1205 return 0;
1206
1207 if (unlikely(tracing_selftest_running || tracing_disabled))
1208 return 0;
1209
1210 trace_ctx = tracing_gen_ctx();
1211 buffer = tr->array_buffer.buffer;
1212
1213 guard(ring_buffer_nest)(buffer);
1214 event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1215 trace_ctx);
1216 if (!event)
1217 return 0;
1218
1219 entry = ring_buffer_event_data(event);
1220 entry->ip = ip;
1221 entry->str = str;
1222
1223 __buffer_unlock_commit(buffer, event);
1224 ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1225
1226 return 1;
1227 }
1228 EXPORT_SYMBOL_GPL(__trace_bputs);
1229
1230 #ifdef CONFIG_TRACER_SNAPSHOT
1231 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1232 void *cond_data)
1233 {
1234 struct tracer *tracer = tr->current_trace;
1235 unsigned long flags;
1236
1237 if (in_nmi()) {
1238 trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1239 trace_array_puts(tr, "*** snapshot is being ignored ***\n");
1240 return;
1241 }
1242
1243 if (!tr->allocated_snapshot) {
1244 trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1245 trace_array_puts(tr, "*** stopping trace here! ***\n");
1246 tracer_tracing_off(tr);
1247 return;
1248 }
1249
1250 /* Note, snapshot can not be used when the tracer uses it */
1251 if (tracer->use_max_tr) {
1252 trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1253 trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1254 return;
1255 }
1256
1257 if (tr->mapped) {
1258 trace_array_puts(tr, "*** BUFFER MEMORY MAPPED ***\n");
1259 trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1260 return;
1261 }
1262
1263 local_irq_save(flags);
1264 update_max_tr(tr, current, smp_processor_id(), cond_data);
1265 local_irq_restore(flags);
1266 }
1267
1268 void tracing_snapshot_instance(struct trace_array *tr)
1269 {
1270 tracing_snapshot_instance_cond(tr, NULL);
1271 }
1272
1273 /**
1274 * tracing_snapshot - take a snapshot of the current buffer.
1275 *
1276 * This causes a swap between the snapshot buffer and the current live
1277 * tracing buffer. You can use this to take snapshots of the live
1278 * trace when some condition is triggered, but continue to trace.
1279 *
1280 * Note, make sure to allocate the snapshot with either
1281 * a tracing_snapshot_alloc(), or by doing it manually
1282 * with: echo 1 > /sys/kernel/tracing/snapshot
1283 *
1284 * If the snapshot buffer is not allocated, it will stop tracing.
1285 * Basically making a permanent snapshot.
1286 */
1287 void tracing_snapshot(void)
1288 {
1289 struct trace_array *tr = &global_trace;
1290
1291 tracing_snapshot_instance(tr);
1292 }
1293 EXPORT_SYMBOL_GPL(tracing_snapshot);
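/*
 * Typical usage, as a sketch of the note above: allocate the snapshot
 * buffer once from a context that may sleep, take the snapshot at the
 * interesting point, then read the result from user space:
 *
 *	tracing_snapshot_alloc();	(early, may sleep)
 *	...
 *	tracing_snapshot();		(at the event of interest)
 *
 *	# cat /sys/kernel/tracing/snapshot
 *
 * Or purely from user space:
 *
 *	# echo 1 > /sys/kernel/tracing/snapshot
 */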
1294
1295 /**
1296 * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1297 * @tr: The tracing instance to snapshot
1298 * @cond_data: The data to be tested conditionally, and possibly saved
1299 *
1300 * This is the same as tracing_snapshot() except that the snapshot is
1301 * conditional - the snapshot will only happen if the
1302 * cond_snapshot.update() implementation receiving the cond_data
1303 * returns true, which means that the trace array's cond_snapshot
1304 * update() operation used the cond_data to determine whether the
1305 * snapshot should be taken, and if it was, presumably saved it along
1306 * with the snapshot.
1307 */
1308 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1309 {
1310 tracing_snapshot_instance_cond(tr, cond_data);
1311 }
1312 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1313
1314 /**
1315 * tracing_cond_snapshot_data - get the user data associated with a snapshot
1316 * @tr: The tracing instance
1317 *
1318 * When the user enables a conditional snapshot using
1319 * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1320 * with the snapshot. This accessor is used to retrieve it.
1321 *
1322 * Should not be called from cond_snapshot.update(), since it takes
1323 * the tr->max_lock lock, which the code calling
1324 * cond_snapshot.update() has already done.
1325 *
1326 * Returns the cond_data associated with the trace array's snapshot.
1327 */
1328 void *tracing_cond_snapshot_data(struct trace_array *tr)
1329 {
1330 void *cond_data = NULL;
1331
1332 local_irq_disable();
1333 arch_spin_lock(&tr->max_lock);
1334
1335 if (tr->cond_snapshot)
1336 cond_data = tr->cond_snapshot->cond_data;
1337
1338 arch_spin_unlock(&tr->max_lock);
1339 local_irq_enable();
1340
1341 return cond_data;
1342 }
1343 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1344
1345 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1346 struct array_buffer *size_buf, int cpu_id);
1347 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1348
1349 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1350 {
1351 int order;
1352 int ret;
1353
1354 if (!tr->allocated_snapshot) {
1355
1356 /* Make the snapshot buffer have the same order as main buffer */
1357 order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
1358 ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
1359 if (ret < 0)
1360 return ret;
1361
1362 /* allocate spare buffer */
1363 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1364 &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1365 if (ret < 0)
1366 return ret;
1367
1368 tr->allocated_snapshot = true;
1369 }
1370
1371 return 0;
1372 }
1373
1374 static void free_snapshot(struct trace_array *tr)
1375 {
1376 /*
1377 * We don't free the ring buffer; instead, we resize it, because
1378 * the max_tr ring buffer has some state (e.g. ring->clock) and
1379 * we want to preserve it.
1380 */
1381 ring_buffer_subbuf_order_set(tr->max_buffer.buffer, 0);
1382 ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1383 set_buffer_entries(&tr->max_buffer, 1);
1384 tracing_reset_online_cpus(&tr->max_buffer);
1385 tr->allocated_snapshot = false;
1386 }
1387
1388 static int tracing_arm_snapshot_locked(struct trace_array *tr)
1389 {
1390 int ret;
1391
1392 lockdep_assert_held(&trace_types_lock);
1393
1394 spin_lock(&tr->snapshot_trigger_lock);
1395 if (tr->snapshot == UINT_MAX || tr->mapped) {
1396 spin_unlock(&tr->snapshot_trigger_lock);
1397 return -EBUSY;
1398 }
1399
1400 tr->snapshot++;
1401 spin_unlock(&tr->snapshot_trigger_lock);
1402
1403 ret = tracing_alloc_snapshot_instance(tr);
1404 if (ret) {
1405 spin_lock(&tr->snapshot_trigger_lock);
1406 tr->snapshot--;
1407 spin_unlock(&tr->snapshot_trigger_lock);
1408 }
1409
1410 return ret;
1411 }
1412
1413 int tracing_arm_snapshot(struct trace_array *tr)
1414 {
1415 guard(mutex)(&trace_types_lock);
1416 return tracing_arm_snapshot_locked(tr);
1417 }
1418
1419 void tracing_disarm_snapshot(struct trace_array *tr)
1420 {
1421 spin_lock(&tr->snapshot_trigger_lock);
1422 if (!WARN_ON(!tr->snapshot))
1423 tr->snapshot--;
1424 spin_unlock(&tr->snapshot_trigger_lock);
1425 }
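/*
 * Arm/disarm is a reference count, so users such as snapshot triggers
 * pair the calls around the period in which a snapshot may be taken;
 * a sketch:
 *
 *	ret = tracing_arm_snapshot(tr);		(allocates if needed)
 *	if (ret)
 *		return ret;
 *	...					(tracing_snapshot_instance(tr)
 *						 may be called in between)
 *	tracing_disarm_snapshot(tr);
 */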
1426
1427 /**
1428 * tracing_alloc_snapshot - allocate snapshot buffer.
1429 *
1430 * This only allocates the snapshot buffer if it isn't already
1431 * allocated - it doesn't also take a snapshot.
1432 *
1433 * This is meant to be used in cases where the snapshot buffer needs
1434 * to be set up for events that can't sleep but need to be able to
1435 * trigger a snapshot.
1436 */
1437 int tracing_alloc_snapshot(void)
1438 {
1439 struct trace_array *tr = &global_trace;
1440 int ret;
1441
1442 ret = tracing_alloc_snapshot_instance(tr);
1443 WARN_ON(ret < 0);
1444
1445 return ret;
1446 }
1447 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1448
1449 /**
1450 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1451 *
1452 * This is similar to tracing_snapshot(), but it will allocate the
1453 * snapshot buffer if it isn't already allocated. Use this only
1454 * where it is safe to sleep, as the allocation may sleep.
1455 *
1456 * This causes a swap between the snapshot buffer and the current live
1457 * tracing buffer. You can use this to take snapshots of the live
1458 * trace when some condition is triggered, but continue to trace.
1459 */
1460 void tracing_snapshot_alloc(void)
1461 {
1462 int ret;
1463
1464 ret = tracing_alloc_snapshot();
1465 if (ret < 0)
1466 return;
1467
1468 tracing_snapshot();
1469 }
1470 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1471
1472 /**
1473 * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1474 * @tr: The tracing instance
1475 * @cond_data: User data to associate with the snapshot
1476 * @update: Implementation of the cond_snapshot update function
1477 *
1478 * Check whether the conditional snapshot for the given instance has
1479 * already been enabled, or if the current tracer is already using a
1480 * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1481 * save the cond_data and update function inside.
1482 *
1483 * Returns 0 if successful, error otherwise.
1484 */
1485 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1486 cond_update_fn_t update)
1487 {
1488 struct cond_snapshot *cond_snapshot __free(kfree) =
1489 kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1490 int ret;
1491
1492 if (!cond_snapshot)
1493 return -ENOMEM;
1494
1495 cond_snapshot->cond_data = cond_data;
1496 cond_snapshot->update = update;
1497
1498 guard(mutex)(&trace_types_lock);
1499
1500 if (tr->current_trace->use_max_tr)
1501 return -EBUSY;
1502
1503 /*
1504 * The cond_snapshot can only change to NULL without the
1505 * trace_types_lock. We don't care if we race with it going
1506 * to NULL, but we want to make sure that it's not set to
1507 * something other than NULL when we get here, which we can
1508 * do safely with only holding the trace_types_lock and not
1509 * having to take the max_lock.
1510 */
1511 if (tr->cond_snapshot)
1512 return -EBUSY;
1513
1514 ret = tracing_arm_snapshot_locked(tr);
1515 if (ret)
1516 return ret;
1517
1518 local_irq_disable();
1519 arch_spin_lock(&tr->max_lock);
1520 tr->cond_snapshot = no_free_ptr(cond_snapshot);
1521 arch_spin_unlock(&tr->max_lock);
1522 local_irq_enable();
1523
1524 return 0;
1525 }
1526 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
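/*
 * Sketch of a conditional-snapshot client; my_update() and my_data are
 * hypothetical. The update callback matches cond_update_fn_t and decides,
 * each time a conditional snapshot is requested, whether the buffer swap
 * should actually happen:
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		return (some condition evaluated on cond_data);
 *	}
 *
 *	tracing_snapshot_cond_enable(tr, my_data, my_update);
 *	...
 *	tracing_snapshot_cond(tr, my_data);	(snapshots only if my_update()
 *						 returned true)
 *	...
 *	tracing_snapshot_cond_disable(tr);
 */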
1527
1528 /**
1529 * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1530 * @tr: The tracing instance
1531 *
1532 * Check whether the conditional snapshot for the given instance is
1533 * enabled; if so, free the cond_snapshot associated with it,
1534 * otherwise return -EINVAL.
1535 *
1536 * Returns 0 if successful, error otherwise.
1537 */
1538 int tracing_snapshot_cond_disable(struct trace_array *tr)
1539 {
1540 int ret = 0;
1541
1542 local_irq_disable();
1543 arch_spin_lock(&tr->max_lock);
1544
1545 if (!tr->cond_snapshot)
1546 ret = -EINVAL;
1547 else {
1548 kfree(tr->cond_snapshot);
1549 tr->cond_snapshot = NULL;
1550 }
1551
1552 arch_spin_unlock(&tr->max_lock);
1553 local_irq_enable();
1554
1555 tracing_disarm_snapshot(tr);
1556
1557 return ret;
1558 }
1559 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1560 #else
1561 void tracing_snapshot(void)
1562 {
1563 WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1564 }
1565 EXPORT_SYMBOL_GPL(tracing_snapshot);
1566 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1567 {
1568 WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1569 }
1570 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1571 int tracing_alloc_snapshot(void)
1572 {
1573 WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1574 return -ENODEV;
1575 }
1576 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1577 void tracing_snapshot_alloc(void)
1578 {
1579 /* Give warning */
1580 tracing_snapshot();
1581 }
1582 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1583 void *tracing_cond_snapshot_data(struct trace_array *tr)
1584 {
1585 return NULL;
1586 }
1587 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1588 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1589 {
1590 return -ENODEV;
1591 }
1592 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1593 int tracing_snapshot_cond_disable(struct trace_array *tr)
1594 {
1595 return false;
1596 }
1597 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1598 #define free_snapshot(tr) do { } while (0)
1599 #define tracing_arm_snapshot_locked(tr) ({ -EBUSY; })
1600 #endif /* CONFIG_TRACER_SNAPSHOT */
1601
1602 void tracer_tracing_off(struct trace_array *tr)
1603 {
1604 if (tr->array_buffer.buffer)
1605 ring_buffer_record_off(tr->array_buffer.buffer);
1606 /*
1607 * This flag is looked at when buffers haven't been allocated
1608 * yet, or by some tracers (like irqsoff) that just want to
1609 * know if the ring buffer has been disabled, but can handle
1610 * races where it gets disabled while we still do a record.
1611 * As the check is in the fast path of the tracers, it is more
1612 * important to be fast than accurate.
1613 */
1614 tr->buffer_disabled = 1;
1615 }
1616
1617 /**
1618 * tracer_tracing_disable() - temporarily disable writes to the buffer
1619 * @tr: The trace array whose buffer to disable
1620 *
1621 * Expects tracer_tracing_enable() to re-enable tracing.
1622 * The difference between this and tracer_tracing_off() is that this
1623 * is a counter and can nest, whereas tracer_tracing_off() can
1624 * be called multiple times and a single tracer_tracing_on() will
1625 * enable it.
1626 */
1627 void tracer_tracing_disable(struct trace_array *tr)
1628 {
1629 if (WARN_ON_ONCE(!tr->array_buffer.buffer))
1630 return;
1631
1632 ring_buffer_record_disable(tr->array_buffer.buffer);
1633 }
1634
1635 /**
1636 * tracer_tracing_enable() - counter part of tracer_tracing_disable()
1637 * @tr: The trace array that had tracer_tracing_disable() called on it
1638 *
1639 * This is called after tracer_tracing_disable() has been called on @tr,
1640 * when it's safe to re-enable tracing.
1641 */
1642 void tracer_tracing_enable(struct trace_array *tr)
1643 {
1644 if (WARN_ON_ONCE(!tr->array_buffer.buffer))
1645 return;
1646
1647 ring_buffer_record_enable(tr->array_buffer.buffer);
1648 }
1649
1650 /**
1651 * tracing_off - turn off tracing buffers
1652 *
1653 * This function stops the tracing buffers from recording data.
1654 * It does not disable any overhead the tracers themselves may
1655 * be causing. This function simply causes all recording to
1656 * the ring buffers to fail.
1657 */
1658 void tracing_off(void)
1659 {
1660 tracer_tracing_off(&global_trace);
1661 }
1662 EXPORT_SYMBOL_GPL(tracing_off);
1663
1664 void disable_trace_on_warning(void)
1665 {
1666 if (__disable_trace_on_warning) {
1667 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1668 "Disabling tracing due to warning\n");
1669 tracing_off();
1670 }
1671 }
1672
1673 /**
1674 * tracer_tracing_is_on - show real state of ring buffer enabled
1675 * @tr : the trace array to know if ring buffer is enabled
1676 *
1677 * Shows real state of the ring buffer if it is enabled or not.
1678 */
1679 bool tracer_tracing_is_on(struct trace_array *tr)
1680 {
1681 if (tr->array_buffer.buffer)
1682 return ring_buffer_record_is_set_on(tr->array_buffer.buffer);
1683 return !tr->buffer_disabled;
1684 }
1685
1686 /**
1687 * tracing_is_on - show state of ring buffers enabled
1688 */
1689 int tracing_is_on(void)
1690 {
1691 return tracer_tracing_is_on(&global_trace);
1692 }
1693 EXPORT_SYMBOL_GPL(tracing_is_on);
1694
1695 static int __init set_buf_size(char *str)
1696 {
1697 unsigned long buf_size;
1698
1699 if (!str)
1700 return 0;
1701 buf_size = memparse(str, &str);
1702 /*
1703 * nr_entries can not be zero and the startup
1704 * tests require some buffer space. Therefore
1705 * ensure we have at least 4096 bytes of buffer.
1706 */
1707 trace_buf_size = max(4096UL, buf_size);
1708 return 1;
1709 }
1710 __setup("trace_buf_size=", set_buf_size);
1711
1712 static int __init set_tracing_thresh(char *str)
1713 {
1714 unsigned long threshold;
1715 int ret;
1716
1717 if (!str)
1718 return 0;
1719 ret = kstrtoul(str, 0, &threshold);
1720 if (ret < 0)
1721 return 0;
1722 tracing_thresh = threshold * 1000;
1723 return 1;
1724 }
1725 __setup("tracing_thresh=", set_tracing_thresh);
1726
1727 unsigned long nsecs_to_usecs(unsigned long nsecs)
1728 {
1729 return nsecs / 1000;
1730 }
1731
1732 /*
1733 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1734 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1735 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1736 * of strings in the order that the evals (enum) were defined.
1737 */
1738 #undef C
1739 #define C(a, b) b
1740
1741 /* These must match the bit positions in trace_iterator_flags */
1742 static const char *trace_options[] = {
1743 TRACE_FLAGS
1744 NULL
1745 };
1746
1747 static struct {
1748 u64 (*func)(void);
1749 const char *name;
1750 int in_ns; /* is this clock in nanoseconds? */
1751 } trace_clocks[] = {
1752 { trace_clock_local, "local", 1 },
1753 { trace_clock_global, "global", 1 },
1754 { trace_clock_counter, "counter", 0 },
1755 { trace_clock_jiffies, "uptime", 0 },
1756 { trace_clock, "perf", 1 },
1757 { ktime_get_mono_fast_ns, "mono", 1 },
1758 { ktime_get_raw_fast_ns, "mono_raw", 1 },
1759 { ktime_get_boot_fast_ns, "boot", 1 },
1760 { ktime_get_tai_fast_ns, "tai", 1 },
1761 ARCH_TRACE_CLOCKS
1762 };
1763
1764 bool trace_clock_in_ns(struct trace_array *tr)
1765 {
1766 if (trace_clocks[tr->clock_id].in_ns)
1767 return true;
1768
1769 return false;
1770 }
1771
1772 /*
1773 * trace_parser_get_init - gets the buffer for trace parser
1774 */
1775 int trace_parser_get_init(struct trace_parser *parser, int size)
1776 {
1777 memset(parser, 0, sizeof(*parser));
1778
1779 parser->buffer = kmalloc(size, GFP_KERNEL);
1780 if (!parser->buffer)
1781 return 1;
1782
1783 parser->size = size;
1784 return 0;
1785 }
1786
1787 /*
1788 * trace_parser_put - frees the buffer for trace parser
1789 */
1790 void trace_parser_put(struct trace_parser *parser)
1791 {
1792 kfree(parser->buffer);
1793 parser->buffer = NULL;
1794 }
1795
1796 /*
1797 * trace_get_user - reads the user input string separated by space
1798 * (matched by isspace(ch))
1799 *
1800 * For each string found the 'struct trace_parser' is updated,
1801 * and the function returns.
1802 *
1803 * Returns number of bytes read.
1804 *
1805 * See kernel/trace/trace.h for 'struct trace_parser' details.
1806 */
1807 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1808 size_t cnt, loff_t *ppos)
1809 {
1810 char ch;
1811 size_t read = 0;
1812 ssize_t ret;
1813
1814 if (!*ppos)
1815 trace_parser_clear(parser);
1816
1817 ret = get_user(ch, ubuf++);
1818 if (ret)
1819 goto fail;
1820
1821 read++;
1822 cnt--;
1823
1824 /*
1825 * The parser is not finished with the last write,
1826 * continue reading the user input without skipping spaces.
1827 */
1828 if (!parser->cont) {
1829 /* skip white space */
1830 while (cnt && isspace(ch)) {
1831 ret = get_user(ch, ubuf++);
1832 if (ret)
1833 goto fail;
1834 read++;
1835 cnt--;
1836 }
1837
1838 parser->idx = 0;
1839
1840 /* only spaces were written */
1841 if (isspace(ch) || !ch) {
1842 *ppos += read;
1843 return read;
1844 }
1845 }
1846
1847 /* read the non-space input */
1848 while (cnt && !isspace(ch) && ch) {
1849 if (parser->idx < parser->size - 1)
1850 parser->buffer[parser->idx++] = ch;
1851 else {
1852 ret = -EINVAL;
1853 goto fail;
1854 }
1855
1856 ret = get_user(ch, ubuf++);
1857 if (ret)
1858 goto fail;
1859 read++;
1860 cnt--;
1861 }
1862
1863 /* We either got finished input or we have to wait for another call. */
1864 if (isspace(ch) || !ch) {
1865 parser->buffer[parser->idx] = 0;
1866 parser->cont = false;
1867 } else if (parser->idx < parser->size - 1) {
1868 parser->cont = true;
1869 parser->buffer[parser->idx++] = ch;
1870 /* Make sure the parsed string always terminates with '\0'. */
1871 parser->buffer[parser->idx] = 0;
1872 } else {
1873 ret = -EINVAL;
1874 goto fail;
1875 }
1876
1877 *ppos += read;
1878 return read;
1879 fail:
1880 trace_parser_fail(parser);
1881 return ret;
1882 }
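
/*
 * Editorial example (not part of this file): a minimal sketch of how a
 * tracefs write() handler might drive trace_get_user() to consume one
 * space-separated word per call. The handler and process_word() are
 * hypothetical; the real users look like the ftrace filter write paths.
 *
 *	static ssize_t example_write(struct file *filp, const char __user *ubuf,
 *				     size_t cnt, loff_t *ppos)
 *	{
 *		struct trace_parser parser;
 *		ssize_t ret;
 *
 *		if (trace_parser_get_init(&parser, 256))
 *			return -ENOMEM;
 *
 *		ret = trace_get_user(&parser, ubuf, cnt, ppos);
 *		if (ret >= 0 && trace_parser_loaded(&parser))
 *			process_word(parser.buffer);	// hypothetical consumer
 *
 *		trace_parser_put(&parser);
 *		return ret;
 *	}
 */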
1883
1884 /* TODO add a seq_buf_to_buffer() */
1885 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1886 {
1887 int len;
1888
1889 if (trace_seq_used(s) <= s->readpos)
1890 return -EBUSY;
1891
1892 len = trace_seq_used(s) - s->readpos;
1893 if (cnt > len)
1894 cnt = len;
1895 memcpy(buf, s->buffer + s->readpos, cnt);
1896
1897 s->readpos += cnt;
1898 return cnt;
1899 }
1900
1901 unsigned long __read_mostly tracing_thresh;
1902
1903 #ifdef CONFIG_TRACER_MAX_TRACE
1904 static const struct file_operations tracing_max_lat_fops;
1905
1906 #ifdef LATENCY_FS_NOTIFY
1907
1908 static struct workqueue_struct *fsnotify_wq;
1909
1910 static void latency_fsnotify_workfn(struct work_struct *work)
1911 {
1912 struct trace_array *tr = container_of(work, struct trace_array,
1913 fsnotify_work);
1914 fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1915 }
1916
1917 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1918 {
1919 struct trace_array *tr = container_of(iwork, struct trace_array,
1920 fsnotify_irqwork);
1921 queue_work(fsnotify_wq, &tr->fsnotify_work);
1922 }
1923
1924 static void trace_create_maxlat_file(struct trace_array *tr,
1925 struct dentry *d_tracer)
1926 {
1927 INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1928 init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1929 tr->d_max_latency = trace_create_file("tracing_max_latency",
1930 TRACE_MODE_WRITE,
1931 d_tracer, tr,
1932 &tracing_max_lat_fops);
1933 }
1934
1935 __init static int latency_fsnotify_init(void)
1936 {
1937 fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1938 WQ_UNBOUND | WQ_HIGHPRI, 0);
1939 if (!fsnotify_wq) {
1940 pr_err("Unable to allocate tr_max_lat_wq\n");
1941 return -ENOMEM;
1942 }
1943 return 0;
1944 }
1945
1946 late_initcall_sync(latency_fsnotify_init);
1947
1948 void latency_fsnotify(struct trace_array *tr)
1949 {
1950 if (!fsnotify_wq)
1951 return;
1952 /*
1953 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1954 * possible that we are called from __schedule() or do_idle(), which
1955 * could cause a deadlock.
1956 */
1957 irq_work_queue(&tr->fsnotify_irqwork);
1958 }
1959
1960 #else /* !LATENCY_FS_NOTIFY */
1961
1962 #define trace_create_maxlat_file(tr, d_tracer) \
1963 trace_create_file("tracing_max_latency", TRACE_MODE_WRITE, \
1964 d_tracer, tr, &tracing_max_lat_fops)
1965
1966 #endif
1967
1968 /*
1969 * Copy the new maximum trace into the separate maximum-trace
1970 * structure. (this way the maximum trace is permanently saved,
1971 * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1972 */
1973 static void
1974 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1975 {
1976 struct array_buffer *trace_buf = &tr->array_buffer;
1977 struct array_buffer *max_buf = &tr->max_buffer;
1978 struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1979 struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1980
1981 max_buf->cpu = cpu;
1982 max_buf->time_start = data->preempt_timestamp;
1983
1984 max_data->saved_latency = tr->max_latency;
1985 max_data->critical_start = data->critical_start;
1986 max_data->critical_end = data->critical_end;
1987
1988 strscpy(max_data->comm, tsk->comm);
1989 max_data->pid = tsk->pid;
1990 /*
1991 * If tsk == current, then use current_uid(), as that does not use
1992 * RCU. The irq tracer can be called out of RCU scope.
1993 */
1994 if (tsk == current)
1995 max_data->uid = current_uid();
1996 else
1997 max_data->uid = task_uid(tsk);
1998
1999 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
2000 max_data->policy = tsk->policy;
2001 max_data->rt_priority = tsk->rt_priority;
2002
2003 /* record this tasks comm */
2004 tracing_record_cmdline(tsk);
2005 latency_fsnotify(tr);
2006 }
2007
2008 /**
2009 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
2010 * @tr: the trace array to snapshot
2011 * @tsk: the task with the latency
2012 * @cpu: The cpu that initiated the trace.
2013 * @cond_data: User data associated with a conditional snapshot
2014 *
2015 * Flip the buffers between the @tr and the max_tr and record information
2016 * about which task was the cause of this latency.
2017 */
2018 void
2019 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
2020 void *cond_data)
2021 {
2022 if (tr->stop_count)
2023 return;
2024
2025 WARN_ON_ONCE(!irqs_disabled());
2026
2027 if (!tr->allocated_snapshot) {
2028 /* Only the nop tracer should hit this when disabling */
2029 WARN_ON_ONCE(tr->current_trace != &nop_trace);
2030 return;
2031 }
2032
2033 arch_spin_lock(&tr->max_lock);
2034
2035 /* Inherit the recordable setting from array_buffer */
2036 if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
2037 ring_buffer_record_on(tr->max_buffer.buffer);
2038 else
2039 ring_buffer_record_off(tr->max_buffer.buffer);
2040
2041 #ifdef CONFIG_TRACER_SNAPSHOT
2042 if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
2043 arch_spin_unlock(&tr->max_lock);
2044 return;
2045 }
2046 #endif
2047 swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
2048
2049 __update_max_tr(tr, tsk, cpu);
2050
2051 arch_spin_unlock(&tr->max_lock);
2052
2053 /* Any waiters on the old snapshot buffer need to wake up */
2054 ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
2055 }
2056
2057 /**
2058 * update_max_tr_single - only copy one trace over, and reset the rest
2059 * @tr: the trace array to snapshot
2060 * @tsk: task with the latency
2061 * @cpu: the cpu of the buffer to copy.
2062 *
2063 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
2064 */
2065 void
2066 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
2067 {
2068 int ret;
2069
2070 if (tr->stop_count)
2071 return;
2072
2073 WARN_ON_ONCE(!irqs_disabled());
2074 if (!tr->allocated_snapshot) {
2075 /* Only the nop tracer should hit this when disabling */
2076 WARN_ON_ONCE(tr->current_trace != &nop_trace);
2077 return;
2078 }
2079
2080 arch_spin_lock(&tr->max_lock);
2081
2082 ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
2083
2084 if (ret == -EBUSY) {
2085 /*
2086 * We failed to swap the buffer due to a commit taking
2087 * place on this CPU. We fail to record, but we reset
2088 * the max trace buffer (no one writes directly to it)
2089 * and flag that it failed.
2090 * Another possible reason is that a resize is in progress.
2091 */
2092 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
2093 "Failed to swap buffers due to commit or resize in progress\n");
2094 }
2095
2096 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
2097
2098 __update_max_tr(tr, tsk, cpu);
2099 arch_spin_unlock(&tr->max_lock);
2100 }
2101
2102 #endif /* CONFIG_TRACER_MAX_TRACE */
2103
2104 struct pipe_wait {
2105 struct trace_iterator *iter;
2106 int wait_index;
2107 };
2108
2109 static bool wait_pipe_cond(void *data)
2110 {
2111 struct pipe_wait *pwait = data;
2112 struct trace_iterator *iter = pwait->iter;
2113
2114 if (atomic_read_acquire(&iter->wait_index) != pwait->wait_index)
2115 return true;
2116
2117 return iter->closed;
2118 }
2119
2120 static int wait_on_pipe(struct trace_iterator *iter, int full)
2121 {
2122 struct pipe_wait pwait;
2123 int ret;
2124
2125 /* Iterators are static, they should be filled or empty */
2126 if (trace_buffer_iter(iter, iter->cpu_file))
2127 return 0;
2128
2129 pwait.wait_index = atomic_read_acquire(&iter->wait_index);
2130 pwait.iter = iter;
2131
2132 ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full,
2133 wait_pipe_cond, &pwait);
2134
2135 #ifdef CONFIG_TRACER_MAX_TRACE
2136 /*
2137 * Make sure this is still the snapshot buffer, as if a snapshot were
2138 * to happen, this would now be the main buffer.
2139 */
2140 if (iter->snapshot)
2141 iter->array_buffer = &iter->tr->max_buffer;
2142 #endif
2143 return ret;
2144 }
2145
2146 #ifdef CONFIG_FTRACE_STARTUP_TEST
2147 static bool selftests_can_run;
2148
2149 struct trace_selftests {
2150 struct list_head list;
2151 struct tracer *type;
2152 };
2153
2154 static LIST_HEAD(postponed_selftests);
2155
2156 static int save_selftest(struct tracer *type)
2157 {
2158 struct trace_selftests *selftest;
2159
2160 selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
2161 if (!selftest)
2162 return -ENOMEM;
2163
2164 selftest->type = type;
2165 list_add(&selftest->list, &postponed_selftests);
2166 return 0;
2167 }
2168
2169 static int run_tracer_selftest(struct tracer *type)
2170 {
2171 struct trace_array *tr = &global_trace;
2172 struct tracer *saved_tracer = tr->current_trace;
2173 int ret;
2174
2175 if (!type->selftest || tracing_selftest_disabled)
2176 return 0;
2177
2178 /*
2179 * If a tracer registers early in boot up (before scheduling is
2180 * initialized and such), then do not run its selftests yet.
2181 * Instead, run it a little later in the boot process.
2182 */
2183 if (!selftests_can_run)
2184 return save_selftest(type);
2185
2186 if (!tracing_is_on()) {
2187 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
2188 type->name);
2189 return 0;
2190 }
2191
2192 /*
2193 * Run a selftest on this tracer.
2194 * Here we reset the trace buffer, and set the current
2195 * tracer to be this tracer. The tracer can then run some
2196 * internal tracing to verify that everything is in order.
2197 * If we fail, we do not register this tracer.
2198 */
2199 tracing_reset_online_cpus(&tr->array_buffer);
2200
2201 tr->current_trace = type;
2202
2203 #ifdef CONFIG_TRACER_MAX_TRACE
2204 if (type->use_max_tr) {
2205 /* If we expanded the buffers, make sure the max is expanded too */
2206 if (tr->ring_buffer_expanded)
2207 ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2208 RING_BUFFER_ALL_CPUS);
2209 tr->allocated_snapshot = true;
2210 }
2211 #endif
2212
2213 /* the test is responsible for initializing and enabling */
2214 pr_info("Testing tracer %s: ", type->name);
2215 ret = type->selftest(type, tr);
2216 /* the test is responsible for resetting too */
2217 tr->current_trace = saved_tracer;
2218 if (ret) {
2219 printk(KERN_CONT "FAILED!\n");
2220 /* Add the warning after printing 'FAILED' */
2221 WARN_ON(1);
2222 return -1;
2223 }
2224 /* Only reset on passing, to avoid touching corrupted buffers */
2225 tracing_reset_online_cpus(&tr->array_buffer);
2226
2227 #ifdef CONFIG_TRACER_MAX_TRACE
2228 if (type->use_max_tr) {
2229 tr->allocated_snapshot = false;
2230
2231 /* Shrink the max buffer again */
2232 if (tr->ring_buffer_expanded)
2233 ring_buffer_resize(tr->max_buffer.buffer, 1,
2234 RING_BUFFER_ALL_CPUS);
2235 }
2236 #endif
2237
2238 printk(KERN_CONT "PASSED\n");
2239 return 0;
2240 }
2241
2242 static int do_run_tracer_selftest(struct tracer *type)
2243 {
2244 int ret;
2245
2246 /*
2247 * Tests can take a long time, especially if they are run one after the
2248 * other, as does happen during bootup when all the tracers are
2249 * registered. This could cause the soft lockup watchdog to trigger.
2250 */
2251 cond_resched();
2252
2253 tracing_selftest_running = true;
2254 ret = run_tracer_selftest(type);
2255 tracing_selftest_running = false;
2256
2257 return ret;
2258 }
2259
2260 static __init int init_trace_selftests(void)
2261 {
2262 struct trace_selftests *p, *n;
2263 struct tracer *t, **last;
2264 int ret;
2265
2266 selftests_can_run = true;
2267
2268 guard(mutex)(&trace_types_lock);
2269
2270 if (list_empty(&postponed_selftests))
2271 return 0;
2272
2273 pr_info("Running postponed tracer tests:\n");
2274
2275 tracing_selftest_running = true;
2276 list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2277 /* This loop can take minutes when sanitizers are enabled, so
2278 * let's make sure we allow RCU processing.
2279 */
2280 cond_resched();
2281 ret = run_tracer_selftest(p->type);
2282 /* If the test fails, then warn and remove from available_tracers */
2283 if (ret < 0) {
2284 WARN(1, "tracer: %s failed selftest, disabling\n",
2285 p->type->name);
2286 last = &trace_types;
2287 for (t = trace_types; t; t = t->next) {
2288 if (t == p->type) {
2289 *last = t->next;
2290 break;
2291 }
2292 last = &t->next;
2293 }
2294 }
2295 list_del(&p->list);
2296 kfree(p);
2297 }
2298 tracing_selftest_running = false;
2299
2300 return 0;
2301 }
2302 core_initcall(init_trace_selftests);
2303 #else
2304 static inline int do_run_tracer_selftest(struct tracer *type)
2305 {
2306 return 0;
2307 }
2308 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2309
2310 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2311
2312 static void __init apply_trace_boot_options(void);
2313
2314 /**
2315 * register_tracer - register a tracer with the ftrace system.
2316 * @type: the plugin for the tracer
2317 *
2318 * Register a new plugin tracer.
2319 */
2320 int __init register_tracer(struct tracer *type)
2321 {
2322 struct tracer *t;
2323 int ret = 0;
2324
2325 if (!type->name) {
2326 pr_info("Tracer must have a name\n");
2327 return -1;
2328 }
2329
2330 if (strlen(type->name) >= MAX_TRACER_SIZE) {
2331 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2332 return -1;
2333 }
2334
2335 if (security_locked_down(LOCKDOWN_TRACEFS)) {
2336 pr_warn("Can not register tracer %s due to lockdown\n",
2337 type->name);
2338 return -EPERM;
2339 }
2340
2341 mutex_lock(&trace_types_lock);
2342
2343 for (t = trace_types; t; t = t->next) {
2344 if (strcmp(type->name, t->name) == 0) {
2345 /* already found */
2346 pr_info("Tracer %s already registered\n",
2347 type->name);
2348 ret = -1;
2349 goto out;
2350 }
2351 }
2352
2353 if (!type->set_flag)
2354 type->set_flag = &dummy_set_flag;
2355 if (!type->flags) {
2356 /*allocate a dummy tracer_flags*/
2357 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2358 if (!type->flags) {
2359 ret = -ENOMEM;
2360 goto out;
2361 }
2362 type->flags->val = 0;
2363 type->flags->opts = dummy_tracer_opt;
2364 } else
2365 if (!type->flags->opts)
2366 type->flags->opts = dummy_tracer_opt;
2367
2368 /* store the tracer for __set_tracer_option */
2369 type->flags->trace = type;
2370
2371 ret = do_run_tracer_selftest(type);
2372 if (ret < 0)
2373 goto out;
2374
2375 type->next = trace_types;
2376 trace_types = type;
2377 add_tracer_options(&global_trace, type);
2378
2379 out:
2380 mutex_unlock(&trace_types_lock);
2381
2382 if (ret || !default_bootup_tracer)
2383 return ret;
2384
2385 if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2386 return 0;
2387
2388 printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2389 /* Do we want this tracer to start on bootup? */
2390 tracing_set_tracer(&global_trace, type->name);
2391 default_bootup_tracer = NULL;
2392
2393 apply_trace_boot_options();
2394
2395 /* disable other selftests, since this will break it. */
2396 disable_tracing_selftest("running a tracer");
2397
2398 return 0;
2399 }
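
/*
 * Editorial example (not part of this file): a hedged sketch of the minimum
 * a new plugin tracer has to provide before calling register_tracer(). The
 * "example" tracer and its callbacks are hypothetical; real tracers (see
 * trace_nop.c or trace_functions.c) fill in many more hooks.
 *
 *	static int example_tracer_init(struct trace_array *tr)
 *	{
 *		return 0;	// start whatever the tracer needs here
 *	}
 *
 *	static void example_tracer_reset(struct trace_array *tr)
 *	{
 *	}
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_tracer_init,
 *		.reset	= example_tracer_reset,
 *	};
 *
 *	static __init int init_example_tracer(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *	core_initcall(init_example_tracer);
 */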
2400
2401 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2402 {
2403 struct trace_buffer *buffer = buf->buffer;
2404
2405 if (!buffer)
2406 return;
2407
2408 ring_buffer_record_disable(buffer);
2409
2410 /* Make sure all commits have finished */
2411 synchronize_rcu();
2412 ring_buffer_reset_cpu(buffer, cpu);
2413
2414 ring_buffer_record_enable(buffer);
2415 }
2416
2417 void tracing_reset_online_cpus(struct array_buffer *buf)
2418 {
2419 struct trace_buffer *buffer = buf->buffer;
2420
2421 if (!buffer)
2422 return;
2423
2424 ring_buffer_record_disable(buffer);
2425
2426 /* Make sure all commits have finished */
2427 synchronize_rcu();
2428
2429 buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2430
2431 ring_buffer_reset_online_cpus(buffer);
2432
2433 ring_buffer_record_enable(buffer);
2434 }
2435
2436 static void tracing_reset_all_cpus(struct array_buffer *buf)
2437 {
2438 struct trace_buffer *buffer = buf->buffer;
2439
2440 if (!buffer)
2441 return;
2442
2443 ring_buffer_record_disable(buffer);
2444
2445 /* Make sure all commits have finished */
2446 synchronize_rcu();
2447
2448 buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2449
2450 ring_buffer_reset(buffer);
2451
2452 ring_buffer_record_enable(buffer);
2453 }
2454
2455 /* Must have trace_types_lock held */
2456 void tracing_reset_all_online_cpus_unlocked(void)
2457 {
2458 struct trace_array *tr;
2459
2460 lockdep_assert_held(&trace_types_lock);
2461
2462 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2463 if (!tr->clear_trace)
2464 continue;
2465 tr->clear_trace = false;
2466 tracing_reset_online_cpus(&tr->array_buffer);
2467 #ifdef CONFIG_TRACER_MAX_TRACE
2468 tracing_reset_online_cpus(&tr->max_buffer);
2469 #endif
2470 }
2471 }
2472
2473 void tracing_reset_all_online_cpus(void)
2474 {
2475 guard(mutex)(&trace_types_lock);
2476 tracing_reset_all_online_cpus_unlocked();
2477 }
2478
2479 int is_tracing_stopped(void)
2480 {
2481 return global_trace.stop_count;
2482 }
2483
2484 static void tracing_start_tr(struct trace_array *tr)
2485 {
2486 struct trace_buffer *buffer;
2487
2488 if (tracing_disabled)
2489 return;
2490
2491 guard(raw_spinlock_irqsave)(&tr->start_lock);
2492 if (--tr->stop_count) {
2493 if (WARN_ON_ONCE(tr->stop_count < 0)) {
2494 /* Someone screwed up their debugging */
2495 tr->stop_count = 0;
2496 }
2497 return;
2498 }
2499
2500 /* Prevent the buffers from switching */
2501 arch_spin_lock(&tr->max_lock);
2502
2503 buffer = tr->array_buffer.buffer;
2504 if (buffer)
2505 ring_buffer_record_enable(buffer);
2506
2507 #ifdef CONFIG_TRACER_MAX_TRACE
2508 buffer = tr->max_buffer.buffer;
2509 if (buffer)
2510 ring_buffer_record_enable(buffer);
2511 #endif
2512
2513 arch_spin_unlock(&tr->max_lock);
2514 }
2515
2516 /**
2517 * tracing_start - quick start of the tracer
2518 *
2519 * If tracing is enabled but was stopped by tracing_stop,
2520 * this will start the tracer back up.
2521 */
2522 void tracing_start(void)
2523
2524 {
2525 return tracing_start_tr(&global_trace);
2526 }
2527
2528 static void tracing_stop_tr(struct trace_array *tr)
2529 {
2530 struct trace_buffer *buffer;
2531
2532 guard(raw_spinlock_irqsave)(&tr->start_lock);
2533 if (tr->stop_count++)
2534 return;
2535
2536 /* Prevent the buffers from switching */
2537 arch_spin_lock(&tr->max_lock);
2538
2539 buffer = tr->array_buffer.buffer;
2540 if (buffer)
2541 ring_buffer_record_disable(buffer);
2542
2543 #ifdef CONFIG_TRACER_MAX_TRACE
2544 buffer = tr->max_buffer.buffer;
2545 if (buffer)
2546 ring_buffer_record_disable(buffer);
2547 #endif
2548
2549 arch_spin_unlock(&tr->max_lock);
2550 }
2551
2552 /**
2553 * tracing_stop - quick stop of the tracer
2554 *
2555 * Lightweight way to stop tracing. Use in conjunction with
2556 * tracing_start.
2557 */
2558 void tracing_stop(void)
2559 {
2560 return tracing_stop_tr(&global_trace);
2561 }
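
/*
 * Editorial example (not part of this file): tracing_stop()/tracing_start()
 * are meant to be used as a nested pair around code whose events should be
 * suppressed, e.g. a noisy debug path. A hedged sketch:
 *
 *	tracing_stop();
 *	noisy_operation();	// hypothetical; its events are not recorded
 *	tracing_start();
 *
 * The stop_count handling above makes the pair nestable: recording resumes
 * only when the last tracing_start() balances the first tracing_stop().
 */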
2562
2563 /*
2564 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2565 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2566 * simplifies those functions and keeps them in sync.
2567 */
2568 enum print_line_t trace_handle_return(struct trace_seq *s)
2569 {
2570 return trace_seq_has_overflowed(s) ?
2571 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2572 }
2573 EXPORT_SYMBOL_GPL(trace_handle_return);
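
/*
 * Editorial example (not part of this file): a hedged sketch of how an
 * event's output callback typically ends with trace_handle_return(), so a
 * trace_seq overflow is reported as TRACE_TYPE_PARTIAL_LINE without
 * open-coding the check. The callback itself is hypothetical.
 *
 *	static enum print_line_t example_trace_output(struct trace_iterator *iter,
 *						      int flags, struct trace_event *event)
 *	{
 *		struct trace_seq *s = &iter->seq;
 *
 *		trace_seq_printf(s, "example: type=%d\n", iter->ent->type);
 *		return trace_handle_return(s);
 *	}
 */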
2574
2575 static unsigned short migration_disable_value(void)
2576 {
2577 #if defined(CONFIG_SMP)
2578 return current->migration_disabled;
2579 #else
2580 return 0;
2581 #endif
2582 }
2583
2584 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2585 {
2586 unsigned int trace_flags = irqs_status;
2587 unsigned int pc;
2588
2589 pc = preempt_count();
2590
2591 if (pc & NMI_MASK)
2592 trace_flags |= TRACE_FLAG_NMI;
2593 if (pc & HARDIRQ_MASK)
2594 trace_flags |= TRACE_FLAG_HARDIRQ;
2595 if (in_serving_softirq())
2596 trace_flags |= TRACE_FLAG_SOFTIRQ;
2597 if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2598 trace_flags |= TRACE_FLAG_BH_OFF;
2599
2600 if (tif_need_resched())
2601 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2602 if (test_preempt_need_resched())
2603 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2604 if (IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY) && tif_test_bit(TIF_NEED_RESCHED_LAZY))
2605 trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY;
2606 return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2607 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2608 }
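
/*
 * Editorial note: the return value above packs everything into one word.
 * Reading the expression, the layout is (an illustration, not a new ABI):
 *
 *	bits  0- 3: preemption depth (preempt_count() & 0xff, capped at 0xf)
 *	bits  4- 7: migration-disable depth (capped at 0xf)
 *	bits 16-31: the TRACE_FLAG_* bits computed above
 *
 * which is what the output code later unpacks into the irqs-off,
 * need-resched, preempt-depth and migrate-disable columns.
 */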
2609
2610 struct ring_buffer_event *
2611 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2612 int type,
2613 unsigned long len,
2614 unsigned int trace_ctx)
2615 {
2616 return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2617 }
2618
2619 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2620 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2621 static int trace_buffered_event_ref;
2622
2623 /**
2624 * trace_buffered_event_enable - enable buffering events
2625 *
2626 * When events are being filtered, it is quicker to use a temporary
2627 * buffer to write the event data into if there's a likely chance
2628 * that it will not be committed. The discard of the ring buffer
2629 * is not as fast as committing, and is much slower than copying
2630 * a commit.
2631 *
2632 * When an event is to be filtered, allocate per cpu buffers to
2633 * write the event data into, and if the event is filtered and discarded
2634 * it is simply dropped, otherwise, the entire data is to be committed
2635 * in one shot.
2636 */
2637 void trace_buffered_event_enable(void)
2638 {
2639 struct ring_buffer_event *event;
2640 struct page *page;
2641 int cpu;
2642
2643 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2644
2645 if (trace_buffered_event_ref++)
2646 return;
2647
2648 for_each_tracing_cpu(cpu) {
2649 page = alloc_pages_node(cpu_to_node(cpu),
2650 GFP_KERNEL | __GFP_NORETRY, 0);
2651 /* This is just an optimization and can handle failures */
2652 if (!page) {
2653 pr_err("Failed to allocate event buffer\n");
2654 break;
2655 }
2656
2657 event = page_address(page);
2658 memset(event, 0, sizeof(*event));
2659
2660 per_cpu(trace_buffered_event, cpu) = event;
2661
2662 scoped_guard(preempt,) {
2663 if (cpu == smp_processor_id() &&
2664 __this_cpu_read(trace_buffered_event) !=
2665 per_cpu(trace_buffered_event, cpu))
2666 WARN_ON_ONCE(1);
2667 }
2668 }
2669 }
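
/*
 * Editorial example (not part of this file): a hedged sketch of how the
 * enable/disable pair is meant to be used by the event filtering code.
 * Callers must hold event_mutex (the WARN_ON_ONCE above checks it), and
 * every enable must eventually be balanced by a disable:
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();		// when a filter is attached
 *	...
 *	trace_buffered_event_disable();		// when the filter is removed
 *	mutex_unlock(&event_mutex);
 *
 * The reference count makes this safe when several filters are in use.
 */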
2670
2671 static void enable_trace_buffered_event(void *data)
2672 {
2673 this_cpu_dec(trace_buffered_event_cnt);
2674 }
2675
2676 static void disable_trace_buffered_event(void *data)
2677 {
2678 this_cpu_inc(trace_buffered_event_cnt);
2679 }
2680
2681 /**
2682 * trace_buffered_event_disable - disable buffering events
2683 *
2684 * When a filter is removed, it is faster to not use the buffered
2685 * events, and to commit directly into the ring buffer. Free up
2686 * the temp buffers when there are no more users. This requires
2687 * special synchronization with current events.
2688 */
2689 void trace_buffered_event_disable(void)
2690 {
2691 int cpu;
2692
2693 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2694
2695 if (WARN_ON_ONCE(!trace_buffered_event_ref))
2696 return;
2697
2698 if (--trace_buffered_event_ref)
2699 return;
2700
2701 /* For each CPU, set the buffer as used. */
2702 on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2703 NULL, true);
2704
2705 /* Wait for all current users to finish */
2706 synchronize_rcu();
2707
2708 for_each_tracing_cpu(cpu) {
2709 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2710 per_cpu(trace_buffered_event, cpu) = NULL;
2711 }
2712
2713 /*
2714 * Wait for all CPUs that potentially started checking if they can use
2715 * their event buffer only after the previous synchronize_rcu() call and
2716 * they still read a valid pointer from trace_buffered_event. It must be
2717 * ensured they don't see cleared trace_buffered_event_cnt else they
2718 * could wrongly decide to use the pointed-to buffer which is now freed.
2719 */
2720 synchronize_rcu();
2721
2722 /* For each CPU, relinquish the buffer */
2723 on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2724 true);
2725 }
2726
2727 static struct trace_buffer *temp_buffer;
2728
2729 struct ring_buffer_event *
2730 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2731 struct trace_event_file *trace_file,
2732 int type, unsigned long len,
2733 unsigned int trace_ctx)
2734 {
2735 struct ring_buffer_event *entry;
2736 struct trace_array *tr = trace_file->tr;
2737 int val;
2738
2739 *current_rb = tr->array_buffer.buffer;
2740
2741 if (!tr->no_filter_buffering_ref &&
2742 (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2743 preempt_disable_notrace();
2744 /*
2745 * Filtering is on, so try to use the per cpu buffer first.
2746 * This buffer will simulate a ring_buffer_event,
2747 * where the type_len is zero and the array[0] will
2748 * hold the full length.
2749 * (see include/linux/ring_buffer.h for details on
2750 * how the ring_buffer_event is structured).
2751 *
2752 * Using a temp buffer during filtering and copying it
2753 * on a matched filter is quicker than writing directly
2754 * into the ring buffer and then discarding it when
2755 * it doesn't match. That is because the discard
2756 * requires several atomic operations to get right.
2757 * Copying on match and doing nothing on a failed match
2758 * is still quicker than no copy on match, but having
2759 * to discard out of the ring buffer on a failed match.
2760 */
2761 if ((entry = __this_cpu_read(trace_buffered_event))) {
2762 int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2763
2764 val = this_cpu_inc_return(trace_buffered_event_cnt);
2765
2766 /*
2767 * Preemption is disabled, but interrupts and NMIs
2768 * can still come in now. If that happens after
2769 * the above increment, then it will have to go
2770 * back to the old method of allocating the event
2771 * on the ring buffer, and if the filter fails, it
2772 * will have to call ring_buffer_discard_commit()
2773 * to remove it.
2774 *
2775 * Need to also check the unlikely case that the
2776 * length is bigger than the temp buffer size.
2777 * If that happens, then the reserve is pretty much
2778 * guaranteed to fail, as the ring buffer currently
2779 * only allows events less than a page. But that may
2780 * change in the future, so let the ring buffer reserve
2781 * handle the failure in that case.
2782 */
2783 if (val == 1 && likely(len <= max_len)) {
2784 trace_event_setup(entry, type, trace_ctx);
2785 entry->array[0] = len;
2786 /* Return with preemption disabled */
2787 return entry;
2788 }
2789 this_cpu_dec(trace_buffered_event_cnt);
2790 }
2791 /* __trace_buffer_lock_reserve() disables preemption */
2792 preempt_enable_notrace();
2793 }
2794
2795 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2796 trace_ctx);
2797 /*
2798 * If tracing is off, but we have triggers enabled
2799 * we still need to look at the event data. Use the temp_buffer
2800 * to store the trace event for the trigger to use. It's recursion
2801 * safe and will not be recorded anywhere.
2802 */
2803 if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2804 *current_rb = temp_buffer;
2805 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2806 trace_ctx);
2807 }
2808 return entry;
2809 }
2810 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2811
2812 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2813 static DEFINE_MUTEX(tracepoint_printk_mutex);
2814
2815 static void output_printk(struct trace_event_buffer *fbuffer)
2816 {
2817 struct trace_event_call *event_call;
2818 struct trace_event_file *file;
2819 struct trace_event *event;
2820 unsigned long flags;
2821 struct trace_iterator *iter = tracepoint_print_iter;
2822
2823 /* We should never get here if iter is NULL */
2824 if (WARN_ON_ONCE(!iter))
2825 return;
2826
2827 event_call = fbuffer->trace_file->event_call;
2828 if (!event_call || !event_call->event.funcs ||
2829 !event_call->event.funcs->trace)
2830 return;
2831
2832 file = fbuffer->trace_file;
2833 if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2834 (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2835 !filter_match_preds(file->filter, fbuffer->entry)))
2836 return;
2837
2838 event = &fbuffer->trace_file->event_call->event;
2839
2840 raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2841 trace_seq_init(&iter->seq);
2842 iter->ent = fbuffer->entry;
2843 event_call->event.funcs->trace(iter, 0, event);
2844 trace_seq_putc(&iter->seq, 0);
2845 printk("%s", iter->seq.buffer);
2846
2847 raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2848 }
2849
2850 int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
2851 void *buffer, size_t *lenp,
2852 loff_t *ppos)
2853 {
2854 int save_tracepoint_printk;
2855 int ret;
2856
2857 guard(mutex)(&tracepoint_printk_mutex);
2858 save_tracepoint_printk = tracepoint_printk;
2859
2860 ret = proc_dointvec(table, write, buffer, lenp, ppos);
2861
2862 /*
2863 * This will force exiting early, as tracepoint_printk
2864 * is always zero when tracepoint_print_iter is not allocated
2865 */
2866 if (!tracepoint_print_iter)
2867 tracepoint_printk = 0;
2868
2869 if (save_tracepoint_printk == tracepoint_printk)
2870 return ret;
2871
2872 if (tracepoint_printk)
2873 static_key_enable(&tracepoint_printk_key.key);
2874 else
2875 static_key_disable(&tracepoint_printk_key.key);
2876
2877 return ret;
2878 }
2879
2880 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2881 {
2882 enum event_trigger_type tt = ETT_NONE;
2883 struct trace_event_file *file = fbuffer->trace_file;
2884
2885 if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2886 fbuffer->entry, &tt))
2887 goto discard;
2888
2889 if (static_key_false(&tracepoint_printk_key.key))
2890 output_printk(fbuffer);
2891
2892 if (static_branch_unlikely(&trace_event_exports_enabled))
2893 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2894
2895 trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2896 fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2897
2898 discard:
2899 if (tt)
2900 event_triggers_post_call(file, tt);
2901
2902 }
2903 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2904
2905 /*
2906 * Skip 3:
2907 *
2908 * trace_buffer_unlock_commit_regs()
2909 * trace_event_buffer_commit()
2910 * trace_event_raw_event_xxx()
2911 */
2912 # define STACK_SKIP 3
2913
2914 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2915 struct trace_buffer *buffer,
2916 struct ring_buffer_event *event,
2917 unsigned int trace_ctx,
2918 struct pt_regs *regs)
2919 {
2920 __buffer_unlock_commit(buffer, event);
2921
2922 /*
2923 * If regs is not set, then skip the necessary functions.
2924 * Note, we can still get here via blktrace, wakeup tracer
2925 * and mmiotrace, but that's ok if they lose a function or
2926 * two. They are not that meaningful.
2927 */
2928 ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2929 ftrace_trace_userstack(tr, buffer, trace_ctx);
2930 }
2931
2932 /*
2933 * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2934 */
2935 void
2936 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2937 struct ring_buffer_event *event)
2938 {
2939 __buffer_unlock_commit(buffer, event);
2940 }
2941
2942 void
2943 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2944 parent_ip, unsigned int trace_ctx, struct ftrace_regs *fregs)
2945 {
2946 struct trace_buffer *buffer = tr->array_buffer.buffer;
2947 struct ring_buffer_event *event;
2948 struct ftrace_entry *entry;
2949 int size = sizeof(*entry);
2950
2951 size += FTRACE_REGS_MAX_ARGS * !!fregs * sizeof(long);
2952
2953 event = __trace_buffer_lock_reserve(buffer, TRACE_FN, size,
2954 trace_ctx);
2955 if (!event)
2956 return;
2957 entry = ring_buffer_event_data(event);
2958 entry->ip = ip;
2959 entry->parent_ip = parent_ip;
2960
2961 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
2962 if (fregs) {
2963 for (int i = 0; i < FTRACE_REGS_MAX_ARGS; i++)
2964 entry->args[i] = ftrace_regs_get_argument(fregs, i);
2965 }
2966 #endif
2967
2968 if (static_branch_unlikely(&trace_function_exports_enabled))
2969 ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2970 __buffer_unlock_commit(buffer, event);
2971 }
2972
2973 #ifdef CONFIG_STACKTRACE
2974
2975 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2976 #define FTRACE_KSTACK_NESTING 4
2977
2978 #define FTRACE_KSTACK_ENTRIES (SZ_4K / FTRACE_KSTACK_NESTING)
2979
2980 struct ftrace_stack {
2981 unsigned long calls[FTRACE_KSTACK_ENTRIES];
2982 };
2983
2984
2985 struct ftrace_stacks {
2986 struct ftrace_stack stacks[FTRACE_KSTACK_NESTING];
2987 };
2988
2989 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2990 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2991
2992 static void __ftrace_trace_stack(struct trace_array *tr,
2993 struct trace_buffer *buffer,
2994 unsigned int trace_ctx,
2995 int skip, struct pt_regs *regs)
2996 {
2997 struct ring_buffer_event *event;
2998 unsigned int size, nr_entries;
2999 struct ftrace_stack *fstack;
3000 struct stack_entry *entry;
3001 int stackidx;
3002
3003 /*
3004 * Add one, for this function and the call to save_stack_trace().
3005 * If regs is set, then these functions will not be in the way.
3006 */
3007 #ifndef CONFIG_UNWINDER_ORC
3008 if (!regs)
3009 skip++;
3010 #endif
3011
3012 guard(preempt_notrace)();
3013
3014 stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3015
3016 /* This should never happen. If it does, yell once and skip */
3017 if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3018 goto out;
3019
3020 /*
3021 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3022 * interrupt will either see the value pre increment or post
3023 * increment. If the interrupt happens pre increment it will have
3024 * restored the counter when it returns. We just need a barrier to
3025 * keep gcc from moving things around.
3026 */
3027 barrier();
3028
3029 fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3030 size = ARRAY_SIZE(fstack->calls);
3031
3032 if (regs) {
3033 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3034 size, skip);
3035 } else {
3036 nr_entries = stack_trace_save(fstack->calls, size, skip);
3037 }
3038
3039 #ifdef CONFIG_DYNAMIC_FTRACE
3040 /* Mark entry of stack trace as trampoline code */
3041 if (tr->ops && tr->ops->trampoline) {
3042 unsigned long tramp_start = tr->ops->trampoline;
3043 unsigned long tramp_end = tramp_start + tr->ops->trampoline_size;
3044 unsigned long *calls = fstack->calls;
3045
3046 for (int i = 0; i < nr_entries; i++) {
3047 if (calls[i] >= tramp_start && calls[i] < tramp_end)
3048 calls[i] = FTRACE_TRAMPOLINE_MARKER;
3049 }
3050 }
3051 #endif
3052
3053 event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3054 struct_size(entry, caller, nr_entries),
3055 trace_ctx);
3056 if (!event)
3057 goto out;
3058 entry = ring_buffer_event_data(event);
3059
3060 entry->size = nr_entries;
3061 memcpy(&entry->caller, fstack->calls,
3062 flex_array_size(entry, caller, nr_entries));
3063
3064 __buffer_unlock_commit(buffer, event);
3065
3066 out:
3067 /* Again, don't let gcc optimize things here */
3068 barrier();
3069 __this_cpu_dec(ftrace_stack_reserve);
3070 }
3071
3072 static inline void ftrace_trace_stack(struct trace_array *tr,
3073 struct trace_buffer *buffer,
3074 unsigned int trace_ctx,
3075 int skip, struct pt_regs *regs)
3076 {
3077 if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3078 return;
3079
3080 __ftrace_trace_stack(tr, buffer, trace_ctx, skip, regs);
3081 }
3082
3083 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3084 int skip)
3085 {
3086 struct trace_buffer *buffer = tr->array_buffer.buffer;
3087
3088 if (rcu_is_watching()) {
3089 __ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
3090 return;
3091 }
3092
3093 if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3094 return;
3095
3096 /*
3097 * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3098 * but if the above rcu_is_watching() failed, then the NMI
3099 * triggered someplace critical, and ct_irq_enter() should
3100 * not be called from NMI.
3101 */
3102 if (unlikely(in_nmi()))
3103 return;
3104
3105 ct_irq_enter_irqson();
3106 __ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
3107 ct_irq_exit_irqson();
3108 }
3109
3110 /**
3111 * trace_dump_stack - record a stack back trace in the trace buffer
3112 * @skip: Number of functions to skip (helper handlers)
3113 */
3114 void trace_dump_stack(int skip)
3115 {
3116 if (tracing_disabled || tracing_selftest_running)
3117 return;
3118
3119 #ifndef CONFIG_UNWINDER_ORC
3120 /* Skip 1 to skip this function. */
3121 skip++;
3122 #endif
3123 __ftrace_trace_stack(printk_trace, printk_trace->array_buffer.buffer,
3124 tracing_gen_ctx(), skip, NULL);
3125 }
3126 EXPORT_SYMBOL_GPL(trace_dump_stack);
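
/*
 * Editorial example (not part of this file): trace_dump_stack() is handy
 * when you want to know who reached a code path without flooding dmesg the
 * way dump_stack() would. A hedged sketch inside a hypothetical driver:
 *
 *	if (unlikely(unexpected_condition))	// hypothetical condition
 *		trace_dump_stack(0);		// record a backtrace in the trace buffer
 *
 * The resulting stack entry shows up in /sys/kernel/tracing/trace like any
 * other event, interleaved with the surrounding trace data.
 */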
3127
3128 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3129 static DEFINE_PER_CPU(int, user_stack_count);
3130
3131 static void
3132 ftrace_trace_userstack(struct trace_array *tr,
3133 struct trace_buffer *buffer, unsigned int trace_ctx)
3134 {
3135 struct ring_buffer_event *event;
3136 struct userstack_entry *entry;
3137
3138 if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3139 return;
3140
3141 /*
3142 * NMIs cannot handle page faults, even with fixups.
3143 * Saving the user stack can (and often does) fault.
3144 */
3145 if (unlikely(in_nmi()))
3146 return;
3147
3148 /*
3149 * prevent recursion, since the user stack tracing may
3150 * trigger other kernel events.
3151 */
3152 guard(preempt)();
3153 if (__this_cpu_read(user_stack_count))
3154 return;
3155
3156 __this_cpu_inc(user_stack_count);
3157
3158 event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3159 sizeof(*entry), trace_ctx);
3160 if (!event)
3161 goto out_drop_count;
3162 entry = ring_buffer_event_data(event);
3163
3164 entry->tgid = current->tgid;
3165 memset(&entry->caller, 0, sizeof(entry->caller));
3166
3167 stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3168 __buffer_unlock_commit(buffer, event);
3169
3170 out_drop_count:
3171 __this_cpu_dec(user_stack_count);
3172 }
3173 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3174 static void ftrace_trace_userstack(struct trace_array *tr,
3175 struct trace_buffer *buffer,
3176 unsigned int trace_ctx)
3177 {
3178 }
3179 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3180
3181 #endif /* CONFIG_STACKTRACE */
3182
3183 static inline void
3184 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3185 unsigned long long delta)
3186 {
3187 entry->bottom_delta_ts = delta & U32_MAX;
3188 entry->top_delta_ts = (delta >> 32);
3189 }
3190
3191 void trace_last_func_repeats(struct trace_array *tr,
3192 struct trace_func_repeats *last_info,
3193 unsigned int trace_ctx)
3194 {
3195 struct trace_buffer *buffer = tr->array_buffer.buffer;
3196 struct func_repeats_entry *entry;
3197 struct ring_buffer_event *event;
3198 u64 delta;
3199
3200 event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3201 sizeof(*entry), trace_ctx);
3202 if (!event)
3203 return;
3204
3205 delta = ring_buffer_event_time_stamp(buffer, event) -
3206 last_info->ts_last_call;
3207
3208 entry = ring_buffer_event_data(event);
3209 entry->ip = last_info->ip;
3210 entry->parent_ip = last_info->parent_ip;
3211 entry->count = last_info->count;
3212 func_repeats_set_delta_ts(entry, delta);
3213
3214 __buffer_unlock_commit(buffer, event);
3215 }
3216
3217 /* created for use with alloc_percpu */
3218 struct trace_buffer_struct {
3219 int nesting;
3220 char buffer[4][TRACE_BUF_SIZE];
3221 };
3222
3223 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3224
3225 /*
3226 * This allows for lockless recording. If we're nested too deeply, then
3227 * this returns NULL.
3228 */
3229 static char *get_trace_buf(void)
3230 {
3231 struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3232
3233 if (!trace_percpu_buffer || buffer->nesting >= 4)
3234 return NULL;
3235
3236 buffer->nesting++;
3237
3238 /* Interrupts must see nesting incremented before we use the buffer */
3239 barrier();
3240 return &buffer->buffer[buffer->nesting - 1][0];
3241 }
3242
3243 static void put_trace_buf(void)
3244 {
3245 /* Don't let the decrement of nesting leak before this */
3246 barrier();
3247 this_cpu_dec(trace_percpu_buffer->nesting);
3248 }
3249
3250 static int alloc_percpu_trace_buffer(void)
3251 {
3252 struct trace_buffer_struct __percpu *buffers;
3253
3254 if (trace_percpu_buffer)
3255 return 0;
3256
3257 buffers = alloc_percpu(struct trace_buffer_struct);
3258 if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3259 return -ENOMEM;
3260
3261 trace_percpu_buffer = buffers;
3262 return 0;
3263 }
3264
3265 static int buffers_allocated;
3266
3267 void trace_printk_init_buffers(void)
3268 {
3269 if (buffers_allocated)
3270 return;
3271
3272 if (alloc_percpu_trace_buffer())
3273 return;
3274
3275 /* trace_printk() is for debug use only. Don't use it in production. */
3276
3277 pr_warn("\n");
3278 pr_warn("**********************************************************\n");
3279 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
3280 pr_warn("** **\n");
3281 pr_warn("** trace_printk() being used. Allocating extra memory. **\n");
3282 pr_warn("** **\n");
3283 pr_warn("** This means that this is a DEBUG kernel and it is **\n");
3284 pr_warn("** unsafe for production use. **\n");
3285 pr_warn("** **\n");
3286 pr_warn("** If you see this message and you are not debugging **\n");
3287 pr_warn("** the kernel, report this immediately to your vendor! **\n");
3288 pr_warn("** **\n");
3289 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
3290 pr_warn("**********************************************************\n");
3291
3292 /* Expand the buffers to set size */
3293 tracing_update_buffers(&global_trace);
3294
3295 buffers_allocated = 1;
3296
3297 /*
3298 * trace_printk_init_buffers() can be called by modules.
3299 * If that happens, then we need to start cmdline recording
3300 * directly here. If the global_trace.buffer is already
3301 * allocated here, then this was called by module code.
3302 */
3303 if (global_trace.array_buffer.buffer)
3304 tracing_start_cmdline_record();
3305 }
3306 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3307
3308 void trace_printk_start_comm(void)
3309 {
3310 /* Start tracing comms if trace printk is set */
3311 if (!buffers_allocated)
3312 return;
3313 tracing_start_cmdline_record();
3314 }
3315
3316 static void trace_printk_start_stop_comm(int enabled)
3317 {
3318 if (!buffers_allocated)
3319 return;
3320
3321 if (enabled)
3322 tracing_start_cmdline_record();
3323 else
3324 tracing_stop_cmdline_record();
3325 }
3326
3327 /**
3328 * trace_vbprintk - write binary msg to tracing buffer
3329 * @ip: The address of the caller
3330 * @fmt: The string format to write to the buffer
3331 * @args: Arguments for @fmt
3332 */
3333 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3334 {
3335 struct ring_buffer_event *event;
3336 struct trace_buffer *buffer;
3337 struct trace_array *tr = READ_ONCE(printk_trace);
3338 struct bprint_entry *entry;
3339 unsigned int trace_ctx;
3340 char *tbuffer;
3341 int len = 0, size;
3342
3343 if (!printk_binsafe(tr))
3344 return trace_vprintk(ip, fmt, args);
3345
3346 if (unlikely(tracing_selftest_running || tracing_disabled))
3347 return 0;
3348
3349 /* Don't pollute graph traces with trace_vprintk internals */
3350 pause_graph_tracing();
3351
3352 trace_ctx = tracing_gen_ctx();
3353 guard(preempt_notrace)();
3354
3355 tbuffer = get_trace_buf();
3356 if (!tbuffer) {
3357 len = 0;
3358 goto out_nobuffer;
3359 }
3360
3361 len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3362
3363 if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3364 goto out_put;
3365
3366 size = sizeof(*entry) + sizeof(u32) * len;
3367 buffer = tr->array_buffer.buffer;
3368 scoped_guard(ring_buffer_nest, buffer) {
3369 event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3370 trace_ctx);
3371 if (!event)
3372 goto out_put;
3373 entry = ring_buffer_event_data(event);
3374 entry->ip = ip;
3375 entry->fmt = fmt;
3376
3377 memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3378 __buffer_unlock_commit(buffer, event);
3379 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3380 }
3381 out_put:
3382 put_trace_buf();
3383
3384 out_nobuffer:
3385 unpause_graph_tracing();
3386
3387 return len;
3388 }
3389 EXPORT_SYMBOL_GPL(trace_vbprintk);
3390
3391 static __printf(3, 0)
3392 int __trace_array_vprintk(struct trace_buffer *buffer,
3393 unsigned long ip, const char *fmt, va_list args)
3394 {
3395 struct ring_buffer_event *event;
3396 int len = 0, size;
3397 struct print_entry *entry;
3398 unsigned int trace_ctx;
3399 char *tbuffer;
3400
3401 if (tracing_disabled)
3402 return 0;
3403
3404 /* Don't pollute graph traces with trace_vprintk internals */
3405 pause_graph_tracing();
3406
3407 trace_ctx = tracing_gen_ctx();
3408 guard(preempt_notrace)();
3409
3410
3411 tbuffer = get_trace_buf();
3412 if (!tbuffer) {
3413 len = 0;
3414 goto out_nobuffer;
3415 }
3416
3417 len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3418
3419 size = sizeof(*entry) + len + 1;
3420 scoped_guard(ring_buffer_nest, buffer) {
3421 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3422 trace_ctx);
3423 if (!event)
3424 goto out;
3425 entry = ring_buffer_event_data(event);
3426 entry->ip = ip;
3427
3428 memcpy(&entry->buf, tbuffer, len + 1);
3429 __buffer_unlock_commit(buffer, event);
3430 ftrace_trace_stack(printk_trace, buffer, trace_ctx, 6, NULL);
3431 }
3432 out:
3433 put_trace_buf();
3434
3435 out_nobuffer:
3436 unpause_graph_tracing();
3437
3438 return len;
3439 }
3440
3441 int trace_array_vprintk(struct trace_array *tr,
3442 unsigned long ip, const char *fmt, va_list args)
3443 {
3444 if (tracing_selftest_running && tr == &global_trace)
3445 return 0;
3446
3447 return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3448 }
3449
3450 /**
3451 * trace_array_printk - Print a message to a specific instance
3452 * @tr: The instance trace_array descriptor
3453 * @ip: The instruction pointer that this is called from.
3454 * @fmt: The format to print (printf format)
3455 *
3456 * If a subsystem sets up its own instance, they have the right to
3457 * printk strings into their tracing instance buffer using this
3458 * function. Note, this function will not write into the top level
3459 * buffer (use trace_printk() for that), as writing into the top level
3460 * buffer should only have events that can be individually disabled.
3461 * trace_printk() is only used for debugging a kernel, and should never
3462 * be incorporated into normal use.
3463 *
3464 * trace_array_printk() can be used, as it will not add noise to the
3465 * top level tracing buffer.
3466 *
3467 * Note, trace_array_init_printk() must be called on @tr before this
3468 * can be used.
3469 */
3470 int trace_array_printk(struct trace_array *tr,
3471 unsigned long ip, const char *fmt, ...)
3472 {
3473 int ret;
3474 va_list ap;
3475
3476 if (!tr)
3477 return -ENOENT;
3478
3479 /* This is only allowed for created instances */
3480 if (tr == &global_trace)
3481 return 0;
3482
3483 if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3484 return 0;
3485
3486 va_start(ap, fmt);
3487 ret = trace_array_vprintk(tr, ip, fmt, ap);
3488 va_end(ap);
3489 return ret;
3490 }
3491 EXPORT_SYMBOL_GPL(trace_array_printk);
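
/*
 * Editorial example (not part of this file): a hedged sketch of a subsystem
 * writing into its own tracing instance. How the instance pointer is
 * obtained is deliberately left out (the instance-creation API has changed
 * across kernel versions); the sketch only assumes a valid trace_array.
 *
 *	static int example_use_instance(struct trace_array *tr)	// hypothetical
 *	{
 *		int ret;
 *
 *		ret = trace_array_init_printk(tr);
 *		if (ret)
 *			return ret;
 *
 *		trace_array_printk(tr, _THIS_IP_, "probe fired, state=%d\n", 42);
 *		return 0;
 *	}
 */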
3492
3493 /**
3494 * trace_array_init_printk - Initialize buffers for trace_array_printk()
3495 * @tr: The trace array to initialize the buffers for
3496 *
3497 * As trace_array_printk() only writes into instances, they are OK to
3498 * have in the kernel (unlike trace_printk()). This needs to be called
3499 * before trace_array_printk() can be used on a trace_array.
3500 */
3501 int trace_array_init_printk(struct trace_array *tr)
3502 {
3503 if (!tr)
3504 return -ENOENT;
3505
3506 /* This is only allowed for created instances */
3507 if (tr == &global_trace)
3508 return -EINVAL;
3509
3510 return alloc_percpu_trace_buffer();
3511 }
3512 EXPORT_SYMBOL_GPL(trace_array_init_printk);
3513
3514 int trace_array_printk_buf(struct trace_buffer *buffer,
3515 unsigned long ip, const char *fmt, ...)
3516 {
3517 int ret;
3518 va_list ap;
3519
3520 if (!(printk_trace->trace_flags & TRACE_ITER_PRINTK))
3521 return 0;
3522
3523 va_start(ap, fmt);
3524 ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3525 va_end(ap);
3526 return ret;
3527 }
3528
3529 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3530 {
3531 return trace_array_vprintk(printk_trace, ip, fmt, args);
3532 }
3533 EXPORT_SYMBOL_GPL(trace_vprintk);
3534
3535 static void trace_iterator_increment(struct trace_iterator *iter)
3536 {
3537 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3538
3539 iter->idx++;
3540 if (buf_iter)
3541 ring_buffer_iter_advance(buf_iter);
3542 }
3543
3544 static struct trace_entry *
3545 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3546 unsigned long *lost_events)
3547 {
3548 struct ring_buffer_event *event;
3549 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3550
3551 if (buf_iter) {
3552 event = ring_buffer_iter_peek(buf_iter, ts);
3553 if (lost_events)
3554 *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3555 (unsigned long)-1 : 0;
3556 } else {
3557 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3558 lost_events);
3559 }
3560
3561 if (event) {
3562 iter->ent_size = ring_buffer_event_length(event);
3563 return ring_buffer_event_data(event);
3564 }
3565 iter->ent_size = 0;
3566 return NULL;
3567 }
3568
3569 static struct trace_entry *
3570 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3571 unsigned long *missing_events, u64 *ent_ts)
3572 {
3573 struct trace_buffer *buffer = iter->array_buffer->buffer;
3574 struct trace_entry *ent, *next = NULL;
3575 unsigned long lost_events = 0, next_lost = 0;
3576 int cpu_file = iter->cpu_file;
3577 u64 next_ts = 0, ts;
3578 int next_cpu = -1;
3579 int next_size = 0;
3580 int cpu;
3581
3582 /*
3583 * If we are in a per_cpu trace file, don't bother iterating over
3584 * all CPUs; just peek at that CPU directly.
3585 */
3586 if (cpu_file > RING_BUFFER_ALL_CPUS) {
3587 if (ring_buffer_empty_cpu(buffer, cpu_file))
3588 return NULL;
3589 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3590 if (ent_cpu)
3591 *ent_cpu = cpu_file;
3592
3593 return ent;
3594 }
3595
3596 for_each_tracing_cpu(cpu) {
3597
3598 if (ring_buffer_empty_cpu(buffer, cpu))
3599 continue;
3600
3601 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3602
3603 /*
3604 * Pick the entry with the smallest timestamp:
3605 */
3606 if (ent && (!next || ts < next_ts)) {
3607 next = ent;
3608 next_cpu = cpu;
3609 next_ts = ts;
3610 next_lost = lost_events;
3611 next_size = iter->ent_size;
3612 }
3613 }
3614
3615 iter->ent_size = next_size;
3616
3617 if (ent_cpu)
3618 *ent_cpu = next_cpu;
3619
3620 if (ent_ts)
3621 *ent_ts = next_ts;
3622
3623 if (missing_events)
3624 *missing_events = next_lost;
3625
3626 return next;
3627 }
3628
3629 #define STATIC_FMT_BUF_SIZE 128
3630 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3631
3632 char *trace_iter_expand_format(struct trace_iterator *iter)
3633 {
3634 char *tmp;
3635
3636 /*
3637 * iter->tr is NULL when used with tp_printk, which makes
3638 * this get called where it is not safe to call krealloc().
3639 */
3640 if (!iter->tr || iter->fmt == static_fmt_buf)
3641 return NULL;
3642
3643 tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3644 GFP_KERNEL);
3645 if (tmp) {
3646 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3647 iter->fmt = tmp;
3648 }
3649
3650 return tmp;
3651 }
3652
3653 /* Returns true if the string is safe to dereference from an event */
3654 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3655 {
3656 unsigned long addr = (unsigned long)str;
3657 struct trace_event *trace_event;
3658 struct trace_event_call *event;
3659
3660 /* OK if part of the event data */
3661 if ((addr >= (unsigned long)iter->ent) &&
3662 (addr < (unsigned long)iter->ent + iter->ent_size))
3663 return true;
3664
3665 /* OK if part of the temp seq buffer */
3666 if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3667 (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE))
3668 return true;
3669
3670 /* Core rodata can not be freed */
3671 if (is_kernel_rodata(addr))
3672 return true;
3673
3674 if (trace_is_tracepoint_string(str))
3675 return true;
3676
3677 /*
3678 * Now this could be a module event, referencing core module
3679 * data, which is OK.
3680 */
3681 if (!iter->ent)
3682 return false;
3683
3684 trace_event = ftrace_find_event(iter->ent->type);
3685 if (!trace_event)
3686 return false;
3687
3688 event = container_of(trace_event, struct trace_event_call, event);
3689 if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3690 return false;
3691
3692 /* Would rather have rodata, but this will suffice */
3693 if (within_module_core(addr, event->module))
3694 return true;
3695
3696 return false;
3697 }
3698
3699 /**
3700 * ignore_event - Check dereferenced fields while writing to the seq buffer
3701 * @iter: The iterator that holds the seq buffer and the event being printed
3702 *
3703 * At boot up, test_event_printk() will flag any event that dereferences
3704 * a string with "%s" that does not exist in the ring buffer. It may still
3705 * be valid, as the string may point to a static string in the kernel
3706 * rodata that never gets freed. But if the string pointer is pointing
3707 * to something that was allocated, there's a chance that it can be freed
3708 * by the time the user reads the trace. This would cause a bad memory
3709 * access by the kernel and possibly crash the system.
3710 *
3711 * This function will check if the event has any fields flagged as needing
3712 * to be checked at runtime and perform those checks.
3713 *
3714 * If it is found that a field is unsafe, it will write into the @iter->seq
3715 * a message stating what was found to be unsafe.
3716 *
3717 * @return: true if the event is unsafe and should be ignored,
3718 * false otherwise.
3719 */
3720 bool ignore_event(struct trace_iterator *iter)
3721 {
3722 struct ftrace_event_field *field;
3723 struct trace_event *trace_event;
3724 struct trace_event_call *event;
3725 struct list_head *head;
3726 struct trace_seq *seq;
3727 const void *ptr;
3728
3729 trace_event = ftrace_find_event(iter->ent->type);
3730
3731 seq = &iter->seq;
3732
3733 if (!trace_event) {
3734 trace_seq_printf(seq, "EVENT ID %d NOT FOUND?\n", iter->ent->type);
3735 return true;
3736 }
3737
3738 event = container_of(trace_event, struct trace_event_call, event);
3739 if (!(event->flags & TRACE_EVENT_FL_TEST_STR))
3740 return false;
3741
3742 head = trace_get_fields(event);
3743 if (!head) {
3744 trace_seq_printf(seq, "FIELDS FOR EVENT '%s' NOT FOUND?\n",
3745 trace_event_name(event));
3746 return true;
3747 }
3748
3749 /* Offsets are from the iter->ent that points to the raw event */
3750 ptr = iter->ent;
3751
3752 list_for_each_entry(field, head, link) {
3753 const char *str;
3754 bool good;
3755
3756 if (!field->needs_test)
3757 continue;
3758
3759 str = *(const char **)(ptr + field->offset);
3760
3761 good = trace_safe_str(iter, str);
3762
3763 /*
3764 * If you hit this warning, it is likely that the
3765 * trace event in question used %s on a string that
3766 * was saved at the time of the event, but may not be
3767 * around when the trace is read. Use __string(),
3768 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3769 * instead. See samples/trace_events/trace-events-sample.h
3770 * for reference.
3771 */
3772 if (WARN_ONCE(!good, "event '%s' has unsafe pointer field '%s'",
3773 trace_event_name(event), field->name)) {
3774 trace_seq_printf(seq, "EVENT %s: HAS UNSAFE POINTER FIELD '%s'\n",
3775 trace_event_name(event), field->name);
3776 return true;
3777 }
3778 }
3779 return false;
3780 }
3781
3782 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3783 {
3784 const char *p, *new_fmt;
3785 char *q;
3786
3787 if (WARN_ON_ONCE(!fmt))
3788 return fmt;
3789
3790 if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3791 return fmt;
3792
3793 p = fmt;
3794 new_fmt = q = iter->fmt;
3795 while (*p) {
3796 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3797 if (!trace_iter_expand_format(iter))
3798 return fmt;
3799
3800 q += iter->fmt - new_fmt;
3801 new_fmt = iter->fmt;
3802 }
3803
3804 *q++ = *p++;
3805
3806 /* Replace %p with %px */
3807 if (p[-1] == '%') {
3808 if (p[0] == '%') {
3809 *q++ = *p++;
3810 } else if (p[0] == 'p' && !isalnum(p[1])) {
3811 *q++ = *p++;
3812 *q++ = 'x';
3813 }
3814 }
3815 }
3816 *q = '\0';
3817
3818 return new_fmt;
3819 }
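
/*
 * For example (illustrative only): with the hash-ptr trace option cleared,
 * a format string of "addr=%p count=%d" is rewritten here to
 * "addr=%px count=%d", while "%%p" and "%ps"/"%pS" style specifiers are
 * left untouched.
 */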
3820
3821 #define STATIC_TEMP_BUF_SIZE 128
3822 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3823
3824 /* Find the next real entry, without updating the iterator itself */
3825 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3826 int *ent_cpu, u64 *ent_ts)
3827 {
3828 /* __find_next_entry will reset ent_size */
3829 int ent_size = iter->ent_size;
3830 struct trace_entry *entry;
3831
3832 /*
3833 * If called from ftrace_dump(), then the iter->temp buffer
3834 * will be the static_temp_buf and not created from kmalloc.
3835 * If the entry size is greater than the buffer, we can
3836 * not save it. Just return NULL in that case. This is only
3837 * used to add markers when two consecutive events' time
3838 * stamps have a large delta. See trace_print_lat_context()
3839 */
3840 if (iter->temp == static_temp_buf &&
3841 STATIC_TEMP_BUF_SIZE < ent_size)
3842 return NULL;
3843
3844 /*
3845 * The __find_next_entry() may call peek_next_entry(), which may
3846 * call ring_buffer_peek() that may make the contents of iter->ent
3847 * undefined. Need to copy iter->ent now.
3848 */
3849 if (iter->ent && iter->ent != iter->temp) {
3850 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3851 !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3852 void *temp;
3853 temp = kmalloc(iter->ent_size, GFP_KERNEL);
3854 if (!temp)
3855 return NULL;
3856 kfree(iter->temp);
3857 iter->temp = temp;
3858 iter->temp_size = iter->ent_size;
3859 }
3860 memcpy(iter->temp, iter->ent, iter->ent_size);
3861 iter->ent = iter->temp;
3862 }
3863 entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3864 /* Put back the original ent_size */
3865 iter->ent_size = ent_size;
3866
3867 return entry;
3868 }
3869
3870 /* Find the next real entry, and increment the iterator to the next entry */
3871 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3872 {
3873 iter->ent = __find_next_entry(iter, &iter->cpu,
3874 &iter->lost_events, &iter->ts);
3875
3876 if (iter->ent)
3877 trace_iterator_increment(iter);
3878
3879 return iter->ent ? iter : NULL;
3880 }
3881
3882 static void trace_consume(struct trace_iterator *iter)
3883 {
3884 ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3885 &iter->lost_events);
3886 }
3887
3888 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3889 {
3890 struct trace_iterator *iter = m->private;
3891 int i = (int)*pos;
3892 void *ent;
3893
3894 WARN_ON_ONCE(iter->leftover);
3895
3896 (*pos)++;
3897
3898 /* can't go backwards */
3899 if (iter->idx > i)
3900 return NULL;
3901
3902 if (iter->idx < 0)
3903 ent = trace_find_next_entry_inc(iter);
3904 else
3905 ent = iter;
3906
3907 while (ent && iter->idx < i)
3908 ent = trace_find_next_entry_inc(iter);
3909
3910 iter->pos = *pos;
3911
3912 return ent;
3913 }
3914
3915 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3916 {
3917 struct ring_buffer_iter *buf_iter;
3918 unsigned long entries = 0;
3919 u64 ts;
3920
3921 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3922
3923 buf_iter = trace_buffer_iter(iter, cpu);
3924 if (!buf_iter)
3925 return;
3926
3927 ring_buffer_iter_reset(buf_iter);
3928
3929 /*
3930 * With the max latency tracers we could have the case
3931 * that a reset never took place on a CPU. This is evident
3932 * by the timestamp being before the start of the buffer.
3933 */
3934 while (ring_buffer_iter_peek(buf_iter, &ts)) {
3935 if (ts >= iter->array_buffer->time_start)
3936 break;
3937 entries++;
3938 ring_buffer_iter_advance(buf_iter);
3939 /* This could be a big loop */
3940 cond_resched();
3941 }
3942
3943 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3944 }
3945
3946 /*
3947 * The current tracer is copied to avoid holding a global
3948 * lock all around.
3949 */
3950 static void *s_start(struct seq_file *m, loff_t *pos)
3951 {
3952 struct trace_iterator *iter = m->private;
3953 struct trace_array *tr = iter->tr;
3954 int cpu_file = iter->cpu_file;
3955 void *p = NULL;
3956 loff_t l = 0;
3957 int cpu;
3958
3959 mutex_lock(&trace_types_lock);
3960 if (unlikely(tr->current_trace != iter->trace)) {
3961 /* Close iter->trace before switching to the new current tracer */
3962 if (iter->trace->close)
3963 iter->trace->close(iter);
3964 iter->trace = tr->current_trace;
3965 /* Reopen the new current tracer */
3966 if (iter->trace->open)
3967 iter->trace->open(iter);
3968 }
3969 mutex_unlock(&trace_types_lock);
3970
3971 #ifdef CONFIG_TRACER_MAX_TRACE
3972 if (iter->snapshot && iter->trace->use_max_tr)
3973 return ERR_PTR(-EBUSY);
3974 #endif
3975
3976 if (*pos != iter->pos) {
3977 iter->ent = NULL;
3978 iter->cpu = 0;
3979 iter->idx = -1;
3980
3981 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3982 for_each_tracing_cpu(cpu)
3983 tracing_iter_reset(iter, cpu);
3984 } else
3985 tracing_iter_reset(iter, cpu_file);
3986
3987 iter->leftover = 0;
3988 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3989 ;
3990
3991 } else {
3992 /*
3993 * If we overflowed the seq_file before, then we want
3994 * to just reuse the trace_seq buffer again.
3995 */
3996 if (iter->leftover)
3997 p = iter;
3998 else {
3999 l = *pos - 1;
4000 p = s_next(m, p, &l);
4001 }
4002 }
4003
4004 trace_event_read_lock();
4005 trace_access_lock(cpu_file);
4006 return p;
4007 }
4008
4009 static void s_stop(struct seq_file *m, void *p)
4010 {
4011 struct trace_iterator *iter = m->private;
4012
4013 #ifdef CONFIG_TRACER_MAX_TRACE
4014 if (iter->snapshot && iter->trace->use_max_tr)
4015 return;
4016 #endif
4017
4018 trace_access_unlock(iter->cpu_file);
4019 trace_event_read_unlock();
4020 }
4021
4022 static void
4023 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4024 unsigned long *entries, int cpu)
4025 {
4026 unsigned long count;
4027
4028 count = ring_buffer_entries_cpu(buf->buffer, cpu);
4029 /*
4030 * If this buffer has skipped entries, then we hold all
4031 * entries for the trace and we need to ignore the
4032 * ones before the time stamp.
4033 */
4034 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4035 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4036 /* total is the same as the entries */
4037 *total = count;
4038 } else
4039 *total = count +
4040 ring_buffer_overrun_cpu(buf->buffer, cpu);
4041 *entries = count;
4042 }
4043
4044 static void
4045 get_total_entries(struct array_buffer *buf,
4046 unsigned long *total, unsigned long *entries)
4047 {
4048 unsigned long t, e;
4049 int cpu;
4050
4051 *total = 0;
4052 *entries = 0;
4053
4054 for_each_tracing_cpu(cpu) {
4055 get_total_entries_cpu(buf, &t, &e, cpu);
4056 *total += t;
4057 *entries += e;
4058 }
4059 }
4060
4061 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4062 {
4063 unsigned long total, entries;
4064
4065 if (!tr)
4066 tr = &global_trace;
4067
4068 get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4069
4070 return entries;
4071 }
4072
4073 unsigned long trace_total_entries(struct trace_array *tr)
4074 {
4075 unsigned long total, entries;
4076
4077 if (!tr)
4078 tr = &global_trace;
4079
4080 get_total_entries(&tr->array_buffer, &total, &entries);
4081
4082 return entries;
4083 }
4084
4085 static void print_lat_help_header(struct seq_file *m)
4086 {
4087 seq_puts(m, "# _------=> CPU# \n"
4088 "# / _-----=> irqs-off/BH-disabled\n"
4089 "# | / _----=> need-resched \n"
4090 "# || / _---=> hardirq/softirq \n"
4091 "# ||| / _--=> preempt-depth \n"
4092 "# |||| / _-=> migrate-disable \n"
4093 "# ||||| / delay \n"
4094 "# cmd pid |||||| time | caller \n"
4095 "# \\ / |||||| \\ | / \n");
4096 }
4097
4098 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4099 {
4100 unsigned long total;
4101 unsigned long entries;
4102
4103 get_total_entries(buf, &total, &entries);
4104 seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n",
4105 entries, total, num_online_cpus());
4106 seq_puts(m, "#\n");
4107 }
4108
4109 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4110 unsigned int flags)
4111 {
4112 bool tgid = flags & TRACE_ITER_RECORD_TGID;
4113
4114 print_event_info(buf, m);
4115
4116 seq_printf(m, "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? " TGID " : "");
4117 seq_printf(m, "# | | %s | | |\n", tgid ? " | " : "");
4118 }
4119
4120 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4121 unsigned int flags)
4122 {
4123 bool tgid = flags & TRACE_ITER_RECORD_TGID;
4124 static const char space[] = " ";
4125 int prec = tgid ? 12 : 2;
4126
4127 print_event_info(buf, m);
4128
4129 seq_printf(m, "# %.*s _-----=> irqs-off/BH-disabled\n", prec, space);
4130 seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space);
4131 seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space);
4132 seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space);
4133 seq_printf(m, "# %.*s||| / _-=> migrate-disable\n", prec, space);
4134 seq_printf(m, "# %.*s|||| / delay\n", prec, space);
4135 seq_printf(m, "# TASK-PID %.*s CPU# ||||| TIMESTAMP FUNCTION\n", prec, " TGID ");
4136 seq_printf(m, "# | | %.*s | ||||| | |\n", prec, " | ");
4137 }
4138
4139 void
4140 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4141 {
4142 unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4143 struct array_buffer *buf = iter->array_buffer;
4144 struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4145 struct tracer *type = iter->trace;
4146 unsigned long entries;
4147 unsigned long total;
4148 const char *name = type->name;
4149
4150 get_total_entries(buf, &total, &entries);
4151
4152 seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4153 name, init_utsname()->release);
4154 seq_puts(m, "# -----------------------------------"
4155 "---------------------------------\n");
4156 seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4157 " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4158 nsecs_to_usecs(data->saved_latency),
4159 entries,
4160 total,
4161 buf->cpu,
4162 preempt_model_str(),
4163 /* These are reserved for later use */
4164 0, 0, 0, 0);
4165 #ifdef CONFIG_SMP
4166 seq_printf(m, " #P:%d)\n", num_online_cpus());
4167 #else
4168 seq_puts(m, ")\n");
4169 #endif
4170 seq_puts(m, "# -----------------\n");
4171 seq_printf(m, "# | task: %.16s-%d "
4172 "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4173 data->comm, data->pid,
4174 from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4175 data->policy, data->rt_priority);
4176 seq_puts(m, "# -----------------\n");
4177
4178 if (data->critical_start) {
4179 seq_puts(m, "# => started at: ");
4180 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4181 trace_print_seq(m, &iter->seq);
4182 seq_puts(m, "\n# => ended at: ");
4183 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4184 trace_print_seq(m, &iter->seq);
4185 seq_puts(m, "\n#\n");
4186 }
4187
4188 seq_puts(m, "#\n");
4189 }
4190
4191 static void test_cpu_buff_start(struct trace_iterator *iter)
4192 {
4193 struct trace_seq *s = &iter->seq;
4194 struct trace_array *tr = iter->tr;
4195
4196 if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4197 return;
4198
4199 if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4200 return;
4201
4202 if (cpumask_available(iter->started) &&
4203 cpumask_test_cpu(iter->cpu, iter->started))
4204 return;
4205
4206 if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4207 return;
4208
4209 if (cpumask_available(iter->started))
4210 cpumask_set_cpu(iter->cpu, iter->started);
4211
4212 /* Don't print started cpu buffer for the first entry of the trace */
4213 if (iter->idx > 1)
4214 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4215 iter->cpu);
4216 }
4217
4218 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4219 {
4220 struct trace_array *tr = iter->tr;
4221 struct trace_seq *s = &iter->seq;
4222 unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4223 struct trace_entry *entry;
4224 struct trace_event *event;
4225
4226 entry = iter->ent;
4227
4228 test_cpu_buff_start(iter);
4229
4230 event = ftrace_find_event(entry->type);
4231
4232 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4233 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4234 trace_print_lat_context(iter);
4235 else
4236 trace_print_context(iter);
4237 }
4238
4239 if (trace_seq_has_overflowed(s))
4240 return TRACE_TYPE_PARTIAL_LINE;
4241
4242 if (event) {
4243 if (tr->trace_flags & TRACE_ITER_FIELDS)
4244 return print_event_fields(iter, event);
4245 /*
4246 * For TRACE_EVENT() events, the print_fmt is not
4247 * safe to use if the array has delta offsets.
4248 * Force printing via the fields.
4249 */
4250 if ((tr->text_delta) &&
4251 event->type > __TRACE_LAST_TYPE)
4252 return print_event_fields(iter, event);
4253
4254 return event->funcs->trace(iter, sym_flags, event);
4255 }
4256
4257 trace_seq_printf(s, "Unknown type %d\n", entry->type);
4258
4259 return trace_handle_return(s);
4260 }
4261
4262 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4263 {
4264 struct trace_array *tr = iter->tr;
4265 struct trace_seq *s = &iter->seq;
4266 struct trace_entry *entry;
4267 struct trace_event *event;
4268
4269 entry = iter->ent;
4270
4271 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4272 trace_seq_printf(s, "%d %d %llu ",
4273 entry->pid, iter->cpu, iter->ts);
4274
4275 if (trace_seq_has_overflowed(s))
4276 return TRACE_TYPE_PARTIAL_LINE;
4277
4278 event = ftrace_find_event(entry->type);
4279 if (event)
4280 return event->funcs->raw(iter, 0, event);
4281
4282 trace_seq_printf(s, "%d ?\n", entry->type);
4283
4284 return trace_handle_return(s);
4285 }
4286
4287 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4288 {
4289 struct trace_array *tr = iter->tr;
4290 struct trace_seq *s = &iter->seq;
4291 unsigned char newline = '\n';
4292 struct trace_entry *entry;
4293 struct trace_event *event;
4294
4295 entry = iter->ent;
4296
4297 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4298 SEQ_PUT_HEX_FIELD(s, entry->pid);
4299 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4300 SEQ_PUT_HEX_FIELD(s, iter->ts);
4301 if (trace_seq_has_overflowed(s))
4302 return TRACE_TYPE_PARTIAL_LINE;
4303 }
4304
4305 event = ftrace_find_event(entry->type);
4306 if (event) {
4307 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4308 if (ret != TRACE_TYPE_HANDLED)
4309 return ret;
4310 }
4311
4312 SEQ_PUT_FIELD(s, newline);
4313
4314 return trace_handle_return(s);
4315 }
4316
4317 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4318 {
4319 struct trace_array *tr = iter->tr;
4320 struct trace_seq *s = &iter->seq;
4321 struct trace_entry *entry;
4322 struct trace_event *event;
4323
4324 entry = iter->ent;
4325
4326 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4327 SEQ_PUT_FIELD(s, entry->pid);
4328 SEQ_PUT_FIELD(s, iter->cpu);
4329 SEQ_PUT_FIELD(s, iter->ts);
4330 if (trace_seq_has_overflowed(s))
4331 return TRACE_TYPE_PARTIAL_LINE;
4332 }
4333
4334 event = ftrace_find_event(entry->type);
4335 return event ? event->funcs->binary(iter, 0, event) :
4336 TRACE_TYPE_HANDLED;
4337 }
4338
4339 int trace_empty(struct trace_iterator *iter)
4340 {
4341 struct ring_buffer_iter *buf_iter;
4342 int cpu;
4343
4344 /* If we are looking at one CPU buffer, only check that one */
4345 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4346 cpu = iter->cpu_file;
4347 buf_iter = trace_buffer_iter(iter, cpu);
4348 if (buf_iter) {
4349 if (!ring_buffer_iter_empty(buf_iter))
4350 return 0;
4351 } else {
4352 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4353 return 0;
4354 }
4355 return 1;
4356 }
4357
4358 for_each_tracing_cpu(cpu) {
4359 buf_iter = trace_buffer_iter(iter, cpu);
4360 if (buf_iter) {
4361 if (!ring_buffer_iter_empty(buf_iter))
4362 return 0;
4363 } else {
4364 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4365 return 0;
4366 }
4367 }
4368
4369 return 1;
4370 }
4371
4372 /* Called with trace_event_read_lock() held. */
4373 enum print_line_t print_trace_line(struct trace_iterator *iter)
4374 {
4375 struct trace_array *tr = iter->tr;
4376 unsigned long trace_flags = tr->trace_flags;
4377 enum print_line_t ret;
4378
4379 if (iter->lost_events) {
4380 if (iter->lost_events == (unsigned long)-1)
4381 trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4382 iter->cpu);
4383 else
4384 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4385 iter->cpu, iter->lost_events);
4386 if (trace_seq_has_overflowed(&iter->seq))
4387 return TRACE_TYPE_PARTIAL_LINE;
4388 }
4389
4390 if (iter->trace && iter->trace->print_line) {
4391 ret = iter->trace->print_line(iter);
4392 if (ret != TRACE_TYPE_UNHANDLED)
4393 return ret;
4394 }
4395
4396 if (iter->ent->type == TRACE_BPUTS &&
4397 trace_flags & TRACE_ITER_PRINTK &&
4398 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4399 return trace_print_bputs_msg_only(iter);
4400
4401 if (iter->ent->type == TRACE_BPRINT &&
4402 trace_flags & TRACE_ITER_PRINTK &&
4403 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4404 return trace_print_bprintk_msg_only(iter);
4405
4406 if (iter->ent->type == TRACE_PRINT &&
4407 trace_flags & TRACE_ITER_PRINTK &&
4408 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4409 return trace_print_printk_msg_only(iter);
4410
4411 if (trace_flags & TRACE_ITER_BIN)
4412 return print_bin_fmt(iter);
4413
4414 if (trace_flags & TRACE_ITER_HEX)
4415 return print_hex_fmt(iter);
4416
4417 if (trace_flags & TRACE_ITER_RAW)
4418 return print_raw_fmt(iter);
4419
4420 return print_trace_fmt(iter);
4421 }
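
/*
 * Note the precedence above: lost-event annotations are emitted first,
 * then a tracer-specific print_line() callback gets a chance, then the
 * printk-msg-only shortcuts, and finally the bin/hex/raw/default output
 * formats selected by the trace_options flags.
 */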
4422
4423 void trace_latency_header(struct seq_file *m)
4424 {
4425 struct trace_iterator *iter = m->private;
4426 struct trace_array *tr = iter->tr;
4427
4428 /* print nothing if the buffers are empty */
4429 if (trace_empty(iter))
4430 return;
4431
4432 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4433 print_trace_header(m, iter);
4434
4435 if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4436 print_lat_help_header(m);
4437 }
4438
4439 void trace_default_header(struct seq_file *m)
4440 {
4441 struct trace_iterator *iter = m->private;
4442 struct trace_array *tr = iter->tr;
4443 unsigned long trace_flags = tr->trace_flags;
4444
4445 if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4446 return;
4447
4448 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4449 /* print nothing if the buffers are empty */
4450 if (trace_empty(iter))
4451 return;
4452 print_trace_header(m, iter);
4453 if (!(trace_flags & TRACE_ITER_VERBOSE))
4454 print_lat_help_header(m);
4455 } else {
4456 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4457 if (trace_flags & TRACE_ITER_IRQ_INFO)
4458 print_func_help_header_irq(iter->array_buffer,
4459 m, trace_flags);
4460 else
4461 print_func_help_header(iter->array_buffer, m,
4462 trace_flags);
4463 }
4464 }
4465 }
4466
4467 static void test_ftrace_alive(struct seq_file *m)
4468 {
4469 if (!ftrace_is_dead())
4470 return;
4471 seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4472 "# MAY BE MISSING FUNCTION EVENTS\n");
4473 }
4474
4475 #ifdef CONFIG_TRACER_MAX_TRACE
4476 static void show_snapshot_main_help(struct seq_file *m)
4477 {
4478 seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4479 "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4480 "# Takes a snapshot of the main buffer.\n"
4481 "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4482 "# (Doesn't have to be '2' works with any number that\n"
4483 "# is not a '0' or '1')\n");
4484 }
4485
4486 static void show_snapshot_percpu_help(struct seq_file *m)
4487 {
4488 seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4489 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4490 seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4491 "# Takes a snapshot of the main buffer for this cpu.\n");
4492 #else
4493 seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4494 "# Must use main snapshot file to allocate.\n");
4495 #endif
4496 seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4497 "# (Doesn't have to be '2' works with any number that\n"
4498 "# is not a '0' or '1')\n");
4499 }
4500
4501 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4502 {
4503 if (iter->tr->allocated_snapshot)
4504 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4505 else
4506 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4507
4508 seq_puts(m, "# Snapshot commands:\n");
4509 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4510 show_snapshot_main_help(m);
4511 else
4512 show_snapshot_percpu_help(m);
4513 }
4514 #else
4515 /* Should never be called */
4516 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4517 #endif
4518
4519 static int s_show(struct seq_file *m, void *v)
4520 {
4521 struct trace_iterator *iter = v;
4522 int ret;
4523
4524 if (iter->ent == NULL) {
4525 if (iter->tr) {
4526 seq_printf(m, "# tracer: %s\n", iter->trace->name);
4527 seq_puts(m, "#\n");
4528 test_ftrace_alive(m);
4529 }
4530 if (iter->snapshot && trace_empty(iter))
4531 print_snapshot_help(m, iter);
4532 else if (iter->trace && iter->trace->print_header)
4533 iter->trace->print_header(m);
4534 else
4535 trace_default_header(m);
4536
4537 } else if (iter->leftover) {
4538 /*
4539 * If we filled the seq_file buffer earlier, we
4540 * want to just show it now.
4541 */
4542 ret = trace_print_seq(m, &iter->seq);
4543
4544 /* ret should this time be zero, but you never know */
4545 iter->leftover = ret;
4546
4547 } else {
4548 ret = print_trace_line(iter);
4549 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4550 iter->seq.full = 0;
4551 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
4552 }
4553 ret = trace_print_seq(m, &iter->seq);
4554 /*
4555 * If we overflow the seq_file buffer, then it will
4556 * ask us for this data again at start up.
4557 * Use that instead.
4558 * ret is 0 if seq_file write succeeded.
4559 * -1 otherwise.
4560 */
4561 iter->leftover = ret;
4562 }
4563
4564 return 0;
4565 }
4566
4567 /*
4568 * Should be used after trace_array_get(), trace_types_lock
4569 * ensures that i_cdev was already initialized.
4570 */
4571 static inline int tracing_get_cpu(struct inode *inode)
4572 {
4573 if (inode->i_cdev) /* See trace_create_cpu_file() */
4574 return (long)inode->i_cdev - 1;
4575 return RING_BUFFER_ALL_CPUS;
4576 }
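
/*
 * The per-CPU files store "cpu + 1" in i_cdev (see trace_create_cpu_file()),
 * so a NULL i_cdev means "no specific CPU" and maps to RING_BUFFER_ALL_CPUS.
 */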
4577
4578 static const struct seq_operations tracer_seq_ops = {
4579 .start = s_start,
4580 .next = s_next,
4581 .stop = s_stop,
4582 .show = s_show,
4583 };
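
/*
 * The seq_file core drives a read of the "trace" file roughly as
 * s_start() -> (s_show() -> s_next())* -> s_stop(), with s_start()
 * taking trace_event_read_lock()/trace_access_lock() and s_stop()
 * releasing them. This is a summary of the callbacks above, not an
 * additional code path.
 */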
4584
4585 /*
4586 * Note, as iter itself can be allocated and freed in different
4587 * ways, this function is only used to free its content, and not
4588 * the iterator itself. The only requirement for all the allocations
4589 * is that they zero all fields (kzalloc), as freeing works with
4590 * either allocated content or NULL.
4591 */
4592 static void free_trace_iter_content(struct trace_iterator *iter)
4593 {
4594 /* The fmt is either NULL, allocated or points to static_fmt_buf */
4595 if (iter->fmt != static_fmt_buf)
4596 kfree(iter->fmt);
4597
4598 kfree(iter->temp);
4599 kfree(iter->buffer_iter);
4600 mutex_destroy(&iter->mutex);
4601 free_cpumask_var(iter->started);
4602 }
4603
4604 static struct trace_iterator *
4605 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4606 {
4607 struct trace_array *tr = inode->i_private;
4608 struct trace_iterator *iter;
4609 int cpu;
4610
4611 if (tracing_disabled)
4612 return ERR_PTR(-ENODEV);
4613
4614 iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4615 if (!iter)
4616 return ERR_PTR(-ENOMEM);
4617
4618 iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4619 GFP_KERNEL);
4620 if (!iter->buffer_iter)
4621 goto release;
4622
4623 /*
4624 * trace_find_next_entry() may need to save off iter->ent.
4625 * It will place it into the iter->temp buffer. As most
4626 * events are less than 128, allocate a buffer of that size.
4627 * If one is greater, then trace_find_next_entry() will
4628 * allocate a new buffer to adjust for the bigger iter->ent.
4629 * It's not critical if it fails to get allocated here.
4630 */
4631 iter->temp = kmalloc(128, GFP_KERNEL);
4632 if (iter->temp)
4633 iter->temp_size = 128;
4634
4635 /*
4636 * trace_event_printf() may need to modify the given format
4637 * string to replace %p with %px so that it shows the real address
4638 * instead of a hash value. However, that is only needed for event
4639 * tracing; other tracers may not need it. Defer the allocation
4640 * until it is needed.
4641 */
4642 iter->fmt = NULL;
4643 iter->fmt_size = 0;
4644
4645 mutex_lock(&trace_types_lock);
4646 iter->trace = tr->current_trace;
4647
4648 if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4649 goto fail;
4650
4651 iter->tr = tr;
4652
4653 #ifdef CONFIG_TRACER_MAX_TRACE
4654 /* Currently only the top directory has a snapshot */
4655 if (tr->current_trace->print_max || snapshot)
4656 iter->array_buffer = &tr->max_buffer;
4657 else
4658 #endif
4659 iter->array_buffer = &tr->array_buffer;
4660 iter->snapshot = snapshot;
4661 iter->pos = -1;
4662 iter->cpu_file = tracing_get_cpu(inode);
4663 mutex_init(&iter->mutex);
4664
4665 /* Notify the tracer early; before we stop tracing. */
4666 if (iter->trace->open)
4667 iter->trace->open(iter);
4668
4669 /* Annotate start of buffers if we had overruns */
4670 if (ring_buffer_overruns(iter->array_buffer->buffer))
4671 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4672
4673 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4674 if (trace_clocks[tr->clock_id].in_ns)
4675 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4676
4677 /*
4678 * If pause-on-trace is enabled, then stop the trace while
4679 * dumping, unless this is the "snapshot" file
4680 */
4681 if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4682 tracing_stop_tr(tr);
4683
4684 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4685 for_each_tracing_cpu(cpu) {
4686 iter->buffer_iter[cpu] =
4687 ring_buffer_read_start(iter->array_buffer->buffer,
4688 cpu, GFP_KERNEL);
4689 tracing_iter_reset(iter, cpu);
4690 }
4691 } else {
4692 cpu = iter->cpu_file;
4693 iter->buffer_iter[cpu] =
4694 ring_buffer_read_start(iter->array_buffer->buffer,
4695 cpu, GFP_KERNEL);
4696 tracing_iter_reset(iter, cpu);
4697 }
4698
4699 mutex_unlock(&trace_types_lock);
4700
4701 return iter;
4702
4703 fail:
4704 mutex_unlock(&trace_types_lock);
4705 free_trace_iter_content(iter);
4706 release:
4707 seq_release_private(inode, file);
4708 return ERR_PTR(-ENOMEM);
4709 }
4710
4711 int tracing_open_generic(struct inode *inode, struct file *filp)
4712 {
4713 int ret;
4714
4715 ret = tracing_check_open_get_tr(NULL);
4716 if (ret)
4717 return ret;
4718
4719 filp->private_data = inode->i_private;
4720 return 0;
4721 }
4722
4723 bool tracing_is_disabled(void)
4724 {
4725 return (tracing_disabled) ? true: false;
4726 }
4727
4728 /*
4729 * Open and update trace_array ref count.
4730 * Must have the current trace_array passed to it.
4731 */
4732 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4733 {
4734 struct trace_array *tr = inode->i_private;
4735 int ret;
4736
4737 ret = tracing_check_open_get_tr(tr);
4738 if (ret)
4739 return ret;
4740
4741 filp->private_data = inode->i_private;
4742
4743 return 0;
4744 }
4745
4746 /*
4747 * The private pointer of the inode is the trace_event_file.
4748 * Update the tr ref count associated to it.
4749 */
4750 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4751 {
4752 struct trace_event_file *file = inode->i_private;
4753 int ret;
4754
4755 ret = tracing_check_open_get_tr(file->tr);
4756 if (ret)
4757 return ret;
4758
4759 guard(mutex)(&event_mutex);
4760
4761 /* Fail if the file is marked for removal */
4762 if (file->flags & EVENT_FILE_FL_FREED) {
4763 trace_array_put(file->tr);
4764 return -ENODEV;
4765 } else {
4766 event_file_get(file);
4767 }
4768
4769 filp->private_data = inode->i_private;
4770
4771 return 0;
4772 }
4773
4774 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4775 {
4776 struct trace_event_file *file = inode->i_private;
4777
4778 trace_array_put(file->tr);
4779 event_file_put(file);
4780
4781 return 0;
4782 }
4783
4784 int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
4785 {
4786 tracing_release_file_tr(inode, filp);
4787 return single_release(inode, filp);
4788 }
4789
4790 static int tracing_mark_open(struct inode *inode, struct file *filp)
4791 {
4792 stream_open(inode, filp);
4793 return tracing_open_generic_tr(inode, filp);
4794 }
4795
4796 static int tracing_release(struct inode *inode, struct file *file)
4797 {
4798 struct trace_array *tr = inode->i_private;
4799 struct seq_file *m = file->private_data;
4800 struct trace_iterator *iter;
4801 int cpu;
4802
4803 if (!(file->f_mode & FMODE_READ)) {
4804 trace_array_put(tr);
4805 return 0;
4806 }
4807
4808 /* Writes do not use seq_file */
4809 iter = m->private;
4810 mutex_lock(&trace_types_lock);
4811
4812 for_each_tracing_cpu(cpu) {
4813 if (iter->buffer_iter[cpu])
4814 ring_buffer_read_finish(iter->buffer_iter[cpu]);
4815 }
4816
4817 if (iter->trace && iter->trace->close)
4818 iter->trace->close(iter);
4819
4820 if (!iter->snapshot && tr->stop_count)
4821 /* reenable tracing if it was previously enabled */
4822 tracing_start_tr(tr);
4823
4824 __trace_array_put(tr);
4825
4826 mutex_unlock(&trace_types_lock);
4827
4828 free_trace_iter_content(iter);
4829 seq_release_private(inode, file);
4830
4831 return 0;
4832 }
4833
4834 int tracing_release_generic_tr(struct inode *inode, struct file *file)
4835 {
4836 struct trace_array *tr = inode->i_private;
4837
4838 trace_array_put(tr);
4839 return 0;
4840 }
4841
4842 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4843 {
4844 struct trace_array *tr = inode->i_private;
4845
4846 trace_array_put(tr);
4847
4848 return single_release(inode, file);
4849 }
4850
4851 static int tracing_open(struct inode *inode, struct file *file)
4852 {
4853 struct trace_array *tr = inode->i_private;
4854 struct trace_iterator *iter;
4855 int ret;
4856
4857 ret = tracing_check_open_get_tr(tr);
4858 if (ret)
4859 return ret;
4860
4861 /* If this file was open for write, then erase contents */
4862 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4863 int cpu = tracing_get_cpu(inode);
4864 struct array_buffer *trace_buf = &tr->array_buffer;
4865
4866 #ifdef CONFIG_TRACER_MAX_TRACE
4867 if (tr->current_trace->print_max)
4868 trace_buf = &tr->max_buffer;
4869 #endif
4870
4871 if (cpu == RING_BUFFER_ALL_CPUS)
4872 tracing_reset_online_cpus(trace_buf);
4873 else
4874 tracing_reset_cpu(trace_buf, cpu);
4875 }
4876
4877 if (file->f_mode & FMODE_READ) {
4878 iter = __tracing_open(inode, file, false);
4879 if (IS_ERR(iter))
4880 ret = PTR_ERR(iter);
4881 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4882 iter->iter_flags |= TRACE_FILE_LAT_FMT;
4883 }
4884
4885 if (ret < 0)
4886 trace_array_put(tr);
4887
4888 return ret;
4889 }
4890
4891 /*
4892 * Some tracers are not suitable for instance buffers.
4893 * A tracer is always available for the global array (toplevel)
4894 * or if it explicitly states that it is.
4895 */
4896 static bool
4897 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4898 {
4899 #ifdef CONFIG_TRACER_SNAPSHOT
4900 /* arrays with mapped buffer range do not have snapshots */
4901 if (tr->range_addr_start && t->use_max_tr)
4902 return false;
4903 #endif
4904 return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4905 }
4906
4907 /* Find the next tracer that this trace array may use */
4908 static struct tracer *
4909 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4910 {
4911 while (t && !trace_ok_for_array(t, tr))
4912 t = t->next;
4913
4914 return t;
4915 }
4916
4917 static void *
4918 t_next(struct seq_file *m, void *v, loff_t *pos)
4919 {
4920 struct trace_array *tr = m->private;
4921 struct tracer *t = v;
4922
4923 (*pos)++;
4924
4925 if (t)
4926 t = get_tracer_for_array(tr, t->next);
4927
4928 return t;
4929 }
4930
4931 static void *t_start(struct seq_file *m, loff_t *pos)
4932 {
4933 struct trace_array *tr = m->private;
4934 struct tracer *t;
4935 loff_t l = 0;
4936
4937 mutex_lock(&trace_types_lock);
4938
4939 t = get_tracer_for_array(tr, trace_types);
4940 for (; t && l < *pos; t = t_next(m, t, &l))
4941 ;
4942
4943 return t;
4944 }
4945
4946 static void t_stop(struct seq_file *m, void *p)
4947 {
4948 mutex_unlock(&trace_types_lock);
4949 }
4950
4951 static int t_show(struct seq_file *m, void *v)
4952 {
4953 struct tracer *t = v;
4954
4955 if (!t)
4956 return 0;
4957
4958 seq_puts(m, t->name);
4959 if (t->next)
4960 seq_putc(m, ' ');
4961 else
4962 seq_putc(m, '\n');
4963
4964 return 0;
4965 }
4966
4967 static const struct seq_operations show_traces_seq_ops = {
4968 .start = t_start,
4969 .next = t_next,
4970 .stop = t_stop,
4971 .show = t_show,
4972 };
4973
4974 static int show_traces_open(struct inode *inode, struct file *file)
4975 {
4976 struct trace_array *tr = inode->i_private;
4977 struct seq_file *m;
4978 int ret;
4979
4980 ret = tracing_check_open_get_tr(tr);
4981 if (ret)
4982 return ret;
4983
4984 ret = seq_open(file, &show_traces_seq_ops);
4985 if (ret) {
4986 trace_array_put(tr);
4987 return ret;
4988 }
4989
4990 m = file->private_data;
4991 m->private = tr;
4992
4993 return 0;
4994 }
4995
4996 static int tracing_seq_release(struct inode *inode, struct file *file)
4997 {
4998 struct trace_array *tr = inode->i_private;
4999
5000 trace_array_put(tr);
5001 return seq_release(inode, file);
5002 }
5003
5004 static ssize_t
5005 tracing_write_stub(struct file *filp, const char __user *ubuf,
5006 size_t count, loff_t *ppos)
5007 {
5008 return count;
5009 }
5010
5011 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5012 {
5013 int ret;
5014
5015 if (file->f_mode & FMODE_READ)
5016 ret = seq_lseek(file, offset, whence);
5017 else
5018 file->f_pos = ret = 0;
5019
5020 return ret;
5021 }
5022
5023 static const struct file_operations tracing_fops = {
5024 .open = tracing_open,
5025 .read = seq_read,
5026 .read_iter = seq_read_iter,
5027 .splice_read = copy_splice_read,
5028 .write = tracing_write_stub,
5029 .llseek = tracing_lseek,
5030 .release = tracing_release,
5031 };
5032
5033 static const struct file_operations show_traces_fops = {
5034 .open = show_traces_open,
5035 .read = seq_read,
5036 .llseek = seq_lseek,
5037 .release = tracing_seq_release,
5038 };
5039
5040 static ssize_t
5041 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5042 size_t count, loff_t *ppos)
5043 {
5044 struct trace_array *tr = file_inode(filp)->i_private;
5045 char *mask_str __free(kfree) = NULL;
5046 int len;
5047
5048 len = snprintf(NULL, 0, "%*pb\n",
5049 cpumask_pr_args(tr->tracing_cpumask)) + 1;
5050 mask_str = kmalloc(len, GFP_KERNEL);
5051 if (!mask_str)
5052 return -ENOMEM;
5053
5054 len = snprintf(mask_str, len, "%*pb\n",
5055 cpumask_pr_args(tr->tracing_cpumask));
5056 if (len >= count)
5057 return -EINVAL;
5058
5059 return simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5060 }
5061
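/*
 * tracing_set_cpumask() below restricts which CPUs are traced for an
 * instance: CPUs being removed from the mask get their ring-buffer
 * recording disabled and CPUs being added get it re-enabled, under
 * tr->max_lock with interrupts off, before the new mask is copied into
 * tr->tracing_cpumask.
 */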
5062 int tracing_set_cpumask(struct trace_array *tr,
5063 cpumask_var_t tracing_cpumask_new)
5064 {
5065 int cpu;
5066
5067 if (!tr)
5068 return -EINVAL;
5069
5070 local_irq_disable();
5071 arch_spin_lock(&tr->max_lock);
5072 for_each_tracing_cpu(cpu) {
5073 /*
5074 * Increase/decrease the disabled counter if we are
5075 * about to flip a bit in the cpumask:
5076 */
5077 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5078 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5079 ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5080 #ifdef CONFIG_TRACER_MAX_TRACE
5081 ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5082 #endif
5083 }
5084 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5085 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5086 ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5087 #ifdef CONFIG_TRACER_MAX_TRACE
5088 ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5089 #endif
5090 }
5091 }
5092 arch_spin_unlock(&tr->max_lock);
5093 local_irq_enable();
5094
5095 cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5096
5097 return 0;
5098 }
5099
5100 static ssize_t
5101 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5102 size_t count, loff_t *ppos)
5103 {
5104 struct trace_array *tr = file_inode(filp)->i_private;
5105 cpumask_var_t tracing_cpumask_new;
5106 int err;
5107
5108 if (count == 0 || count > KMALLOC_MAX_SIZE)
5109 return -EINVAL;
5110
5111 if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5112 return -ENOMEM;
5113
5114 err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5115 if (err)
5116 goto err_free;
5117
5118 err = tracing_set_cpumask(tr, tracing_cpumask_new);
5119 if (err)
5120 goto err_free;
5121
5122 free_cpumask_var(tracing_cpumask_new);
5123
5124 return count;
5125
5126 err_free:
5127 free_cpumask_var(tracing_cpumask_new);
5128
5129 return err;
5130 }
5131
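/*
 * From user space the mask is written as a hex cpumask, e.g.
 * "echo 3 > tracing_cpumask" limits tracing to CPUs 0 and 1
 * (cpumask_parse_user() does the parsing). This is an illustrative
 * note, not additional kernel behaviour.
 */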
5132 static const struct file_operations tracing_cpumask_fops = {
5133 .open = tracing_open_generic_tr,
5134 .read = tracing_cpumask_read,
5135 .write = tracing_cpumask_write,
5136 .release = tracing_release_generic_tr,
5137 .llseek = generic_file_llseek,
5138 };
5139
5140 static int tracing_trace_options_show(struct seq_file *m, void *v)
5141 {
5142 struct tracer_opt *trace_opts;
5143 struct trace_array *tr = m->private;
5144 u32 tracer_flags;
5145 int i;
5146
5147 guard(mutex)(&trace_types_lock);
5148
5149 tracer_flags = tr->current_trace->flags->val;
5150 trace_opts = tr->current_trace->flags->opts;
5151
5152 for (i = 0; trace_options[i]; i++) {
5153 if (tr->trace_flags & (1 << i))
5154 seq_printf(m, "%s\n", trace_options[i]);
5155 else
5156 seq_printf(m, "no%s\n", trace_options[i]);
5157 }
5158
5159 for (i = 0; trace_opts[i].name; i++) {
5160 if (tracer_flags & trace_opts[i].bit)
5161 seq_printf(m, "%s\n", trace_opts[i].name);
5162 else
5163 seq_printf(m, "no%s\n", trace_opts[i].name);
5164 }
5165
5166 return 0;
5167 }
5168
5169 static int __set_tracer_option(struct trace_array *tr,
5170 struct tracer_flags *tracer_flags,
5171 struct tracer_opt *opts, int neg)
5172 {
5173 struct tracer *trace = tracer_flags->trace;
5174 int ret;
5175
5176 ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5177 if (ret)
5178 return ret;
5179
5180 if (neg)
5181 tracer_flags->val &= ~opts->bit;
5182 else
5183 tracer_flags->val |= opts->bit;
5184 return 0;
5185 }
5186
5187 /* Try to assign a tracer specific option */
5188 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5189 {
5190 struct tracer *trace = tr->current_trace;
5191 struct tracer_flags *tracer_flags = trace->flags;
5192 struct tracer_opt *opts = NULL;
5193 int i;
5194
5195 for (i = 0; tracer_flags->opts[i].name; i++) {
5196 opts = &tracer_flags->opts[i];
5197
5198 if (strcmp(cmp, opts->name) == 0)
5199 return __set_tracer_option(tr, trace->flags, opts, neg);
5200 }
5201
5202 return -EINVAL;
5203 }
5204
5205 /* Some tracers require overwrite to stay enabled */
5206 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5207 {
5208 if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5209 return -1;
5210
5211 return 0;
5212 }
5213
5214 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5215 {
5216 if ((mask == TRACE_ITER_RECORD_TGID) ||
5217 (mask == TRACE_ITER_RECORD_CMD) ||
5218 (mask == TRACE_ITER_TRACE_PRINTK) ||
5219 (mask == TRACE_ITER_COPY_MARKER))
5220 lockdep_assert_held(&event_mutex);
5221
5222 /* do nothing if flag is already set */
5223 if (!!(tr->trace_flags & mask) == !!enabled)
5224 return 0;
5225
5226 /* Give the tracer a chance to approve the change */
5227 if (tr->current_trace->flag_changed)
5228 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5229 return -EINVAL;
5230
5231 if (mask == TRACE_ITER_TRACE_PRINTK) {
5232 if (enabled) {
5233 update_printk_trace(tr);
5234 } else {
5235 /*
5236 * The global_trace cannot clear this.
5237 * Its flag only gets cleared if another instance sets it.
5238 */
5239 if (printk_trace == &global_trace)
5240 return -EINVAL;
5241 /*
5242 * An instance must always have it set.
5243 * By default, that's the global_trace instance.
5244 */
5245 if (printk_trace == tr)
5246 update_printk_trace(&global_trace);
5247 }
5248 }
5249
5250 if (mask == TRACE_ITER_COPY_MARKER)
5251 update_marker_trace(tr, enabled);
5252
5253 if (enabled)
5254 tr->trace_flags |= mask;
5255 else
5256 tr->trace_flags &= ~mask;
5257
5258 if (mask == TRACE_ITER_RECORD_CMD)
5259 trace_event_enable_cmd_record(enabled);
5260
5261 if (mask == TRACE_ITER_RECORD_TGID) {
5262
5263 if (trace_alloc_tgid_map() < 0) {
5264 tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5265 return -ENOMEM;
5266 }
5267
5268 trace_event_enable_tgid_record(enabled);
5269 }
5270
5271 if (mask == TRACE_ITER_EVENT_FORK)
5272 trace_event_follow_fork(tr, enabled);
5273
5274 if (mask == TRACE_ITER_FUNC_FORK)
5275 ftrace_pid_follow_fork(tr, enabled);
5276
5277 if (mask == TRACE_ITER_OVERWRITE) {
5278 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5279 #ifdef CONFIG_TRACER_MAX_TRACE
5280 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5281 #endif
5282 }
5283
5284 if (mask == TRACE_ITER_PRINTK) {
5285 trace_printk_start_stop_comm(enabled);
5286 trace_printk_control(enabled);
5287 }
5288
5289 return 0;
5290 }
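
/*
 * Callers of set_tracer_flag() take trace_types_lock (and event_mutex for
 * the masks checked by the lockdep assertion above) -- see
 * trace_set_options() below for the canonical caller.
 */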
5291
5292 int trace_set_options(struct trace_array *tr, char *option)
5293 {
5294 char *cmp;
5295 int neg = 0;
5296 int ret;
5297 size_t orig_len = strlen(option);
5298 int len;
5299
5300 cmp = strstrip(option);
5301
5302 len = str_has_prefix(cmp, "no");
5303 if (len)
5304 neg = 1;
5305
5306 cmp += len;
5307
5308 mutex_lock(&event_mutex);
5309 mutex_lock(&trace_types_lock);
5310
5311 ret = match_string(trace_options, -1, cmp);
5312 /* If no option could be set, test the specific tracer options */
5313 if (ret < 0)
5314 ret = set_tracer_option(tr, cmp, neg);
5315 else
5316 ret = set_tracer_flag(tr, 1 << ret, !neg);
5317
5318 mutex_unlock(&trace_types_lock);
5319 mutex_unlock(&event_mutex);
5320
5321 /*
5322 * If the first trailing whitespace is replaced with '\0' by strstrip,
5323 * turn it back into a space.
5324 */
5325 if (orig_len > strlen(option))
5326 option[strlen(option)] = ' ';
5327
5328 return ret;
5329 }
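
/*
 * Example (illustrative): trace_set_options(tr, "nooverwrite") clears
 * TRACE_ITER_OVERWRITE for @tr, while "overwrite" sets it again; options
 * not found in trace_options[] fall through to the current tracer's own
 * flags via set_tracer_option().
 */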
5330
5331 static void __init apply_trace_boot_options(void)
5332 {
5333 char *buf = trace_boot_options_buf;
5334 char *option;
5335
5336 while (true) {
5337 option = strsep(&buf, ",");
5338
5339 if (!option)
5340 break;
5341
5342 if (*option)
5343 trace_set_options(&global_trace, option);
5344
5345 /* Put back the comma to allow this to be called again */
5346 if (buf)
5347 *(buf - 1) = ',';
5348 }
5349 }
5350
5351 static ssize_t
5352 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5353 size_t cnt, loff_t *ppos)
5354 {
5355 struct seq_file *m = filp->private_data;
5356 struct trace_array *tr = m->private;
5357 char buf[64];
5358 int ret;
5359
5360 if (cnt >= sizeof(buf))
5361 return -EINVAL;
5362
5363 if (copy_from_user(buf, ubuf, cnt))
5364 return -EFAULT;
5365
5366 buf[cnt] = 0;
5367
5368 ret = trace_set_options(tr, buf);
5369 if (ret < 0)
5370 return ret;
5371
5372 *ppos += cnt;
5373
5374 return cnt;
5375 }
5376
5377 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5378 {
5379 struct trace_array *tr = inode->i_private;
5380 int ret;
5381
5382 ret = tracing_check_open_get_tr(tr);
5383 if (ret)
5384 return ret;
5385
5386 ret = single_open(file, tracing_trace_options_show, inode->i_private);
5387 if (ret < 0)
5388 trace_array_put(tr);
5389
5390 return ret;
5391 }
5392
5393 static const struct file_operations tracing_iter_fops = {
5394 .open = tracing_trace_options_open,
5395 .read = seq_read,
5396 .llseek = seq_lseek,
5397 .release = tracing_single_release_tr,
5398 .write = tracing_trace_options_write,
5399 };
5400
5401 static const char readme_msg[] =
5402 "tracing mini-HOWTO:\n\n"
5403 "By default tracefs removes all OTH file permission bits.\n"
5404 "When mounting tracefs an optional group id can be specified\n"
5405 "which adds the group to every directory and file in tracefs:\n\n"
5406 "\t e.g. mount -t tracefs [-o [gid=<gid>]] nodev /sys/kernel/tracing\n\n"
5407 "# echo 0 > tracing_on : quick way to disable tracing\n"
5408 "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5409 " Important files:\n"
5410 " trace\t\t\t- The static contents of the buffer\n"
5411 "\t\t\t To clear the buffer write into this file: echo > trace\n"
5412 " trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5413 " current_tracer\t- function and latency tracers\n"
5414 " available_tracers\t- list of configured tracers for current_tracer\n"
5415 " error_log\t- error log for failed commands (that support it)\n"
5416 " buffer_size_kb\t- view and modify size of per cpu buffer\n"
5417 " buffer_total_size_kb - view total size of all cpu buffers\n\n"
5418 " trace_clock\t\t- change the clock used to order events\n"
5419 " local: Per cpu clock but may not be synced across CPUs\n"
5420 " global: Synced across CPUs but slows tracing down.\n"
5421 " counter: Not a clock, but just an increment\n"
5422 " uptime: Jiffy counter from time of boot\n"
5423 " perf: Same clock that perf events use\n"
5424 #ifdef CONFIG_X86_64
5425 " x86-tsc: TSC cycle counter\n"
5426 #endif
5427 "\n timestamp_mode\t- view the mode used to timestamp events\n"
5428 " delta: Delta difference against a buffer-wide timestamp\n"
5429 " absolute: Absolute (standalone) timestamp\n"
5430 "\n trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5431 "\n trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5432 " tracing_cpumask\t- Limit which CPUs to trace\n"
5433 " instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5434 "\t\t\t Remove sub-buffer with rmdir\n"
5435 " trace_options\t\t- Set format or modify how tracing happens\n"
5436 "\t\t\t Disable an option by prefixing 'no' to the\n"
5437 "\t\t\t option name\n"
5438 " saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5439 #ifdef CONFIG_DYNAMIC_FTRACE
5440 "\n available_filter_functions - list of functions that can be filtered on\n"
5441 " set_ftrace_filter\t- echo function name in here to only trace these\n"
5442 "\t\t\t functions\n"
5443 "\t accepts: func_full_name or glob-matching-pattern\n"
5444 "\t modules: Can select a group via module\n"
5445 "\t Format: :mod:<module-name>\n"
5446 "\t example: echo :mod:ext3 > set_ftrace_filter\n"
5447 "\t triggers: a command to perform when function is hit\n"
5448 "\t Format: <function>:<trigger>[:count]\n"
5449 "\t trigger: traceon, traceoff\n"
5450 "\t\t enable_event:<system>:<event>\n"
5451 "\t\t disable_event:<system>:<event>\n"
5452 #ifdef CONFIG_STACKTRACE
5453 "\t\t stacktrace\n"
5454 #endif
5455 #ifdef CONFIG_TRACER_SNAPSHOT
5456 "\t\t snapshot\n"
5457 #endif
5458 "\t\t dump\n"
5459 "\t\t cpudump\n"
5460 "\t example: echo do_fault:traceoff > set_ftrace_filter\n"
5461 "\t echo do_trap:traceoff:3 > set_ftrace_filter\n"
5462 "\t The first one will disable tracing every time do_fault is hit\n"
5463 "\t The second will disable tracing at most 3 times when do_trap is hit\n"
5464 "\t The first time do trap is hit and it disables tracing, the\n"
5465 "\t counter will decrement to 2. If tracing is already disabled,\n"
5466 "\t the counter will not decrement. It only decrements when the\n"
5467 "\t trigger did work\n"
5468 "\t To remove trigger without count:\n"
5469 "\t echo '!<function>:<trigger> > set_ftrace_filter\n"
5470 "\t To remove trigger with a count:\n"
5471 "\t echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5472 " set_ftrace_notrace\t- echo function name in here to never trace.\n"
5473 "\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5474 "\t modules: Can select a group via module command :mod:\n"
5475 "\t Does not accept triggers\n"
5476 #endif /* CONFIG_DYNAMIC_FTRACE */
5477 #ifdef CONFIG_FUNCTION_TRACER
5478 " set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5479 "\t\t (function)\n"
5480 " set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5481 "\t\t (function)\n"
5482 #endif
5483 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5484 " set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5485 " set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5486 " max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5487 #endif
5488 #ifdef CONFIG_TRACER_SNAPSHOT
5489 "\n snapshot\t\t- Like 'trace' but shows the content of the static\n"
5490 "\t\t\t snapshot buffer. Read the contents for more\n"
5491 "\t\t\t information\n"
5492 #endif
5493 #ifdef CONFIG_STACK_TRACER
5494 " stack_trace\t\t- Shows the max stack trace when active\n"
5495 " stack_max_size\t- Shows current max stack size that was traced\n"
5496 "\t\t\t Write into this file to reset the max size (trigger a\n"
5497 "\t\t\t new trace)\n"
5498 #ifdef CONFIG_DYNAMIC_FTRACE
5499 " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5500 "\t\t\t traces\n"
5501 #endif
5502 #endif /* CONFIG_STACK_TRACER */
5503 #ifdef CONFIG_DYNAMIC_EVENTS
5504 " dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5505 "\t\t\t Write into this file to define/undefine new trace events.\n"
5506 #endif
5507 #ifdef CONFIG_KPROBE_EVENTS
5508 " kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5509 "\t\t\t Write into this file to define/undefine new trace events.\n"
5510 #endif
5511 #ifdef CONFIG_UPROBE_EVENTS
5512 " uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5513 "\t\t\t Write into this file to define/undefine new trace events.\n"
5514 #endif
5515 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
5516 defined(CONFIG_FPROBE_EVENTS)
5517 "\t accepts: event-definitions (one definition per line)\n"
5518 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5519 "\t Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5520 "\t r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5521 #endif
5522 #ifdef CONFIG_FPROBE_EVENTS
5523 "\t f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
5524 "\t t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
5525 #endif
5526 #ifdef CONFIG_HIST_TRIGGERS
5527 "\t s:[synthetic/]<event> <field> [<field>]\n"
5528 #endif
5529 "\t e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5530 "\t -:[<group>/][<event>]\n"
5531 #ifdef CONFIG_KPROBE_EVENTS
5532 "\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5533 "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5534 #endif
5535 #ifdef CONFIG_UPROBE_EVENTS
5536 " place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5537 #endif
5538 "\t args: <name>=fetcharg[:type]\n"
5539 "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5540 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5541 "\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5542 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
5543 "\t <argname>[->field[->field|.field...]],\n"
5544 #endif
5545 #else
5546 "\t $stack<index>, $stack, $retval, $comm,\n"
5547 #endif
5548 "\t +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5549 "\t kernel return probes support: $retval, $arg<N>, $comm\n"
5550 "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
5551 "\t b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5552 "\t symstr, %pd/%pD, <type>\\[<array-size>\\]\n"
5553 #ifdef CONFIG_HIST_TRIGGERS
5554 "\t field: <stype> <name>;\n"
5555 "\t stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5556 "\t [unsigned] char/int/long\n"
5557 #endif
5558 "\t efield: For event probes ('e' types), the field is on of the fields\n"
5559 "\t of the <attached-group>/<attached-event>.\n"
5560 #endif
5561 " set_event\t\t- Enables events by name written into it\n"
5562 "\t\t\t Can enable module events via: :mod:<module>\n"
5563 " events/\t\t- Directory containing all trace event subsystems:\n"
5564 " enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5565 " events/<system>/\t- Directory containing all trace events for <system>:\n"
5566 " enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5567 "\t\t\t events\n"
5568 " filter\t\t- If set, only events passing filter are traced\n"
5569 " events/<system>/<event>/\t- Directory containing control files for\n"
5570 "\t\t\t <event>:\n"
5571 " enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5572 " filter\t\t- If set, only events passing filter are traced\n"
5573 " trigger\t\t- If set, a command to perform when event is hit\n"
5574 "\t Format: <trigger>[:count][if <filter>]\n"
5575 "\t trigger: traceon, traceoff\n"
5576 "\t enable_event:<system>:<event>\n"
5577 "\t disable_event:<system>:<event>\n"
5578 #ifdef CONFIG_HIST_TRIGGERS
5579 "\t enable_hist:<system>:<event>\n"
5580 "\t disable_hist:<system>:<event>\n"
5581 #endif
5582 #ifdef CONFIG_STACKTRACE
5583 "\t\t stacktrace\n"
5584 #endif
5585 #ifdef CONFIG_TRACER_SNAPSHOT
5586 "\t\t snapshot\n"
5587 #endif
5588 #ifdef CONFIG_HIST_TRIGGERS
5589 "\t\t hist (see below)\n"
5590 #endif
5591 "\t example: echo traceoff > events/block/block_unplug/trigger\n"
5592 "\t echo traceoff:3 > events/block/block_unplug/trigger\n"
5593 "\t echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5594 "\t events/block/block_unplug/trigger\n"
5595 "\t The first disables tracing every time block_unplug is hit.\n"
5596 "\t The second disables tracing the first 3 times block_unplug is hit.\n"
5597 "\t The third enables the kmalloc event the first 3 times block_unplug\n"
5598 "\t is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5599 "\t Like function triggers, the counter is only decremented if it\n"
5600 "\t enabled or disabled tracing.\n"
5601 "\t To remove a trigger without a count:\n"
5602 "\t echo '!<trigger> > <system>/<event>/trigger\n"
5603 "\t To remove a trigger with a count:\n"
5604 "\t echo '!<trigger>:0 > <system>/<event>/trigger\n"
5605 "\t Filters can be ignored when removing a trigger.\n"
5606 #ifdef CONFIG_HIST_TRIGGERS
5607 " hist trigger\t- If set, event hits are aggregated into a hash table\n"
5608 "\t Format: hist:keys=<field1[,field2,...]>\n"
5609 "\t [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5610 "\t [:values=<field1[,field2,...]>]\n"
5611 "\t [:sort=<field1[,field2,...]>]\n"
5612 "\t [:size=#entries]\n"
5613 "\t [:pause][:continue][:clear]\n"
5614 "\t [:name=histname1]\n"
5615 "\t [:nohitcount]\n"
5616 "\t [:<handler>.<action>]\n"
5617 "\t [if <filter>]\n\n"
5618 "\t Note, special fields can be used as well:\n"
5619 "\t common_timestamp - to record current timestamp\n"
5620 "\t common_cpu - to record the CPU the event happened on\n"
5621 "\n"
5622 "\t A hist trigger variable can be:\n"
5623 "\t - a reference to a field e.g. x=current_timestamp,\n"
5624 "\t - a reference to another variable e.g. y=$x,\n"
5625 "\t - a numeric literal: e.g. ms_per_sec=1000,\n"
5626 "\t - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5627 "\n"
5628 "\t hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5629 "\t multiplication(*) and division(/) operators. An operand can be either a\n"
5630 "\t variable reference, field or numeric literal.\n"
5631 "\n"
5632 "\t When a matching event is hit, an entry is added to a hash\n"
5633 "\t table using the key(s) and value(s) named, and the value of a\n"
5634 "\t sum called 'hitcount' is incremented. Keys and values\n"
5635 "\t correspond to fields in the event's format description. Keys\n"
5636 "\t can be any field, or the special string 'common_stacktrace'.\n"
5637 "\t Compound keys consisting of up to two fields can be specified\n"
5638 "\t by the 'keys' keyword. Values must correspond to numeric\n"
5639 "\t fields. Sort keys consisting of up to two fields can be\n"
5640 "\t specified using the 'sort' keyword. The sort direction can\n"
5641 "\t be modified by appending '.descending' or '.ascending' to a\n"
5642 "\t sort field. The 'size' parameter can be used to specify more\n"
5643 "\t or fewer than the default 2048 entries for the hashtable size.\n"
5644 "\t If a hist trigger is given a name using the 'name' parameter,\n"
5645 "\t its histogram data will be shared with other triggers of the\n"
5646 "\t same name, and trigger hits will update this common data.\n\n"
5647 "\t Reading the 'hist' file for the event will dump the hash\n"
5648 "\t table in its entirety to stdout. If there are multiple hist\n"
5649 "\t triggers attached to an event, there will be a table for each\n"
5650 "\t trigger in the output. The table displayed for a named\n"
5651 "\t trigger will be the same as any other instance having the\n"
5652 "\t same name. The default format used to display a given field\n"
5653 "\t can be modified by appending any of the following modifiers\n"
5654 "\t to the field name, as applicable:\n\n"
5655 "\t .hex display a number as a hex value\n"
5656 "\t .sym display an address as a symbol\n"
5657 "\t .sym-offset display an address as a symbol and offset\n"
5658 "\t .execname display a common_pid as a program name\n"
5659 "\t .syscall display a syscall id as a syscall name\n"
5660 "\t .log2 display log2 value rather than raw number\n"
5661 "\t .buckets=size display values in groups of size rather than raw number\n"
5662 "\t .usecs display a common_timestamp in microseconds\n"
5663 "\t .percent display a number of percentage value\n"
5664 "\t .graph display a bar-graph of a value\n\n"
5665 "\t The 'pause' parameter can be used to pause an existing hist\n"
5666 "\t trigger or to start a hist trigger but not log any events\n"
5667 "\t until told to do so. 'continue' can be used to start or\n"
5668 "\t restart a paused hist trigger.\n\n"
5669 "\t The 'clear' parameter will clear the contents of a running\n"
5670 "\t hist trigger and leave its current paused/active state\n"
5671 "\t unchanged.\n\n"
5672 "\t The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5673 "\t raw hitcount in the histogram.\n\n"
5674 "\t The enable_hist and disable_hist triggers can be used to\n"
5675 "\t have one event conditionally start and stop another event's\n"
5676 "\t already-attached hist trigger. The syntax is analogous to\n"
5677 "\t the enable_event and disable_event triggers.\n\n"
5678 "\t Hist trigger handlers and actions are executed whenever a\n"
5679 "\t a histogram entry is added or updated. They take the form:\n\n"
5680 "\t <handler>.<action>\n\n"
5681 "\t The available handlers are:\n\n"
5682 "\t onmatch(matching.event) - invoke on addition or update\n"
5683 "\t onmax(var) - invoke if var exceeds current max\n"
5684 "\t onchange(var) - invoke action if var changes\n\n"
5685 "\t The available actions are:\n\n"
5686 "\t trace(<synthetic_event>,param list) - generate synthetic event\n"
5687 "\t save(field,...) - save current event fields\n"
5688 #ifdef CONFIG_TRACER_SNAPSHOT
5689 "\t snapshot() - snapshot the trace buffer\n\n"
5690 #endif
5691 #ifdef CONFIG_SYNTH_EVENTS
5692 " events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5693 "\t Write into this file to define/undefine new synthetic events.\n"
5694 "\t example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5695 #endif
5696 #endif
5697 ;
5698
5699 static ssize_t
5700 tracing_readme_read(struct file *filp, char __user *ubuf,
5701 size_t cnt, loff_t *ppos)
5702 {
5703 return simple_read_from_buffer(ubuf, cnt, ppos,
5704 readme_msg, strlen(readme_msg));
5705 }
5706
5707 static const struct file_operations tracing_readme_fops = {
5708 .open = tracing_open_generic,
5709 .read = tracing_readme_read,
5710 .llseek = generic_file_llseek,
5711 };
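/*
 * Usage sketch (path assumed from the usual tracefs mount point): the
 * help text above is exposed read-only, typically as the README file,
 * e.g.:
 *
 *	cat /sys/kernel/tracing/README
 */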
5712
5713 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5714 static union trace_eval_map_item *
5715 update_eval_map(union trace_eval_map_item *ptr)
5716 {
5717 if (!ptr->map.eval_string) {
5718 if (ptr->tail.next) {
5719 ptr = ptr->tail.next;
5720 /* Set ptr to the next real item (skip head) */
5721 ptr++;
5722 } else
5723 return NULL;
5724 }
5725 return ptr;
5726 }
5727
5728 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5729 {
5730 union trace_eval_map_item *ptr = v;
5731
5732 /*
5733 * Paranoid! If ptr points to end, we don't want to increment past it.
5734 * This really should never happen.
5735 */
5736 (*pos)++;
5737 ptr = update_eval_map(ptr);
5738 if (WARN_ON_ONCE(!ptr))
5739 return NULL;
5740
5741 ptr++;
5742 ptr = update_eval_map(ptr);
5743
5744 return ptr;
5745 }
5746
5747 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5748 {
5749 union trace_eval_map_item *v;
5750 loff_t l = 0;
5751
5752 mutex_lock(&trace_eval_mutex);
5753
5754 v = trace_eval_maps;
5755 if (v)
5756 v++;
5757
5758 while (v && l < *pos) {
5759 v = eval_map_next(m, v, &l);
5760 }
5761
5762 return v;
5763 }
5764
5765 static void eval_map_stop(struct seq_file *m, void *v)
5766 {
5767 mutex_unlock(&trace_eval_mutex);
5768 }
5769
5770 static int eval_map_show(struct seq_file *m, void *v)
5771 {
5772 union trace_eval_map_item *ptr = v;
5773
5774 seq_printf(m, "%s %ld (%s)\n",
5775 ptr->map.eval_string, ptr->map.eval_value,
5776 ptr->map.system);
5777
5778 return 0;
5779 }
5780
5781 static const struct seq_operations tracing_eval_map_seq_ops = {
5782 .start = eval_map_start,
5783 .next = eval_map_next,
5784 .stop = eval_map_stop,
5785 .show = eval_map_show,
5786 };
5787
5788 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5789 {
5790 int ret;
5791
5792 ret = tracing_check_open_get_tr(NULL);
5793 if (ret)
5794 return ret;
5795
5796 return seq_open(filp, &tracing_eval_map_seq_ops);
5797 }
5798
5799 static const struct file_operations tracing_eval_map_fops = {
5800 .open = tracing_eval_map_open,
5801 .read = seq_read,
5802 .llseek = seq_lseek,
5803 .release = seq_release,
5804 };
5805
5806 static inline union trace_eval_map_item *
5807 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5808 {
5809 /* Return tail of array given the head */
5810 return ptr + ptr->head.length + 1;
5811 }
5812
5813 static void
5814 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5815 int len)
5816 {
5817 struct trace_eval_map **stop;
5818 struct trace_eval_map **map;
5819 union trace_eval_map_item *map_array;
5820 union trace_eval_map_item *ptr;
5821
5822 stop = start + len;
5823
5824 /*
5825 * The trace_eval_maps contains the map plus a head and tail item,
5826 * where the head holds the module and length of array, and the
5827 * tail holds a pointer to the next list.
5828 */
5829 map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5830 if (!map_array) {
5831 pr_warn("Unable to allocate trace eval mapping\n");
5832 return;
5833 }
5834
5835 guard(mutex)(&trace_eval_mutex);
5836
5837 if (!trace_eval_maps)
5838 trace_eval_maps = map_array;
5839 else {
5840 ptr = trace_eval_maps;
5841 for (;;) {
5842 ptr = trace_eval_jmp_to_tail(ptr);
5843 if (!ptr->tail.next)
5844 break;
5845 ptr = ptr->tail.next;
5846
5847 }
5848 ptr->tail.next = map_array;
5849 }
5850 map_array->head.mod = mod;
5851 map_array->head.length = len;
5852 map_array++;
5853
5854 for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5855 map_array->map = **map;
5856 map_array++;
5857 }
5858 memset(map_array, 0, sizeof(*map_array));
5859 }
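/*
 * Rough layout sketch of the array built above (illustrative only):
 *
 *	map_array[0]		head  { .mod = mod, .length = len }
 *	map_array[1..len]	map   one trace_eval_map per entry
 *	map_array[len + 1]	tail  { .next = <next block or NULL> }
 *
 * trace_eval_jmp_to_tail() relies on this shape: head + length + 1
 * lands on the tail item of a block.
 */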
5860
5861 static void trace_create_eval_file(struct dentry *d_tracer)
5862 {
5863 trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
5864 NULL, &tracing_eval_map_fops);
5865 }
5866
5867 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5868 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5869 static inline void trace_insert_eval_map_file(struct module *mod,
5870 struct trace_eval_map **start, int len) { }
5871 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5872
5873 static void
5874 trace_event_update_with_eval_map(struct module *mod,
5875 struct trace_eval_map **start,
5876 int len)
5877 {
5878 struct trace_eval_map **map;
5879
5880 /* Always run sanitizer only if btf_type_tag attr exists. */
5881 if (len <= 0) {
5882 if (!(IS_ENABLED(CONFIG_DEBUG_INFO_BTF) &&
5883 IS_ENABLED(CONFIG_PAHOLE_HAS_BTF_TAG) &&
5884 __has_attribute(btf_type_tag)))
5885 return;
5886 }
5887
5888 map = start;
5889
5890 trace_event_update_all(map, len);
5891
5892 if (len <= 0)
5893 return;
5894
5895 trace_insert_eval_map_file(mod, start, len);
5896 }
5897
5898 static ssize_t
5899 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5900 size_t cnt, loff_t *ppos)
5901 {
5902 struct trace_array *tr = filp->private_data;
5903 char buf[MAX_TRACER_SIZE+2];
5904 int r;
5905
5906 scoped_guard(mutex, &trace_types_lock) {
5907 r = sprintf(buf, "%s\n", tr->current_trace->name);
5908 }
5909
5910 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5911 }
5912
5913 int tracer_init(struct tracer *t, struct trace_array *tr)
5914 {
5915 tracing_reset_online_cpus(&tr->array_buffer);
5916 return t->init(tr);
5917 }
5918
5919 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5920 {
5921 int cpu;
5922
5923 for_each_tracing_cpu(cpu)
5924 per_cpu_ptr(buf->data, cpu)->entries = val;
5925 }
5926
5927 static void update_buffer_entries(struct array_buffer *buf, int cpu)
5928 {
5929 if (cpu == RING_BUFFER_ALL_CPUS) {
5930 set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
5931 } else {
5932 per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
5933 }
5934 }
5935
5936 #ifdef CONFIG_TRACER_MAX_TRACE
5937 /* resize @trace_buf to the size of @size_buf's entries */
5938 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5939 struct array_buffer *size_buf, int cpu_id)
5940 {
5941 int cpu, ret = 0;
5942
5943 if (cpu_id == RING_BUFFER_ALL_CPUS) {
5944 for_each_tracing_cpu(cpu) {
5945 ret = ring_buffer_resize(trace_buf->buffer,
5946 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5947 if (ret < 0)
5948 break;
5949 per_cpu_ptr(trace_buf->data, cpu)->entries =
5950 per_cpu_ptr(size_buf->data, cpu)->entries;
5951 }
5952 } else {
5953 ret = ring_buffer_resize(trace_buf->buffer,
5954 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5955 if (ret == 0)
5956 per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5957 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5958 }
5959
5960 return ret;
5961 }
5962 #endif /* CONFIG_TRACER_MAX_TRACE */
5963
5964 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5965 unsigned long size, int cpu)
5966 {
5967 int ret;
5968
5969 /*
5970 * If kernel or user changes the size of the ring buffer
5971 * we use the size that was given, and we can forget about
5972 * expanding it later.
5973 */
5974 trace_set_ring_buffer_expanded(tr);
5975
5976 /* May be called before buffers are initialized */
5977 if (!tr->array_buffer.buffer)
5978 return 0;
5979
5980 /* Do not allow tracing while resizing ring buffer */
5981 tracing_stop_tr(tr);
5982
5983 ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
5984 if (ret < 0)
5985 goto out_start;
5986
5987 #ifdef CONFIG_TRACER_MAX_TRACE
5988 if (!tr->allocated_snapshot)
5989 goto out;
5990
5991 ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5992 if (ret < 0) {
5993 int r = resize_buffer_duplicate_size(&tr->array_buffer,
5994 &tr->array_buffer, cpu);
5995 if (r < 0) {
5996 /*
5997 * AARGH! We are left with different
5998 * size max buffer!!!!
5999 * The max buffer is our "snapshot" buffer.
6000 * When a tracer needs a snapshot (one of the
6001 * latency tracers), it swaps the max buffer
6002 * with the saved snapshot. We succeeded in
6003 * updating the size of the main buffer, but failed to
6004 * update the size of the max buffer. But when we tried
6005 * to reset the main buffer to the original size, we
6006 * failed there too. This is very unlikely to
6007 * happen, but if it does, warn and kill all
6008 * tracing.
6009 */
6010 WARN_ON(1);
6011 tracing_disabled = 1;
6012 }
6013 goto out_start;
6014 }
6015
6016 update_buffer_entries(&tr->max_buffer, cpu);
6017
6018 out:
6019 #endif /* CONFIG_TRACER_MAX_TRACE */
6020
6021 update_buffer_entries(&tr->array_buffer, cpu);
6022 out_start:
6023 tracing_start_tr(tr);
6024 return ret;
6025 }
6026
6027 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6028 unsigned long size, int cpu_id)
6029 {
6030 guard(mutex)(&trace_types_lock);
6031
6032 if (cpu_id != RING_BUFFER_ALL_CPUS) {
6033 /* make sure this CPU is enabled in the mask */
6034 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask))
6035 return -EINVAL;
6036 }
6037
6038 return __tracing_resize_ring_buffer(tr, size, cpu_id);
6039 }
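/*
 * Userspace-facing sketch (assumed tracefs layout): this resize path is
 * what backs writes to buffer_size_kb, e.g.
 *
 *	echo 4096 > /sys/kernel/tracing/buffer_size_kb		# all CPUs
 *	echo 1024 > /sys/kernel/tracing/per_cpu/cpu0/buffer_size_kb
 *
 * The second form passes a single cpu_id instead of RING_BUFFER_ALL_CPUS.
 */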
6040
6041 struct trace_mod_entry {
6042 unsigned long mod_addr;
6043 char mod_name[MODULE_NAME_LEN];
6044 };
6045
6046 struct trace_scratch {
6047 unsigned int clock_id;
6048 unsigned long text_addr;
6049 unsigned long nr_entries;
6050 struct trace_mod_entry entries[];
6051 };
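/*
 * Informal sketch of how this scratch area is used (based on the code
 * below): the flexible "entries" array records, for each module that was
 * loaded when the scratch was last written, its name and the base address
 * of its text, so that addresses found in a persistent ring buffer from a
 * previous boot can be re-mapped to this boot's layout.
 */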
6052
6053 static DEFINE_MUTEX(scratch_mutex);
6054
6055 static int cmp_mod_entry(const void *key, const void *pivot)
6056 {
6057 unsigned long addr = (unsigned long)key;
6058 const struct trace_mod_entry *ent = pivot;
6059
6060 if (addr >= ent[0].mod_addr && addr < ent[1].mod_addr)
6061 return 0;
6062 else
6063 return addr - ent->mod_addr;
6064 }
6065
6066 /**
6067 * trace_adjust_address() - Adjust prev boot address to current address.
6068 * @tr: Persistent ring buffer's trace_array.
6069 * @addr: Address in @tr which is adjusted.
6070 */
6071 unsigned long trace_adjust_address(struct trace_array *tr, unsigned long addr)
6072 {
6073 struct trace_module_delta *module_delta;
6074 struct trace_scratch *tscratch;
6075 struct trace_mod_entry *entry;
6076 unsigned long raddr;
6077 int idx = 0, nr_entries;
6078
6079 /* If we don't have last boot delta, return the address */
6080 if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6081 return addr;
6082
6083 /* tr->module_delta must be protected by rcu. */
6084 guard(rcu)();
6085 tscratch = tr->scratch;
6086 /* if there is no tscratch, module_delta must be NULL. */
6087 module_delta = READ_ONCE(tr->module_delta);
6088 if (!module_delta || !tscratch->nr_entries ||
6089 tscratch->entries[0].mod_addr > addr) {
6090 raddr = addr + tr->text_delta;
6091 return __is_kernel(raddr) || is_kernel_core_data(raddr) ||
6092 is_kernel_rodata(raddr) ? raddr : addr;
6093 }
6094
6095 /* Note that entries must be sorted. */
6096 nr_entries = tscratch->nr_entries;
6097 if (nr_entries == 1 ||
6098 tscratch->entries[nr_entries - 1].mod_addr < addr)
6099 idx = nr_entries - 1;
6100 else {
6101 entry = __inline_bsearch((void *)addr,
6102 tscratch->entries,
6103 nr_entries - 1,
6104 sizeof(tscratch->entries[0]),
6105 cmp_mod_entry);
6106 if (entry)
6107 idx = entry - tscratch->entries;
6108 }
6109
6110 return addr + module_delta->delta[idx];
6111 }
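/*
 * Worked example (hypothetical addresses): if a module's text was at
 * 0xffffffffc0400000 in the previous boot and module_delta->delta[idx]
 * is 0x200000 for that entry, then a recorded address 0xffffffffc0400100
 * is reported as 0xffffffffc0600100 for this boot. Kernel core text is
 * handled separately via tr->text_delta above.
 */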
6112
6113 #ifdef CONFIG_MODULES
6114 static int save_mod(struct module *mod, void *data)
6115 {
6116 struct trace_array *tr = data;
6117 struct trace_scratch *tscratch;
6118 struct trace_mod_entry *entry;
6119 unsigned int size;
6120
6121 tscratch = tr->scratch;
6122 if (!tscratch)
6123 return -1;
6124 size = tr->scratch_size;
6125
6126 if (struct_size(tscratch, entries, tscratch->nr_entries + 1) > size)
6127 return -1;
6128
6129 entry = &tscratch->entries[tscratch->nr_entries];
6130
6131 tscratch->nr_entries++;
6132
6133 entry->mod_addr = (unsigned long)mod->mem[MOD_TEXT].base;
6134 strscpy(entry->mod_name, mod->name);
6135
6136 return 0;
6137 }
6138 #else
6139 static int save_mod(struct module *mod, void *data)
6140 {
6141 return 0;
6142 }
6143 #endif
6144
6145 static void update_last_data(struct trace_array *tr)
6146 {
6147 struct trace_module_delta *module_delta;
6148 struct trace_scratch *tscratch;
6149
6150 if (!(tr->flags & TRACE_ARRAY_FL_BOOT))
6151 return;
6152
6153 if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6154 return;
6155
6156 /* Only if the buffer has previous boot data, clear and update it. */
6157 tr->flags &= ~TRACE_ARRAY_FL_LAST_BOOT;
6158
6159 /* Reset the module list and reload them */
6160 if (tr->scratch) {
6161 struct trace_scratch *tscratch = tr->scratch;
6162
6163 tscratch->clock_id = tr->clock_id;
6164 memset(tscratch->entries, 0,
6165 flex_array_size(tscratch, entries, tscratch->nr_entries));
6166 tscratch->nr_entries = 0;
6167
6168 guard(mutex)(&scratch_mutex);
6169 module_for_each_mod(save_mod, tr);
6170 }
6171
6172 /*
6173 * Need to clear all CPU buffers as there cannot be events
6174 * from the previous boot mixed with events from this boot
6175 * as that will cause a confusing trace. Need to clear all
6176 * CPU buffers, even for those that may currently be offline.
6177 */
6178 tracing_reset_all_cpus(&tr->array_buffer);
6179
6180 /* Using current data now */
6181 tr->text_delta = 0;
6182
6183 if (!tr->scratch)
6184 return;
6185
6186 tscratch = tr->scratch;
6187 module_delta = READ_ONCE(tr->module_delta);
6188 WRITE_ONCE(tr->module_delta, NULL);
6189 kfree_rcu(module_delta, rcu);
6190
6191 /* Set the persistent ring buffer meta data to this address */
6192 tscratch->text_addr = (unsigned long)_text;
6193 }
6194
6195 /**
6196 * tracing_update_buffers - used by tracing facility to expand ring buffers
6197 * @tr: The tracing instance
6198 *
6199 * To save memory when tracing is never used on a system that has it
6200 * configured in, the ring buffers are set to a minimum size. But once
6201 * a user starts to use the tracing facility, they need to grow
6202 * to their default size.
6203 *
6204 * This function is to be called when a tracer is about to be used.
6205 */
6206 int tracing_update_buffers(struct trace_array *tr)
6207 {
6208 int ret = 0;
6209
6210 guard(mutex)(&trace_types_lock);
6211
6212 update_last_data(tr);
6213
6214 if (!tr->ring_buffer_expanded)
6215 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6216 RING_BUFFER_ALL_CPUS);
6217 return ret;
6218 }
6219
6220 struct trace_option_dentry;
6221
6222 static void
6223 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6224
6225 /*
6226 * Used to clear out the tracer before deletion of an instance.
6227 * Must have trace_types_lock held.
6228 */
6229 static void tracing_set_nop(struct trace_array *tr)
6230 {
6231 if (tr->current_trace == &nop_trace)
6232 return;
6233
6234 tr->current_trace->enabled--;
6235
6236 if (tr->current_trace->reset)
6237 tr->current_trace->reset(tr);
6238
6239 tr->current_trace = &nop_trace;
6240 }
6241
6242 static bool tracer_options_updated;
6243
6244 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6245 {
6246 /* Only enable if the directory has been created already. */
6247 if (!tr->dir && !(tr->flags & TRACE_ARRAY_FL_GLOBAL))
6248 return;
6249
6250 /* Only create trace option files after update_tracer_options finishes */
6251 if (!tracer_options_updated)
6252 return;
6253
6254 create_trace_option_files(tr, t);
6255 }
6256
6257 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6258 {
6259 struct tracer *t;
6260 #ifdef CONFIG_TRACER_MAX_TRACE
6261 bool had_max_tr;
6262 #endif
6263 int ret;
6264
6265 guard(mutex)(&trace_types_lock);
6266
6267 update_last_data(tr);
6268
6269 if (!tr->ring_buffer_expanded) {
6270 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6271 RING_BUFFER_ALL_CPUS);
6272 if (ret < 0)
6273 return ret;
6274 ret = 0;
6275 }
6276
6277 for (t = trace_types; t; t = t->next) {
6278 if (strcmp(t->name, buf) == 0)
6279 break;
6280 }
6281 if (!t)
6282 return -EINVAL;
6283
6284 if (t == tr->current_trace)
6285 return 0;
6286
6287 #ifdef CONFIG_TRACER_SNAPSHOT
6288 if (t->use_max_tr) {
6289 local_irq_disable();
6290 arch_spin_lock(&tr->max_lock);
6291 ret = tr->cond_snapshot ? -EBUSY : 0;
6292 arch_spin_unlock(&tr->max_lock);
6293 local_irq_enable();
6294 if (ret)
6295 return ret;
6296 }
6297 #endif
6298 /* Some tracers won't work on kernel command line */
6299 if (system_state < SYSTEM_RUNNING && t->noboot) {
6300 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6301 t->name);
6302 return -EINVAL;
6303 }
6304
6305 /* Some tracers are only allowed for the top level buffer */
6306 if (!trace_ok_for_array(t, tr))
6307 return -EINVAL;
6308
6309 /* If trace pipe files are being read, we can't change the tracer */
6310 if (tr->trace_ref)
6311 return -EBUSY;
6312
6313 trace_branch_disable();
6314
6315 tr->current_trace->enabled--;
6316
6317 if (tr->current_trace->reset)
6318 tr->current_trace->reset(tr);
6319
6320 #ifdef CONFIG_TRACER_MAX_TRACE
6321 had_max_tr = tr->current_trace->use_max_tr;
6322
6323 /* Current trace needs to be nop_trace before synchronize_rcu */
6324 tr->current_trace = &nop_trace;
6325
6326 if (had_max_tr && !t->use_max_tr) {
6327 /*
6328 * We need to make sure that the update_max_tr sees that
6329 * current_trace changed to nop_trace to keep it from
6330 * swapping the buffers after we resize it.
6331 * The update_max_tr is called with interrupts disabled
6332 * so a synchronize_rcu() is sufficient.
6333 */
6334 synchronize_rcu();
6335 free_snapshot(tr);
6336 tracing_disarm_snapshot(tr);
6337 }
6338
6339 if (!had_max_tr && t->use_max_tr) {
6340 ret = tracing_arm_snapshot_locked(tr);
6341 if (ret)
6342 return ret;
6343 }
6344 #else
6345 tr->current_trace = &nop_trace;
6346 #endif
6347
6348 if (t->init) {
6349 ret = tracer_init(t, tr);
6350 if (ret) {
6351 #ifdef CONFIG_TRACER_MAX_TRACE
6352 if (t->use_max_tr)
6353 tracing_disarm_snapshot(tr);
6354 #endif
6355 return ret;
6356 }
6357 }
6358
6359 tr->current_trace = t;
6360 tr->current_trace->enabled++;
6361 trace_branch_enable(tr);
6362
6363 return 0;
6364 }
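/*
 * Usage sketch (assumed tracefs layout): this is the path taken when a
 * tracer name is written to current_tracer, e.g.
 *
 *	echo function_graph > /sys/kernel/tracing/current_tracer
 *	echo nop > /sys/kernel/tracing/current_tracer
 */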
6365
6366 static ssize_t
6367 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6368 size_t cnt, loff_t *ppos)
6369 {
6370 struct trace_array *tr = filp->private_data;
6371 char buf[MAX_TRACER_SIZE+1];
6372 char *name;
6373 size_t ret;
6374 int err;
6375
6376 ret = cnt;
6377
6378 if (cnt > MAX_TRACER_SIZE)
6379 cnt = MAX_TRACER_SIZE;
6380
6381 if (copy_from_user(buf, ubuf, cnt))
6382 return -EFAULT;
6383
6384 buf[cnt] = 0;
6385
6386 name = strim(buf);
6387
6388 err = tracing_set_tracer(tr, name);
6389 if (err)
6390 return err;
6391
6392 *ppos += ret;
6393
6394 return ret;
6395 }
6396
6397 static ssize_t
6398 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6399 size_t cnt, loff_t *ppos)
6400 {
6401 char buf[64];
6402 int r;
6403
6404 r = snprintf(buf, sizeof(buf), "%ld\n",
6405 *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6406 if (r > sizeof(buf))
6407 r = sizeof(buf);
6408 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6409 }
6410
6411 static ssize_t
6412 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6413 size_t cnt, loff_t *ppos)
6414 {
6415 unsigned long val;
6416 int ret;
6417
6418 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6419 if (ret)
6420 return ret;
6421
6422 *ptr = val * 1000;
6423
6424 return cnt;
6425 }
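/*
 * Note on units (derived from the helpers above): userspace reads and
 * writes these values in microseconds while they are stored internally
 * in nanoseconds, so e.g. "echo 100 > tracing_thresh" stores 100000.
 */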
6426
6427 static ssize_t
6428 tracing_thresh_read(struct file *filp, char __user *ubuf,
6429 size_t cnt, loff_t *ppos)
6430 {
6431 return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6432 }
6433
6434 static ssize_t
6435 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6436 size_t cnt, loff_t *ppos)
6437 {
6438 struct trace_array *tr = filp->private_data;
6439 int ret;
6440
6441 guard(mutex)(&trace_types_lock);
6442 ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6443 if (ret < 0)
6444 return ret;
6445
6446 if (tr->current_trace->update_thresh) {
6447 ret = tr->current_trace->update_thresh(tr);
6448 if (ret < 0)
6449 return ret;
6450 }
6451
6452 return cnt;
6453 }
6454
6455 #ifdef CONFIG_TRACER_MAX_TRACE
6456
6457 static ssize_t
6458 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6459 size_t cnt, loff_t *ppos)
6460 {
6461 struct trace_array *tr = filp->private_data;
6462
6463 return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6464 }
6465
6466 static ssize_t
6467 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6468 size_t cnt, loff_t *ppos)
6469 {
6470 struct trace_array *tr = filp->private_data;
6471
6472 return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6473 }
6474
6475 #endif
6476
6477 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6478 {
6479 if (cpu == RING_BUFFER_ALL_CPUS) {
6480 if (cpumask_empty(tr->pipe_cpumask)) {
6481 cpumask_setall(tr->pipe_cpumask);
6482 return 0;
6483 }
6484 } else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6485 cpumask_set_cpu(cpu, tr->pipe_cpumask);
6486 return 0;
6487 }
6488 return -EBUSY;
6489 }
6490
6491 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6492 {
6493 if (cpu == RING_BUFFER_ALL_CPUS) {
6494 WARN_ON(!cpumask_full(tr->pipe_cpumask));
6495 cpumask_clear(tr->pipe_cpumask);
6496 } else {
6497 WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6498 cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6499 }
6500 }
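/*
 * Illustrative note: the pipe_cpumask bookkeeping above makes trace_pipe
 * readers exclusive, so (assuming the usual tracefs layout) a second
 *
 *	cat /sys/kernel/tracing/trace_pipe
 *
 * while one is already running fails with -EBUSY, as does opening the
 * global pipe while a per_cpu/cpuN/trace_pipe reader is active.
 */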
6501
6502 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6503 {
6504 struct trace_array *tr = inode->i_private;
6505 struct trace_iterator *iter;
6506 int cpu;
6507 int ret;
6508
6509 ret = tracing_check_open_get_tr(tr);
6510 if (ret)
6511 return ret;
6512
6513 guard(mutex)(&trace_types_lock);
6514 cpu = tracing_get_cpu(inode);
6515 ret = open_pipe_on_cpu(tr, cpu);
6516 if (ret)
6517 goto fail_pipe_on_cpu;
6518
6519 /* create a buffer to store the information to pass to userspace */
6520 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6521 if (!iter) {
6522 ret = -ENOMEM;
6523 goto fail_alloc_iter;
6524 }
6525
6526 trace_seq_init(&iter->seq);
6527 iter->trace = tr->current_trace;
6528
6529 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6530 ret = -ENOMEM;
6531 goto fail;
6532 }
6533
6534 /* trace pipe does not show start of buffer */
6535 cpumask_setall(iter->started);
6536
6537 if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6538 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6539
6540 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6541 if (trace_clocks[tr->clock_id].in_ns)
6542 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6543
6544 iter->tr = tr;
6545 iter->array_buffer = &tr->array_buffer;
6546 iter->cpu_file = cpu;
6547 mutex_init(&iter->mutex);
6548 filp->private_data = iter;
6549
6550 if (iter->trace->pipe_open)
6551 iter->trace->pipe_open(iter);
6552
6553 nonseekable_open(inode, filp);
6554
6555 tr->trace_ref++;
6556
6557 return ret;
6558
6559 fail:
6560 kfree(iter);
6561 fail_alloc_iter:
6562 close_pipe_on_cpu(tr, cpu);
6563 fail_pipe_on_cpu:
6564 __trace_array_put(tr);
6565 return ret;
6566 }
6567
6568 static int tracing_release_pipe(struct inode *inode, struct file *file)
6569 {
6570 struct trace_iterator *iter = file->private_data;
6571 struct trace_array *tr = inode->i_private;
6572
6573 scoped_guard(mutex, &trace_types_lock) {
6574 tr->trace_ref--;
6575
6576 if (iter->trace->pipe_close)
6577 iter->trace->pipe_close(iter);
6578 close_pipe_on_cpu(tr, iter->cpu_file);
6579 }
6580
6581 free_trace_iter_content(iter);
6582 kfree(iter);
6583
6584 trace_array_put(tr);
6585
6586 return 0;
6587 }
6588
6589 static __poll_t
6590 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6591 {
6592 struct trace_array *tr = iter->tr;
6593
6594 /* Iterators are static, they should be filled or empty */
6595 if (trace_buffer_iter(iter, iter->cpu_file))
6596 return EPOLLIN | EPOLLRDNORM;
6597
6598 if (tr->trace_flags & TRACE_ITER_BLOCK)
6599 /*
6600 * Always select as readable when in blocking mode
6601 */
6602 return EPOLLIN | EPOLLRDNORM;
6603 else
6604 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6605 filp, poll_table, iter->tr->buffer_percent);
6606 }
6607
6608 static __poll_t
6609 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6610 {
6611 struct trace_iterator *iter = filp->private_data;
6612
6613 return trace_poll(iter, filp, poll_table);
6614 }
6615
6616 /* Must be called with iter->mutex held. */
6617 static int tracing_wait_pipe(struct file *filp)
6618 {
6619 struct trace_iterator *iter = filp->private_data;
6620 int ret;
6621
6622 while (trace_empty(iter)) {
6623
6624 if ((filp->f_flags & O_NONBLOCK)) {
6625 return -EAGAIN;
6626 }
6627
6628 /*
6629 * We block until we read something and tracing is disabled.
6630 * We still block if tracing is disabled, but we have never
6631 * read anything. This allows a user to cat this file, and
6632 * then enable tracing. But after we have read something,
6633 * we give an EOF when tracing is again disabled.
6634 *
6635 * iter->pos will be 0 if we haven't read anything.
6636 */
6637 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6638 break;
6639
6640 mutex_unlock(&iter->mutex);
6641
6642 ret = wait_on_pipe(iter, 0);
6643
6644 mutex_lock(&iter->mutex);
6645
6646 if (ret)
6647 return ret;
6648 }
6649
6650 return 1;
6651 }
6652
6653 static bool update_last_data_if_empty(struct trace_array *tr)
6654 {
6655 if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6656 return false;
6657
6658 if (!ring_buffer_empty(tr->array_buffer.buffer))
6659 return false;
6660
6661 /*
6662 * If the buffer contains the last boot data and all per-cpu
6663 * buffers are empty, reset it from the kernel side.
6664 */
6665 update_last_data(tr);
6666 return true;
6667 }
6668
6669 /*
6670 * Consumer reader.
6671 */
6672 static ssize_t
6673 tracing_read_pipe(struct file *filp, char __user *ubuf,
6674 size_t cnt, loff_t *ppos)
6675 {
6676 struct trace_iterator *iter = filp->private_data;
6677 ssize_t sret;
6678
6679 /*
6680 * Avoid more than one consumer on a single file descriptor
6681 * This is just a matter of trace coherency, the ring buffer itself
6682 * is protected.
6683 */
6684 guard(mutex)(&iter->mutex);
6685
6686 /* return any leftover data */
6687 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6688 if (sret != -EBUSY)
6689 return sret;
6690
6691 trace_seq_init(&iter->seq);
6692
6693 if (iter->trace->read) {
6694 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6695 if (sret)
6696 return sret;
6697 }
6698
6699 waitagain:
6700 if (update_last_data_if_empty(iter->tr))
6701 return 0;
6702
6703 sret = tracing_wait_pipe(filp);
6704 if (sret <= 0)
6705 return sret;
6706
6707 /* stop when tracing is finished */
6708 if (trace_empty(iter))
6709 return 0;
6710
6711 if (cnt >= TRACE_SEQ_BUFFER_SIZE)
6712 cnt = TRACE_SEQ_BUFFER_SIZE - 1;
6713
6714 /* reset all but tr, trace, and overruns */
6715 trace_iterator_reset(iter);
6716 cpumask_clear(iter->started);
6717 trace_seq_init(&iter->seq);
6718
6719 trace_event_read_lock();
6720 trace_access_lock(iter->cpu_file);
6721 while (trace_find_next_entry_inc(iter) != NULL) {
6722 enum print_line_t ret;
6723 int save_len = iter->seq.seq.len;
6724
6725 ret = print_trace_line(iter);
6726 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6727 /*
6728 * If one print_trace_line() fills the entire trace_seq in one shot,
6729 * trace_seq_to_user() will return -EBUSY because save_len == 0.
6730 * In this case, we need to consume it, otherwise the loop will peek
6731 * this event next time, resulting in an infinite loop.
6732 */
6733 if (save_len == 0) {
6734 iter->seq.full = 0;
6735 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6736 trace_consume(iter);
6737 break;
6738 }
6739
6740 /* In other cases, don't print partial lines */
6741 iter->seq.seq.len = save_len;
6742 break;
6743 }
6744 if (ret != TRACE_TYPE_NO_CONSUME)
6745 trace_consume(iter);
6746
6747 if (trace_seq_used(&iter->seq) >= cnt)
6748 break;
6749
6750 /*
6751 * Setting the full flag means we reached the trace_seq buffer
6752 * size and we should leave by partial output condition above.
6753 * One of the trace_seq_* functions is not used properly.
6754 */
6755 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6756 iter->ent->type);
6757 }
6758 trace_access_unlock(iter->cpu_file);
6759 trace_event_read_unlock();
6760
6761 /* Now copy what we have to the user */
6762 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6763 if (iter->seq.readpos >= trace_seq_used(&iter->seq))
6764 trace_seq_init(&iter->seq);
6765
6766 /*
6767 * If there was nothing to send to user, in spite of consuming trace
6768 * entries, go back to wait for more entries.
6769 */
6770 if (sret == -EBUSY)
6771 goto waitagain;
6772
6773 return sret;
6774 }
6775
6776 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6777 unsigned int idx)
6778 {
6779 __free_page(spd->pages[idx]);
6780 }
6781
6782 static size_t
6783 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6784 {
6785 size_t count;
6786 int save_len;
6787 int ret;
6788
6789 /* Seq buffer is page-sized, exactly what we need. */
6790 for (;;) {
6791 save_len = iter->seq.seq.len;
6792 ret = print_trace_line(iter);
6793
6794 if (trace_seq_has_overflowed(&iter->seq)) {
6795 iter->seq.seq.len = save_len;
6796 break;
6797 }
6798
6799 /*
6800 * This should not be hit, because it should only
6801 * be set if the iter->seq overflowed. But check it
6802 * anyway to be safe.
6803 */
6804 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6805 iter->seq.seq.len = save_len;
6806 break;
6807 }
6808
6809 count = trace_seq_used(&iter->seq) - save_len;
6810 if (rem < count) {
6811 rem = 0;
6812 iter->seq.seq.len = save_len;
6813 break;
6814 }
6815
6816 if (ret != TRACE_TYPE_NO_CONSUME)
6817 trace_consume(iter);
6818 rem -= count;
6819 if (!trace_find_next_entry_inc(iter)) {
6820 rem = 0;
6821 iter->ent = NULL;
6822 break;
6823 }
6824 }
6825
6826 return rem;
6827 }
6828
6829 static ssize_t tracing_splice_read_pipe(struct file *filp,
6830 loff_t *ppos,
6831 struct pipe_inode_info *pipe,
6832 size_t len,
6833 unsigned int flags)
6834 {
6835 struct page *pages_def[PIPE_DEF_BUFFERS];
6836 struct partial_page partial_def[PIPE_DEF_BUFFERS];
6837 struct trace_iterator *iter = filp->private_data;
6838 struct splice_pipe_desc spd = {
6839 .pages = pages_def,
6840 .partial = partial_def,
6841 .nr_pages = 0, /* This gets updated below. */
6842 .nr_pages_max = PIPE_DEF_BUFFERS,
6843 .ops = &default_pipe_buf_ops,
6844 .spd_release = tracing_spd_release_pipe,
6845 };
6846 ssize_t ret;
6847 size_t rem;
6848 unsigned int i;
6849
6850 if (splice_grow_spd(pipe, &spd))
6851 return -ENOMEM;
6852
6853 mutex_lock(&iter->mutex);
6854
6855 if (iter->trace->splice_read) {
6856 ret = iter->trace->splice_read(iter, filp,
6857 ppos, pipe, len, flags);
6858 if (ret)
6859 goto out_err;
6860 }
6861
6862 ret = tracing_wait_pipe(filp);
6863 if (ret <= 0)
6864 goto out_err;
6865
6866 if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6867 ret = -EFAULT;
6868 goto out_err;
6869 }
6870
6871 trace_event_read_lock();
6872 trace_access_lock(iter->cpu_file);
6873
6874 /* Fill as many pages as possible. */
6875 for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6876 spd.pages[i] = alloc_page(GFP_KERNEL);
6877 if (!spd.pages[i])
6878 break;
6879
6880 rem = tracing_fill_pipe_page(rem, iter);
6881
6882 /* Copy the data into the page, so we can start over. */
6883 ret = trace_seq_to_buffer(&iter->seq,
6884 page_address(spd.pages[i]),
6885 min((size_t)trace_seq_used(&iter->seq),
6886 (size_t)PAGE_SIZE));
6887 if (ret < 0) {
6888 __free_page(spd.pages[i]);
6889 break;
6890 }
6891 spd.partial[i].offset = 0;
6892 spd.partial[i].len = ret;
6893
6894 trace_seq_init(&iter->seq);
6895 }
6896
6897 trace_access_unlock(iter->cpu_file);
6898 trace_event_read_unlock();
6899 mutex_unlock(&iter->mutex);
6900
6901 spd.nr_pages = i;
6902
6903 if (i)
6904 ret = splice_to_pipe(pipe, &spd);
6905 else
6906 ret = 0;
6907 out:
6908 splice_shrink_spd(&spd);
6909 return ret;
6910
6911 out_err:
6912 mutex_unlock(&iter->mutex);
6913 goto out;
6914 }
6915
6916 static ssize_t
6917 tracing_entries_read(struct file *filp, char __user *ubuf,
6918 size_t cnt, loff_t *ppos)
6919 {
6920 struct inode *inode = file_inode(filp);
6921 struct trace_array *tr = inode->i_private;
6922 int cpu = tracing_get_cpu(inode);
6923 char buf[64];
6924 int r = 0;
6925 ssize_t ret;
6926
6927 mutex_lock(&trace_types_lock);
6928
6929 if (cpu == RING_BUFFER_ALL_CPUS) {
6930 int cpu, buf_size_same;
6931 unsigned long size;
6932
6933 size = 0;
6934 buf_size_same = 1;
6935 /* check if all cpu sizes are same */
6936 for_each_tracing_cpu(cpu) {
6937 /* fill in the size from first enabled cpu */
6938 if (size == 0)
6939 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6940 if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6941 buf_size_same = 0;
6942 break;
6943 }
6944 }
6945
6946 if (buf_size_same) {
6947 if (!tr->ring_buffer_expanded)
6948 r = sprintf(buf, "%lu (expanded: %lu)\n",
6949 size >> 10,
6950 trace_buf_size >> 10);
6951 else
6952 r = sprintf(buf, "%lu\n", size >> 10);
6953 } else
6954 r = sprintf(buf, "X\n");
6955 } else
6956 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6957
6958 mutex_unlock(&trace_types_lock);
6959
6960 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6961 return ret;
6962 }
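/*
 * Reading sketch (based on the code above): while all per-CPU buffers are
 * the same size, buffer_size_kb shows that size in KiB (plus
 * "(expanded: ...)" before the ring buffer has been expanded); if the
 * per-CPU sizes differ, it prints "X" and the per_cpu/cpuN files must be
 * consulted instead.
 */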
6963
6964 static ssize_t
6965 tracing_entries_write(struct file *filp, const char __user *ubuf,
6966 size_t cnt, loff_t *ppos)
6967 {
6968 struct inode *inode = file_inode(filp);
6969 struct trace_array *tr = inode->i_private;
6970 unsigned long val;
6971 int ret;
6972
6973 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6974 if (ret)
6975 return ret;
6976
6977 /* must have at least 1 entry */
6978 if (!val)
6979 return -EINVAL;
6980
6981 /* value is in KB */
6982 val <<= 10;
6983 ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6984 if (ret < 0)
6985 return ret;
6986
6987 *ppos += cnt;
6988
6989 return cnt;
6990 }
6991
6992 static ssize_t
6993 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6994 size_t cnt, loff_t *ppos)
6995 {
6996 struct trace_array *tr = filp->private_data;
6997 char buf[64];
6998 int r, cpu;
6999 unsigned long size = 0, expanded_size = 0;
7000
7001 mutex_lock(&trace_types_lock);
7002 for_each_tracing_cpu(cpu) {
7003 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7004 if (!tr->ring_buffer_expanded)
7005 expanded_size += trace_buf_size >> 10;
7006 }
7007 if (tr->ring_buffer_expanded)
7008 r = sprintf(buf, "%lu\n", size);
7009 else
7010 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7011 mutex_unlock(&trace_types_lock);
7012
7013 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7014 }
7015
7016 #define LAST_BOOT_HEADER ((void *)1)
7017
7018 static void *l_next(struct seq_file *m, void *v, loff_t *pos)
7019 {
7020 struct trace_array *tr = m->private;
7021 struct trace_scratch *tscratch = tr->scratch;
7022 unsigned int index = *pos;
7023
7024 (*pos)++;
7025
7026 if (*pos == 1)
7027 return LAST_BOOT_HEADER;
7028
7029 /* Only show offsets of the last boot data */
7030 if (!tscratch || !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
7031 return NULL;
7032
7033 /* *pos 0 is for the header, 1 is for the first module */
7034 index--;
7035
7036 if (index >= tscratch->nr_entries)
7037 return NULL;
7038
7039 return &tscratch->entries[index];
7040 }
7041
7042 static void *l_start(struct seq_file *m, loff_t *pos)
7043 {
7044 mutex_lock(&scratch_mutex);
7045
7046 return l_next(m, NULL, pos);
7047 }
7048
7049 static void l_stop(struct seq_file *m, void *p)
7050 {
7051 mutex_unlock(&scratch_mutex);
7052 }
7053
7054 static void show_last_boot_header(struct seq_file *m, struct trace_array *tr)
7055 {
7056 struct trace_scratch *tscratch = tr->scratch;
7057
7058 /*
7059 * Do not leak KASLR address. This only shows the KASLR address of
7060 * the last boot. When the ring buffer is started, the LAST_BOOT
7061 * flag gets cleared, and this should only report "current".
7062 * Otherwise it shows the KASLR address from the previous boot which
7063 * should not be the same as the current boot.
7064 */
7065 if (tscratch && (tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
7066 seq_printf(m, "%lx\t[kernel]\n", tscratch->text_addr);
7067 else
7068 seq_puts(m, "# Current\n");
7069 }
7070
7071 static int l_show(struct seq_file *m, void *v)
7072 {
7073 struct trace_array *tr = m->private;
7074 struct trace_mod_entry *entry = v;
7075
7076 if (v == LAST_BOOT_HEADER) {
7077 show_last_boot_header(m, tr);
7078 return 0;
7079 }
7080
7081 seq_printf(m, "%lx\t%s\n", entry->mod_addr, entry->mod_name);
7082 return 0;
7083 }
7084
7085 static const struct seq_operations last_boot_seq_ops = {
7086 .start = l_start,
7087 .next = l_next,
7088 .stop = l_stop,
7089 .show = l_show,
7090 };
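/*
 * Output sketch (field values are hypothetical): reading the last-boot
 * file backed by these seq_ops produces either "# Current" or lines like
 *
 *	ffffffff9a200000	[kernel]
 *	ffffffffc0a40000	xfs
 *
 * i.e. the previous boot's kernel text address followed by one line per
 * module recorded in the scratch area.
 */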
7091
7092 static int tracing_last_boot_open(struct inode *inode, struct file *file)
7093 {
7094 struct trace_array *tr = inode->i_private;
7095 struct seq_file *m;
7096 int ret;
7097
7098 ret = tracing_check_open_get_tr(tr);
7099 if (ret)
7100 return ret;
7101
7102 ret = seq_open(file, &last_boot_seq_ops);
7103 if (ret) {
7104 trace_array_put(tr);
7105 return ret;
7106 }
7107
7108 m = file->private_data;
7109 m->private = tr;
7110
7111 return 0;
7112 }
7113
7114 static int tracing_buffer_meta_open(struct inode *inode, struct file *filp)
7115 {
7116 struct trace_array *tr = inode->i_private;
7117 int cpu = tracing_get_cpu(inode);
7118 int ret;
7119
7120 ret = tracing_check_open_get_tr(tr);
7121 if (ret)
7122 return ret;
7123
7124 ret = ring_buffer_meta_seq_init(filp, tr->array_buffer.buffer, cpu);
7125 if (ret < 0)
7126 __trace_array_put(tr);
7127 return ret;
7128 }
7129
7130 static ssize_t
7131 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7132 size_t cnt, loff_t *ppos)
7133 {
7134 /*
7135 * There is no need to read what the user has written; this function
7136 * only exists so that using "echo" on this file does not return an error.
7137 */
7138
7139 *ppos += cnt;
7140
7141 return cnt;
7142 }
7143
7144 static int
7145 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7146 {
7147 struct trace_array *tr = inode->i_private;
7148
7149 /* disable tracing ? */
7150 if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7151 tracer_tracing_off(tr);
7152 /* resize the ring buffer to 0 */
7153 tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7154
7155 trace_array_put(tr);
7156
7157 return 0;
7158 }
7159
7160 #define TRACE_MARKER_MAX_SIZE 4096
7161
7162 static ssize_t write_marker_to_buffer(struct trace_array *tr, const char __user *ubuf,
7163 size_t cnt, unsigned long ip)
7164 {
7165 struct ring_buffer_event *event;
7166 enum event_trigger_type tt = ETT_NONE;
7167 struct trace_buffer *buffer;
7168 struct print_entry *entry;
7169 int meta_size;
7170 ssize_t written;
7171 size_t size;
7172 int len;
7173
7174 /* Used in tracing_mark_raw_write() as well */
7175 #define FAULTED_STR "<faulted>"
7176 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7177
7178 meta_size = sizeof(*entry) + 2; /* add '\0' and possible '\n' */
7179 again:
7180 size = cnt + meta_size;
7181
7182 /* If less than "<faulted>", then make sure we can still add that */
7183 if (cnt < FAULTED_SIZE)
7184 size += FAULTED_SIZE - cnt;
7185
7186 buffer = tr->array_buffer.buffer;
7187 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7188 tracing_gen_ctx());
7189 if (unlikely(!event)) {
7190 /*
7191 * If the size was greater than what was allowed, then
7192 * make it smaller and try again.
7193 */
7194 if (size > ring_buffer_max_event_size(buffer)) {
7195 /* A cnt smaller than FAULTED_SIZE should never make size exceed the max */
7196 if (WARN_ON_ONCE(cnt < FAULTED_SIZE))
7197 return -EBADF;
7198 cnt = ring_buffer_max_event_size(buffer) - meta_size;
7199 /* The above should only happen once */
7200 if (WARN_ON_ONCE(cnt + meta_size == size))
7201 return -EBADF;
7202 goto again;
7203 }
7204
7205 /* Ring buffer disabled, return as if not open for write */
7206 return -EBADF;
7207 }
7208
7209 entry = ring_buffer_event_data(event);
7210 entry->ip = ip;
7211
7212 len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7213 if (len) {
7214 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7215 cnt = FAULTED_SIZE;
7216 written = -EFAULT;
7217 } else
7218 written = cnt;
7219
7220 if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7221 /* do not add \n before testing triggers, but add \0 */
7222 entry->buf[cnt] = '\0';
7223 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7224 }
7225
7226 if (entry->buf[cnt - 1] != '\n') {
7227 entry->buf[cnt] = '\n';
7228 entry->buf[cnt + 1] = '\0';
7229 } else
7230 entry->buf[cnt] = '\0';
7231
7232 if (static_branch_unlikely(&trace_marker_exports_enabled))
7233 ftrace_exports(event, TRACE_EXPORT_MARKER);
7234 __buffer_unlock_commit(buffer, event);
7235
7236 if (tt)
7237 event_triggers_post_call(tr->trace_marker_file, tt);
7238
7239 return written;
7240 }
7241
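/*
 * Rough usage sketch (the path and sizes are illustrative): user space
 * writes plain text into the tracefs trace_marker file and it is recorded
 * as a TRACE_PRINT event by the function below, e.g.:
 *
 *	int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
 *	write(fd, "frame start\n", 12);
 *
 * Writes longer than TRACE_MARKER_MAX_SIZE are truncated, and a write to
 * the global trace_marker is copied to every instance sharing it.
 */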
7242 static ssize_t
7243 tracing_mark_write(struct file *filp, const char __user *ubuf,
7244 size_t cnt, loff_t *fpos)
7245 {
7246 struct trace_array *tr = filp->private_data;
7247 ssize_t written = -ENODEV;
7248 unsigned long ip;
7249
7250 if (tracing_disabled)
7251 return -EINVAL;
7252
7253 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7254 return -EINVAL;
7255
7256 if ((ssize_t)cnt < 0)
7257 return -EINVAL;
7258
7259 if (cnt > TRACE_MARKER_MAX_SIZE)
7260 cnt = TRACE_MARKER_MAX_SIZE;
7261
7262 /* The selftests expect this function to be the IP address */
7263 ip = _THIS_IP_;
7264
7265 /* The global trace_marker can go to multiple instances */
7266 if (tr == &global_trace) {
7267 guard(rcu)();
7268 list_for_each_entry_rcu(tr, &marker_copies, marker_list) {
7269 written = write_marker_to_buffer(tr, ubuf, cnt, ip);
7270 if (written < 0)
7271 break;
7272 }
7273 } else {
7274 written = write_marker_to_buffer(tr, ubuf, cnt, ip);
7275 }
7276
7277 return written;
7278 }
7279
7280 static ssize_t write_raw_marker_to_buffer(struct trace_array *tr,
7281 const char __user *ubuf, size_t cnt)
7282 {
7283 struct ring_buffer_event *event;
7284 struct trace_buffer *buffer;
7285 struct raw_data_entry *entry;
7286 ssize_t written;
7287 int size;
7288 int len;
7289
7290 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7291
7292 size = sizeof(*entry) + cnt;
7293 if (cnt < FAULT_SIZE_ID)
7294 size += FAULT_SIZE_ID - cnt;
7295
7296 buffer = tr->array_buffer.buffer;
7297
7298 if (size > ring_buffer_max_event_size(buffer))
7299 return -EINVAL;
7300
7301 event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7302 tracing_gen_ctx());
7303 if (!event)
7304 /* Ring buffer disabled, return as if not open for write */
7305 return -EBADF;
7306
7307 entry = ring_buffer_event_data(event);
7308
7309 len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7310 if (len) {
7311 entry->id = -1;
7312 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7313 written = -EFAULT;
7314 } else
7315 written = cnt;
7316
7317 __buffer_unlock_commit(buffer, event);
7318
7319 return written;
7320 }
7321
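/*
 * Rough usage sketch (the struct layout and path are illustrative):
 * trace_marker_raw takes a binary record whose first sizeof(int) bytes are
 * an application-chosen id, the rest being opaque payload, e.g.:
 *
 *	struct { int id; char data[8]; } rec = { 42, "payload" };
 *	write(raw_fd, &rec, sizeof(rec));
 *
 * The record is stored as-is in a TRACE_RAW_DATA event.
 */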
7322 static ssize_t
7323 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7324 size_t cnt, loff_t *fpos)
7325 {
7326 struct trace_array *tr = filp->private_data;
7327 ssize_t written = -ENODEV;
7328
7329 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7330
7331 if (tracing_disabled)
7332 return -EINVAL;
7333
7334 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7335 return -EINVAL;
7336
7337 /* The marker must at least have a tag id */
7338 if (cnt < sizeof(unsigned int))
7339 return -EINVAL;
7340
7341 /* The global trace_marker_raw can go to multiple instances */
7342 if (tr == &global_trace) {
7343 guard(rcu)();
7344 list_for_each_entry_rcu(tr, &marker_copies, marker_list) {
7345 written = write_raw_marker_to_buffer(tr, ubuf, cnt);
7346 if (written < 0)
7347 break;
7348 }
7349 } else {
7350 written = write_raw_marker_to_buffer(tr, ubuf, cnt);
7351 }
7352
7353 return written;
7354 }
7355
7356 static int tracing_clock_show(struct seq_file *m, void *v)
7357 {
7358 struct trace_array *tr = m->private;
7359 int i;
7360
7361 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7362 seq_printf(m,
7363 "%s%s%s%s", i ? " " : "",
7364 i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7365 i == tr->clock_id ? "]" : "");
7366 seq_putc(m, '\n');
7367
7368 return 0;
7369 }
7370
7371 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7372 {
7373 int i;
7374
7375 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7376 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7377 break;
7378 }
7379 if (i == ARRAY_SIZE(trace_clocks))
7380 return -EINVAL;
7381
7382 guard(mutex)(&trace_types_lock);
7383
7384 tr->clock_id = i;
7385
7386 ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7387
7388 /*
7389 * New clock may not be consistent with the previous clock.
7390 * Reset the buffer so that it doesn't have incomparable timestamps.
7391 */
7392 tracing_reset_online_cpus(&tr->array_buffer);
7393
7394 #ifdef CONFIG_TRACER_MAX_TRACE
7395 if (tr->max_buffer.buffer)
7396 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7397 tracing_reset_online_cpus(&tr->max_buffer);
7398 #endif
7399
7400 if (tr->scratch && !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) {
7401 struct trace_scratch *tscratch = tr->scratch;
7402
7403 tscratch->clock_id = i;
7404 }
7405
7406 return 0;
7407 }
7408
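/*
 * A minimal example of switching clocks (the clock name is one of those
 * listed by tracing_clock_show(), e.g. "mono"; the path is illustrative):
 *
 *	int fd = open("/sys/kernel/tracing/trace_clock", O_WRONLY);
 *	write(fd, "mono", 4);
 *
 * Note that tracing_set_clock() resets the ring buffer(s), so existing
 * events are discarded when the clock changes.
 */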
7409 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7410 size_t cnt, loff_t *fpos)
7411 {
7412 struct seq_file *m = filp->private_data;
7413 struct trace_array *tr = m->private;
7414 char buf[64];
7415 const char *clockstr;
7416 int ret;
7417
7418 if (cnt >= sizeof(buf))
7419 return -EINVAL;
7420
7421 if (copy_from_user(buf, ubuf, cnt))
7422 return -EFAULT;
7423
7424 buf[cnt] = 0;
7425
7426 clockstr = strstrip(buf);
7427
7428 ret = tracing_set_clock(tr, clockstr);
7429 if (ret)
7430 return ret;
7431
7432 *fpos += cnt;
7433
7434 return cnt;
7435 }
7436
7437 static int tracing_clock_open(struct inode *inode, struct file *file)
7438 {
7439 struct trace_array *tr = inode->i_private;
7440 int ret;
7441
7442 ret = tracing_check_open_get_tr(tr);
7443 if (ret)
7444 return ret;
7445
7446 ret = single_open(file, tracing_clock_show, inode->i_private);
7447 if (ret < 0)
7448 trace_array_put(tr);
7449
7450 return ret;
7451 }
7452
7453 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7454 {
7455 struct trace_array *tr = m->private;
7456
7457 guard(mutex)(&trace_types_lock);
7458
7459 if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7460 seq_puts(m, "delta [absolute]\n");
7461 else
7462 seq_puts(m, "[delta] absolute\n");
7463
7464 return 0;
7465 }
7466
7467 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7468 {
7469 struct trace_array *tr = inode->i_private;
7470 int ret;
7471
7472 ret = tracing_check_open_get_tr(tr);
7473 if (ret)
7474 return ret;
7475
7476 ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7477 if (ret < 0)
7478 trace_array_put(tr);
7479
7480 return ret;
7481 }
7482
7483 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7484 {
7485 if (rbe == this_cpu_read(trace_buffered_event))
7486 return ring_buffer_time_stamp(buffer);
7487
7488 return ring_buffer_event_time_stamp(buffer, rbe);
7489 }
7490
7491 /*
7492 * Set or disable using the per CPU trace_buffered_event when possible.
7493 */
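/*
 * no_filter_buffering_ref acts as a nesting count: each set==true call
 * takes a reference and each set==false call drops one; while the count
 * is non-zero the reserve path is expected to bypass the per CPU
 * trace_buffered_event and write straight into the ring buffer.
 */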
7494 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7495 {
7496 guard(mutex)(&trace_types_lock);
7497
7498 if (set && tr->no_filter_buffering_ref++)
7499 return 0;
7500
7501 if (!set) {
7502 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref))
7503 return -EINVAL;
7504
7505 --tr->no_filter_buffering_ref;
7506 }
7507
7508 return 0;
7509 }
7510
7511 struct ftrace_buffer_info {
7512 struct trace_iterator iter;
7513 void *spare;
7514 unsigned int spare_cpu;
7515 unsigned int spare_size;
7516 unsigned int read;
7517 };
7518
7519 #ifdef CONFIG_TRACER_SNAPSHOT
7520 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7521 {
7522 struct trace_array *tr = inode->i_private;
7523 struct trace_iterator *iter;
7524 struct seq_file *m;
7525 int ret;
7526
7527 ret = tracing_check_open_get_tr(tr);
7528 if (ret)
7529 return ret;
7530
7531 if (file->f_mode & FMODE_READ) {
7532 iter = __tracing_open(inode, file, true);
7533 if (IS_ERR(iter))
7534 ret = PTR_ERR(iter);
7535 } else {
7536 /* Writes still need the seq_file to hold the private data */
7537 ret = -ENOMEM;
7538 m = kzalloc(sizeof(*m), GFP_KERNEL);
7539 if (!m)
7540 goto out;
7541 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7542 if (!iter) {
7543 kfree(m);
7544 goto out;
7545 }
7546 ret = 0;
7547
7548 iter->tr = tr;
7549 iter->array_buffer = &tr->max_buffer;
7550 iter->cpu_file = tracing_get_cpu(inode);
7551 m->private = iter;
7552 file->private_data = m;
7553 }
7554 out:
7555 if (ret < 0)
7556 trace_array_put(tr);
7557
7558 return ret;
7559 }
7560
7561 static void tracing_swap_cpu_buffer(void *tr)
7562 {
7563 update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7564 }
7565
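/*
 * The value written to the "snapshot" file selects the action taken in
 * the switch below: "0" frees the snapshot buffer, "1" allocates it if
 * needed and swaps in the current buffer, and any other number simply
 * clears the snapshot buffer's contents.
 */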
7566 static ssize_t
7567 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7568 loff_t *ppos)
7569 {
7570 struct seq_file *m = filp->private_data;
7571 struct trace_iterator *iter = m->private;
7572 struct trace_array *tr = iter->tr;
7573 unsigned long val;
7574 int ret;
7575
7576 ret = tracing_update_buffers(tr);
7577 if (ret < 0)
7578 return ret;
7579
7580 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7581 if (ret)
7582 return ret;
7583
7584 guard(mutex)(&trace_types_lock);
7585
7586 if (tr->current_trace->use_max_tr)
7587 return -EBUSY;
7588
7589 local_irq_disable();
7590 arch_spin_lock(&tr->max_lock);
7591 if (tr->cond_snapshot)
7592 ret = -EBUSY;
7593 arch_spin_unlock(&tr->max_lock);
7594 local_irq_enable();
7595 if (ret)
7596 return ret;
7597
7598 switch (val) {
7599 case 0:
7600 if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
7601 return -EINVAL;
7602 if (tr->allocated_snapshot)
7603 free_snapshot(tr);
7604 break;
7605 case 1:
7606 /* Only allow per-cpu swap if the ring buffer supports it */
7607 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7608 if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
7609 return -EINVAL;
7610 #endif
7611 if (tr->allocated_snapshot)
7612 ret = resize_buffer_duplicate_size(&tr->max_buffer,
7613 &tr->array_buffer, iter->cpu_file);
7614
7615 ret = tracing_arm_snapshot_locked(tr);
7616 if (ret)
7617 return ret;
7618
7619 /* Now, we're going to swap */
7620 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7621 local_irq_disable();
7622 update_max_tr(tr, current, smp_processor_id(), NULL);
7623 local_irq_enable();
7624 } else {
7625 smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7626 (void *)tr, 1);
7627 }
7628 tracing_disarm_snapshot(tr);
7629 break;
7630 default:
7631 if (tr->allocated_snapshot) {
7632 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7633 tracing_reset_online_cpus(&tr->max_buffer);
7634 else
7635 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7636 }
7637 break;
7638 }
7639
7640 if (ret >= 0) {
7641 *ppos += cnt;
7642 ret = cnt;
7643 }
7644
7645 return ret;
7646 }
7647
7648 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7649 {
7650 struct seq_file *m = file->private_data;
7651 int ret;
7652
7653 ret = tracing_release(inode, file);
7654
7655 if (file->f_mode & FMODE_READ)
7656 return ret;
7657
7658 /* If write only, the seq_file is just a stub */
7659 if (m)
7660 kfree(m->private);
7661 kfree(m);
7662
7663 return 0;
7664 }
7665
7666 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7667 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7668 size_t count, loff_t *ppos);
7669 static int tracing_buffers_release(struct inode *inode, struct file *file);
7670 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7671 struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7672
7673 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7674 {
7675 struct ftrace_buffer_info *info;
7676 int ret;
7677
7678 /* The following checks for tracefs lockdown */
7679 ret = tracing_buffers_open(inode, filp);
7680 if (ret < 0)
7681 return ret;
7682
7683 info = filp->private_data;
7684
7685 if (info->iter.trace->use_max_tr) {
7686 tracing_buffers_release(inode, filp);
7687 return -EBUSY;
7688 }
7689
7690 info->iter.snapshot = true;
7691 info->iter.array_buffer = &info->iter.tr->max_buffer;
7692
7693 return ret;
7694 }
7695
7696 #endif /* CONFIG_TRACER_SNAPSHOT */
7697
7698
7699 static const struct file_operations tracing_thresh_fops = {
7700 .open = tracing_open_generic,
7701 .read = tracing_thresh_read,
7702 .write = tracing_thresh_write,
7703 .llseek = generic_file_llseek,
7704 };
7705
7706 #ifdef CONFIG_TRACER_MAX_TRACE
7707 static const struct file_operations tracing_max_lat_fops = {
7708 .open = tracing_open_generic_tr,
7709 .read = tracing_max_lat_read,
7710 .write = tracing_max_lat_write,
7711 .llseek = generic_file_llseek,
7712 .release = tracing_release_generic_tr,
7713 };
7714 #endif
7715
7716 static const struct file_operations set_tracer_fops = {
7717 .open = tracing_open_generic_tr,
7718 .read = tracing_set_trace_read,
7719 .write = tracing_set_trace_write,
7720 .llseek = generic_file_llseek,
7721 .release = tracing_release_generic_tr,
7722 };
7723
7724 static const struct file_operations tracing_pipe_fops = {
7725 .open = tracing_open_pipe,
7726 .poll = tracing_poll_pipe,
7727 .read = tracing_read_pipe,
7728 .splice_read = tracing_splice_read_pipe,
7729 .release = tracing_release_pipe,
7730 };
7731
7732 static const struct file_operations tracing_entries_fops = {
7733 .open = tracing_open_generic_tr,
7734 .read = tracing_entries_read,
7735 .write = tracing_entries_write,
7736 .llseek = generic_file_llseek,
7737 .release = tracing_release_generic_tr,
7738 };
7739
7740 static const struct file_operations tracing_buffer_meta_fops = {
7741 .open = tracing_buffer_meta_open,
7742 .read = seq_read,
7743 .llseek = seq_lseek,
7744 .release = tracing_seq_release,
7745 };
7746
7747 static const struct file_operations tracing_total_entries_fops = {
7748 .open = tracing_open_generic_tr,
7749 .read = tracing_total_entries_read,
7750 .llseek = generic_file_llseek,
7751 .release = tracing_release_generic_tr,
7752 };
7753
7754 static const struct file_operations tracing_free_buffer_fops = {
7755 .open = tracing_open_generic_tr,
7756 .write = tracing_free_buffer_write,
7757 .release = tracing_free_buffer_release,
7758 };
7759
7760 static const struct file_operations tracing_mark_fops = {
7761 .open = tracing_mark_open,
7762 .write = tracing_mark_write,
7763 .release = tracing_release_generic_tr,
7764 };
7765
7766 static const struct file_operations tracing_mark_raw_fops = {
7767 .open = tracing_mark_open,
7768 .write = tracing_mark_raw_write,
7769 .release = tracing_release_generic_tr,
7770 };
7771
7772 static const struct file_operations trace_clock_fops = {
7773 .open = tracing_clock_open,
7774 .read = seq_read,
7775 .llseek = seq_lseek,
7776 .release = tracing_single_release_tr,
7777 .write = tracing_clock_write,
7778 };
7779
7780 static const struct file_operations trace_time_stamp_mode_fops = {
7781 .open = tracing_time_stamp_mode_open,
7782 .read = seq_read,
7783 .llseek = seq_lseek,
7784 .release = tracing_single_release_tr,
7785 };
7786
7787 static const struct file_operations last_boot_fops = {
7788 .open = tracing_last_boot_open,
7789 .read = seq_read,
7790 .llseek = seq_lseek,
7791 .release = tracing_seq_release,
7792 };
7793
7794 #ifdef CONFIG_TRACER_SNAPSHOT
7795 static const struct file_operations snapshot_fops = {
7796 .open = tracing_snapshot_open,
7797 .read = seq_read,
7798 .write = tracing_snapshot_write,
7799 .llseek = tracing_lseek,
7800 .release = tracing_snapshot_release,
7801 };
7802
7803 static const struct file_operations snapshot_raw_fops = {
7804 .open = snapshot_raw_open,
7805 .read = tracing_buffers_read,
7806 .release = tracing_buffers_release,
7807 .splice_read = tracing_buffers_splice_read,
7808 };
7809
7810 #endif /* CONFIG_TRACER_SNAPSHOT */
7811
7812 /*
7813 * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7814 * @filp: The active open file structure
7815 * @ubuf: The userspace provided buffer to read the value from
7816 * @cnt: The maximum number of bytes to read
7817 * @ppos: The current "file" position
7818 *
7819 * This function implements the write interface for a struct trace_min_max_param.
7820 * The filp->private_data must point to a trace_min_max_param structure that
7821 * defines where to write the value, the min and the max acceptable values,
7822 * and a lock to protect the write.
7823 */
7824 static ssize_t
7825 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7826 {
7827 struct trace_min_max_param *param = filp->private_data;
7828 u64 val;
7829 int err;
7830
7831 if (!param)
7832 return -EFAULT;
7833
7834 err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7835 if (err)
7836 return err;
7837
7838 if (param->lock)
7839 mutex_lock(param->lock);
7840
7841 if (param->min && val < *param->min)
7842 err = -EINVAL;
7843
7844 if (param->max && val > *param->max)
7845 err = -EINVAL;
7846
7847 if (!err)
7848 *param->val = val;
7849
7850 if (param->lock)
7851 mutex_unlock(param->lock);
7852
7853 if (err)
7854 return err;
7855
7856 return cnt;
7857 }
7858
7859 /*
7860 * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7861 * @filp: The active open file structure
7862 * @ubuf: The userspace provided buffer to read value into
7863 * @cnt: The maximum number of bytes to read
7864 * @ppos: The current "file" position
7865 *
7866 * This function implements the read interface for a struct trace_min_max_param.
7867 * The filp->private_data must point to a trace_min_max_param struct with valid
7868 * data.
7869 */
7870 static ssize_t
7871 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7872 {
7873 struct trace_min_max_param *param = filp->private_data;
7874 char buf[U64_STR_SIZE];
7875 int len;
7876 u64 val;
7877
7878 if (!param)
7879 return -EFAULT;
7880
7881 val = *param->val;
7882
7883 if (cnt > sizeof(buf))
7884 cnt = sizeof(buf);
7885
7886 len = snprintf(buf, sizeof(buf), "%llu\n", val);
7887
7888 return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7889 }
7890
7891 const struct file_operations trace_min_max_fops = {
7892 .open = tracing_open_generic,
7893 .read = trace_min_max_read,
7894 .write = trace_min_max_write,
7895 };
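/*
 * A minimal sketch of how a caller might hook a bounded u64 knob up to
 * these fops (all names below are hypothetical):
 *
 *	static DEFINE_MUTEX(my_mutex);
 *	static u64 my_val, my_min = 1, my_max = 1000;
 *	static struct trace_min_max_param my_param = {
 *		.lock	= &my_mutex,
 *		.val	= &my_val,
 *		.min	= &my_min,
 *		.max	= &my_max,
 *	};
 *
 *	trace_create_file("my_knob", TRACE_MODE_WRITE, parent,
 *			  &my_param, &trace_min_max_fops);
 */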
7896
7897 #define TRACING_LOG_ERRS_MAX 8
7898 #define TRACING_LOG_LOC_MAX 128
7899
7900 #define CMD_PREFIX " Command: "
7901
7902 struct err_info {
7903 const char **errs; /* ptr to loc-specific array of err strings */
7904 u8 type; /* index into errs -> specific err string */
7905 u16 pos; /* caret position */
7906 u64 ts;
7907 };
7908
7909 struct tracing_log_err {
7910 struct list_head list;
7911 struct err_info info;
7912 char loc[TRACING_LOG_LOC_MAX]; /* err location */
7913 char *cmd; /* what caused err */
7914 };
7915
7916 static DEFINE_MUTEX(tracing_err_log_lock);
7917
7918 static struct tracing_log_err *alloc_tracing_log_err(int len)
7919 {
7920 struct tracing_log_err *err;
7921
7922 err = kzalloc(sizeof(*err), GFP_KERNEL);
7923 if (!err)
7924 return ERR_PTR(-ENOMEM);
7925
7926 err->cmd = kzalloc(len, GFP_KERNEL);
7927 if (!err->cmd) {
7928 kfree(err);
7929 return ERR_PTR(-ENOMEM);
7930 }
7931
7932 return err;
7933 }
7934
7935 static void free_tracing_log_err(struct tracing_log_err *err)
7936 {
7937 kfree(err->cmd);
7938 kfree(err);
7939 }
7940
7941 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
7942 int len)
7943 {
7944 struct tracing_log_err *err;
7945 char *cmd;
7946
7947 if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7948 err = alloc_tracing_log_err(len);
7949 if (PTR_ERR(err) != -ENOMEM)
7950 tr->n_err_log_entries++;
7951
7952 return err;
7953 }
7954 cmd = kzalloc(len, GFP_KERNEL);
7955 if (!cmd)
7956 return ERR_PTR(-ENOMEM);
7957 err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7958 kfree(err->cmd);
7959 err->cmd = cmd;
7960 list_del(&err->list);
7961
7962 return err;
7963 }
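/*
 * Once TRACING_LOG_ERRS_MAX entries exist, the oldest entry is unlinked
 * and reused instead of allocating a new one, so the error log behaves
 * like a fixed-size ring of the most recent errors.
 */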
7964
7965 /**
7966 * err_pos - find the position of a string within a command for error careting
7967 * @cmd: The tracing command that caused the error
7968 * @str: The string to position the caret at within @cmd
7969 *
7970 * Finds the position of the first occurrence of @str within @cmd. The
7971 * return value can be passed to tracing_log_err() for caret placement
7972 * within @cmd.
7973 *
7974 * Returns the index within @cmd of the first occurrence of @str or 0
7975 * if @str was not found.
7976 */
7977 unsigned int err_pos(char *cmd, const char *str)
7978 {
7979 char *found;
7980
7981 if (WARN_ON(!strlen(cmd)))
7982 return 0;
7983
7984 found = strstr(cmd, str);
7985 if (found)
7986 return found - cmd;
7987
7988 return 0;
7989 }
7990
7991 /**
7992 * tracing_log_err - write an error to the tracing error log
7993 * @tr: The associated trace array for the error (NULL for top level array)
7994 * @loc: A string describing where the error occurred
7995 * @cmd: The tracing command that caused the error
7996 * @errs: The array of loc-specific static error strings
7997 * @type: The index into errs[], which produces the specific static err string
7998 * @pos: The position the caret should be placed in the cmd
7999 *
8000 * Writes an error into tracing/error_log of the form:
8001 *
8002 * <loc>: error: <text>
8003 * Command: <cmd>
8004 * ^
8005 *
8006 * tracing/error_log is a small log file containing the last
8007 * TRACING_LOG_ERRS_MAX errors (8). Memory for errors isn't allocated
8008 * unless there has been a tracing error, and the error log can be
8009 * cleared and have its memory freed by writing the empty string in
8010 * truncation mode to it i.e. echo > tracing/error_log.
8011 *
8012 * NOTE: the @errs array along with the @type param are used to
8013 * produce a static error string - this string is not copied and saved
8014 * when the error is logged - only a pointer to it is saved. See
8015 * existing callers for examples of how static strings are typically
8016 * defined for use with tracing_log_err().
8017 */
8018 void tracing_log_err(struct trace_array *tr,
8019 const char *loc, const char *cmd,
8020 const char **errs, u8 type, u16 pos)
8021 {
8022 struct tracing_log_err *err;
8023 int len = 0;
8024
8025 if (!tr)
8026 tr = &global_trace;
8027
8028 len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
8029
8030 guard(mutex)(&tracing_err_log_lock);
8031
8032 err = get_tracing_log_err(tr, len);
8033 if (PTR_ERR(err) == -ENOMEM)
8034 return;
8035
8036 snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
8037 snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
8038
8039 err->info.errs = errs;
8040 err->info.type = type;
8041 err->info.pos = pos;
8042 err->info.ts = local_clock();
8043
8044 list_add_tail(&err->list, &tr->err_log);
8045 }
8046
8047 static void clear_tracing_err_log(struct trace_array *tr)
8048 {
8049 struct tracing_log_err *err, *next;
8050
8051 guard(mutex)(&tracing_err_log_lock);
8052
8053 list_for_each_entry_safe(err, next, &tr->err_log, list) {
8054 list_del(&err->list);
8055 free_tracing_log_err(err);
8056 }
8057
8058 tr->n_err_log_entries = 0;
8059 }
8060
8061 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
8062 {
8063 struct trace_array *tr = m->private;
8064
8065 mutex_lock(&tracing_err_log_lock);
8066
8067 return seq_list_start(&tr->err_log, *pos);
8068 }
8069
8070 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
8071 {
8072 struct trace_array *tr = m->private;
8073
8074 return seq_list_next(v, &tr->err_log, pos);
8075 }
8076
8077 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
8078 {
8079 mutex_unlock(&tracing_err_log_lock);
8080 }
8081
8082 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
8083 {
8084 u16 i;
8085
8086 for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
8087 seq_putc(m, ' ');
8088 for (i = 0; i < pos; i++)
8089 seq_putc(m, ' ');
8090 seq_puts(m, "^\n");
8091 }
8092
8093 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
8094 {
8095 struct tracing_log_err *err = v;
8096
8097 if (err) {
8098 const char *err_text = err->info.errs[err->info.type];
8099 u64 sec = err->info.ts;
8100 u32 nsec;
8101
8102 nsec = do_div(sec, NSEC_PER_SEC);
8103 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
8104 err->loc, err_text);
8105 seq_printf(m, "%s", err->cmd);
8106 tracing_err_log_show_pos(m, err->info.pos);
8107 }
8108
8109 return 0;
8110 }
8111
8112 static const struct seq_operations tracing_err_log_seq_ops = {
8113 .start = tracing_err_log_seq_start,
8114 .next = tracing_err_log_seq_next,
8115 .stop = tracing_err_log_seq_stop,
8116 .show = tracing_err_log_seq_show
8117 };
8118
8119 static int tracing_err_log_open(struct inode *inode, struct file *file)
8120 {
8121 struct trace_array *tr = inode->i_private;
8122 int ret = 0;
8123
8124 ret = tracing_check_open_get_tr(tr);
8125 if (ret)
8126 return ret;
8127
8128 /* If this file was opened for write, then erase contents */
8129 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8130 clear_tracing_err_log(tr);
8131
8132 if (file->f_mode & FMODE_READ) {
8133 ret = seq_open(file, &tracing_err_log_seq_ops);
8134 if (!ret) {
8135 struct seq_file *m = file->private_data;
8136 m->private = tr;
8137 } else {
8138 trace_array_put(tr);
8139 }
8140 }
8141 return ret;
8142 }
8143
8144 static ssize_t tracing_err_log_write(struct file *file,
8145 const char __user *buffer,
8146 size_t count, loff_t *ppos)
8147 {
8148 return count;
8149 }
8150
8151 static int tracing_err_log_release(struct inode *inode, struct file *file)
8152 {
8153 struct trace_array *tr = inode->i_private;
8154
8155 trace_array_put(tr);
8156
8157 if (file->f_mode & FMODE_READ)
8158 seq_release(inode, file);
8159
8160 return 0;
8161 }
8162
8163 static const struct file_operations tracing_err_log_fops = {
8164 .open = tracing_err_log_open,
8165 .write = tracing_err_log_write,
8166 .read = seq_read,
8167 .llseek = tracing_lseek,
8168 .release = tracing_err_log_release,
8169 };
8170
8171 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8172 {
8173 struct trace_array *tr = inode->i_private;
8174 struct ftrace_buffer_info *info;
8175 int ret;
8176
8177 ret = tracing_check_open_get_tr(tr);
8178 if (ret)
8179 return ret;
8180
8181 info = kvzalloc(sizeof(*info), GFP_KERNEL);
8182 if (!info) {
8183 trace_array_put(tr);
8184 return -ENOMEM;
8185 }
8186
8187 mutex_lock(&trace_types_lock);
8188
8189 info->iter.tr = tr;
8190 info->iter.cpu_file = tracing_get_cpu(inode);
8191 info->iter.trace = tr->current_trace;
8192 info->iter.array_buffer = &tr->array_buffer;
8193 info->spare = NULL;
8194 /* Force reading ring buffer for first read */
8195 info->read = (unsigned int)-1;
8196
8197 filp->private_data = info;
8198
8199 tr->trace_ref++;
8200
8201 mutex_unlock(&trace_types_lock);
8202
8203 ret = nonseekable_open(inode, filp);
8204 if (ret < 0)
8205 trace_array_put(tr);
8206
8207 return ret;
8208 }
8209
8210 static __poll_t
8211 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8212 {
8213 struct ftrace_buffer_info *info = filp->private_data;
8214 struct trace_iterator *iter = &info->iter;
8215
8216 return trace_poll(iter, filp, poll_table);
8217 }
8218
8219 static ssize_t
8220 tracing_buffers_read(struct file *filp, char __user *ubuf,
8221 size_t count, loff_t *ppos)
8222 {
8223 struct ftrace_buffer_info *info = filp->private_data;
8224 struct trace_iterator *iter = &info->iter;
8225 void *trace_data;
8226 int page_size;
8227 ssize_t ret = 0;
8228 ssize_t size;
8229
8230 if (!count)
8231 return 0;
8232
8233 #ifdef CONFIG_TRACER_MAX_TRACE
8234 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8235 return -EBUSY;
8236 #endif
8237
8238 page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8239
8240 /* Make sure the spare matches the current sub buffer size */
8241 if (info->spare) {
8242 if (page_size != info->spare_size) {
8243 ring_buffer_free_read_page(iter->array_buffer->buffer,
8244 info->spare_cpu, info->spare);
8245 info->spare = NULL;
8246 }
8247 }
8248
8249 if (!info->spare) {
8250 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8251 iter->cpu_file);
8252 if (IS_ERR(info->spare)) {
8253 ret = PTR_ERR(info->spare);
8254 info->spare = NULL;
8255 } else {
8256 info->spare_cpu = iter->cpu_file;
8257 info->spare_size = page_size;
8258 }
8259 }
8260 if (!info->spare)
8261 return ret;
8262
8263 /* Do we have previous read data to read? */
8264 if (info->read < page_size)
8265 goto read;
8266
8267 again:
8268 trace_access_lock(iter->cpu_file);
8269 ret = ring_buffer_read_page(iter->array_buffer->buffer,
8270 info->spare,
8271 count,
8272 iter->cpu_file, 0);
8273 trace_access_unlock(iter->cpu_file);
8274
8275 if (ret < 0) {
8276 if (trace_empty(iter) && !iter->closed) {
8277 if (update_last_data_if_empty(iter->tr))
8278 return 0;
8279
8280 if ((filp->f_flags & O_NONBLOCK))
8281 return -EAGAIN;
8282
8283 ret = wait_on_pipe(iter, 0);
8284 if (ret)
8285 return ret;
8286
8287 goto again;
8288 }
8289 return 0;
8290 }
8291
8292 info->read = 0;
8293 read:
8294 size = page_size - info->read;
8295 if (size > count)
8296 size = count;
8297 trace_data = ring_buffer_read_page_data(info->spare);
8298 ret = copy_to_user(ubuf, trace_data + info->read, size);
8299 if (ret == size)
8300 return -EFAULT;
8301
8302 size -= ret;
8303
8304 *ppos += size;
8305 info->read += size;
8306
8307 return size;
8308 }
8309
8310 static int tracing_buffers_flush(struct file *file, fl_owner_t id)
8311 {
8312 struct ftrace_buffer_info *info = file->private_data;
8313 struct trace_iterator *iter = &info->iter;
8314
8315 iter->closed = true;
8316 /* Make sure the waiters see the new wait_index */
8317 (void)atomic_fetch_inc_release(&iter->wait_index);
8318
8319 ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8320
8321 return 0;
8322 }
8323
8324 static int tracing_buffers_release(struct inode *inode, struct file *file)
8325 {
8326 struct ftrace_buffer_info *info = file->private_data;
8327 struct trace_iterator *iter = &info->iter;
8328
8329 guard(mutex)(&trace_types_lock);
8330
8331 iter->tr->trace_ref--;
8332
8333 __trace_array_put(iter->tr);
8334
8335 if (info->spare)
8336 ring_buffer_free_read_page(iter->array_buffer->buffer,
8337 info->spare_cpu, info->spare);
8338 kvfree(info);
8339
8340 return 0;
8341 }
8342
8343 struct buffer_ref {
8344 struct trace_buffer *buffer;
8345 void *page;
8346 int cpu;
8347 refcount_t refcount;
8348 };
8349
8350 static void buffer_ref_release(struct buffer_ref *ref)
8351 {
8352 if (!refcount_dec_and_test(&ref->refcount))
8353 return;
8354 ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8355 kfree(ref);
8356 }
8357
8358 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8359 struct pipe_buffer *buf)
8360 {
8361 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8362
8363 buffer_ref_release(ref);
8364 buf->private = 0;
8365 }
8366
8367 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8368 struct pipe_buffer *buf)
8369 {
8370 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8371
8372 if (refcount_read(&ref->refcount) > INT_MAX/2)
8373 return false;
8374
8375 refcount_inc(&ref->refcount);
8376 return true;
8377 }
8378
8379 /* Pipe buffer operations for a buffer. */
8380 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8381 .release = buffer_pipe_buf_release,
8382 .get = buffer_pipe_buf_get,
8383 };
8384
8385 /*
8386 * Callback from splice_to_pipe(), if we need to release some pages
8387 * at the end of the spd in case we error'ed out in filling the pipe.
8388 */
8389 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8390 {
8391 struct buffer_ref *ref =
8392 (struct buffer_ref *)spd->partial[i].private;
8393
8394 buffer_ref_release(ref);
8395 spd->partial[i].private = 0;
8396 }
8397
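/*
 * Rough usage sketch (sizes are illustrative): trace_pipe_raw supports
 * splice() so whole sub-buffer pages can be moved into a pipe without
 * copying, e.g.:
 *
 *	splice(raw_fd, NULL, pipe_fd, NULL, 16 * page_size, SPLICE_F_NONBLOCK);
 *
 * *ppos must be sub-buffer aligned, and the requested length is rounded
 * down to a sub-buffer multiple, as checked at the top of the function
 * below.
 */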
8398 static ssize_t
8399 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8400 struct pipe_inode_info *pipe, size_t len,
8401 unsigned int flags)
8402 {
8403 struct ftrace_buffer_info *info = file->private_data;
8404 struct trace_iterator *iter = &info->iter;
8405 struct partial_page partial_def[PIPE_DEF_BUFFERS];
8406 struct page *pages_def[PIPE_DEF_BUFFERS];
8407 struct splice_pipe_desc spd = {
8408 .pages = pages_def,
8409 .partial = partial_def,
8410 .nr_pages_max = PIPE_DEF_BUFFERS,
8411 .ops = &buffer_pipe_buf_ops,
8412 .spd_release = buffer_spd_release,
8413 };
8414 struct buffer_ref *ref;
8415 bool woken = false;
8416 int page_size;
8417 int entries, i;
8418 ssize_t ret = 0;
8419
8420 #ifdef CONFIG_TRACER_MAX_TRACE
8421 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8422 return -EBUSY;
8423 #endif
8424
8425 page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8426 if (*ppos & (page_size - 1))
8427 return -EINVAL;
8428
8429 if (len & (page_size - 1)) {
8430 if (len < page_size)
8431 return -EINVAL;
8432 len &= (~(page_size - 1));
8433 }
8434
8435 if (splice_grow_spd(pipe, &spd))
8436 return -ENOMEM;
8437
8438 again:
8439 trace_access_lock(iter->cpu_file);
8440 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8441
8442 for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) {
8443 struct page *page;
8444 int r;
8445
8446 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8447 if (!ref) {
8448 ret = -ENOMEM;
8449 break;
8450 }
8451
8452 refcount_set(&ref->refcount, 1);
8453 ref->buffer = iter->array_buffer->buffer;
8454 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8455 if (IS_ERR(ref->page)) {
8456 ret = PTR_ERR(ref->page);
8457 ref->page = NULL;
8458 kfree(ref);
8459 break;
8460 }
8461 ref->cpu = iter->cpu_file;
8462
8463 r = ring_buffer_read_page(ref->buffer, ref->page,
8464 len, iter->cpu_file, 1);
8465 if (r < 0) {
8466 ring_buffer_free_read_page(ref->buffer, ref->cpu,
8467 ref->page);
8468 kfree(ref);
8469 break;
8470 }
8471
8472 page = virt_to_page(ring_buffer_read_page_data(ref->page));
8473
8474 spd.pages[i] = page;
8475 spd.partial[i].len = page_size;
8476 spd.partial[i].offset = 0;
8477 spd.partial[i].private = (unsigned long)ref;
8478 spd.nr_pages++;
8479 *ppos += page_size;
8480
8481 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8482 }
8483
8484 trace_access_unlock(iter->cpu_file);
8485 spd.nr_pages = i;
8486
8487 /* did we read anything? */
8488 if (!spd.nr_pages) {
8489
8490 if (ret)
8491 goto out;
8492
8493 if (woken)
8494 goto out;
8495
8496 ret = -EAGAIN;
8497 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8498 goto out;
8499
8500 ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
8501 if (ret)
8502 goto out;
8503
8504 /* No need to wait after waking up when tracing is off */
8505 if (!tracer_tracing_is_on(iter->tr))
8506 goto out;
8507
8508 /* Iterate one more time to collect any new data then exit */
8509 woken = true;
8510
8511 goto again;
8512 }
8513
8514 ret = splice_to_pipe(pipe, &spd);
8515 out:
8516 splice_shrink_spd(&spd);
8517
8518 return ret;
8519 }
8520
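/*
 * Rough user-space usage of the ioctl below (illustrative): a reader that
 * has mmap()ed the per-cpu trace_pipe_raw file advances the reader page
 * with:
 *
 *	ioctl(raw_fd, TRACE_MMAP_IOCTL_GET_READER);
 *
 * while an ioctl with cmd == 0 only wakes up any blocked waiters.
 */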
8521 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8522 {
8523 struct ftrace_buffer_info *info = file->private_data;
8524 struct trace_iterator *iter = &info->iter;
8525 int err;
8526
8527 if (cmd == TRACE_MMAP_IOCTL_GET_READER) {
8528 if (!(file->f_flags & O_NONBLOCK)) {
8529 err = ring_buffer_wait(iter->array_buffer->buffer,
8530 iter->cpu_file,
8531 iter->tr->buffer_percent,
8532 NULL, NULL);
8533 if (err)
8534 return err;
8535 }
8536
8537 return ring_buffer_map_get_reader(iter->array_buffer->buffer,
8538 iter->cpu_file);
8539 } else if (cmd) {
8540 return -ENOTTY;
8541 }
8542
8543 /*
8544 * An ioctl call with cmd 0 to the ring buffer file will wake up all
8545 * waiters
8546 */
8547 guard(mutex)(&trace_types_lock);
8548
8549 /* Make sure the waiters see the new wait_index */
8550 (void)atomic_fetch_inc_release(&iter->wait_index);
8551
8552 ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8553
8554 return 0;
8555 }
8556
8557 #ifdef CONFIG_TRACER_MAX_TRACE
8558 static int get_snapshot_map(struct trace_array *tr)
8559 {
8560 int err = 0;
8561
8562 /*
8563 * Called with mmap_lock held. lockdep would be unhappy if we would now
8564 * take trace_types_lock. Instead use the specific
8565 * snapshot_trigger_lock.
8566 */
8567 spin_lock(&tr->snapshot_trigger_lock);
8568
8569 if (tr->snapshot || tr->mapped == UINT_MAX)
8570 err = -EBUSY;
8571 else
8572 tr->mapped++;
8573
8574 spin_unlock(&tr->snapshot_trigger_lock);
8575
8576 /* Wait for update_max_tr() to observe iter->tr->mapped */
8577 if (tr->mapped == 1)
8578 synchronize_rcu();
8579
8580 return err;
8581
8582 }
8583 static void put_snapshot_map(struct trace_array *tr)
8584 {
8585 spin_lock(&tr->snapshot_trigger_lock);
8586 if (!WARN_ON(!tr->mapped))
8587 tr->mapped--;
8588 spin_unlock(&tr->snapshot_trigger_lock);
8589 }
8590 #else
8591 static inline int get_snapshot_map(struct trace_array *tr) { return 0; }
8592 static inline void put_snapshot_map(struct trace_array *tr) { }
8593 #endif
8594
8595 static void tracing_buffers_mmap_close(struct vm_area_struct *vma)
8596 {
8597 struct ftrace_buffer_info *info = vma->vm_file->private_data;
8598 struct trace_iterator *iter = &info->iter;
8599
8600 WARN_ON(ring_buffer_unmap(iter->array_buffer->buffer, iter->cpu_file));
8601 put_snapshot_map(iter->tr);
8602 }
8603
8604 static const struct vm_operations_struct tracing_buffers_vmops = {
8605 .close = tracing_buffers_mmap_close,
8606 };
8607
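/*
 * Rough usage sketch (illustrative): user space maps a per-cpu
 * trace_pipe_raw file, with the ring buffer meta page expected at
 * offset 0:
 *
 *	void *meta = mmap(NULL, page_size, PROT_READ, MAP_SHARED, raw_fd, 0);
 *
 * See the ring-buffer mapping documentation for the full layout.
 */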
8608 static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma)
8609 {
8610 struct ftrace_buffer_info *info = filp->private_data;
8611 struct trace_iterator *iter = &info->iter;
8612 int ret = 0;
8613
8614 /* A memmap'ed buffer is not supported for user space mmap */
8615 if (iter->tr->flags & TRACE_ARRAY_FL_MEMMAP)
8616 return -ENODEV;
8617
8618 ret = get_snapshot_map(iter->tr);
8619 if (ret)
8620 return ret;
8621
8622 ret = ring_buffer_map(iter->array_buffer->buffer, iter->cpu_file, vma);
8623 if (ret)
8624 put_snapshot_map(iter->tr);
8625
8626 vma->vm_ops = &tracing_buffers_vmops;
8627
8628 return ret;
8629 }
8630
8631 static const struct file_operations tracing_buffers_fops = {
8632 .open = tracing_buffers_open,
8633 .read = tracing_buffers_read,
8634 .poll = tracing_buffers_poll,
8635 .release = tracing_buffers_release,
8636 .flush = tracing_buffers_flush,
8637 .splice_read = tracing_buffers_splice_read,
8638 .unlocked_ioctl = tracing_buffers_ioctl,
8639 .mmap = tracing_buffers_mmap,
8640 };
8641
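/*
 * The per-cpu "stats" file generated below reports, one per line:
 * entries, overrun, commit overrun, bytes, oldest event ts, now ts,
 * dropped events and read events for the selected CPU buffer.
 */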
8642 static ssize_t
8643 tracing_stats_read(struct file *filp, char __user *ubuf,
8644 size_t count, loff_t *ppos)
8645 {
8646 struct inode *inode = file_inode(filp);
8647 struct trace_array *tr = inode->i_private;
8648 struct array_buffer *trace_buf = &tr->array_buffer;
8649 int cpu = tracing_get_cpu(inode);
8650 struct trace_seq *s;
8651 unsigned long cnt;
8652 unsigned long long t;
8653 unsigned long usec_rem;
8654
8655 s = kmalloc(sizeof(*s), GFP_KERNEL);
8656 if (!s)
8657 return -ENOMEM;
8658
8659 trace_seq_init(s);
8660
8661 cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8662 trace_seq_printf(s, "entries: %ld\n", cnt);
8663
8664 cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8665 trace_seq_printf(s, "overrun: %ld\n", cnt);
8666
8667 cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8668 trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8669
8670 cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8671 trace_seq_printf(s, "bytes: %ld\n", cnt);
8672
8673 if (trace_clocks[tr->clock_id].in_ns) {
8674 /* local or global for trace_clock */
8675 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8676 usec_rem = do_div(t, USEC_PER_SEC);
8677 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8678 t, usec_rem);
8679
8680 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8681 usec_rem = do_div(t, USEC_PER_SEC);
8682 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8683 } else {
8684 /* counter or tsc mode for trace_clock */
8685 trace_seq_printf(s, "oldest event ts: %llu\n",
8686 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8687
8688 trace_seq_printf(s, "now ts: %llu\n",
8689 ring_buffer_time_stamp(trace_buf->buffer));
8690 }
8691
8692 cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8693 trace_seq_printf(s, "dropped events: %ld\n", cnt);
8694
8695 cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8696 trace_seq_printf(s, "read events: %ld\n", cnt);
8697
8698 count = simple_read_from_buffer(ubuf, count, ppos,
8699 s->buffer, trace_seq_used(s));
8700
8701 kfree(s);
8702
8703 return count;
8704 }
8705
8706 static const struct file_operations tracing_stats_fops = {
8707 .open = tracing_open_generic_tr,
8708 .read = tracing_stats_read,
8709 .llseek = generic_file_llseek,
8710 .release = tracing_release_generic_tr,
8711 };
8712
8713 #ifdef CONFIG_DYNAMIC_FTRACE
8714
8715 static ssize_t
8716 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8717 size_t cnt, loff_t *ppos)
8718 {
8719 ssize_t ret;
8720 char *buf;
8721 int r;
8722
8723 /* 512 should be plenty to hold the amount needed */
8724 #define DYN_INFO_BUF_SIZE 512
8725
8726 buf = kmalloc(DYN_INFO_BUF_SIZE, GFP_KERNEL);
8727 if (!buf)
8728 return -ENOMEM;
8729
8730 r = scnprintf(buf, DYN_INFO_BUF_SIZE,
8731 "%ld pages:%ld groups: %ld\n"
8732 "ftrace boot update time = %llu (ns)\n"
8733 "ftrace module total update time = %llu (ns)\n",
8734 ftrace_update_tot_cnt,
8735 ftrace_number_of_pages,
8736 ftrace_number_of_groups,
8737 ftrace_update_time,
8738 ftrace_total_mod_time);
8739
8740 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8741 kfree(buf);
8742 return ret;
8743 }
8744
8745 static const struct file_operations tracing_dyn_info_fops = {
8746 .open = tracing_open_generic,
8747 .read = tracing_read_dyn_info,
8748 .llseek = generic_file_llseek,
8749 };
8750 #endif /* CONFIG_DYNAMIC_FTRACE */
8751
8752 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8753 static void
8754 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8755 struct trace_array *tr, struct ftrace_probe_ops *ops,
8756 void *data)
8757 {
8758 tracing_snapshot_instance(tr);
8759 }
8760
8761 static void
8762 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8763 struct trace_array *tr, struct ftrace_probe_ops *ops,
8764 void *data)
8765 {
8766 struct ftrace_func_mapper *mapper = data;
8767 long *count = NULL;
8768
8769 if (mapper)
8770 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8771
8772 if (count) {
8773
8774 if (*count <= 0)
8775 return;
8776
8777 (*count)--;
8778 }
8779
8780 tracing_snapshot_instance(tr);
8781 }
8782
8783 static int
8784 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8785 struct ftrace_probe_ops *ops, void *data)
8786 {
8787 struct ftrace_func_mapper *mapper = data;
8788 long *count = NULL;
8789
8790 seq_printf(m, "%ps:", (void *)ip);
8791
8792 seq_puts(m, "snapshot");
8793
8794 if (mapper)
8795 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8796
8797 if (count)
8798 seq_printf(m, ":count=%ld\n", *count);
8799 else
8800 seq_puts(m, ":unlimited\n");
8801
8802 return 0;
8803 }
8804
8805 static int
8806 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8807 unsigned long ip, void *init_data, void **data)
8808 {
8809 struct ftrace_func_mapper *mapper = *data;
8810
8811 if (!mapper) {
8812 mapper = allocate_ftrace_func_mapper();
8813 if (!mapper)
8814 return -ENOMEM;
8815 *data = mapper;
8816 }
8817
8818 return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8819 }
8820
8821 static void
8822 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8823 unsigned long ip, void *data)
8824 {
8825 struct ftrace_func_mapper *mapper = data;
8826
8827 if (!ip) {
8828 if (!mapper)
8829 return;
8830 free_ftrace_func_mapper(mapper, NULL);
8831 return;
8832 }
8833
8834 ftrace_func_mapper_remove_ip(mapper, ip);
8835 }
8836
8837 static struct ftrace_probe_ops snapshot_probe_ops = {
8838 .func = ftrace_snapshot,
8839 .print = ftrace_snapshot_print,
8840 };
8841
8842 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8843 .func = ftrace_count_snapshot,
8844 .print = ftrace_snapshot_print,
8845 .init = ftrace_snapshot_init,
8846 .free = ftrace_snapshot_free,
8847 };
8848
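/*
 * The "snapshot" command registered below is used through
 * set_ftrace_filter: writing "<func>:snapshot" arms a snapshot on every
 * hit of <func>, "<func>:snapshot:<count>" limits it to the first <count>
 * hits, and prefixing the line with '!' removes the probe again.
 */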
8849 static int
8850 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8851 char *glob, char *cmd, char *param, int enable)
8852 {
8853 struct ftrace_probe_ops *ops;
8854 void *count = (void *)-1;
8855 char *number;
8856 int ret;
8857
8858 if (!tr)
8859 return -ENODEV;
8860
8861 /* hash funcs only work with set_ftrace_filter */
8862 if (!enable)
8863 return -EINVAL;
8864
8865 ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8866
8867 if (glob[0] == '!') {
8868 ret = unregister_ftrace_function_probe_func(glob+1, tr, ops);
8869 if (!ret)
8870 tracing_disarm_snapshot(tr);
8871
8872 return ret;
8873 }
8874
8875 if (!param)
8876 goto out_reg;
8877
8878 number = strsep(&param, ":");
8879
8880 if (!strlen(number))
8881 goto out_reg;
8882
8883 /*
8884 * We use the callback data field (which is a pointer)
8885 * as our counter.
8886 */
8887 ret = kstrtoul(number, 0, (unsigned long *)&count);
8888 if (ret)
8889 return ret;
8890
8891 out_reg:
8892 ret = tracing_arm_snapshot(tr);
8893 if (ret < 0)
8894 return ret;
8895
8896 ret = register_ftrace_function_probe(glob, tr, ops, count);
8897 if (ret < 0)
8898 tracing_disarm_snapshot(tr);
8899
8900 return ret < 0 ? ret : 0;
8901 }
8902
8903 static struct ftrace_func_command ftrace_snapshot_cmd = {
8904 .name = "snapshot",
8905 .func = ftrace_trace_snapshot_callback,
8906 };
8907
8908 static __init int register_snapshot_cmd(void)
8909 {
8910 return register_ftrace_command(&ftrace_snapshot_cmd);
8911 }
8912 #else
8913 static inline __init int register_snapshot_cmd(void) { return 0; }
8914 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8915
8916 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8917 {
8918 /* Top directory uses NULL as the parent */
8919 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8920 return NULL;
8921
8922 if (WARN_ON(!tr->dir))
8923 return ERR_PTR(-ENODEV);
8924
8925 /* All sub buffers have a descriptor */
8926 return tr->dir;
8927 }
8928
8929 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8930 {
8931 struct dentry *d_tracer;
8932
8933 if (tr->percpu_dir)
8934 return tr->percpu_dir;
8935
8936 d_tracer = tracing_get_dentry(tr);
8937 if (IS_ERR(d_tracer))
8938 return NULL;
8939
8940 tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8941
8942 MEM_FAIL(!tr->percpu_dir,
8943 "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8944
8945 return tr->percpu_dir;
8946 }
8947
8948 static struct dentry *
8949 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8950 void *data, long cpu, const struct file_operations *fops)
8951 {
8952 struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8953
8954 if (ret) /* See tracing_get_cpu() */
8955 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8956 return ret;
8957 }
8958
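/*
 * Creates the per_cpu/cpu<N> directory for one CPU, holding trace,
 * trace_pipe, trace_pipe_raw, stats and buffer_size_kb, plus buffer_meta
 * for boot-mapped (range) buffers, or snapshot and snapshot_raw when
 * CONFIG_TRACER_SNAPSHOT is enabled and the buffer is not boot-mapped.
 */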
8959 static void
8960 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8961 {
8962 struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8963 struct dentry *d_cpu;
8964 char cpu_dir[30]; /* 30 characters should be more than enough */
8965
8966 if (!d_percpu)
8967 return;
8968
8969 snprintf(cpu_dir, 30, "cpu%ld", cpu);
8970 d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8971 if (!d_cpu) {
8972 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8973 return;
8974 }
8975
8976 /* per cpu trace_pipe */
8977 trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8978 tr, cpu, &tracing_pipe_fops);
8979
8980 /* per cpu trace */
8981 trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8982 tr, cpu, &tracing_fops);
8983
8984 trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8985 tr, cpu, &tracing_buffers_fops);
8986
8987 trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8988 tr, cpu, &tracing_stats_fops);
8989
8990 trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8991 tr, cpu, &tracing_entries_fops);
8992
8993 if (tr->range_addr_start)
8994 trace_create_cpu_file("buffer_meta", TRACE_MODE_READ, d_cpu,
8995 tr, cpu, &tracing_buffer_meta_fops);
8996 #ifdef CONFIG_TRACER_SNAPSHOT
8997 if (!tr->range_addr_start) {
8998 trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8999 tr, cpu, &snapshot_fops);
9000
9001 trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
9002 tr, cpu, &snapshot_raw_fops);
9003 }
9004 #endif
9005 }
9006
9007 #ifdef CONFIG_FTRACE_SELFTEST
9008 /* Let selftest have access to static functions in this file */
9009 #include "trace_selftest.c"
9010 #endif
9011
9012 static ssize_t
9013 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
9014 loff_t *ppos)
9015 {
9016 struct trace_option_dentry *topt = filp->private_data;
9017 char *buf;
9018
9019 if (topt->flags->val & topt->opt->bit)
9020 buf = "1\n";
9021 else
9022 buf = "0\n";
9023
9024 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9025 }
9026
9027 static ssize_t
9028 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
9029 loff_t *ppos)
9030 {
9031 struct trace_option_dentry *topt = filp->private_data;
9032 unsigned long val;
9033 int ret;
9034
9035 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9036 if (ret)
9037 return ret;
9038
9039 if (val != 0 && val != 1)
9040 return -EINVAL;
9041
9042 if (!!(topt->flags->val & topt->opt->bit) != val) {
9043 guard(mutex)(&trace_types_lock);
9044 ret = __set_tracer_option(topt->tr, topt->flags,
9045 topt->opt, !val);
9046 if (ret)
9047 return ret;
9048 }
9049
9050 *ppos += cnt;
9051
9052 return cnt;
9053 }
9054
9055 static int tracing_open_options(struct inode *inode, struct file *filp)
9056 {
9057 struct trace_option_dentry *topt = inode->i_private;
9058 int ret;
9059
9060 ret = tracing_check_open_get_tr(topt->tr);
9061 if (ret)
9062 return ret;
9063
9064 filp->private_data = inode->i_private;
9065 return 0;
9066 }
9067
9068 static int tracing_release_options(struct inode *inode, struct file *file)
9069 {
9070 struct trace_option_dentry *topt = file->private_data;
9071
9072 trace_array_put(topt->tr);
9073 return 0;
9074 }
9075
9076 static const struct file_operations trace_options_fops = {
9077 .open = tracing_open_options,
9078 .read = trace_options_read,
9079 .write = trace_options_write,
9080 .llseek = generic_file_llseek,
9081 .release = tracing_release_options,
9082 };
9083
9084 /*
9085 * In order to pass in both the trace_array descriptor as well as the index
9086 * to the flag that the trace option file represents, the trace_array
9087 * has a character array of trace_flags_index[], which holds the index
9088 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
9089 * The address of this character array is passed to the flag option file
9090 * read/write callbacks.
9091 *
9092 * In order to extract both the index and the trace_array descriptor,
9093 * get_tr_index() uses the following algorithm.
9094 *
9095 * idx = *ptr;
9096 *
9097 * As the pointer points at the index value itself (remember that
9098 * index[1] == 1, so the value equals its position).
9099 *
9100 * Then, to get the trace_array descriptor, we subtract that index
9101 * from the ptr, which lands on the start of the index array.
9102 *
9103 * ptr - idx == &index[0]
9104 *
9105 * Then a simple container_of() from that pointer gets us to the
9106 * trace_array descriptor.
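* For example, if data points at index[3], then *data == 3 and
* data - 3 == &index[0], from which container_of() recovers the trace_array.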
9107 */
9108 static void get_tr_index(void *data, struct trace_array **ptr,
9109 unsigned int *pindex)
9110 {
9111 *pindex = *(unsigned char *)data;
9112
9113 *ptr = container_of(data - *pindex, struct trace_array,
9114 trace_flags_index);
9115 }
9116
9117 static ssize_t
9118 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
9119 loff_t *ppos)
9120 {
9121 void *tr_index = filp->private_data;
9122 struct trace_array *tr;
9123 unsigned int index;
9124 char *buf;
9125
9126 get_tr_index(tr_index, &tr, &index);
9127
9128 if (tr->trace_flags & (1 << index))
9129 buf = "1\n";
9130 else
9131 buf = "0\n";
9132
9133 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9134 }
9135
9136 static ssize_t
9137 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
9138 loff_t *ppos)
9139 {
9140 void *tr_index = filp->private_data;
9141 struct trace_array *tr;
9142 unsigned int index;
9143 unsigned long val;
9144 int ret;
9145
9146 get_tr_index(tr_index, &tr, &index);
9147
9148 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9149 if (ret)
9150 return ret;
9151
9152 if (val != 0 && val != 1)
9153 return -EINVAL;
9154
9155 mutex_lock(&event_mutex);
9156 mutex_lock(&trace_types_lock);
9157 ret = set_tracer_flag(tr, 1 << index, val);
9158 mutex_unlock(&trace_types_lock);
9159 mutex_unlock(&event_mutex);
9160
9161 if (ret < 0)
9162 return ret;
9163
9164 *ppos += cnt;
9165
9166 return cnt;
9167 }
9168
9169 static const struct file_operations trace_options_core_fops = {
9170 .open = tracing_open_generic,
9171 .read = trace_options_core_read,
9172 .write = trace_options_core_write,
9173 .llseek = generic_file_llseek,
9174 };
9175
9176 struct dentry *trace_create_file(const char *name,
9177 umode_t mode,
9178 struct dentry *parent,
9179 void *data,
9180 const struct file_operations *fops)
9181 {
9182 struct dentry *ret;
9183
9184 ret = tracefs_create_file(name, mode, parent, data, fops);
9185 if (!ret)
9186 pr_warn("Could not create tracefs '%s' entry\n", name);
9187
9188 return ret;
9189 }
9190
9191
9192 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
9193 {
9194 struct dentry *d_tracer;
9195
9196 if (tr->options)
9197 return tr->options;
9198
9199 d_tracer = tracing_get_dentry(tr);
9200 if (IS_ERR(d_tracer))
9201 return NULL;
9202
9203 tr->options = tracefs_create_dir("options", d_tracer);
9204 if (!tr->options) {
9205 pr_warn("Could not create tracefs directory 'options'\n");
9206 return NULL;
9207 }
9208
9209 return tr->options;
9210 }
9211
9212 static void
9213 create_trace_option_file(struct trace_array *tr,
9214 struct trace_option_dentry *topt,
9215 struct tracer_flags *flags,
9216 struct tracer_opt *opt)
9217 {
9218 struct dentry *t_options;
9219
9220 t_options = trace_options_init_dentry(tr);
9221 if (!t_options)
9222 return;
9223
9224 topt->flags = flags;
9225 topt->opt = opt;
9226 topt->tr = tr;
9227
9228 topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
9229 t_options, topt, &trace_options_fops);
9230
9231 }
9232
9233 static void
9234 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
9235 {
9236 struct trace_option_dentry *topts;
9237 struct trace_options *tr_topts;
9238 struct tracer_flags *flags;
9239 struct tracer_opt *opts;
9240 int cnt;
9241 int i;
9242
9243 if (!tracer)
9244 return;
9245
9246 flags = tracer->flags;
9247
9248 if (!flags || !flags->opts)
9249 return;
9250
9251 /*
9252 * If this is an instance, only create flags for tracers
9253 * the instance may have.
9254 */
9255 if (!trace_ok_for_array(tracer, tr))
9256 return;
9257
9258 for (i = 0; i < tr->nr_topts; i++) {
9259 /* Make sure there are no duplicate flags. */
9260 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
9261 return;
9262 }
9263
9264 opts = flags->opts;
9265
9266 for (cnt = 0; opts[cnt].name; cnt++)
9267 ;
9268
9269 topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9270 if (!topts)
9271 return;
9272
9273 tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9274 GFP_KERNEL);
9275 if (!tr_topts) {
9276 kfree(topts);
9277 return;
9278 }
9279
9280 tr->topts = tr_topts;
9281 tr->topts[tr->nr_topts].tracer = tracer;
9282 tr->topts[tr->nr_topts].topts = topts;
9283 tr->nr_topts++;
9284
9285 for (cnt = 0; opts[cnt].name; cnt++) {
9286 create_trace_option_file(tr, &topts[cnt], flags,
9287 &opts[cnt]);
9288 MEM_FAIL(topts[cnt].entry == NULL,
9289 "Failed to create trace option: %s",
9290 opts[cnt].name);
9291 }
9292 }
9293
9294 static struct dentry *
9295 create_trace_option_core_file(struct trace_array *tr,
9296 const char *option, long index)
9297 {
9298 struct dentry *t_options;
9299
9300 t_options = trace_options_init_dentry(tr);
9301 if (!t_options)
9302 return NULL;
9303
9304 return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9305 (void *)&tr->trace_flags_index[index],
9306 &trace_options_core_fops);
9307 }
9308
9309 static void create_trace_options_dir(struct trace_array *tr)
9310 {
9311 struct dentry *t_options;
9312 bool top_level = tr == &global_trace;
9313 int i;
9314
9315 t_options = trace_options_init_dentry(tr);
9316 if (!t_options)
9317 return;
9318
9319 for (i = 0; trace_options[i]; i++) {
9320 if (top_level ||
9321 !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9322 create_trace_option_core_file(tr, trace_options[i], i);
9323 }
9324 }
9325
9326 static ssize_t
9327 rb_simple_read(struct file *filp, char __user *ubuf,
9328 size_t cnt, loff_t *ppos)
9329 {
9330 struct trace_array *tr = filp->private_data;
9331 char buf[64];
9332 int r;
9333
9334 r = tracer_tracing_is_on(tr);
9335 r = sprintf(buf, "%d\n", r);
9336
9337 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9338 }
9339
9340 static ssize_t
9341 rb_simple_write(struct file *filp, const char __user *ubuf,
9342 size_t cnt, loff_t *ppos)
9343 {
9344 struct trace_array *tr = filp->private_data;
9345 struct trace_buffer *buffer = tr->array_buffer.buffer;
9346 unsigned long val;
9347 int ret;
9348
9349 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9350 if (ret)
9351 return ret;
9352
9353 if (buffer) {
9354 guard(mutex)(&trace_types_lock);
9355 if (!!val == tracer_tracing_is_on(tr)) {
9356 val = 0; /* do nothing */
9357 } else if (val) {
9358 tracer_tracing_on(tr);
9359 if (tr->current_trace->start)
9360 tr->current_trace->start(tr);
9361 } else {
9362 tracer_tracing_off(tr);
9363 if (tr->current_trace->stop)
9364 tr->current_trace->stop(tr);
9365 /* Wake up any waiters */
9366 ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9367 }
9368 }
9369
9370 (*ppos)++;
9371
9372 return cnt;
9373 }
9374
9375 static const struct file_operations rb_simple_fops = {
9376 .open = tracing_open_generic_tr,
9377 .read = rb_simple_read,
9378 .write = rb_simple_write,
9379 .release = tracing_release_generic_tr,
9380 .llseek = default_llseek,
9381 };
9382
9383 static ssize_t
9384 buffer_percent_read(struct file *filp, char __user *ubuf,
9385 size_t cnt, loff_t *ppos)
9386 {
9387 struct trace_array *tr = filp->private_data;
9388 char buf[64];
9389 int r;
9390
9391 r = tr->buffer_percent;
9392 r = sprintf(buf, "%d\n", r);
9393
9394 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9395 }
9396
9397 static ssize_t
9398 buffer_percent_write(struct file *filp, const char __user *ubuf,
9399 size_t cnt, loff_t *ppos)
9400 {
9401 struct trace_array *tr = filp->private_data;
9402 unsigned long val;
9403 int ret;
9404
9405 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9406 if (ret)
9407 return ret;
9408
9409 if (val > 100)
9410 return -EINVAL;
9411
9412 tr->buffer_percent = val;
9413
9414 (*ppos)++;
9415
9416 return cnt;
9417 }
9418
9419 static const struct file_operations buffer_percent_fops = {
9420 .open = tracing_open_generic_tr,
9421 .read = buffer_percent_read,
9422 .write = buffer_percent_write,
9423 .release = tracing_release_generic_tr,
9424 .llseek = default_llseek,
9425 };
9426
9427 static ssize_t
9428 buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
9429 {
9430 struct trace_array *tr = filp->private_data;
9431 size_t size;
9432 char buf[64];
9433 int order;
9434 int r;
9435
9436 order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9437 size = (PAGE_SIZE << order) / 1024;
9438
9439 r = sprintf(buf, "%zd\n", size);
9440
9441 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9442 }
9443
9444 static ssize_t
9445 buffer_subbuf_size_write(struct file *filp, const char __user *ubuf,
9446 size_t cnt, loff_t *ppos)
9447 {
9448 struct trace_array *tr = filp->private_data;
9449 unsigned long val;
9450 int old_order;
9451 int order;
9452 int pages;
9453 int ret;
9454
9455 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9456 if (ret)
9457 return ret;
9458
9459 val *= 1024; /* value passed in is in KB */
9460
9461 pages = DIV_ROUND_UP(val, PAGE_SIZE);
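/* fls(pages - 1) is the smallest order for which (1 << order) pages covers the request */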
9462 order = fls(pages - 1);
9463
9464 /* limit between 1 and 128 system pages */
9465 if (order < 0 || order > 7)
9466 return -EINVAL;
9467
9468 /* Do not allow tracing while changing the order of the ring buffer */
9469 tracing_stop_tr(tr);
9470
9471 old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9472 if (old_order == order)
9473 goto out;
9474
9475 ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order);
9476 if (ret)
9477 goto out;
9478
9479 #ifdef CONFIG_TRACER_MAX_TRACE
9480
9481 if (!tr->allocated_snapshot)
9482 goto out_max;
9483
9484 ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
9485 if (ret) {
9486 /* Put back the old order */
9487 cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order);
9488 if (WARN_ON_ONCE(cnt)) {
9489 /*
9490 * AARGH! We are left with different orders!
9491 * The max buffer is our "snapshot" buffer.
9492 * When a tracer needs a snapshot (one of the
9493 * latency tracers), it swaps the max buffer
9494 * with the saved snapshot. We succeeded in updating
9495 * the order of the main buffer, but failed to
9496 * update the order of the max buffer. But when we tried
9497 * to reset the main buffer to the original size, we
9498 * failed there too. This is very unlikely to
9499 * happen, but if it does, warn and kill all
9500 * tracing.
9501 */
9502 tracing_disabled = 1;
9503 }
9504 goto out;
9505 }
9506 out_max:
9507 #endif
9508 (*ppos)++;
9509 out:
9510 if (ret)
9511 cnt = ret;
9512 tracing_start_tr(tr);
9513 return cnt;
9514 }
9515
9516 static const struct file_operations buffer_subbuf_size_fops = {
9517 .open = tracing_open_generic_tr,
9518 .read = buffer_subbuf_size_read,
9519 .write = buffer_subbuf_size_write,
9520 .release = tracing_release_generic_tr,
9521 .llseek = default_llseek,
9522 };
9523
9524 static struct dentry *trace_instance_dir;
9525
9526 static void
9527 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9528
9529 #ifdef CONFIG_MODULES
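/*
 * For each module recorded in the persistent scratch area, compute the
 * offset between its current load address and the address it had in the
 * previous boot (or clear the offset when the module is going away).
 */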
9530 static int make_mod_delta(struct module *mod, void *data)
9531 {
9532 struct trace_module_delta *module_delta;
9533 struct trace_scratch *tscratch;
9534 struct trace_mod_entry *entry;
9535 struct trace_array *tr = data;
9536 int i;
9537
9538 tscratch = tr->scratch;
9539 module_delta = READ_ONCE(tr->module_delta);
9540 for (i = 0; i < tscratch->nr_entries; i++) {
9541 entry = &tscratch->entries[i];
9542 if (strcmp(mod->name, entry->mod_name))
9543 continue;
9544 if (mod->state == MODULE_STATE_GOING)
9545 module_delta->delta[i] = 0;
9546 else
9547 module_delta->delta[i] = (unsigned long)mod->mem[MOD_TEXT].base
9548 - entry->mod_addr;
9549 break;
9550 }
9551 return 0;
9552 }
9553 #else
9554 static int make_mod_delta(struct module *mod, void *data)
9555 {
9556 return 0;
9557 }
9558 #endif
9559
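/* Comparator for sort_r(): order scratch entries by module load address */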
9560 static int mod_addr_comp(const void *a, const void *b, const void *data)
9561 {
9562 const struct trace_mod_entry *e1 = a;
9563 const struct trace_mod_entry *e2 = b;
9564
9565 return e1->mod_addr > e2->mod_addr ? 1 : -1;
9566 }
9567
9568 static void setup_trace_scratch(struct trace_array *tr,
9569 struct trace_scratch *tscratch, unsigned int size)
9570 {
9571 struct trace_module_delta *module_delta;
9572 struct trace_mod_entry *entry;
9573 int i, nr_entries;
9574
9575 if (!tscratch)
9576 return;
9577
9578 tr->scratch = tscratch;
9579 tr->scratch_size = size;
9580
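/*
 * Record how far the kernel text has moved since the previous boot
 * (e.g. due to KASLR) so that addresses stored in the persistent
 * buffer can be translated to the current boot.
 */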
9581 if (tscratch->text_addr)
9582 tr->text_delta = (unsigned long)_text - tscratch->text_addr;
9583
9584 if (struct_size(tscratch, entries, tscratch->nr_entries) > size)
9585 goto reset;
9586
9587 /* Check if each module name is a valid string */
9588 for (i = 0; i < tscratch->nr_entries; i++) {
9589 int n;
9590
9591 entry = &tscratch->entries[i];
9592
9593 for (n = 0; n < MODULE_NAME_LEN; n++) {
9594 if (entry->mod_name[n] == '\0')
9595 break;
9596 if (!isprint(entry->mod_name[n]))
9597 goto reset;
9598 }
9599 if (n == MODULE_NAME_LEN)
9600 goto reset;
9601 }
9602
9603 /* Sort the entries so that we can find appropriate module from address. */
9604 nr_entries = tscratch->nr_entries;
9605 sort_r(tscratch->entries, nr_entries, sizeof(struct trace_mod_entry),
9606 mod_addr_comp, NULL, NULL);
9607
9608 if (IS_ENABLED(CONFIG_MODULES)) {
9609 module_delta = kzalloc(struct_size(module_delta, delta, nr_entries), GFP_KERNEL);
9610 if (!module_delta) {
9611 pr_info("module_delta allocation failed. Not able to decode module address.");
9612 goto reset;
9613 }
9614 init_rcu_head(&module_delta->rcu);
9615 } else
9616 module_delta = NULL;
9617 WRITE_ONCE(tr->module_delta, module_delta);
9618
9619 /* Scan loaded modules to compute the text delta for each. */
9620 module_for_each_mod(make_mod_delta, tr);
9621
9622 /* Set trace_clock to be the same as in the previous boot. */
9623 if (tscratch->clock_id != tr->clock_id) {
9624 if (tscratch->clock_id >= ARRAY_SIZE(trace_clocks) ||
9625 tracing_set_clock(tr, trace_clocks[tscratch->clock_id].name) < 0) {
9626 pr_info("the previous trace_clock info is not valid.");
9627 goto reset;
9628 }
9629 }
9630 return;
9631 reset:
9632 /* Invalid trace modules */
9633 memset(tscratch, 0, size);
9634 }
9635
9636 static int
9637 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9638 {
9639 enum ring_buffer_flags rb_flags;
9640 struct trace_scratch *tscratch;
9641 unsigned int scratch_size = 0;
9642
9643 rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9644
9645 buf->tr = tr;
9646
9647 if (tr->range_addr_start && tr->range_addr_size) {
9648 /* Add scratch buffer to handle 128 modules */
9649 buf->buffer = ring_buffer_alloc_range(size, rb_flags, 0,
9650 tr->range_addr_start,
9651 tr->range_addr_size,
9652 struct_size(tscratch, entries, 128));
9653
9654 tscratch = ring_buffer_meta_scratch(buf->buffer, &scratch_size);
9655 setup_trace_scratch(tr, tscratch, scratch_size);
9656
9657 /*
9658 * This is basically the same as a mapped buffer,
9659 * with the same restrictions.
9660 */
9661 tr->mapped++;
9662 } else {
9663 buf->buffer = ring_buffer_alloc(size, rb_flags);
9664 }
9665 if (!buf->buffer)
9666 return -ENOMEM;
9667
9668 buf->data = alloc_percpu(struct trace_array_cpu);
9669 if (!buf->data) {
9670 ring_buffer_free(buf->buffer);
9671 buf->buffer = NULL;
9672 return -ENOMEM;
9673 }
9674
9675 /* Allocate the first page for all buffers */
9676 set_buffer_entries(&tr->array_buffer,
9677 ring_buffer_size(tr->array_buffer.buffer, 0));
9678
9679 return 0;
9680 }
9681
9682 static void free_trace_buffer(struct array_buffer *buf)
9683 {
9684 if (buf->buffer) {
9685 ring_buffer_free(buf->buffer);
9686 buf->buffer = NULL;
9687 free_percpu(buf->data);
9688 buf->data = NULL;
9689 }
9690 }
9691
9692 static int allocate_trace_buffers(struct trace_array *tr, int size)
9693 {
9694 int ret;
9695
9696 ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9697 if (ret)
9698 return ret;
9699
9700 #ifdef CONFIG_TRACER_MAX_TRACE
9701 /* Fixed-address (memory mapped) buffer trace arrays do not have snapshot buffers */
9702 if (tr->range_addr_start)
9703 return 0;
9704
9705 ret = allocate_trace_buffer(tr, &tr->max_buffer,
9706 allocate_snapshot ? size : 1);
9707 if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9708 free_trace_buffer(&tr->array_buffer);
9709 return -ENOMEM;
9710 }
9711 tr->allocated_snapshot = allocate_snapshot;
9712
9713 allocate_snapshot = false;
9714 #endif
9715
9716 return 0;
9717 }
9718
9719 static void free_trace_buffers(struct trace_array *tr)
9720 {
9721 if (!tr)
9722 return;
9723
9724 free_trace_buffer(&tr->array_buffer);
9725 kfree(tr->module_delta);
9726
9727 #ifdef CONFIG_TRACER_MAX_TRACE
9728 free_trace_buffer(&tr->max_buffer);
9729 #endif
9730 }
9731
9732 static void init_trace_flags_index(struct trace_array *tr)
9733 {
9734 int i;
9735
9736 /* Used by the trace options files */
9737 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9738 tr->trace_flags_index[i] = i;
9739 }
9740
9741 static void __update_tracer_options(struct trace_array *tr)
9742 {
9743 struct tracer *t;
9744
9745 for (t = trace_types; t; t = t->next)
9746 add_tracer_options(tr, t);
9747 }
9748
9749 static void update_tracer_options(struct trace_array *tr)
9750 {
9751 guard(mutex)(&trace_types_lock);
9752 tracer_options_updated = true;
9753 __update_tracer_options(tr);
9754 }
9755
9756 /* Must have trace_types_lock held */
9757 struct trace_array *trace_array_find(const char *instance)
9758 {
9759 struct trace_array *tr, *found = NULL;
9760
9761 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9762 if (tr->name && strcmp(tr->name, instance) == 0) {
9763 found = tr;
9764 break;
9765 }
9766 }
9767
9768 return found;
9769 }
9770
9771 struct trace_array *trace_array_find_get(const char *instance)
9772 {
9773 struct trace_array *tr;
9774
9775 guard(mutex)(&trace_types_lock);
9776 tr = trace_array_find(instance);
9777 if (tr)
9778 tr->ref++;
9779
9780 return tr;
9781 }
9782
9783 static int trace_array_create_dir(struct trace_array *tr)
9784 {
9785 int ret;
9786
9787 tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9788 if (!tr->dir)
9789 return -EINVAL;
9790
9791 ret = event_trace_add_tracer(tr->dir, tr);
9792 if (ret) {
9793 tracefs_remove(tr->dir);
9794 return ret;
9795 }
9796
9797 init_tracer_tracefs(tr, tr->dir);
9798 __update_tracer_options(tr);
9799
9800 return ret;
9801 }
9802
9803 static struct trace_array *
9804 trace_array_create_systems(const char *name, const char *systems,
9805 unsigned long range_addr_start,
9806 unsigned long range_addr_size)
9807 {
9808 struct trace_array *tr;
9809 int ret;
9810
9811 ret = -ENOMEM;
9812 tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9813 if (!tr)
9814 return ERR_PTR(ret);
9815
9816 tr->name = kstrdup(name, GFP_KERNEL);
9817 if (!tr->name)
9818 goto out_free_tr;
9819
9820 if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9821 goto out_free_tr;
9822
9823 if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
9824 goto out_free_tr;
9825
9826 if (systems) {
9827 tr->system_names = kstrdup_const(systems, GFP_KERNEL);
9828 if (!tr->system_names)
9829 goto out_free_tr;
9830 }
9831
9832 /* Only for boot up memory mapped ring buffers */
9833 tr->range_addr_start = range_addr_start;
9834 tr->range_addr_size = range_addr_size;
9835
9836 tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9837
9838 cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9839
9840 raw_spin_lock_init(&tr->start_lock);
9841
9842 tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9843 #ifdef CONFIG_TRACER_MAX_TRACE
9844 spin_lock_init(&tr->snapshot_trigger_lock);
9845 #endif
9846 tr->current_trace = &nop_trace;
9847
9848 INIT_LIST_HEAD(&tr->systems);
9849 INIT_LIST_HEAD(&tr->events);
9850 INIT_LIST_HEAD(&tr->hist_vars);
9851 INIT_LIST_HEAD(&tr->err_log);
9852 INIT_LIST_HEAD(&tr->marker_list);
9853
9854 #ifdef CONFIG_MODULES
9855 INIT_LIST_HEAD(&tr->mod_events);
9856 #endif
9857
9858 if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9859 goto out_free_tr;
9860
9861 /* The ring buffer is expanded by default */
9862 trace_set_ring_buffer_expanded(tr);
9863
9864 if (ftrace_allocate_ftrace_ops(tr) < 0)
9865 goto out_free_tr;
9866
9867 ftrace_init_trace_array(tr);
9868
9869 init_trace_flags_index(tr);
9870
9871 if (trace_instance_dir) {
9872 ret = trace_array_create_dir(tr);
9873 if (ret)
9874 goto out_free_tr;
9875 } else
9876 __trace_early_add_events(tr);
9877
9878 list_add(&tr->list, &ftrace_trace_arrays);
9879
9880 tr->ref++;
9881
9882 return tr;
9883
9884 out_free_tr:
9885 ftrace_free_ftrace_ops(tr);
9886 free_trace_buffers(tr);
9887 free_cpumask_var(tr->pipe_cpumask);
9888 free_cpumask_var(tr->tracing_cpumask);
9889 kfree_const(tr->system_names);
9890 kfree(tr->range_name);
9891 kfree(tr->name);
9892 kfree(tr);
9893
9894 return ERR_PTR(ret);
9895 }
9896
9897 static struct trace_array *trace_array_create(const char *name)
9898 {
9899 return trace_array_create_systems(name, NULL, 0, 0);
9900 }
9901
9902 static int instance_mkdir(const char *name)
9903 {
9904 struct trace_array *tr;
9905 int ret;
9906
9907 guard(mutex)(&event_mutex);
9908 guard(mutex)(&trace_types_lock);
9909
9910 ret = -EEXIST;
9911 if (trace_array_find(name))
9912 return -EEXIST;
9913
9914 tr = trace_array_create(name);
9915
9916 ret = PTR_ERR_OR_ZERO(tr);
9917
9918 return ret;
9919 }
9920
9921 #ifdef CONFIG_MMU
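/*
 * Map the physical range [start, start + size) into the kernel's vmalloc
 * area so a reserved (memory mapped) ring buffer can be accessed through
 * a normal pointer. Returns the new virtual address, or 0 on failure.
 */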
9922 static u64 map_pages(unsigned long start, unsigned long size)
9923 {
9924 unsigned long vmap_start, vmap_end;
9925 struct vm_struct *area;
9926 int ret;
9927
9928 area = get_vm_area(size, VM_IOREMAP);
9929 if (!area)
9930 return 0;
9931
9932 vmap_start = (unsigned long) area->addr;
9933 vmap_end = vmap_start + size;
9934
9935 ret = vmap_page_range(vmap_start, vmap_end,
9936 start, pgprot_nx(PAGE_KERNEL));
9937 if (ret < 0) {
9938 free_vm_area(area);
9939 return 0;
9940 }
9941
9942 return (u64)vmap_start;
9943 }
9944 #else
9945 static inline u64 map_pages(unsigned long start, unsigned long size)
9946 {
9947 return 0;
9948 }
9949 #endif
9950
9951 /**
9952 * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9953 * @name: The name of the trace array to be looked up/created.
9954 * @systems: A list of systems to create event directories for (NULL for all)
9955 *
9956 * Returns a pointer to the trace array with the given name, or
9957 * NULL if it cannot be created.
9958 *
9959 * NOTE: This function increments the reference counter associated with the
9960 * trace array returned. This makes sure it cannot be freed while in use.
9961 * Use trace_array_put() once the trace array is no longer needed.
9962 * If the trace_array is to be freed, trace_array_destroy() needs to
9963 * be called after the trace_array_put(), or simply let user space delete
9964 * it from the tracefs instances directory. But until the
9965 * trace_array_put() is called, user space can not delete it.
9966 *
9967 */
9968 struct trace_array *trace_array_get_by_name(const char *name, const char *systems)
9969 {
9970 struct trace_array *tr;
9971
9972 guard(mutex)(&event_mutex);
9973 guard(mutex)(&trace_types_lock);
9974
9975 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9976 if (tr->name && strcmp(tr->name, name) == 0) {
9977 tr->ref++;
9978 return tr;
9979 }
9980 }
9981
9982 tr = trace_array_create_systems(name, systems, 0, 0);
9983
9984 if (IS_ERR(tr))
9985 tr = NULL;
9986 else
9987 tr->ref++;
9988
9989 return tr;
9990 }
9991 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
9992
9993 static int __remove_instance(struct trace_array *tr)
9994 {
9995 int i;
9996
9997 /* Reference counter for a newly created trace array = 1. */
9998 if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9999 return -EBUSY;
10000
10001 list_del(&tr->list);
10002
10003 /* Disable all the flags that were enabled coming in */
10004 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
10005 if ((1 << i) & ZEROED_TRACE_FLAGS)
10006 set_tracer_flag(tr, 1 << i, 0);
10007 }
10008
10009 if (printk_trace == tr)
10010 update_printk_trace(&global_trace);
10011
10012 if (update_marker_trace(tr, 0))
10013 synchronize_rcu();
10014
10015 tracing_set_nop(tr);
10016 clear_ftrace_function_probes(tr);
10017 event_trace_del_tracer(tr);
10018 ftrace_clear_pids(tr);
10019 ftrace_destroy_function_files(tr);
10020 tracefs_remove(tr->dir);
10021 free_percpu(tr->last_func_repeats);
10022 free_trace_buffers(tr);
10023 clear_tracing_err_log(tr);
10024
10025 if (tr->range_name) {
10026 reserve_mem_release_by_name(tr->range_name);
10027 kfree(tr->range_name);
10028 }
10029
10030 for (i = 0; i < tr->nr_topts; i++) {
10031 kfree(tr->topts[i].topts);
10032 }
10033 kfree(tr->topts);
10034
10035 free_cpumask_var(tr->pipe_cpumask);
10036 free_cpumask_var(tr->tracing_cpumask);
10037 kfree_const(tr->system_names);
10038 kfree(tr->name);
10039 kfree(tr);
10040
10041 return 0;
10042 }
10043
10044 int trace_array_destroy(struct trace_array *this_tr)
10045 {
10046 struct trace_array *tr;
10047
10048 if (!this_tr)
10049 return -EINVAL;
10050
10051 guard(mutex)(&event_mutex);
10052 guard(mutex)(&trace_types_lock);
10053
10054
10055 /* Making sure trace array exists before destroying it. */
10056 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10057 if (tr == this_tr)
10058 return __remove_instance(tr);
10059 }
10060
10061 return -ENODEV;
10062 }
10063 EXPORT_SYMBOL_GPL(trace_array_destroy);
10064
10065 static int instance_rmdir(const char *name)
10066 {
10067 struct trace_array *tr;
10068
10069 guard(mutex)(&event_mutex);
10070 guard(mutex)(&trace_types_lock);
10071
10072 tr = trace_array_find(name);
10073 if (!tr)
10074 return -ENODEV;
10075
10076 return __remove_instance(tr);
10077 }
10078
10079 static __init void create_trace_instances(struct dentry *d_tracer)
10080 {
10081 struct trace_array *tr;
10082
10083 trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
10084 instance_mkdir,
10085 instance_rmdir);
10086 if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
10087 return;
10088
10089 guard(mutex)(&event_mutex);
10090 guard(mutex)(&trace_types_lock);
10091
10092 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10093 if (!tr->name)
10094 continue;
10095 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
10096 "Failed to create instance directory\n"))
10097 return;
10098 }
10099 }
10100
10101 static void
10102 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
10103 {
10104 int cpu;
10105
10106 trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
10107 tr, &show_traces_fops);
10108
10109 trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
10110 tr, &set_tracer_fops);
10111
10112 trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
10113 tr, &tracing_cpumask_fops);
10114
10115 trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
10116 tr, &tracing_iter_fops);
10117
10118 trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
10119 tr, &tracing_fops);
10120
10121 trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
10122 tr, &tracing_pipe_fops);
10123
10124 trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
10125 tr, &tracing_entries_fops);
10126
10127 trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
10128 tr, &tracing_total_entries_fops);
10129
10130 trace_create_file("free_buffer", 0200, d_tracer,
10131 tr, &tracing_free_buffer_fops);
10132
10133 trace_create_file("trace_marker", 0220, d_tracer,
10134 tr, &tracing_mark_fops);
10135
10136 tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");
10137
10138 trace_create_file("trace_marker_raw", 0220, d_tracer,
10139 tr, &tracing_mark_raw_fops);
10140
10141 trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
10142 &trace_clock_fops);
10143
10144 trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
10145 tr, &rb_simple_fops);
10146
10147 trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
10148 &trace_time_stamp_mode_fops);
10149
10150 tr->buffer_percent = 50;
10151
10152 trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
10153 tr, &buffer_percent_fops);
10154
10155 trace_create_file("buffer_subbuf_size_kb", TRACE_MODE_WRITE, d_tracer,
10156 tr, &buffer_subbuf_size_fops);
10157
10158 create_trace_options_dir(tr);
10159
10160 #ifdef CONFIG_TRACER_MAX_TRACE
10161 trace_create_maxlat_file(tr, d_tracer);
10162 #endif
10163
10164 if (ftrace_create_function_files(tr, d_tracer))
10165 MEM_FAIL(1, "Could not allocate function filter files");
10166
10167 if (tr->range_addr_start) {
10168 trace_create_file("last_boot_info", TRACE_MODE_READ, d_tracer,
10169 tr, &last_boot_fops);
10170 #ifdef CONFIG_TRACER_SNAPSHOT
10171 } else {
10172 trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
10173 tr, &snapshot_fops);
10174 #endif
10175 }
10176
10177 trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
10178 tr, &tracing_err_log_fops);
10179
10180 for_each_tracing_cpu(cpu)
10181 tracing_init_tracefs_percpu(tr, cpu);
10182
10183 ftrace_init_tracefs(tr, d_tracer);
10184 }
10185
10186 #ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED
10187 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
10188 {
10189 struct vfsmount *mnt;
10190 struct file_system_type *type;
10191 struct fs_context *fc;
10192 int ret;
10193
10194 /*
10195 * To maintain backward compatibility for tools that mount
10196 * debugfs to get to the tracing facility, tracefs is automatically
10197 * mounted to the debugfs/tracing directory.
10198 */
10199 type = get_fs_type("tracefs");
10200 if (!type)
10201 return NULL;
10202
10203 fc = fs_context_for_submount(type, mntpt);
10204 put_filesystem(type);
10205 if (IS_ERR(fc))
10206 return ERR_CAST(fc);
10207
10208 pr_warn("NOTICE: Automounting of tracing to debugfs is deprecated and will be removed in 2030\n");
10209
10210 ret = vfs_parse_fs_string(fc, "source",
10211 "tracefs", strlen("tracefs"));
10212 if (!ret)
10213 mnt = fc_mount(fc);
10214 else
10215 mnt = ERR_PTR(ret);
10216
10217 put_fs_context(fc);
10218 return mnt;
10219 }
10220 #endif
10221
10222 /**
10223 * tracing_init_dentry - initialize top level trace array
10224 *
10225 * This is called when creating files or directories in the tracing
10226 * directory. It is called via fs_initcall() by any of the boot up code
10227 * and expects to return the dentry of the top level tracing directory.
10228 */
10229 int tracing_init_dentry(void)
10230 {
10231 struct trace_array *tr = &global_trace;
10232
10233 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10234 pr_warn("Tracing disabled due to lockdown\n");
10235 return -EPERM;
10236 }
10237
10238 /* The top level trace array uses NULL as parent */
10239 if (tr->dir)
10240 return 0;
10241
10242 if (WARN_ON(!tracefs_initialized()))
10243 return -ENODEV;
10244
10245 #ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED
10246 /*
10247 * As there may still be users that expect the tracing
10248 * files to exist in debugfs/tracing, we must automount
10249 * the tracefs file system there, so older tools still
10250 * work with the newer kernel.
10251 */
10252 tr->dir = debugfs_create_automount("tracing", NULL,
10253 trace_automount, NULL);
10254 #endif
10255
10256 return 0;
10257 }
10258
10259 extern struct trace_eval_map *__start_ftrace_eval_maps[];
10260 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
10261
10262 static struct workqueue_struct *eval_map_wq __initdata;
10263 static struct work_struct eval_map_work __initdata;
10264 static struct work_struct tracerfs_init_work __initdata;
10265
10266 static void __init eval_map_work_func(struct work_struct *work)
10267 {
10268 int len;
10269
10270 len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
10271 trace_event_update_with_eval_map(NULL, __start_ftrace_eval_maps, len);
10272 }
10273
10274 static int __init trace_eval_init(void)
10275 {
10276 INIT_WORK(&eval_map_work, eval_map_work_func);
10277
10278 eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
10279 if (!eval_map_wq) {
10280 pr_err("Unable to allocate eval_map_wq\n");
10281 /* Do work here */
10282 eval_map_work_func(&eval_map_work);
10283 return -ENOMEM;
10284 }
10285
10286 queue_work(eval_map_wq, &eval_map_work);
10287 return 0;
10288 }
10289
10290 subsys_initcall(trace_eval_init);
10291
10292 static int __init trace_eval_sync(void)
10293 {
10294 /* Make sure the eval map updates are finished */
10295 if (eval_map_wq)
10296 destroy_workqueue(eval_map_wq);
10297 return 0;
10298 }
10299
10300 late_initcall_sync(trace_eval_sync);
10301
10302
10303 #ifdef CONFIG_MODULES
10304
10305 bool module_exists(const char *module)
10306 {
10307 /* All modules have the symbol __this_module */
10308 static const char this_mod[] = "__this_module";
10309 char modname[MODULE_NAME_LEN + sizeof(this_mod) + 2];
10310 unsigned long val;
10311 int n;
10312
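/* Look up "<module>:__this_module"; a nonzero address means the module is loaded */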
10313 n = snprintf(modname, sizeof(modname), "%s:%s", module, this_mod);
10314
10315 if (n > sizeof(modname) - 1)
10316 return false;
10317
10318 val = module_kallsyms_lookup_name(modname);
10319 return val != 0;
10320 }
10321
10322 static void trace_module_add_evals(struct module *mod)
10323 {
10324 /*
10325 * Modules with bad taint do not have events created; do
10326 * not bother with enums either.
10327 */
10328 if (trace_module_has_bad_taint(mod))
10329 return;
10330
10331 /* Even if there are no trace_evals, this needs to sanitize field types. */
10332 trace_event_update_with_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
10333 }
10334
10335 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
10336 static void trace_module_remove_evals(struct module *mod)
10337 {
10338 union trace_eval_map_item *map;
10339 union trace_eval_map_item **last = &trace_eval_maps;
10340
10341 if (!mod->num_trace_evals)
10342 return;
10343
10344 guard(mutex)(&trace_eval_mutex);
10345
10346 map = trace_eval_maps;
10347
10348 while (map) {
10349 if (map->head.mod == mod)
10350 break;
10351 map = trace_eval_jmp_to_tail(map);
10352 last = &map->tail.next;
10353 map = map->tail.next;
10354 }
10355 if (!map)
10356 return;
10357
10358 *last = trace_eval_jmp_to_tail(map)->tail.next;
10359 kfree(map);
10360 }
10361 #else
10362 static inline void trace_module_remove_evals(struct module *mod) { }
10363 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
10364
10365 static void trace_module_record(struct module *mod, bool add)
10366 {
10367 struct trace_array *tr;
10368 unsigned long flags;
10369
10370 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10371 flags = tr->flags & (TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT);
10372 /* Update any persistent trace array that has already been started */
10373 if (flags == TRACE_ARRAY_FL_BOOT && add) {
10374 guard(mutex)(&scratch_mutex);
10375 save_mod(mod, tr);
10376 } else if (flags & TRACE_ARRAY_FL_LAST_BOOT) {
10377 /* Update the delta if the module was loaded in the previous boot */
10378 make_mod_delta(mod, tr);
10379 }
10380 }
10381 }
10382
10383 static int trace_module_notify(struct notifier_block *self,
10384 unsigned long val, void *data)
10385 {
10386 struct module *mod = data;
10387
10388 switch (val) {
10389 case MODULE_STATE_COMING:
10390 trace_module_add_evals(mod);
10391 trace_module_record(mod, true);
10392 break;
10393 case MODULE_STATE_GOING:
10394 trace_module_remove_evals(mod);
10395 trace_module_record(mod, false);
10396 break;
10397 }
10398
10399 return NOTIFY_OK;
10400 }
10401
10402 static struct notifier_block trace_module_nb = {
10403 .notifier_call = trace_module_notify,
10404 .priority = 0,
10405 };
10406 #endif /* CONFIG_MODULES */
10407
10408 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
10409 {
10410
10411 event_trace_init();
10412
10413 init_tracer_tracefs(&global_trace, NULL);
10414 ftrace_init_tracefs_toplevel(&global_trace, NULL);
10415
10416 trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
10417 &global_trace, &tracing_thresh_fops);
10418
10419 trace_create_file("README", TRACE_MODE_READ, NULL,
10420 NULL, &tracing_readme_fops);
10421
10422 trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
10423 NULL, &tracing_saved_cmdlines_fops);
10424
10425 trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
10426 NULL, &tracing_saved_cmdlines_size_fops);
10427
10428 trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
10429 NULL, &tracing_saved_tgids_fops);
10430
10431 trace_create_eval_file(NULL);
10432
10433 #ifdef CONFIG_MODULES
10434 register_module_notifier(&trace_module_nb);
10435 #endif
10436
10437 #ifdef CONFIG_DYNAMIC_FTRACE
10438 trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
10439 NULL, &tracing_dyn_info_fops);
10440 #endif
10441
10442 create_trace_instances(NULL);
10443
10444 update_tracer_options(&global_trace);
10445 }
10446
10447 static __init int tracer_init_tracefs(void)
10448 {
10449 int ret;
10450
10451 trace_access_lock_init();
10452
10453 ret = tracing_init_dentry();
10454 if (ret)
10455 return 0;
10456
10457 if (eval_map_wq) {
10458 INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
10459 queue_work(eval_map_wq, &tracerfs_init_work);
10460 } else {
10461 tracer_init_tracefs_work_func(NULL);
10462 }
10463
10464 rv_init_interface();
10465
10466 return 0;
10467 }
10468
10469 fs_initcall(tracer_init_tracefs);
10470
10471 static int trace_die_panic_handler(struct notifier_block *self,
10472 unsigned long ev, void *unused);
10473
10474 static struct notifier_block trace_panic_notifier = {
10475 .notifier_call = trace_die_panic_handler,
10476 .priority = INT_MAX - 1,
10477 };
10478
10479 static struct notifier_block trace_die_notifier = {
10480 .notifier_call = trace_die_panic_handler,
10481 .priority = INT_MAX - 1,
10482 };
10483
10484 /*
10485 * The idea is to execute the following die/panic callback early, in order
10486 * to avoid showing irrelevant information in the trace (like other panic
10487 * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
10488 * warnings get disabled (to prevent potential log flooding).
10489 */
10490 static int trace_die_panic_handler(struct notifier_block *self,
10491 unsigned long ev, void *unused)
10492 {
10493 if (!ftrace_dump_on_oops_enabled())
10494 return NOTIFY_DONE;
10495
10496 /* The die notifier requires DIE_OOPS to trigger */
10497 if (self == &trace_die_notifier && ev != DIE_OOPS)
10498 return NOTIFY_DONE;
10499
10500 ftrace_dump(DUMP_PARAM);
10501
10502 return NOTIFY_DONE;
10503 }
10504
10505 /*
10506 * printk is limited to a max of 1024; we really don't need it that big.
10507 * Nothing should be printing 1000 characters anyway.
10508 */
10509 #define TRACE_MAX_PRINT 1000
10510
10511 /*
10512 * Define here KERN_TRACE so that we have one place to modify
10513 * it if we decide to change what log level the ftrace dump
10514 * should be at.
10515 */
10516 #define KERN_TRACE KERN_EMERG
10517
10518 void
10519 trace_printk_seq(struct trace_seq *s)
10520 {
10521 /* Probably should print a warning here. */
10522 if (s->seq.len >= TRACE_MAX_PRINT)
10523 s->seq.len = TRACE_MAX_PRINT;
10524
10525 /*
10526 * More paranoid code. Although the buffer size is set to
10527 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
10528 * an extra layer of protection.
10529 */
10530 if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
10531 s->seq.len = s->seq.size - 1;
10532
10533 /* Should be zero terminated, but we are paranoid. */
10534 s->buffer[s->seq.len] = 0;
10535
10536 printk(KERN_TRACE "%s", s->buffer);
10537
10538 trace_seq_init(s);
10539 }
10540
10541 static void trace_init_iter(struct trace_iterator *iter, struct trace_array *tr)
10542 {
10543 iter->tr = tr;
10544 iter->trace = iter->tr->current_trace;
10545 iter->cpu_file = RING_BUFFER_ALL_CPUS;
10546 iter->array_buffer = &tr->array_buffer;
10547
10548 if (iter->trace && iter->trace->open)
10549 iter->trace->open(iter);
10550
10551 /* Annotate start of buffers if we had overruns */
10552 if (ring_buffer_overruns(iter->array_buffer->buffer))
10553 iter->iter_flags |= TRACE_FILE_ANNOTATE;
10554
10555 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
10556 if (trace_clocks[iter->tr->clock_id].in_ns)
10557 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10558
10559 /* Can not use kmalloc for iter.temp and iter.fmt */
10560 iter->temp = static_temp_buf;
10561 iter->temp_size = STATIC_TEMP_BUF_SIZE;
10562 iter->fmt = static_fmt_buf;
10563 iter->fmt_size = STATIC_FMT_BUF_SIZE;
10564 }
10565
10566 void trace_init_global_iter(struct trace_iterator *iter)
10567 {
10568 trace_init_iter(iter, &global_trace);
10569 }
10570
10571 static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_mode)
10572 {
10573 /* use static because iter can be a bit big for the stack */
10574 static struct trace_iterator iter;
10575 unsigned int old_userobj;
10576 unsigned long flags;
10577 int cnt = 0;
10578
10579 /*
10580 * Always turn off tracing when we dump.
10581 * We don't need to show trace output of what happens
10582 * between multiple crashes.
10583 *
10584 * If the user does a sysrq-z, then they can re-enable
10585 * tracing with echo 1 > tracing_on.
10586 */
10587 tracer_tracing_off(tr);
10588
10589 local_irq_save(flags);
10590
10591 /* Simulate the iterator */
10592 trace_init_iter(&iter, tr);
10593
10594 /* While dumping, do not allow the buffer to be enabled */
10595 tracer_tracing_disable(tr);
10596
10597 old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
10598
10599 /* don't look at user memory in panic mode */
10600 tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
10601
10602 if (dump_mode == DUMP_ORIG)
10603 iter.cpu_file = raw_smp_processor_id();
10604 else
10605 iter.cpu_file = RING_BUFFER_ALL_CPUS;
10606
10607 if (tr == &global_trace)
10608 printk(KERN_TRACE "Dumping ftrace buffer:\n");
10609 else
10610 printk(KERN_TRACE "Dumping ftrace instance %s buffer:\n", tr->name);
10611
10612 /* Did function tracer already get disabled? */
10613 if (ftrace_is_dead()) {
10614 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10615 printk("# MAY BE MISSING FUNCTION EVENTS\n");
10616 }
10617
10618 /*
10619 * We need to stop all tracing on all CPUS to read
10620 * the next buffer. This is a bit expensive, but is
10621 * not done often. We read everything we can,
10622 * and then release the locks again.
10623 */
10624
10625 while (!trace_empty(&iter)) {
10626
10627 if (!cnt)
10628 printk(KERN_TRACE "---------------------------------\n");
10629
10630 cnt++;
10631
10632 trace_iterator_reset(&iter);
10633 iter.iter_flags |= TRACE_FILE_LAT_FMT;
10634
10635 if (trace_find_next_entry_inc(&iter) != NULL) {
10636 int ret;
10637
10638 ret = print_trace_line(&iter);
10639 if (ret != TRACE_TYPE_NO_CONSUME)
10640 trace_consume(&iter);
10641
10642 trace_printk_seq(&iter.seq);
10643 }
10644 touch_nmi_watchdog();
10645 }
10646
10647 if (!cnt)
10648 printk(KERN_TRACE " (ftrace buffer empty)\n");
10649 else
10650 printk(KERN_TRACE "---------------------------------\n");
10651
10652 tr->trace_flags |= old_userobj;
10653
10654 tracer_tracing_enable(tr);
10655 local_irq_restore(flags);
10656 }
10657
10658 static void ftrace_dump_by_param(void)
10659 {
10660 bool first_param = true;
10661 char dump_param[MAX_TRACER_SIZE];
10662 char *buf, *token, *inst_name;
10663 struct trace_array *tr;
10664
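/*
 * ftrace_dump_on_oops has the form:
 *   <0|1|2|orig_cpu>[,<instance>[=2|=orig_cpu]]...
 * The leading value selects the top level buffer; the remaining tokens
 * name instances to dump.
 */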
10665 strscpy(dump_param, ftrace_dump_on_oops, MAX_TRACER_SIZE);
10666 buf = dump_param;
10667
10668 while ((token = strsep(&buf, ",")) != NULL) {
10669 if (first_param) {
10670 first_param = false;
10671 if (!strcmp("0", token))
10672 continue;
10673 else if (!strcmp("1", token)) {
10674 ftrace_dump_one(&global_trace, DUMP_ALL);
10675 continue;
10676 }
10677 else if (!strcmp("2", token) ||
10678 !strcmp("orig_cpu", token)) {
10679 ftrace_dump_one(&global_trace, DUMP_ORIG);
10680 continue;
10681 }
10682 }
10683
10684 inst_name = strsep(&token, "=");
10685 tr = trace_array_find(inst_name);
10686 if (!tr) {
10687 printk(KERN_TRACE "Instance %s not found\n", inst_name);
10688 continue;
10689 }
10690
10691 if (token && (!strcmp("2", token) ||
10692 !strcmp("orig_cpu", token)))
10693 ftrace_dump_one(tr, DUMP_ORIG);
10694 else
10695 ftrace_dump_one(tr, DUMP_ALL);
10696 }
10697 }
10698
10699 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10700 {
10701 static atomic_t dump_running;
10702
10703 /* Only allow one dump user at a time. */
10704 if (atomic_inc_return(&dump_running) != 1) {
10705 atomic_dec(&dump_running);
10706 return;
10707 }
10708
10709 switch (oops_dump_mode) {
10710 case DUMP_ALL:
10711 ftrace_dump_one(&global_trace, DUMP_ALL);
10712 break;
10713 case DUMP_ORIG:
10714 ftrace_dump_one(&global_trace, DUMP_ORIG);
10715 break;
10716 case DUMP_PARAM:
10717 ftrace_dump_by_param();
10718 break;
10719 case DUMP_NONE:
10720 break;
10721 default:
10722 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10723 ftrace_dump_one(&global_trace, DUMP_ALL);
10724 }
10725
10726 atomic_dec(&dump_running);
10727 }
10728 EXPORT_SYMBOL_GPL(ftrace_dump);
10729
10730 #define WRITE_BUFSIZE 4096
10731
10732 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10733 size_t count, loff_t *ppos,
10734 int (*createfn)(const char *))
10735 {
10736 char *kbuf __free(kfree) = NULL;
10737 char *buf, *tmp;
10738 int ret = 0;
10739 size_t done = 0;
10740 size_t size;
10741
10742 kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10743 if (!kbuf)
10744 return -ENOMEM;
10745
10746 while (done < count) {
10747 size = count - done;
10748
10749 if (size >= WRITE_BUFSIZE)
10750 size = WRITE_BUFSIZE - 1;
10751
10752 if (copy_from_user(kbuf, buffer + done, size))
10753 return -EFAULT;
10754
10755 kbuf[size] = '\0';
10756 buf = kbuf;
10757 do {
10758 tmp = strchr(buf, '\n');
10759 if (tmp) {
10760 *tmp = '\0';
10761 size = tmp - buf + 1;
10762 } else {
10763 size = strlen(buf);
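/*
 * No newline at the end of this chunk: if more data remains,
 * re-read the partial line at the start of the next chunk
 * (unless the line alone filled the whole buffer).
 */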
10764 if (done + size < count) {
10765 if (buf != kbuf)
10766 break;
10767 /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10768 pr_warn("Line length is too long: Should be less than %d\n",
10769 WRITE_BUFSIZE - 2);
10770 return -EINVAL;
10771 }
10772 }
10773 done += size;
10774
10775 /* Remove comments */
10776 tmp = strchr(buf, '#');
10777
10778 if (tmp)
10779 *tmp = '\0';
10780
10781 ret = createfn(buf);
10782 if (ret)
10783 return ret;
10784 buf += size;
10785
10786 } while (done < count);
10787 }
10788 return done;
10789 }
10790
10791 #ifdef CONFIG_TRACER_MAX_TRACE
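/*
 * boot_snapshot_info holds a tab-separated list of instance names that
 * requested a snapshot buffer at boot.
 */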
10792 __init static bool tr_needs_alloc_snapshot(const char *name)
10793 {
10794 char *test;
10795 int len = strlen(name);
10796 bool ret;
10797
10798 if (!boot_snapshot_index)
10799 return false;
10800
10801 if (strncmp(name, boot_snapshot_info, len) == 0 &&
10802 boot_snapshot_info[len] == '\t')
10803 return true;
10804
10805 test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10806 if (!test)
10807 return false;
10808
10809 sprintf(test, "\t%s\t", name);
10810 ret = strstr(boot_snapshot_info, test) != NULL;
10811 kfree(test);
10812 return ret;
10813 }
10814
10815 __init static void do_allocate_snapshot(const char *name)
10816 {
10817 if (!tr_needs_alloc_snapshot(name))
10818 return;
10819
10820 /*
10821 * When allocate_snapshot is set, the next call to
10822 * allocate_trace_buffers() (called by trace_array_get_by_name())
10823 * will allocate the snapshot buffer. That will also clear
10824 * this flag.
10825 */
10826 allocate_snapshot = true;
10827 }
10828 #else
10829 static inline void do_allocate_snapshot(const char *name) { }
10830 #endif
10831
10832 __init static void enable_instances(void)
10833 {
10834 struct trace_array *tr;
10835 bool memmap_area = false;
10836 char *curr_str;
10837 char *name;
10838 char *str;
10839 char *tok;
10840
10841 /* A tab is always appended */
10842 boot_instance_info[boot_instance_index - 1] = '\0';
10843 str = boot_instance_info;
10844
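/*
 * Each tab-separated entry has the form:
 *   name[^flag[^flag...]][@<start>:<size> | @<reserve_mem name>][,event...]
 * where the trailing comma-separated tokens are events to enable.
 */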
10845 while ((curr_str = strsep(&str, "\t"))) {
10846 phys_addr_t start = 0;
10847 phys_addr_t size = 0;
10848 unsigned long addr = 0;
10849 bool traceprintk = false;
10850 bool traceoff = false;
10851 char *flag_delim;
10852 char *addr_delim;
10853 char *rname __free(kfree) = NULL;
10854
10855 tok = strsep(&curr_str, ",");
10856
10857 flag_delim = strchr(tok, '^');
10858 addr_delim = strchr(tok, '@');
10859
10860 if (addr_delim)
10861 *addr_delim++ = '\0';
10862
10863 if (flag_delim)
10864 *flag_delim++ = '\0';
10865
10866 name = tok;
10867
10868 if (flag_delim) {
10869 char *flag;
10870
10871 while ((flag = strsep(&flag_delim, "^"))) {
10872 if (strcmp(flag, "traceoff") == 0) {
10873 traceoff = true;
10874 } else if ((strcmp(flag, "printk") == 0) ||
10875 (strcmp(flag, "traceprintk") == 0) ||
10876 (strcmp(flag, "trace_printk") == 0)) {
10877 traceprintk = true;
10878 } else {
10879 pr_info("Tracing: Invalid instance flag '%s' for %s\n",
10880 flag, name);
10881 }
10882 }
10883 }
10884
10885 tok = addr_delim;
10886 if (tok && isdigit(*tok)) {
10887 start = memparse(tok, &tok);
10888 if (!start) {
10889 pr_warn("Tracing: Invalid boot instance address for %s\n",
10890 name);
10891 continue;
10892 }
10893 if (*tok != ':') {
10894 pr_warn("Tracing: No size specified for instance %s\n", name);
10895 continue;
10896 }
10897 tok++;
10898 size = memparse(tok, &tok);
10899 if (!size) {
10900 pr_warn("Tracing: Invalid boot instance size for %s\n",
10901 name);
10902 continue;
10903 }
10904 memmap_area = true;
10905 } else if (tok) {
10906 if (!reserve_mem_find_by_name(tok, &start, &size)) {
10907 start = 0;
10908 pr_warn("Failed to map boot instance %s to %s\n", name, tok);
10909 continue;
10910 }
10911 rname = kstrdup(tok, GFP_KERNEL);
10912 }
10913
10914 if (start) {
10915 /* Start and size must be page aligned */
10916 if (start & ~PAGE_MASK) {
10917 pr_warn("Tracing: mapping start addr %pa is not page aligned\n", &start);
10918 continue;
10919 }
10920 if (size & ~PAGE_MASK) {
10921 pr_warn("Tracing: mapping size %pa is not page aligned\n", &size);
10922 continue;
10923 }
10924
10925 if (memmap_area)
10926 addr = map_pages(start, size);
10927 else
10928 addr = (unsigned long)phys_to_virt(start);
10929 if (addr) {
10930 pr_info("Tracing: mapped boot instance %s at physical memory %pa of size 0x%lx\n",
10931 name, &start, (unsigned long)size);
10932 } else {
10933 pr_warn("Tracing: Failed to map boot instance %s\n", name);
10934 continue;
10935 }
10936 } else {
10937 /* Only non mapped buffers have snapshot buffers */
10938 if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
10939 do_allocate_snapshot(name);
10940 }
10941
10942 tr = trace_array_create_systems(name, NULL, addr, size);
10943 if (IS_ERR(tr)) {
10944 pr_warn("Tracing: Failed to create instance buffer %s\n", curr_str);
10945 continue;
10946 }
10947
10948 if (traceoff)
10949 tracer_tracing_off(tr);
10950
10951 if (traceprintk)
10952 update_printk_trace(tr);
10953
10954 /*
10955 * memmap'd buffers can not be freed.
10956 */
10957 if (memmap_area) {
10958 tr->flags |= TRACE_ARRAY_FL_MEMMAP;
10959 tr->ref++;
10960 }
10961
10962 if (start) {
10963 tr->flags |= TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT;
10964 tr->range_name = no_free_ptr(rname);
10965 }
10966
10967 while ((tok = strsep(&curr_str, ","))) {
10968 early_enable_events(tr, tok, true);
10969 }
10970 }
10971 }
10972
10973 __init static int tracer_alloc_buffers(void)
10974 {
10975 int ring_buf_size;
10976 int ret = -ENOMEM;
10977
10978
10979 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10980 pr_warn("Tracing disabled due to lockdown\n");
10981 return -EPERM;
10982 }
10983
10984 /*
10985 * Make sure we don't accidentally add more trace options
10986 * than we have bits for.
10987 */
10988 BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10989
10990 if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10991 return -ENOMEM;
10992
10993 if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10994 goto out_free_buffer_mask;
10995
10996 /* Only allocate trace_printk buffers if a trace_printk exists */
10997 if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10998 /* Must be called before global_trace.buffer is allocated */
10999 trace_printk_init_buffers();
11000
11001 /* To save memory, keep the ring buffer size to its minimum */
11002 if (global_trace.ring_buffer_expanded)
11003 ring_buf_size = trace_buf_size;
11004 else
11005 ring_buf_size = 1;
11006
11007 cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
11008 cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
11009
11010 raw_spin_lock_init(&global_trace.start_lock);
11011
	/*
	 * The prepare callback allocates some memory for the ring buffer. We
	 * don't free the buffer if the CPU goes down. If we were to free
	 * the buffer, then the user would lose any trace that was in the
	 * buffer. The memory will be removed once the "instance" is removed.
	 */
	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
				      "trace/RB:prepare", trace_rb_cpu_prepare,
				      NULL);
	if (ret < 0)
		goto out_free_cpumask;
	/* Used for event triggers */
	ret = -ENOMEM;
	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
	if (!temp_buffer)
		goto out_rm_hp_state;

	if (trace_create_savedcmd() < 0)
		goto out_free_temp_buffer;

	if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
		goto out_free_savedcmd;

	/* TODO: make the number of buffers hot pluggable with CPUS */
	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
		goto out_free_pipe_cpumask;
	}
	if (global_trace.buffer_disabled)
		tracing_off();

	if (trace_boot_clock) {
		ret = tracing_set_clock(&global_trace, trace_boot_clock);
		if (ret < 0)
			pr_warn("Trace clock %s not defined, going back to default\n",
				trace_boot_clock);
	}

	/*
	 * register_tracer() might reference current_trace, so it
	 * needs to be set before we register anything. This is
	 * just a bootstrap of current_trace anyway.
	 */
	global_trace.current_trace = &nop_trace;

	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
#ifdef CONFIG_TRACER_MAX_TRACE
	spin_lock_init(&global_trace.snapshot_trigger_lock);
#endif
	ftrace_init_global_array_ops(&global_trace);

#ifdef CONFIG_MODULES
	INIT_LIST_HEAD(&global_trace.mod_events);
#endif

	init_trace_flags_index(&global_trace);

	register_tracer(&nop_trace);

	/* Function tracing may start here (via kernel command line) */
	init_function_trace();

	/* All seems OK, enable tracing */
	tracing_disabled = 0;

	atomic_notifier_chain_register(&panic_notifier_list,
				       &trace_panic_notifier);

	register_die_notifier(&trace_die_notifier);

	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;

	INIT_LIST_HEAD(&global_trace.systems);
	INIT_LIST_HEAD(&global_trace.events);
	INIT_LIST_HEAD(&global_trace.hist_vars);
	INIT_LIST_HEAD(&global_trace.err_log);
	list_add(&global_trace.marker_list, &marker_copies);
	list_add(&global_trace.list, &ftrace_trace_arrays);

	apply_trace_boot_options();

	register_snapshot_cmd();

	return 0;

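	/* Error path: release everything in the reverse order it was set up */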
out_free_pipe_cpumask:
	free_cpumask_var(global_trace.pipe_cpumask);
out_free_savedcmd:
	trace_free_saved_cmdlines_buffer();
out_free_temp_buffer:
	ring_buffer_free(temp_buffer);
out_rm_hp_state:
	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
out_free_cpumask:
	free_cpumask_var(global_trace.tracing_cpumask);
out_free_buffer_mask:
	free_cpumask_var(tracing_buffer_mask);
	return ret;
}

#ifdef CONFIG_FUNCTION_TRACER
/* Used to set module cached ftrace filtering at boot up */
__init struct trace_array *trace_get_global_array(void)
{
	return &global_trace;
}
#endif

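/*
 * If a boot snapshot was requested on the kernel command line, take a
 * snapshot of every instance that has a snapshot buffer allocated and
 * note it in that instance's trace.
 */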
void __init ftrace_boot_snapshot(void)
{
#ifdef CONFIG_TRACER_MAX_TRACE
	struct trace_array *tr;

	if (!snapshot_at_boot)
		return;

	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
		if (!tr->allocated_snapshot)
			continue;

		tracing_snapshot_instance(tr);
		trace_array_puts(tr, "** Boot snapshot taken **\n");
	}
#endif
}

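/*
 * First stage of tracing initialization, called early from start_kernel():
 * set up the tracepoint_printk iterator if requested, allocate the global
 * trace buffers (tracer_alloc_buffers()), and initialize the default trace
 * output events.
 */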
void __init early_trace_init(void)
{
	if (tracepoint_printk) {
		tracepoint_print_iter =
			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
		if (MEM_FAIL(!tracepoint_print_iter,
			     "Failed to allocate trace iterator\n"))
			tracepoint_printk = 0;
		else
			static_key_enable(&tracepoint_printk_key.key);
	}
	tracer_alloc_buffers();

	init_events();
}

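/*
 * Second stage, also called from start_kernel(): initialize the trace event
 * infrastructure and create any instances requested on the kernel command
 * line (boot_instance_index is only set when instances were specified).
 */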
void __init trace_init(void)
{
	trace_event_init();

	if (boot_instance_index)
		enable_instances();
}

__init static void clear_boot_tracer(void)
{
	/*
	 * The buffer holding the default boot-up tracer name lives in an
	 * init section that is freed once boot completes. This function
	 * runs at late_initcall time: if the requested boot tracer was
	 * never registered, clear the pointer so that a later tracer
	 * registration does not access memory that is about to be freed.
	 */
	if (!default_bootup_tracer)
		return;

	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
	       default_bootup_tracer);
	default_bootup_tracer = NULL;
}

#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
__init static void tracing_set_default_clock(void)
{
	/* sched_clock_stable() is determined in late_initcall */
	if (!trace_boot_clock && !sched_clock_stable()) {
		if (security_locked_down(LOCKDOWN_TRACEFS)) {
			pr_warn("Can not set tracing clock due to lockdown\n");
			return;
		}

		printk(KERN_WARNING
		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
		       "If you want to keep using the local clock, then add:\n"
		       " \"trace_clock=local\"\n"
		       "on the kernel command line\n");
		tracing_set_clock(&global_trace, "global");
	}
}
#else
static inline void tracing_set_default_clock(void) { }
#endif

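/*
 * Final boot-time fixups, run at late_initcall_sync (see below): honor the
 * "stop tracepoint_printk on boot" and "trace off after boot" options,
 * settle the default trace clock now that sched_clock stability is known,
 * and complain if a requested boot-up tracer never registered.
 */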
__init static int late_trace_init(void)
{
	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
		static_key_disable(&tracepoint_printk_key.key);
		tracepoint_printk = 0;
	}

	if (traceoff_after_boot)
		tracing_off();

	tracing_set_default_clock();
	clear_boot_tracer();
	return 0;
}

late_initcall_sync(late_trace_init);
