1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * ring buffer based function tracer
4 *
5 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7 *
8 * Originally taken from the RT patch by:
9 * Arnaldo Carvalho de Melo <acme@redhat.com>
10 *
11 * Based on code from the latency_tracer, that is:
12 * Copyright (C) 2004-2006 Ingo Molnar
13 * Copyright (C) 2004 Nadia Yvette Chambers
14 */
15 #include <linux/ring_buffer.h>
16 #include <linux/utsname.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/cleanup.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/panic_notifier.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52 #include <linux/sort.h>
53 #include <linux/io.h> /* vmap_page_range() */
54 #include <linux/fs_context.h>
55
56 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
57
58 #include "trace.h"
59 #include "trace_output.h"
60
61 #ifdef CONFIG_FTRACE_STARTUP_TEST
62 /*
63 * We need to change this state when a selftest is running.
64 * A selftest will look into the ring buffer to count the
65 * entries inserted during the selftest, but some concurrent
66 * insertions into the ring buffer, such as trace_printk(), could occur
67 * at the same time, giving false positive or negative results.
68 */
69 static bool __read_mostly tracing_selftest_running;
70
71 /*
72 * If boot-time tracing including tracers/events via kernel cmdline
73 * is running, we do not want to run SELFTEST.
74 */
75 bool __read_mostly tracing_selftest_disabled;
76
77 void __init disable_tracing_selftest(const char *reason)
78 {
79 if (!tracing_selftest_disabled) {
80 tracing_selftest_disabled = true;
81 pr_info("Ftrace startup test is disabled due to %s\n", reason);
82 }
83 }
84 #else
85 #define tracing_selftest_running 0
86 #define tracing_selftest_disabled 0
87 #endif
88
89 /* Pipe tracepoints to printk */
90 static struct trace_iterator *tracepoint_print_iter;
91 int tracepoint_printk;
92 static bool tracepoint_printk_stop_on_boot __initdata;
93 static bool traceoff_after_boot __initdata;
94 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
95
96 /* For tracers that don't implement custom flags */
97 static struct tracer_opt dummy_tracer_opt[] = {
98 { }
99 };
100
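/*
 * A no-op set_flag() callback for tracers that do not provide their own;
 * it accepts any flag change and always reports success.
 */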
101 static int
102 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
103 {
104 return 0;
105 }
106
107 /*
108 * To prevent the comm cache from being overwritten when no
109 * tracing is active, only save the comm when a trace event
110 * occurred.
111 */
112 DEFINE_PER_CPU(bool, trace_taskinfo_save);
113
114 /*
115 * Kill all tracing for good (never come back).
116 * It is initialized to 1 but will turn to zero if the initialization
117 * of the tracer is successful. But that is the only place that sets
118 * this back to zero.
119 */
120 static int tracing_disabled = 1;
121
122 cpumask_var_t __read_mostly tracing_buffer_mask;
123
124 #define MAX_TRACER_SIZE 100
125 /*
126 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
127 *
128 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
129 * is set, then ftrace_dump is called. This will output the contents
130 * of the ftrace buffers to the console. This is very useful for
131 * capturing traces that lead to crashes and outputting them to a
132 * serial console.
133 *
134 * It is off by default, but you can enable it either by specifying
135 * "ftrace_dump_on_oops" on the kernel command line, or by setting
136 * /proc/sys/kernel/ftrace_dump_on_oops
137 * Set 1 if you want to dump buffers of all CPUs
138 * Set 2 if you want to dump the buffer of the CPU that triggered oops
139 * Set instance name if you want to dump the specific trace instance
140 * Dumping multiple instances is also supported; instance names are
141 * separated by commas.
142 */
143 /* Initialized to the string "0" so dumping is disabled by default */
144 char ftrace_dump_on_oops[MAX_TRACER_SIZE] = "0";
145
146 /* When set, tracing will stop when a WARN*() is hit */
147 static int __disable_trace_on_warning;
148
149 int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
150 void *buffer, size_t *lenp, loff_t *ppos);
151 static const struct ctl_table trace_sysctl_table[] = {
152 {
153 .procname = "ftrace_dump_on_oops",
154 .data = &ftrace_dump_on_oops,
155 .maxlen = MAX_TRACER_SIZE,
156 .mode = 0644,
157 .proc_handler = proc_dostring,
158 },
159 {
160 .procname = "traceoff_on_warning",
161 .data = &__disable_trace_on_warning,
162 .maxlen = sizeof(__disable_trace_on_warning),
163 .mode = 0644,
164 .proc_handler = proc_dointvec,
165 },
166 {
167 .procname = "tracepoint_printk",
168 .data = &tracepoint_printk,
169 .maxlen = sizeof(tracepoint_printk),
170 .mode = 0644,
171 .proc_handler = tracepoint_printk_sysctl,
172 },
173 };
174
175 static int __init init_trace_sysctls(void)
176 {
177 register_sysctl_init("kernel", trace_sysctl_table);
178 return 0;
179 }
180 subsys_initcall(init_trace_sysctls);
181
182 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
183 /* Map of enums to their values, for "eval_map" file */
184 struct trace_eval_map_head {
185 struct module *mod;
186 unsigned long length;
187 };
188
189 union trace_eval_map_item;
190
191 struct trace_eval_map_tail {
192 /*
193 * "end" is first and points to NULL as it must be different
194 * than "mod" or "eval_string"
195 */
196 union trace_eval_map_item *next;
197 const char *end; /* points to NULL */
198 };
199
200 static DEFINE_MUTEX(trace_eval_mutex);
201
202 /*
203 * The trace_eval_maps are saved in an array with two extra elements,
204 * one at the beginning, and one at the end. The beginning item contains
205 * the count of the saved maps (head.length), and the module they
206 * belong to if not built in (head.mod). The ending item contains a
207 * pointer to the next array of saved eval_map items.
208 */
209 union trace_eval_map_item {
210 struct trace_eval_map map;
211 struct trace_eval_map_head head;
212 struct trace_eval_map_tail tail;
213 };
214
215 static union trace_eval_map_item *trace_eval_maps;
216 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
217
218 int tracing_set_tracer(struct trace_array *tr, const char *buf);
219 static void ftrace_trace_userstack(struct trace_array *tr,
220 struct trace_buffer *buffer,
221 unsigned int trace_ctx);
222
223 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
224 static char *default_bootup_tracer;
225
226 static bool allocate_snapshot;
227 static bool snapshot_at_boot;
228
229 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
230 static int boot_instance_index;
231
232 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
233 static int boot_snapshot_index;
234
235 static int __init set_cmdline_ftrace(char *str)
236 {
237 strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
238 default_bootup_tracer = bootup_tracer_buf;
239 /* We are using ftrace early, expand it */
240 trace_set_ring_buffer_expanded(NULL);
241 return 1;
242 }
243 __setup("ftrace=", set_cmdline_ftrace);
244
245 int ftrace_dump_on_oops_enabled(void)
246 {
247 if (!strcmp("0", ftrace_dump_on_oops))
248 return 0;
249 else
250 return 1;
251 }
252
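/*
 * Parse the "ftrace_dump_on_oops" boot parameter. A bare
 * "ftrace_dump_on_oops" behaves like "=1" (dump all CPUs); for example,
 * "ftrace_dump_on_oops=2" or "=<instance>[,<instance>...]" select the
 * other modes documented above (the example values are illustrative).
 */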
253 static int __init set_ftrace_dump_on_oops(char *str)
254 {
255 if (!*str) {
256 strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
257 return 1;
258 }
259
260 if (*str == ',') {
261 strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
262 strscpy(ftrace_dump_on_oops + 1, str, MAX_TRACER_SIZE - 1);
263 return 1;
264 }
265
266 if (*str++ == '=') {
267 strscpy(ftrace_dump_on_oops, str, MAX_TRACER_SIZE);
268 return 1;
269 }
270
271 return 0;
272 }
273 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
274
275 static int __init stop_trace_on_warning(char *str)
276 {
277 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
278 __disable_trace_on_warning = 1;
279 return 1;
280 }
281 __setup("traceoff_on_warning", stop_trace_on_warning);
282
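/*
 * "alloc_snapshot" with no argument allocates the top-level snapshot
 * buffer and expands the main ring buffer; with "=<list>" the argument
 * is appended (tab-terminated) to boot_snapshot_info for later handling.
 */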
283 static int __init boot_alloc_snapshot(char *str)
284 {
285 char *slot = boot_snapshot_info + boot_snapshot_index;
286 int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
287 int ret;
288
289 if (str[0] == '=') {
290 str++;
291 if (strlen(str) >= left)
292 return -1;
293
294 ret = snprintf(slot, left, "%s\t", str);
295 boot_snapshot_index += ret;
296 } else {
297 allocate_snapshot = true;
298 /* We also need the main ring buffer expanded */
299 trace_set_ring_buffer_expanded(NULL);
300 }
301 return 1;
302 }
303 __setup("alloc_snapshot", boot_alloc_snapshot);
304
305
306 static int __init boot_snapshot(char *str)
307 {
308 snapshot_at_boot = true;
309 boot_alloc_snapshot(str);
310 return 1;
311 }
312 __setup("ftrace_boot_snapshot", boot_snapshot);
313
314
315 static int __init boot_instance(char *str)
316 {
317 char *slot = boot_instance_info + boot_instance_index;
318 int left = sizeof(boot_instance_info) - boot_instance_index;
319 int ret;
320
321 if (strlen(str) >= left)
322 return -1;
323
324 ret = snprintf(slot, left, "%s\t", str);
325 boot_instance_index += ret;
326
327 return 1;
328 }
329 __setup("trace_instance=", boot_instance);
330
331
332 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
333
334 static int __init set_trace_boot_options(char *str)
335 {
336 strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
337 return 1;
338 }
339 __setup("trace_options=", set_trace_boot_options);
340
341 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
342 static char *trace_boot_clock __initdata;
343
344 static int __init set_trace_boot_clock(char *str)
345 {
346 strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
347 trace_boot_clock = trace_boot_clock_buf;
348 return 1;
349 }
350 __setup("trace_clock=", set_trace_boot_clock);
351
352 static int __init set_tracepoint_printk(char *str)
353 {
354 /* Ignore the "tp_printk_stop_on_boot" param */
355 if (*str == '_')
356 return 0;
357
358 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
359 tracepoint_printk = 1;
360 return 1;
361 }
362 __setup("tp_printk", set_tracepoint_printk);
363
364 static int __init set_tracepoint_printk_stop(char *str)
365 {
366 tracepoint_printk_stop_on_boot = true;
367 return 1;
368 }
369 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
370
371 static int __init set_traceoff_after_boot(char *str)
372 {
373 traceoff_after_boot = true;
374 return 1;
375 }
376 __setup("traceoff_after_boot", set_traceoff_after_boot);
377
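/* Convert nanoseconds to microseconds, rounding to the nearest microsecond. */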
378 unsigned long long ns2usecs(u64 nsec)
379 {
380 nsec += 500;
381 do_div(nsec, 1000);
382 return nsec;
383 }
384
385 static void
386 trace_process_export(struct trace_export *export,
387 struct ring_buffer_event *event, int flag)
388 {
389 struct trace_entry *entry;
390 unsigned int size = 0;
391
392 if (export->flags & flag) {
393 entry = ring_buffer_event_data(event);
394 size = ring_buffer_event_length(event);
395 export->write(export, entry, size);
396 }
397 }
398
399 static DEFINE_MUTEX(ftrace_export_lock);
400
401 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
402
403 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
404 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
405 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
406
407 static inline void ftrace_exports_enable(struct trace_export *export)
408 {
409 if (export->flags & TRACE_EXPORT_FUNCTION)
410 static_branch_inc(&trace_function_exports_enabled);
411
412 if (export->flags & TRACE_EXPORT_EVENT)
413 static_branch_inc(&trace_event_exports_enabled);
414
415 if (export->flags & TRACE_EXPORT_MARKER)
416 static_branch_inc(&trace_marker_exports_enabled);
417 }
418
419 static inline void ftrace_exports_disable(struct trace_export *export)
420 {
421 if (export->flags & TRACE_EXPORT_FUNCTION)
422 static_branch_dec(&trace_function_exports_enabled);
423
424 if (export->flags & TRACE_EXPORT_EVENT)
425 static_branch_dec(&trace_event_exports_enabled);
426
427 if (export->flags & TRACE_EXPORT_MARKER)
428 static_branch_dec(&trace_marker_exports_enabled);
429 }
430
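/*
 * Hand @event to every registered trace_export whose flags include @flag,
 * walking the RCU-protected export list with preemption disabled.
 */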
431 static void ftrace_exports(struct ring_buffer_event *event, int flag)
432 {
433 struct trace_export *export;
434
435 guard(preempt_notrace)();
436
437 export = rcu_dereference_raw_check(ftrace_exports_list);
438 while (export) {
439 trace_process_export(export, event, flag);
440 export = rcu_dereference_raw_check(export->next);
441 }
442 }
443
444 static inline void
445 add_trace_export(struct trace_export **list, struct trace_export *export)
446 {
447 rcu_assign_pointer(export->next, *list);
448 /*
449 * We are adding export to the list, but another
450 * CPU might be walking that list. We need to make sure
451 * the export->next pointer is valid before another CPU sees
452 * the export pointer included in the list.
453 */
454 rcu_assign_pointer(*list, export);
455 }
456
457 static inline int
458 rm_trace_export(struct trace_export **list, struct trace_export *export)
459 {
460 struct trace_export **p;
461
462 for (p = list; *p != NULL; p = &(*p)->next)
463 if (*p == export)
464 break;
465
466 if (*p != export)
467 return -1;
468
469 rcu_assign_pointer(*p, (*p)->next);
470
471 return 0;
472 }
473
474 static inline void
475 add_ftrace_export(struct trace_export **list, struct trace_export *export)
476 {
477 ftrace_exports_enable(export);
478
479 add_trace_export(list, export);
480 }
481
482 static inline int
483 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
484 {
485 int ret;
486
487 ret = rm_trace_export(list, export);
488 ftrace_exports_disable(export);
489
490 return ret;
491 }
492
493 int register_ftrace_export(struct trace_export *export)
494 {
495 if (WARN_ON_ONCE(!export->write))
496 return -1;
497
498 guard(mutex)(&ftrace_export_lock);
499
500 add_ftrace_export(&ftrace_exports_list, export);
501
502 return 0;
503 }
504 EXPORT_SYMBOL_GPL(register_ftrace_export);
505
506 int unregister_ftrace_export(struct trace_export *export)
507 {
508 guard(mutex)(&ftrace_export_lock);
509 return rm_ftrace_export(&ftrace_exports_list, export);
510 }
511 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
512
513 /* trace_flags holds trace_options default values */
514 #define TRACE_DEFAULT_FLAGS \
515 (FUNCTION_DEFAULT_FLAGS | \
516 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | \
517 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | \
518 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | \
519 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | \
520 TRACE_ITER_HASH_PTR | TRACE_ITER_TRACE_PRINTK | \
521 TRACE_ITER_COPY_MARKER)
522
523 /* trace_options that are only supported by global_trace */
524 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK | \
525 TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
526
527 /* trace_flags that are default zero for instances */
528 #define ZEROED_TRACE_FLAGS \
529 (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK | TRACE_ITER_TRACE_PRINTK | \
530 TRACE_ITER_COPY_MARKER)
531
532 /*
533 * The global_trace is the descriptor that holds the top-level tracing
534 * buffers for the live tracing.
535 */
536 static struct trace_array global_trace = {
537 .trace_flags = TRACE_DEFAULT_FLAGS,
538 };
539
540 static struct trace_array *printk_trace = &global_trace;
541
542 /* List of trace_arrays interested in the top level trace_marker */
543 static LIST_HEAD(marker_copies);
544
545 static __always_inline bool printk_binsafe(struct trace_array *tr)
546 {
547 /*
548 * The binary format of trace_printk() can cause a crash if used
549 * with a buffer from another boot. Force the use of the
550 * non-binary version of trace_printk() if the trace_printk
551 * buffer is a boot-mapped ring buffer.
552 */
553 return !(tr->flags & TRACE_ARRAY_FL_BOOT);
554 }
555
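/*
 * Make @tr the instance that receives trace_printk() output: clear
 * TRACE_ITER_TRACE_PRINTK on the previous target and set it on @tr.
 */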
556 static void update_printk_trace(struct trace_array *tr)
557 {
558 if (printk_trace == tr)
559 return;
560
561 printk_trace->trace_flags &= ~TRACE_ITER_TRACE_PRINTK;
562 printk_trace = tr;
563 tr->trace_flags |= TRACE_ITER_TRACE_PRINTK;
564 }
565
566 /* Returns true if the status of tr changed */
567 static bool update_marker_trace(struct trace_array *tr, int enabled)
568 {
569 lockdep_assert_held(&event_mutex);
570
571 if (enabled) {
572 if (!list_empty(&tr->marker_list))
573 return false;
574
575 list_add_rcu(&tr->marker_list, &marker_copies);
576 tr->trace_flags |= TRACE_ITER_COPY_MARKER;
577 return true;
578 }
579
580 if (list_empty(&tr->marker_list))
581 return false;
582
583 list_del_init(&tr->marker_list);
584 tr->trace_flags &= ~TRACE_ITER_COPY_MARKER;
585 return true;
586 }
587
588 void trace_set_ring_buffer_expanded(struct trace_array *tr)
589 {
590 if (!tr)
591 tr = &global_trace;
592 tr->ring_buffer_expanded = true;
593 }
594
595 LIST_HEAD(ftrace_trace_arrays);
596
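/*
 * Take a reference on @this_tr, but only if it is still registered on
 * ftrace_trace_arrays. Returns 0 on success or -ENODEV otherwise.
 */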
597 int trace_array_get(struct trace_array *this_tr)
598 {
599 struct trace_array *tr;
600
601 guard(mutex)(&trace_types_lock);
602 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
603 if (tr == this_tr) {
604 tr->ref++;
605 return 0;
606 }
607 }
608
609 return -ENODEV;
610 }
611
612 static void __trace_array_put(struct trace_array *this_tr)
613 {
614 WARN_ON(!this_tr->ref);
615 this_tr->ref--;
616 }
617
618 /**
619 * trace_array_put - Decrement the reference counter for this trace array.
620 * @this_tr : pointer to the trace array
621 *
622 * NOTE: Use this when we no longer need the trace array returned by
623 * trace_array_get_by_name(). This ensures the trace array can be later
624 * destroyed.
625 *
626 */
627 void trace_array_put(struct trace_array *this_tr)
628 {
629 if (!this_tr)
630 return;
631
632 guard(mutex)(&trace_types_lock);
633 __trace_array_put(this_tr);
634 }
635 EXPORT_SYMBOL_GPL(trace_array_put);
636
637 int tracing_check_open_get_tr(struct trace_array *tr)
638 {
639 int ret;
640
641 ret = security_locked_down(LOCKDOWN_TRACEFS);
642 if (ret)
643 return ret;
644
645 if (tracing_disabled)
646 return -ENODEV;
647
648 if (tr && trace_array_get(tr) < 0)
649 return -ENODEV;
650
651 return 0;
652 }
653
654 /**
655 * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
656 * @filtered_pids: The list of pids to check
657 * @search_pid: The PID to find in @filtered_pids
658 *
659 * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
660 */
661 bool
662 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
663 {
664 return trace_pid_list_is_set(filtered_pids, search_pid);
665 }
666
667 /**
668 * trace_ignore_this_task - should a task be ignored for tracing
669 * @filtered_pids: The list of pids to check
670 * @filtered_no_pids: The list of pids not to be traced
671 * @task: The task that should be ignored if not filtered
672 *
673 * Checks if @task should be traced or not from @filtered_pids.
674 * Returns true if @task should *NOT* be traced.
675 * Returns false if @task should be traced.
676 */
677 bool
678 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
679 struct trace_pid_list *filtered_no_pids,
680 struct task_struct *task)
681 {
682 /*
683 * If filtered_no_pids is not empty, and the task's pid is listed
684 * in filtered_no_pids, then return true.
685 * Otherwise, if filtered_pids is empty, that means we can
686 * trace all tasks. If it has content, then only trace pids
687 * within filtered_pids.
688 */
689
690 return (filtered_pids &&
691 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
692 (filtered_no_pids &&
693 trace_find_filtered_pid(filtered_no_pids, task->pid));
694 }
695
696 /**
697 * trace_filter_add_remove_task - Add or remove a task from a pid_list
698 * @pid_list: The list to modify
699 * @self: The current task for fork or NULL for exit
700 * @task: The task to add or remove
701 *
702 * If adding a task, if @self is defined, the task is only added if @self
703 * is also included in @pid_list. This happens on fork and tasks should
704 * only be added when the parent is listed. If @self is NULL, then the
705 * @task pid will be removed from the list, which would happen on exit
706 * of a task.
707 */
708 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
709 struct task_struct *self,
710 struct task_struct *task)
711 {
712 if (!pid_list)
713 return;
714
715 /* For forks, we only add if the forking task is listed */
716 if (self) {
717 if (!trace_find_filtered_pid(pid_list, self->pid))
718 return;
719 }
720
721 /* "self" is set for forks, and NULL for exits */
722 if (self)
723 trace_pid_list_set(pid_list, task->pid);
724 else
725 trace_pid_list_clear(pid_list, task->pid);
726 }
727
728 /**
729 * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
730 * @pid_list: The pid list to show
731 * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
732 * @pos: The position of the file
733 *
734 * This is used by the seq_file "next" operation to iterate the pids
735 * listed in a trace_pid_list structure.
736 *
737 * Returns the pid+1 as we want to display pid of zero, but NULL would
738 * stop the iteration.
739 */
740 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
741 {
742 long pid = (unsigned long)v;
743 unsigned int next;
744
745 (*pos)++;
746
747 /* pid already is +1 of the actual previous bit */
748 if (trace_pid_list_next(pid_list, pid, &next) < 0)
749 return NULL;
750
751 pid = next;
752
753 /* Return pid + 1 to allow zero to be represented */
754 return (void *)(pid + 1);
755 }
756
757 /**
758 * trace_pid_start - Used for seq_file to start reading pid lists
759 * @pid_list: The pid list to show
760 * @pos: The position of the file
761 *
762 * This is used by seq_file "start" operation to start the iteration
763 * of listing pids.
764 *
765 * Returns the pid+1 as we want to display pid of zero, but NULL would
766 * stop the iteration.
767 */
768 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
769 {
770 unsigned long pid;
771 unsigned int first;
772 loff_t l = 0;
773
774 if (trace_pid_list_first(pid_list, &first) < 0)
775 return NULL;
776
777 pid = first;
778
779 /* Return pid + 1 so that zero can be the exit value */
780 for (pid++; pid && l < *pos;
781 pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
782 ;
783 return (void *)pid;
784 }
785
786 /**
787 * trace_pid_show - show the current pid in seq_file processing
788 * @m: The seq_file structure to write into
789 * @v: A void pointer of the pid (+1) value to display
790 *
791 * Can be directly used by seq_file operations to display the current
792 * pid value.
793 */
794 int trace_pid_show(struct seq_file *m, void *v)
795 {
796 unsigned long pid = (unsigned long)v - 1;
797
798 seq_printf(m, "%lu\n", pid);
799 return 0;
800 }
801
802 /* 128 should be much more than enough */
803 #define PID_BUF_SIZE 127
804
805 int trace_pid_write(struct trace_pid_list *filtered_pids,
806 struct trace_pid_list **new_pid_list,
807 const char __user *ubuf, size_t cnt)
808 {
809 struct trace_pid_list *pid_list;
810 struct trace_parser parser;
811 unsigned long val;
812 int nr_pids = 0;
813 ssize_t read = 0;
814 ssize_t ret;
815 loff_t pos;
816 pid_t pid;
817
818 if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
819 return -ENOMEM;
820
821 /*
822 * Always recreate a new array. The write is an all or nothing
823 * operation. Always create a new array when adding new pids by
824 * the user. If the operation fails, then the current list is
825 * not modified.
826 */
827 pid_list = trace_pid_list_alloc();
828 if (!pid_list) {
829 trace_parser_put(&parser);
830 return -ENOMEM;
831 }
832
833 if (filtered_pids) {
834 /* copy the current bits to the new max */
835 ret = trace_pid_list_first(filtered_pids, &pid);
836 while (!ret) {
837 ret = trace_pid_list_set(pid_list, pid);
838 if (ret < 0)
839 goto out;
840
841 ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
842 nr_pids++;
843 }
844 }
845
846 ret = 0;
847 while (cnt > 0) {
848
849 pos = 0;
850
851 ret = trace_get_user(&parser, ubuf, cnt, &pos);
852 if (ret < 0)
853 break;
854
855 read += ret;
856 ubuf += ret;
857 cnt -= ret;
858
859 if (!trace_parser_loaded(&parser))
860 break;
861
862 ret = -EINVAL;
863 if (kstrtoul(parser.buffer, 0, &val))
864 break;
865
866 pid = (pid_t)val;
867
868 if (trace_pid_list_set(pid_list, pid) < 0) {
869 ret = -1;
870 break;
871 }
872 nr_pids++;
873
874 trace_parser_clear(&parser);
875 ret = 0;
876 }
877 out:
878 trace_parser_put(&parser);
879
880 if (ret < 0) {
881 trace_pid_list_free(pid_list);
882 return ret;
883 }
884
885 if (!nr_pids) {
886 /* Cleared the list of pids */
887 trace_pid_list_free(pid_list);
888 pid_list = NULL;
889 }
890
891 *new_pid_list = pid_list;
892
893 return read;
894 }
895
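/*
 * Return the current timestamp of @buf for @cpu, falling back to
 * trace_clock_local() early in boot before the buffer is allocated.
 */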
896 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
897 {
898 u64 ts;
899
900 /* Early boot up does not have a buffer yet */
901 if (!buf->buffer)
902 return trace_clock_local();
903
904 ts = ring_buffer_time_stamp(buf->buffer);
905 ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
906
907 return ts;
908 }
909
910 u64 ftrace_now(int cpu)
911 {
912 return buffer_ftrace_now(&global_trace.array_buffer, cpu);
913 }
914
915 /**
916 * tracing_is_enabled - Show if global_trace has been enabled
917 *
918 * Shows if the global trace has been enabled or not. It uses the
919 * mirror flag "buffer_disabled" to be used in fast paths such as for
920 * the irqsoff tracer. But it may be inaccurate due to races. If you
921 * need to know the accurate state, use tracing_is_on() which is a little
922 * slower, but accurate.
923 */
924 int tracing_is_enabled(void)
925 {
926 /*
927 * For quick access (irqsoff uses this in fast path), just
928 * return the mirror variable of the state of the ring buffer.
929 * It's a little racy, but we don't really care.
930 */
931 return !global_trace.buffer_disabled;
932 }
933
934 /*
935 * trace_buf_size is the size in bytes that is allocated
936 * for a buffer. Note, the number of bytes is always rounded
937 * to page size.
938 *
939 * This number is purposely set to a low number of 16384.
940 * If the dump on oops happens, it will be much appreciated
941 * to not have to wait for all that output. Anyway, this is
942 * configurable at both boot time and run time.
943 */
944 #define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */
945
946 static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
947
948 /* trace_types holds a link list of available tracers. */
949 static struct tracer *trace_types __read_mostly;
950
951 /*
952 * trace_types_lock is used to protect the trace_types list.
953 */
954 DEFINE_MUTEX(trace_types_lock);
955
956 /*
957 * serialize access to the ring buffer
958 *
959 * The ring buffer serializes readers, but that is only low level protection.
960 * The validity of the events (which are returned by ring_buffer_peek() etc.)
961 * is not protected by the ring buffer.
962 *
963 * The content of events may become garbage if we allow other processes to
964 * consume these events concurrently:
965 * A) the page of the consumed events may become a normal page
966 * (not a reader page) in the ring buffer, and this page will be rewritten
967 * by the events producer.
968 * B) The page of the consumed events may become a page for splice_read,
969 * and this page will be returned to the system.
970 *
971 * These primitives allow multiple processes to access different per-cpu
972 * ring buffers concurrently.
973 *
974 * These primitives don't distinguish read-only and read-consume access.
975 * Multiple read-only accesses are also serialized.
976 */
977
978 #ifdef CONFIG_SMP
979 static DECLARE_RWSEM(all_cpu_access_lock);
980 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
981
982 static inline void trace_access_lock(int cpu)
983 {
984 if (cpu == RING_BUFFER_ALL_CPUS) {
985 /* gain it for accessing the whole ring buffer. */
986 down_write(&all_cpu_access_lock);
987 } else {
988 /* gain it for accessing a cpu ring buffer. */
989
990 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
991 down_read(&all_cpu_access_lock);
992
993 /* Secondly block other access to this @cpu ring buffer. */
994 mutex_lock(&per_cpu(cpu_access_lock, cpu));
995 }
996 }
997
998 static inline void trace_access_unlock(int cpu)
999 {
1000 if (cpu == RING_BUFFER_ALL_CPUS) {
1001 up_write(&all_cpu_access_lock);
1002 } else {
1003 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
1004 up_read(&all_cpu_access_lock);
1005 }
1006 }
1007
1008 static inline void trace_access_lock_init(void)
1009 {
1010 int cpu;
1011
1012 for_each_possible_cpu(cpu)
1013 mutex_init(&per_cpu(cpu_access_lock, cpu));
1014 }
1015
1016 #else
1017
1018 static DEFINE_MUTEX(access_lock);
1019
1020 static inline void trace_access_lock(int cpu)
1021 {
1022 (void)cpu;
1023 mutex_lock(&access_lock);
1024 }
1025
1026 static inline void trace_access_unlock(int cpu)
1027 {
1028 (void)cpu;
1029 mutex_unlock(&access_lock);
1030 }
1031
1032 static inline void trace_access_lock_init(void)
1033 {
1034 }
1035
1036 #endif
1037
1038 #ifdef CONFIG_STACKTRACE
1039 static void __ftrace_trace_stack(struct trace_array *tr,
1040 struct trace_buffer *buffer,
1041 unsigned int trace_ctx,
1042 int skip, struct pt_regs *regs);
1043 static inline void ftrace_trace_stack(struct trace_array *tr,
1044 struct trace_buffer *buffer,
1045 unsigned int trace_ctx,
1046 int skip, struct pt_regs *regs);
1047
1048 #else
1049 static inline void __ftrace_trace_stack(struct trace_array *tr,
1050 struct trace_buffer *buffer,
1051 unsigned int trace_ctx,
1052 int skip, struct pt_regs *regs)
1053 {
1054 }
1055 static inline void ftrace_trace_stack(struct trace_array *tr,
1056 struct trace_buffer *buffer,
1057 unsigned long trace_ctx,
1058 int skip, struct pt_regs *regs)
1059 {
1060 }
1061
1062 #endif
1063
1064 static __always_inline void
1065 trace_event_setup(struct ring_buffer_event *event,
1066 int type, unsigned int trace_ctx)
1067 {
1068 struct trace_entry *ent = ring_buffer_event_data(event);
1069
1070 tracing_generic_entry_update(ent, type, trace_ctx);
1071 }
1072
1073 static __always_inline struct ring_buffer_event *
1074 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
1075 int type,
1076 unsigned long len,
1077 unsigned int trace_ctx)
1078 {
1079 struct ring_buffer_event *event;
1080
1081 event = ring_buffer_lock_reserve(buffer, len);
1082 if (event != NULL)
1083 trace_event_setup(event, type, trace_ctx);
1084
1085 return event;
1086 }
1087
1088 void tracer_tracing_on(struct trace_array *tr)
1089 {
1090 if (tr->array_buffer.buffer)
1091 ring_buffer_record_on(tr->array_buffer.buffer);
1092 /*
1093 * This flag is looked at when buffers haven't been allocated
1094 * yet, or by some tracers (like irqsoff), that just want to
1095 * know if the ring buffer has been disabled, but it can handle
1096 * races where it gets disabled while we still do a record.
1097 * As the check is in the fast path of the tracers, it is more
1098 * important to be fast than accurate.
1099 */
1100 tr->buffer_disabled = 0;
1101 }
1102
1103 /**
1104 * tracing_on - enable tracing buffers
1105 *
1106 * This function enables tracing buffers that may have been
1107 * disabled with tracing_off.
1108 */
1109 void tracing_on(void)
1110 {
1111 tracer_tracing_on(&global_trace);
1112 }
1113 EXPORT_SYMBOL_GPL(tracing_on);
1114
1115
1116 static __always_inline void
1117 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1118 {
1119 __this_cpu_write(trace_taskinfo_save, true);
1120
1121 /* If this is the temp buffer, we need to commit fully */
1122 if (this_cpu_read(trace_buffered_event) == event) {
1123 /* Length is in event->array[0] */
1124 ring_buffer_write(buffer, event->array[0], &event->array[1]);
1125 /* Release the temp buffer */
1126 this_cpu_dec(trace_buffered_event_cnt);
1127 /* ring_buffer_unlock_commit() enables preemption */
1128 preempt_enable_notrace();
1129 } else
1130 ring_buffer_unlock_commit(buffer);
1131 }
1132
1133 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1134 const char *str, int size)
1135 {
1136 struct ring_buffer_event *event;
1137 struct trace_buffer *buffer;
1138 struct print_entry *entry;
1139 unsigned int trace_ctx;
1140 int alloc;
1141
1142 if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1143 return 0;
1144
1145 if (unlikely(tracing_selftest_running && tr == &global_trace))
1146 return 0;
1147
1148 if (unlikely(tracing_disabled))
1149 return 0;
1150
1151 alloc = sizeof(*entry) + size + 2; /* possible \n added */
1152
1153 trace_ctx = tracing_gen_ctx();
1154 buffer = tr->array_buffer.buffer;
1155 guard(ring_buffer_nest)(buffer);
1156 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1157 trace_ctx);
1158 if (!event)
1159 return 0;
1160
1161 entry = ring_buffer_event_data(event);
1162 entry->ip = ip;
1163
1164 memcpy(&entry->buf, str, size);
1165
1166 /* Add a newline if necessary */
1167 if (entry->buf[size - 1] != '\n') {
1168 entry->buf[size] = '\n';
1169 entry->buf[size + 1] = '\0';
1170 } else
1171 entry->buf[size] = '\0';
1172
1173 __buffer_unlock_commit(buffer, event);
1174 ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1175 return size;
1176 }
1177 EXPORT_SYMBOL_GPL(__trace_array_puts);
1178
1179 /**
1180 * __trace_puts - write a constant string into the trace buffer.
1181 * @ip: The address of the caller
1182 * @str: The constant string to write
1183 * @size: The size of the string.
1184 */
1185 int __trace_puts(unsigned long ip, const char *str, int size)
1186 {
1187 return __trace_array_puts(printk_trace, ip, str, size);
1188 }
1189 EXPORT_SYMBOL_GPL(__trace_puts);
1190
1191 /**
1192 * __trace_bputs - write the pointer to a constant string into trace buffer
1193 * @ip: The address of the caller
1194 * @str: The constant string to write to the buffer
1195 */
1196 int __trace_bputs(unsigned long ip, const char *str)
1197 {
1198 struct trace_array *tr = READ_ONCE(printk_trace);
1199 struct ring_buffer_event *event;
1200 struct trace_buffer *buffer;
1201 struct bputs_entry *entry;
1202 unsigned int trace_ctx;
1203 int size = sizeof(struct bputs_entry);
1204
1205 if (!printk_binsafe(tr))
1206 return __trace_puts(ip, str, strlen(str));
1207
1208 if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1209 return 0;
1210
1211 if (unlikely(tracing_selftest_running || tracing_disabled))
1212 return 0;
1213
1214 trace_ctx = tracing_gen_ctx();
1215 buffer = tr->array_buffer.buffer;
1216
1217 guard(ring_buffer_nest)(buffer);
1218 event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1219 trace_ctx);
1220 if (!event)
1221 return 0;
1222
1223 entry = ring_buffer_event_data(event);
1224 entry->ip = ip;
1225 entry->str = str;
1226
1227 __buffer_unlock_commit(buffer, event);
1228 ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1229
1230 return 1;
1231 }
1232 EXPORT_SYMBOL_GPL(__trace_bputs);
1233
1234 #ifdef CONFIG_TRACER_SNAPSHOT
1235 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1236 void *cond_data)
1237 {
1238 struct tracer *tracer = tr->current_trace;
1239 unsigned long flags;
1240
1241 if (in_nmi()) {
1242 trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1243 trace_array_puts(tr, "*** snapshot is being ignored ***\n");
1244 return;
1245 }
1246
1247 if (!tr->allocated_snapshot) {
1248 trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1249 trace_array_puts(tr, "*** stopping trace here! ***\n");
1250 tracer_tracing_off(tr);
1251 return;
1252 }
1253
1254 /* Note, snapshot can not be used when the tracer uses it */
1255 if (tracer->use_max_tr) {
1256 trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1257 trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1258 return;
1259 }
1260
1261 if (tr->mapped) {
1262 trace_array_puts(tr, "*** BUFFER MEMORY MAPPED ***\n");
1263 trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1264 return;
1265 }
1266
1267 local_irq_save(flags);
1268 update_max_tr(tr, current, smp_processor_id(), cond_data);
1269 local_irq_restore(flags);
1270 }
1271
1272 void tracing_snapshot_instance(struct trace_array *tr)
1273 {
1274 tracing_snapshot_instance_cond(tr, NULL);
1275 }
1276
1277 /**
1278 * tracing_snapshot - take a snapshot of the current buffer.
1279 *
1280 * This causes a swap between the snapshot buffer and the current live
1281 * tracing buffer. You can use this to take snapshots of the live
1282 * trace when some condition is triggered, but continue to trace.
1283 *
1284 * Note, make sure to allocate the snapshot either with
1285 * tracing_snapshot_alloc(), or manually with:
1286 * echo 1 > /sys/kernel/tracing/snapshot
1287 *
1288 * If the snapshot buffer is not allocated, it will stop tracing.
1289 * Basically making a permanent snapshot.
1290 */
1291 void tracing_snapshot(void)
1292 {
1293 struct trace_array *tr = &global_trace;
1294
1295 tracing_snapshot_instance(tr);
1296 }
1297 EXPORT_SYMBOL_GPL(tracing_snapshot);
1298
1299 /**
1300 * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1301 * @tr: The tracing instance to snapshot
1302 * @cond_data: The data to be tested conditionally, and possibly saved
1303 *
1304 * This is the same as tracing_snapshot() except that the snapshot is
1305 * conditional - the snapshot will only happen if the
1306 * cond_snapshot.update() implementation receiving the cond_data
1307 * returns true, which means that the trace array's cond_snapshot
1308 * update() operation used the cond_data to determine whether the
1309 * snapshot should be taken, and if it was, presumably saved it along
1310 * with the snapshot.
1311 */
1312 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1313 {
1314 tracing_snapshot_instance_cond(tr, cond_data);
1315 }
1316 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1317
1318 /**
1319 * tracing_cond_snapshot_data - get the user data associated with a snapshot
1320 * @tr: The tracing instance
1321 *
1322 * When the user enables a conditional snapshot using
1323 * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1324 * with the snapshot. This accessor is used to retrieve it.
1325 *
1326 * Should not be called from cond_snapshot.update(), since it takes
1327 * the tr->max_lock lock, which the code calling
1328 * cond_snapshot.update() has already taken.
1329 *
1330 * Returns the cond_data associated with the trace array's snapshot.
1331 */
1332 void *tracing_cond_snapshot_data(struct trace_array *tr)
1333 {
1334 void *cond_data = NULL;
1335
1336 local_irq_disable();
1337 arch_spin_lock(&tr->max_lock);
1338
1339 if (tr->cond_snapshot)
1340 cond_data = tr->cond_snapshot->cond_data;
1341
1342 arch_spin_unlock(&tr->max_lock);
1343 local_irq_enable();
1344
1345 return cond_data;
1346 }
1347 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1348
1349 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1350 struct array_buffer *size_buf, int cpu_id);
1351 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1352
1353 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1354 {
1355 int order;
1356 int ret;
1357
1358 if (!tr->allocated_snapshot) {
1359
1360 /* Make the snapshot buffer have the same order as main buffer */
1361 order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
1362 ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
1363 if (ret < 0)
1364 return ret;
1365
1366 /* allocate spare buffer */
1367 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1368 &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1369 if (ret < 0)
1370 return ret;
1371
1372 tr->allocated_snapshot = true;
1373 }
1374
1375 return 0;
1376 }
1377
1378 static void free_snapshot(struct trace_array *tr)
1379 {
1380 /*
1381 * We don't free the ring buffer; we resize it instead, because
1382 * the max_tr ring buffer has some state (e.g. ring->clock) and
1383 * we want to preserve it.
1384 */
1385 ring_buffer_subbuf_order_set(tr->max_buffer.buffer, 0);
1386 ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1387 set_buffer_entries(&tr->max_buffer, 1);
1388 tracing_reset_online_cpus(&tr->max_buffer);
1389 tr->allocated_snapshot = false;
1390 }
1391
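/*
 * Bump the snapshot "armed" count and make sure the snapshot buffer is
 * allocated. Fails with -EBUSY if the count would overflow or the buffer
 * is memory mapped. The caller must hold trace_types_lock.
 */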
1392 static int tracing_arm_snapshot_locked(struct trace_array *tr)
1393 {
1394 int ret;
1395
1396 lockdep_assert_held(&trace_types_lock);
1397
1398 spin_lock(&tr->snapshot_trigger_lock);
1399 if (tr->snapshot == UINT_MAX || tr->mapped) {
1400 spin_unlock(&tr->snapshot_trigger_lock);
1401 return -EBUSY;
1402 }
1403
1404 tr->snapshot++;
1405 spin_unlock(&tr->snapshot_trigger_lock);
1406
1407 ret = tracing_alloc_snapshot_instance(tr);
1408 if (ret) {
1409 spin_lock(&tr->snapshot_trigger_lock);
1410 tr->snapshot--;
1411 spin_unlock(&tr->snapshot_trigger_lock);
1412 }
1413
1414 return ret;
1415 }
1416
1417 int tracing_arm_snapshot(struct trace_array *tr)
1418 {
1419 guard(mutex)(&trace_types_lock);
1420 return tracing_arm_snapshot_locked(tr);
1421 }
1422
1423 void tracing_disarm_snapshot(struct trace_array *tr)
1424 {
1425 spin_lock(&tr->snapshot_trigger_lock);
1426 if (!WARN_ON(!tr->snapshot))
1427 tr->snapshot--;
1428 spin_unlock(&tr->snapshot_trigger_lock);
1429 }
1430
1431 /**
1432 * tracing_alloc_snapshot - allocate snapshot buffer.
1433 *
1434 * This only allocates the snapshot buffer if it isn't already
1435 * allocated - it doesn't also take a snapshot.
1436 *
1437 * This is meant to be used in cases where the snapshot buffer needs
1438 * to be set up for events that can't sleep but need to be able to
1439 * trigger a snapshot.
1440 */
1441 int tracing_alloc_snapshot(void)
1442 {
1443 struct trace_array *tr = &global_trace;
1444 int ret;
1445
1446 ret = tracing_alloc_snapshot_instance(tr);
1447 WARN_ON(ret < 0);
1448
1449 return ret;
1450 }
1451 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1452
1453 /**
1454 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1455 *
1456 * This is similar to tracing_snapshot(), but it will allocate the
1457 * snapshot buffer if it isn't already allocated. Use this only
1458 * where it is safe to sleep, as the allocation may sleep.
1459 *
1460 * This causes a swap between the snapshot buffer and the current live
1461 * tracing buffer. You can use this to take snapshots of the live
1462 * trace when some condition is triggered, but continue to trace.
1463 */
1464 void tracing_snapshot_alloc(void)
1465 {
1466 int ret;
1467
1468 ret = tracing_alloc_snapshot();
1469 if (ret < 0)
1470 return;
1471
1472 tracing_snapshot();
1473 }
1474 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1475
1476 /**
1477 * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1478 * @tr: The tracing instance
1479 * @cond_data: User data to associate with the snapshot
1480 * @update: Implementation of the cond_snapshot update function
1481 *
1482 * Check whether the conditional snapshot for the given instance has
1483 * already been enabled, or if the current tracer is already using a
1484 * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1485 * save the cond_data and update function inside.
1486 *
1487 * Returns 0 if successful, error otherwise.
1488 */
1489 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1490 cond_update_fn_t update)
1491 {
1492 struct cond_snapshot *cond_snapshot __free(kfree) =
1493 kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1494 int ret;
1495
1496 if (!cond_snapshot)
1497 return -ENOMEM;
1498
1499 cond_snapshot->cond_data = cond_data;
1500 cond_snapshot->update = update;
1501
1502 guard(mutex)(&trace_types_lock);
1503
1504 if (tr->current_trace->use_max_tr)
1505 return -EBUSY;
1506
1507 /*
1508 * The cond_snapshot can only change to NULL without the
1509 * trace_types_lock. We don't care if we race with it going
1510 * to NULL, but we want to make sure that it's not set to
1511 * something other than NULL when we get here, which we can
1512 * do safely with only holding the trace_types_lock and not
1513 * having to take the max_lock.
1514 */
1515 if (tr->cond_snapshot)
1516 return -EBUSY;
1517
1518 ret = tracing_arm_snapshot_locked(tr);
1519 if (ret)
1520 return ret;
1521
1522 local_irq_disable();
1523 arch_spin_lock(&tr->max_lock);
1524 tr->cond_snapshot = no_free_ptr(cond_snapshot);
1525 arch_spin_unlock(&tr->max_lock);
1526 local_irq_enable();
1527
1528 return 0;
1529 }
1530 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1531
1532 /**
1533 * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1534 * @tr: The tracing instance
1535 *
1536 * Check whether the conditional snapshot for the given instance is
1537 * enabled; if so, free the cond_snapshot associated with it,
1538 * otherwise return -EINVAL.
1539 *
1540 * Returns 0 if successful, error otherwise.
1541 */
1542 int tracing_snapshot_cond_disable(struct trace_array *tr)
1543 {
1544 int ret = 0;
1545
1546 local_irq_disable();
1547 arch_spin_lock(&tr->max_lock);
1548
1549 if (!tr->cond_snapshot)
1550 ret = -EINVAL;
1551 else {
1552 kfree(tr->cond_snapshot);
1553 tr->cond_snapshot = NULL;
1554 }
1555
1556 arch_spin_unlock(&tr->max_lock);
1557 local_irq_enable();
1558
1559 tracing_disarm_snapshot(tr);
1560
1561 return ret;
1562 }
1563 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1564 #else
1565 void tracing_snapshot(void)
1566 {
1567 WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1568 }
1569 EXPORT_SYMBOL_GPL(tracing_snapshot);
1570 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1571 {
1572 WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1573 }
1574 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1575 int tracing_alloc_snapshot(void)
1576 {
1577 WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1578 return -ENODEV;
1579 }
1580 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1581 void tracing_snapshot_alloc(void)
1582 {
1583 /* Give warning */
1584 tracing_snapshot();
1585 }
1586 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1587 void *tracing_cond_snapshot_data(struct trace_array *tr)
1588 {
1589 return NULL;
1590 }
1591 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1592 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1593 {
1594 return -ENODEV;
1595 }
1596 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1597 int tracing_snapshot_cond_disable(struct trace_array *tr)
1598 {
1599 return false;
1600 }
1601 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1602 #define free_snapshot(tr) do { } while (0)
1603 #define tracing_arm_snapshot_locked(tr) ({ -EBUSY; })
1604 #endif /* CONFIG_TRACER_SNAPSHOT */
1605
1606 void tracer_tracing_off(struct trace_array *tr)
1607 {
1608 if (tr->array_buffer.buffer)
1609 ring_buffer_record_off(tr->array_buffer.buffer);
1610 /*
1611 * This flag is looked at when buffers haven't been allocated
1612 * yet, or by some tracers (like irqsoff), that just want to
1613 * know if the ring buffer has been disabled, but it can handle
1614 * races where it gets disabled while we still do a record.
1615 * As the check is in the fast path of the tracers, it is more
1616 * important to be fast than accurate.
1617 */
1618 tr->buffer_disabled = 1;
1619 }
1620
1621 /**
1622 * tracer_tracing_disable() - temporarily disable writes to the buffer
1623 * @tr: The trace array to disable its buffer for
1624 *
1625 * Expects tracer_tracing_enable() to re-enable tracing.
1626 * The difference between this and tracer_tracing_off() is that this
1627 * is a counter and can nest, whereas tracer_tracing_off() can
1628 * be called multiple times and a single tracer_tracing_on() will
1629 * enable it.
1630 */
1631 void tracer_tracing_disable(struct trace_array *tr)
1632 {
1633 if (WARN_ON_ONCE(!tr->array_buffer.buffer))
1634 return;
1635
1636 ring_buffer_record_disable(tr->array_buffer.buffer);
1637 }
1638
1639 /**
1640 * tracer_tracing_enable() - counterpart of tracer_tracing_disable()
1641 * @tr: The trace array that had tracer_tracing_disable() called on it
1642 *
1643 * This is called after tracer_tracing_disable() has been called on @tr,
1644 * when it's safe to re-enable tracing.
1645 */
1646 void tracer_tracing_enable(struct trace_array *tr)
1647 {
1648 if (WARN_ON_ONCE(!tr->array_buffer.buffer))
1649 return;
1650
1651 ring_buffer_record_enable(tr->array_buffer.buffer);
1652 }
1653
1654 /**
1655 * tracing_off - turn off tracing buffers
1656 *
1657 * This function stops the tracing buffers from recording data.
1658 * It does not disable any overhead the tracers themselves may
1659 * be causing. This function simply causes all recording to
1660 * the ring buffers to fail.
1661 */
1662 void tracing_off(void)
1663 {
1664 tracer_tracing_off(&global_trace);
1665 }
1666 EXPORT_SYMBOL_GPL(tracing_off);
1667
1668 void disable_trace_on_warning(void)
1669 {
1670 if (__disable_trace_on_warning) {
1671 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1672 "Disabling tracing due to warning\n");
1673 tracing_off();
1674 }
1675 }
1676
1677 /**
1678 * tracer_tracing_is_on - show real state of ring buffer enabled
1679 * @tr : the trace array to know if ring buffer is enabled
1680 *
1681 * Shows real state of the ring buffer if it is enabled or not.
1682 */
1683 bool tracer_tracing_is_on(struct trace_array *tr)
1684 {
1685 if (tr->array_buffer.buffer)
1686 return ring_buffer_record_is_set_on(tr->array_buffer.buffer);
1687 return !tr->buffer_disabled;
1688 }
1689
1690 /**
1691 * tracing_is_on - show state of ring buffers enabled
1692 */
1693 int tracing_is_on(void)
1694 {
1695 return tracer_tracing_is_on(&global_trace);
1696 }
1697 EXPORT_SYMBOL_GPL(tracing_is_on);
1698
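/*
 * "trace_buf_size=" accepts a size parsed by memparse(), so suffixes such
 * as K, M and G work (e.g. trace_buf_size=4M, an illustrative value);
 * anything below 4096 bytes is rounded up to that minimum.
 */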
1699 static int __init set_buf_size(char *str)
1700 {
1701 unsigned long buf_size;
1702
1703 if (!str)
1704 return 0;
1705 buf_size = memparse(str, &str);
1706 /*
1707 * nr_entries can not be zero and the startup
1708 * tests require some buffer space. Therefore
1709 * ensure we have at least 4096 bytes of buffer.
1710 */
1711 trace_buf_size = max(4096UL, buf_size);
1712 return 1;
1713 }
1714 __setup("trace_buf_size=", set_buf_size);
1715
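/*
 * "tracing_thresh=" is given in microseconds and stored internally in
 * nanoseconds, hence the multiplication by 1000 below.
 */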
1716 static int __init set_tracing_thresh(char *str)
1717 {
1718 unsigned long threshold;
1719 int ret;
1720
1721 if (!str)
1722 return 0;
1723 ret = kstrtoul(str, 0, &threshold);
1724 if (ret < 0)
1725 return 0;
1726 tracing_thresh = threshold * 1000;
1727 return 1;
1728 }
1729 __setup("tracing_thresh=", set_tracing_thresh);
1730
1731 unsigned long nsecs_to_usecs(unsigned long nsecs)
1732 {
1733 return nsecs / 1000;
1734 }
1735
1736 /*
1737 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1738 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1739 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1740 * of strings in the order that the evals (enum) were defined.
1741 */
1742 #undef C
1743 #define C(a, b) b
1744
1745 /* These must match the bit positions in trace_iterator_flags */
1746 static const char *trace_options[] = {
1747 TRACE_FLAGS
1748 NULL
1749 };
1750
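/*
 * The clocks that can be selected for trace timestamps (for instance via
 * the "trace_clock=" boot option handled above); in_ns records whether a
 * clock counts in nanoseconds or is a raw counter.
 */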
1751 static struct {
1752 u64 (*func)(void);
1753 const char *name;
1754 int in_ns; /* is this clock in nanoseconds? */
1755 } trace_clocks[] = {
1756 { trace_clock_local, "local", 1 },
1757 { trace_clock_global, "global", 1 },
1758 { trace_clock_counter, "counter", 0 },
1759 { trace_clock_jiffies, "uptime", 0 },
1760 { trace_clock, "perf", 1 },
1761 { ktime_get_mono_fast_ns, "mono", 1 },
1762 { ktime_get_raw_fast_ns, "mono_raw", 1 },
1763 { ktime_get_boot_fast_ns, "boot", 1 },
1764 { ktime_get_tai_fast_ns, "tai", 1 },
1765 ARCH_TRACE_CLOCKS
1766 };
1767
1768 bool trace_clock_in_ns(struct trace_array *tr)
1769 {
1770 if (trace_clocks[tr->clock_id].in_ns)
1771 return true;
1772
1773 return false;
1774 }
1775
1776 /*
1777 * trace_parser_get_init - gets the buffer for trace parser
1778 */
1779 int trace_parser_get_init(struct trace_parser *parser, int size)
1780 {
1781 memset(parser, 0, sizeof(*parser));
1782
1783 parser->buffer = kmalloc(size, GFP_KERNEL);
1784 if (!parser->buffer)
1785 return 1;
1786
1787 parser->size = size;
1788 return 0;
1789 }
1790
1791 /*
1792 * trace_parser_put - frees the buffer for trace parser
1793 */
1794 void trace_parser_put(struct trace_parser *parser)
1795 {
1796 kfree(parser->buffer);
1797 parser->buffer = NULL;
1798 }
1799
1800 /*
1801 * trace_get_user - reads the user input string separated by space
1802 * (matched by isspace(ch))
1803 *
1804 * For each string found the 'struct trace_parser' is updated,
1805 * and the function returns.
1806 *
1807 * Returns number of bytes read.
1808 *
1809 * See kernel/trace/trace.h for 'struct trace_parser' details.
1810 */
1811 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1812 size_t cnt, loff_t *ppos)
1813 {
1814 char ch;
1815 size_t read = 0;
1816 ssize_t ret;
1817
1818 if (!*ppos)
1819 trace_parser_clear(parser);
1820
1821 ret = get_user(ch, ubuf++);
1822 if (ret)
1823 goto fail;
1824
1825 read++;
1826 cnt--;
1827
1828 /*
1829 * If the parser did not finish with the last write,
1830 * continue reading the user input without skipping spaces.
1831 */
1832 if (!parser->cont) {
1833 /* skip white space */
1834 while (cnt && isspace(ch)) {
1835 ret = get_user(ch, ubuf++);
1836 if (ret)
1837 goto fail;
1838 read++;
1839 cnt--;
1840 }
1841
1842 parser->idx = 0;
1843
1844 /* only spaces were written */
1845 if (isspace(ch) || !ch) {
1846 *ppos += read;
1847 return read;
1848 }
1849 }
1850
1851 /* read the non-space input */
1852 while (cnt && !isspace(ch) && ch) {
1853 if (parser->idx < parser->size - 1)
1854 parser->buffer[parser->idx++] = ch;
1855 else {
1856 ret = -EINVAL;
1857 goto fail;
1858 }
1859
1860 ret = get_user(ch, ubuf++);
1861 if (ret)
1862 goto fail;
1863 read++;
1864 cnt--;
1865 }
1866
1867 /* We either got finished input or we have to wait for another call. */
1868 if (isspace(ch) || !ch) {
1869 parser->buffer[parser->idx] = 0;
1870 parser->cont = false;
1871 } else if (parser->idx < parser->size - 1) {
1872 parser->cont = true;
1873 parser->buffer[parser->idx++] = ch;
1874 /* Make sure the parsed string always terminates with '\0'. */
1875 parser->buffer[parser->idx] = 0;
1876 } else {
1877 ret = -EINVAL;
1878 goto fail;
1879 }
1880
1881 *ppos += read;
1882 return read;
1883 fail:
1884 trace_parser_fail(parser);
1885 return ret;
1886 }
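/*
 * Illustrative sketch of how a tracefs ->write() handler typically drives
 * the parser.  The handler and process_token() are hypothetical names; the
 * real users (e.g. the set_ftrace_filter write path) follow the same
 * get_init / trace_get_user / put pattern.
 *
 *	static ssize_t example_write(struct file *filp, const char __user *ubuf,
 *				     size_t cnt, loff_t *ppos)
 *	{
 *		struct trace_parser parser;
 *		ssize_t ret;
 *
 *		if (trace_parser_get_init(&parser, PAGE_SIZE))
 *			return -ENOMEM;
 *
 *		ret = trace_get_user(&parser, ubuf, cnt, ppos);
 *		if (ret >= 0 && trace_parser_loaded(&parser))
 *			process_token(parser.buffer);	// hypothetical consumer
 *
 *		trace_parser_put(&parser);
 *		return ret;
 *	}
 */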
1887
1888 /* TODO add a seq_buf_to_buffer() */
1889 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1890 {
1891 int len;
1892
1893 if (trace_seq_used(s) <= s->readpos)
1894 return -EBUSY;
1895
1896 len = trace_seq_used(s) - s->readpos;
1897 if (cnt > len)
1898 cnt = len;
1899 memcpy(buf, s->buffer + s->readpos, cnt);
1900
1901 s->readpos += cnt;
1902 return cnt;
1903 }
1904
1905 unsigned long __read_mostly tracing_thresh;
1906
1907 #ifdef CONFIG_TRACER_MAX_TRACE
1908 static const struct file_operations tracing_max_lat_fops;
1909
1910 #ifdef LATENCY_FS_NOTIFY
1911
1912 static struct workqueue_struct *fsnotify_wq;
1913
1914 static void latency_fsnotify_workfn(struct work_struct *work)
1915 {
1916 struct trace_array *tr = container_of(work, struct trace_array,
1917 fsnotify_work);
1918 fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1919 }
1920
1921 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1922 {
1923 struct trace_array *tr = container_of(iwork, struct trace_array,
1924 fsnotify_irqwork);
1925 queue_work(fsnotify_wq, &tr->fsnotify_work);
1926 }
1927
1928 static void trace_create_maxlat_file(struct trace_array *tr,
1929 struct dentry *d_tracer)
1930 {
1931 INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1932 init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1933 tr->d_max_latency = trace_create_file("tracing_max_latency",
1934 TRACE_MODE_WRITE,
1935 d_tracer, tr,
1936 &tracing_max_lat_fops);
1937 }
1938
1939 __init static int latency_fsnotify_init(void)
1940 {
1941 fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1942 WQ_UNBOUND | WQ_HIGHPRI, 0);
1943 if (!fsnotify_wq) {
1944 pr_err("Unable to allocate tr_max_lat_wq\n");
1945 return -ENOMEM;
1946 }
1947 return 0;
1948 }
1949
1950 late_initcall_sync(latency_fsnotify_init);
1951
1952 void latency_fsnotify(struct trace_array *tr)
1953 {
1954 if (!fsnotify_wq)
1955 return;
1956 /*
1957 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1958 * possible that we are called from __schedule() or do_idle(), which
1959 * could cause a deadlock.
1960 */
1961 irq_work_queue(&tr->fsnotify_irqwork);
1962 }
1963
1964 #else /* !LATENCY_FS_NOTIFY */
1965
1966 #define trace_create_maxlat_file(tr, d_tracer) \
1967 trace_create_file("tracing_max_latency", TRACE_MODE_WRITE, \
1968 d_tracer, tr, &tracing_max_lat_fops)
1969
1970 #endif
1971
1972 /*
1973 * Copy the new maximum trace into the separate maximum-trace
1974 * structure. (this way the maximum trace is permanently saved,
1975 * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1976 */
1977 static void
1978 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1979 {
1980 struct array_buffer *trace_buf = &tr->array_buffer;
1981 struct array_buffer *max_buf = &tr->max_buffer;
1982 struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1983 struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1984
1985 max_buf->cpu = cpu;
1986 max_buf->time_start = data->preempt_timestamp;
1987
1988 max_data->saved_latency = tr->max_latency;
1989 max_data->critical_start = data->critical_start;
1990 max_data->critical_end = data->critical_end;
1991
1992 strscpy(max_data->comm, tsk->comm);
1993 max_data->pid = tsk->pid;
1994 /*
1995 * If tsk == current, then use current_uid(), as that does not use
1996 * RCU. The irq tracer can be called out of RCU scope.
1997 */
1998 if (tsk == current)
1999 max_data->uid = current_uid();
2000 else
2001 max_data->uid = task_uid(tsk);
2002
2003 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
2004 max_data->policy = tsk->policy;
2005 max_data->rt_priority = tsk->rt_priority;
2006
2007 /* record this task's comm */
2008 tracing_record_cmdline(tsk);
2009 latency_fsnotify(tr);
2010 }
2011
2012 /**
2013 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
2014 * @tr: tracer
2015 * @tsk: the task with the latency
2016 * @cpu: The cpu that initiated the trace.
2017 * @cond_data: User data associated with a conditional snapshot
2018 *
2019 * Flip the buffers between the @tr and the max_tr and record information
2020 * about which task was the cause of this latency.
2021 */
2022 void
2023 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
2024 void *cond_data)
2025 {
2026 if (tr->stop_count)
2027 return;
2028
2029 WARN_ON_ONCE(!irqs_disabled());
2030
2031 if (!tr->allocated_snapshot) {
2032 /* Only the nop tracer should hit this when disabling */
2033 WARN_ON_ONCE(tr->current_trace != &nop_trace);
2034 return;
2035 }
2036
2037 arch_spin_lock(&tr->max_lock);
2038
2039 /* Inherit the recordable setting from array_buffer */
2040 if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
2041 ring_buffer_record_on(tr->max_buffer.buffer);
2042 else
2043 ring_buffer_record_off(tr->max_buffer.buffer);
2044
2045 #ifdef CONFIG_TRACER_SNAPSHOT
2046 if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
2047 arch_spin_unlock(&tr->max_lock);
2048 return;
2049 }
2050 #endif
2051 swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
2052
2053 __update_max_tr(tr, tsk, cpu);
2054
2055 arch_spin_unlock(&tr->max_lock);
2056
2057 /* Any waiters on the old snapshot buffer need to wake up */
2058 ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
2059 }
2060
2061 /**
2062 * update_max_tr_single - only copy one trace over, and reset the rest
2063 * @tr: tracer
2064 * @tsk: task with the latency
2065 * @cpu: the cpu of the buffer to copy.
2066 *
2067 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
2068 */
2069 void
2070 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
2071 {
2072 int ret;
2073
2074 if (tr->stop_count)
2075 return;
2076
2077 WARN_ON_ONCE(!irqs_disabled());
2078 if (!tr->allocated_snapshot) {
2079 /* Only the nop tracer should hit this when disabling */
2080 WARN_ON_ONCE(tr->current_trace != &nop_trace);
2081 return;
2082 }
2083
2084 arch_spin_lock(&tr->max_lock);
2085
2086 ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
2087
2088 if (ret == -EBUSY) {
2089 /*
2090 * We failed to swap the buffer due to a commit taking
2091 * place on this CPU. We fail to record, but we reset
2092 * the max trace buffer (no one writes directly to it)
2093 * and flag that it failed.
2094 * Another reason is that a resize is in progress.
2095 */
2096 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
2097 "Failed to swap buffers due to commit or resize in progress\n");
2098 }
2099
2100 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
2101
2102 __update_max_tr(tr, tsk, cpu);
2103 arch_spin_unlock(&tr->max_lock);
2104 }
2105
2106 #endif /* CONFIG_TRACER_MAX_TRACE */
2107
2108 struct pipe_wait {
2109 struct trace_iterator *iter;
2110 int wait_index;
2111 };
2112
2113 static bool wait_pipe_cond(void *data)
2114 {
2115 struct pipe_wait *pwait = data;
2116 struct trace_iterator *iter = pwait->iter;
2117
2118 if (atomic_read_acquire(&iter->wait_index) != pwait->wait_index)
2119 return true;
2120
2121 return iter->closed;
2122 }
2123
2124 static int wait_on_pipe(struct trace_iterator *iter, int full)
2125 {
2126 struct pipe_wait pwait;
2127 int ret;
2128
2129 /* Iterators are static, they should be filled or empty */
2130 if (trace_buffer_iter(iter, iter->cpu_file))
2131 return 0;
2132
2133 pwait.wait_index = atomic_read_acquire(&iter->wait_index);
2134 pwait.iter = iter;
2135
2136 ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full,
2137 wait_pipe_cond, &pwait);
2138
2139 #ifdef CONFIG_TRACER_MAX_TRACE
2140 /*
2141 * Make sure this is still the snapshot buffer: if a snapshot happened,
2142 * this would now be the main buffer.
2143 */
2144 if (iter->snapshot)
2145 iter->array_buffer = &iter->tr->max_buffer;
2146 #endif
2147 return ret;
2148 }
2149
2150 #ifdef CONFIG_FTRACE_STARTUP_TEST
2151 static bool selftests_can_run;
2152
2153 struct trace_selftests {
2154 struct list_head list;
2155 struct tracer *type;
2156 };
2157
2158 static LIST_HEAD(postponed_selftests);
2159
2160 static int save_selftest(struct tracer *type)
2161 {
2162 struct trace_selftests *selftest;
2163
2164 selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
2165 if (!selftest)
2166 return -ENOMEM;
2167
2168 selftest->type = type;
2169 list_add(&selftest->list, &postponed_selftests);
2170 return 0;
2171 }
2172
2173 static int run_tracer_selftest(struct tracer *type)
2174 {
2175 struct trace_array *tr = &global_trace;
2176 struct tracer *saved_tracer = tr->current_trace;
2177 int ret;
2178
2179 if (!type->selftest || tracing_selftest_disabled)
2180 return 0;
2181
2182 /*
2183 * If a tracer registers early in boot up (before scheduling is
2184 * initialized and such), then do not run its selftests yet.
2185 * Instead, run it a little later in the boot process.
2186 */
2187 if (!selftests_can_run)
2188 return save_selftest(type);
2189
2190 if (!tracing_is_on()) {
2191 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
2192 type->name);
2193 return 0;
2194 }
2195
2196 /*
2197 * Run a selftest on this tracer.
2198 * Here we reset the trace buffer, and set the current
2199 * tracer to be this tracer. The tracer can then run some
2200 * internal tracing to verify that everything is in order.
2201 * If we fail, we do not register this tracer.
2202 */
2203 tracing_reset_online_cpus(&tr->array_buffer);
2204
2205 tr->current_trace = type;
2206
2207 #ifdef CONFIG_TRACER_MAX_TRACE
2208 if (type->use_max_tr) {
2209 /* If we expanded the buffers, make sure the max is expanded too */
2210 if (tr->ring_buffer_expanded)
2211 ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2212 RING_BUFFER_ALL_CPUS);
2213 tr->allocated_snapshot = true;
2214 }
2215 #endif
2216
2217 /* the test is responsible for initializing and enabling */
2218 pr_info("Testing tracer %s: ", type->name);
2219 ret = type->selftest(type, tr);
2220 /* the test is responsible for resetting too */
2221 tr->current_trace = saved_tracer;
2222 if (ret) {
2223 printk(KERN_CONT "FAILED!\n");
2224 /* Add the warning after printing 'FAILED' */
2225 WARN_ON(1);
2226 return -1;
2227 }
2228 /* Only reset on passing, to avoid touching corrupted buffers */
2229 tracing_reset_online_cpus(&tr->array_buffer);
2230
2231 #ifdef CONFIG_TRACER_MAX_TRACE
2232 if (type->use_max_tr) {
2233 tr->allocated_snapshot = false;
2234
2235 /* Shrink the max buffer again */
2236 if (tr->ring_buffer_expanded)
2237 ring_buffer_resize(tr->max_buffer.buffer, 1,
2238 RING_BUFFER_ALL_CPUS);
2239 }
2240 #endif
2241
2242 printk(KERN_CONT "PASSED\n");
2243 return 0;
2244 }
2245
2246 static int do_run_tracer_selftest(struct tracer *type)
2247 {
2248 int ret;
2249
2250 /*
2251 * Tests can take a long time, especially if they are run one after the
2252 * other, as does happen during bootup when all the tracers are
2253 * registered. This could cause the soft lockup watchdog to trigger.
2254 */
2255 cond_resched();
2256
2257 tracing_selftest_running = true;
2258 ret = run_tracer_selftest(type);
2259 tracing_selftest_running = false;
2260
2261 return ret;
2262 }
2263
2264 static __init int init_trace_selftests(void)
2265 {
2266 struct trace_selftests *p, *n;
2267 struct tracer *t, **last;
2268 int ret;
2269
2270 selftests_can_run = true;
2271
2272 guard(mutex)(&trace_types_lock);
2273
2274 if (list_empty(&postponed_selftests))
2275 return 0;
2276
2277 pr_info("Running postponed tracer tests:\n");
2278
2279 tracing_selftest_running = true;
2280 list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2281 /* This loop can take minutes when sanitizers are enabled, so
2282 * let's make sure we allow RCU processing.
2283 */
2284 cond_resched();
2285 ret = run_tracer_selftest(p->type);
2286 /* If the test fails, then warn and remove from available_tracers */
2287 if (ret < 0) {
2288 WARN(1, "tracer: %s failed selftest, disabling\n",
2289 p->type->name);
2290 last = &trace_types;
2291 for (t = trace_types; t; t = t->next) {
2292 if (t == p->type) {
2293 *last = t->next;
2294 break;
2295 }
2296 last = &t->next;
2297 }
2298 }
2299 list_del(&p->list);
2300 kfree(p);
2301 }
2302 tracing_selftest_running = false;
2303
2304 return 0;
2305 }
2306 core_initcall(init_trace_selftests);
2307 #else
2308 static inline int do_run_tracer_selftest(struct tracer *type)
2309 {
2310 return 0;
2311 }
2312 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2313
2314 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2315
2316 static void __init apply_trace_boot_options(void);
2317
2318 /**
2319 * register_tracer - register a tracer with the ftrace system.
2320 * @type: the plugin for the tracer
2321 *
2322 * Register a new plugin tracer.
2323 */
2324 int __init register_tracer(struct tracer *type)
2325 {
2326 struct tracer *t;
2327 int ret = 0;
2328
2329 if (!type->name) {
2330 pr_info("Tracer must have a name\n");
2331 return -1;
2332 }
2333
2334 if (strlen(type->name) >= MAX_TRACER_SIZE) {
2335 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2336 return -1;
2337 }
2338
2339 if (security_locked_down(LOCKDOWN_TRACEFS)) {
2340 pr_warn("Can not register tracer %s due to lockdown\n",
2341 type->name);
2342 return -EPERM;
2343 }
2344
2345 mutex_lock(&trace_types_lock);
2346
2347 for (t = trace_types; t; t = t->next) {
2348 if (strcmp(type->name, t->name) == 0) {
2349 /* already found */
2350 pr_info("Tracer %s already registered\n",
2351 type->name);
2352 ret = -1;
2353 goto out;
2354 }
2355 }
2356
2357 if (!type->set_flag)
2358 type->set_flag = &dummy_set_flag;
2359 if (!type->flags) {
2360 /* allocate a dummy tracer_flags */
2361 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2362 if (!type->flags) {
2363 ret = -ENOMEM;
2364 goto out;
2365 }
2366 type->flags->val = 0;
2367 type->flags->opts = dummy_tracer_opt;
2368 } else
2369 if (!type->flags->opts)
2370 type->flags->opts = dummy_tracer_opt;
2371
2372 /* store the tracer for __set_tracer_option */
2373 type->flags->trace = type;
2374
2375 ret = do_run_tracer_selftest(type);
2376 if (ret < 0)
2377 goto out;
2378
2379 type->next = trace_types;
2380 trace_types = type;
2381 add_tracer_options(&global_trace, type);
2382
2383 out:
2384 mutex_unlock(&trace_types_lock);
2385
2386 if (ret || !default_bootup_tracer)
2387 return ret;
2388
2389 if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2390 return 0;
2391
2392 printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2393 /* Do we want this tracer to start on bootup? */
2394 tracing_set_tracer(&global_trace, type->name);
2395 default_bootup_tracer = NULL;
2396
2397 apply_trace_boot_options();
2398
2399 /* disable other selftests, since this will break it. */
2400 disable_tracing_selftest("running a tracer");
2401
2402 return 0;
2403 }
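/*
 * Minimal sketch of what a tracer plugin registers (illustrative only; the
 * in-tree tracers such as trace_nop.c are the authoritative examples).
 * register_tracer() is __init, so this must run during boot:
 *
 *	static int my_tracer_init(struct trace_array *tr)
 *	{
 *		return 0;
 *	}
 *
 *	static void my_tracer_reset(struct trace_array *tr)
 *	{
 *	}
 *
 *	static struct tracer my_tracer __read_mostly = {
 *		.name	= "my_tracer",
 *		.init	= my_tracer_init,
 *		.reset	= my_tracer_reset,
 *	};
 *
 *	static __init int my_tracer_setup(void)
 *	{
 *		return register_tracer(&my_tracer);
 *	}
 *	core_initcall(my_tracer_setup);
 */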
2404
2405 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2406 {
2407 struct trace_buffer *buffer = buf->buffer;
2408
2409 if (!buffer)
2410 return;
2411
2412 ring_buffer_record_disable(buffer);
2413
2414 /* Make sure all commits have finished */
2415 synchronize_rcu();
2416 ring_buffer_reset_cpu(buffer, cpu);
2417
2418 ring_buffer_record_enable(buffer);
2419 }
2420
2421 void tracing_reset_online_cpus(struct array_buffer *buf)
2422 {
2423 struct trace_buffer *buffer = buf->buffer;
2424
2425 if (!buffer)
2426 return;
2427
2428 ring_buffer_record_disable(buffer);
2429
2430 /* Make sure all commits have finished */
2431 synchronize_rcu();
2432
2433 buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2434
2435 ring_buffer_reset_online_cpus(buffer);
2436
2437 ring_buffer_record_enable(buffer);
2438 }
2439
2440 static void tracing_reset_all_cpus(struct array_buffer *buf)
2441 {
2442 struct trace_buffer *buffer = buf->buffer;
2443
2444 if (!buffer)
2445 return;
2446
2447 ring_buffer_record_disable(buffer);
2448
2449 /* Make sure all commits have finished */
2450 synchronize_rcu();
2451
2452 buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2453
2454 ring_buffer_reset(buffer);
2455
2456 ring_buffer_record_enable(buffer);
2457 }
2458
2459 /* Must have trace_types_lock held */
2460 void tracing_reset_all_online_cpus_unlocked(void)
2461 {
2462 struct trace_array *tr;
2463
2464 lockdep_assert_held(&trace_types_lock);
2465
2466 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2467 if (!tr->clear_trace)
2468 continue;
2469 tr->clear_trace = false;
2470 tracing_reset_online_cpus(&tr->array_buffer);
2471 #ifdef CONFIG_TRACER_MAX_TRACE
2472 tracing_reset_online_cpus(&tr->max_buffer);
2473 #endif
2474 }
2475 }
2476
2477 void tracing_reset_all_online_cpus(void)
2478 {
2479 guard(mutex)(&trace_types_lock);
2480 tracing_reset_all_online_cpus_unlocked();
2481 }
2482
2483 int is_tracing_stopped(void)
2484 {
2485 return global_trace.stop_count;
2486 }
2487
2488 static void tracing_start_tr(struct trace_array *tr)
2489 {
2490 struct trace_buffer *buffer;
2491
2492 if (tracing_disabled)
2493 return;
2494
2495 guard(raw_spinlock_irqsave)(&tr->start_lock);
2496 if (--tr->stop_count) {
2497 if (WARN_ON_ONCE(tr->stop_count < 0)) {
2498 /* Someone screwed up their debugging */
2499 tr->stop_count = 0;
2500 }
2501 return;
2502 }
2503
2504 /* Prevent the buffers from switching */
2505 arch_spin_lock(&tr->max_lock);
2506
2507 buffer = tr->array_buffer.buffer;
2508 if (buffer)
2509 ring_buffer_record_enable(buffer);
2510
2511 #ifdef CONFIG_TRACER_MAX_TRACE
2512 buffer = tr->max_buffer.buffer;
2513 if (buffer)
2514 ring_buffer_record_enable(buffer);
2515 #endif
2516
2517 arch_spin_unlock(&tr->max_lock);
2518 }
2519
2520 /**
2521 * tracing_start - quick start of the tracer
2522 *
2523 * If tracing is enabled but was stopped by tracing_stop,
2524 * this will start the tracer back up.
2525 */
2526 void tracing_start(void)
2527
2528 {
2529 return tracing_start_tr(&global_trace);
2530 }
2531
2532 static void tracing_stop_tr(struct trace_array *tr)
2533 {
2534 struct trace_buffer *buffer;
2535
2536 guard(raw_spinlock_irqsave)(&tr->start_lock);
2537 if (tr->stop_count++)
2538 return;
2539
2540 /* Prevent the buffers from switching */
2541 arch_spin_lock(&tr->max_lock);
2542
2543 buffer = tr->array_buffer.buffer;
2544 if (buffer)
2545 ring_buffer_record_disable(buffer);
2546
2547 #ifdef CONFIG_TRACER_MAX_TRACE
2548 buffer = tr->max_buffer.buffer;
2549 if (buffer)
2550 ring_buffer_record_disable(buffer);
2551 #endif
2552
2553 arch_spin_unlock(&tr->max_lock);
2554 }
2555
2556 /**
2557 * tracing_stop - quick stop of the tracer
2558 *
2559 * Lightweight way to stop tracing. Use in conjunction with
2560 * tracing_start.
2561 */
2562 void tracing_stop(void)
2563 {
2564 return tracing_stop_tr(&global_trace);
2565 }
2566
2567 /*
2568 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2569 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2570 * simplifies those functions and keeps them in sync.
2571 */
2572 enum print_line_t trace_handle_return(struct trace_seq *s)
2573 {
2574 return trace_seq_has_overflowed(s) ?
2575 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2576 }
2577 EXPORT_SYMBOL_GPL(trace_handle_return);
2578
2579 static unsigned short migration_disable_value(void)
2580 {
2581 #if defined(CONFIG_SMP)
2582 return current->migration_disabled;
2583 #else
2584 return 0;
2585 #endif
2586 }
2587
2588 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2589 {
2590 unsigned int trace_flags = irqs_status;
2591 unsigned int pc;
2592
2593 pc = preempt_count();
2594
2595 if (pc & NMI_MASK)
2596 trace_flags |= TRACE_FLAG_NMI;
2597 if (pc & HARDIRQ_MASK)
2598 trace_flags |= TRACE_FLAG_HARDIRQ;
2599 if (in_serving_softirq())
2600 trace_flags |= TRACE_FLAG_SOFTIRQ;
2601 if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2602 trace_flags |= TRACE_FLAG_BH_OFF;
2603
2604 if (tif_need_resched())
2605 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2606 if (test_preempt_need_resched())
2607 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2608 if (IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY) && tif_test_bit(TIF_NEED_RESCHED_LAZY))
2609 trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY;
2610 return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2611 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2612 }
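/*
 * Worked example of the packing above (illustrative): with a preempt count
 * of 2, one level of migration disable and only TRACE_FLAG_HARDIRQ set,
 * the returned word is
 *
 *	bits  0- 3: preempt count, capped at 0xf	-> 0x2
 *	bits  4- 7: migration disable, capped at 0xf	-> 0x10
 *	bits 16-31: trace flags				-> TRACE_FLAG_HARDIRQ << 16
 *
 * i.e. (TRACE_FLAG_HARDIRQ << 16) | 0x10 | 0x2.
 */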
2613
2614 struct ring_buffer_event *
2615 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2616 int type,
2617 unsigned long len,
2618 unsigned int trace_ctx)
2619 {
2620 return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2621 }
2622
2623 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2624 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2625 static int trace_buffered_event_ref;
2626
2627 /**
2628 * trace_buffered_event_enable - enable buffering events
2629 *
2630 * When events are being filtered, it is quicker to use a temporary
2631 * buffer to write the event data into if there's a likely chance
2632 * that it will not be committed. The discard of the ring buffer
2633 * is not as fast as committing, and is much slower than copying
2634 * a commit.
2635 *
2636 * When an event is to be filtered, allocate per-CPU buffers to
2637 * write the event data into; if the event is filtered and discarded,
2638 * it is simply dropped, otherwise the entire data is committed
2639 * in one shot.
2640 */
2641 void trace_buffered_event_enable(void)
2642 {
2643 struct ring_buffer_event *event;
2644 struct page *page;
2645 int cpu;
2646
2647 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2648
2649 if (trace_buffered_event_ref++)
2650 return;
2651
2652 for_each_tracing_cpu(cpu) {
2653 page = alloc_pages_node(cpu_to_node(cpu),
2654 GFP_KERNEL | __GFP_NORETRY, 0);
2655 /* This is just an optimization and can handle failures */
2656 if (!page) {
2657 pr_err("Failed to allocate event buffer\n");
2658 break;
2659 }
2660
2661 event = page_address(page);
2662 memset(event, 0, sizeof(*event));
2663
2664 per_cpu(trace_buffered_event, cpu) = event;
2665
2666 scoped_guard(preempt,) {
2667 if (cpu == smp_processor_id() &&
2668 __this_cpu_read(trace_buffered_event) !=
2669 per_cpu(trace_buffered_event, cpu))
2670 WARN_ON_ONCE(1);
2671 }
2672 }
2673 }
2674
2675 static void enable_trace_buffered_event(void *data)
2676 {
2677 this_cpu_dec(trace_buffered_event_cnt);
2678 }
2679
2680 static void disable_trace_buffered_event(void *data)
2681 {
2682 this_cpu_inc(trace_buffered_event_cnt);
2683 }
2684
2685 /**
2686 * trace_buffered_event_disable - disable buffering events
2687 *
2688 * When a filter is removed, it is faster to not use the buffered
2689 * events, and to commit directly into the ring buffer. Free up
2690 * the temp buffers when there are no more users. This requires
2691 * special synchronization with current events.
2692 */
2693 void trace_buffered_event_disable(void)
2694 {
2695 int cpu;
2696
2697 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2698
2699 if (WARN_ON_ONCE(!trace_buffered_event_ref))
2700 return;
2701
2702 if (--trace_buffered_event_ref)
2703 return;
2704
2705 /* For each CPU, set the buffer as used. */
2706 on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2707 NULL, true);
2708
2709 /* Wait for all current users to finish */
2710 synchronize_rcu();
2711
2712 for_each_tracing_cpu(cpu) {
2713 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2714 per_cpu(trace_buffered_event, cpu) = NULL;
2715 }
2716
2717 /*
2718 * Wait for all CPUs that potentially started checking whether they can
2719 * use their event buffer only after the previous synchronize_rcu() call
2720 * and that still read a valid pointer from trace_buffered_event. It must
2721 * be ensured that they don't see a cleared trace_buffered_event_cnt, else
2722 * they could wrongly decide to use the pointed-to buffer, which is now freed.
2723 */
2724 synchronize_rcu();
2725
2726 /* For each CPU, relinquish the buffer */
2727 on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2728 true);
2729 }
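/*
 * Illustrative pairing (roughly what the event filter code does, under
 * event_mutex, as filters come and go):
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();		// first user of the temp buffers
 *	...
 *	trace_buffered_event_disable();		// last user went away
 *	mutex_unlock(&event_mutex);
 *
 * The reference count above makes nested enable/disable pairs safe.
 */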
2730
2731 static struct trace_buffer *temp_buffer;
2732
2733 struct ring_buffer_event *
2734 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2735 struct trace_event_file *trace_file,
2736 int type, unsigned long len,
2737 unsigned int trace_ctx)
2738 {
2739 struct ring_buffer_event *entry;
2740 struct trace_array *tr = trace_file->tr;
2741 int val;
2742
2743 *current_rb = tr->array_buffer.buffer;
2744
2745 if (!tr->no_filter_buffering_ref &&
2746 (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2747 preempt_disable_notrace();
2748 /*
2749 * Filtering is on, so try to use the per cpu buffer first.
2750 * This buffer will simulate a ring_buffer_event,
2751 * where the type_len is zero and the array[0] will
2752 * hold the full length.
2753 * (see include/linux/ring_buffer.h for details on
2754 * how the ring_buffer_event is structured).
2755 *
2756 * Using a temp buffer during filtering and copying it
2757 * on a matched filter is quicker than writing directly
2758 * into the ring buffer and then discarding it when
2759 * it doesn't match. That is because the discard
2760 * requires several atomic operations to get right.
2761 * Copying on match and doing nothing on a failed match
2762 * is still quicker than no copy on match, but having
2763 * to discard out of the ring buffer on a failed match.
2764 */
2765 if ((entry = __this_cpu_read(trace_buffered_event))) {
2766 int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2767
2768 val = this_cpu_inc_return(trace_buffered_event_cnt);
2769
2770 /*
2771 * Preemption is disabled, but interrupts and NMIs
2772 * can still come in now. If that happens after
2773 * the above increment, then it will have to go
2774 * back to the old method of allocating the event
2775 * on the ring buffer, and if the filter fails, it
2776 * will have to call ring_buffer_discard_commit()
2777 * to remove it.
2778 *
2779 * Need to also check the unlikely case that the
2780 * length is bigger than the temp buffer size.
2781 * If that happens, then the reserve is pretty much
2782 * guaranteed to fail, as the ring buffer currently
2783 * only allows events less than a page. But that may
2784 * change in the future, so let the ring buffer reserve
2785 * handle the failure in that case.
2786 */
2787 if (val == 1 && likely(len <= max_len)) {
2788 trace_event_setup(entry, type, trace_ctx);
2789 entry->array[0] = len;
2790 /* Return with preemption disabled */
2791 return entry;
2792 }
2793 this_cpu_dec(trace_buffered_event_cnt);
2794 }
2795 /* __trace_buffer_lock_reserve() disables preemption */
2796 preempt_enable_notrace();
2797 }
2798
2799 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2800 trace_ctx);
2801 /*
2802 * If tracing is off, but we have triggers enabled
2803 * we still need to look at the event data. Use the temp_buffer
2804 * to store the trace event for the trigger to use. It's recursion
2805 * safe and will not be recorded anywhere.
2806 */
2807 if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2808 *current_rb = temp_buffer;
2809 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2810 trace_ctx);
2811 }
2812 return entry;
2813 }
2814 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
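/*
 * Illustrative call pattern, simplified from what the generated
 * trace_event_raw_event_*() helpers do (the real callers also fill in
 * fbuffer.trace_file, fbuffer.entry and friends before committing):
 *
 *	fbuffer.event = trace_event_buffer_lock_reserve(&fbuffer.buffer,
 *							trace_file, event_type,
 *							sizeof(*entry), trace_ctx);
 *	if (!fbuffer.event)
 *		return;
 *	entry = ring_buffer_event_data(fbuffer.event);
 *	// ... fill in the event fields ...
 *	trace_event_buffer_commit(&fbuffer);
 */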
2815
2816 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2817 static DEFINE_MUTEX(tracepoint_printk_mutex);
2818
2819 static void output_printk(struct trace_event_buffer *fbuffer)
2820 {
2821 struct trace_event_call *event_call;
2822 struct trace_event_file *file;
2823 struct trace_event *event;
2824 unsigned long flags;
2825 struct trace_iterator *iter = tracepoint_print_iter;
2826
2827 /* We should never get here if iter is NULL */
2828 if (WARN_ON_ONCE(!iter))
2829 return;
2830
2831 event_call = fbuffer->trace_file->event_call;
2832 if (!event_call || !event_call->event.funcs ||
2833 !event_call->event.funcs->trace)
2834 return;
2835
2836 file = fbuffer->trace_file;
2837 if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2838 (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2839 !filter_match_preds(file->filter, fbuffer->entry)))
2840 return;
2841
2842 event = &fbuffer->trace_file->event_call->event;
2843
2844 raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2845 trace_seq_init(&iter->seq);
2846 iter->ent = fbuffer->entry;
2847 event_call->event.funcs->trace(iter, 0, event);
2848 trace_seq_putc(&iter->seq, 0);
2849 printk("%s", iter->seq.buffer);
2850
2851 raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2852 }
2853
2854 int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
2855 void *buffer, size_t *lenp,
2856 loff_t *ppos)
2857 {
2858 int save_tracepoint_printk;
2859 int ret;
2860
2861 guard(mutex)(&tracepoint_printk_mutex);
2862 save_tracepoint_printk = tracepoint_printk;
2863
2864 ret = proc_dointvec(table, write, buffer, lenp, ppos);
2865
2866 /*
2867 * This will force exiting early, as tracepoint_printk
2868 * is always zero when tracepoint_print_iter is not allocated.
2869 */
2870 if (!tracepoint_print_iter)
2871 tracepoint_printk = 0;
2872
2873 if (save_tracepoint_printk == tracepoint_printk)
2874 return ret;
2875
2876 if (tracepoint_printk)
2877 static_key_enable(&tracepoint_printk_key.key);
2878 else
2879 static_key_disable(&tracepoint_printk_key.key);
2880
2881 return ret;
2882 }
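/*
 * Illustrative runtime toggle (shell): flipping the sysctl switches the
 * static key above, but as noted it only sticks when tracepoint_print_iter
 * was allocated at boot (the "tp_printk" command line option):
 *
 *	# echo 1 > /proc/sys/kernel/tracepoint_printk
 *	# echo 0 > /proc/sys/kernel/tracepoint_printk
 */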
2883
2884 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2885 {
2886 enum event_trigger_type tt = ETT_NONE;
2887 struct trace_event_file *file = fbuffer->trace_file;
2888
2889 if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2890 fbuffer->entry, &tt))
2891 goto discard;
2892
2893 if (static_key_false(&tracepoint_printk_key.key))
2894 output_printk(fbuffer);
2895
2896 if (static_branch_unlikely(&trace_event_exports_enabled))
2897 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2898
2899 trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2900 fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2901
2902 discard:
2903 if (tt)
2904 event_triggers_post_call(file, tt);
2905
2906 }
2907 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2908
2909 /*
2910 * Skip 3:
2911 *
2912 * trace_buffer_unlock_commit_regs()
2913 * trace_event_buffer_commit()
2914 * trace_event_raw_event_xxx()
2915 */
2916 # define STACK_SKIP 3
2917
2918 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2919 struct trace_buffer *buffer,
2920 struct ring_buffer_event *event,
2921 unsigned int trace_ctx,
2922 struct pt_regs *regs)
2923 {
2924 __buffer_unlock_commit(buffer, event);
2925
2926 /*
2927 * If regs is not set, then skip the necessary functions.
2928 * Note, we can still get here via blktrace, wakeup tracer
2929 * and mmiotrace, but that's ok if they lose a function or
2930 * two. They are not that meaningful.
2931 */
2932 ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2933 ftrace_trace_userstack(tr, buffer, trace_ctx);
2934 }
2935
2936 /*
2937 * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2938 */
2939 void
2940 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2941 struct ring_buffer_event *event)
2942 {
2943 __buffer_unlock_commit(buffer, event);
2944 }
2945
2946 void
2947 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2948 parent_ip, unsigned int trace_ctx, struct ftrace_regs *fregs)
2949 {
2950 struct trace_buffer *buffer = tr->array_buffer.buffer;
2951 struct ring_buffer_event *event;
2952 struct ftrace_entry *entry;
2953 int size = sizeof(*entry);
2954
2955 size += FTRACE_REGS_MAX_ARGS * !!fregs * sizeof(long);
2956
2957 event = __trace_buffer_lock_reserve(buffer, TRACE_FN, size,
2958 trace_ctx);
2959 if (!event)
2960 return;
2961 entry = ring_buffer_event_data(event);
2962 entry->ip = ip;
2963 entry->parent_ip = parent_ip;
2964
2965 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
2966 if (fregs) {
2967 for (int i = 0; i < FTRACE_REGS_MAX_ARGS; i++)
2968 entry->args[i] = ftrace_regs_get_argument(fregs, i);
2969 }
2970 #endif
2971
2972 if (static_branch_unlikely(&trace_function_exports_enabled))
2973 ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2974 __buffer_unlock_commit(buffer, event);
2975 }
2976
2977 #ifdef CONFIG_STACKTRACE
2978
2979 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2980 #define FTRACE_KSTACK_NESTING 4
2981
2982 #define FTRACE_KSTACK_ENTRIES (SZ_4K / FTRACE_KSTACK_NESTING)
2983
2984 struct ftrace_stack {
2985 unsigned long calls[FTRACE_KSTACK_ENTRIES];
2986 };
2987
2988
2989 struct ftrace_stacks {
2990 struct ftrace_stack stacks[FTRACE_KSTACK_NESTING];
2991 };
2992
2993 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2994 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2995
2996 static void __ftrace_trace_stack(struct trace_array *tr,
2997 struct trace_buffer *buffer,
2998 unsigned int trace_ctx,
2999 int skip, struct pt_regs *regs)
3000 {
3001 struct ring_buffer_event *event;
3002 unsigned int size, nr_entries;
3003 struct ftrace_stack *fstack;
3004 struct stack_entry *entry;
3005 int stackidx;
3006
3007 /*
3008 * Add one, for this function and the call to save_stack_trace().
3009 * If regs is set, then these functions will not be in the way.
3010 */
3011 #ifndef CONFIG_UNWINDER_ORC
3012 if (!regs)
3013 skip++;
3014 #endif
3015
3016 guard(preempt_notrace)();
3017
3018 stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3019
3020 /* This should never happen. If it does, yell once and skip */
3021 if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3022 goto out;
3023
3024 /*
3025 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3026 * interrupt will either see the value pre increment or post
3027 * increment. If the interrupt happens pre increment it will have
3028 * restored the counter when it returns. We just need a barrier to
3029 * keep gcc from moving things around.
3030 */
3031 barrier();
3032
3033 fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3034 size = ARRAY_SIZE(fstack->calls);
3035
3036 if (regs) {
3037 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3038 size, skip);
3039 } else {
3040 nr_entries = stack_trace_save(fstack->calls, size, skip);
3041 }
3042
3043 #ifdef CONFIG_DYNAMIC_FTRACE
3044 /* Mark entry of stack trace as trampoline code */
3045 if (tr->ops && tr->ops->trampoline) {
3046 unsigned long tramp_start = tr->ops->trampoline;
3047 unsigned long tramp_end = tramp_start + tr->ops->trampoline_size;
3048 unsigned long *calls = fstack->calls;
3049
3050 for (int i = 0; i < nr_entries; i++) {
3051 if (calls[i] >= tramp_start && calls[i] < tramp_end)
3052 calls[i] = FTRACE_TRAMPOLINE_MARKER;
3053 }
3054 }
3055 #endif
3056
3057 event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3058 struct_size(entry, caller, nr_entries),
3059 trace_ctx);
3060 if (!event)
3061 goto out;
3062 entry = ring_buffer_event_data(event);
3063
3064 entry->size = nr_entries;
3065 memcpy(&entry->caller, fstack->calls,
3066 flex_array_size(entry, caller, nr_entries));
3067
3068 __buffer_unlock_commit(buffer, event);
3069
3070 out:
3071 /* Again, don't let gcc optimize things here */
3072 barrier();
3073 __this_cpu_dec(ftrace_stack_reserve);
3074 }
3075
3076 static inline void ftrace_trace_stack(struct trace_array *tr,
3077 struct trace_buffer *buffer,
3078 unsigned int trace_ctx,
3079 int skip, struct pt_regs *regs)
3080 {
3081 if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3082 return;
3083
3084 __ftrace_trace_stack(tr, buffer, trace_ctx, skip, regs);
3085 }
3086
3087 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3088 int skip)
3089 {
3090 struct trace_buffer *buffer = tr->array_buffer.buffer;
3091
3092 if (rcu_is_watching()) {
3093 __ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
3094 return;
3095 }
3096
3097 if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3098 return;
3099
3100 /*
3101 * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3102 * but if the above rcu_is_watching() failed, then the NMI
3103 * triggered someplace critical, and ct_irq_enter() should
3104 * not be called from NMI.
3105 */
3106 if (unlikely(in_nmi()))
3107 return;
3108
3109 ct_irq_enter_irqson();
3110 __ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
3111 ct_irq_exit_irqson();
3112 }
3113
3114 /**
3115 * trace_dump_stack - record a stack back trace in the trace buffer
3116 * @skip: Number of functions to skip (helper handlers)
3117 */
3118 void trace_dump_stack(int skip)
3119 {
3120 if (tracing_disabled || tracing_selftest_running)
3121 return;
3122
3123 #ifndef CONFIG_UNWINDER_ORC
3124 /* Skip 1 to skip this function. */
3125 skip++;
3126 #endif
3127 __ftrace_trace_stack(printk_trace, printk_trace->array_buffer.buffer,
3128 tracing_gen_ctx(), skip, NULL);
3129 }
3130 EXPORT_SYMBOL_GPL(trace_dump_stack);
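/*
 * Illustrative use (hypothetical caller): drop trace_dump_stack(0) into a
 * code path under investigation to record who reached it in the ring
 * buffer instead of flooding the console:
 *
 *	if (unlikely(refcount_read(&obj->ref) == 0))
 *		trace_dump_stack(0);
 */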
3131
3132 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3133 static DEFINE_PER_CPU(int, user_stack_count);
3134
3135 static void
3136 ftrace_trace_userstack(struct trace_array *tr,
3137 struct trace_buffer *buffer, unsigned int trace_ctx)
3138 {
3139 struct ring_buffer_event *event;
3140 struct userstack_entry *entry;
3141
3142 if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3143 return;
3144
3145 /*
3146 * NMIs cannot handle page faults, even with fixups.
3147 * Saving the user stack can (and often does) fault.
3148 */
3149 if (unlikely(in_nmi()))
3150 return;
3151
3152 /*
3153 * prevent recursion, since the user stack tracing may
3154 * trigger other kernel events.
3155 */
3156 guard(preempt)();
3157 if (__this_cpu_read(user_stack_count))
3158 return;
3159
3160 __this_cpu_inc(user_stack_count);
3161
3162 event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3163 sizeof(*entry), trace_ctx);
3164 if (!event)
3165 goto out_drop_count;
3166 entry = ring_buffer_event_data(event);
3167
3168 entry->tgid = current->tgid;
3169 memset(&entry->caller, 0, sizeof(entry->caller));
3170
3171 stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3172 __buffer_unlock_commit(buffer, event);
3173
3174 out_drop_count:
3175 __this_cpu_dec(user_stack_count);
3176 }
3177 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3178 static void ftrace_trace_userstack(struct trace_array *tr,
3179 struct trace_buffer *buffer,
3180 unsigned int trace_ctx)
3181 {
3182 }
3183 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3184
3185 #endif /* CONFIG_STACKTRACE */
3186
3187 static inline void
3188 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3189 unsigned long long delta)
3190 {
3191 entry->bottom_delta_ts = delta & U32_MAX;
3192 entry->top_delta_ts = (delta >> 32);
3193 }
3194
3195 void trace_last_func_repeats(struct trace_array *tr,
3196 struct trace_func_repeats *last_info,
3197 unsigned int trace_ctx)
3198 {
3199 struct trace_buffer *buffer = tr->array_buffer.buffer;
3200 struct func_repeats_entry *entry;
3201 struct ring_buffer_event *event;
3202 u64 delta;
3203
3204 event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3205 sizeof(*entry), trace_ctx);
3206 if (!event)
3207 return;
3208
3209 delta = ring_buffer_event_time_stamp(buffer, event) -
3210 last_info->ts_last_call;
3211
3212 entry = ring_buffer_event_data(event);
3213 entry->ip = last_info->ip;
3214 entry->parent_ip = last_info->parent_ip;
3215 entry->count = last_info->count;
3216 func_repeats_set_delta_ts(entry, delta);
3217
3218 __buffer_unlock_commit(buffer, event);
3219 }
3220
3221 /* created for use with alloc_percpu */
3222 struct trace_buffer_struct {
3223 int nesting;
3224 char buffer[4][TRACE_BUF_SIZE];
3225 };
3226
3227 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3228
3229 /*
3230 * This allows for lockless recording. If we're nested too deeply, then
3231 * this returns NULL.
3232 */
3233 static char *get_trace_buf(void)
3234 {
3235 struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3236
3237 if (!trace_percpu_buffer || buffer->nesting >= 4)
3238 return NULL;
3239
3240 buffer->nesting++;
3241
3242 /* Interrupts must see nesting incremented before we use the buffer */
3243 barrier();
3244 return &buffer->buffer[buffer->nesting - 1][0];
3245 }
3246
3247 static void put_trace_buf(void)
3248 {
3249 /* Don't let the decrement of nesting leak before this */
3250 barrier();
3251 this_cpu_dec(trace_percpu_buffer->nesting);
3252 }
3253
3254 static int alloc_percpu_trace_buffer(void)
3255 {
3256 struct trace_buffer_struct __percpu *buffers;
3257
3258 if (trace_percpu_buffer)
3259 return 0;
3260
3261 buffers = alloc_percpu(struct trace_buffer_struct);
3262 if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3263 return -ENOMEM;
3264
3265 trace_percpu_buffer = buffers;
3266 return 0;
3267 }
3268
3269 static int buffers_allocated;
3270
3271 void trace_printk_init_buffers(void)
3272 {
3273 if (buffers_allocated)
3274 return;
3275
3276 if (alloc_percpu_trace_buffer())
3277 return;
3278
3279 /* trace_printk() is for debug use only. Don't use it in production. */
3280
3281 pr_warn("\n");
3282 pr_warn("**********************************************************\n");
3283 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
3284 pr_warn("** **\n");
3285 pr_warn("** trace_printk() being used. Allocating extra memory. **\n");
3286 pr_warn("** **\n");
3287 pr_warn("** This means that this is a DEBUG kernel and it is **\n");
3288 pr_warn("** unsafe for production use. **\n");
3289 pr_warn("** **\n");
3290 pr_warn("** If you see this message and you are not debugging **\n");
3291 pr_warn("** the kernel, report this immediately to your vendor! **\n");
3292 pr_warn("** **\n");
3293 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
3294 pr_warn("**********************************************************\n");
3295
3296 /* Expand the buffers to set size */
3297 tracing_update_buffers(&global_trace);
3298
3299 buffers_allocated = 1;
3300
3301 /*
3302 * trace_printk_init_buffers() can be called by modules.
3303 * If that happens, then we need to start cmdline recording
3304 * directly here. If the global_trace.buffer is already
3305 * allocated here, then this was called by module code.
3306 */
3307 if (global_trace.array_buffer.buffer)
3308 tracing_start_cmdline_record();
3309 }
3310 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
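/*
 * Illustrative debug-only usage (the banner above appears as soon as such a
 * call is built into the kernel or a module); the fields are hypothetical:
 *
 *	trace_printk("ctx %d: state=%lu\n", ctx->id, ctx->state);
 *
 * The output goes to the ring buffer, not the console, and is read back
 * through /sys/kernel/tracing/trace.
 */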
3311
3312 void trace_printk_start_comm(void)
3313 {
3314 /* Start tracing comms if trace printk is set */
3315 if (!buffers_allocated)
3316 return;
3317 tracing_start_cmdline_record();
3318 }
3319
3320 static void trace_printk_start_stop_comm(int enabled)
3321 {
3322 if (!buffers_allocated)
3323 return;
3324
3325 if (enabled)
3326 tracing_start_cmdline_record();
3327 else
3328 tracing_stop_cmdline_record();
3329 }
3330
3331 /**
3332 * trace_vbprintk - write binary msg to tracing buffer
3333 * @ip: The address of the caller
3334 * @fmt: The string format to write to the buffer
3335 * @args: Arguments for @fmt
3336 */
3337 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3338 {
3339 struct ring_buffer_event *event;
3340 struct trace_buffer *buffer;
3341 struct trace_array *tr = READ_ONCE(printk_trace);
3342 struct bprint_entry *entry;
3343 unsigned int trace_ctx;
3344 char *tbuffer;
3345 int len = 0, size;
3346
3347 if (!printk_binsafe(tr))
3348 return trace_vprintk(ip, fmt, args);
3349
3350 if (unlikely(tracing_selftest_running || tracing_disabled))
3351 return 0;
3352
3353 /* Don't pollute graph traces with trace_vprintk internals */
3354 pause_graph_tracing();
3355
3356 trace_ctx = tracing_gen_ctx();
3357 guard(preempt_notrace)();
3358
3359 tbuffer = get_trace_buf();
3360 if (!tbuffer) {
3361 len = 0;
3362 goto out_nobuffer;
3363 }
3364
3365 len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3366
3367 if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3368 goto out_put;
3369
3370 size = sizeof(*entry) + sizeof(u32) * len;
3371 buffer = tr->array_buffer.buffer;
3372 scoped_guard(ring_buffer_nest, buffer) {
3373 event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3374 trace_ctx);
3375 if (!event)
3376 goto out_put;
3377 entry = ring_buffer_event_data(event);
3378 entry->ip = ip;
3379 entry->fmt = fmt;
3380
3381 memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3382 __buffer_unlock_commit(buffer, event);
3383 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3384 }
3385 out_put:
3386 put_trace_buf();
3387
3388 out_nobuffer:
3389 unpause_graph_tracing();
3390
3391 return len;
3392 }
3393 EXPORT_SYMBOL_GPL(trace_vbprintk);
3394
3395 static __printf(3, 0)
3396 int __trace_array_vprintk(struct trace_buffer *buffer,
3397 unsigned long ip, const char *fmt, va_list args)
3398 {
3399 struct ring_buffer_event *event;
3400 int len = 0, size;
3401 struct print_entry *entry;
3402 unsigned int trace_ctx;
3403 char *tbuffer;
3404
3405 if (tracing_disabled)
3406 return 0;
3407
3408 /* Don't pollute graph traces with trace_vprintk internals */
3409 pause_graph_tracing();
3410
3411 trace_ctx = tracing_gen_ctx();
3412 guard(preempt_notrace)();
3413
3414
3415 tbuffer = get_trace_buf();
3416 if (!tbuffer) {
3417 len = 0;
3418 goto out_nobuffer;
3419 }
3420
3421 len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3422
3423 size = sizeof(*entry) + len + 1;
3424 scoped_guard(ring_buffer_nest, buffer) {
3425 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3426 trace_ctx);
3427 if (!event)
3428 goto out;
3429 entry = ring_buffer_event_data(event);
3430 entry->ip = ip;
3431
3432 memcpy(&entry->buf, tbuffer, len + 1);
3433 __buffer_unlock_commit(buffer, event);
3434 ftrace_trace_stack(printk_trace, buffer, trace_ctx, 6, NULL);
3435 }
3436 out:
3437 put_trace_buf();
3438
3439 out_nobuffer:
3440 unpause_graph_tracing();
3441
3442 return len;
3443 }
3444
3445 int trace_array_vprintk(struct trace_array *tr,
3446 unsigned long ip, const char *fmt, va_list args)
3447 {
3448 if (tracing_selftest_running && tr == &global_trace)
3449 return 0;
3450
3451 return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3452 }
3453
3454 /**
3455 * trace_array_printk - Print a message to a specific instance
3456 * @tr: The instance trace_array descriptor
3457 * @ip: The instruction pointer that this is called from.
3458 * @fmt: The format to print (printf format)
3459 *
3460 * If a subsystem sets up its own instance, it has the right to
3461 * printk strings into its tracing instance buffer using this
3462 * function. Note, this function will not write into the top level
3463 * buffer (use trace_printk() for that), as the top level buffer
3464 * should only have events that can be individually disabled.
3465 * trace_printk() is only used for debugging a kernel, and should
3466 * never be incorporated in normal use.
3467 *
3468 * trace_array_printk() can be used, as it will not add noise to the
3469 * top level tracing buffer.
3470 *
3471 * Note, trace_array_init_printk() must be called on @tr before this
3472 * can be used.
3473 */
3474 int trace_array_printk(struct trace_array *tr,
3475 unsigned long ip, const char *fmt, ...)
3476 {
3477 int ret;
3478 va_list ap;
3479
3480 if (!tr)
3481 return -ENOENT;
3482
3483 /* This is only allowed for created instances */
3484 if (tr == &global_trace)
3485 return 0;
3486
3487 if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3488 return 0;
3489
3490 va_start(ap, fmt);
3491 ret = trace_array_vprintk(tr, ip, fmt, ap);
3492 va_end(ap);
3493 return ret;
3494 }
3495 EXPORT_SYMBOL_GPL(trace_array_printk);
3496
3497 /**
3498 * trace_array_init_printk - Initialize buffers for trace_array_printk()
3499 * @tr: The trace array to initialize the buffers for
3500 *
3501 * As trace_array_printk() only writes into instances, its calls are OK
3502 * to have in the kernel (unlike trace_printk()). This needs to be called
3503 * before trace_array_printk() can be used on a trace_array.
3504 */
3505 int trace_array_init_printk(struct trace_array *tr)
3506 {
3507 if (!tr)
3508 return -ENOENT;
3509
3510 /* This is only allowed for created instances */
3511 if (tr == &global_trace)
3512 return -EINVAL;
3513
3514 return alloc_percpu_trace_buffer();
3515 }
3516 EXPORT_SYMBOL_GPL(trace_array_init_printk);
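/*
 * Illustrative sketch for a subsystem-private instance (names are
 * hypothetical, and the trace_array_get_by_name() signature has changed
 * across kernel versions, so treat this as pseudocode):
 *
 *	struct trace_array *tr = trace_array_get_by_name("my_subsys", NULL);
 *
 *	if (tr && !trace_array_init_printk(tr))
 *		trace_array_printk(tr, _THIS_IP_, "hello from %s\n", "my_subsys");
 *
 * The reference is dropped with trace_array_put() when the subsystem is done.
 */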
3517
3518 int trace_array_printk_buf(struct trace_buffer *buffer,
3519 unsigned long ip, const char *fmt, ...)
3520 {
3521 int ret;
3522 va_list ap;
3523
3524 if (!(printk_trace->trace_flags & TRACE_ITER_PRINTK))
3525 return 0;
3526
3527 va_start(ap, fmt);
3528 ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3529 va_end(ap);
3530 return ret;
3531 }
3532
3533 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3534 {
3535 return trace_array_vprintk(printk_trace, ip, fmt, args);
3536 }
3537 EXPORT_SYMBOL_GPL(trace_vprintk);
3538
3539 static void trace_iterator_increment(struct trace_iterator *iter)
3540 {
3541 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3542
3543 iter->idx++;
3544 if (buf_iter)
3545 ring_buffer_iter_advance(buf_iter);
3546 }
3547
3548 static struct trace_entry *
3549 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3550 unsigned long *lost_events)
3551 {
3552 struct ring_buffer_event *event;
3553 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3554
3555 if (buf_iter) {
3556 event = ring_buffer_iter_peek(buf_iter, ts);
3557 if (lost_events)
3558 *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3559 (unsigned long)-1 : 0;
3560 } else {
3561 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3562 lost_events);
3563 }
3564
3565 if (event) {
3566 iter->ent_size = ring_buffer_event_length(event);
3567 return ring_buffer_event_data(event);
3568 }
3569 iter->ent_size = 0;
3570 return NULL;
3571 }
3572
3573 static struct trace_entry *
3574 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3575 unsigned long *missing_events, u64 *ent_ts)
3576 {
3577 struct trace_buffer *buffer = iter->array_buffer->buffer;
3578 struct trace_entry *ent, *next = NULL;
3579 unsigned long lost_events = 0, next_lost = 0;
3580 int cpu_file = iter->cpu_file;
3581 u64 next_ts = 0, ts;
3582 int next_cpu = -1;
3583 int next_size = 0;
3584 int cpu;
3585
3586 /*
3587 * If we are in a per_cpu trace file, don't bother iterating over
3588 * all CPUs; just peek at the requested CPU directly.
3589 */
3590 if (cpu_file > RING_BUFFER_ALL_CPUS) {
3591 if (ring_buffer_empty_cpu(buffer, cpu_file))
3592 return NULL;
3593 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3594 if (ent_cpu)
3595 *ent_cpu = cpu_file;
3596
3597 return ent;
3598 }
3599
3600 for_each_tracing_cpu(cpu) {
3601
3602 if (ring_buffer_empty_cpu(buffer, cpu))
3603 continue;
3604
3605 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3606
3607 /*
3608 * Pick the entry with the smallest timestamp:
3609 */
3610 if (ent && (!next || ts < next_ts)) {
3611 next = ent;
3612 next_cpu = cpu;
3613 next_ts = ts;
3614 next_lost = lost_events;
3615 next_size = iter->ent_size;
3616 }
3617 }
3618
3619 iter->ent_size = next_size;
3620
3621 if (ent_cpu)
3622 *ent_cpu = next_cpu;
3623
3624 if (ent_ts)
3625 *ent_ts = next_ts;
3626
3627 if (missing_events)
3628 *missing_events = next_lost;
3629
3630 return next;
3631 }
3632
3633 #define STATIC_FMT_BUF_SIZE 128
3634 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3635
trace_iter_expand_format(struct trace_iterator * iter)3636 char *trace_iter_expand_format(struct trace_iterator *iter)
3637 {
3638 char *tmp;
3639
3640 /*
3641 * iter->tr is NULL when used with tp_printk, which means this
3642 * can get called where it is not safe to call krealloc().
3643 */
3644 if (!iter->tr || iter->fmt == static_fmt_buf)
3645 return NULL;
3646
3647 tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3648 GFP_KERNEL);
3649 if (tmp) {
3650 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3651 iter->fmt = tmp;
3652 }
3653
3654 return tmp;
3655 }
3656
3657 /* Returns true if the string is safe to dereference from an event */
trace_safe_str(struct trace_iterator * iter,const char * str)3658 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3659 {
3660 unsigned long addr = (unsigned long)str;
3661 struct trace_event *trace_event;
3662 struct trace_event_call *event;
3663
3664 /* OK if part of the event data */
3665 if ((addr >= (unsigned long)iter->ent) &&
3666 (addr < (unsigned long)iter->ent + iter->ent_size))
3667 return true;
3668
3669 /* OK if part of the temp seq buffer */
3670 if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3671 (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE))
3672 return true;
3673
3674 /* Core rodata can not be freed */
3675 if (is_kernel_rodata(addr))
3676 return true;
3677
3678 if (trace_is_tracepoint_string(str))
3679 return true;
3680
3681 /*
3682 * Now this could be a module event, referencing core module
3683 * data, which is OK.
3684 */
3685 if (!iter->ent)
3686 return false;
3687
3688 trace_event = ftrace_find_event(iter->ent->type);
3689 if (!trace_event)
3690 return false;
3691
3692 event = container_of(trace_event, struct trace_event_call, event);
3693 if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3694 return false;
3695
3696 /* Would rather have rodata, but this will suffice */
3697 if (within_module_core(addr, event->module))
3698 return true;
3699
3700 return false;
3701 }
3702
3703 /**
3704 * ignore_event - Check dereferenced fields while writing to the seq buffer
3705 * @iter: The iterator that holds the seq buffer and the event being printed
3706 *
3707 * At boot up, test_event_printk() will flag any event that dereferences
3708 * a string with "%s" that does not exist in the ring buffer. It may still
3709 * be valid, as the string may point to a static string in the kernel
3710 * rodata that never gets freed. But if the string pointer is pointing
3711 * to something that was allocated, there's a chance that it can be freed
3712 * by the time the user reads the trace. This would cause a bad memory
3713 * access by the kernel and possibly crash the system.
3714 *
3715 * This function will check if the event has any fields flagged as needing
3716 * to be checked at runtime and perform those checks.
3717 *
3718 * If it is found that a field is unsafe, it will write into the @iter->seq
3719 * a message stating what was found to be unsafe.
3720 *
3721 * @return: true if the event is unsafe and should be ignored,
3722 * false otherwise.
3723 */
ignore_event(struct trace_iterator * iter)3724 bool ignore_event(struct trace_iterator *iter)
3725 {
3726 struct ftrace_event_field *field;
3727 struct trace_event *trace_event;
3728 struct trace_event_call *event;
3729 struct list_head *head;
3730 struct trace_seq *seq;
3731 const void *ptr;
3732
3733 trace_event = ftrace_find_event(iter->ent->type);
3734
3735 seq = &iter->seq;
3736
3737 if (!trace_event) {
3738 trace_seq_printf(seq, "EVENT ID %d NOT FOUND?\n", iter->ent->type);
3739 return true;
3740 }
3741
3742 event = container_of(trace_event, struct trace_event_call, event);
3743 if (!(event->flags & TRACE_EVENT_FL_TEST_STR))
3744 return false;
3745
3746 head = trace_get_fields(event);
3747 if (!head) {
3748 trace_seq_printf(seq, "FIELDS FOR EVENT '%s' NOT FOUND?\n",
3749 trace_event_name(event));
3750 return true;
3751 }
3752
3753 /* Offsets are from the iter->ent that points to the raw event */
3754 ptr = iter->ent;
3755
3756 list_for_each_entry(field, head, link) {
3757 const char *str;
3758 bool good;
3759
3760 if (!field->needs_test)
3761 continue;
3762
3763 str = *(const char **)(ptr + field->offset);
3764
3765 good = trace_safe_str(iter, str);
3766
3767 /*
3768 * If you hit this warning, it is likely that the
3769 * trace event in question used %s on a string that
3770 * was saved at the time of the event, but may not be
3771 * around when the trace is read. Use __string(),
3772 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3773 * instead. See samples/trace_events/trace-events-sample.h
3774 * for reference.
3775 */
3776 if (WARN_ONCE(!good, "event '%s' has unsafe pointer field '%s'",
3777 trace_event_name(event), field->name)) {
3778 trace_seq_printf(seq, "EVENT %s: HAS UNSAFE POINTER FIELD '%s'\n",
3779 trace_event_name(event), field->name);
3780 return true;
3781 }
3782 }
3783 return false;
3784 }
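/*
 * Illustrative sketch of the pattern the warning above recommends (a
 * hypothetical event, not one defined by this file). Note that older
 * kernels passed the source string as a second argument to __assign_str():
 *
 *	TRACE_EVENT(sample_event,
 *		TP_PROTO(const char *name),
 *		TP_ARGS(name),
 *		TP_STRUCT__entry(
 *			__string(name, name)
 *		),
 *		TP_fast_assign(
 *			__assign_str(name);
 *		),
 *		TP_printk("%s", __get_str(name))
 *	);
 */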
3785
trace_event_format(struct trace_iterator * iter,const char * fmt)3786 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3787 {
3788 const char *p, *new_fmt;
3789 char *q;
3790
3791 if (WARN_ON_ONCE(!fmt))
3792 return fmt;
3793
3794 if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3795 return fmt;
3796
3797 p = fmt;
3798 new_fmt = q = iter->fmt;
3799 while (*p) {
3800 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3801 if (!trace_iter_expand_format(iter))
3802 return fmt;
3803
3804 q += iter->fmt - new_fmt;
3805 new_fmt = iter->fmt;
3806 }
3807
3808 *q++ = *p++;
3809
3810 /* Replace %p with %px */
3811 if (p[-1] == '%') {
3812 if (p[0] == '%') {
3813 *q++ = *p++;
3814 } else if (p[0] == 'p' && !isalnum(p[1])) {
3815 *q++ = *p++;
3816 *q++ = 'x';
3817 }
3818 }
3819 }
3820 *q = '\0';
3821
3822 return new_fmt;
3823 }
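/*
 * For illustration, assuming a hypothetical event format string and the
 * hash-ptr option cleared, the rewrite above turns:
 *
 *	"comm=%s ptr=%p pct=%% sym=%pS"
 * into:
 *	"comm=%s ptr=%px pct=%% sym=%pS"
 *
 * Only a bare "%p" (not followed by an alphanumeric modifier) is
 * expanded to "%px"; "%%" and extended specifiers like "%pS" are
 * copied through untouched.
 */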
3824
3825 #define STATIC_TEMP_BUF_SIZE 128
3826 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3827
3828 /* Find the next real entry, without updating the iterator itself */
trace_find_next_entry(struct trace_iterator * iter,int * ent_cpu,u64 * ent_ts)3829 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3830 int *ent_cpu, u64 *ent_ts)
3831 {
3832 /* __find_next_entry will reset ent_size */
3833 int ent_size = iter->ent_size;
3834 struct trace_entry *entry;
3835
3836 /*
3837 * If called from ftrace_dump(), then the iter->temp buffer
3838 * will be the static_temp_buf and not created from kmalloc.
3839 * If the entry size is greater than the buffer, we cannot
3840 * save it. Just return NULL in that case. This is only
3841 * used to add markers when two consecutive events' time
3842 * stamps have a large delta. See trace_print_lat_context().
3843 */
3844 if (iter->temp == static_temp_buf &&
3845 STATIC_TEMP_BUF_SIZE < ent_size)
3846 return NULL;
3847
3848 /*
3849 * __find_next_entry() may call peek_next_entry(), which may
3850 * call ring_buffer_peek(), which can make the contents of iter->ent
3851 * undefined. Need to copy iter->ent now.
3852 */
3853 if (iter->ent && iter->ent != iter->temp) {
3854 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3855 !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3856 void *temp;
3857 temp = kmalloc(iter->ent_size, GFP_KERNEL);
3858 if (!temp)
3859 return NULL;
3860 kfree(iter->temp);
3861 iter->temp = temp;
3862 iter->temp_size = iter->ent_size;
3863 }
3864 memcpy(iter->temp, iter->ent, iter->ent_size);
3865 iter->ent = iter->temp;
3866 }
3867 entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3868 /* Put back the original ent_size */
3869 iter->ent_size = ent_size;
3870
3871 return entry;
3872 }
3873
3874 /* Find the next real entry, and increment the iterator to the next entry */
trace_find_next_entry_inc(struct trace_iterator * iter)3875 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3876 {
3877 iter->ent = __find_next_entry(iter, &iter->cpu,
3878 &iter->lost_events, &iter->ts);
3879
3880 if (iter->ent)
3881 trace_iterator_increment(iter);
3882
3883 return iter->ent ? iter : NULL;
3884 }
3885
trace_consume(struct trace_iterator * iter)3886 static void trace_consume(struct trace_iterator *iter)
3887 {
3888 ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3889 &iter->lost_events);
3890 }
3891
s_next(struct seq_file * m,void * v,loff_t * pos)3892 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3893 {
3894 struct trace_iterator *iter = m->private;
3895 int i = (int)*pos;
3896 void *ent;
3897
3898 WARN_ON_ONCE(iter->leftover);
3899
3900 (*pos)++;
3901
3902 /* can't go backwards */
3903 if (iter->idx > i)
3904 return NULL;
3905
3906 if (iter->idx < 0)
3907 ent = trace_find_next_entry_inc(iter);
3908 else
3909 ent = iter;
3910
3911 while (ent && iter->idx < i)
3912 ent = trace_find_next_entry_inc(iter);
3913
3914 iter->pos = *pos;
3915
3916 return ent;
3917 }
3918
tracing_iter_reset(struct trace_iterator * iter,int cpu)3919 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3920 {
3921 struct ring_buffer_iter *buf_iter;
3922 unsigned long entries = 0;
3923 u64 ts;
3924
3925 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3926
3927 buf_iter = trace_buffer_iter(iter, cpu);
3928 if (!buf_iter)
3929 return;
3930
3931 ring_buffer_iter_reset(buf_iter);
3932
3933 /*
3934 * With the max latency tracers, we could have the case
3935 * that a reset never took place on a cpu. This is evident
3936 * from the timestamp being before the start of the buffer.
3937 */
3938 while (ring_buffer_iter_peek(buf_iter, &ts)) {
3939 if (ts >= iter->array_buffer->time_start)
3940 break;
3941 entries++;
3942 ring_buffer_iter_advance(buf_iter);
3943 /* This could be a big loop */
3944 cond_resched();
3945 }
3946
3947 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3948 }
3949
3950 /*
3951 * The current tracer is copied to avoid holding a global lock
3952 * all around.
3953 */
s_start(struct seq_file * m,loff_t * pos)3954 static void *s_start(struct seq_file *m, loff_t *pos)
3955 {
3956 struct trace_iterator *iter = m->private;
3957 struct trace_array *tr = iter->tr;
3958 int cpu_file = iter->cpu_file;
3959 void *p = NULL;
3960 loff_t l = 0;
3961 int cpu;
3962
3963 mutex_lock(&trace_types_lock);
3964 if (unlikely(tr->current_trace != iter->trace)) {
3965 /* Close iter->trace before switching to the new current tracer */
3966 if (iter->trace->close)
3967 iter->trace->close(iter);
3968 iter->trace = tr->current_trace;
3969 /* Reopen the new current tracer */
3970 if (iter->trace->open)
3971 iter->trace->open(iter);
3972 }
3973 mutex_unlock(&trace_types_lock);
3974
3975 #ifdef CONFIG_TRACER_MAX_TRACE
3976 if (iter->snapshot && iter->trace->use_max_tr)
3977 return ERR_PTR(-EBUSY);
3978 #endif
3979
3980 if (*pos != iter->pos) {
3981 iter->ent = NULL;
3982 iter->cpu = 0;
3983 iter->idx = -1;
3984
3985 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3986 for_each_tracing_cpu(cpu)
3987 tracing_iter_reset(iter, cpu);
3988 } else
3989 tracing_iter_reset(iter, cpu_file);
3990
3991 iter->leftover = 0;
3992 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3993 ;
3994
3995 } else {
3996 /*
3997 * If we overflowed the seq_file before, then we want
3998 * to just reuse the trace_seq buffer again.
3999 */
4000 if (iter->leftover)
4001 p = iter;
4002 else {
4003 l = *pos - 1;
4004 p = s_next(m, p, &l);
4005 }
4006 }
4007
4008 trace_event_read_lock();
4009 trace_access_lock(cpu_file);
4010 return p;
4011 }
4012
s_stop(struct seq_file * m,void * p)4013 static void s_stop(struct seq_file *m, void *p)
4014 {
4015 struct trace_iterator *iter = m->private;
4016
4017 #ifdef CONFIG_TRACER_MAX_TRACE
4018 if (iter->snapshot && iter->trace->use_max_tr)
4019 return;
4020 #endif
4021
4022 trace_access_unlock(iter->cpu_file);
4023 trace_event_read_unlock();
4024 }
4025
4026 static void
get_total_entries_cpu(struct array_buffer * buf,unsigned long * total,unsigned long * entries,int cpu)4027 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4028 unsigned long *entries, int cpu)
4029 {
4030 unsigned long count;
4031
4032 count = ring_buffer_entries_cpu(buf->buffer, cpu);
4033 /*
4034 * If this buffer has skipped entries, then we hold all
4035 * entries for the trace and we need to ignore the
4036 * ones before the time stamp.
4037 */
4038 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4039 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4040 /* total is the same as the entries */
4041 *total = count;
4042 } else
4043 *total = count +
4044 ring_buffer_overrun_cpu(buf->buffer, cpu);
4045 *entries = count;
4046 }
4047
4048 static void
get_total_entries(struct array_buffer * buf,unsigned long * total,unsigned long * entries)4049 get_total_entries(struct array_buffer *buf,
4050 unsigned long *total, unsigned long *entries)
4051 {
4052 unsigned long t, e;
4053 int cpu;
4054
4055 *total = 0;
4056 *entries = 0;
4057
4058 for_each_tracing_cpu(cpu) {
4059 get_total_entries_cpu(buf, &t, &e, cpu);
4060 *total += t;
4061 *entries += e;
4062 }
4063 }
4064
trace_total_entries_cpu(struct trace_array * tr,int cpu)4065 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4066 {
4067 unsigned long total, entries;
4068
4069 if (!tr)
4070 tr = &global_trace;
4071
4072 get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4073
4074 return entries;
4075 }
4076
trace_total_entries(struct trace_array * tr)4077 unsigned long trace_total_entries(struct trace_array *tr)
4078 {
4079 unsigned long total, entries;
4080
4081 if (!tr)
4082 tr = &global_trace;
4083
4084 get_total_entries(&tr->array_buffer, &total, &entries);
4085
4086 return entries;
4087 }
4088
print_lat_help_header(struct seq_file * m)4089 static void print_lat_help_header(struct seq_file *m)
4090 {
4091 seq_puts(m, "# _------=> CPU# \n"
4092 "# / _-----=> irqs-off/BH-disabled\n"
4093 "# | / _----=> need-resched \n"
4094 "# || / _---=> hardirq/softirq \n"
4095 "# ||| / _--=> preempt-depth \n"
4096 "# |||| / _-=> migrate-disable \n"
4097 "# ||||| / delay \n"
4098 "# cmd pid |||||| time | caller \n"
4099 "# \\ / |||||| \\ | / \n");
4100 }
4101
print_event_info(struct array_buffer * buf,struct seq_file * m)4102 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4103 {
4104 unsigned long total;
4105 unsigned long entries;
4106
4107 get_total_entries(buf, &total, &entries);
4108 seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n",
4109 entries, total, num_online_cpus());
4110 seq_puts(m, "#\n");
4111 }
4112
print_func_help_header(struct array_buffer * buf,struct seq_file * m,unsigned int flags)4113 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4114 unsigned int flags)
4115 {
4116 bool tgid = flags & TRACE_ITER_RECORD_TGID;
4117
4118 print_event_info(buf, m);
4119
4120 seq_printf(m, "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? " TGID " : "");
4121 seq_printf(m, "# | | %s | | |\n", tgid ? " | " : "");
4122 }
4123
print_func_help_header_irq(struct array_buffer * buf,struct seq_file * m,unsigned int flags)4124 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4125 unsigned int flags)
4126 {
4127 bool tgid = flags & TRACE_ITER_RECORD_TGID;
4128 static const char space[] = " ";
4129 int prec = tgid ? 12 : 2;
4130
4131 print_event_info(buf, m);
4132
4133 seq_printf(m, "# %.*s _-----=> irqs-off/BH-disabled\n", prec, space);
4134 seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space);
4135 seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space);
4136 seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space);
4137 seq_printf(m, "# %.*s||| / _-=> migrate-disable\n", prec, space);
4138 seq_printf(m, "# %.*s|||| / delay\n", prec, space);
4139 seq_printf(m, "# TASK-PID %.*s CPU# ||||| TIMESTAMP FUNCTION\n", prec, " TGID ");
4140 seq_printf(m, "# | | %.*s | ||||| | |\n", prec, " | ");
4141 }
4142
4143 void
print_trace_header(struct seq_file * m,struct trace_iterator * iter)4144 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4145 {
4146 unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4147 struct array_buffer *buf = iter->array_buffer;
4148 struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4149 struct tracer *type = iter->trace;
4150 unsigned long entries;
4151 unsigned long total;
4152 const char *name = type->name;
4153
4154 get_total_entries(buf, &total, &entries);
4155
4156 seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4157 name, init_utsname()->release);
4158 seq_puts(m, "# -----------------------------------"
4159 "---------------------------------\n");
4160 seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4161 " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4162 nsecs_to_usecs(data->saved_latency),
4163 entries,
4164 total,
4165 buf->cpu,
4166 preempt_model_str(),
4167 /* These are reserved for later use */
4168 0, 0, 0, 0);
4169 #ifdef CONFIG_SMP
4170 seq_printf(m, " #P:%d)\n", num_online_cpus());
4171 #else
4172 seq_puts(m, ")\n");
4173 #endif
4174 seq_puts(m, "# -----------------\n");
4175 seq_printf(m, "# | task: %.16s-%d "
4176 "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4177 data->comm, data->pid,
4178 from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4179 data->policy, data->rt_priority);
4180 seq_puts(m, "# -----------------\n");
4181
4182 if (data->critical_start) {
4183 seq_puts(m, "# => started at: ");
4184 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4185 trace_print_seq(m, &iter->seq);
4186 seq_puts(m, "\n# => ended at: ");
4187 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4188 trace_print_seq(m, &iter->seq);
4189 seq_puts(m, "\n#\n");
4190 }
4191
4192 seq_puts(m, "#\n");
4193 }
4194
test_cpu_buff_start(struct trace_iterator * iter)4195 static void test_cpu_buff_start(struct trace_iterator *iter)
4196 {
4197 struct trace_seq *s = &iter->seq;
4198 struct trace_array *tr = iter->tr;
4199
4200 if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4201 return;
4202
4203 if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4204 return;
4205
4206 if (cpumask_available(iter->started) &&
4207 cpumask_test_cpu(iter->cpu, iter->started))
4208 return;
4209
4210 if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4211 return;
4212
4213 if (cpumask_available(iter->started))
4214 cpumask_set_cpu(iter->cpu, iter->started);
4215
4216 /* Don't print the "CPU buffer started" message for the first entry of the trace */
4217 if (iter->idx > 1)
4218 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4219 iter->cpu);
4220 }
4221
print_trace_fmt(struct trace_iterator * iter)4222 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4223 {
4224 struct trace_array *tr = iter->tr;
4225 struct trace_seq *s = &iter->seq;
4226 unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4227 struct trace_entry *entry;
4228 struct trace_event *event;
4229
4230 entry = iter->ent;
4231
4232 test_cpu_buff_start(iter);
4233
4234 event = ftrace_find_event(entry->type);
4235
4236 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4237 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4238 trace_print_lat_context(iter);
4239 else
4240 trace_print_context(iter);
4241 }
4242
4243 if (trace_seq_has_overflowed(s))
4244 return TRACE_TYPE_PARTIAL_LINE;
4245
4246 if (event) {
4247 if (tr->trace_flags & TRACE_ITER_FIELDS)
4248 return print_event_fields(iter, event);
4249 /*
4250 * For TRACE_EVENT() events, the print_fmt is not
4251 * safe to use if the array has delta offsets.
4252 * Force printing via the fields.
4253 */
4254 if ((tr->text_delta) &&
4255 event->type > __TRACE_LAST_TYPE)
4256 return print_event_fields(iter, event);
4257
4258 return event->funcs->trace(iter, sym_flags, event);
4259 }
4260
4261 trace_seq_printf(s, "Unknown type %d\n", entry->type);
4262
4263 return trace_handle_return(s);
4264 }
4265
print_raw_fmt(struct trace_iterator * iter)4266 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4267 {
4268 struct trace_array *tr = iter->tr;
4269 struct trace_seq *s = &iter->seq;
4270 struct trace_entry *entry;
4271 struct trace_event *event;
4272
4273 entry = iter->ent;
4274
4275 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4276 trace_seq_printf(s, "%d %d %llu ",
4277 entry->pid, iter->cpu, iter->ts);
4278
4279 if (trace_seq_has_overflowed(s))
4280 return TRACE_TYPE_PARTIAL_LINE;
4281
4282 event = ftrace_find_event(entry->type);
4283 if (event)
4284 return event->funcs->raw(iter, 0, event);
4285
4286 trace_seq_printf(s, "%d ?\n", entry->type);
4287
4288 return trace_handle_return(s);
4289 }
4290
print_hex_fmt(struct trace_iterator * iter)4291 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4292 {
4293 struct trace_array *tr = iter->tr;
4294 struct trace_seq *s = &iter->seq;
4295 unsigned char newline = '\n';
4296 struct trace_entry *entry;
4297 struct trace_event *event;
4298
4299 entry = iter->ent;
4300
4301 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4302 SEQ_PUT_HEX_FIELD(s, entry->pid);
4303 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4304 SEQ_PUT_HEX_FIELD(s, iter->ts);
4305 if (trace_seq_has_overflowed(s))
4306 return TRACE_TYPE_PARTIAL_LINE;
4307 }
4308
4309 event = ftrace_find_event(entry->type);
4310 if (event) {
4311 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4312 if (ret != TRACE_TYPE_HANDLED)
4313 return ret;
4314 }
4315
4316 SEQ_PUT_FIELD(s, newline);
4317
4318 return trace_handle_return(s);
4319 }
4320
print_bin_fmt(struct trace_iterator * iter)4321 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4322 {
4323 struct trace_array *tr = iter->tr;
4324 struct trace_seq *s = &iter->seq;
4325 struct trace_entry *entry;
4326 struct trace_event *event;
4327
4328 entry = iter->ent;
4329
4330 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4331 SEQ_PUT_FIELD(s, entry->pid);
4332 SEQ_PUT_FIELD(s, iter->cpu);
4333 SEQ_PUT_FIELD(s, iter->ts);
4334 if (trace_seq_has_overflowed(s))
4335 return TRACE_TYPE_PARTIAL_LINE;
4336 }
4337
4338 event = ftrace_find_event(entry->type);
4339 return event ? event->funcs->binary(iter, 0, event) :
4340 TRACE_TYPE_HANDLED;
4341 }
4342
trace_empty(struct trace_iterator * iter)4343 int trace_empty(struct trace_iterator *iter)
4344 {
4345 struct ring_buffer_iter *buf_iter;
4346 int cpu;
4347
4348 /* If we are looking at one CPU buffer, only check that one */
4349 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4350 cpu = iter->cpu_file;
4351 buf_iter = trace_buffer_iter(iter, cpu);
4352 if (buf_iter) {
4353 if (!ring_buffer_iter_empty(buf_iter))
4354 return 0;
4355 } else {
4356 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4357 return 0;
4358 }
4359 return 1;
4360 }
4361
4362 for_each_tracing_cpu(cpu) {
4363 buf_iter = trace_buffer_iter(iter, cpu);
4364 if (buf_iter) {
4365 if (!ring_buffer_iter_empty(buf_iter))
4366 return 0;
4367 } else {
4368 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4369 return 0;
4370 }
4371 }
4372
4373 return 1;
4374 }
4375
4376 /* Called with trace_event_read_lock() held. */
print_trace_line(struct trace_iterator * iter)4377 enum print_line_t print_trace_line(struct trace_iterator *iter)
4378 {
4379 struct trace_array *tr = iter->tr;
4380 unsigned long trace_flags = tr->trace_flags;
4381 enum print_line_t ret;
4382
4383 if (iter->lost_events) {
4384 if (iter->lost_events == (unsigned long)-1)
4385 trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4386 iter->cpu);
4387 else
4388 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4389 iter->cpu, iter->lost_events);
4390 if (trace_seq_has_overflowed(&iter->seq))
4391 return TRACE_TYPE_PARTIAL_LINE;
4392 }
4393
4394 if (iter->trace && iter->trace->print_line) {
4395 ret = iter->trace->print_line(iter);
4396 if (ret != TRACE_TYPE_UNHANDLED)
4397 return ret;
4398 }
4399
4400 if (iter->ent->type == TRACE_BPUTS &&
4401 trace_flags & TRACE_ITER_PRINTK &&
4402 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4403 return trace_print_bputs_msg_only(iter);
4404
4405 if (iter->ent->type == TRACE_BPRINT &&
4406 trace_flags & TRACE_ITER_PRINTK &&
4407 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4408 return trace_print_bprintk_msg_only(iter);
4409
4410 if (iter->ent->type == TRACE_PRINT &&
4411 trace_flags & TRACE_ITER_PRINTK &&
4412 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4413 return trace_print_printk_msg_only(iter);
4414
4415 if (trace_flags & TRACE_ITER_BIN)
4416 return print_bin_fmt(iter);
4417
4418 if (trace_flags & TRACE_ITER_HEX)
4419 return print_hex_fmt(iter);
4420
4421 if (trace_flags & TRACE_ITER_RAW)
4422 return print_raw_fmt(iter);
4423
4424 return print_trace_fmt(iter);
4425 }
4426
trace_latency_header(struct seq_file * m)4427 void trace_latency_header(struct seq_file *m)
4428 {
4429 struct trace_iterator *iter = m->private;
4430 struct trace_array *tr = iter->tr;
4431
4432 /* print nothing if the buffers are empty */
4433 if (trace_empty(iter))
4434 return;
4435
4436 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4437 print_trace_header(m, iter);
4438
4439 if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4440 print_lat_help_header(m);
4441 }
4442
trace_default_header(struct seq_file * m)4443 void trace_default_header(struct seq_file *m)
4444 {
4445 struct trace_iterator *iter = m->private;
4446 struct trace_array *tr = iter->tr;
4447 unsigned long trace_flags = tr->trace_flags;
4448
4449 if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4450 return;
4451
4452 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4453 /* print nothing if the buffers are empty */
4454 if (trace_empty(iter))
4455 return;
4456 print_trace_header(m, iter);
4457 if (!(trace_flags & TRACE_ITER_VERBOSE))
4458 print_lat_help_header(m);
4459 } else {
4460 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4461 if (trace_flags & TRACE_ITER_IRQ_INFO)
4462 print_func_help_header_irq(iter->array_buffer,
4463 m, trace_flags);
4464 else
4465 print_func_help_header(iter->array_buffer, m,
4466 trace_flags);
4467 }
4468 }
4469 }
4470
test_ftrace_alive(struct seq_file * m)4471 static void test_ftrace_alive(struct seq_file *m)
4472 {
4473 if (!ftrace_is_dead())
4474 return;
4475 seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4476 "# MAY BE MISSING FUNCTION EVENTS\n");
4477 }
4478
4479 #ifdef CONFIG_TRACER_MAX_TRACE
show_snapshot_main_help(struct seq_file * m)4480 static void show_snapshot_main_help(struct seq_file *m)
4481 {
4482 seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4483 "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4484 "# Takes a snapshot of the main buffer.\n"
4485 "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4486 "# (Doesn't have to be '2' works with any number that\n"
4487 "# is not a '0' or '1')\n");
4488 }
4489
show_snapshot_percpu_help(struct seq_file * m)4490 static void show_snapshot_percpu_help(struct seq_file *m)
4491 {
4492 seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4493 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4494 seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4495 "# Takes a snapshot of the main buffer for this cpu.\n");
4496 #else
4497 seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4498 "# Must use main snapshot file to allocate.\n");
4499 #endif
4500 seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4501 "# (Doesn't have to be '2' works with any number that\n"
4502 "# is not a '0' or '1')\n");
4503 }
4504
print_snapshot_help(struct seq_file * m,struct trace_iterator * iter)4505 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4506 {
4507 if (iter->tr->allocated_snapshot)
4508 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4509 else
4510 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4511
4512 seq_puts(m, "# Snapshot commands:\n");
4513 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4514 show_snapshot_main_help(m);
4515 else
4516 show_snapshot_percpu_help(m);
4517 }
4518 #else
4519 /* Should never be called */
print_snapshot_help(struct seq_file * m,struct trace_iterator * iter)4520 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4521 #endif
4522
s_show(struct seq_file * m,void * v)4523 static int s_show(struct seq_file *m, void *v)
4524 {
4525 struct trace_iterator *iter = v;
4526 int ret;
4527
4528 if (iter->ent == NULL) {
4529 if (iter->tr) {
4530 seq_printf(m, "# tracer: %s\n", iter->trace->name);
4531 seq_puts(m, "#\n");
4532 test_ftrace_alive(m);
4533 }
4534 if (iter->snapshot && trace_empty(iter))
4535 print_snapshot_help(m, iter);
4536 else if (iter->trace && iter->trace->print_header)
4537 iter->trace->print_header(m);
4538 else
4539 trace_default_header(m);
4540
4541 } else if (iter->leftover) {
4542 /*
4543 * If we filled the seq_file buffer earlier, we
4544 * want to just show it now.
4545 */
4546 ret = trace_print_seq(m, &iter->seq);
4547
4548 /* ret should this time be zero, but you never know */
4549 iter->leftover = ret;
4550
4551 } else {
4552 ret = print_trace_line(iter);
4553 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4554 iter->seq.full = 0;
4555 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
4556 }
4557 ret = trace_print_seq(m, &iter->seq);
4558 /*
4559 * If we overflow the seq_file buffer, then it will
4560 * ask us for this data again at start up.
4561 * Use that instead.
4562 * ret is 0 if seq_file write succeeded.
4563 * -1 otherwise.
4564 */
4565 iter->leftover = ret;
4566 }
4567
4568 return 0;
4569 }
4570
4571 /*
4572 * Should be used after trace_array_get(); trace_types_lock
4573 * ensures that i_cdev was already initialized.
4574 */
tracing_get_cpu(struct inode * inode)4575 static inline int tracing_get_cpu(struct inode *inode)
4576 {
4577 if (inode->i_cdev) /* See trace_create_cpu_file() */
4578 return (long)inode->i_cdev - 1;
4579 return RING_BUFFER_ALL_CPUS;
4580 }
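/*
 * Rough sketch of the counterpart encoding (see trace_create_cpu_file()
 * later in this file): a per-CPU file stores cpu + 1 in i_cdev so that a
 * NULL i_cdev can mean "all CPUs", roughly:
 *
 *	d_inode(dentry)->i_cdev = (void *)(cpu + 1);
 *
 * tracing_get_cpu() then recovers the CPU as (long)i_cdev - 1.
 */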
4581
4582 static const struct seq_operations tracer_seq_ops = {
4583 .start = s_start,
4584 .next = s_next,
4585 .stop = s_stop,
4586 .show = s_show,
4587 };
4588
4589 /*
4590 * Note, as iter itself can be allocated and freed in different
4591 * ways, this function is only used to free its content, and not
4592 * the iterator itself. The only requirement for all the allocations
4593 * is that they must zero all fields (kzalloc), as freeing works with
4594 * either allocated content or NULL.
4595 */
free_trace_iter_content(struct trace_iterator * iter)4596 static void free_trace_iter_content(struct trace_iterator *iter)
4597 {
4598 /* The fmt is either NULL, allocated or points to static_fmt_buf */
4599 if (iter->fmt != static_fmt_buf)
4600 kfree(iter->fmt);
4601
4602 kfree(iter->temp);
4603 kfree(iter->buffer_iter);
4604 mutex_destroy(&iter->mutex);
4605 free_cpumask_var(iter->started);
4606 }
4607
4608 static struct trace_iterator *
__tracing_open(struct inode * inode,struct file * file,bool snapshot)4609 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4610 {
4611 struct trace_array *tr = inode->i_private;
4612 struct trace_iterator *iter;
4613 int cpu;
4614
4615 if (tracing_disabled)
4616 return ERR_PTR(-ENODEV);
4617
4618 iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4619 if (!iter)
4620 return ERR_PTR(-ENOMEM);
4621
4622 iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4623 GFP_KERNEL);
4624 if (!iter->buffer_iter)
4625 goto release;
4626
4627 /*
4628 * trace_find_next_entry() may need to save off iter->ent.
4629 * It will place it into the iter->temp buffer. As most
4630 * events are less than 128 bytes, allocate a buffer of that size.
4631 * If one is greater, then trace_find_next_entry() will
4632 * allocate a new buffer to adjust for the bigger iter->ent.
4633 * It's not critical if it fails to get allocated here.
4634 */
4635 iter->temp = kmalloc(128, GFP_KERNEL);
4636 if (iter->temp)
4637 iter->temp_size = 128;
4638
4639 /*
4640 * trace_event_printf() may need to modify the given format
4641 * string to replace %p with %px so that it shows the real address
4642 * instead of a hashed value. However, that is only needed for
4643 * event tracing; other tracers may not need it. Defer the
4644 * allocation until it is needed.
4645 */
4646 iter->fmt = NULL;
4647 iter->fmt_size = 0;
4648
4649 mutex_lock(&trace_types_lock);
4650 iter->trace = tr->current_trace;
4651
4652 if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4653 goto fail;
4654
4655 iter->tr = tr;
4656
4657 #ifdef CONFIG_TRACER_MAX_TRACE
4658 /* Currently only the top directory has a snapshot */
4659 if (tr->current_trace->print_max || snapshot)
4660 iter->array_buffer = &tr->max_buffer;
4661 else
4662 #endif
4663 iter->array_buffer = &tr->array_buffer;
4664 iter->snapshot = snapshot;
4665 iter->pos = -1;
4666 iter->cpu_file = tracing_get_cpu(inode);
4667 mutex_init(&iter->mutex);
4668
4669 /* Notify the tracer early; before we stop tracing. */
4670 if (iter->trace->open)
4671 iter->trace->open(iter);
4672
4673 /* Annotate start of buffers if we had overruns */
4674 if (ring_buffer_overruns(iter->array_buffer->buffer))
4675 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4676
4677 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4678 if (trace_clocks[tr->clock_id].in_ns)
4679 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4680
4681 /*
4682 * If pause-on-trace is enabled, then stop the trace while
4683 * dumping, unless this is the "snapshot" file
4684 */
4685 if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4686 tracing_stop_tr(tr);
4687
4688 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4689 for_each_tracing_cpu(cpu) {
4690 iter->buffer_iter[cpu] =
4691 ring_buffer_read_start(iter->array_buffer->buffer,
4692 cpu, GFP_KERNEL);
4693 tracing_iter_reset(iter, cpu);
4694 }
4695 } else {
4696 cpu = iter->cpu_file;
4697 iter->buffer_iter[cpu] =
4698 ring_buffer_read_start(iter->array_buffer->buffer,
4699 cpu, GFP_KERNEL);
4700 tracing_iter_reset(iter, cpu);
4701 }
4702
4703 mutex_unlock(&trace_types_lock);
4704
4705 return iter;
4706
4707 fail:
4708 mutex_unlock(&trace_types_lock);
4709 free_trace_iter_content(iter);
4710 release:
4711 seq_release_private(inode, file);
4712 return ERR_PTR(-ENOMEM);
4713 }
4714
tracing_open_generic(struct inode * inode,struct file * filp)4715 int tracing_open_generic(struct inode *inode, struct file *filp)
4716 {
4717 int ret;
4718
4719 ret = tracing_check_open_get_tr(NULL);
4720 if (ret)
4721 return ret;
4722
4723 filp->private_data = inode->i_private;
4724 return 0;
4725 }
4726
tracing_is_disabled(void)4727 bool tracing_is_disabled(void)
4728 {
4729 return tracing_disabled ? true : false;
4730 }
4731
4732 /*
4733 * Open and update trace_array ref count.
4734 * Must have the current trace_array passed to it.
4735 */
tracing_open_generic_tr(struct inode * inode,struct file * filp)4736 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4737 {
4738 struct trace_array *tr = inode->i_private;
4739 int ret;
4740
4741 ret = tracing_check_open_get_tr(tr);
4742 if (ret)
4743 return ret;
4744
4745 filp->private_data = inode->i_private;
4746
4747 return 0;
4748 }
4749
4750 /*
4751 * The private pointer of the inode is the trace_event_file.
4752 * Update the tr ref count associated to it.
4753 */
tracing_open_file_tr(struct inode * inode,struct file * filp)4754 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4755 {
4756 struct trace_event_file *file = inode->i_private;
4757 int ret;
4758
4759 ret = tracing_check_open_get_tr(file->tr);
4760 if (ret)
4761 return ret;
4762
4763 guard(mutex)(&event_mutex);
4764
4765 /* Fail if the file is marked for removal */
4766 if (file->flags & EVENT_FILE_FL_FREED) {
4767 trace_array_put(file->tr);
4768 return -ENODEV;
4769 } else {
4770 event_file_get(file);
4771 }
4772
4773 filp->private_data = inode->i_private;
4774
4775 return 0;
4776 }
4777
tracing_release_file_tr(struct inode * inode,struct file * filp)4778 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4779 {
4780 struct trace_event_file *file = inode->i_private;
4781
4782 trace_array_put(file->tr);
4783 event_file_put(file);
4784
4785 return 0;
4786 }
4787
tracing_single_release_file_tr(struct inode * inode,struct file * filp)4788 int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
4789 {
4790 tracing_release_file_tr(inode, filp);
4791 return single_release(inode, filp);
4792 }
4793
tracing_mark_open(struct inode * inode,struct file * filp)4794 static int tracing_mark_open(struct inode *inode, struct file *filp)
4795 {
4796 stream_open(inode, filp);
4797 return tracing_open_generic_tr(inode, filp);
4798 }
4799
tracing_release(struct inode * inode,struct file * file)4800 static int tracing_release(struct inode *inode, struct file *file)
4801 {
4802 struct trace_array *tr = inode->i_private;
4803 struct seq_file *m = file->private_data;
4804 struct trace_iterator *iter;
4805 int cpu;
4806
4807 if (!(file->f_mode & FMODE_READ)) {
4808 trace_array_put(tr);
4809 return 0;
4810 }
4811
4812 /* Writes do not use seq_file */
4813 iter = m->private;
4814 mutex_lock(&trace_types_lock);
4815
4816 for_each_tracing_cpu(cpu) {
4817 if (iter->buffer_iter[cpu])
4818 ring_buffer_read_finish(iter->buffer_iter[cpu]);
4819 }
4820
4821 if (iter->trace && iter->trace->close)
4822 iter->trace->close(iter);
4823
4824 if (!iter->snapshot && tr->stop_count)
4825 /* reenable tracing if it was previously enabled */
4826 tracing_start_tr(tr);
4827
4828 __trace_array_put(tr);
4829
4830 mutex_unlock(&trace_types_lock);
4831
4832 free_trace_iter_content(iter);
4833 seq_release_private(inode, file);
4834
4835 return 0;
4836 }
4837
tracing_release_generic_tr(struct inode * inode,struct file * file)4838 int tracing_release_generic_tr(struct inode *inode, struct file *file)
4839 {
4840 struct trace_array *tr = inode->i_private;
4841
4842 trace_array_put(tr);
4843 return 0;
4844 }
4845
tracing_single_release_tr(struct inode * inode,struct file * file)4846 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4847 {
4848 struct trace_array *tr = inode->i_private;
4849
4850 trace_array_put(tr);
4851
4852 return single_release(inode, file);
4853 }
4854
tracing_open(struct inode * inode,struct file * file)4855 static int tracing_open(struct inode *inode, struct file *file)
4856 {
4857 struct trace_array *tr = inode->i_private;
4858 struct trace_iterator *iter;
4859 int ret;
4860
4861 ret = tracing_check_open_get_tr(tr);
4862 if (ret)
4863 return ret;
4864
4865 /* If this file was open for write, then erase contents */
4866 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4867 int cpu = tracing_get_cpu(inode);
4868 struct array_buffer *trace_buf = &tr->array_buffer;
4869
4870 #ifdef CONFIG_TRACER_MAX_TRACE
4871 if (tr->current_trace->print_max)
4872 trace_buf = &tr->max_buffer;
4873 #endif
4874
4875 if (cpu == RING_BUFFER_ALL_CPUS)
4876 tracing_reset_online_cpus(trace_buf);
4877 else
4878 tracing_reset_cpu(trace_buf, cpu);
4879 }
4880
4881 if (file->f_mode & FMODE_READ) {
4882 iter = __tracing_open(inode, file, false);
4883 if (IS_ERR(iter))
4884 ret = PTR_ERR(iter);
4885 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4886 iter->iter_flags |= TRACE_FILE_LAT_FMT;
4887 }
4888
4889 if (ret < 0)
4890 trace_array_put(tr);
4891
4892 return ret;
4893 }
4894
4895 /*
4896 * Some tracers are not suitable for instance buffers.
4897 * A tracer is always available for the global array (toplevel)
4898 * or if it explicitly states that it is.
4899 */
4900 static bool
trace_ok_for_array(struct tracer * t,struct trace_array * tr)4901 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4902 {
4903 #ifdef CONFIG_TRACER_SNAPSHOT
4904 /* arrays with mapped buffer range do not have snapshots */
4905 if (tr->range_addr_start && t->use_max_tr)
4906 return false;
4907 #endif
4908 return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4909 }
4910
4911 /* Find the next tracer that this trace array may use */
4912 static struct tracer *
get_tracer_for_array(struct trace_array * tr,struct tracer * t)4913 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4914 {
4915 while (t && !trace_ok_for_array(t, tr))
4916 t = t->next;
4917
4918 return t;
4919 }
4920
4921 static void *
t_next(struct seq_file * m,void * v,loff_t * pos)4922 t_next(struct seq_file *m, void *v, loff_t *pos)
4923 {
4924 struct trace_array *tr = m->private;
4925 struct tracer *t = v;
4926
4927 (*pos)++;
4928
4929 if (t)
4930 t = get_tracer_for_array(tr, t->next);
4931
4932 return t;
4933 }
4934
t_start(struct seq_file * m,loff_t * pos)4935 static void *t_start(struct seq_file *m, loff_t *pos)
4936 {
4937 struct trace_array *tr = m->private;
4938 struct tracer *t;
4939 loff_t l = 0;
4940
4941 mutex_lock(&trace_types_lock);
4942
4943 t = get_tracer_for_array(tr, trace_types);
4944 for (; t && l < *pos; t = t_next(m, t, &l))
4945 ;
4946
4947 return t;
4948 }
4949
t_stop(struct seq_file * m,void * p)4950 static void t_stop(struct seq_file *m, void *p)
4951 {
4952 mutex_unlock(&trace_types_lock);
4953 }
4954
t_show(struct seq_file * m,void * v)4955 static int t_show(struct seq_file *m, void *v)
4956 {
4957 struct tracer *t = v;
4958
4959 if (!t)
4960 return 0;
4961
4962 seq_puts(m, t->name);
4963 if (t->next)
4964 seq_putc(m, ' ');
4965 else
4966 seq_putc(m, '\n');
4967
4968 return 0;
4969 }
4970
4971 static const struct seq_operations show_traces_seq_ops = {
4972 .start = t_start,
4973 .next = t_next,
4974 .stop = t_stop,
4975 .show = t_show,
4976 };
4977
show_traces_open(struct inode * inode,struct file * file)4978 static int show_traces_open(struct inode *inode, struct file *file)
4979 {
4980 struct trace_array *tr = inode->i_private;
4981 struct seq_file *m;
4982 int ret;
4983
4984 ret = tracing_check_open_get_tr(tr);
4985 if (ret)
4986 return ret;
4987
4988 ret = seq_open(file, &show_traces_seq_ops);
4989 if (ret) {
4990 trace_array_put(tr);
4991 return ret;
4992 }
4993
4994 m = file->private_data;
4995 m->private = tr;
4996
4997 return 0;
4998 }
4999
tracing_seq_release(struct inode * inode,struct file * file)5000 static int tracing_seq_release(struct inode *inode, struct file *file)
5001 {
5002 struct trace_array *tr = inode->i_private;
5003
5004 trace_array_put(tr);
5005 return seq_release(inode, file);
5006 }
5007
5008 static ssize_t
tracing_write_stub(struct file * filp,const char __user * ubuf,size_t count,loff_t * ppos)5009 tracing_write_stub(struct file *filp, const char __user *ubuf,
5010 size_t count, loff_t *ppos)
5011 {
5012 return count;
5013 }
5014
tracing_lseek(struct file * file,loff_t offset,int whence)5015 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5016 {
5017 int ret;
5018
5019 if (file->f_mode & FMODE_READ)
5020 ret = seq_lseek(file, offset, whence);
5021 else
5022 file->f_pos = ret = 0;
5023
5024 return ret;
5025 }
5026
5027 static const struct file_operations tracing_fops = {
5028 .open = tracing_open,
5029 .read = seq_read,
5030 .read_iter = seq_read_iter,
5031 .splice_read = copy_splice_read,
5032 .write = tracing_write_stub,
5033 .llseek = tracing_lseek,
5034 .release = tracing_release,
5035 };
5036
5037 static const struct file_operations show_traces_fops = {
5038 .open = show_traces_open,
5039 .read = seq_read,
5040 .llseek = seq_lseek,
5041 .release = tracing_seq_release,
5042 };
5043
5044 static ssize_t
tracing_cpumask_read(struct file * filp,char __user * ubuf,size_t count,loff_t * ppos)5045 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5046 size_t count, loff_t *ppos)
5047 {
5048 struct trace_array *tr = file_inode(filp)->i_private;
5049 char *mask_str __free(kfree) = NULL;
5050 int len;
5051
5052 len = snprintf(NULL, 0, "%*pb\n",
5053 cpumask_pr_args(tr->tracing_cpumask)) + 1;
5054 mask_str = kmalloc(len, GFP_KERNEL);
5055 if (!mask_str)
5056 return -ENOMEM;
5057
5058 len = snprintf(mask_str, len, "%*pb\n",
5059 cpumask_pr_args(tr->tracing_cpumask));
5060 if (len >= count)
5061 return -EINVAL;
5062
5063 return simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5064 }
5065
tracing_set_cpumask(struct trace_array * tr,cpumask_var_t tracing_cpumask_new)5066 int tracing_set_cpumask(struct trace_array *tr,
5067 cpumask_var_t tracing_cpumask_new)
5068 {
5069 int cpu;
5070
5071 if (!tr)
5072 return -EINVAL;
5073
5074 local_irq_disable();
5075 arch_spin_lock(&tr->max_lock);
5076 for_each_tracing_cpu(cpu) {
5077 /*
5078 * Increase/decrease the disabled counter if we are
5079 * about to flip a bit in the cpumask:
5080 */
5081 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5082 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5083 ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5084 #ifdef CONFIG_TRACER_MAX_TRACE
5085 ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5086 #endif
5087 }
5088 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5089 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5090 ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5091 #ifdef CONFIG_TRACER_MAX_TRACE
5092 ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5093 #endif
5094 }
5095 }
5096 arch_spin_unlock(&tr->max_lock);
5097 local_irq_enable();
5098
5099 cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5100
5101 return 0;
5102 }
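/*
 * Illustrative sketch (not part of this file): restricting an instance to
 * CPUs 0 and 1 from kernel code. tracing_set_cpumask() copies the mask,
 * so the temporary may be freed afterwards. Error handling is omitted.
 *
 *	cpumask_var_t new_mask;
 *
 *	if (zalloc_cpumask_var(&new_mask, GFP_KERNEL)) {
 *		cpumask_set_cpu(0, new_mask);
 *		cpumask_set_cpu(1, new_mask);
 *		tracing_set_cpumask(tr, new_mask);
 *		free_cpumask_var(new_mask);
 *	}
 */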
5103
5104 static ssize_t
tracing_cpumask_write(struct file * filp,const char __user * ubuf,size_t count,loff_t * ppos)5105 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5106 size_t count, loff_t *ppos)
5107 {
5108 struct trace_array *tr = file_inode(filp)->i_private;
5109 cpumask_var_t tracing_cpumask_new;
5110 int err;
5111
5112 if (count == 0 || count > KMALLOC_MAX_SIZE)
5113 return -EINVAL;
5114
5115 if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5116 return -ENOMEM;
5117
5118 err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5119 if (err)
5120 goto err_free;
5121
5122 err = tracing_set_cpumask(tr, tracing_cpumask_new);
5123 if (err)
5124 goto err_free;
5125
5126 free_cpumask_var(tracing_cpumask_new);
5127
5128 return count;
5129
5130 err_free:
5131 free_cpumask_var(tracing_cpumask_new);
5132
5133 return err;
5134 }
5135
5136 static const struct file_operations tracing_cpumask_fops = {
5137 .open = tracing_open_generic_tr,
5138 .read = tracing_cpumask_read,
5139 .write = tracing_cpumask_write,
5140 .release = tracing_release_generic_tr,
5141 .llseek = generic_file_llseek,
5142 };
5143
tracing_trace_options_show(struct seq_file * m,void * v)5144 static int tracing_trace_options_show(struct seq_file *m, void *v)
5145 {
5146 struct tracer_opt *trace_opts;
5147 struct trace_array *tr = m->private;
5148 u32 tracer_flags;
5149 int i;
5150
5151 guard(mutex)(&trace_types_lock);
5152
5153 tracer_flags = tr->current_trace->flags->val;
5154 trace_opts = tr->current_trace->flags->opts;
5155
5156 for (i = 0; trace_options[i]; i++) {
5157 if (tr->trace_flags & (1 << i))
5158 seq_printf(m, "%s\n", trace_options[i]);
5159 else
5160 seq_printf(m, "no%s\n", trace_options[i]);
5161 }
5162
5163 for (i = 0; trace_opts[i].name; i++) {
5164 if (tracer_flags & trace_opts[i].bit)
5165 seq_printf(m, "%s\n", trace_opts[i].name);
5166 else
5167 seq_printf(m, "no%s\n", trace_opts[i].name);
5168 }
5169
5170 return 0;
5171 }
5172
__set_tracer_option(struct trace_array * tr,struct tracer_flags * tracer_flags,struct tracer_opt * opts,int neg)5173 static int __set_tracer_option(struct trace_array *tr,
5174 struct tracer_flags *tracer_flags,
5175 struct tracer_opt *opts, int neg)
5176 {
5177 struct tracer *trace = tracer_flags->trace;
5178 int ret;
5179
5180 ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5181 if (ret)
5182 return ret;
5183
5184 if (neg)
5185 tracer_flags->val &= ~opts->bit;
5186 else
5187 tracer_flags->val |= opts->bit;
5188 return 0;
5189 }
5190
5191 /* Try to assign a tracer specific option */
set_tracer_option(struct trace_array * tr,char * cmp,int neg)5192 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5193 {
5194 struct tracer *trace = tr->current_trace;
5195 struct tracer_flags *tracer_flags = trace->flags;
5196 struct tracer_opt *opts = NULL;
5197 int i;
5198
5199 for (i = 0; tracer_flags->opts[i].name; i++) {
5200 opts = &tracer_flags->opts[i];
5201
5202 if (strcmp(cmp, opts->name) == 0)
5203 return __set_tracer_option(tr, trace->flags, opts, neg);
5204 }
5205
5206 return -EINVAL;
5207 }
5208
5209 /* Some tracers require overwrite to stay enabled */
trace_keep_overwrite(struct tracer * tracer,u32 mask,int set)5210 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5211 {
5212 if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5213 return -1;
5214
5215 return 0;
5216 }
5217
set_tracer_flag(struct trace_array * tr,unsigned int mask,int enabled)5218 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5219 {
5220 if ((mask == TRACE_ITER_RECORD_TGID) ||
5221 (mask == TRACE_ITER_RECORD_CMD) ||
5222 (mask == TRACE_ITER_TRACE_PRINTK) ||
5223 (mask == TRACE_ITER_COPY_MARKER))
5224 lockdep_assert_held(&event_mutex);
5225
5226 /* do nothing if flag is already set */
5227 if (!!(tr->trace_flags & mask) == !!enabled)
5228 return 0;
5229
5230 /* Give the tracer a chance to approve the change */
5231 if (tr->current_trace->flag_changed)
5232 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5233 return -EINVAL;
5234
5235 if (mask == TRACE_ITER_TRACE_PRINTK) {
5236 if (enabled) {
5237 update_printk_trace(tr);
5238 } else {
5239 /*
5240 * The global_trace cannot clear this.
5241 * Its flag only gets cleared if another instance sets it.
5242 */
5243 if (printk_trace == &global_trace)
5244 return -EINVAL;
5245 /*
5246 * An instance must always have it set.
5247 * By default, that's the global_trace instance.
5248 */
5249 if (printk_trace == tr)
5250 update_printk_trace(&global_trace);
5251 }
5252 }
5253
5254 if (mask == TRACE_ITER_COPY_MARKER)
5255 update_marker_trace(tr, enabled);
5256
5257 if (enabled)
5258 tr->trace_flags |= mask;
5259 else
5260 tr->trace_flags &= ~mask;
5261
5262 if (mask == TRACE_ITER_RECORD_CMD)
5263 trace_event_enable_cmd_record(enabled);
5264
5265 if (mask == TRACE_ITER_RECORD_TGID) {
5266
5267 if (trace_alloc_tgid_map() < 0) {
5268 tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5269 return -ENOMEM;
5270 }
5271
5272 trace_event_enable_tgid_record(enabled);
5273 }
5274
5275 if (mask == TRACE_ITER_EVENT_FORK)
5276 trace_event_follow_fork(tr, enabled);
5277
5278 if (mask == TRACE_ITER_FUNC_FORK)
5279 ftrace_pid_follow_fork(tr, enabled);
5280
5281 if (mask == TRACE_ITER_OVERWRITE) {
5282 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5283 #ifdef CONFIG_TRACER_MAX_TRACE
5284 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5285 #endif
5286 }
5287
5288 if (mask == TRACE_ITER_PRINTK) {
5289 trace_printk_start_stop_comm(enabled);
5290 trace_printk_control(enabled);
5291 }
5292
5293 return 0;
5294 }
5295
trace_set_options(struct trace_array * tr,char * option)5296 int trace_set_options(struct trace_array *tr, char *option)
5297 {
5298 char *cmp;
5299 int neg = 0;
5300 int ret;
5301 size_t orig_len = strlen(option);
5302 int len;
5303
5304 cmp = strstrip(option);
5305
5306 len = str_has_prefix(cmp, "no");
5307 if (len)
5308 neg = 1;
5309
5310 cmp += len;
5311
5312 mutex_lock(&event_mutex);
5313 mutex_lock(&trace_types_lock);
5314
5315 ret = match_string(trace_options, -1, cmp);
5316 /* If no option could be set, test the specific tracer options */
5317 if (ret < 0)
5318 ret = set_tracer_option(tr, cmp, neg);
5319 else
5320 ret = set_tracer_flag(tr, 1 << ret, !neg);
5321
5322 mutex_unlock(&trace_types_lock);
5323 mutex_unlock(&event_mutex);
5324
5325 /*
5326 * If the first trailing whitespace is replaced with '\0' by strstrip,
5327 * turn it back into a space.
5328 */
5329 if (orig_len > strlen(option))
5330 option[strlen(option)] = ' ';
5331
5332 return ret;
5333 }
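/*
 * For illustration, the strings accepted here are the same ones shown by
 * the trace_options file: a core flag name sets it, a "no" prefix clears
 * it, and anything else is tried as a tracer-specific option. The calls
 * below are hypothetical examples:
 *
 *	trace_set_options(tr, "sym-addr");	// set a core flag
 *	trace_set_options(tr, "nooverwrite");	// clear a core flag
 *	trace_set_options(tr, "funcgraph-tail");	// current tracer's option
 */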
5334
apply_trace_boot_options(void)5335 static void __init apply_trace_boot_options(void)
5336 {
5337 char *buf = trace_boot_options_buf;
5338 char *option;
5339
5340 while (true) {
5341 option = strsep(&buf, ",");
5342
5343 if (!option)
5344 break;
5345
5346 if (*option)
5347 trace_set_options(&global_trace, option);
5348
5349 /* Put back the comma to allow this to be called again */
5350 if (buf)
5351 *(buf - 1) = ',';
5352 }
5353 }
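/*
 * For example, booting with a (hypothetical) command line containing:
 *
 *	trace_options=sym-addr,stacktrace,noirq-info
 *
 * fills trace_boot_options_buf, and the loop above hands each
 * comma-separated token to trace_set_options() for the global trace.
 */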
5354
5355 static ssize_t
tracing_trace_options_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)5356 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5357 size_t cnt, loff_t *ppos)
5358 {
5359 struct seq_file *m = filp->private_data;
5360 struct trace_array *tr = m->private;
5361 char buf[64];
5362 int ret;
5363
5364 if (cnt >= sizeof(buf))
5365 return -EINVAL;
5366
5367 if (copy_from_user(buf, ubuf, cnt))
5368 return -EFAULT;
5369
5370 buf[cnt] = 0;
5371
5372 ret = trace_set_options(tr, buf);
5373 if (ret < 0)
5374 return ret;
5375
5376 *ppos += cnt;
5377
5378 return cnt;
5379 }
5380
tracing_trace_options_open(struct inode * inode,struct file * file)5381 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5382 {
5383 struct trace_array *tr = inode->i_private;
5384 int ret;
5385
5386 ret = tracing_check_open_get_tr(tr);
5387 if (ret)
5388 return ret;
5389
5390 ret = single_open(file, tracing_trace_options_show, inode->i_private);
5391 if (ret < 0)
5392 trace_array_put(tr);
5393
5394 return ret;
5395 }
5396
5397 static const struct file_operations tracing_iter_fops = {
5398 .open = tracing_trace_options_open,
5399 .read = seq_read,
5400 .llseek = seq_lseek,
5401 .release = tracing_single_release_tr,
5402 .write = tracing_trace_options_write,
5403 };
5404
5405 static const char readme_msg[] =
5406 "tracing mini-HOWTO:\n\n"
5407 "By default tracefs removes all OTH file permission bits.\n"
5408 "When mounting tracefs an optional group id can be specified\n"
5409 "which adds the group to every directory and file in tracefs:\n\n"
5410 "\t e.g. mount -t tracefs [-o [gid=<gid>]] nodev /sys/kernel/tracing\n\n"
5411 "# echo 0 > tracing_on : quick way to disable tracing\n"
5412 "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5413 " Important files:\n"
5414 " trace\t\t\t- The static contents of the buffer\n"
5415 "\t\t\t To clear the buffer write into this file: echo > trace\n"
5416 " trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5417 " current_tracer\t- function and latency tracers\n"
5418 " available_tracers\t- list of configured tracers for current_tracer\n"
5419 " error_log\t- error log for failed commands (that support it)\n"
5420 " buffer_size_kb\t- view and modify size of per cpu buffer\n"
5421 " buffer_total_size_kb - view total size of all cpu buffers\n\n"
5422 " trace_clock\t\t- change the clock used to order events\n"
5423 " local: Per cpu clock but may not be synced across CPUs\n"
5424 " global: Synced across CPUs but slows tracing down.\n"
5425 " counter: Not a clock, but just an increment\n"
5426 " uptime: Jiffy counter from time of boot\n"
5427 " perf: Same clock that perf events use\n"
5428 #ifdef CONFIG_X86_64
5429 " x86-tsc: TSC cycle counter\n"
5430 #endif
5431 "\n timestamp_mode\t- view the mode used to timestamp events\n"
5432 " delta: Delta difference against a buffer-wide timestamp\n"
5433 " absolute: Absolute (standalone) timestamp\n"
5434 "\n trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5435 "\n trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5436 " tracing_cpumask\t- Limit which CPUs to trace\n"
5437 " instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5438 "\t\t\t Remove sub-buffer with rmdir\n"
5439 " trace_options\t\t- Set format or modify how tracing happens\n"
5440 "\t\t\t Disable an option by prefixing 'no' to the\n"
5441 "\t\t\t option name\n"
5442 " saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5443 #ifdef CONFIG_DYNAMIC_FTRACE
5444 "\n available_filter_functions - list of functions that can be filtered on\n"
5445 " set_ftrace_filter\t- echo function name in here to only trace these\n"
5446 "\t\t\t functions\n"
5447 "\t accepts: func_full_name or glob-matching-pattern\n"
5448 "\t modules: Can select a group via module\n"
5449 "\t Format: :mod:<module-name>\n"
5450 "\t example: echo :mod:ext3 > set_ftrace_filter\n"
5451 "\t triggers: a command to perform when function is hit\n"
5452 "\t Format: <function>:<trigger>[:count]\n"
5453 "\t trigger: traceon, traceoff\n"
5454 "\t\t enable_event:<system>:<event>\n"
5455 "\t\t disable_event:<system>:<event>\n"
5456 #ifdef CONFIG_STACKTRACE
5457 "\t\t stacktrace\n"
5458 #endif
5459 #ifdef CONFIG_TRACER_SNAPSHOT
5460 "\t\t snapshot\n"
5461 #endif
5462 "\t\t dump\n"
5463 "\t\t cpudump\n"
5464 "\t example: echo do_fault:traceoff > set_ftrace_filter\n"
5465 "\t echo do_trap:traceoff:3 > set_ftrace_filter\n"
5466 "\t The first one will disable tracing every time do_fault is hit\n"
5467 "\t The second will disable tracing at most 3 times when do_trap is hit\n"
5468 "\t The first time do trap is hit and it disables tracing, the\n"
5469 "\t counter will decrement to 2. If tracing is already disabled,\n"
5470 "\t the counter will not decrement. It only decrements when the\n"
5471 "\t trigger did work\n"
5472 "\t To remove trigger without count:\n"
5473 "\t echo '!<function>:<trigger> > set_ftrace_filter\n"
5474 "\t To remove trigger with a count:\n"
5475 "\t echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5476 " set_ftrace_notrace\t- echo function name in here to never trace.\n"
5477 "\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5478 "\t modules: Can select a group via module command :mod:\n"
5479 "\t Does not accept triggers\n"
5480 #endif /* CONFIG_DYNAMIC_FTRACE */
5481 #ifdef CONFIG_FUNCTION_TRACER
5482 " set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5483 "\t\t (function)\n"
5484 " set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5485 "\t\t (function)\n"
5486 #endif
5487 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5488 " set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5489 " set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5490 " max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5491 #endif
5492 #ifdef CONFIG_TRACER_SNAPSHOT
5493 "\n snapshot\t\t- Like 'trace' but shows the content of the static\n"
5494 "\t\t\t snapshot buffer. Read the contents for more\n"
5495 "\t\t\t information\n"
5496 #endif
5497 #ifdef CONFIG_STACK_TRACER
5498 " stack_trace\t\t- Shows the max stack trace when active\n"
5499 " stack_max_size\t- Shows current max stack size that was traced\n"
5500 "\t\t\t Write into this file to reset the max size (trigger a\n"
5501 "\t\t\t new trace)\n"
5502 #ifdef CONFIG_DYNAMIC_FTRACE
5503 " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5504 "\t\t\t traces\n"
5505 #endif
5506 #endif /* CONFIG_STACK_TRACER */
5507 #ifdef CONFIG_DYNAMIC_EVENTS
5508 " dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5509 "\t\t\t Write into this file to define/undefine new trace events.\n"
5510 #endif
5511 #ifdef CONFIG_KPROBE_EVENTS
5512 " kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5513 "\t\t\t Write into this file to define/undefine new trace events.\n"
5514 #endif
5515 #ifdef CONFIG_UPROBE_EVENTS
5516 " uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5517 "\t\t\t Write into this file to define/undefine new trace events.\n"
5518 #endif
5519 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
5520 defined(CONFIG_FPROBE_EVENTS)
5521 "\t accepts: event-definitions (one definition per line)\n"
5522 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5523 "\t Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5524 "\t r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5525 #endif
5526 #ifdef CONFIG_FPROBE_EVENTS
5527 "\t f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
5528 "\t t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
5529 #endif
5530 #ifdef CONFIG_HIST_TRIGGERS
5531 "\t s:[synthetic/]<event> <field> [<field>]\n"
5532 #endif
5533 "\t e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5534 "\t -:[<group>/][<event>]\n"
5535 #ifdef CONFIG_KPROBE_EVENTS
5536 "\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5537 "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5538 #endif
5539 #ifdef CONFIG_UPROBE_EVENTS
5540 " place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5541 #endif
5542 "\t args: <name>=fetcharg[:type]\n"
5543 "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5544 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5545 "\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5546 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
5547 "\t <argname>[->field[->field|.field...]],\n"
5548 #endif
5549 #else
5550 "\t $stack<index>, $stack, $retval, $comm,\n"
5551 #endif
5552 "\t +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5553 "\t kernel return probes support: $retval, $arg<N>, $comm\n"
5554 "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
5555 "\t b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5556 "\t symstr, %pd/%pD, <type>\\[<array-size>\\]\n"
5557 #ifdef CONFIG_HIST_TRIGGERS
5558 "\t field: <stype> <name>;\n"
5559 "\t stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5560 "\t [unsigned] char/int/long\n"
5561 #endif
5562 "\t efield: For event probes ('e' types), the field is on of the fields\n"
5563 "\t of the <attached-group>/<attached-event>.\n"
5564 #endif
5565 " set_event\t\t- Enables events by name written into it\n"
5566 "\t\t\t Can enable module events via: :mod:<module>\n"
5567 " events/\t\t- Directory containing all trace event subsystems:\n"
5568 " enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5569 " events/<system>/\t- Directory containing all trace events for <system>:\n"
5570 " enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5571 "\t\t\t events\n"
5572 " filter\t\t- If set, only events passing filter are traced\n"
5573 " events/<system>/<event>/\t- Directory containing control files for\n"
5574 "\t\t\t <event>:\n"
5575 " enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5576 " filter\t\t- If set, only events passing filter are traced\n"
5577 " trigger\t\t- If set, a command to perform when event is hit\n"
5578 "\t Format: <trigger>[:count][if <filter>]\n"
5579 "\t trigger: traceon, traceoff\n"
5580 "\t enable_event:<system>:<event>\n"
5581 "\t disable_event:<system>:<event>\n"
5582 #ifdef CONFIG_HIST_TRIGGERS
5583 "\t enable_hist:<system>:<event>\n"
5584 "\t disable_hist:<system>:<event>\n"
5585 #endif
5586 #ifdef CONFIG_STACKTRACE
5587 "\t\t stacktrace\n"
5588 #endif
5589 #ifdef CONFIG_TRACER_SNAPSHOT
5590 "\t\t snapshot\n"
5591 #endif
5592 #ifdef CONFIG_HIST_TRIGGERS
5593 "\t\t hist (see below)\n"
5594 #endif
5595 "\t example: echo traceoff > events/block/block_unplug/trigger\n"
5596 "\t echo traceoff:3 > events/block/block_unplug/trigger\n"
5597 "\t echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5598 "\t events/block/block_unplug/trigger\n"
5599 "\t The first disables tracing every time block_unplug is hit.\n"
5600 "\t The second disables tracing the first 3 times block_unplug is hit.\n"
5601 "\t The third enables the kmalloc event the first 3 times block_unplug\n"
5602 "\t is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5603 "\t Like function triggers, the counter is only decremented if it\n"
5604 "\t enabled or disabled tracing.\n"
5605 "\t To remove a trigger without a count:\n"
5606 "\t echo '!<trigger> > <system>/<event>/trigger\n"
5607 "\t To remove a trigger with a count:\n"
5608 "\t echo '!<trigger>:0 > <system>/<event>/trigger\n"
5609 "\t Filters can be ignored when removing a trigger.\n"
5610 #ifdef CONFIG_HIST_TRIGGERS
5611 " hist trigger\t- If set, event hits are aggregated into a hash table\n"
5612 "\t Format: hist:keys=<field1[,field2,...]>\n"
5613 "\t [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5614 "\t [:values=<field1[,field2,...]>]\n"
5615 "\t [:sort=<field1[,field2,...]>]\n"
5616 "\t [:size=#entries]\n"
5617 "\t [:pause][:continue][:clear]\n"
5618 "\t [:name=histname1]\n"
5619 "\t [:nohitcount]\n"
5620 "\t [:<handler>.<action>]\n"
5621 "\t [if <filter>]\n\n"
5622 "\t Note, special fields can be used as well:\n"
5623 "\t common_timestamp - to record current timestamp\n"
5624 "\t common_cpu - to record the CPU the event happened on\n"
5625 "\n"
5626 "\t A hist trigger variable can be:\n"
5627 "\t - a reference to a field e.g. x=current_timestamp,\n"
5628 "\t - a reference to another variable e.g. y=$x,\n"
5629 "\t - a numeric literal: e.g. ms_per_sec=1000,\n"
5630 "\t - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5631 "\n"
5632 "\t hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5633 "\t multiplication(*) and division(/) operators. An operand can be either a\n"
5634 "\t variable reference, field or numeric literal.\n"
5635 "\n"
5636 "\t When a matching event is hit, an entry is added to a hash\n"
5637 "\t table using the key(s) and value(s) named, and the value of a\n"
5638 "\t sum called 'hitcount' is incremented. Keys and values\n"
5639 "\t correspond to fields in the event's format description. Keys\n"
5640 "\t can be any field, or the special string 'common_stacktrace'.\n"
5641 "\t Compound keys consisting of up to two fields can be specified\n"
5642 "\t by the 'keys' keyword. Values must correspond to numeric\n"
5643 "\t fields. Sort keys consisting of up to two fields can be\n"
5644 "\t specified using the 'sort' keyword. The sort direction can\n"
5645 "\t be modified by appending '.descending' or '.ascending' to a\n"
5646 "\t sort field. The 'size' parameter can be used to specify more\n"
5647 "\t or fewer than the default 2048 entries for the hashtable size.\n"
5648 "\t If a hist trigger is given a name using the 'name' parameter,\n"
5649 "\t its histogram data will be shared with other triggers of the\n"
5650 "\t same name, and trigger hits will update this common data.\n\n"
5651 "\t Reading the 'hist' file for the event will dump the hash\n"
5652 "\t table in its entirety to stdout. If there are multiple hist\n"
5653 "\t triggers attached to an event, there will be a table for each\n"
5654 "\t trigger in the output. The table displayed for a named\n"
5655 "\t trigger will be the same as any other instance having the\n"
5656 "\t same name. The default format used to display a given field\n"
5657 "\t can be modified by appending any of the following modifiers\n"
5658 "\t to the field name, as applicable:\n\n"
5659 "\t .hex display a number as a hex value\n"
5660 "\t .sym display an address as a symbol\n"
5661 "\t .sym-offset display an address as a symbol and offset\n"
5662 "\t .execname display a common_pid as a program name\n"
5663 "\t .syscall display a syscall id as a syscall name\n"
5664 "\t .log2 display log2 value rather than raw number\n"
5665 "\t .buckets=size display values in groups of size rather than raw number\n"
5666 "\t .usecs display a common_timestamp in microseconds\n"
5667 "\t .percent display a number of percentage value\n"
5668 "\t .graph display a bar-graph of a value\n\n"
5669 "\t The 'pause' parameter can be used to pause an existing hist\n"
5670 "\t trigger or to start a hist trigger but not log any events\n"
5671 "\t until told to do so. 'continue' can be used to start or\n"
5672 "\t restart a paused hist trigger.\n\n"
5673 "\t The 'clear' parameter will clear the contents of a running\n"
5674 "\t hist trigger and leave its current paused/active state\n"
5675 "\t unchanged.\n\n"
5676 "\t The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5677 "\t raw hitcount in the histogram.\n\n"
5678 "\t The enable_hist and disable_hist triggers can be used to\n"
5679 "\t have one event conditionally start and stop another event's\n"
5680 "\t already-attached hist trigger. The syntax is analogous to\n"
5681 "\t the enable_event and disable_event triggers.\n\n"
5682 "\t Hist trigger handlers and actions are executed whenever a\n"
5683 "\t a histogram entry is added or updated. They take the form:\n\n"
5684 "\t <handler>.<action>\n\n"
5685 "\t The available handlers are:\n\n"
5686 "\t onmatch(matching.event) - invoke on addition or update\n"
5687 "\t onmax(var) - invoke if var exceeds current max\n"
5688 "\t onchange(var) - invoke action if var changes\n\n"
5689 "\t The available actions are:\n\n"
5690 "\t trace(<synthetic_event>,param list) - generate synthetic event\n"
5691 "\t save(field,...) - save current event fields\n"
5692 #ifdef CONFIG_TRACER_SNAPSHOT
5693 "\t snapshot() - snapshot the trace buffer\n\n"
5694 #endif
5695 #ifdef CONFIG_SYNTH_EVENTS
5696 " events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5697 "\t Write into this file to define/undefine new synthetic events.\n"
5698 "\t example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5699 #endif
5700 #endif
5701 ;
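/*
 * A minimal end-to-end session using the files documented in readme_msg
 * above (paths assume tracefs is mounted at /sys/kernel/tracing):
 *
 *	# cd /sys/kernel/tracing
 *	# echo function > current_tracer
 *	# echo 1 > tracing_on
 *	# sleep 1
 *	# echo 0 > tracing_on
 *	# head trace
 *
 * Each step maps to one of the control files described in the mini-HOWTO.
 */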
5702
5703 static ssize_t
5704 tracing_readme_read(struct file *filp, char __user *ubuf,
5705 size_t cnt, loff_t *ppos)
5706 {
5707 return simple_read_from_buffer(ubuf, cnt, ppos,
5708 readme_msg, strlen(readme_msg));
5709 }
5710
5711 static const struct file_operations tracing_readme_fops = {
5712 .open = tracing_open_generic,
5713 .read = tracing_readme_read,
5714 .llseek = generic_file_llseek,
5715 };
5716
5717 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5718 static union trace_eval_map_item *
5719 update_eval_map(union trace_eval_map_item *ptr)
5720 {
5721 if (!ptr->map.eval_string) {
5722 if (ptr->tail.next) {
5723 ptr = ptr->tail.next;
5724 /* Set ptr to the next real item (skip head) */
5725 ptr++;
5726 } else
5727 return NULL;
5728 }
5729 return ptr;
5730 }
5731
5732 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5733 {
5734 union trace_eval_map_item *ptr = v;
5735
5736 /*
5737 * Paranoid! If ptr points to end, we don't want to increment past it.
5738 * This really should never happen.
5739 */
5740 (*pos)++;
5741 ptr = update_eval_map(ptr);
5742 if (WARN_ON_ONCE(!ptr))
5743 return NULL;
5744
5745 ptr++;
5746 ptr = update_eval_map(ptr);
5747
5748 return ptr;
5749 }
5750
5751 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5752 {
5753 union trace_eval_map_item *v;
5754 loff_t l = 0;
5755
5756 mutex_lock(&trace_eval_mutex);
5757
5758 v = trace_eval_maps;
5759 if (v)
5760 v++;
5761
5762 while (v && l < *pos) {
5763 v = eval_map_next(m, v, &l);
5764 }
5765
5766 return v;
5767 }
5768
5769 static void eval_map_stop(struct seq_file *m, void *v)
5770 {
5771 mutex_unlock(&trace_eval_mutex);
5772 }
5773
5774 static int eval_map_show(struct seq_file *m, void *v)
5775 {
5776 union trace_eval_map_item *ptr = v;
5777
5778 seq_printf(m, "%s %ld (%s)\n",
5779 ptr->map.eval_string, ptr->map.eval_value,
5780 ptr->map.system);
5781
5782 return 0;
5783 }
5784
5785 static const struct seq_operations tracing_eval_map_seq_ops = {
5786 .start = eval_map_start,
5787 .next = eval_map_next,
5788 .stop = eval_map_stop,
5789 .show = eval_map_show,
5790 };
5791
5792 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5793 {
5794 int ret;
5795
5796 ret = tracing_check_open_get_tr(NULL);
5797 if (ret)
5798 return ret;
5799
5800 return seq_open(filp, &tracing_eval_map_seq_ops);
5801 }
5802
5803 static const struct file_operations tracing_eval_map_fops = {
5804 .open = tracing_eval_map_open,
5805 .read = seq_read,
5806 .llseek = seq_lseek,
5807 .release = seq_release,
5808 };
5809
5810 static inline union trace_eval_map_item *
5811 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5812 {
5813 /* Return tail of array given the head */
5814 return ptr + ptr->head.length + 1;
5815 }
5816
5817 static void
5818 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5819 int len)
5820 {
5821 struct trace_eval_map **stop;
5822 struct trace_eval_map **map;
5823 union trace_eval_map_item *map_array;
5824 union trace_eval_map_item *ptr;
5825
5826 stop = start + len;
5827
5828 /*
5829 * The trace_eval_maps contains the map plus a head and tail item,
5830 * where the head holds the module and length of array, and the
5831 * tail holds a pointer to the next list.
5832 */
5833 map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5834 if (!map_array) {
5835 pr_warn("Unable to allocate trace eval mapping\n");
5836 return;
5837 }
5838
5839 guard(mutex)(&trace_eval_mutex);
5840
5841 if (!trace_eval_maps)
5842 trace_eval_maps = map_array;
5843 else {
5844 ptr = trace_eval_maps;
5845 for (;;) {
5846 ptr = trace_eval_jmp_to_tail(ptr);
5847 if (!ptr->tail.next)
5848 break;
5849 ptr = ptr->tail.next;
5850
5851 }
5852 ptr->tail.next = map_array;
5853 }
5854 map_array->head.mod = mod;
5855 map_array->head.length = len;
5856 map_array++;
5857
5858 for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5859 map_array->map = **map;
5860 map_array++;
5861 }
5862 memset(map_array, 0, sizeof(*map_array));
5863 }
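/*
 * Resulting layout of one map_array allocation (len == N), matching the
 * head/tail description above:
 *
 *	map_array[0]		head  { .mod = mod, .length = N }
 *	map_array[1..N]		map   copies of *start[0] .. *start[N-1]
 *	map_array[N+1]		tail  zeroed; .next links the next chunk
 *
 * trace_eval_jmp_to_tail() relies on this layout: ptr + ptr->head.length + 1
 * lands exactly on the tail entry.
 */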
5864
5865 static void trace_create_eval_file(struct dentry *d_tracer)
5866 {
5867 trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
5868 NULL, &tracing_eval_map_fops);
5869 }
5870
5871 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5872 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5873 static inline void trace_insert_eval_map_file(struct module *mod,
5874 struct trace_eval_map **start, int len) { }
5875 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5876
5877 static void
5878 trace_event_update_with_eval_map(struct module *mod,
5879 struct trace_eval_map **start,
5880 int len)
5881 {
5882 struct trace_eval_map **map;
5883
5884 /* Always run sanitizer only if btf_type_tag attr exists. */
5885 if (len <= 0) {
5886 if (!(IS_ENABLED(CONFIG_DEBUG_INFO_BTF) &&
5887 IS_ENABLED(CONFIG_PAHOLE_HAS_BTF_TAG) &&
5888 __has_attribute(btf_type_tag)))
5889 return;
5890 }
5891
5892 map = start;
5893
5894 trace_event_update_all(map, len);
5895
5896 if (len <= 0)
5897 return;
5898
5899 trace_insert_eval_map_file(mod, start, len);
5900 }
5901
5902 static ssize_t
5903 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5904 size_t cnt, loff_t *ppos)
5905 {
5906 struct trace_array *tr = filp->private_data;
5907 char buf[MAX_TRACER_SIZE+2];
5908 int r;
5909
5910 scoped_guard(mutex, &trace_types_lock) {
5911 r = sprintf(buf, "%s\n", tr->current_trace->name);
5912 }
5913
5914 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5915 }
5916
5917 int tracer_init(struct tracer *t, struct trace_array *tr)
5918 {
5919 tracing_reset_online_cpus(&tr->array_buffer);
5920 return t->init(tr);
5921 }
5922
5923 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5924 {
5925 int cpu;
5926
5927 for_each_tracing_cpu(cpu)
5928 per_cpu_ptr(buf->data, cpu)->entries = val;
5929 }
5930
5931 static void update_buffer_entries(struct array_buffer *buf, int cpu)
5932 {
5933 if (cpu == RING_BUFFER_ALL_CPUS) {
5934 set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
5935 } else {
5936 per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
5937 }
5938 }
5939
5940 #ifdef CONFIG_TRACER_MAX_TRACE
5941 /* resize @trace_buf's buffer to the size of @size_buf's entries */
5942 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5943 struct array_buffer *size_buf, int cpu_id)
5944 {
5945 int cpu, ret = 0;
5946
5947 if (cpu_id == RING_BUFFER_ALL_CPUS) {
5948 for_each_tracing_cpu(cpu) {
5949 ret = ring_buffer_resize(trace_buf->buffer,
5950 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5951 if (ret < 0)
5952 break;
5953 per_cpu_ptr(trace_buf->data, cpu)->entries =
5954 per_cpu_ptr(size_buf->data, cpu)->entries;
5955 }
5956 } else {
5957 ret = ring_buffer_resize(trace_buf->buffer,
5958 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5959 if (ret == 0)
5960 per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5961 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5962 }
5963
5964 return ret;
5965 }
5966 #endif /* CONFIG_TRACER_MAX_TRACE */
5967
5968 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5969 unsigned long size, int cpu)
5970 {
5971 int ret;
5972
5973 /*
5974 * If kernel or user changes the size of the ring buffer
5975 * we use the size that was given, and we can forget about
5976 * expanding it later.
5977 */
5978 trace_set_ring_buffer_expanded(tr);
5979
5980 /* May be called before buffers are initialized */
5981 if (!tr->array_buffer.buffer)
5982 return 0;
5983
5984 /* Do not allow tracing while resizing ring buffer */
5985 tracing_stop_tr(tr);
5986
5987 ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
5988 if (ret < 0)
5989 goto out_start;
5990
5991 #ifdef CONFIG_TRACER_MAX_TRACE
5992 if (!tr->allocated_snapshot)
5993 goto out;
5994
5995 ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5996 if (ret < 0) {
5997 int r = resize_buffer_duplicate_size(&tr->array_buffer,
5998 &tr->array_buffer, cpu);
5999 if (r < 0) {
6000 /*
6001 * AARGH! We are left with different
6002 * size max buffer!!!!
6003 * The max buffer is our "snapshot" buffer.
6004 * When a tracer needs a snapshot (one of the
6005 * latency tracers), it swaps the max buffer
6006 * with the saved snapshot. We succeeded in
6007 * updating the size of the main buffer, but failed to
6008 * update the size of the max buffer. But when we tried
6009 * to reset the main buffer to the original size, we
6010 * failed there too. This is very unlikely to
6011 * happen, but if it does, warn and kill all
6012 * tracing.
6013 */
6014 WARN_ON(1);
6015 tracing_disabled = 1;
6016 }
6017 goto out_start;
6018 }
6019
6020 update_buffer_entries(&tr->max_buffer, cpu);
6021
6022 out:
6023 #endif /* CONFIG_TRACER_MAX_TRACE */
6024
6025 update_buffer_entries(&tr->array_buffer, cpu);
6026 out_start:
6027 tracing_start_tr(tr);
6028 return ret;
6029 }
6030
6031 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6032 unsigned long size, int cpu_id)
6033 {
6034 guard(mutex)(&trace_types_lock);
6035
6036 if (cpu_id != RING_BUFFER_ALL_CPUS) {
6037 /* make sure this CPU is enabled in the mask */
6038 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask))
6039 return -EINVAL;
6040 }
6041
6042 return __tracing_resize_ring_buffer(tr, size, cpu_id);
6043 }
6044
6045 struct trace_mod_entry {
6046 unsigned long mod_addr;
6047 char mod_name[MODULE_NAME_LEN];
6048 };
6049
6050 struct trace_scratch {
6051 unsigned int clock_id;
6052 unsigned long text_addr;
6053 unsigned long nr_entries;
6054 struct trace_mod_entry entries[];
6055 };
6056
6057 static DEFINE_MUTEX(scratch_mutex);
6058
6059 static int cmp_mod_entry(const void *key, const void *pivot)
6060 {
6061 unsigned long addr = (unsigned long)key;
6062 const struct trace_mod_entry *ent = pivot;
6063
6064 if (addr >= ent[0].mod_addr && addr < ent[1].mod_addr)
6065 return 0;
6066 else
6067 return addr - ent->mod_addr;
6068 }
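/*
 * Example of the comparator's half-open ranges (addresses are made up):
 * with sorted entries at { 0xA000, 0xB000, 0xC000 }, a key of 0xB123
 * matches the middle entry because 0xB000 <= 0xB123 < 0xC000.  The
 * caller below therefore searches over nr_entries - 1 pivots and treats
 * the final entry as a catch-all for higher addresses.
 */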
6069
6070 /**
6071 * trace_adjust_address() - Adjust prev boot address to current address.
6072 * @tr: Persistent ring buffer's trace_array.
6073 * @addr: Address in @tr which is adjusted.
6074 */
6075 unsigned long trace_adjust_address(struct trace_array *tr, unsigned long addr)
6076 {
6077 struct trace_module_delta *module_delta;
6078 struct trace_scratch *tscratch;
6079 struct trace_mod_entry *entry;
6080 unsigned long raddr;
6081 int idx = 0, nr_entries;
6082
6083 /* If we don't have last boot delta, return the address */
6084 if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6085 return addr;
6086
6087 /* tr->module_delta must be protected by rcu. */
6088 guard(rcu)();
6089 tscratch = tr->scratch;
6090 /* if there is no tscratch, module_delta must be NULL. */
6091 module_delta = READ_ONCE(tr->module_delta);
6092 if (!module_delta || !tscratch->nr_entries ||
6093 tscratch->entries[0].mod_addr > addr) {
6094 raddr = addr + tr->text_delta;
6095 return __is_kernel(raddr) || is_kernel_core_data(raddr) ||
6096 is_kernel_rodata(raddr) ? raddr : addr;
6097 }
6098
6099 /* Note that entries must be sorted. */
6100 nr_entries = tscratch->nr_entries;
6101 if (nr_entries == 1 ||
6102 tscratch->entries[nr_entries - 1].mod_addr < addr)
6103 idx = nr_entries - 1;
6104 else {
6105 entry = __inline_bsearch((void *)addr,
6106 tscratch->entries,
6107 nr_entries - 1,
6108 sizeof(tscratch->entries[0]),
6109 cmp_mod_entry);
6110 if (entry)
6111 idx = entry - tscratch->entries;
6112 }
6113
6114 return addr + module_delta->delta[idx];
6115 }
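/*
 * Worked example (addresses are made up): if the previous boot loaded a
 * module at 0xffffffffc0000000 and this boot loaded it at
 * 0xffffffffc0040000, so that the matching module_delta->delta[] slot
 * holds 0x40000, a recorded address of 0xffffffffc0001234 is reported
 * as 0xffffffffc0041234.  Addresses below the first module entry fall
 * back to the kernel text delta (tr->text_delta) instead.
 */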
6116
6117 #ifdef CONFIG_MODULES
6118 static int save_mod(struct module *mod, void *data)
6119 {
6120 struct trace_array *tr = data;
6121 struct trace_scratch *tscratch;
6122 struct trace_mod_entry *entry;
6123 unsigned int size;
6124
6125 tscratch = tr->scratch;
6126 if (!tscratch)
6127 return -1;
6128 size = tr->scratch_size;
6129
6130 if (struct_size(tscratch, entries, tscratch->nr_entries + 1) > size)
6131 return -1;
6132
6133 entry = &tscratch->entries[tscratch->nr_entries];
6134
6135 tscratch->nr_entries++;
6136
6137 entry->mod_addr = (unsigned long)mod->mem[MOD_TEXT].base;
6138 strscpy(entry->mod_name, mod->name);
6139
6140 return 0;
6141 }
6142 #else
6143 static int save_mod(struct module *mod, void *data)
6144 {
6145 return 0;
6146 }
6147 #endif
6148
6149 static void update_last_data(struct trace_array *tr)
6150 {
6151 struct trace_module_delta *module_delta;
6152 struct trace_scratch *tscratch;
6153
6154 if (!(tr->flags & TRACE_ARRAY_FL_BOOT))
6155 return;
6156
6157 if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6158 return;
6159
6160 /* Only if the buffer has previous boot data, clear and update it. */
6161 tr->flags &= ~TRACE_ARRAY_FL_LAST_BOOT;
6162
6163 /* Reset the module list and reload them */
6164 if (tr->scratch) {
6165 struct trace_scratch *tscratch = tr->scratch;
6166
6167 tscratch->clock_id = tr->clock_id;
6168 memset(tscratch->entries, 0,
6169 flex_array_size(tscratch, entries, tscratch->nr_entries));
6170 tscratch->nr_entries = 0;
6171
6172 guard(mutex)(&scratch_mutex);
6173 module_for_each_mod(save_mod, tr);
6174 }
6175
6176 /*
6177 * Need to clear all CPU buffers as there cannot be events
6178 * from the previous boot mixed with events from this boot
6179 * as that will cause a confusing trace. Need to clear all
6180 * CPU buffers, even for those that may currently be offline.
6181 */
6182 tracing_reset_all_cpus(&tr->array_buffer);
6183
6184 /* Using current data now */
6185 tr->text_delta = 0;
6186
6187 if (!tr->scratch)
6188 return;
6189
6190 tscratch = tr->scratch;
6191 module_delta = READ_ONCE(tr->module_delta);
6192 WRITE_ONCE(tr->module_delta, NULL);
6193 kfree_rcu(module_delta, rcu);
6194
6195 /* Set the persistent ring buffer meta data to this address */
6196 tscratch->text_addr = (unsigned long)_text;
6197 }
6198
6199 /**
6200 * tracing_update_buffers - used by tracing facility to expand ring buffers
6201 * @tr: The tracing instance
6202 *
6203 * To save memory when tracing is never used on a system that has it
6204 * configured in, the ring buffers are set to a minimum size. Once a
6205 * user starts to use the tracing facility, they need to grow to
6206 * their default size.
6207 *
6208 * This function is to be called when a tracer is about to be used.
6209 */
6210 int tracing_update_buffers(struct trace_array *tr)
6211 {
6212 int ret = 0;
6213
6214 guard(mutex)(&trace_types_lock);
6215
6216 update_last_data(tr);
6217
6218 if (!tr->ring_buffer_expanded)
6219 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6220 RING_BUFFER_ALL_CPUS);
6221 return ret;
6222 }
6223
6224 struct trace_option_dentry;
6225
6226 static void
6227 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6228
6229 /*
6230 * Used to clear out the tracer before deletion of an instance.
6231 * Must have trace_types_lock held.
6232 */
6233 static void tracing_set_nop(struct trace_array *tr)
6234 {
6235 if (tr->current_trace == &nop_trace)
6236 return;
6237
6238 tr->current_trace->enabled--;
6239
6240 if (tr->current_trace->reset)
6241 tr->current_trace->reset(tr);
6242
6243 tr->current_trace = &nop_trace;
6244 }
6245
6246 static bool tracer_options_updated;
6247
6248 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6249 {
6250 /* Only enable if the directory has been created already. */
6251 if (!tr->dir && !(tr->flags & TRACE_ARRAY_FL_GLOBAL))
6252 return;
6253
6254 /* Only create trace option files after update_tracer_options finish */
6255 if (!tracer_options_updated)
6256 return;
6257
6258 create_trace_option_files(tr, t);
6259 }
6260
6261 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6262 {
6263 struct tracer *t;
6264 #ifdef CONFIG_TRACER_MAX_TRACE
6265 bool had_max_tr;
6266 #endif
6267 int ret;
6268
6269 guard(mutex)(&trace_types_lock);
6270
6271 update_last_data(tr);
6272
6273 if (!tr->ring_buffer_expanded) {
6274 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6275 RING_BUFFER_ALL_CPUS);
6276 if (ret < 0)
6277 return ret;
6278 ret = 0;
6279 }
6280
6281 for (t = trace_types; t; t = t->next) {
6282 if (strcmp(t->name, buf) == 0)
6283 break;
6284 }
6285 if (!t)
6286 return -EINVAL;
6287
6288 if (t == tr->current_trace)
6289 return 0;
6290
6291 #ifdef CONFIG_TRACER_SNAPSHOT
6292 if (t->use_max_tr) {
6293 local_irq_disable();
6294 arch_spin_lock(&tr->max_lock);
6295 ret = tr->cond_snapshot ? -EBUSY : 0;
6296 arch_spin_unlock(&tr->max_lock);
6297 local_irq_enable();
6298 if (ret)
6299 return ret;
6300 }
6301 #endif
6302 /* Some tracers won't work on kernel command line */
6303 if (system_state < SYSTEM_RUNNING && t->noboot) {
6304 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6305 t->name);
6306 return -EINVAL;
6307 }
6308
6309 /* Some tracers are only allowed for the top level buffer */
6310 if (!trace_ok_for_array(t, tr))
6311 return -EINVAL;
6312
6313 /* If trace pipe files are being read, we can't change the tracer */
6314 if (tr->trace_ref)
6315 return -EBUSY;
6316
6317 trace_branch_disable();
6318
6319 tr->current_trace->enabled--;
6320
6321 if (tr->current_trace->reset)
6322 tr->current_trace->reset(tr);
6323
6324 #ifdef CONFIG_TRACER_MAX_TRACE
6325 had_max_tr = tr->current_trace->use_max_tr;
6326
6327 /* Current trace needs to be nop_trace before synchronize_rcu */
6328 tr->current_trace = &nop_trace;
6329
6330 if (had_max_tr && !t->use_max_tr) {
6331 /*
6332 * We need to make sure that the update_max_tr sees that
6333 * current_trace changed to nop_trace to keep it from
6334 * swapping the buffers after we resize it.
6335 * The update_max_tr is called with interrupts disabled,
6336 * so a synchronize_rcu() is sufficient.
6337 */
6338 synchronize_rcu();
6339 free_snapshot(tr);
6340 tracing_disarm_snapshot(tr);
6341 }
6342
6343 if (!had_max_tr && t->use_max_tr) {
6344 ret = tracing_arm_snapshot_locked(tr);
6345 if (ret)
6346 return ret;
6347 }
6348 #else
6349 tr->current_trace = &nop_trace;
6350 #endif
6351
6352 if (t->init) {
6353 ret = tracer_init(t, tr);
6354 if (ret) {
6355 #ifdef CONFIG_TRACER_MAX_TRACE
6356 if (t->use_max_tr)
6357 tracing_disarm_snapshot(tr);
6358 #endif
6359 return ret;
6360 }
6361 }
6362
6363 tr->current_trace = t;
6364 tr->current_trace->enabled++;
6365 trace_branch_enable(tr);
6366
6367 return 0;
6368 }
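/*
 * Userspace reaches tracing_set_tracer() through the current_tracer file,
 * for example:
 *
 *	# echo function_graph > /sys/kernel/tracing/current_tracer
 *	# cat /sys/kernel/tracing/current_tracer
 *	function_graph
 *
 * A name that is not in available_tracers fails with -EINVAL, and the
 * write fails with -EBUSY while a trace_pipe reader holds a reference.
 */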
6369
6370 static ssize_t
6371 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6372 size_t cnt, loff_t *ppos)
6373 {
6374 struct trace_array *tr = filp->private_data;
6375 char buf[MAX_TRACER_SIZE+1];
6376 char *name;
6377 size_t ret;
6378 int err;
6379
6380 ret = cnt;
6381
6382 if (cnt > MAX_TRACER_SIZE)
6383 cnt = MAX_TRACER_SIZE;
6384
6385 if (copy_from_user(buf, ubuf, cnt))
6386 return -EFAULT;
6387
6388 buf[cnt] = 0;
6389
6390 name = strim(buf);
6391
6392 err = tracing_set_tracer(tr, name);
6393 if (err)
6394 return err;
6395
6396 *ppos += ret;
6397
6398 return ret;
6399 }
6400
6401 static ssize_t
6402 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6403 size_t cnt, loff_t *ppos)
6404 {
6405 char buf[64];
6406 int r;
6407
6408 r = snprintf(buf, sizeof(buf), "%ld\n",
6409 *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6410 if (r > sizeof(buf))
6411 r = sizeof(buf);
6412 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6413 }
6414
6415 static ssize_t
6416 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6417 size_t cnt, loff_t *ppos)
6418 {
6419 unsigned long val;
6420 int ret;
6421
6422 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6423 if (ret)
6424 return ret;
6425
6426 *ptr = val * 1000;
6427
6428 return cnt;
6429 }
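/*
 * Example of the unit conversion done by the two helpers above: writing
 * "250" stores 250 * 1000 = 250000 nanoseconds, and a subsequent read
 * reports "250" (microseconds) again; a stored value of -1 is reported
 * back as -1 to mean "unset".
 */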
6430
6431 static ssize_t
6432 tracing_thresh_read(struct file *filp, char __user *ubuf,
6433 size_t cnt, loff_t *ppos)
6434 {
6435 return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6436 }
6437
6438 static ssize_t
6439 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6440 size_t cnt, loff_t *ppos)
6441 {
6442 struct trace_array *tr = filp->private_data;
6443 int ret;
6444
6445 guard(mutex)(&trace_types_lock);
6446 ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6447 if (ret < 0)
6448 return ret;
6449
6450 if (tr->current_trace->update_thresh) {
6451 ret = tr->current_trace->update_thresh(tr);
6452 if (ret < 0)
6453 return ret;
6454 }
6455
6456 return cnt;
6457 }
6458
6459 #ifdef CONFIG_TRACER_MAX_TRACE
6460
6461 static ssize_t
6462 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6463 size_t cnt, loff_t *ppos)
6464 {
6465 struct trace_array *tr = filp->private_data;
6466
6467 return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6468 }
6469
6470 static ssize_t
6471 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6472 size_t cnt, loff_t *ppos)
6473 {
6474 struct trace_array *tr = filp->private_data;
6475
6476 return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6477 }
6478
6479 #endif
6480
6481 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6482 {
6483 if (cpu == RING_BUFFER_ALL_CPUS) {
6484 if (cpumask_empty(tr->pipe_cpumask)) {
6485 cpumask_setall(tr->pipe_cpumask);
6486 return 0;
6487 }
6488 } else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6489 cpumask_set_cpu(cpu, tr->pipe_cpumask);
6490 return 0;
6491 }
6492 return -EBUSY;
6493 }
6494
6495 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6496 {
6497 if (cpu == RING_BUFFER_ALL_CPUS) {
6498 WARN_ON(!cpumask_full(tr->pipe_cpumask));
6499 cpumask_clear(tr->pipe_cpumask);
6500 } else {
6501 WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6502 cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6503 }
6504 }
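/*
 * open_pipe_on_cpu()/close_pipe_on_cpu() make per-CPU and all-CPU pipe
 * readers mutually exclusive: opening trace_pipe (RING_BUFFER_ALL_CPUS)
 * only succeeds while pipe_cpumask is empty and then claims every bit,
 * while opening per_cpu/cpuN/trace_pipe claims only bit N.  Any second
 * open that would overlap an existing claim fails with -EBUSY.
 */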
6505
6506 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6507 {
6508 struct trace_array *tr = inode->i_private;
6509 struct trace_iterator *iter;
6510 int cpu;
6511 int ret;
6512
6513 ret = tracing_check_open_get_tr(tr);
6514 if (ret)
6515 return ret;
6516
6517 guard(mutex)(&trace_types_lock);
6518 cpu = tracing_get_cpu(inode);
6519 ret = open_pipe_on_cpu(tr, cpu);
6520 if (ret)
6521 goto fail_pipe_on_cpu;
6522
6523 /* create a buffer to store the information to pass to userspace */
6524 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6525 if (!iter) {
6526 ret = -ENOMEM;
6527 goto fail_alloc_iter;
6528 }
6529
6530 trace_seq_init(&iter->seq);
6531 iter->trace = tr->current_trace;
6532
6533 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6534 ret = -ENOMEM;
6535 goto fail;
6536 }
6537
6538 /* trace pipe does not show start of buffer */
6539 cpumask_setall(iter->started);
6540
6541 if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6542 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6543
6544 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6545 if (trace_clocks[tr->clock_id].in_ns)
6546 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6547
6548 iter->tr = tr;
6549 iter->array_buffer = &tr->array_buffer;
6550 iter->cpu_file = cpu;
6551 mutex_init(&iter->mutex);
6552 filp->private_data = iter;
6553
6554 if (iter->trace->pipe_open)
6555 iter->trace->pipe_open(iter);
6556
6557 nonseekable_open(inode, filp);
6558
6559 tr->trace_ref++;
6560
6561 return ret;
6562
6563 fail:
6564 kfree(iter);
6565 fail_alloc_iter:
6566 close_pipe_on_cpu(tr, cpu);
6567 fail_pipe_on_cpu:
6568 __trace_array_put(tr);
6569 return ret;
6570 }
6571
6572 static int tracing_release_pipe(struct inode *inode, struct file *file)
6573 {
6574 struct trace_iterator *iter = file->private_data;
6575 struct trace_array *tr = inode->i_private;
6576
6577 scoped_guard(mutex, &trace_types_lock) {
6578 tr->trace_ref--;
6579
6580 if (iter->trace->pipe_close)
6581 iter->trace->pipe_close(iter);
6582 close_pipe_on_cpu(tr, iter->cpu_file);
6583 }
6584
6585 free_trace_iter_content(iter);
6586 kfree(iter);
6587
6588 trace_array_put(tr);
6589
6590 return 0;
6591 }
6592
6593 static __poll_t
6594 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6595 {
6596 struct trace_array *tr = iter->tr;
6597
6598 /* Iterators are static, they should be filled or empty */
6599 if (trace_buffer_iter(iter, iter->cpu_file))
6600 return EPOLLIN | EPOLLRDNORM;
6601
6602 if (tr->trace_flags & TRACE_ITER_BLOCK)
6603 /*
6604 * Always select as readable when in blocking mode
6605 */
6606 return EPOLLIN | EPOLLRDNORM;
6607 else
6608 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6609 filp, poll_table, iter->tr->buffer_percent);
6610 }
6611
6612 static __poll_t
6613 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6614 {
6615 struct trace_iterator *iter = filp->private_data;
6616
6617 return trace_poll(iter, filp, poll_table);
6618 }
6619
6620 /* Must be called with iter->mutex held. */
6621 static int tracing_wait_pipe(struct file *filp)
6622 {
6623 struct trace_iterator *iter = filp->private_data;
6624 int ret;
6625
6626 while (trace_empty(iter)) {
6627
6628 if ((filp->f_flags & O_NONBLOCK)) {
6629 return -EAGAIN;
6630 }
6631
6632 /*
6633 * We block until we read something and tracing is disabled.
6634 * We still block if tracing is disabled, but we have never
6635 * read anything. This allows a user to cat this file, and
6636 * then enable tracing. But after we have read something,
6637 * we give an EOF when tracing is again disabled.
6638 *
6639 * iter->pos will be 0 if we haven't read anything.
6640 */
6641 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6642 break;
6643
6644 mutex_unlock(&iter->mutex);
6645
6646 ret = wait_on_pipe(iter, 0);
6647
6648 mutex_lock(&iter->mutex);
6649
6650 if (ret)
6651 return ret;
6652 }
6653
6654 return 1;
6655 }
6656
6657 static bool update_last_data_if_empty(struct trace_array *tr)
6658 {
6659 if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6660 return false;
6661
6662 if (!ring_buffer_empty(tr->array_buffer.buffer))
6663 return false;
6664
6665 /*
6666 * If the buffer contains the last boot data and all per-cpu
6667 * buffers are empty, reset it from the kernel side.
6668 */
6669 update_last_data(tr);
6670 return true;
6671 }
6672
6673 /*
6674 * Consumer reader.
6675 */
6676 static ssize_t
6677 tracing_read_pipe(struct file *filp, char __user *ubuf,
6678 size_t cnt, loff_t *ppos)
6679 {
6680 struct trace_iterator *iter = filp->private_data;
6681 ssize_t sret;
6682
6683 /*
6684 * Avoid more than one consumer on a single file descriptor.
6685 * This is just a matter of trace coherency; the ring buffer itself
6686 * is protected.
6687 */
6688 guard(mutex)(&iter->mutex);
6689
6690 /* return any leftover data */
6691 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6692 if (sret != -EBUSY)
6693 return sret;
6694
6695 trace_seq_init(&iter->seq);
6696
6697 if (iter->trace->read) {
6698 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6699 if (sret)
6700 return sret;
6701 }
6702
6703 waitagain:
6704 if (update_last_data_if_empty(iter->tr))
6705 return 0;
6706
6707 sret = tracing_wait_pipe(filp);
6708 if (sret <= 0)
6709 return sret;
6710
6711 /* stop when tracing is finished */
6712 if (trace_empty(iter))
6713 return 0;
6714
6715 if (cnt >= TRACE_SEQ_BUFFER_SIZE)
6716 cnt = TRACE_SEQ_BUFFER_SIZE - 1;
6717
6718 /* reset all but tr, trace, and overruns */
6719 trace_iterator_reset(iter);
6720 cpumask_clear(iter->started);
6721 trace_seq_init(&iter->seq);
6722
6723 trace_event_read_lock();
6724 trace_access_lock(iter->cpu_file);
6725 while (trace_find_next_entry_inc(iter) != NULL) {
6726 enum print_line_t ret;
6727 int save_len = iter->seq.seq.len;
6728
6729 ret = print_trace_line(iter);
6730 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6731 /*
6732 * If one print_trace_line() fills the entire trace_seq in one shot,
6733 * trace_seq_to_user() will return -EBUSY because save_len == 0.
6734 * In this case, we need to consume it; otherwise the loop will peek
6735 * at this event again next time, resulting in an infinite loop.
6736 */
6737 if (save_len == 0) {
6738 iter->seq.full = 0;
6739 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6740 trace_consume(iter);
6741 break;
6742 }
6743
6744 /* In other cases, don't print partial lines */
6745 iter->seq.seq.len = save_len;
6746 break;
6747 }
6748 if (ret != TRACE_TYPE_NO_CONSUME)
6749 trace_consume(iter);
6750
6751 if (trace_seq_used(&iter->seq) >= cnt)
6752 break;
6753
6754 /*
6755 * Setting the full flag means we reached the trace_seq buffer
6756 * size and we should leave by partial output condition above.
6757 * One of the trace_seq_* functions is not used properly.
6758 */
6759 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6760 iter->ent->type);
6761 }
6762 trace_access_unlock(iter->cpu_file);
6763 trace_event_read_unlock();
6764
6765 /* Now copy what we have to the user */
6766 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6767 if (iter->seq.readpos >= trace_seq_used(&iter->seq))
6768 trace_seq_init(&iter->seq);
6769
6770 /*
6771 * If there was nothing to send to user, in spite of consuming trace
6772 * entries, go back to wait for more entries.
6773 */
6774 if (sret == -EBUSY)
6775 goto waitagain;
6776
6777 return sret;
6778 }
6779
6780 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6781 unsigned int idx)
6782 {
6783 __free_page(spd->pages[idx]);
6784 }
6785
6786 static size_t
6787 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6788 {
6789 size_t count;
6790 int save_len;
6791 int ret;
6792
6793 /* Seq buffer is page-sized, exactly what we need. */
6794 for (;;) {
6795 save_len = iter->seq.seq.len;
6796 ret = print_trace_line(iter);
6797
6798 if (trace_seq_has_overflowed(&iter->seq)) {
6799 iter->seq.seq.len = save_len;
6800 break;
6801 }
6802
6803 /*
6804 * This should not be hit, because it should only
6805 * be set if the iter->seq overflowed. But check it
6806 * anyway to be safe.
6807 */
6808 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6809 iter->seq.seq.len = save_len;
6810 break;
6811 }
6812
6813 count = trace_seq_used(&iter->seq) - save_len;
6814 if (rem < count) {
6815 rem = 0;
6816 iter->seq.seq.len = save_len;
6817 break;
6818 }
6819
6820 if (ret != TRACE_TYPE_NO_CONSUME)
6821 trace_consume(iter);
6822 rem -= count;
6823 if (!trace_find_next_entry_inc(iter)) {
6824 rem = 0;
6825 iter->ent = NULL;
6826 break;
6827 }
6828 }
6829
6830 return rem;
6831 }
6832
6833 static ssize_t tracing_splice_read_pipe(struct file *filp,
6834 loff_t *ppos,
6835 struct pipe_inode_info *pipe,
6836 size_t len,
6837 unsigned int flags)
6838 {
6839 struct page *pages_def[PIPE_DEF_BUFFERS];
6840 struct partial_page partial_def[PIPE_DEF_BUFFERS];
6841 struct trace_iterator *iter = filp->private_data;
6842 struct splice_pipe_desc spd = {
6843 .pages = pages_def,
6844 .partial = partial_def,
6845 .nr_pages = 0, /* This gets updated below. */
6846 .nr_pages_max = PIPE_DEF_BUFFERS,
6847 .ops = &default_pipe_buf_ops,
6848 .spd_release = tracing_spd_release_pipe,
6849 };
6850 ssize_t ret;
6851 size_t rem;
6852 unsigned int i;
6853
6854 if (splice_grow_spd(pipe, &spd))
6855 return -ENOMEM;
6856
6857 mutex_lock(&iter->mutex);
6858
6859 if (iter->trace->splice_read) {
6860 ret = iter->trace->splice_read(iter, filp,
6861 ppos, pipe, len, flags);
6862 if (ret)
6863 goto out_err;
6864 }
6865
6866 ret = tracing_wait_pipe(filp);
6867 if (ret <= 0)
6868 goto out_err;
6869
6870 if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6871 ret = -EFAULT;
6872 goto out_err;
6873 }
6874
6875 trace_event_read_lock();
6876 trace_access_lock(iter->cpu_file);
6877
6878 /* Fill as many pages as possible. */
6879 for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6880 spd.pages[i] = alloc_page(GFP_KERNEL);
6881 if (!spd.pages[i])
6882 break;
6883
6884 rem = tracing_fill_pipe_page(rem, iter);
6885
6886 /* Copy the data into the page, so we can start over. */
6887 ret = trace_seq_to_buffer(&iter->seq,
6888 page_address(spd.pages[i]),
6889 min((size_t)trace_seq_used(&iter->seq),
6890 (size_t)PAGE_SIZE));
6891 if (ret < 0) {
6892 __free_page(spd.pages[i]);
6893 break;
6894 }
6895 spd.partial[i].offset = 0;
6896 spd.partial[i].len = ret;
6897
6898 trace_seq_init(&iter->seq);
6899 }
6900
6901 trace_access_unlock(iter->cpu_file);
6902 trace_event_read_unlock();
6903 mutex_unlock(&iter->mutex);
6904
6905 spd.nr_pages = i;
6906
6907 if (i)
6908 ret = splice_to_pipe(pipe, &spd);
6909 else
6910 ret = 0;
6911 out:
6912 splice_shrink_spd(&spd);
6913 return ret;
6914
6915 out_err:
6916 mutex_unlock(&iter->mutex);
6917 goto out;
6918 }
6919
6920 static ssize_t
6921 tracing_entries_read(struct file *filp, char __user *ubuf,
6922 size_t cnt, loff_t *ppos)
6923 {
6924 struct inode *inode = file_inode(filp);
6925 struct trace_array *tr = inode->i_private;
6926 int cpu = tracing_get_cpu(inode);
6927 char buf[64];
6928 int r = 0;
6929 ssize_t ret;
6930
6931 mutex_lock(&trace_types_lock);
6932
6933 if (cpu == RING_BUFFER_ALL_CPUS) {
6934 int cpu, buf_size_same;
6935 unsigned long size;
6936
6937 size = 0;
6938 buf_size_same = 1;
6939 /* check if all cpu sizes are same */
6940 for_each_tracing_cpu(cpu) {
6941 /* fill in the size from first enabled cpu */
6942 if (size == 0)
6943 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6944 if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6945 buf_size_same = 0;
6946 break;
6947 }
6948 }
6949
6950 if (buf_size_same) {
6951 if (!tr->ring_buffer_expanded)
6952 r = sprintf(buf, "%lu (expanded: %lu)\n",
6953 size >> 10,
6954 trace_buf_size >> 10);
6955 else
6956 r = sprintf(buf, "%lu\n", size >> 10);
6957 } else
6958 r = sprintf(buf, "X\n");
6959 } else
6960 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6961
6962 mutex_unlock(&trace_types_lock);
6963
6964 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6965 return ret;
6966 }
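/*
 * Example reads of buffer_size_kb produced by the function above (the
 * numbers are illustrative):
 *
 *	"1410\n"		all CPUs sized equally, buffer expanded
 *	"7 (expanded: 1408)\n"	still at the small boot-time size
 *	"X\n"			per-CPU sizes differ
 */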
6967
6968 static ssize_t
6969 tracing_entries_write(struct file *filp, const char __user *ubuf,
6970 size_t cnt, loff_t *ppos)
6971 {
6972 struct inode *inode = file_inode(filp);
6973 struct trace_array *tr = inode->i_private;
6974 unsigned long val;
6975 int ret;
6976
6977 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6978 if (ret)
6979 return ret;
6980
6981 /* must have at least 1 entry */
6982 if (!val)
6983 return -EINVAL;
6984
6985 /* value is in KB */
6986 val <<= 10;
6987 ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6988 if (ret < 0)
6989 return ret;
6990
6991 *ppos += cnt;
6992
6993 return cnt;
6994 }
6995
6996 static ssize_t
6997 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6998 size_t cnt, loff_t *ppos)
6999 {
7000 struct trace_array *tr = filp->private_data;
7001 char buf[64];
7002 int r, cpu;
7003 unsigned long size = 0, expanded_size = 0;
7004
7005 mutex_lock(&trace_types_lock);
7006 for_each_tracing_cpu(cpu) {
7007 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7008 if (!tr->ring_buffer_expanded)
7009 expanded_size += trace_buf_size >> 10;
7010 }
7011 if (tr->ring_buffer_expanded)
7012 r = sprintf(buf, "%lu\n", size);
7013 else
7014 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7015 mutex_unlock(&trace_types_lock);
7016
7017 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7018 }
7019
7020 #define LAST_BOOT_HEADER ((void *)1)
7021
7022 static void *l_next(struct seq_file *m, void *v, loff_t *pos)
7023 {
7024 struct trace_array *tr = m->private;
7025 struct trace_scratch *tscratch = tr->scratch;
7026 unsigned int index = *pos;
7027
7028 (*pos)++;
7029
7030 if (*pos == 1)
7031 return LAST_BOOT_HEADER;
7032
7033 /* Only show offsets of the last boot data */
7034 if (!tscratch || !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
7035 return NULL;
7036
7037 /* *pos 0 is for the header, 1 is for the first module */
7038 index--;
7039
7040 if (index >= tscratch->nr_entries)
7041 return NULL;
7042
7043 return &tscratch->entries[index];
7044 }
7045
7046 static void *l_start(struct seq_file *m, loff_t *pos)
7047 {
7048 mutex_lock(&scratch_mutex);
7049
7050 return l_next(m, NULL, pos);
7051 }
7052
7053 static void l_stop(struct seq_file *m, void *p)
7054 {
7055 mutex_unlock(&scratch_mutex);
7056 }
7057
7058 static void show_last_boot_header(struct seq_file *m, struct trace_array *tr)
7059 {
7060 struct trace_scratch *tscratch = tr->scratch;
7061
7062 /*
7063 * Do not leak KASLR address. This only shows the KASLR address of
7064 * the last boot. When the ring buffer is started, the LAST_BOOT
7065 * flag gets cleared, and this should only report "current".
7066 * Otherwise it shows the KASLR address from the previous boot which
7067 * should not be the same as the current boot.
7068 */
7069 if (tscratch && (tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
7070 seq_printf(m, "%lx\t[kernel]\n", tscratch->text_addr);
7071 else
7072 seq_puts(m, "# Current\n");
7073 }
7074
7075 static int l_show(struct seq_file *m, void *v)
7076 {
7077 struct trace_array *tr = m->private;
7078 struct trace_mod_entry *entry = v;
7079
7080 if (v == LAST_BOOT_HEADER) {
7081 show_last_boot_header(m, tr);
7082 return 0;
7083 }
7084
7085 seq_printf(m, "%lx\t%s\n", entry->mod_addr, entry->mod_name);
7086 return 0;
7087 }
7088
7089 static const struct seq_operations last_boot_seq_ops = {
7090 .start = l_start,
7091 .next = l_next,
7092 .stop = l_stop,
7093 .show = l_show,
7094 };
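/*
 * Example of the last_boot output rendered by these seq ops (addresses
 * are illustrative):
 *
 *	ffffffff9a000000	[kernel]
 *	ffffffffc0a00000	btrfs
 *	ffffffffc09c0000	xor
 *
 * Once the buffer only holds data from the current boot, the header line
 * is simply "# Current" and no module entries are listed.
 */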
7095
7096 static int tracing_last_boot_open(struct inode *inode, struct file *file)
7097 {
7098 struct trace_array *tr = inode->i_private;
7099 struct seq_file *m;
7100 int ret;
7101
7102 ret = tracing_check_open_get_tr(tr);
7103 if (ret)
7104 return ret;
7105
7106 ret = seq_open(file, &last_boot_seq_ops);
7107 if (ret) {
7108 trace_array_put(tr);
7109 return ret;
7110 }
7111
7112 m = file->private_data;
7113 m->private = tr;
7114
7115 return 0;
7116 }
7117
7118 static int tracing_buffer_meta_open(struct inode *inode, struct file *filp)
7119 {
7120 struct trace_array *tr = inode->i_private;
7121 int cpu = tracing_get_cpu(inode);
7122 int ret;
7123
7124 ret = tracing_check_open_get_tr(tr);
7125 if (ret)
7126 return ret;
7127
7128 ret = ring_buffer_meta_seq_init(filp, tr->array_buffer.buffer, cpu);
7129 if (ret < 0)
7130 __trace_array_put(tr);
7131 return ret;
7132 }
7133
7134 static ssize_t
7135 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7136 size_t cnt, loff_t *ppos)
7137 {
7138 /*
7139 * There is no need to read what the user has written; this function
7140 * just makes sure that there is no error when "echo" is used.
7141 */
7142
7143 *ppos += cnt;
7144
7145 return cnt;
7146 }
7147
7148 static int
7149 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7150 {
7151 struct trace_array *tr = inode->i_private;
7152
7153 /* disable tracing ? */
7154 if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7155 tracer_tracing_off(tr);
7156 /* resize the ring buffer to 0 */
7157 tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7158
7159 trace_array_put(tr);
7160
7161 return 0;
7162 }
7163
7164 #define TRACE_MARKER_MAX_SIZE 4096
7165
7166 static ssize_t write_marker_to_buffer(struct trace_array *tr, const char __user *ubuf,
7167 size_t cnt, unsigned long ip)
7168 {
7169 struct ring_buffer_event *event;
7170 enum event_trigger_type tt = ETT_NONE;
7171 struct trace_buffer *buffer;
7172 struct print_entry *entry;
7173 int meta_size;
7174 ssize_t written;
7175 size_t size;
7176 int len;
7177
7178 /* Used in tracing_mark_raw_write() as well */
7179 #define FAULTED_STR "<faulted>"
7180 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7181
7182 meta_size = sizeof(*entry) + 2; /* add '\0' and possible '\n' */
7183 again:
7184 size = cnt + meta_size;
7185
7186 /* If less than "<faulted>", then make sure we can still add that */
7187 if (cnt < FAULTED_SIZE)
7188 size += FAULTED_SIZE - cnt;
7189
7190 buffer = tr->array_buffer.buffer;
7191 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7192 tracing_gen_ctx());
7193 if (unlikely(!event)) {
7194 /*
7195 * If the size was greater than what was allowed, then
7196 * make it smaller and try again.
7197 */
7198 if (size > ring_buffer_max_event_size(buffer)) {
7199 /* cnt < FAULTED size should never be bigger than max */
7200 if (WARN_ON_ONCE(cnt < FAULTED_SIZE))
7201 return -EBADF;
7202 cnt = ring_buffer_max_event_size(buffer) - meta_size;
7203 /* The above should only happen once */
7204 if (WARN_ON_ONCE(cnt + meta_size == size))
7205 return -EBADF;
7206 goto again;
7207 }
7208
7209 /* Ring buffer disabled, return as if not open for write */
7210 return -EBADF;
7211 }
7212
7213 entry = ring_buffer_event_data(event);
7214 entry->ip = ip;
7215
7216 len = copy_from_user_nofault(&entry->buf, ubuf, cnt);
7217 if (len) {
7218 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7219 cnt = FAULTED_SIZE;
7220 written = -EFAULT;
7221 } else
7222 written = cnt;
7223
7224 if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7225 /* do not add \n before testing triggers, but add \0 */
7226 entry->buf[cnt] = '\0';
7227 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7228 }
7229
7230 if (entry->buf[cnt - 1] != '\n') {
7231 entry->buf[cnt] = '\n';
7232 entry->buf[cnt + 1] = '\0';
7233 } else
7234 entry->buf[cnt] = '\0';
7235
7236 if (static_branch_unlikely(&trace_marker_exports_enabled))
7237 ftrace_exports(event, TRACE_EXPORT_MARKER);
7238 __buffer_unlock_commit(buffer, event);
7239
7240 if (tt)
7241 event_triggers_post_call(tr->trace_marker_file, tt);
7242
7243 return written;
7244 }
7245
7246 static ssize_t
7247 tracing_mark_write(struct file *filp, const char __user *ubuf,
7248 size_t cnt, loff_t *fpos)
7249 {
7250 struct trace_array *tr = filp->private_data;
7251 ssize_t written = -ENODEV;
7252 unsigned long ip;
7253
7254 if (tracing_disabled)
7255 return -EINVAL;
7256
7257 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7258 return -EINVAL;
7259
7260 if ((ssize_t)cnt < 0)
7261 return -EINVAL;
7262
7263 if (cnt > TRACE_MARKER_MAX_SIZE)
7264 cnt = TRACE_MARKER_MAX_SIZE;
7265
7266 /* The selftests expect this function to be the IP address */
7267 ip = _THIS_IP_;
7268
7269 /* The global trace_marker can go to multiple instances */
7270 if (tr == &global_trace) {
7271 guard(rcu)();
7272 list_for_each_entry_rcu(tr, &marker_copies, marker_list) {
7273 written = write_marker_to_buffer(tr, ubuf, cnt, ip);
7274 if (written < 0)
7275 break;
7276 }
7277 } else {
7278 written = write_marker_to_buffer(tr, ubuf, cnt, ip);
7279 }
7280
7281 return written;
7282 }
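
/*
 * Example (illustrative only; paths assume the usual tracefs mount at
 * /sys/kernel/tracing): user space reaches the code above by writing to
 * the trace_marker file, and the message appears in the trace annotated
 * with this function as the instruction pointer:
 *
 *	# echo "hello from user space" > /sys/kernel/tracing/trace_marker
 *	# cat /sys/kernel/tracing/trace
 *	   <...>-1234  [002] .....   123.456789: tracing_mark_write: hello from user space
 *
 * Writes longer than TRACE_MARKER_MAX_SIZE (4096 bytes) are truncated.
 */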
7283
7284 static ssize_t write_raw_marker_to_buffer(struct trace_array *tr,
7285 const char __user *ubuf, size_t cnt)
7286 {
7287 struct ring_buffer_event *event;
7288 struct trace_buffer *buffer;
7289 struct raw_data_entry *entry;
7290 ssize_t written;
7291 int size;
7292 int len;
7293
7294 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7295
7296 size = sizeof(*entry) + cnt;
7297 if (cnt < FAULT_SIZE_ID)
7298 size += FAULT_SIZE_ID - cnt;
7299
7300 buffer = tr->array_buffer.buffer;
7301
7302 if (size > ring_buffer_max_event_size(buffer))
7303 return -EINVAL;
7304
7305 event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7306 tracing_gen_ctx());
7307 if (!event)
7308 /* Ring buffer disabled, return as if not open for write */
7309 return -EBADF;
7310
7311 entry = ring_buffer_event_data(event);
7312
7313 len = copy_from_user_nofault(&entry->id, ubuf, cnt);
7314 if (len) {
7315 entry->id = -1;
7316 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7317 written = -EFAULT;
7318 } else
7319 written = cnt;
7320
7321 __buffer_unlock_commit(buffer, event);
7322
7323 return written;
7324 }
7325
7326 static ssize_t
7327 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7328 size_t cnt, loff_t *fpos)
7329 {
7330 struct trace_array *tr = filp->private_data;
7331 ssize_t written = -ENODEV;
7332
7333 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7334
7335 if (tracing_disabled)
7336 return -EINVAL;
7337
7338 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7339 return -EINVAL;
7340
7341 /* The marker must at least have a tag id */
7342 if (cnt < sizeof(unsigned int))
7343 return -EINVAL;
7344
7345 /* The global trace_marker_raw can go to multiple instances */
7346 if (tr == &global_trace) {
7347 guard(rcu)();
7348 list_for_each_entry_rcu(tr, &marker_copies, marker_list) {
7349 written = write_raw_marker_to_buffer(tr, ubuf, cnt);
7350 if (written < 0)
7351 break;
7352 }
7353 } else {
7354 written = write_raw_marker_to_buffer(tr, ubuf, cnt);
7355 }
7356
7357 return written;
7358 }
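
/*
 * Example (a minimal user-space sketch, not taken from this file): the
 * trace_marker_raw file expects binary data whose first sizeof(int) bytes
 * are an application-defined id, followed by an arbitrary payload:
 *
 *	struct { int id; char data[8]; } raw = { .id = 123, .data = "payload" };
 *	int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *
 *	write(fd, &raw, sizeof(raw));
 */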
7359
7360 static int tracing_clock_show(struct seq_file *m, void *v)
7361 {
7362 struct trace_array *tr = m->private;
7363 int i;
7364
7365 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7366 seq_printf(m,
7367 "%s%s%s%s", i ? " " : "",
7368 i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7369 i == tr->clock_id ? "]" : "");
7370 seq_putc(m, '\n');
7371
7372 return 0;
7373 }
7374
7375 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7376 {
7377 int i;
7378
7379 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7380 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7381 break;
7382 }
7383 if (i == ARRAY_SIZE(trace_clocks))
7384 return -EINVAL;
7385
7386 guard(mutex)(&trace_types_lock);
7387
7388 tr->clock_id = i;
7389
7390 ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7391
7392 /*
7393 * New clock may not be consistent with the previous clock.
7394 * Reset the buffer so that it doesn't have incomparable timestamps.
7395 */
7396 tracing_reset_online_cpus(&tr->array_buffer);
7397
7398 #ifdef CONFIG_TRACER_MAX_TRACE
7399 if (tr->max_buffer.buffer)
7400 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7401 tracing_reset_online_cpus(&tr->max_buffer);
7402 #endif
7403
7404 if (tr->scratch && !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) {
7405 struct trace_scratch *tscratch = tr->scratch;
7406
7407 tscratch->clock_id = i;
7408 }
7409
7410 return 0;
7411 }
7412
7413 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7414 size_t cnt, loff_t *fpos)
7415 {
7416 struct seq_file *m = filp->private_data;
7417 struct trace_array *tr = m->private;
7418 char buf[64];
7419 const char *clockstr;
7420 int ret;
7421
7422 if (cnt >= sizeof(buf))
7423 return -EINVAL;
7424
7425 if (copy_from_user(buf, ubuf, cnt))
7426 return -EFAULT;
7427
7428 buf[cnt] = 0;
7429
7430 clockstr = strstrip(buf);
7431
7432 ret = tracing_set_clock(tr, clockstr);
7433 if (ret)
7434 return ret;
7435
7436 *fpos += cnt;
7437
7438 return cnt;
7439 }
7440
7441 static int tracing_clock_open(struct inode *inode, struct file *file)
7442 {
7443 struct trace_array *tr = inode->i_private;
7444 int ret;
7445
7446 ret = tracing_check_open_get_tr(tr);
7447 if (ret)
7448 return ret;
7449
7450 ret = single_open(file, tracing_clock_show, inode->i_private);
7451 if (ret < 0)
7452 trace_array_put(tr);
7453
7454 return ret;
7455 }
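
/*
 * Example (illustrative; the exact clock list depends on the kernel):
 * the available clocks and the current selection can be inspected and
 * changed through the trace_clock file. Note that the ring buffer is
 * reset on a clock change (see tracing_set_clock() above).
 *
 *	# cat /sys/kernel/tracing/trace_clock
 *	[local] global counter uptime perf mono mono_raw boot
 *	# echo global > /sys/kernel/tracing/trace_clock
 */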
7456
7457 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7458 {
7459 struct trace_array *tr = m->private;
7460
7461 guard(mutex)(&trace_types_lock);
7462
7463 if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7464 seq_puts(m, "delta [absolute]\n");
7465 else
7466 seq_puts(m, "[delta] absolute\n");
7467
7468 return 0;
7469 }
7470
7471 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7472 {
7473 struct trace_array *tr = inode->i_private;
7474 int ret;
7475
7476 ret = tracing_check_open_get_tr(tr);
7477 if (ret)
7478 return ret;
7479
7480 ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7481 if (ret < 0)
7482 trace_array_put(tr);
7483
7484 return ret;
7485 }
7486
7487 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7488 {
7489 if (rbe == this_cpu_read(trace_buffered_event))
7490 return ring_buffer_time_stamp(buffer);
7491
7492 return ring_buffer_event_time_stamp(buffer, rbe);
7493 }
7494
7495 /*
7496 * Set or disable using the per CPU trace_buffer_event when possible.
7497 */
7498 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7499 {
7500 guard(mutex)(&trace_types_lock);
7501
7502 if (set && tr->no_filter_buffering_ref++)
7503 return 0;
7504
7505 if (!set) {
7506 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref))
7507 return -EINVAL;
7508
7509 --tr->no_filter_buffering_ref;
7510 }
7511
7512 return 0;
7513 }
7514
7515 struct ftrace_buffer_info {
7516 struct trace_iterator iter;
7517 void *spare;
7518 unsigned int spare_cpu;
7519 unsigned int spare_size;
7520 unsigned int read;
7521 };
7522
7523 #ifdef CONFIG_TRACER_SNAPSHOT
7524 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7525 {
7526 struct trace_array *tr = inode->i_private;
7527 struct trace_iterator *iter;
7528 struct seq_file *m;
7529 int ret;
7530
7531 ret = tracing_check_open_get_tr(tr);
7532 if (ret)
7533 return ret;
7534
7535 if (file->f_mode & FMODE_READ) {
7536 iter = __tracing_open(inode, file, true);
7537 if (IS_ERR(iter))
7538 ret = PTR_ERR(iter);
7539 } else {
7540 /* Writes still need the seq_file to hold the private data */
7541 ret = -ENOMEM;
7542 m = kzalloc(sizeof(*m), GFP_KERNEL);
7543 if (!m)
7544 goto out;
7545 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7546 if (!iter) {
7547 kfree(m);
7548 goto out;
7549 }
7550 ret = 0;
7551
7552 iter->tr = tr;
7553 iter->array_buffer = &tr->max_buffer;
7554 iter->cpu_file = tracing_get_cpu(inode);
7555 m->private = iter;
7556 file->private_data = m;
7557 }
7558 out:
7559 if (ret < 0)
7560 trace_array_put(tr);
7561
7562 return ret;
7563 }
7564
7565 static void tracing_swap_cpu_buffer(void *tr)
7566 {
7567 update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7568 }
7569
7570 static ssize_t
7571 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7572 loff_t *ppos)
7573 {
7574 struct seq_file *m = filp->private_data;
7575 struct trace_iterator *iter = m->private;
7576 struct trace_array *tr = iter->tr;
7577 unsigned long val;
7578 int ret;
7579
7580 ret = tracing_update_buffers(tr);
7581 if (ret < 0)
7582 return ret;
7583
7584 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7585 if (ret)
7586 return ret;
7587
7588 guard(mutex)(&trace_types_lock);
7589
7590 if (tr->current_trace->use_max_tr)
7591 return -EBUSY;
7592
7593 local_irq_disable();
7594 arch_spin_lock(&tr->max_lock);
7595 if (tr->cond_snapshot)
7596 ret = -EBUSY;
7597 arch_spin_unlock(&tr->max_lock);
7598 local_irq_enable();
7599 if (ret)
7600 return ret;
7601
7602 switch (val) {
7603 case 0:
7604 if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
7605 return -EINVAL;
7606 if (tr->allocated_snapshot)
7607 free_snapshot(tr);
7608 break;
7609 case 1:
7610 /* Only allow per-cpu swap if the ring buffer supports it */
7611 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7612 if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
7613 return -EINVAL;
7614 #endif
7615 if (tr->allocated_snapshot)
7616 ret = resize_buffer_duplicate_size(&tr->max_buffer,
7617 &tr->array_buffer, iter->cpu_file);
7618
7619 ret = tracing_arm_snapshot_locked(tr);
7620 if (ret)
7621 return ret;
7622
7623 /* Now, we're going to swap */
7624 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7625 local_irq_disable();
7626 update_max_tr(tr, current, smp_processor_id(), NULL);
7627 local_irq_enable();
7628 } else {
7629 smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7630 (void *)tr, 1);
7631 }
7632 tracing_disarm_snapshot(tr);
7633 break;
7634 default:
7635 if (tr->allocated_snapshot) {
7636 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7637 tracing_reset_online_cpus(&tr->max_buffer);
7638 else
7639 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7640 }
7641 break;
7642 }
7643
7644 if (ret >= 0) {
7645 *ppos += cnt;
7646 ret = cnt;
7647 }
7648
7649 return ret;
7650 }
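
/*
 * Summary of the values accepted by the snapshot file, as implemented by
 * the switch above (see also Documentation/trace/ftrace.rst):
 *
 *	echo 0 > snapshot	frees the snapshot buffer (top level file only)
 *	echo 1 > snapshot	allocates the snapshot buffer if needed and
 *				takes a snapshot (swaps the buffers)
 *	echo 2 > snapshot	(or any other value) clears the snapshot
 *				buffer contents without freeing it
 */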
7651
7652 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7653 {
7654 struct seq_file *m = file->private_data;
7655 int ret;
7656
7657 ret = tracing_release(inode, file);
7658
7659 if (file->f_mode & FMODE_READ)
7660 return ret;
7661
7662 /* If write only, the seq_file is just a stub */
7663 if (m)
7664 kfree(m->private);
7665 kfree(m);
7666
7667 return 0;
7668 }
7669
7670 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7671 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7672 size_t count, loff_t *ppos);
7673 static int tracing_buffers_release(struct inode *inode, struct file *file);
7674 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7675 struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7676
7677 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7678 {
7679 struct ftrace_buffer_info *info;
7680 int ret;
7681
7682 /* The following checks for tracefs lockdown */
7683 ret = tracing_buffers_open(inode, filp);
7684 if (ret < 0)
7685 return ret;
7686
7687 info = filp->private_data;
7688
7689 if (info->iter.trace->use_max_tr) {
7690 tracing_buffers_release(inode, filp);
7691 return -EBUSY;
7692 }
7693
7694 info->iter.snapshot = true;
7695 info->iter.array_buffer = &info->iter.tr->max_buffer;
7696
7697 return ret;
7698 }
7699
7700 #endif /* CONFIG_TRACER_SNAPSHOT */
7701
7702
7703 static const struct file_operations tracing_thresh_fops = {
7704 .open = tracing_open_generic,
7705 .read = tracing_thresh_read,
7706 .write = tracing_thresh_write,
7707 .llseek = generic_file_llseek,
7708 };
7709
7710 #ifdef CONFIG_TRACER_MAX_TRACE
7711 static const struct file_operations tracing_max_lat_fops = {
7712 .open = tracing_open_generic_tr,
7713 .read = tracing_max_lat_read,
7714 .write = tracing_max_lat_write,
7715 .llseek = generic_file_llseek,
7716 .release = tracing_release_generic_tr,
7717 };
7718 #endif
7719
7720 static const struct file_operations set_tracer_fops = {
7721 .open = tracing_open_generic_tr,
7722 .read = tracing_set_trace_read,
7723 .write = tracing_set_trace_write,
7724 .llseek = generic_file_llseek,
7725 .release = tracing_release_generic_tr,
7726 };
7727
7728 static const struct file_operations tracing_pipe_fops = {
7729 .open = tracing_open_pipe,
7730 .poll = tracing_poll_pipe,
7731 .read = tracing_read_pipe,
7732 .splice_read = tracing_splice_read_pipe,
7733 .release = tracing_release_pipe,
7734 };
7735
7736 static const struct file_operations tracing_entries_fops = {
7737 .open = tracing_open_generic_tr,
7738 .read = tracing_entries_read,
7739 .write = tracing_entries_write,
7740 .llseek = generic_file_llseek,
7741 .release = tracing_release_generic_tr,
7742 };
7743
7744 static const struct file_operations tracing_buffer_meta_fops = {
7745 .open = tracing_buffer_meta_open,
7746 .read = seq_read,
7747 .llseek = seq_lseek,
7748 .release = tracing_seq_release,
7749 };
7750
7751 static const struct file_operations tracing_total_entries_fops = {
7752 .open = tracing_open_generic_tr,
7753 .read = tracing_total_entries_read,
7754 .llseek = generic_file_llseek,
7755 .release = tracing_release_generic_tr,
7756 };
7757
7758 static const struct file_operations tracing_free_buffer_fops = {
7759 .open = tracing_open_generic_tr,
7760 .write = tracing_free_buffer_write,
7761 .release = tracing_free_buffer_release,
7762 };
7763
7764 static const struct file_operations tracing_mark_fops = {
7765 .open = tracing_mark_open,
7766 .write = tracing_mark_write,
7767 .release = tracing_release_generic_tr,
7768 };
7769
7770 static const struct file_operations tracing_mark_raw_fops = {
7771 .open = tracing_mark_open,
7772 .write = tracing_mark_raw_write,
7773 .release = tracing_release_generic_tr,
7774 };
7775
7776 static const struct file_operations trace_clock_fops = {
7777 .open = tracing_clock_open,
7778 .read = seq_read,
7779 .llseek = seq_lseek,
7780 .release = tracing_single_release_tr,
7781 .write = tracing_clock_write,
7782 };
7783
7784 static const struct file_operations trace_time_stamp_mode_fops = {
7785 .open = tracing_time_stamp_mode_open,
7786 .read = seq_read,
7787 .llseek = seq_lseek,
7788 .release = tracing_single_release_tr,
7789 };
7790
7791 static const struct file_operations last_boot_fops = {
7792 .open = tracing_last_boot_open,
7793 .read = seq_read,
7794 .llseek = seq_lseek,
7795 .release = tracing_seq_release,
7796 };
7797
7798 #ifdef CONFIG_TRACER_SNAPSHOT
7799 static const struct file_operations snapshot_fops = {
7800 .open = tracing_snapshot_open,
7801 .read = seq_read,
7802 .write = tracing_snapshot_write,
7803 .llseek = tracing_lseek,
7804 .release = tracing_snapshot_release,
7805 };
7806
7807 static const struct file_operations snapshot_raw_fops = {
7808 .open = snapshot_raw_open,
7809 .read = tracing_buffers_read,
7810 .release = tracing_buffers_release,
7811 .splice_read = tracing_buffers_splice_read,
7812 };
7813
7814 #endif /* CONFIG_TRACER_SNAPSHOT */
7815
7816 /*
7817 * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7818 * @filp: The active open file structure
7819 * @ubuf: The userspace provided buffer to read value into
7820 * @cnt: The maximum number of bytes to read
7821 * @ppos: The current "file" position
7822 *
7823 * This function implements the write interface for a struct trace_min_max_param.
7824 * The filp->private_data must point to a trace_min_max_param structure that
7825 * defines where to write the value, the min and the max acceptable values,
7826 * and a lock to protect the write.
7827 */
7828 static ssize_t
7829 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7830 {
7831 struct trace_min_max_param *param = filp->private_data;
7832 u64 val;
7833 int err;
7834
7835 if (!param)
7836 return -EFAULT;
7837
7838 err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7839 if (err)
7840 return err;
7841
7842 if (param->lock)
7843 mutex_lock(param->lock);
7844
7845 if (param->min && val < *param->min)
7846 err = -EINVAL;
7847
7848 if (param->max && val > *param->max)
7849 err = -EINVAL;
7850
7851 if (!err)
7852 *param->val = val;
7853
7854 if (param->lock)
7855 mutex_unlock(param->lock);
7856
7857 if (err)
7858 return err;
7859
7860 return cnt;
7861 }
7862
7863 /*
7864 * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7865 * @filp: The active open file structure
7866 * @ubuf: The userspace provided buffer to read value into
7867 * @cnt: The maximum number of bytes to read
7868 * @ppos: The current "file" position
7869 *
7870 * This function implements the read interface for a struct trace_min_max_param.
7871 * The filp->private_data must point to a trace_min_max_param struct with valid
7872 * data.
7873 */
7874 static ssize_t
7875 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7876 {
7877 struct trace_min_max_param *param = filp->private_data;
7878 char buf[U64_STR_SIZE];
7879 int len;
7880 u64 val;
7881
7882 if (!param)
7883 return -EFAULT;
7884
7885 val = *param->val;
7886
7887 if (cnt > sizeof(buf))
7888 cnt = sizeof(buf);
7889
7890 len = snprintf(buf, sizeof(buf), "%llu\n", val);
7891
7892 return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7893 }
7894
7895 const struct file_operations trace_min_max_fops = {
7896 .open = tracing_open_generic,
7897 .read = trace_min_max_read,
7898 .write = trace_min_max_write,
7899 };
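
/*
 * Example (minimal sketch with hypothetical names): a user of
 * trace_min_max_fops wires a tracefs file to a u64 value with optional
 * bounds through a struct trace_min_max_param (see trace.h):
 *
 *	static u64 my_val, my_min = 1, my_max = 1000;
 *	static struct trace_min_max_param my_param = {
 *		.lock	= &my_mutex,
 *		.val	= &my_val,
 *		.min	= &my_min,
 *		.max	= &my_max,
 *	};
 *
 *	trace_create_file("my_knob", TRACE_MODE_WRITE, parent,
 *			  &my_param, &trace_min_max_fops);
 */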
7900
7901 #define TRACING_LOG_ERRS_MAX 8
7902 #define TRACING_LOG_LOC_MAX 128
7903
7904 #define CMD_PREFIX " Command: "
7905
7906 struct err_info {
7907 const char **errs; /* ptr to loc-specific array of err strings */
7908 u8 type; /* index into errs -> specific err string */
7909 u16 pos; /* caret position */
7910 u64 ts;
7911 };
7912
7913 struct tracing_log_err {
7914 struct list_head list;
7915 struct err_info info;
7916 char loc[TRACING_LOG_LOC_MAX]; /* err location */
7917 char *cmd; /* what caused err */
7918 };
7919
7920 static DEFINE_MUTEX(tracing_err_log_lock);
7921
7922 static struct tracing_log_err *alloc_tracing_log_err(int len)
7923 {
7924 struct tracing_log_err *err;
7925
7926 err = kzalloc(sizeof(*err), GFP_KERNEL);
7927 if (!err)
7928 return ERR_PTR(-ENOMEM);
7929
7930 err->cmd = kzalloc(len, GFP_KERNEL);
7931 if (!err->cmd) {
7932 kfree(err);
7933 return ERR_PTR(-ENOMEM);
7934 }
7935
7936 return err;
7937 }
7938
7939 static void free_tracing_log_err(struct tracing_log_err *err)
7940 {
7941 kfree(err->cmd);
7942 kfree(err);
7943 }
7944
7945 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
7946 int len)
7947 {
7948 struct tracing_log_err *err;
7949 char *cmd;
7950
7951 if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7952 err = alloc_tracing_log_err(len);
7953 if (PTR_ERR(err) != -ENOMEM)
7954 tr->n_err_log_entries++;
7955
7956 return err;
7957 }
7958 cmd = kzalloc(len, GFP_KERNEL);
7959 if (!cmd)
7960 return ERR_PTR(-ENOMEM);
7961 err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7962 kfree(err->cmd);
7963 err->cmd = cmd;
7964 list_del(&err->list);
7965
7966 return err;
7967 }
7968
7969 /**
7970 * err_pos - find the position of a string within a command for error careting
7971 * @cmd: The tracing command that caused the error
7972 * @str: The string to position the caret at within @cmd
7973 *
7974 * Finds the position of the first occurrence of @str within @cmd. The
7975 * return value can be passed to tracing_log_err() for caret placement
7976 * within @cmd.
7977 *
7978 * Returns the index within @cmd of the first occurrence of @str or 0
7979 * if @str was not found.
7980 */
7981 unsigned int err_pos(char *cmd, const char *str)
7982 {
7983 char *found;
7984
7985 if (WARN_ON(!strlen(cmd)))
7986 return 0;
7987
7988 found = strstr(cmd, str);
7989 if (found)
7990 return found - cmd;
7991
7992 return 0;
7993 }
7994
7995 /**
7996 * tracing_log_err - write an error to the tracing error log
7997 * @tr: The associated trace array for the error (NULL for top level array)
7998 * @loc: A string describing where the error occurred
7999 * @cmd: The tracing command that caused the error
8000 * @errs: The array of loc-specific static error strings
8001 * @type: The index into errs[], which produces the specific static err string
8002 * @pos: The position the caret should be placed in the cmd
8003 *
8004 * Writes an error into tracing/error_log of the form:
8005 *
8006 * <loc>: error: <text>
8007 * Command: <cmd>
8008 * ^
8009 *
8010 * tracing/error_log is a small log file containing the last
8011 * TRACING_LOG_ERRS_MAX errors (8). Memory for errors isn't allocated
8012 * unless there has been a tracing error, and the error log can be
8013 * cleared and have its memory freed by writing the empty string in
8014 * truncation mode to it i.e. echo > tracing/error_log.
8015 *
8016 * NOTE: the @errs array along with the @type param are used to
8017 * produce a static error string - this string is not copied and saved
8018 * when the error is logged - only a pointer to it is saved. See
8019 * existing callers for examples of how static strings are typically
8020 * defined for use with tracing_log_err().
8021 */
8022 void tracing_log_err(struct trace_array *tr,
8023 const char *loc, const char *cmd,
8024 const char **errs, u8 type, u16 pos)
8025 {
8026 struct tracing_log_err *err;
8027 int len = 0;
8028
8029 if (!tr)
8030 tr = &global_trace;
8031
8032 len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
8033
8034 guard(mutex)(&tracing_err_log_lock);
8035
8036 err = get_tracing_log_err(tr, len);
8037 if (PTR_ERR(err) == -ENOMEM)
8038 return;
8039
8040 snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
8041 snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
8042
8043 err->info.errs = errs;
8044 err->info.type = type;
8045 err->info.pos = pos;
8046 err->info.ts = local_clock();
8047
8048 list_add_tail(&err->list, &tr->err_log);
8049 }
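
/*
 * Example (hypothetical caller, for illustration only): a tracing command
 * parser typically keeps a static array of error strings and logs the
 * failing one together with a caret position computed by err_pos():
 *
 *	static const char *foo_errs[] = { "Duplicate name", "Missing field" };
 *
 *	tracing_log_err(tr, "foo_cmd", cmd, foo_errs,
 *			FOO_ERR_MISSING_FIELD, err_pos(cmd, "field"));
 *
 * which then shows up when reading tracing/error_log roughly as:
 *
 *	[  123.456789] foo_cmd: error: Missing field
 *	  Command: <the failing command line>
 *	                ^
 */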
8050
8051 static void clear_tracing_err_log(struct trace_array *tr)
8052 {
8053 struct tracing_log_err *err, *next;
8054
8055 guard(mutex)(&tracing_err_log_lock);
8056
8057 list_for_each_entry_safe(err, next, &tr->err_log, list) {
8058 list_del(&err->list);
8059 free_tracing_log_err(err);
8060 }
8061
8062 tr->n_err_log_entries = 0;
8063 }
8064
8065 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
8066 {
8067 struct trace_array *tr = m->private;
8068
8069 mutex_lock(&tracing_err_log_lock);
8070
8071 return seq_list_start(&tr->err_log, *pos);
8072 }
8073
8074 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
8075 {
8076 struct trace_array *tr = m->private;
8077
8078 return seq_list_next(v, &tr->err_log, pos);
8079 }
8080
8081 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
8082 {
8083 mutex_unlock(&tracing_err_log_lock);
8084 }
8085
8086 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
8087 {
8088 u16 i;
8089
8090 for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
8091 seq_putc(m, ' ');
8092 for (i = 0; i < pos; i++)
8093 seq_putc(m, ' ');
8094 seq_puts(m, "^\n");
8095 }
8096
8097 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
8098 {
8099 struct tracing_log_err *err = v;
8100
8101 if (err) {
8102 const char *err_text = err->info.errs[err->info.type];
8103 u64 sec = err->info.ts;
8104 u32 nsec;
8105
8106 nsec = do_div(sec, NSEC_PER_SEC);
8107 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
8108 err->loc, err_text);
8109 seq_printf(m, "%s", err->cmd);
8110 tracing_err_log_show_pos(m, err->info.pos);
8111 }
8112
8113 return 0;
8114 }
8115
8116 static const struct seq_operations tracing_err_log_seq_ops = {
8117 .start = tracing_err_log_seq_start,
8118 .next = tracing_err_log_seq_next,
8119 .stop = tracing_err_log_seq_stop,
8120 .show = tracing_err_log_seq_show
8121 };
8122
8123 static int tracing_err_log_open(struct inode *inode, struct file *file)
8124 {
8125 struct trace_array *tr = inode->i_private;
8126 int ret = 0;
8127
8128 ret = tracing_check_open_get_tr(tr);
8129 if (ret)
8130 return ret;
8131
8132 /* If this file was opened for write, then erase contents */
8133 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8134 clear_tracing_err_log(tr);
8135
8136 if (file->f_mode & FMODE_READ) {
8137 ret = seq_open(file, &tracing_err_log_seq_ops);
8138 if (!ret) {
8139 struct seq_file *m = file->private_data;
8140 m->private = tr;
8141 } else {
8142 trace_array_put(tr);
8143 }
8144 }
8145 return ret;
8146 }
8147
8148 static ssize_t tracing_err_log_write(struct file *file,
8149 const char __user *buffer,
8150 size_t count, loff_t *ppos)
8151 {
8152 return count;
8153 }
8154
8155 static int tracing_err_log_release(struct inode *inode, struct file *file)
8156 {
8157 struct trace_array *tr = inode->i_private;
8158
8159 trace_array_put(tr);
8160
8161 if (file->f_mode & FMODE_READ)
8162 seq_release(inode, file);
8163
8164 return 0;
8165 }
8166
8167 static const struct file_operations tracing_err_log_fops = {
8168 .open = tracing_err_log_open,
8169 .write = tracing_err_log_write,
8170 .read = seq_read,
8171 .llseek = tracing_lseek,
8172 .release = tracing_err_log_release,
8173 };
8174
8175 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8176 {
8177 struct trace_array *tr = inode->i_private;
8178 struct ftrace_buffer_info *info;
8179 int ret;
8180
8181 ret = tracing_check_open_get_tr(tr);
8182 if (ret)
8183 return ret;
8184
8185 info = kvzalloc(sizeof(*info), GFP_KERNEL);
8186 if (!info) {
8187 trace_array_put(tr);
8188 return -ENOMEM;
8189 }
8190
8191 mutex_lock(&trace_types_lock);
8192
8193 info->iter.tr = tr;
8194 info->iter.cpu_file = tracing_get_cpu(inode);
8195 info->iter.trace = tr->current_trace;
8196 info->iter.array_buffer = &tr->array_buffer;
8197 info->spare = NULL;
8198 /* Force reading ring buffer for first read */
8199 info->read = (unsigned int)-1;
8200
8201 filp->private_data = info;
8202
8203 tr->trace_ref++;
8204
8205 mutex_unlock(&trace_types_lock);
8206
8207 ret = nonseekable_open(inode, filp);
8208 if (ret < 0)
8209 trace_array_put(tr);
8210
8211 return ret;
8212 }
8213
8214 static __poll_t
8215 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8216 {
8217 struct ftrace_buffer_info *info = filp->private_data;
8218 struct trace_iterator *iter = &info->iter;
8219
8220 return trace_poll(iter, filp, poll_table);
8221 }
8222
8223 static ssize_t
8224 tracing_buffers_read(struct file *filp, char __user *ubuf,
8225 size_t count, loff_t *ppos)
8226 {
8227 struct ftrace_buffer_info *info = filp->private_data;
8228 struct trace_iterator *iter = &info->iter;
8229 void *trace_data;
8230 int page_size;
8231 ssize_t ret = 0;
8232 ssize_t size;
8233
8234 if (!count)
8235 return 0;
8236
8237 #ifdef CONFIG_TRACER_MAX_TRACE
8238 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8239 return -EBUSY;
8240 #endif
8241
8242 page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8243
8244 /* Make sure the spare matches the current sub buffer size */
8245 if (info->spare) {
8246 if (page_size != info->spare_size) {
8247 ring_buffer_free_read_page(iter->array_buffer->buffer,
8248 info->spare_cpu, info->spare);
8249 info->spare = NULL;
8250 }
8251 }
8252
8253 if (!info->spare) {
8254 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8255 iter->cpu_file);
8256 if (IS_ERR(info->spare)) {
8257 ret = PTR_ERR(info->spare);
8258 info->spare = NULL;
8259 } else {
8260 info->spare_cpu = iter->cpu_file;
8261 info->spare_size = page_size;
8262 }
8263 }
8264 if (!info->spare)
8265 return ret;
8266
8267 /* Do we have previous read data to read? */
8268 if (info->read < page_size)
8269 goto read;
8270
8271 again:
8272 trace_access_lock(iter->cpu_file);
8273 ret = ring_buffer_read_page(iter->array_buffer->buffer,
8274 info->spare,
8275 count,
8276 iter->cpu_file, 0);
8277 trace_access_unlock(iter->cpu_file);
8278
8279 if (ret < 0) {
8280 if (trace_empty(iter) && !iter->closed) {
8281 if (update_last_data_if_empty(iter->tr))
8282 return 0;
8283
8284 if ((filp->f_flags & O_NONBLOCK))
8285 return -EAGAIN;
8286
8287 ret = wait_on_pipe(iter, 0);
8288 if (ret)
8289 return ret;
8290
8291 goto again;
8292 }
8293 return 0;
8294 }
8295
8296 info->read = 0;
8297 read:
8298 size = page_size - info->read;
8299 if (size > count)
8300 size = count;
8301 trace_data = ring_buffer_read_page_data(info->spare);
8302 ret = copy_to_user(ubuf, trace_data + info->read, size);
8303 if (ret == size)
8304 return -EFAULT;
8305
8306 size -= ret;
8307
8308 *ppos += size;
8309 info->read += size;
8310
8311 return size;
8312 }
8313
8314 static int tracing_buffers_flush(struct file *file, fl_owner_t id)
8315 {
8316 struct ftrace_buffer_info *info = file->private_data;
8317 struct trace_iterator *iter = &info->iter;
8318
8319 iter->closed = true;
8320 /* Make sure the waiters see the new wait_index */
8321 (void)atomic_fetch_inc_release(&iter->wait_index);
8322
8323 ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8324
8325 return 0;
8326 }
8327
8328 static int tracing_buffers_release(struct inode *inode, struct file *file)
8329 {
8330 struct ftrace_buffer_info *info = file->private_data;
8331 struct trace_iterator *iter = &info->iter;
8332
8333 guard(mutex)(&trace_types_lock);
8334
8335 iter->tr->trace_ref--;
8336
8337 __trace_array_put(iter->tr);
8338
8339 if (info->spare)
8340 ring_buffer_free_read_page(iter->array_buffer->buffer,
8341 info->spare_cpu, info->spare);
8342 kvfree(info);
8343
8344 return 0;
8345 }
8346
8347 struct buffer_ref {
8348 struct trace_buffer *buffer;
8349 void *page;
8350 int cpu;
8351 refcount_t refcount;
8352 };
8353
8354 static void buffer_ref_release(struct buffer_ref *ref)
8355 {
8356 if (!refcount_dec_and_test(&ref->refcount))
8357 return;
8358 ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8359 kfree(ref);
8360 }
8361
8362 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8363 struct pipe_buffer *buf)
8364 {
8365 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8366
8367 buffer_ref_release(ref);
8368 buf->private = 0;
8369 }
8370
8371 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8372 struct pipe_buffer *buf)
8373 {
8374 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8375
8376 if (refcount_read(&ref->refcount) > INT_MAX/2)
8377 return false;
8378
8379 refcount_inc(&ref->refcount);
8380 return true;
8381 }
8382
8383 /* Pipe buffer operations for a buffer. */
8384 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8385 .release = buffer_pipe_buf_release,
8386 .get = buffer_pipe_buf_get,
8387 };
8388
8389 /*
8390 * Callback from splice_to_pipe(), if we need to release some pages
8391 * at the end of the spd in case we error'ed out in filling the pipe.
8392 */
8393 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8394 {
8395 struct buffer_ref *ref =
8396 (struct buffer_ref *)spd->partial[i].private;
8397
8398 buffer_ref_release(ref);
8399 spd->partial[i].private = 0;
8400 }
8401
8402 static ssize_t
8403 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8404 struct pipe_inode_info *pipe, size_t len,
8405 unsigned int flags)
8406 {
8407 struct ftrace_buffer_info *info = file->private_data;
8408 struct trace_iterator *iter = &info->iter;
8409 struct partial_page partial_def[PIPE_DEF_BUFFERS];
8410 struct page *pages_def[PIPE_DEF_BUFFERS];
8411 struct splice_pipe_desc spd = {
8412 .pages = pages_def,
8413 .partial = partial_def,
8414 .nr_pages_max = PIPE_DEF_BUFFERS,
8415 .ops = &buffer_pipe_buf_ops,
8416 .spd_release = buffer_spd_release,
8417 };
8418 struct buffer_ref *ref;
8419 bool woken = false;
8420 int page_size;
8421 int entries, i;
8422 ssize_t ret = 0;
8423
8424 #ifdef CONFIG_TRACER_MAX_TRACE
8425 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8426 return -EBUSY;
8427 #endif
8428
8429 page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8430 if (*ppos & (page_size - 1))
8431 return -EINVAL;
8432
8433 if (len & (page_size - 1)) {
8434 if (len < page_size)
8435 return -EINVAL;
8436 len &= (~(page_size - 1));
8437 }
8438
8439 if (splice_grow_spd(pipe, &spd))
8440 return -ENOMEM;
8441
8442 again:
8443 trace_access_lock(iter->cpu_file);
8444 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8445
8446 for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) {
8447 struct page *page;
8448 int r;
8449
8450 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8451 if (!ref) {
8452 ret = -ENOMEM;
8453 break;
8454 }
8455
8456 refcount_set(&ref->refcount, 1);
8457 ref->buffer = iter->array_buffer->buffer;
8458 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8459 if (IS_ERR(ref->page)) {
8460 ret = PTR_ERR(ref->page);
8461 ref->page = NULL;
8462 kfree(ref);
8463 break;
8464 }
8465 ref->cpu = iter->cpu_file;
8466
8467 r = ring_buffer_read_page(ref->buffer, ref->page,
8468 len, iter->cpu_file, 1);
8469 if (r < 0) {
8470 ring_buffer_free_read_page(ref->buffer, ref->cpu,
8471 ref->page);
8472 kfree(ref);
8473 break;
8474 }
8475
8476 page = virt_to_page(ring_buffer_read_page_data(ref->page));
8477
8478 spd.pages[i] = page;
8479 spd.partial[i].len = page_size;
8480 spd.partial[i].offset = 0;
8481 spd.partial[i].private = (unsigned long)ref;
8482 spd.nr_pages++;
8483 *ppos += page_size;
8484
8485 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8486 }
8487
8488 trace_access_unlock(iter->cpu_file);
8489 spd.nr_pages = i;
8490
8491 /* did we read anything? */
8492 if (!spd.nr_pages) {
8493
8494 if (ret)
8495 goto out;
8496
8497 if (woken)
8498 goto out;
8499
8500 ret = -EAGAIN;
8501 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8502 goto out;
8503
8504 ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
8505 if (ret)
8506 goto out;
8507
8508 /* No need to wait after waking up when tracing is off */
8509 if (!tracer_tracing_is_on(iter->tr))
8510 goto out;
8511
8512 /* Iterate one more time to collect any new data then exit */
8513 woken = true;
8514
8515 goto again;
8516 }
8517
8518 ret = splice_to_pipe(pipe, &spd);
8519 out:
8520 splice_shrink_spd(&spd);
8521
8522 return ret;
8523 }
8524
8525 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8526 {
8527 struct ftrace_buffer_info *info = file->private_data;
8528 struct trace_iterator *iter = &info->iter;
8529 int err;
8530
8531 if (cmd == TRACE_MMAP_IOCTL_GET_READER) {
8532 if (!(file->f_flags & O_NONBLOCK)) {
8533 err = ring_buffer_wait(iter->array_buffer->buffer,
8534 iter->cpu_file,
8535 iter->tr->buffer_percent,
8536 NULL, NULL);
8537 if (err)
8538 return err;
8539 }
8540
8541 return ring_buffer_map_get_reader(iter->array_buffer->buffer,
8542 iter->cpu_file);
8543 } else if (cmd) {
8544 return -ENOTTY;
8545 }
8546
8547 /*
8548 * An ioctl call with cmd 0 to the ring buffer file will wake up all
8549 * waiters
8550 */
8551 guard(mutex)(&trace_types_lock);
8552
8553 /* Make sure the waiters see the new wait_index */
8554 (void)atomic_fetch_inc_release(&iter->wait_index);
8555
8556 ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8557
8558 return 0;
8559 }
8560
8561 #ifdef CONFIG_TRACER_MAX_TRACE
8562 static int get_snapshot_map(struct trace_array *tr)
8563 {
8564 int err = 0;
8565
8566 /*
8567 * Called with mmap_lock held. lockdep would be unhappy if we would now
8568 * take trace_types_lock. Instead use the specific
8569 * snapshot_trigger_lock.
8570 */
8571 spin_lock(&tr->snapshot_trigger_lock);
8572
8573 if (tr->snapshot || tr->mapped == UINT_MAX)
8574 err = -EBUSY;
8575 else
8576 tr->mapped++;
8577
8578 spin_unlock(&tr->snapshot_trigger_lock);
8579
8580 /* Wait for update_max_tr() to observe iter->tr->mapped */
8581 if (tr->mapped == 1)
8582 synchronize_rcu();
8583
8584 return err;
8585
8586 }
8587 static void put_snapshot_map(struct trace_array *tr)
8588 {
8589 spin_lock(&tr->snapshot_trigger_lock);
8590 if (!WARN_ON(!tr->mapped))
8591 tr->mapped--;
8592 spin_unlock(&tr->snapshot_trigger_lock);
8593 }
8594 #else
8595 static inline int get_snapshot_map(struct trace_array *tr) { return 0; }
8596 static inline void put_snapshot_map(struct trace_array *tr) { }
8597 #endif
8598
8599 static void tracing_buffers_mmap_close(struct vm_area_struct *vma)
8600 {
8601 struct ftrace_buffer_info *info = vma->vm_file->private_data;
8602 struct trace_iterator *iter = &info->iter;
8603
8604 WARN_ON(ring_buffer_unmap(iter->array_buffer->buffer, iter->cpu_file));
8605 put_snapshot_map(iter->tr);
8606 }
8607
8608 static const struct vm_operations_struct tracing_buffers_vmops = {
8609 .close = tracing_buffers_mmap_close,
8610 };
8611
8612 static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma)
8613 {
8614 struct ftrace_buffer_info *info = filp->private_data;
8615 struct trace_iterator *iter = &info->iter;
8616 int ret = 0;
8617
8618 /* A memmap'ed buffer is not supported for user space mmap */
8619 if (iter->tr->flags & TRACE_ARRAY_FL_MEMMAP)
8620 return -ENODEV;
8621
8622 ret = get_snapshot_map(iter->tr);
8623 if (ret)
8624 return ret;
8625
8626 ret = ring_buffer_map(iter->array_buffer->buffer, iter->cpu_file, vma);
8627 if (ret)
8628 put_snapshot_map(iter->tr);
8629
8630 vma->vm_ops = &tracing_buffers_vmops;
8631
8632 return ret;
8633 }
8634
8635 static const struct file_operations tracing_buffers_fops = {
8636 .open = tracing_buffers_open,
8637 .read = tracing_buffers_read,
8638 .poll = tracing_buffers_poll,
8639 .release = tracing_buffers_release,
8640 .flush = tracing_buffers_flush,
8641 .splice_read = tracing_buffers_splice_read,
8642 .unlocked_ioctl = tracing_buffers_ioctl,
8643 .mmap = tracing_buffers_mmap,
8644 };
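
/*
 * Rough sketch of the user-space side of the mmap interface above
 * (illustrative only; the meta-page layout and mapping sizes are omitted
 * here, see Documentation/trace/ring-buffer-map.rst for the real details):
 *
 *	int fd = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw", O_RDONLY);
 *	void *meta = mmap(NULL, meta_len, PROT_READ, MAP_SHARED, fd, 0);
 *
 *	// ask the kernel to move the reader page, then consume it
 *	ioctl(fd, TRACE_MMAP_IOCTL_GET_READER);
 */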
8645
8646 static ssize_t
8647 tracing_stats_read(struct file *filp, char __user *ubuf,
8648 size_t count, loff_t *ppos)
8649 {
8650 struct inode *inode = file_inode(filp);
8651 struct trace_array *tr = inode->i_private;
8652 struct array_buffer *trace_buf = &tr->array_buffer;
8653 int cpu = tracing_get_cpu(inode);
8654 struct trace_seq *s;
8655 unsigned long cnt;
8656 unsigned long long t;
8657 unsigned long usec_rem;
8658
8659 s = kmalloc(sizeof(*s), GFP_KERNEL);
8660 if (!s)
8661 return -ENOMEM;
8662
8663 trace_seq_init(s);
8664
8665 cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8666 trace_seq_printf(s, "entries: %ld\n", cnt);
8667
8668 cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8669 trace_seq_printf(s, "overrun: %ld\n", cnt);
8670
8671 cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8672 trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8673
8674 cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8675 trace_seq_printf(s, "bytes: %ld\n", cnt);
8676
8677 if (trace_clocks[tr->clock_id].in_ns) {
8678 /* local or global for trace_clock */
8679 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8680 usec_rem = do_div(t, USEC_PER_SEC);
8681 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8682 t, usec_rem);
8683
8684 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8685 usec_rem = do_div(t, USEC_PER_SEC);
8686 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8687 } else {
8688 /* counter or tsc mode for trace_clock */
8689 trace_seq_printf(s, "oldest event ts: %llu\n",
8690 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8691
8692 trace_seq_printf(s, "now ts: %llu\n",
8693 ring_buffer_time_stamp(trace_buf->buffer));
8694 }
8695
8696 cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8697 trace_seq_printf(s, "dropped events: %ld\n", cnt);
8698
8699 cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8700 trace_seq_printf(s, "read events: %ld\n", cnt);
8701
8702 count = simple_read_from_buffer(ubuf, count, ppos,
8703 s->buffer, trace_seq_used(s));
8704
8705 kfree(s);
8706
8707 return count;
8708 }
8709
8710 static const struct file_operations tracing_stats_fops = {
8711 .open = tracing_open_generic_tr,
8712 .read = tracing_stats_read,
8713 .llseek = generic_file_llseek,
8714 .release = tracing_release_generic_tr,
8715 };
8716
8717 #ifdef CONFIG_DYNAMIC_FTRACE
8718
8719 static ssize_t
8720 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8721 size_t cnt, loff_t *ppos)
8722 {
8723 ssize_t ret;
8724 char *buf;
8725 int r;
8726
8727 /* 512 should be plenty to hold the amount needed */
8728 #define DYN_INFO_BUF_SIZE 512
8729
8730 buf = kmalloc(DYN_INFO_BUF_SIZE, GFP_KERNEL);
8731 if (!buf)
8732 return -ENOMEM;
8733
8734 r = scnprintf(buf, DYN_INFO_BUF_SIZE,
8735 "%ld pages:%ld groups: %ld\n"
8736 "ftrace boot update time = %llu (ns)\n"
8737 "ftrace module total update time = %llu (ns)\n",
8738 ftrace_update_tot_cnt,
8739 ftrace_number_of_pages,
8740 ftrace_number_of_groups,
8741 ftrace_update_time,
8742 ftrace_total_mod_time);
8743
8744 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8745 kfree(buf);
8746 return ret;
8747 }
8748
8749 static const struct file_operations tracing_dyn_info_fops = {
8750 .open = tracing_open_generic,
8751 .read = tracing_read_dyn_info,
8752 .llseek = generic_file_llseek,
8753 };
8754 #endif /* CONFIG_DYNAMIC_FTRACE */
8755
8756 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8757 static void
8758 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8759 struct trace_array *tr, struct ftrace_probe_ops *ops,
8760 void *data)
8761 {
8762 tracing_snapshot_instance(tr);
8763 }
8764
8765 static void
8766 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8767 struct trace_array *tr, struct ftrace_probe_ops *ops,
8768 void *data)
8769 {
8770 struct ftrace_func_mapper *mapper = data;
8771 long *count = NULL;
8772
8773 if (mapper)
8774 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8775
8776 if (count) {
8777
8778 if (*count <= 0)
8779 return;
8780
8781 (*count)--;
8782 }
8783
8784 tracing_snapshot_instance(tr);
8785 }
8786
8787 static int
8788 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8789 struct ftrace_probe_ops *ops, void *data)
8790 {
8791 struct ftrace_func_mapper *mapper = data;
8792 long *count = NULL;
8793
8794 seq_printf(m, "%ps:", (void *)ip);
8795
8796 seq_puts(m, "snapshot");
8797
8798 if (mapper)
8799 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8800
8801 if (count)
8802 seq_printf(m, ":count=%ld\n", *count);
8803 else
8804 seq_puts(m, ":unlimited\n");
8805
8806 return 0;
8807 }
8808
8809 static int
8810 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8811 unsigned long ip, void *init_data, void **data)
8812 {
8813 struct ftrace_func_mapper *mapper = *data;
8814
8815 if (!mapper) {
8816 mapper = allocate_ftrace_func_mapper();
8817 if (!mapper)
8818 return -ENOMEM;
8819 *data = mapper;
8820 }
8821
8822 return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8823 }
8824
8825 static void
8826 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8827 unsigned long ip, void *data)
8828 {
8829 struct ftrace_func_mapper *mapper = data;
8830
8831 if (!ip) {
8832 if (!mapper)
8833 return;
8834 free_ftrace_func_mapper(mapper, NULL);
8835 return;
8836 }
8837
8838 ftrace_func_mapper_remove_ip(mapper, ip);
8839 }
8840
8841 static struct ftrace_probe_ops snapshot_probe_ops = {
8842 .func = ftrace_snapshot,
8843 .print = ftrace_snapshot_print,
8844 };
8845
8846 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8847 .func = ftrace_count_snapshot,
8848 .print = ftrace_snapshot_print,
8849 .init = ftrace_snapshot_init,
8850 .free = ftrace_snapshot_free,
8851 };
8852
8853 static int
8854 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8855 char *glob, char *cmd, char *param, int enable)
8856 {
8857 struct ftrace_probe_ops *ops;
8858 void *count = (void *)-1;
8859 char *number;
8860 int ret;
8861
8862 if (!tr)
8863 return -ENODEV;
8864
8865 /* hash funcs only work with set_ftrace_filter */
8866 if (!enable)
8867 return -EINVAL;
8868
8869 ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8870
8871 if (glob[0] == '!') {
8872 ret = unregister_ftrace_function_probe_func(glob+1, tr, ops);
8873 if (!ret)
8874 tracing_disarm_snapshot(tr);
8875
8876 return ret;
8877 }
8878
8879 if (!param)
8880 goto out_reg;
8881
8882 number = strsep(&param, ":");
8883
8884 if (!strlen(number))
8885 goto out_reg;
8886
8887 /*
8888 * We use the callback data field (which is a pointer)
8889 * as our counter.
8890 */
8891 ret = kstrtoul(number, 0, (unsigned long *)&count);
8892 if (ret)
8893 return ret;
8894
8895 out_reg:
8896 ret = tracing_arm_snapshot(tr);
8897 if (ret < 0)
8898 return ret;
8899
8900 ret = register_ftrace_function_probe(glob, tr, ops, count);
8901 if (ret < 0)
8902 tracing_disarm_snapshot(tr);
8903
8904 return ret < 0 ? ret : 0;
8905 }
8906
8907 static struct ftrace_func_command ftrace_snapshot_cmd = {
8908 .name = "snapshot",
8909 .func = ftrace_trace_snapshot_callback,
8910 };
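
/*
 * Usage example for the "snapshot" function command registered by
 * register_snapshot_cmd() below (syntax per Documentation/trace/ftrace.rst;
 * <function> is a placeholder for a traceable function name):
 *
 *	# echo '<function>:snapshot' > /sys/kernel/tracing/set_ftrace_filter
 *	# echo '<function>:snapshot:3' >> /sys/kernel/tracing/set_ftrace_filter
 *	# echo '!<function>:snapshot' >> /sys/kernel/tracing/set_ftrace_filter
 *
 * The optional ":count" limits how many snapshots are taken, and the
 * '!' form removes the probe again.
 */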
8911
8912 static __init int register_snapshot_cmd(void)
8913 {
8914 return register_ftrace_command(&ftrace_snapshot_cmd);
8915 }
8916 #else
8917 static inline __init int register_snapshot_cmd(void) { return 0; }
8918 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8919
8920 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8921 {
8922 /* Top directory uses NULL as the parent */
8923 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8924 return NULL;
8925
8926 if (WARN_ON(!tr->dir))
8927 return ERR_PTR(-ENODEV);
8928
8929 /* All sub buffers have a descriptor */
8930 return tr->dir;
8931 }
8932
8933 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8934 {
8935 struct dentry *d_tracer;
8936
8937 if (tr->percpu_dir)
8938 return tr->percpu_dir;
8939
8940 d_tracer = tracing_get_dentry(tr);
8941 if (IS_ERR(d_tracer))
8942 return NULL;
8943
8944 tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8945
8946 MEM_FAIL(!tr->percpu_dir,
8947 "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8948
8949 return tr->percpu_dir;
8950 }
8951
8952 static struct dentry *
8953 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8954 void *data, long cpu, const struct file_operations *fops)
8955 {
8956 struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8957
8958 if (ret) /* See tracing_get_cpu() */
8959 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8960 return ret;
8961 }
8962
8963 static void
8964 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8965 {
8966 struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8967 struct dentry *d_cpu;
8968 char cpu_dir[30]; /* 30 characters should be more than enough */
8969
8970 if (!d_percpu)
8971 return;
8972
8973 snprintf(cpu_dir, 30, "cpu%ld", cpu);
8974 d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8975 if (!d_cpu) {
8976 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8977 return;
8978 }
8979
8980 /* per cpu trace_pipe */
8981 trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8982 tr, cpu, &tracing_pipe_fops);
8983
8984 /* per cpu trace */
8985 trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8986 tr, cpu, &tracing_fops);
8987
8988 trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8989 tr, cpu, &tracing_buffers_fops);
8990
8991 trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8992 tr, cpu, &tracing_stats_fops);
8993
8994 trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8995 tr, cpu, &tracing_entries_fops);
8996
8997 if (tr->range_addr_start)
8998 trace_create_cpu_file("buffer_meta", TRACE_MODE_READ, d_cpu,
8999 tr, cpu, &tracing_buffer_meta_fops);
9000 #ifdef CONFIG_TRACER_SNAPSHOT
9001 if (!tr->range_addr_start) {
9002 trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
9003 tr, cpu, &snapshot_fops);
9004
9005 trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
9006 tr, cpu, &snapshot_raw_fops);
9007 }
9008 #endif
9009 }
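
/*
 * The resulting per-CPU layout under tracefs (paths assuming the usual
 * /sys/kernel/tracing mount) looks roughly like:
 *
 *	per_cpu/cpu0/trace
 *	per_cpu/cpu0/trace_pipe
 *	per_cpu/cpu0/trace_pipe_raw
 *	per_cpu/cpu0/stats
 *	per_cpu/cpu0/buffer_size_kb
 *	per_cpu/cpu0/buffer_meta	(mapped-range buffers only)
 *	per_cpu/cpu0/snapshot		(CONFIG_TRACER_SNAPSHOT)
 *	per_cpu/cpu0/snapshot_raw	(CONFIG_TRACER_SNAPSHOT)
 */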
9010
9011 #ifdef CONFIG_FTRACE_SELFTEST
9012 /* Let selftest have access to static functions in this file */
9013 #include "trace_selftest.c"
9014 #endif
9015
9016 static ssize_t
9017 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
9018 loff_t *ppos)
9019 {
9020 struct trace_option_dentry *topt = filp->private_data;
9021 char *buf;
9022
9023 if (topt->flags->val & topt->opt->bit)
9024 buf = "1\n";
9025 else
9026 buf = "0\n";
9027
9028 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9029 }
9030
9031 static ssize_t
9032 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
9033 loff_t *ppos)
9034 {
9035 struct trace_option_dentry *topt = filp->private_data;
9036 unsigned long val;
9037 int ret;
9038
9039 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9040 if (ret)
9041 return ret;
9042
9043 if (val != 0 && val != 1)
9044 return -EINVAL;
9045
9046 if (!!(topt->flags->val & topt->opt->bit) != val) {
9047 guard(mutex)(&trace_types_lock);
9048 ret = __set_tracer_option(topt->tr, topt->flags,
9049 topt->opt, !val);
9050 if (ret)
9051 return ret;
9052 }
9053
9054 *ppos += cnt;
9055
9056 return cnt;
9057 }
9058
9059 static int tracing_open_options(struct inode *inode, struct file *filp)
9060 {
9061 struct trace_option_dentry *topt = inode->i_private;
9062 int ret;
9063
9064 ret = tracing_check_open_get_tr(topt->tr);
9065 if (ret)
9066 return ret;
9067
9068 filp->private_data = inode->i_private;
9069 return 0;
9070 }
9071
9072 static int tracing_release_options(struct inode *inode, struct file *file)
9073 {
9074 struct trace_option_dentry *topt = file->private_data;
9075
9076 trace_array_put(topt->tr);
9077 return 0;
9078 }
9079
9080 static const struct file_operations trace_options_fops = {
9081 .open = tracing_open_options,
9082 .read = trace_options_read,
9083 .write = trace_options_write,
9084 .llseek = generic_file_llseek,
9085 .release = tracing_release_options,
9086 };
9087
9088 /*
9089 * In order to pass in both the trace_array descriptor as well as the index
9090 * to the flag that the trace option file represents, the trace_array
9091 * has a character array of trace_flags_index[], which holds the index
9092 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
9093 * The address of this character array is passed to the flag option file
9094 * read/write callbacks.
9095 *
9096 * In order to extract both the index and the trace_array descriptor,
9097 * get_tr_index() uses the following algorithm.
9098 *
9099 * idx = *ptr;
9100 *
9101 * As the pointer points at the index entry, whose stored value equals
9102 * its own position in the array (remember index[1] == 1).
9103 *
9104 * Then, to get the trace_array descriptor, subtract that index
9105 * from the pointer to get back to the start of the array itself:
9106 *
9107 * ptr - idx == &index[0]
9108 *
9109 * Then a simple container_of() from that pointer gets us to the
9110 * trace_array descriptor.
9111 */
9112 static void get_tr_index(void *data, struct trace_array **ptr,
9113 unsigned int *pindex)
9114 {
9115 *pindex = *(unsigned char *)data;
9116
9117 *ptr = container_of(data - *pindex, struct trace_array,
9118 trace_flags_index);
9119 }
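
/*
 * Illustrative example (not part of the original source): for the flag at
 * bit 3 of a trace_array "tr", create_trace_option_core_file() below hands
 * the option file &tr->trace_flags_index[3] as its data pointer, so:
 *
 *	void *data = &tr->trace_flags_index[3];
 *	idx  = *(unsigned char *)data;			(yields 3)
 *	tr   = container_of(data - idx, struct trace_array, trace_flags_index);
 *
 * recovers both the bit index and the owning trace_array.
 */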
9120
9121 static ssize_t
9122 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
9123 loff_t *ppos)
9124 {
9125 void *tr_index = filp->private_data;
9126 struct trace_array *tr;
9127 unsigned int index;
9128 char *buf;
9129
9130 get_tr_index(tr_index, &tr, &index);
9131
9132 if (tr->trace_flags & (1 << index))
9133 buf = "1\n";
9134 else
9135 buf = "0\n";
9136
9137 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9138 }
9139
9140 static ssize_t
9141 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
9142 loff_t *ppos)
9143 {
9144 void *tr_index = filp->private_data;
9145 struct trace_array *tr;
9146 unsigned int index;
9147 unsigned long val;
9148 int ret;
9149
9150 get_tr_index(tr_index, &tr, &index);
9151
9152 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9153 if (ret)
9154 return ret;
9155
9156 if (val != 0 && val != 1)
9157 return -EINVAL;
9158
9159 mutex_lock(&event_mutex);
9160 mutex_lock(&trace_types_lock);
9161 ret = set_tracer_flag(tr, 1 << index, val);
9162 mutex_unlock(&trace_types_lock);
9163 mutex_unlock(&event_mutex);
9164
9165 if (ret < 0)
9166 return ret;
9167
9168 *ppos += cnt;
9169
9170 return cnt;
9171 }
9172
9173 static const struct file_operations trace_options_core_fops = {
9174 .open = tracing_open_generic,
9175 .read = trace_options_core_read,
9176 .write = trace_options_core_write,
9177 .llseek = generic_file_llseek,
9178 };
9179
9180 struct dentry *trace_create_file(const char *name,
9181 umode_t mode,
9182 struct dentry *parent,
9183 void *data,
9184 const struct file_operations *fops)
9185 {
9186 struct dentry *ret;
9187
9188 ret = tracefs_create_file(name, mode, parent, data, fops);
9189 if (!ret)
9190 pr_warn("Could not create tracefs '%s' entry\n", name);
9191
9192 return ret;
9193 }
9194
9195
9196 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
9197 {
9198 struct dentry *d_tracer;
9199
9200 if (tr->options)
9201 return tr->options;
9202
9203 d_tracer = tracing_get_dentry(tr);
9204 if (IS_ERR(d_tracer))
9205 return NULL;
9206
9207 tr->options = tracefs_create_dir("options", d_tracer);
9208 if (!tr->options) {
9209 pr_warn("Could not create tracefs directory 'options'\n");
9210 return NULL;
9211 }
9212
9213 return tr->options;
9214 }
9215
9216 static void
9217 create_trace_option_file(struct trace_array *tr,
9218 struct trace_option_dentry *topt,
9219 struct tracer_flags *flags,
9220 struct tracer_opt *opt)
9221 {
9222 struct dentry *t_options;
9223
9224 t_options = trace_options_init_dentry(tr);
9225 if (!t_options)
9226 return;
9227
9228 topt->flags = flags;
9229 topt->opt = opt;
9230 topt->tr = tr;
9231
9232 topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
9233 t_options, topt, &trace_options_fops);
9234
9235 }
9236
9237 static void
9238 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
9239 {
9240 struct trace_option_dentry *topts;
9241 struct trace_options *tr_topts;
9242 struct tracer_flags *flags;
9243 struct tracer_opt *opts;
9244 int cnt;
9245 int i;
9246
9247 if (!tracer)
9248 return;
9249
9250 flags = tracer->flags;
9251
9252 if (!flags || !flags->opts)
9253 return;
9254
9255 /*
9256 * If this is an instance, only create flags for tracers
9257 * the instance may have.
9258 */
9259 if (!trace_ok_for_array(tracer, tr))
9260 return;
9261
9262 for (i = 0; i < tr->nr_topts; i++) {
9263 /* Make sure there are no duplicate flags. */
9264 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
9265 return;
9266 }
9267
9268 opts = flags->opts;
9269
9270 for (cnt = 0; opts[cnt].name; cnt++)
9271 ;
9272
9273 topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9274 if (!topts)
9275 return;
9276
9277 tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9278 GFP_KERNEL);
9279 if (!tr_topts) {
9280 kfree(topts);
9281 return;
9282 }
9283
9284 tr->topts = tr_topts;
9285 tr->topts[tr->nr_topts].tracer = tracer;
9286 tr->topts[tr->nr_topts].topts = topts;
9287 tr->nr_topts++;
9288
9289 for (cnt = 0; opts[cnt].name; cnt++) {
9290 create_trace_option_file(tr, &topts[cnt], flags,
9291 &opts[cnt]);
9292 MEM_FAIL(topts[cnt].entry == NULL,
9293 "Failed to create trace option: %s",
9294 opts[cnt].name);
9295 }
9296 }
9297
9298 static struct dentry *
9299 create_trace_option_core_file(struct trace_array *tr,
9300 const char *option, long index)
9301 {
9302 struct dentry *t_options;
9303
9304 t_options = trace_options_init_dentry(tr);
9305 if (!t_options)
9306 return NULL;
9307
9308 return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9309 (void *)&tr->trace_flags_index[index],
9310 &trace_options_core_fops);
9311 }
9312
9313 static void create_trace_options_dir(struct trace_array *tr)
9314 {
9315 struct dentry *t_options;
9316 bool top_level = tr == &global_trace;
9317 int i;
9318
9319 t_options = trace_options_init_dentry(tr);
9320 if (!t_options)
9321 return;
9322
9323 for (i = 0; trace_options[i]; i++) {
9324 if (top_level ||
9325 !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9326 create_trace_option_core_file(tr, trace_options[i], i);
9327 }
9328 }
9329
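/*
 * The two helpers below back the per-instance "tracing_on" control file
 * created in init_tracer_tracefs() further down.  Sketch of the usual
 * usage from user space (assuming the common tracefs mount point):
 *
 *	echo 0 > /sys/kernel/tracing/tracing_on		(stop recording)
 *	echo 1 > /sys/kernel/tracing/tracing_on		(resume recording)
 *
 * Writing 0 also invokes the current tracer's ->stop() callback and wakes
 * any waiters blocked on the ring buffer; writing the value the file
 * already holds is a no-op.
 */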
9330 static ssize_t
9331 rb_simple_read(struct file *filp, char __user *ubuf,
9332 size_t cnt, loff_t *ppos)
9333 {
9334 struct trace_array *tr = filp->private_data;
9335 char buf[64];
9336 int r;
9337
9338 r = tracer_tracing_is_on(tr);
9339 r = sprintf(buf, "%d\n", r);
9340
9341 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9342 }
9343
9344 static ssize_t
9345 rb_simple_write(struct file *filp, const char __user *ubuf,
9346 size_t cnt, loff_t *ppos)
9347 {
9348 struct trace_array *tr = filp->private_data;
9349 struct trace_buffer *buffer = tr->array_buffer.buffer;
9350 unsigned long val;
9351 int ret;
9352
9353 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9354 if (ret)
9355 return ret;
9356
9357 if (buffer) {
9358 guard(mutex)(&trace_types_lock);
9359 if (!!val == tracer_tracing_is_on(tr)) {
9360 val = 0; /* do nothing */
9361 } else if (val) {
9362 tracer_tracing_on(tr);
9363 if (tr->current_trace->start)
9364 tr->current_trace->start(tr);
9365 } else {
9366 tracer_tracing_off(tr);
9367 if (tr->current_trace->stop)
9368 tr->current_trace->stop(tr);
9369 /* Wake up any waiters */
9370 ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9371 }
9372 }
9373
9374 (*ppos)++;
9375
9376 return cnt;
9377 }
9378
9379 static const struct file_operations rb_simple_fops = {
9380 .open = tracing_open_generic_tr,
9381 .read = rb_simple_read,
9382 .write = rb_simple_write,
9383 .release = tracing_release_generic_tr,
9384 .llseek = default_llseek,
9385 };
9386
9387 static ssize_t
9388 buffer_percent_read(struct file *filp, char __user *ubuf,
9389 size_t cnt, loff_t *ppos)
9390 {
9391 struct trace_array *tr = filp->private_data;
9392 char buf[64];
9393 int r;
9394
9395 r = tr->buffer_percent;
9396 r = sprintf(buf, "%d\n", r);
9397
9398 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9399 }
9400
9401 static ssize_t
9402 buffer_percent_write(struct file *filp, const char __user *ubuf,
9403 size_t cnt, loff_t *ppos)
9404 {
9405 struct trace_array *tr = filp->private_data;
9406 unsigned long val;
9407 int ret;
9408
9409 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9410 if (ret)
9411 return ret;
9412
9413 if (val > 100)
9414 return -EINVAL;
9415
9416 tr->buffer_percent = val;
9417
9418 (*ppos)++;
9419
9420 return cnt;
9421 }
9422
9423 static const struct file_operations buffer_percent_fops = {
9424 .open = tracing_open_generic_tr,
9425 .read = buffer_percent_read,
9426 .write = buffer_percent_write,
9427 .release = tracing_release_generic_tr,
9428 .llseek = default_llseek,
9429 };
9430
9431 static ssize_t
9432 buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
9433 {
9434 struct trace_array *tr = filp->private_data;
9435 size_t size;
9436 char buf[64];
9437 int order;
9438 int r;
9439
9440 order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9441 size = (PAGE_SIZE << order) / 1024;
9442
9443 r = sprintf(buf, "%zd\n", size);
9444
9445 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9446 }
9447
9448 static ssize_t
9449 buffer_subbuf_size_write(struct file *filp, const char __user *ubuf,
9450 size_t cnt, loff_t *ppos)
9451 {
9452 struct trace_array *tr = filp->private_data;
9453 unsigned long val;
9454 int old_order;
9455 int order;
9456 int pages;
9457 int ret;
9458
9459 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9460 if (ret)
9461 return ret;
9462
9463 val *= 1024; /* value passed in is in KB */
9464
9465 pages = DIV_ROUND_UP(val, PAGE_SIZE);
9466 order = fls(pages - 1);
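/*
 * Worked example (illustrative, assuming 4K pages): writing "8" gives
 * val = 8192, pages = DIV_ROUND_UP(8192, 4096) = 2 and
 * order = fls(1) = 1, i.e. 8K sub-buffers.
 */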
9467
9468 /* limit between 1 and 128 system pages */
9469 if (order < 0 || order > 7)
9470 return -EINVAL;
9471
9472 /* Do not allow tracing while changing the order of the ring buffer */
9473 tracing_stop_tr(tr);
9474
9475 old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9476 if (old_order == order)
9477 goto out;
9478
9479 ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order);
9480 if (ret)
9481 goto out;
9482
9483 #ifdef CONFIG_TRACER_MAX_TRACE
9484
9485 if (!tr->allocated_snapshot)
9486 goto out_max;
9487
9488 ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
9489 if (ret) {
9490 /* Put back the old order */
9491 cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order);
9492 if (WARN_ON_ONCE(cnt)) {
9493 /*
9494 * AARGH! We are left with different orders!
9495 * The max buffer is our "snapshot" buffer.
9496 * When a tracer needs a snapshot (one of the
9497 * latency tracers), it swaps the max buffer
9498 * with the saved snapshot. We succeeded in
9499 * updating the order of the main buffer, but failed to
9500 * update the order of the max buffer. But when we tried
9501 * to reset the main buffer to the original size, we
9502 * failed there too. This is very unlikely to
9503 * happen, but if it does, warn and kill all
9504 * tracing.
9505 */
9506 tracing_disabled = 1;
9507 }
9508 goto out;
9509 }
9510 out_max:
9511 #endif
9512 (*ppos)++;
9513 out:
9514 if (ret)
9515 cnt = ret;
9516 tracing_start_tr(tr);
9517 return cnt;
9518 }
9519
9520 static const struct file_operations buffer_subbuf_size_fops = {
9521 .open = tracing_open_generic_tr,
9522 .read = buffer_subbuf_size_read,
9523 .write = buffer_subbuf_size_write,
9524 .release = tracing_release_generic_tr,
9525 .llseek = default_llseek,
9526 };
9527
9528 static struct dentry *trace_instance_dir;
9529
9530 static void
9531 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9532
9533 #ifdef CONFIG_MODULES
9534 static int make_mod_delta(struct module *mod, void *data)
9535 {
9536 struct trace_module_delta *module_delta;
9537 struct trace_scratch *tscratch;
9538 struct trace_mod_entry *entry;
9539 struct trace_array *tr = data;
9540 int i;
9541
9542 tscratch = tr->scratch;
9543 module_delta = READ_ONCE(tr->module_delta);
9544 for (i = 0; i < tscratch->nr_entries; i++) {
9545 entry = &tscratch->entries[i];
9546 if (strcmp(mod->name, entry->mod_name))
9547 continue;
9548 if (mod->state == MODULE_STATE_GOING)
9549 module_delta->delta[i] = 0;
9550 else
9551 module_delta->delta[i] = (unsigned long)mod->mem[MOD_TEXT].base
9552 - entry->mod_addr;
9553 break;
9554 }
9555 return 0;
9556 }
9557 #else
9558 static int make_mod_delta(struct module *mod, void *data)
9559 {
9560 return 0;
9561 }
9562 #endif
9563
9564 static int mod_addr_comp(const void *a, const void *b, const void *data)
9565 {
9566 const struct trace_mod_entry *e1 = a;
9567 const struct trace_mod_entry *e2 = b;
9568
9569 return e1->mod_addr > e2->mod_addr ? 1 : -1;
9570 }
9571
9572 static void setup_trace_scratch(struct trace_array *tr,
9573 struct trace_scratch *tscratch, unsigned int size)
9574 {
9575 struct trace_module_delta *module_delta;
9576 struct trace_mod_entry *entry;
9577 int i, nr_entries;
9578
9579 if (!tscratch)
9580 return;
9581
9582 tr->scratch = tscratch;
9583 tr->scratch_size = size;
9584
9585 if (tscratch->text_addr)
9586 tr->text_delta = (unsigned long)_text - tscratch->text_addr;
9587
9588 if (struct_size(tscratch, entries, tscratch->nr_entries) > size)
9589 goto reset;
9590
9591 /* Check if each module name is a valid string */
9592 for (i = 0; i < tscratch->nr_entries; i++) {
9593 int n;
9594
9595 entry = &tscratch->entries[i];
9596
9597 for (n = 0; n < MODULE_NAME_LEN; n++) {
9598 if (entry->mod_name[n] == '\0')
9599 break;
9600 if (!isprint(entry->mod_name[n]))
9601 goto reset;
9602 }
9603 if (n == MODULE_NAME_LEN)
9604 goto reset;
9605 }
9606
9607 /* Sort the entries so that we can find the appropriate module from its address. */
9608 nr_entries = tscratch->nr_entries;
9609 sort_r(tscratch->entries, nr_entries, sizeof(struct trace_mod_entry),
9610 mod_addr_comp, NULL, NULL);
9611
9612 if (IS_ENABLED(CONFIG_MODULES)) {
9613 module_delta = kzalloc(struct_size(module_delta, delta, nr_entries), GFP_KERNEL);
9614 if (!module_delta) {
9615 pr_info("module_delta allocation failed. Not able to decode module address.");
9616 goto reset;
9617 }
9618 init_rcu_head(&module_delta->rcu);
9619 } else
9620 module_delta = NULL;
9621 WRITE_ONCE(tr->module_delta, module_delta);
9622
9623 /* Scan modules to make text delta for modules. */
9624 module_for_each_mod(make_mod_delta, tr);
9625
9626 /* Set trace_clock to be the same as in the previous boot. */
9627 if (tscratch->clock_id != tr->clock_id) {
9628 if (tscratch->clock_id >= ARRAY_SIZE(trace_clocks) ||
9629 tracing_set_clock(tr, trace_clocks[tscratch->clock_id].name) < 0) {
9630 pr_info("the previous trace_clock info is not valid.");
9631 goto reset;
9632 }
9633 }
9634 return;
9635 reset:
9636 /* Invalid trace modules */
9637 memset(tscratch, 0, size);
9638 }
9639
9640 static int
9641 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9642 {
9643 enum ring_buffer_flags rb_flags;
9644 struct trace_scratch *tscratch;
9645 unsigned int scratch_size = 0;
9646
9647 rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9648
9649 buf->tr = tr;
9650
9651 if (tr->range_addr_start && tr->range_addr_size) {
9652 /* Add scratch buffer to handle 128 modules */
9653 buf->buffer = ring_buffer_alloc_range(size, rb_flags, 0,
9654 tr->range_addr_start,
9655 tr->range_addr_size,
9656 struct_size(tscratch, entries, 128));
9657
9658 tscratch = ring_buffer_meta_scratch(buf->buffer, &scratch_size);
9659 setup_trace_scratch(tr, tscratch, scratch_size);
9660
9661 /*
9662 * This is basically the same as a mapped buffer,
9663 * with the same restrictions.
9664 */
9665 tr->mapped++;
9666 } else {
9667 buf->buffer = ring_buffer_alloc(size, rb_flags);
9668 }
9669 if (!buf->buffer)
9670 return -ENOMEM;
9671
9672 buf->data = alloc_percpu(struct trace_array_cpu);
9673 if (!buf->data) {
9674 ring_buffer_free(buf->buffer);
9675 buf->buffer = NULL;
9676 return -ENOMEM;
9677 }
9678
9679 /* Allocate the first page for all buffers */
9680 set_buffer_entries(&tr->array_buffer,
9681 ring_buffer_size(tr->array_buffer.buffer, 0));
9682
9683 return 0;
9684 }
9685
9686 static void free_trace_buffer(struct array_buffer *buf)
9687 {
9688 if (buf->buffer) {
9689 ring_buffer_free(buf->buffer);
9690 buf->buffer = NULL;
9691 free_percpu(buf->data);
9692 buf->data = NULL;
9693 }
9694 }
9695
9696 static int allocate_trace_buffers(struct trace_array *tr, int size)
9697 {
9698 int ret;
9699
9700 ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9701 if (ret)
9702 return ret;
9703
9704 #ifdef CONFIG_TRACER_MAX_TRACE
9705 /* Fixed memory mapped buffer trace arrays do not have snapshot buffers */
9706 if (tr->range_addr_start)
9707 return 0;
9708
9709 ret = allocate_trace_buffer(tr, &tr->max_buffer,
9710 allocate_snapshot ? size : 1);
9711 if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9712 free_trace_buffer(&tr->array_buffer);
9713 return -ENOMEM;
9714 }
9715 tr->allocated_snapshot = allocate_snapshot;
9716
9717 allocate_snapshot = false;
9718 #endif
9719
9720 return 0;
9721 }
9722
9723 static void free_trace_buffers(struct trace_array *tr)
9724 {
9725 if (!tr)
9726 return;
9727
9728 free_trace_buffer(&tr->array_buffer);
9729 kfree(tr->module_delta);
9730
9731 #ifdef CONFIG_TRACER_MAX_TRACE
9732 free_trace_buffer(&tr->max_buffer);
9733 #endif
9734 }
9735
9736 static void init_trace_flags_index(struct trace_array *tr)
9737 {
9738 int i;
9739
9740 /* Used by the trace options files */
9741 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9742 tr->trace_flags_index[i] = i;
9743 }
9744
9745 static void __update_tracer_options(struct trace_array *tr)
9746 {
9747 struct tracer *t;
9748
9749 for (t = trace_types; t; t = t->next)
9750 add_tracer_options(tr, t);
9751 }
9752
9753 static void update_tracer_options(struct trace_array *tr)
9754 {
9755 guard(mutex)(&trace_types_lock);
9756 tracer_options_updated = true;
9757 __update_tracer_options(tr);
9758 }
9759
9760 /* Must have trace_types_lock held */
9761 struct trace_array *trace_array_find(const char *instance)
9762 {
9763 struct trace_array *tr, *found = NULL;
9764
9765 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9766 if (tr->name && strcmp(tr->name, instance) == 0) {
9767 found = tr;
9768 break;
9769 }
9770 }
9771
9772 return found;
9773 }
9774
9775 struct trace_array *trace_array_find_get(const char *instance)
9776 {
9777 struct trace_array *tr;
9778
9779 guard(mutex)(&trace_types_lock);
9780 tr = trace_array_find(instance);
9781 if (tr)
9782 tr->ref++;
9783
9784 return tr;
9785 }
9786
9787 static int trace_array_create_dir(struct trace_array *tr)
9788 {
9789 int ret;
9790
9791 tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9792 if (!tr->dir)
9793 return -EINVAL;
9794
9795 ret = event_trace_add_tracer(tr->dir, tr);
9796 if (ret) {
9797 tracefs_remove(tr->dir);
9798 return ret;
9799 }
9800
9801 init_tracer_tracefs(tr, tr->dir);
9802 __update_tracer_options(tr);
9803
9804 return ret;
9805 }
9806
9807 static struct trace_array *
9808 trace_array_create_systems(const char *name, const char *systems,
9809 unsigned long range_addr_start,
9810 unsigned long range_addr_size)
9811 {
9812 struct trace_array *tr;
9813 int ret;
9814
9815 ret = -ENOMEM;
9816 tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9817 if (!tr)
9818 return ERR_PTR(ret);
9819
9820 tr->name = kstrdup(name, GFP_KERNEL);
9821 if (!tr->name)
9822 goto out_free_tr;
9823
9824 if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9825 goto out_free_tr;
9826
9827 if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
9828 goto out_free_tr;
9829
9830 if (systems) {
9831 tr->system_names = kstrdup_const(systems, GFP_KERNEL);
9832 if (!tr->system_names)
9833 goto out_free_tr;
9834 }
9835
9836 /* Only for boot up memory mapped ring buffers */
9837 tr->range_addr_start = range_addr_start;
9838 tr->range_addr_size = range_addr_size;
9839
9840 tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9841
9842 cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9843
9844 raw_spin_lock_init(&tr->start_lock);
9845
9846 tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9847 #ifdef CONFIG_TRACER_MAX_TRACE
9848 spin_lock_init(&tr->snapshot_trigger_lock);
9849 #endif
9850 tr->current_trace = &nop_trace;
9851
9852 INIT_LIST_HEAD(&tr->systems);
9853 INIT_LIST_HEAD(&tr->events);
9854 INIT_LIST_HEAD(&tr->hist_vars);
9855 INIT_LIST_HEAD(&tr->err_log);
9856 INIT_LIST_HEAD(&tr->marker_list);
9857
9858 #ifdef CONFIG_MODULES
9859 INIT_LIST_HEAD(&tr->mod_events);
9860 #endif
9861
9862 if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9863 goto out_free_tr;
9864
9865 /* The ring buffer is expanded by default */
9866 trace_set_ring_buffer_expanded(tr);
9867
9868 if (ftrace_allocate_ftrace_ops(tr) < 0)
9869 goto out_free_tr;
9870
9871 ftrace_init_trace_array(tr);
9872
9873 init_trace_flags_index(tr);
9874
9875 if (trace_instance_dir) {
9876 ret = trace_array_create_dir(tr);
9877 if (ret)
9878 goto out_free_tr;
9879 } else
9880 __trace_early_add_events(tr);
9881
9882 list_add(&tr->list, &ftrace_trace_arrays);
9883
9884 tr->ref++;
9885
9886 return tr;
9887
9888 out_free_tr:
9889 ftrace_free_ftrace_ops(tr);
9890 free_trace_buffers(tr);
9891 free_cpumask_var(tr->pipe_cpumask);
9892 free_cpumask_var(tr->tracing_cpumask);
9893 kfree_const(tr->system_names);
9894 kfree(tr->range_name);
9895 kfree(tr->name);
9896 kfree(tr);
9897
9898 return ERR_PTR(ret);
9899 }
9900
9901 static struct trace_array *trace_array_create(const char *name)
9902 {
9903 return trace_array_create_systems(name, NULL, 0, 0);
9904 }
9905
9906 static int instance_mkdir(const char *name)
9907 {
9908 struct trace_array *tr;
9909 int ret;
9910
9911 guard(mutex)(&event_mutex);
9912 guard(mutex)(&trace_types_lock);
9913
9914 ret = -EEXIST;
9915 if (trace_array_find(name))
9916 return -EEXIST;
9917
9918 tr = trace_array_create(name);
9919
9920 ret = PTR_ERR_OR_ZERO(tr);
9921
9922 return ret;
9923 }
9924
9925 #ifdef CONFIG_MMU
9926 static u64 map_pages(unsigned long start, unsigned long size)
9927 {
9928 unsigned long vmap_start, vmap_end;
9929 struct vm_struct *area;
9930 int ret;
9931
9932 area = get_vm_area(size, VM_IOREMAP);
9933 if (!area)
9934 return 0;
9935
9936 vmap_start = (unsigned long) area->addr;
9937 vmap_end = vmap_start + size;
9938
9939 ret = vmap_page_range(vmap_start, vmap_end,
9940 start, pgprot_nx(PAGE_KERNEL));
9941 if (ret < 0) {
9942 free_vm_area(area);
9943 return 0;
9944 }
9945
9946 return (u64)vmap_start;
9947 }
9948 #else
9949 static inline u64 map_pages(unsigned long start, unsigned long size)
9950 {
9951 return 0;
9952 }
9953 #endif
9954
9955 /**
9956 * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9957 * @name: The name of the trace array to be looked up/created.
9958 * @systems: A list of systems to create event directories for (NULL for all)
9959 *
9960 * Returns a pointer to the trace array with the given name, or
9961 * NULL if it cannot be created.
9962 *
9963 * NOTE: This function increments the reference counter associated with the
9964 * trace array returned. This makes sure it cannot be freed while in use.
9965 * Use trace_array_put() once the trace array is no longer needed.
9966 * If the trace_array is to be freed, trace_array_destroy() needs to
9967 * be called after the trace_array_put(), or simply let user space delete
9968 * it from the tracefs instances directory. But until the
9969 * trace_array_put() is called, user space can not delete it.
9970 *
9971 *
 */
9972 struct trace_array *trace_array_get_by_name(const char *name, const char *systems)
9973 {
9974 struct trace_array *tr;
9975
9976 guard(mutex)(&event_mutex);
9977 guard(mutex)(&trace_types_lock);
9978
9979 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9980 if (tr->name && strcmp(tr->name, name) == 0) {
9981 tr->ref++;
9982 return tr;
9983 }
9984 }
9985
9986 tr = trace_array_create_systems(name, systems, 0, 0);
9987
9988 if (IS_ERR(tr))
9989 tr = NULL;
9990 else
9991 tr->ref++;
9992
9993 return tr;
9994 }
9995 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
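
/*
 * Minimal usage sketch for a kernel module (illustrative only; the name
 * "my_instance" and the error handling are placeholders, not taken from
 * this file):
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_instance", NULL);
 *	if (!tr)
 *		return -ENOMEM;
 *	...
 *	trace_array_put(tr);
 *	trace_array_destroy(tr);	(only if the instance should go away)
 */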
9996
9997 static int __remove_instance(struct trace_array *tr)
9998 {
9999 int i;
10000
10001 /* Reference counter for a newly created trace array = 1. */
10002 if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
10003 return -EBUSY;
10004
10005 list_del(&tr->list);
10006
10007 /* Disable all the flags that were enabled coming in */
10008 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
10009 if ((1 << i) & ZEROED_TRACE_FLAGS)
10010 set_tracer_flag(tr, 1 << i, 0);
10011 }
10012
10013 if (printk_trace == tr)
10014 update_printk_trace(&global_trace);
10015
10016 if (update_marker_trace(tr, 0))
10017 synchronize_rcu();
10018
10019 tracing_set_nop(tr);
10020 clear_ftrace_function_probes(tr);
10021 event_trace_del_tracer(tr);
10022 ftrace_clear_pids(tr);
10023 ftrace_destroy_function_files(tr);
10024 tracefs_remove(tr->dir);
10025 free_percpu(tr->last_func_repeats);
10026 free_trace_buffers(tr);
10027 clear_tracing_err_log(tr);
10028
10029 if (tr->range_name) {
10030 reserve_mem_release_by_name(tr->range_name);
10031 kfree(tr->range_name);
10032 }
10033
10034 for (i = 0; i < tr->nr_topts; i++) {
10035 kfree(tr->topts[i].topts);
10036 }
10037 kfree(tr->topts);
10038
10039 free_cpumask_var(tr->pipe_cpumask);
10040 free_cpumask_var(tr->tracing_cpumask);
10041 kfree_const(tr->system_names);
10042 kfree(tr->name);
10043 kfree(tr);
10044
10045 return 0;
10046 }
10047
10048 int trace_array_destroy(struct trace_array *this_tr)
10049 {
10050 struct trace_array *tr;
10051
10052 if (!this_tr)
10053 return -EINVAL;
10054
10055 guard(mutex)(&event_mutex);
10056 guard(mutex)(&trace_types_lock);
10057
10058
10059 /* Making sure trace array exists before destroying it. */
10060 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10061 if (tr == this_tr)
10062 return __remove_instance(tr);
10063 }
10064
10065 return -ENODEV;
10066 }
10067 EXPORT_SYMBOL_GPL(trace_array_destroy);
10068
10069 static int instance_rmdir(const char *name)
10070 {
10071 struct trace_array *tr;
10072
10073 guard(mutex)(&event_mutex);
10074 guard(mutex)(&trace_types_lock);
10075
10076 tr = trace_array_find(name);
10077 if (!tr)
10078 return -ENODEV;
10079
10080 return __remove_instance(tr);
10081 }
10082
10083 static __init void create_trace_instances(struct dentry *d_tracer)
10084 {
10085 struct trace_array *tr;
10086
10087 trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
10088 instance_mkdir,
10089 instance_rmdir);
10090 if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
10091 return;
10092
10093 guard(mutex)(&event_mutex);
10094 guard(mutex)(&trace_types_lock);
10095
10096 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10097 if (!tr->name)
10098 continue;
10099 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
10100 "Failed to create instance directory\n"))
10101 return;
10102 }
10103 }
10104
10105 static void
10106 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
10107 {
10108 int cpu;
10109
10110 trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
10111 tr, &show_traces_fops);
10112
10113 trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
10114 tr, &set_tracer_fops);
10115
10116 trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
10117 tr, &tracing_cpumask_fops);
10118
10119 trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
10120 tr, &tracing_iter_fops);
10121
10122 trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
10123 tr, &tracing_fops);
10124
10125 trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
10126 tr, &tracing_pipe_fops);
10127
10128 trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
10129 tr, &tracing_entries_fops);
10130
10131 trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
10132 tr, &tracing_total_entries_fops);
10133
10134 trace_create_file("free_buffer", 0200, d_tracer,
10135 tr, &tracing_free_buffer_fops);
10136
10137 trace_create_file("trace_marker", 0220, d_tracer,
10138 tr, &tracing_mark_fops);
10139
10140 tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");
10141
10142 trace_create_file("trace_marker_raw", 0220, d_tracer,
10143 tr, &tracing_mark_raw_fops);
10144
10145 trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
10146 &trace_clock_fops);
10147
10148 trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
10149 tr, &rb_simple_fops);
10150
10151 trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
10152 &trace_time_stamp_mode_fops);
10153
10154 tr->buffer_percent = 50;
10155
10156 trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
10157 tr, &buffer_percent_fops);
10158
10159 trace_create_file("buffer_subbuf_size_kb", TRACE_MODE_WRITE, d_tracer,
10160 tr, &buffer_subbuf_size_fops);
10161
10162 create_trace_options_dir(tr);
10163
10164 #ifdef CONFIG_TRACER_MAX_TRACE
10165 trace_create_maxlat_file(tr, d_tracer);
10166 #endif
10167
10168 if (ftrace_create_function_files(tr, d_tracer))
10169 MEM_FAIL(1, "Could not allocate function filter files");
10170
10171 if (tr->range_addr_start) {
10172 trace_create_file("last_boot_info", TRACE_MODE_READ, d_tracer,
10173 tr, &last_boot_fops);
10174 #ifdef CONFIG_TRACER_SNAPSHOT
10175 } else {
10176 trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
10177 tr, &snapshot_fops);
10178 #endif
10179 }
10180
10181 trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
10182 tr, &tracing_err_log_fops);
10183
10184 for_each_tracing_cpu(cpu)
10185 tracing_init_tracefs_percpu(tr, cpu);
10186
10187 ftrace_init_tracefs(tr, d_tracer);
10188 }
10189
10190 #ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED
10191 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
10192 {
10193 struct vfsmount *mnt;
10194 struct file_system_type *type;
10195 struct fs_context *fc;
10196 int ret;
10197
10198 /*
10199 * To maintain backward compatibility for tools that mount
10200 * debugfs to get to the tracing facility, tracefs is automatically
10201 * mounted to the debugfs/tracing directory.
10202 */
10203 type = get_fs_type("tracefs");
10204 if (!type)
10205 return NULL;
10206
10207 fc = fs_context_for_submount(type, mntpt);
10208 put_filesystem(type);
10209 if (IS_ERR(fc))
10210 return ERR_CAST(fc);
10211
10212 pr_warn("NOTICE: Automounting of tracing to debugfs is deprecated and will be removed in 2030\n");
10213
10214 ret = vfs_parse_fs_string(fc, "source",
10215 "tracefs", strlen("tracefs"));
10216 if (!ret)
10217 mnt = fc_mount(fc);
10218 else
10219 mnt = ERR_PTR(ret);
10220
10221 put_fs_context(fc);
10222 return mnt;
10223 }
10224 #endif
10225
10226 /**
10227 * tracing_init_dentry - initialize top level trace array
10228 *
10229 * This is called when creating files or directories in the tracing
10230 * directory. It is called via fs_initcall() by any of the boot up code
10231 * and expects to return the dentry of the top level tracing directory.
10232 */
10233 int tracing_init_dentry(void)
10234 {
10235 struct trace_array *tr = &global_trace;
10236
10237 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10238 pr_warn("Tracing disabled due to lockdown\n");
10239 return -EPERM;
10240 }
10241
10242 /* The top level trace array uses NULL as parent */
10243 if (tr->dir)
10244 return 0;
10245
10246 if (WARN_ON(!tracefs_initialized()))
10247 return -ENODEV;
10248
10249 #ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED
10250 /*
10251 * As there may still be users that expect the tracing
10252 * files to exist in debugfs/tracing, we must automount
10253 * the tracefs file system there, so older tools still
10254 * work with the newer kernel.
10255 */
10256 tr->dir = debugfs_create_automount("tracing", NULL,
10257 trace_automount, NULL);
10258 #endif
10259
10260 return 0;
10261 }
10262
10263 extern struct trace_eval_map *__start_ftrace_eval_maps[];
10264 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
10265
10266 static struct workqueue_struct *eval_map_wq __initdata;
10267 static struct work_struct eval_map_work __initdata;
10268 static struct work_struct tracerfs_init_work __initdata;
10269
10270 static void __init eval_map_work_func(struct work_struct *work)
10271 {
10272 int len;
10273
10274 len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
10275 trace_event_update_with_eval_map(NULL, __start_ftrace_eval_maps, len);
10276 }
10277
10278 static int __init trace_eval_init(void)
10279 {
10280 INIT_WORK(&eval_map_work, eval_map_work_func);
10281
10282 eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
10283 if (!eval_map_wq) {
10284 pr_err("Unable to allocate eval_map_wq\n");
10285 /* Do work here */
10286 eval_map_work_func(&eval_map_work);
10287 return -ENOMEM;
10288 }
10289
10290 queue_work(eval_map_wq, &eval_map_work);
10291 return 0;
10292 }
10293
10294 subsys_initcall(trace_eval_init);
10295
10296 static int __init trace_eval_sync(void)
10297 {
10298 /* Make sure the eval map updates are finished */
10299 if (eval_map_wq)
10300 destroy_workqueue(eval_map_wq);
10301 return 0;
10302 }
10303
10304 late_initcall_sync(trace_eval_sync);
10305
10306
10307 #ifdef CONFIG_MODULES
10308
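/*
 * For example (illustrative), module_exists("ext4") looks up the symbol
 * "ext4:__this_module" via module_kallsyms_lookup_name() and returns true
 * only if it resolves to a non-zero address.
 */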
10309 bool module_exists(const char *module)
10310 {
10311 /* All modules have the symbol __this_module */
10312 static const char this_mod[] = "__this_module";
10313 char modname[MODULE_NAME_LEN + sizeof(this_mod) + 2];
10314 unsigned long val;
10315 int n;
10316
10317 n = snprintf(modname, sizeof(modname), "%s:%s", module, this_mod);
10318
10319 if (n > sizeof(modname) - 1)
10320 return false;
10321
10322 val = module_kallsyms_lookup_name(modname);
10323 return val != 0;
10324 }
10325
10326 static void trace_module_add_evals(struct module *mod)
10327 {
10328 /*
10329 * Modules with bad taint do not have events created, do
10330 * not bother with enums either.
10331 */
10332 if (trace_module_has_bad_taint(mod))
10333 return;
10334
10335 /* Even if there are no trace_evals, this is needed to sanitize field types. */
10336 trace_event_update_with_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
10337 }
10338
10339 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
10340 static void trace_module_remove_evals(struct module *mod)
10341 {
10342 union trace_eval_map_item *map;
10343 union trace_eval_map_item **last = &trace_eval_maps;
10344
10345 if (!mod->num_trace_evals)
10346 return;
10347
10348 guard(mutex)(&trace_eval_mutex);
10349
10350 map = trace_eval_maps;
10351
10352 while (map) {
10353 if (map->head.mod == mod)
10354 break;
10355 map = trace_eval_jmp_to_tail(map);
10356 last = &map->tail.next;
10357 map = map->tail.next;
10358 }
10359 if (!map)
10360 return;
10361
10362 *last = trace_eval_jmp_to_tail(map)->tail.next;
10363 kfree(map);
10364 }
10365 #else
10366 static inline void trace_module_remove_evals(struct module *mod) { }
10367 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
10368
10369 static void trace_module_record(struct module *mod, bool add)
10370 {
10371 struct trace_array *tr;
10372 unsigned long flags;
10373
10374 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10375 flags = tr->flags & (TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT);
10376 /* Update any persistent trace array that has already been started */
10377 if (flags == TRACE_ARRAY_FL_BOOT && add) {
10378 guard(mutex)(&scratch_mutex);
10379 save_mod(mod, tr);
10380 } else if (flags & TRACE_ARRAY_FL_LAST_BOOT) {
10381 /* Update the delta if the module was loaded in the previous boot */
10382 make_mod_delta(mod, tr);
10383 }
10384 }
10385 }
10386
10387 static int trace_module_notify(struct notifier_block *self,
10388 unsigned long val, void *data)
10389 {
10390 struct module *mod = data;
10391
10392 switch (val) {
10393 case MODULE_STATE_COMING:
10394 trace_module_add_evals(mod);
10395 trace_module_record(mod, true);
10396 break;
10397 case MODULE_STATE_GOING:
10398 trace_module_remove_evals(mod);
10399 trace_module_record(mod, false);
10400 break;
10401 }
10402
10403 return NOTIFY_OK;
10404 }
10405
10406 static struct notifier_block trace_module_nb = {
10407 .notifier_call = trace_module_notify,
10408 .priority = 0,
10409 };
10410 #endif /* CONFIG_MODULES */
10411
10412 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
10413 {
10414
10415 event_trace_init();
10416
10417 init_tracer_tracefs(&global_trace, NULL);
10418 ftrace_init_tracefs_toplevel(&global_trace, NULL);
10419
10420 trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
10421 &global_trace, &tracing_thresh_fops);
10422
10423 trace_create_file("README", TRACE_MODE_READ, NULL,
10424 NULL, &tracing_readme_fops);
10425
10426 trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
10427 NULL, &tracing_saved_cmdlines_fops);
10428
10429 trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
10430 NULL, &tracing_saved_cmdlines_size_fops);
10431
10432 trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
10433 NULL, &tracing_saved_tgids_fops);
10434
10435 trace_create_eval_file(NULL);
10436
10437 #ifdef CONFIG_MODULES
10438 register_module_notifier(&trace_module_nb);
10439 #endif
10440
10441 #ifdef CONFIG_DYNAMIC_FTRACE
10442 trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
10443 NULL, &tracing_dyn_info_fops);
10444 #endif
10445
10446 create_trace_instances(NULL);
10447
10448 update_tracer_options(&global_trace);
10449 }
10450
10451 static __init int tracer_init_tracefs(void)
10452 {
10453 int ret;
10454
10455 trace_access_lock_init();
10456
10457 ret = tracing_init_dentry();
10458 if (ret)
10459 return 0;
10460
10461 if (eval_map_wq) {
10462 INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
10463 queue_work(eval_map_wq, &tracerfs_init_work);
10464 } else {
10465 tracer_init_tracefs_work_func(NULL);
10466 }
10467
10468 rv_init_interface();
10469
10470 return 0;
10471 }
10472
10473 fs_initcall(tracer_init_tracefs);
10474
10475 static int trace_die_panic_handler(struct notifier_block *self,
10476 unsigned long ev, void *unused);
10477
10478 static struct notifier_block trace_panic_notifier = {
10479 .notifier_call = trace_die_panic_handler,
10480 .priority = INT_MAX - 1,
10481 };
10482
10483 static struct notifier_block trace_die_notifier = {
10484 .notifier_call = trace_die_panic_handler,
10485 .priority = INT_MAX - 1,
10486 };
10487
10488 /*
10489 * The idea is to execute the following die/panic callback early, in order
10490 * to avoid showing irrelevant information in the trace (like other panic
10491 * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
10492 * warnings get disabled (to prevent potential log flooding).
10493 */
10494 static int trace_die_panic_handler(struct notifier_block *self,
10495 unsigned long ev, void *unused)
10496 {
10497 if (!ftrace_dump_on_oops_enabled())
10498 return NOTIFY_DONE;
10499
10500 /* The die notifier requires DIE_OOPS to trigger */
10501 if (self == &trace_die_notifier && ev != DIE_OOPS)
10502 return NOTIFY_DONE;
10503
10504 ftrace_dump(DUMP_PARAM);
10505
10506 return NOTIFY_DONE;
10507 }
10508
10509 /*
10510 * printk is set to a max of 1024; we really don't need it that big.
10511 * Nothing should be printing 1000 characters anyway.
10512 */
10513 #define TRACE_MAX_PRINT 1000
10514
10515 /*
10516 * Define here KERN_TRACE so that we have one place to modify
10517 * it if we decide to change what log level the ftrace dump
10518 * should be at.
10519 */
10520 #define KERN_TRACE KERN_EMERG
10521
10522 void
10523 trace_printk_seq(struct trace_seq *s)
10524 {
10525 /* Probably should print a warning here. */
10526 if (s->seq.len >= TRACE_MAX_PRINT)
10527 s->seq.len = TRACE_MAX_PRINT;
10528
10529 /*
10530 * More paranoid code. Although the buffer size is set to
10531 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
10532 * an extra layer of protection.
10533 */
10534 if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
10535 s->seq.len = s->seq.size - 1;
10536
10537 /* Should already be NUL terminated, but we are paranoid. */
10538 s->buffer[s->seq.len] = 0;
10539
10540 printk(KERN_TRACE "%s", s->buffer);
10541
10542 trace_seq_init(s);
10543 }
10544
10545 static void trace_init_iter(struct trace_iterator *iter, struct trace_array *tr)
10546 {
10547 iter->tr = tr;
10548 iter->trace = iter->tr->current_trace;
10549 iter->cpu_file = RING_BUFFER_ALL_CPUS;
10550 iter->array_buffer = &tr->array_buffer;
10551
10552 if (iter->trace && iter->trace->open)
10553 iter->trace->open(iter);
10554
10555 /* Annotate start of buffers if we had overruns */
10556 if (ring_buffer_overruns(iter->array_buffer->buffer))
10557 iter->iter_flags |= TRACE_FILE_ANNOTATE;
10558
10559 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
10560 if (trace_clocks[iter->tr->clock_id].in_ns)
10561 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10562
10563 /* Can not use kmalloc for iter.temp and iter.fmt */
10564 iter->temp = static_temp_buf;
10565 iter->temp_size = STATIC_TEMP_BUF_SIZE;
10566 iter->fmt = static_fmt_buf;
10567 iter->fmt_size = STATIC_FMT_BUF_SIZE;
10568 }
10569
10570 void trace_init_global_iter(struct trace_iterator *iter)
10571 {
10572 trace_init_iter(iter, &global_trace);
10573 }
10574
10575 static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_mode)
10576 {
10577 /* use static because iter can be a bit big for the stack */
10578 static struct trace_iterator iter;
10579 unsigned int old_userobj;
10580 unsigned long flags;
10581 int cnt = 0;
10582
10583 /*
10584 * Always turn off tracing when we dump.
10585 * We don't need to show trace output of what happens
10586 * between multiple crashes.
10587 *
10588 * If the user does a sysrq-z, then they can re-enable
10589 * tracing with echo 1 > tracing_on.
10590 */
10591 tracer_tracing_off(tr);
10592
10593 local_irq_save(flags);
10594
10595 /* Simulate the iterator */
10596 trace_init_iter(&iter, tr);
10597
10598 /* While dumping, do not allow the buffer to be enabled */
10599 tracer_tracing_disable(tr);
10600
10601 old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
10602
10603 /* don't look at user memory in panic mode */
10604 tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
10605
10606 if (dump_mode == DUMP_ORIG)
10607 iter.cpu_file = raw_smp_processor_id();
10608 else
10609 iter.cpu_file = RING_BUFFER_ALL_CPUS;
10610
10611 if (tr == &global_trace)
10612 printk(KERN_TRACE "Dumping ftrace buffer:\n");
10613 else
10614 printk(KERN_TRACE "Dumping ftrace instance %s buffer:\n", tr->name);
10615
10616 /* Did function tracer already get disabled? */
10617 if (ftrace_is_dead()) {
10618 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10619 printk("# MAY BE MISSING FUNCTION EVENTS\n");
10620 }
10621
10622 /*
10623 * We need to stop all tracing on all CPUS to read
10624 * the next buffer. This is a bit expensive, but is
10625 * not done often. We fill in all that we can read,
10626 * and then release the locks again.
10627 */
10628
10629 while (!trace_empty(&iter)) {
10630
10631 if (!cnt)
10632 printk(KERN_TRACE "---------------------------------\n");
10633
10634 cnt++;
10635
10636 trace_iterator_reset(&iter);
10637 iter.iter_flags |= TRACE_FILE_LAT_FMT;
10638
10639 if (trace_find_next_entry_inc(&iter) != NULL) {
10640 int ret;
10641
10642 ret = print_trace_line(&iter);
10643 if (ret != TRACE_TYPE_NO_CONSUME)
10644 trace_consume(&iter);
10645
10646 trace_printk_seq(&iter.seq);
10647 }
10648 touch_nmi_watchdog();
10649 }
10650
10651 if (!cnt)
10652 printk(KERN_TRACE " (ftrace buffer empty)\n");
10653 else
10654 printk(KERN_TRACE "---------------------------------\n");
10655
10656 tr->trace_flags |= old_userobj;
10657
10658 tracer_tracing_enable(tr);
10659 local_irq_restore(flags);
10660 }
10661
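/*
 * ftrace_dump_by_param() parses the ftrace_dump_on_oops string.  An
 * illustrative setting (the instance name "foo" is just an example):
 *
 *	ftrace_dump_on_oops=1,foo=orig_cpu
 *
 * dumps all CPUs of the global buffer and only the originating CPU of the
 * "foo" instance.
 */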
10662 static void ftrace_dump_by_param(void)
10663 {
10664 bool first_param = true;
10665 char dump_param[MAX_TRACER_SIZE];
10666 char *buf, *token, *inst_name;
10667 struct trace_array *tr;
10668
10669 strscpy(dump_param, ftrace_dump_on_oops, MAX_TRACER_SIZE);
10670 buf = dump_param;
10671
10672 while ((token = strsep(&buf, ",")) != NULL) {
10673 if (first_param) {
10674 first_param = false;
10675 if (!strcmp("0", token))
10676 continue;
10677 else if (!strcmp("1", token)) {
10678 ftrace_dump_one(&global_trace, DUMP_ALL);
10679 continue;
10680 }
10681 else if (!strcmp("2", token) ||
10682 !strcmp("orig_cpu", token)) {
10683 ftrace_dump_one(&global_trace, DUMP_ORIG);
10684 continue;
10685 }
10686 }
10687
10688 inst_name = strsep(&token, "=");
10689 tr = trace_array_find(inst_name);
10690 if (!tr) {
10691 printk(KERN_TRACE "Instance %s not found\n", inst_name);
10692 continue;
10693 }
10694
10695 if (token && (!strcmp("2", token) ||
10696 !strcmp("orig_cpu", token)))
10697 ftrace_dump_one(tr, DUMP_ORIG);
10698 else
10699 ftrace_dump_one(tr, DUMP_ALL);
10700 }
10701 }
10702
10703 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10704 {
10705 static atomic_t dump_running;
10706
10707 /* Only allow one dump user at a time. */
10708 if (atomic_inc_return(&dump_running) != 1) {
10709 atomic_dec(&dump_running);
10710 return;
10711 }
10712
10713 switch (oops_dump_mode) {
10714 case DUMP_ALL:
10715 ftrace_dump_one(&global_trace, DUMP_ALL);
10716 break;
10717 case DUMP_ORIG:
10718 ftrace_dump_one(&global_trace, DUMP_ORIG);
10719 break;
10720 case DUMP_PARAM:
10721 ftrace_dump_by_param();
10722 break;
10723 case DUMP_NONE:
10724 break;
10725 default:
10726 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10727 ftrace_dump_one(&global_trace, DUMP_ALL);
10728 }
10729
10730 atomic_dec(&dump_running);
10731 }
10732 EXPORT_SYMBOL_GPL(ftrace_dump);
10733
10734 #define WRITE_BUFSIZE 4096
10735
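/*
 * Sketch of how a tracefs .write handler is expected to use the helper
 * below, where my_create_cmd() would parse one newline-terminated command
 * (hypothetical caller, not from this file):
 *
 *	static ssize_t my_write(struct file *file, const char __user *ubuf,
 *				size_t cnt, loff_t *ppos)
 *	{
 *		return trace_parse_run_command(file, ubuf, cnt, ppos,
 *					       my_create_cmd);
 *	}
 */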
10736 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10737 size_t count, loff_t *ppos,
10738 int (*createfn)(const char *))
10739 {
10740 char *kbuf __free(kfree) = NULL;
10741 char *buf, *tmp;
10742 int ret = 0;
10743 size_t done = 0;
10744 size_t size;
10745
10746 kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10747 if (!kbuf)
10748 return -ENOMEM;
10749
10750 while (done < count) {
10751 size = count - done;
10752
10753 if (size >= WRITE_BUFSIZE)
10754 size = WRITE_BUFSIZE - 1;
10755
10756 if (copy_from_user(kbuf, buffer + done, size))
10757 return -EFAULT;
10758
10759 kbuf[size] = '\0';
10760 buf = kbuf;
10761 do {
10762 tmp = strchr(buf, '\n');
10763 if (tmp) {
10764 *tmp = '\0';
10765 size = tmp - buf + 1;
10766 } else {
10767 size = strlen(buf);
10768 if (done + size < count) {
10769 if (buf != kbuf)
10770 break;
10771 /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10772 pr_warn("Line length is too long: Should be less than %d\n",
10773 WRITE_BUFSIZE - 2);
10774 return -EINVAL;
10775 }
10776 }
10777 done += size;
10778
10779 /* Remove comments */
10780 tmp = strchr(buf, '#');
10781
10782 if (tmp)
10783 *tmp = '\0';
10784
10785 ret = createfn(buf);
10786 if (ret)
10787 return ret;
10788 buf += size;
10789
10790 } while (done < count);
10791 }
10792 return done;
10793 }
10794
10795 #ifdef CONFIG_TRACER_MAX_TRACE
10796 __init static bool tr_needs_alloc_snapshot(const char *name)
10797 {
10798 char *test;
10799 int len = strlen(name);
10800 bool ret;
10801
10802 if (!boot_snapshot_index)
10803 return false;
10804
10805 if (strncmp(name, boot_snapshot_info, len) == 0 &&
10806 boot_snapshot_info[len] == '\t')
10807 return true;
10808
10809 test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10810 if (!test)
10811 return false;
10812
10813 sprintf(test, "\t%s\t", name);
10814 ret = strstr(boot_snapshot_info, test) == NULL;
10815 kfree(test);
10816 return ret;
10817 }
10818
10819 __init static void do_allocate_snapshot(const char *name)
10820 {
10821 if (!tr_needs_alloc_snapshot(name))
10822 return;
10823
10824 /*
10825 * When allocate_snapshot is set, the next call to
10826 * allocate_trace_buffers() (called by trace_array_get_by_name())
10827 * will allocate the snapshot buffer. That will also clear
10828 * this flag.
10829 */
10830 allocate_snapshot = true;
10831 }
10832 #else
10833 static inline void do_allocate_snapshot(const char *name) { }
10834 #endif
10835
10836 __init static void enable_instances(void)
10837 {
10838 struct trace_array *tr;
10839 bool memmap_area = false;
10840 char *curr_str;
10841 char *name;
10842 char *str;
10843 char *tok;
10844
10845 /* A tab is always appended */
10846 boot_instance_info[boot_instance_index - 1] = '\0';
10847 str = boot_instance_info;
10848
10849 while ((curr_str = strsep(&str, "\t"))) {
10850 phys_addr_t start = 0;
10851 phys_addr_t size = 0;
10852 unsigned long addr = 0;
10853 bool traceprintk = false;
10854 bool traceoff = false;
10855 char *flag_delim;
10856 char *addr_delim;
10857 char *rname __free(kfree) = NULL;
10858
10859 tok = strsep(&curr_str, ",");
10860
10861 flag_delim = strchr(tok, '^');
10862 addr_delim = strchr(tok, '@');
10863
10864 if (addr_delim)
10865 *addr_delim++ = '\0';
10866
10867 if (flag_delim)
10868 *flag_delim++ = '\0';
10869
10870 name = tok;
10871
10872 if (flag_delim) {
10873 char *flag;
10874
10875 while ((flag = strsep(&flag_delim, "^"))) {
10876 if (strcmp(flag, "traceoff") == 0) {
10877 traceoff = true;
10878 } else if ((strcmp(flag, "printk") == 0) ||
10879 (strcmp(flag, "traceprintk") == 0) ||
10880 (strcmp(flag, "trace_printk") == 0)) {
10881 traceprintk = true;
10882 } else {
10883 pr_info("Tracing: Invalid instance flag '%s' for %s\n",
10884 flag, name);
10885 }
10886 }
10887 }
10888
10889 tok = addr_delim;
10890 if (tok && isdigit(*tok)) {
10891 start = memparse(tok, &tok);
10892 if (!start) {
10893 pr_warn("Tracing: Invalid boot instance address for %s\n",
10894 name);
10895 continue;
10896 }
10897 if (*tok != ':') {
10898 pr_warn("Tracing: No size specified for instance %s\n", name);
10899 continue;
10900 }
10901 tok++;
10902 size = memparse(tok, &tok);
10903 if (!size) {
10904 pr_warn("Tracing: Invalid boot instance size for %s\n",
10905 name);
10906 continue;
10907 }
10908 memmap_area = true;
10909 } else if (tok) {
10910 if (!reserve_mem_find_by_name(tok, &start, &size)) {
10911 start = 0;
10912 pr_warn("Failed to map boot instance %s to %s\n", name, tok);
10913 continue;
10914 }
10915 rname = kstrdup(tok, GFP_KERNEL);
10916 }
10917
10918 if (start) {
10919 /* Start and size must be page aligned */
10920 if (start & ~PAGE_MASK) {
10921 pr_warn("Tracing: mapping start addr %pa is not page aligned\n", &start);
10922 continue;
10923 }
10924 if (size & ~PAGE_MASK) {
10925 pr_warn("Tracing: mapping size %pa is not page aligned\n", &size);
10926 continue;
10927 }
10928
10929 if (memmap_area)
10930 addr = map_pages(start, size);
10931 else
10932 addr = (unsigned long)phys_to_virt(start);
10933 if (addr) {
10934 pr_info("Tracing: mapped boot instance %s at physical memory %pa of size 0x%lx\n",
10935 name, &start, (unsigned long)size);
10936 } else {
10937 pr_warn("Tracing: Failed to map boot instance %s\n", name);
10938 continue;
10939 }
10940 } else {
10941 /* Only non mapped buffers have snapshot buffers */
10942 if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
10943 do_allocate_snapshot(name);
10944 }
10945
10946 tr = trace_array_create_systems(name, NULL, addr, size);
10947 if (IS_ERR(tr)) {
10948 pr_warn("Tracing: Failed to create instance buffer %s\n", curr_str);
10949 continue;
10950 }
10951
10952 if (traceoff)
10953 tracer_tracing_off(tr);
10954
10955 if (traceprintk)
10956 update_printk_trace(tr);
10957
10958 /*
10959 * memmap'd buffers can not be freed.
10960 */
10961 if (memmap_area) {
10962 tr->flags |= TRACE_ARRAY_FL_MEMMAP;
10963 tr->ref++;
10964 }
10965
10966 if (start) {
10967 tr->flags |= TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT;
10968 tr->range_name = no_free_ptr(rname);
10969 }
10970
10971 while ((tok = strsep(&curr_str, ","))) {
10972 early_enable_events(tr, tok, true);
10973 }
10974 }
10975 }
10976
10977 __init static int tracer_alloc_buffers(void)
10978 {
10979 int ring_buf_size;
10980 int ret = -ENOMEM;
10981
10982
10983 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10984 pr_warn("Tracing disabled due to lockdown\n");
10985 return -EPERM;
10986 }
10987
10988 /*
10989 * Make sure we don't accidentally add more trace options
10990 * than we have bits for.
10991 */
10992 BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10993
10994 if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10995 return -ENOMEM;
10996
10997 if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10998 goto out_free_buffer_mask;
10999
11000 /* Only allocate trace_printk buffers if a trace_printk exists */
11001 if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
11002 /* Must be called before global_trace.buffer is allocated */
11003 trace_printk_init_buffers();
11004
11005 /* To save memory, keep the ring buffer size to its minimum */
11006 if (global_trace.ring_buffer_expanded)
11007 ring_buf_size = trace_buf_size;
11008 else
11009 ring_buf_size = 1;
11010
11011 cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
11012 cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
11013
11014 raw_spin_lock_init(&global_trace.start_lock);
11015
	/*
	 * The prepare callback allocates some memory for the ring buffer of
	 * a CPU coming online. We don't free that buffer when the CPU goes
	 * down; if we did, the user would lose any trace that was in it.
	 * The memory is removed only when the "instance" is removed.
	 */
	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
				      "trace/RB:prepare", trace_rb_cpu_prepare,
				      NULL);
	if (ret < 0)
		goto out_free_cpumask;
	/* Used for event triggers */
	ret = -ENOMEM;
	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
	if (!temp_buffer)
		goto out_rm_hp_state;

	if (trace_create_savedcmd() < 0)
		goto out_free_temp_buffer;

	if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
		goto out_free_savedcmd;

	/* TODO: make the number of buffers hot pluggable with CPUS */
	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
		goto out_free_pipe_cpumask;
	}
	if (global_trace.buffer_disabled)
		tracing_off();

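	/*
	 * trace_boot_clock is expected to come from the "trace_clock="
	 * kernel command-line parameter, parsed earlier at setup time
	 * (an assumption of this note).
	 */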
	if (trace_boot_clock) {
		ret = tracing_set_clock(&global_trace, trace_boot_clock);
		if (ret < 0)
			pr_warn("Trace clock %s not defined, going back to default\n",
				trace_boot_clock);
	}

	/*
	 * register_tracer() might reference current_trace, so it
	 * needs to be set before we register anything. This is
	 * just a bootstrap of current_trace anyway.
	 */
	global_trace.current_trace = &nop_trace;

	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
#ifdef CONFIG_TRACER_MAX_TRACE
	spin_lock_init(&global_trace.snapshot_trigger_lock);
#endif
	ftrace_init_global_array_ops(&global_trace);

#ifdef CONFIG_MODULES
	INIT_LIST_HEAD(&global_trace.mod_events);
#endif

	init_trace_flags_index(&global_trace);

	register_tracer(&nop_trace);

	/* Function tracing may start here (via kernel command line) */
	init_function_trace();

	/* All seems OK, enable tracing */
	tracing_disabled = 0;

	atomic_notifier_chain_register(&panic_notifier_list,
				       &trace_panic_notifier);

	register_die_notifier(&trace_die_notifier);

	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;

	INIT_LIST_HEAD(&global_trace.systems);
	INIT_LIST_HEAD(&global_trace.events);
	INIT_LIST_HEAD(&global_trace.hist_vars);
	INIT_LIST_HEAD(&global_trace.err_log);
	list_add(&global_trace.marker_list, &marker_copies);
	list_add(&global_trace.list, &ftrace_trace_arrays);

	apply_trace_boot_options();

	register_snapshot_cmd();

	return 0;

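/* Error path: release resources in the reverse order they were set up above. */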
out_free_pipe_cpumask:
	free_cpumask_var(global_trace.pipe_cpumask);
out_free_savedcmd:
	trace_free_saved_cmdlines_buffer();
out_free_temp_buffer:
	ring_buffer_free(temp_buffer);
out_rm_hp_state:
	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
out_free_cpumask:
	free_cpumask_var(global_trace.tracing_cpumask);
out_free_buffer_mask:
	free_cpumask_var(tracing_buffer_mask);
	return ret;
}

#ifdef CONFIG_FUNCTION_TRACER
/* Used to set module cached ftrace filtering at boot up */
__init struct trace_array *trace_get_global_array(void)
{
	return &global_trace;
}
#endif

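/*
 * Assumption for this note: snapshot_at_boot is set when the
 * "ftrace_boot_snapshot" kernel parameter is given, requesting that a
 * snapshot be taken of each instance that has an allocated snapshot
 * buffer once boot-time tracing has run.
 */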
void __init ftrace_boot_snapshot(void)
{
#ifdef CONFIG_TRACER_MAX_TRACE
	struct trace_array *tr;

	if (!snapshot_at_boot)
		return;

	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
		if (!tr->allocated_snapshot)
			continue;

		tracing_snapshot_instance(tr);
		trace_array_puts(tr, "** Boot snapshot taken **\n");
	}
#endif
}

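/*
 * early_trace_init() is called from start_kernel() very early in boot,
 * before trace_init(), so that trace_printk() and boot-time tracers are
 * usable as soon as possible.
 */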
void __init early_trace_init(void)
{
	if (tracepoint_printk) {
		tracepoint_print_iter =
			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
		if (MEM_FAIL(!tracepoint_print_iter,
			     "Failed to allocate trace iterator\n"))
			tracepoint_printk = 0;
		else
			static_key_enable(&tracepoint_printk_key.key);
	}
	tracer_alloc_buffers();

	init_events();
}

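/*
 * trace_init() is called from start_kernel() later in boot, once the
 * allocators are fully up. boot_instance_index is assumed here to be
 * non-zero when one or more "trace_instance=" parameters were parsed,
 * in which case the requested instances are created now.
 */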
void __init trace_init(void)
{
	trace_event_init();

	if (boot_instance_index)
		enable_instances();
}

__init static void clear_boot_tracer(void)
{
	/*
	 * The name of the default bootup tracer is stored in an init
	 * section buffer. This function runs at late_initcall time: if
	 * the boot tracer was never registered, clear the pointer so
	 * that a later registration does not access the buffer that is
	 * about to be freed.
	 */
	if (!default_bootup_tracer)
		return;

	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
	       default_bootup_tracer);
	default_bootup_tracer = NULL;
}

#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
__init static void tracing_set_default_clock(void)
{
	/* sched_clock_stable() is determined in late_initcall */
	if (!trace_boot_clock && !sched_clock_stable()) {
		if (security_locked_down(LOCKDOWN_TRACEFS)) {
			pr_warn("Can not set tracing clock due to lockdown\n");
			return;
		}

		printk(KERN_WARNING
		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
		       "If you want to keep using the local clock, then add:\n"
		       "  \"trace_clock=local\"\n"
		       "on the kernel command line\n");
		tracing_set_clock(&global_trace, "global");
	}
}
#else
static inline void tracing_set_default_clock(void) { }
#endif

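/*
 * Runs at late_initcall_sync time: late enough for sched_clock_stable()
 * to have been determined and for clear_boot_tracer() to know whether
 * the requested boot tracer was ever registered.
 */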
__init static int late_trace_init(void)
{
	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
		static_key_disable(&tracepoint_printk_key.key);
		tracepoint_printk = 0;
	}

	if (traceoff_after_boot)
		tracing_off();

	tracing_set_default_clock();
	clear_boot_tracer();
	return 0;
}

late_initcall_sync(late_trace_init);