1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * ring buffer based function tracer
4 *
5 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7 *
8 * Originally taken from the RT patch by:
9 * Arnaldo Carvalho de Melo <acme@redhat.com>
10 *
11 * Based on code from the latency_tracer, that is:
12 * Copyright (C) 2004-2006 Ingo Molnar
13 * Copyright (C) 2004 Nadia Yvette Chambers
14 */
15 #include <linux/ring_buffer.h>
16 #include <linux/utsname.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/cleanup.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/panic_notifier.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52 #include <linux/sort.h>
53 #include <linux/io.h> /* vmap_page_range() */
54 #include <linux/fs_context.h>
55
56 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
57
58 #include "trace.h"
59 #include "trace_output.h"
60
61 #ifdef CONFIG_FTRACE_STARTUP_TEST
62 /*
63 * We need to change this state when a selftest is running.
64 * A selftest will peek into the ring buffer to count the
65 * entries inserted during the selftest, but some concurrent
66 * insertions into the ring buffer, such as trace_printk(), could
67 * occur at the same time, giving false positive or negative results.
68 */
69 static bool __read_mostly tracing_selftest_running;
70
71 /*
72 * If boot-time tracing including tracers/events via kernel cmdline
73 * is running, we do not want to run SELFTEST.
74 */
75 bool __read_mostly tracing_selftest_disabled;
76
77 void __init disable_tracing_selftest(const char *reason)
78 {
79 if (!tracing_selftest_disabled) {
80 tracing_selftest_disabled = true;
81 pr_info("Ftrace startup test is disabled due to %s\n", reason);
82 }
83 }
84 #else
85 #define tracing_selftest_running 0
86 #define tracing_selftest_disabled 0
87 #endif
88
89 /* Pipe tracepoints to printk */
90 static struct trace_iterator *tracepoint_print_iter;
91 int tracepoint_printk;
92 static bool tracepoint_printk_stop_on_boot __initdata;
93 static bool traceoff_after_boot __initdata;
94 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
95
96 /* For tracers that don't implement custom flags */
97 static struct tracer_opt dummy_tracer_opt[] = {
98 { }
99 };
100
101 static int
102 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
103 {
104 return 0;
105 }
106
107 /*
108 * To prevent the comm cache from being overwritten when no
109 * tracing is active, only save the comm when a trace event
110 * occurred.
111 */
112 DEFINE_PER_CPU(bool, trace_taskinfo_save);
113
114 /*
115 * Kill all tracing for good (never come back).
116 * It is initialized to 1 and is set back to zero only if the
117 * initialization of the tracer is successful. That is the only
118 * place that clears it.
119 */
120 static int tracing_disabled = 1;
121
122 cpumask_var_t __read_mostly tracing_buffer_mask;
123
124 #define MAX_TRACER_SIZE 100
125 /*
126 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
127 *
128 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
129 * is set, then ftrace_dump is called. This will output the contents
130 * of the ftrace buffers to the console. This is very useful for
131 * capturing traces that lead to crashes and outputting them to a
132 * serial console.
133 *
134 * It is off by default, but you can enable it either by specifying
135 * "ftrace_dump_on_oops" on the kernel command line, or by setting
136 * /proc/sys/kernel/ftrace_dump_on_oops
137 * Set 1 if you want to dump buffers of all CPUs
138 * Set 2 if you want to dump the buffer of the CPU that triggered oops
139 * Set instance name if you want to dump the specific trace instance
140 * Multiple instance dump is also supported, and instances are separated
141 * by commas.
142 */
143 /* Set to the string "0" (disabled) by default */
144 char ftrace_dump_on_oops[MAX_TRACER_SIZE] = "0";
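/*
 * Illustrative examples only (derived from the comment above and from
 * set_ftrace_dump_on_oops() below); the instance names are made up:
 *
 *   ftrace_dump_on_oops              - dump the buffers of all CPUs
 *   ftrace_dump_on_oops=2            - dump only the CPU that triggered the oops
 *   ftrace_dump_on_oops=instA,instB  - dump the named trace instances
 */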
145
146 /* When set, tracing will stop when a WARN*() is hit */
147 static int __disable_trace_on_warning;
148
149 int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
150 void *buffer, size_t *lenp, loff_t *ppos);
151 static const struct ctl_table trace_sysctl_table[] = {
152 {
153 .procname = "ftrace_dump_on_oops",
154 .data = &ftrace_dump_on_oops,
155 .maxlen = MAX_TRACER_SIZE,
156 .mode = 0644,
157 .proc_handler = proc_dostring,
158 },
159 {
160 .procname = "traceoff_on_warning",
161 .data = &__disable_trace_on_warning,
162 .maxlen = sizeof(__disable_trace_on_warning),
163 .mode = 0644,
164 .proc_handler = proc_dointvec,
165 },
166 {
167 .procname = "tracepoint_printk",
168 .data = &tracepoint_printk,
169 .maxlen = sizeof(tracepoint_printk),
170 .mode = 0644,
171 .proc_handler = tracepoint_printk_sysctl,
172 },
173 };
174
175 static int __init init_trace_sysctls(void)
176 {
177 register_sysctl_init("kernel", trace_sysctl_table);
178 return 0;
179 }
180 subsys_initcall(init_trace_sysctls);
181
182 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
183 /* Map of enums to their values, for "eval_map" file */
184 struct trace_eval_map_head {
185 struct module *mod;
186 unsigned long length;
187 };
188
189 union trace_eval_map_item;
190
191 struct trace_eval_map_tail {
192 /*
193 * "end" is first and points to NULL as it must be different
194 * from "mod" or "eval_string"
195 */
196 union trace_eval_map_item *next;
197 const char *end; /* points to NULL */
198 };
199
200 static DEFINE_MUTEX(trace_eval_mutex);
201
202 /*
203 * The trace_eval_maps are saved in an array with two extra elements,
204 * one at the beginning, and one at the end. The beginning item contains
205 * the count of the saved maps (head.length), and the module they
206 * belong to if not built in (head.mod). The ending item contains a
207 * pointer to the next array of saved eval_map items.
208 */
209 union trace_eval_map_item {
210 struct trace_eval_map map;
211 struct trace_eval_map_head head;
212 struct trace_eval_map_tail tail;
213 };
214
215 static union trace_eval_map_item *trace_eval_maps;
216 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
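/*
 * A sketch of the saved-array layout described above (illustrative only).
 * For a module carrying N eval maps, a saved array looks like:
 *
 *   [0]      head:  .mod = the module, .length = N
 *   [1..N]   map:   the N trace_eval_map entries
 *   [N + 1]  tail:  .next = the next saved array (or NULL)
 */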
217
218 int tracing_set_tracer(struct trace_array *tr, const char *buf);
219 static void ftrace_trace_userstack(struct trace_array *tr,
220 struct trace_buffer *buffer,
221 unsigned int trace_ctx);
222
223 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
224 static char *default_bootup_tracer;
225
226 static bool allocate_snapshot;
227 static bool snapshot_at_boot;
228
229 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
230 static int boot_instance_index;
231
232 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
233 static int boot_snapshot_index;
234
235 static int __init set_cmdline_ftrace(char *str)
236 {
237 strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
238 default_bootup_tracer = bootup_tracer_buf;
239 /* We are using ftrace early, expand it */
240 trace_set_ring_buffer_expanded(NULL);
241 return 1;
242 }
243 __setup("ftrace=", set_cmdline_ftrace);
244
245 int ftrace_dump_on_oops_enabled(void)
246 {
247 if (!strcmp("0", ftrace_dump_on_oops))
248 return 0;
249 else
250 return 1;
251 }
252
253 static int __init set_ftrace_dump_on_oops(char *str)
254 {
255 if (!*str) {
256 strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
257 return 1;
258 }
259
260 if (*str == ',') {
261 strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
262 strscpy(ftrace_dump_on_oops + 1, str, MAX_TRACER_SIZE - 1);
263 return 1;
264 }
265
266 if (*str++ == '=') {
267 strscpy(ftrace_dump_on_oops, str, MAX_TRACER_SIZE);
268 return 1;
269 }
270
271 return 0;
272 }
273 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
274
275 static int __init stop_trace_on_warning(char *str)
276 {
277 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
278 __disable_trace_on_warning = 1;
279 return 1;
280 }
281 __setup("traceoff_on_warning", stop_trace_on_warning);
282
283 static int __init boot_alloc_snapshot(char *str)
284 {
285 char *slot = boot_snapshot_info + boot_snapshot_index;
286 int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
287 int ret;
288
289 if (str[0] == '=') {
290 str++;
291 if (strlen(str) >= left)
292 return -1;
293
294 ret = snprintf(slot, left, "%s\t", str);
295 boot_snapshot_index += ret;
296 } else {
297 allocate_snapshot = true;
298 /* We also need the main ring buffer expanded */
299 trace_set_ring_buffer_expanded(NULL);
300 }
301 return 1;
302 }
303 __setup("alloc_snapshot", boot_alloc_snapshot);
304
305
306 static int __init boot_snapshot(char *str)
307 {
308 snapshot_at_boot = true;
309 boot_alloc_snapshot(str);
310 return 1;
311 }
312 __setup("ftrace_boot_snapshot", boot_snapshot);
313
314
315 static int __init boot_instance(char *str)
316 {
317 char *slot = boot_instance_info + boot_instance_index;
318 int left = sizeof(boot_instance_info) - boot_instance_index;
319 int ret;
320
321 if (strlen(str) >= left)
322 return -1;
323
324 ret = snprintf(slot, left, "%s\t", str);
325 boot_instance_index += ret;
326
327 return 1;
328 }
329 __setup("trace_instance=", boot_instance);
330
331
332 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
333
334 static int __init set_trace_boot_options(char *str)
335 {
336 strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
337 return 1;
338 }
339 __setup("trace_options=", set_trace_boot_options);
340
341 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
342 static char *trace_boot_clock __initdata;
343
344 static int __init set_trace_boot_clock(char *str)
345 {
346 strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
347 trace_boot_clock = trace_boot_clock_buf;
348 return 1;
349 }
350 __setup("trace_clock=", set_trace_boot_clock);
351
352 static int __init set_tracepoint_printk(char *str)
353 {
354 /* Ignore the "tp_printk_stop_on_boot" param */
355 if (*str == '_')
356 return 0;
357
358 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
359 tracepoint_printk = 1;
360 return 1;
361 }
362 __setup("tp_printk", set_tracepoint_printk);
363
364 static int __init set_tracepoint_printk_stop(char *str)
365 {
366 tracepoint_printk_stop_on_boot = true;
367 return 1;
368 }
369 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
370
371 static int __init set_traceoff_after_boot(char *str)
372 {
373 traceoff_after_boot = true;
374 return 1;
375 }
376 __setup("traceoff_after_boot", set_traceoff_after_boot);
377
378 unsigned long long ns2usecs(u64 nsec)
379 {
380 nsec += 500;
381 do_div(nsec, 1000);
382 return nsec;
383 }
384
385 static void
386 trace_process_export(struct trace_export *export,
387 struct ring_buffer_event *event, int flag)
388 {
389 struct trace_entry *entry;
390 unsigned int size = 0;
391
392 if (export->flags & flag) {
393 entry = ring_buffer_event_data(event);
394 size = ring_buffer_event_length(event);
395 export->write(export, entry, size);
396 }
397 }
398
399 static DEFINE_MUTEX(ftrace_export_lock);
400
401 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
402
403 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
404 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
405 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
406
407 static inline void ftrace_exports_enable(struct trace_export *export)
408 {
409 if (export->flags & TRACE_EXPORT_FUNCTION)
410 static_branch_inc(&trace_function_exports_enabled);
411
412 if (export->flags & TRACE_EXPORT_EVENT)
413 static_branch_inc(&trace_event_exports_enabled);
414
415 if (export->flags & TRACE_EXPORT_MARKER)
416 static_branch_inc(&trace_marker_exports_enabled);
417 }
418
419 static inline void ftrace_exports_disable(struct trace_export *export)
420 {
421 if (export->flags & TRACE_EXPORT_FUNCTION)
422 static_branch_dec(&trace_function_exports_enabled);
423
424 if (export->flags & TRACE_EXPORT_EVENT)
425 static_branch_dec(&trace_event_exports_enabled);
426
427 if (export->flags & TRACE_EXPORT_MARKER)
428 static_branch_dec(&trace_marker_exports_enabled);
429 }
430
431 static void ftrace_exports(struct ring_buffer_event *event, int flag)
432 {
433 struct trace_export *export;
434
435 preempt_disable_notrace();
436
437 export = rcu_dereference_raw_check(ftrace_exports_list);
438 while (export) {
439 trace_process_export(export, event, flag);
440 export = rcu_dereference_raw_check(export->next);
441 }
442
443 preempt_enable_notrace();
444 }
445
446 static inline void
447 add_trace_export(struct trace_export **list, struct trace_export *export)
448 {
449 rcu_assign_pointer(export->next, *list);
450 /*
451 * We are inserting the export into the list, but another
452 * CPU might be walking that list. We need to make sure
453 * the export->next pointer is valid before another CPU sees
454 * the export pointer inserted into the list.
455 */
456 rcu_assign_pointer(*list, export);
457 }
458
459 static inline int
460 rm_trace_export(struct trace_export **list, struct trace_export *export)
461 {
462 struct trace_export **p;
463
464 for (p = list; *p != NULL; p = &(*p)->next)
465 if (*p == export)
466 break;
467
468 if (*p != export)
469 return -1;
470
471 rcu_assign_pointer(*p, (*p)->next);
472
473 return 0;
474 }
475
476 static inline void
477 add_ftrace_export(struct trace_export **list, struct trace_export *export)
478 {
479 ftrace_exports_enable(export);
480
481 add_trace_export(list, export);
482 }
483
484 static inline int
485 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
486 {
487 int ret;
488
489 ret = rm_trace_export(list, export);
490 ftrace_exports_disable(export);
491
492 return ret;
493 }
494
495 int register_ftrace_export(struct trace_export *export)
496 {
497 if (WARN_ON_ONCE(!export->write))
498 return -1;
499
500 mutex_lock(&ftrace_export_lock);
501
502 add_ftrace_export(&ftrace_exports_list, export);
503
504 mutex_unlock(&ftrace_export_lock);
505
506 return 0;
507 }
508 EXPORT_SYMBOL_GPL(register_ftrace_export);
509
510 int unregister_ftrace_export(struct trace_export *export)
511 {
512 int ret;
513
514 mutex_lock(&ftrace_export_lock);
515
516 ret = rm_ftrace_export(&ftrace_exports_list, export);
517
518 mutex_unlock(&ftrace_export_lock);
519
520 return ret;
521 }
522 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
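/*
 * A minimal sketch (not taken from any real user) of how a module might
 * use the register/unregister pair above. "my_write" and "my_export" are
 * made-up names, and the callback signature is assumed from how
 * trace_process_export() invokes export->write(export, entry, size):
 *
 *   static void my_write(struct trace_export *export, const void *entry,
 *                        unsigned int size)
 *   {
 *           // forward the raw entry somewhere, e.g. over a transport
 *   }
 *
 *   static struct trace_export my_export = {
 *           .write = my_write,
 *           .flags = TRACE_EXPORT_FUNCTION | TRACE_EXPORT_EVENT,
 *   };
 *
 *   register_ftrace_export(&my_export);
 *   ...
 *   unregister_ftrace_export(&my_export);
 */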
523
524 /* trace_flags holds trace_options default values */
525 #define TRACE_DEFAULT_FLAGS \
526 (FUNCTION_DEFAULT_FLAGS | \
527 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | \
528 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | \
529 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | \
530 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | \
531 TRACE_ITER_HASH_PTR | TRACE_ITER_TRACE_PRINTK | \
532 TRACE_ITER_COPY_MARKER)
533
534 /* trace_options that are only supported by global_trace */
535 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK | \
536 TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
537
538 /* trace_flags that are default zero for instances */
539 #define ZEROED_TRACE_FLAGS \
540 (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK | TRACE_ITER_TRACE_PRINTK | \
541 TRACE_ITER_COPY_MARKER)
542
543 /*
544 * The global_trace is the descriptor that holds the top-level tracing
545 * buffers for the live tracing.
546 */
547 static struct trace_array global_trace = {
548 .trace_flags = TRACE_DEFAULT_FLAGS,
549 };
550
551 static struct trace_array *printk_trace = &global_trace;
552
553 /* List of trace_arrays interested in the top level trace_marker */
554 static LIST_HEAD(marker_copies);
555
556 static __always_inline bool printk_binsafe(struct trace_array *tr)
557 {
558 /*
559 * The binary format of trace_printk() can cause a crash if used
560 * with a buffer from another boot. Force the use of the
561 * non-binary version of trace_printk() if the trace_printk
562 * buffer is a boot-mapped ring buffer.
563 */
564 return !(tr->flags & TRACE_ARRAY_FL_BOOT);
565 }
566
567 static void update_printk_trace(struct trace_array *tr)
568 {
569 if (printk_trace == tr)
570 return;
571
572 printk_trace->trace_flags &= ~TRACE_ITER_TRACE_PRINTK;
573 printk_trace = tr;
574 tr->trace_flags |= TRACE_ITER_TRACE_PRINTK;
575 }
576
577 /* Returns true if the status of tr changed */
578 static bool update_marker_trace(struct trace_array *tr, int enabled)
579 {
580 lockdep_assert_held(&event_mutex);
581
582 if (enabled) {
583 if (!list_empty(&tr->marker_list))
584 return false;
585
586 list_add_rcu(&tr->marker_list, &marker_copies);
587 tr->trace_flags |= TRACE_ITER_COPY_MARKER;
588 return true;
589 }
590
591 if (list_empty(&tr->marker_list))
592 return false;
593
594 list_del_init(&tr->marker_list);
595 tr->trace_flags &= ~TRACE_ITER_COPY_MARKER;
596 return true;
597 }
598
599 void trace_set_ring_buffer_expanded(struct trace_array *tr)
600 {
601 if (!tr)
602 tr = &global_trace;
603 tr->ring_buffer_expanded = true;
604 }
605
606 LIST_HEAD(ftrace_trace_arrays);
607
608 int trace_array_get(struct trace_array *this_tr)
609 {
610 struct trace_array *tr;
611
612 guard(mutex)(&trace_types_lock);
613 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
614 if (tr == this_tr) {
615 tr->ref++;
616 return 0;
617 }
618 }
619
620 return -ENODEV;
621 }
622
623 static void __trace_array_put(struct trace_array *this_tr)
624 {
625 WARN_ON(!this_tr->ref);
626 this_tr->ref--;
627 }
628
629 /**
630 * trace_array_put - Decrement the reference counter for this trace array.
631 * @this_tr : pointer to the trace array
632 *
633 * NOTE: Use this when we no longer need the trace array returned by
634 * trace_array_get_by_name(). This ensures the trace array can be later
635 * destroyed.
636 *
637 */
638 void trace_array_put(struct trace_array *this_tr)
639 {
640 if (!this_tr)
641 return;
642
643 mutex_lock(&trace_types_lock);
644 __trace_array_put(this_tr);
645 mutex_unlock(&trace_types_lock);
646 }
647 EXPORT_SYMBOL_GPL(trace_array_put);
648
649 int tracing_check_open_get_tr(struct trace_array *tr)
650 {
651 int ret;
652
653 ret = security_locked_down(LOCKDOWN_TRACEFS);
654 if (ret)
655 return ret;
656
657 if (tracing_disabled)
658 return -ENODEV;
659
660 if (tr && trace_array_get(tr) < 0)
661 return -ENODEV;
662
663 return 0;
664 }
665
666 /**
667 * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
668 * @filtered_pids: The list of pids to check
669 * @search_pid: The PID to find in @filtered_pids
670 *
671 * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
672 */
673 bool
674 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
675 {
676 return trace_pid_list_is_set(filtered_pids, search_pid);
677 }
678
679 /**
680 * trace_ignore_this_task - should a task be ignored for tracing
681 * @filtered_pids: The list of pids to check
682 * @filtered_no_pids: The list of pids not to be traced
683 * @task: The task that should be ignored if not filtered
684 *
685 * Checks if @task should be traced or not from @filtered_pids.
686 * Returns true if @task should *NOT* be traced.
687 * Returns false if @task should be traced.
688 */
689 bool
690 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
691 struct trace_pid_list *filtered_no_pids,
692 struct task_struct *task)
693 {
694 /*
695 * If filtered_no_pids is not empty, and the task's pid is listed
696 * in filtered_no_pids, then return true.
697 * Otherwise, if filtered_pids is empty, that means we can
698 * trace all tasks. If it has content, then only trace pids
699 * within filtered_pids.
700 */
701
702 return (filtered_pids &&
703 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
704 (filtered_no_pids &&
705 trace_find_filtered_pid(filtered_no_pids, task->pid));
706 }
707
708 /**
709 * trace_filter_add_remove_task - Add or remove a task from a pid_list
710 * @pid_list: The list to modify
711 * @self: The current task for fork or NULL for exit
712 * @task: The task to add or remove
713 *
714 * When adding a task: if @self is defined, the task is only added if @self
715 * is also included in @pid_list. This happens on fork, and tasks should
716 * only be added when the parent is listed. If @self is NULL, then the
717 * @task pid will be removed from the list, which happens on exit
718 * of a task.
719 */
720 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
721 struct task_struct *self,
722 struct task_struct *task)
723 {
724 if (!pid_list)
725 return;
726
727 /* For forks, we only add if the forking task is listed */
728 if (self) {
729 if (!trace_find_filtered_pid(pid_list, self->pid))
730 return;
731 }
732
733 /* "self" is set for forks, and NULL for exits */
734 if (self)
735 trace_pid_list_set(pid_list, task->pid);
736 else
737 trace_pid_list_clear(pid_list, task->pid);
738 }
739
740 /**
741 * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
742 * @pid_list: The pid list to show
743 * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
744 * @pos: The position of the file
745 *
746 * This is used by the seq_file "next" operation to iterate the pids
747 * listed in a trace_pid_list structure.
748 *
749 * Returns the pid+1 as we want to display pid of zero, but NULL would
750 * stop the iteration.
751 */
752 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
753 {
754 long pid = (unsigned long)v;
755 unsigned int next;
756
757 (*pos)++;
758
759 /* pid already is +1 of the actual previous bit */
760 if (trace_pid_list_next(pid_list, pid, &next) < 0)
761 return NULL;
762
763 pid = next;
764
765 /* Return pid + 1 to allow zero to be represented */
766 return (void *)(pid + 1);
767 }
768
769 /**
770 * trace_pid_start - Used for seq_file to start reading pid lists
771 * @pid_list: The pid list to show
772 * @pos: The position of the file
773 *
774 * This is used by seq_file "start" operation to start the iteration
775 * of listing pids.
776 *
777 * Returns the pid+1 as we want to display pid of zero, but NULL would
778 * stop the iteration.
779 */
780 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
781 {
782 unsigned long pid;
783 unsigned int first;
784 loff_t l = 0;
785
786 if (trace_pid_list_first(pid_list, &first) < 0)
787 return NULL;
788
789 pid = first;
790
791 /* Return pid + 1 so that zero can be the exit value */
792 for (pid++; pid && l < *pos;
793 pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
794 ;
795 return (void *)pid;
796 }
797
798 /**
799 * trace_pid_show - show the current pid in seq_file processing
800 * @m: The seq_file structure to write into
801 * @v: A void pointer of the pid (+1) value to display
802 *
803 * Can be directly used by seq_file operations to display the current
804 * pid value.
805 */
806 int trace_pid_show(struct seq_file *m, void *v)
807 {
808 unsigned long pid = (unsigned long)v - 1;
809
810 seq_printf(m, "%lu\n", pid);
811 return 0;
812 }
813
814 /* 128 should be much more than enough */
815 #define PID_BUF_SIZE 127
816
817 int trace_pid_write(struct trace_pid_list *filtered_pids,
818 struct trace_pid_list **new_pid_list,
819 const char __user *ubuf, size_t cnt)
820 {
821 struct trace_pid_list *pid_list;
822 struct trace_parser parser;
823 unsigned long val;
824 int nr_pids = 0;
825 ssize_t read = 0;
826 ssize_t ret;
827 loff_t pos;
828 pid_t pid;
829
830 if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
831 return -ENOMEM;
832
833 /*
834 * Always create a new array; the write is an all-or-nothing
835 * operation. A new array is created whenever the user adds
836 * new pids. If the operation fails, then the current list is
837 * not modified.
838 */
839 pid_list = trace_pid_list_alloc();
840 if (!pid_list) {
841 trace_parser_put(&parser);
842 return -ENOMEM;
843 }
844
845 if (filtered_pids) {
846 /* copy the current bits to the new max */
847 ret = trace_pid_list_first(filtered_pids, &pid);
848 while (!ret) {
849 trace_pid_list_set(pid_list, pid);
850 ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
851 nr_pids++;
852 }
853 }
854
855 ret = 0;
856 while (cnt > 0) {
857
858 pos = 0;
859
860 ret = trace_get_user(&parser, ubuf, cnt, &pos);
861 if (ret < 0)
862 break;
863
864 read += ret;
865 ubuf += ret;
866 cnt -= ret;
867
868 if (!trace_parser_loaded(&parser))
869 break;
870
871 ret = -EINVAL;
872 if (kstrtoul(parser.buffer, 0, &val))
873 break;
874
875 pid = (pid_t)val;
876
877 if (trace_pid_list_set(pid_list, pid) < 0) {
878 ret = -1;
879 break;
880 }
881 nr_pids++;
882
883 trace_parser_clear(&parser);
884 ret = 0;
885 }
886 trace_parser_put(&parser);
887
888 if (ret < 0) {
889 trace_pid_list_free(pid_list);
890 return ret;
891 }
892
893 if (!nr_pids) {
894 /* Cleared the list of pids */
895 trace_pid_list_free(pid_list);
896 pid_list = NULL;
897 }
898
899 *new_pid_list = pid_list;
900
901 return read;
902 }
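/*
 * Illustrative user-space view of the write path above (the actual file
 * name depends on which tracefs file wires in trace_pid_write(), e.g.
 * set_event_pid):
 *
 *   # echo 123 456 > set_event_pid   - replace the list with pids 123 and 456
 *   # echo 789 >> set_event_pid      - append pid 789 (current bits are copied)
 *   # echo > set_event_pid           - clear the list (nr_pids == 0 above)
 */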
903
904 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
905 {
906 u64 ts;
907
908 /* Early boot up does not have a buffer yet */
909 if (!buf->buffer)
910 return trace_clock_local();
911
912 ts = ring_buffer_time_stamp(buf->buffer);
913 ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
914
915 return ts;
916 }
917
918 u64 ftrace_now(int cpu)
919 {
920 return buffer_ftrace_now(&global_trace.array_buffer, cpu);
921 }
922
923 /**
924 * tracing_is_enabled - Show if global_trace has been enabled
925 *
926 * Shows if the global trace has been enabled or not. It uses the
927 * mirror flag "buffer_disabled" so that it can be used in fast paths
928 * such as the irqsoff tracer. But it may be inaccurate due to races. If you
929 * need to know the accurate state, use tracing_is_on() which is a little
930 * slower, but accurate.
931 */
932 int tracing_is_enabled(void)
933 {
934 /*
935 * For quick access (irqsoff uses this in fast path), just
936 * return the mirror variable of the state of the ring buffer.
937 * It's a little racy, but we don't really care.
938 */
939 return !global_trace.buffer_disabled;
940 }
941
942 /*
943 * trace_buf_size is the size in bytes that is allocated
944 * for a buffer. Note, the number of bytes is always rounded
945 * to page size.
946 *
947 * This number is purposely set to a low number of 16384.
948 * If a dump on oops happens, it is much appreciated not to
949 * have to wait for all that output. In any case, this is
950 * configurable at both boot time and run time.
951 */
952 #define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */
953
954 static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
955
956 /* trace_types holds a link list of available tracers. */
957 static struct tracer *trace_types __read_mostly;
958
959 /*
960 * trace_types_lock is used to protect the trace_types list.
961 */
962 DEFINE_MUTEX(trace_types_lock);
963
964 /*
965 * serialize the access of the ring buffer
966 *
967 * The ring buffer serializes readers, but that is only low-level protection.
968 * The validity of the events (as returned by ring_buffer_peek(), etc.)
969 * is not protected by the ring buffer.
970 *
971 * The content of events may become garbage if we allow another process to
972 * consume these events concurrently:
973 * A) the page of the consumed events may become a normal page
974 * (not a reader page) in the ring buffer, and this page will be rewritten
975 * by the event producer.
976 * B) The page of the consumed events may become a page for splice_read,
977 * and this page will be returned to the system.
978 *
979 * These primitives allow multiple processes to access different CPU ring
980 * buffers concurrently.
981 *
982 * These primitives don't distinguish read-only from read-consume access.
983 * Multiple read-only accesses are also serialized.
984 */
985
986 #ifdef CONFIG_SMP
987 static DECLARE_RWSEM(all_cpu_access_lock);
988 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
989
990 static inline void trace_access_lock(int cpu)
991 {
992 if (cpu == RING_BUFFER_ALL_CPUS) {
993 /* gain it for accessing the whole ring buffer. */
994 down_write(&all_cpu_access_lock);
995 } else {
996 /* gain it for accessing a cpu ring buffer. */
997
998 /* First, block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
999 down_read(&all_cpu_access_lock);
1000
1001 /* Second, block other access to this @cpu ring buffer. */
1002 mutex_lock(&per_cpu(cpu_access_lock, cpu));
1003 }
1004 }
1005
1006 static inline void trace_access_unlock(int cpu)
1007 {
1008 if (cpu == RING_BUFFER_ALL_CPUS) {
1009 up_write(&all_cpu_access_lock);
1010 } else {
1011 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
1012 up_read(&all_cpu_access_lock);
1013 }
1014 }
1015
1016 static inline void trace_access_lock_init(void)
1017 {
1018 int cpu;
1019
1020 for_each_possible_cpu(cpu)
1021 mutex_init(&per_cpu(cpu_access_lock, cpu));
1022 }
1023
1024 #else
1025
1026 static DEFINE_MUTEX(access_lock);
1027
1028 static inline void trace_access_lock(int cpu)
1029 {
1030 (void)cpu;
1031 mutex_lock(&access_lock);
1032 }
1033
1034 static inline void trace_access_unlock(int cpu)
1035 {
1036 (void)cpu;
1037 mutex_unlock(&access_lock);
1038 }
1039
1040 static inline void trace_access_lock_init(void)
1041 {
1042 }
1043
1044 #endif
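/*
 * Typical usage pattern of the helpers above (an illustrative sketch; the
 * real callers are the trace file read paths later in this file):
 *
 *   trace_access_lock(cpu);
 *   event = ring_buffer_consume(buffer, cpu, &ts, &lost_events);
 *   ... decode the event ...
 *   trace_access_unlock(cpu);
 */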
1045
1046 #ifdef CONFIG_STACKTRACE
1047 static void __ftrace_trace_stack(struct trace_array *tr,
1048 struct trace_buffer *buffer,
1049 unsigned int trace_ctx,
1050 int skip, struct pt_regs *regs);
1051 static inline void ftrace_trace_stack(struct trace_array *tr,
1052 struct trace_buffer *buffer,
1053 unsigned int trace_ctx,
1054 int skip, struct pt_regs *regs);
1055
1056 #else
1057 static inline void __ftrace_trace_stack(struct trace_array *tr,
1058 struct trace_buffer *buffer,
1059 unsigned int trace_ctx,
1060 int skip, struct pt_regs *regs)
1061 {
1062 }
1063 static inline void ftrace_trace_stack(struct trace_array *tr,
1064 struct trace_buffer *buffer,
1065 unsigned long trace_ctx,
1066 int skip, struct pt_regs *regs)
1067 {
1068 }
1069
1070 #endif
1071
1072 static __always_inline void
1073 trace_event_setup(struct ring_buffer_event *event,
1074 int type, unsigned int trace_ctx)
1075 {
1076 struct trace_entry *ent = ring_buffer_event_data(event);
1077
1078 tracing_generic_entry_update(ent, type, trace_ctx);
1079 }
1080
1081 static __always_inline struct ring_buffer_event *
1082 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
1083 int type,
1084 unsigned long len,
1085 unsigned int trace_ctx)
1086 {
1087 struct ring_buffer_event *event;
1088
1089 event = ring_buffer_lock_reserve(buffer, len);
1090 if (event != NULL)
1091 trace_event_setup(event, type, trace_ctx);
1092
1093 return event;
1094 }
1095
1096 void tracer_tracing_on(struct trace_array *tr)
1097 {
1098 if (tr->array_buffer.buffer)
1099 ring_buffer_record_on(tr->array_buffer.buffer);
1100 /*
1101 * This flag is looked at when buffers haven't been allocated
1102 * yet, or by some tracers (like irqsoff), that just want to
1103 * know if the ring buffer has been disabled, but it can handle
1104 * races of where it gets disabled but we still do a record.
1105 * As the check is in the fast path of the tracers, it is more
1106 * important to be fast than accurate.
1107 */
1108 tr->buffer_disabled = 0;
1109 }
1110
1111 /**
1112 * tracing_on - enable tracing buffers
1113 *
1114 * This function enables tracing buffers that may have been
1115 * disabled with tracing_off.
1116 */
1117 void tracing_on(void)
1118 {
1119 tracer_tracing_on(&global_trace);
1120 }
1121 EXPORT_SYMBOL_GPL(tracing_on);
1122
1123
1124 static __always_inline void
1125 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1126 {
1127 __this_cpu_write(trace_taskinfo_save, true);
1128
1129 /* If this is the temp buffer, we need to commit fully */
1130 if (this_cpu_read(trace_buffered_event) == event) {
1131 /* Length is in event->array[0] */
1132 ring_buffer_write(buffer, event->array[0], &event->array[1]);
1133 /* Release the temp buffer */
1134 this_cpu_dec(trace_buffered_event_cnt);
1135 /* ring_buffer_unlock_commit() enables preemption */
1136 preempt_enable_notrace();
1137 } else
1138 ring_buffer_unlock_commit(buffer);
1139 }
1140
1141 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1142 const char *str, int size)
1143 {
1144 struct ring_buffer_event *event;
1145 struct trace_buffer *buffer;
1146 struct print_entry *entry;
1147 unsigned int trace_ctx;
1148 int alloc;
1149
1150 if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1151 return 0;
1152
1153 if (unlikely(tracing_selftest_running && tr == &global_trace))
1154 return 0;
1155
1156 if (unlikely(tracing_disabled))
1157 return 0;
1158
1159 alloc = sizeof(*entry) + size + 2; /* possible \n added */
1160
1161 trace_ctx = tracing_gen_ctx();
1162 buffer = tr->array_buffer.buffer;
1163 ring_buffer_nest_start(buffer);
1164 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1165 trace_ctx);
1166 if (!event) {
1167 size = 0;
1168 goto out;
1169 }
1170
1171 entry = ring_buffer_event_data(event);
1172 entry->ip = ip;
1173
1174 memcpy(&entry->buf, str, size);
1175
1176 /* Add a newline if necessary */
1177 if (entry->buf[size - 1] != '\n') {
1178 entry->buf[size] = '\n';
1179 entry->buf[size + 1] = '\0';
1180 } else
1181 entry->buf[size] = '\0';
1182
1183 __buffer_unlock_commit(buffer, event);
1184 ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1185 out:
1186 ring_buffer_nest_end(buffer);
1187 return size;
1188 }
1189 EXPORT_SYMBOL_GPL(__trace_array_puts);
1190
1191 /**
1192 * __trace_puts - write a constant string into the trace buffer.
1193 * @ip: The address of the caller
1194 * @str: The constant string to write
1195 * @size: The size of the string.
1196 */
1197 int __trace_puts(unsigned long ip, const char *str, int size)
1198 {
1199 return __trace_array_puts(printk_trace, ip, str, size);
1200 }
1201 EXPORT_SYMBOL_GPL(__trace_puts);
1202
1203 /**
1204 * __trace_bputs - write the pointer to a constant string into trace buffer
1205 * @ip: The address of the caller
1206 * @str: The constant string to write to the buffer
1207 */
1208 int __trace_bputs(unsigned long ip, const char *str)
1209 {
1210 struct trace_array *tr = READ_ONCE(printk_trace);
1211 struct ring_buffer_event *event;
1212 struct trace_buffer *buffer;
1213 struct bputs_entry *entry;
1214 unsigned int trace_ctx;
1215 int size = sizeof(struct bputs_entry);
1216 int ret = 0;
1217
1218 if (!printk_binsafe(tr))
1219 return __trace_puts(ip, str, strlen(str));
1220
1221 if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1222 return 0;
1223
1224 if (unlikely(tracing_selftest_running || tracing_disabled))
1225 return 0;
1226
1227 trace_ctx = tracing_gen_ctx();
1228 buffer = tr->array_buffer.buffer;
1229
1230 ring_buffer_nest_start(buffer);
1231 event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1232 trace_ctx);
1233 if (!event)
1234 goto out;
1235
1236 entry = ring_buffer_event_data(event);
1237 entry->ip = ip;
1238 entry->str = str;
1239
1240 __buffer_unlock_commit(buffer, event);
1241 ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1242
1243 ret = 1;
1244 out:
1245 ring_buffer_nest_end(buffer);
1246 return ret;
1247 }
1248 EXPORT_SYMBOL_GPL(__trace_bputs);
1249
1250 #ifdef CONFIG_TRACER_SNAPSHOT
1251 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1252 void *cond_data)
1253 {
1254 struct tracer *tracer = tr->current_trace;
1255 unsigned long flags;
1256
1257 if (in_nmi()) {
1258 trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1259 trace_array_puts(tr, "*** snapshot is being ignored ***\n");
1260 return;
1261 }
1262
1263 if (!tr->allocated_snapshot) {
1264 trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1265 trace_array_puts(tr, "*** stopping trace here! ***\n");
1266 tracer_tracing_off(tr);
1267 return;
1268 }
1269
1270 /* Note, snapshot can not be used when the tracer uses it */
1271 if (tracer->use_max_tr) {
1272 trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1273 trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1274 return;
1275 }
1276
1277 if (tr->mapped) {
1278 trace_array_puts(tr, "*** BUFFER MEMORY MAPPED ***\n");
1279 trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1280 return;
1281 }
1282
1283 local_irq_save(flags);
1284 update_max_tr(tr, current, smp_processor_id(), cond_data);
1285 local_irq_restore(flags);
1286 }
1287
1288 void tracing_snapshot_instance(struct trace_array *tr)
1289 {
1290 tracing_snapshot_instance_cond(tr, NULL);
1291 }
1292
1293 /**
1294 * tracing_snapshot - take a snapshot of the current buffer.
1295 *
1296 * This causes a swap between the snapshot buffer and the current live
1297 * tracing buffer. You can use this to take snapshots of the live
1298 * trace when some condition is triggered, but continue to trace.
1299 *
1300 * Note, make sure to allocate the snapshot with either
1301 * a tracing_snapshot_alloc(), or by doing it manually
1302 * with: echo 1 > /sys/kernel/tracing/snapshot
1303 *
1304 * If the snapshot buffer is not allocated, this will stop tracing,
1305 * effectively making the snapshot permanent.
1306 */
1307 void tracing_snapshot(void)
1308 {
1309 struct trace_array *tr = &global_trace;
1310
1311 tracing_snapshot_instance(tr);
1312 }
1313 EXPORT_SYMBOL_GPL(tracing_snapshot);
1314
1315 /**
1316 * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1317 * @tr: The tracing instance to snapshot
1318 * @cond_data: The data to be tested conditionally, and possibly saved
1319 *
1320 * This is the same as tracing_snapshot() except that the snapshot is
1321 * conditional - the snapshot will only happen if the
1322 * cond_snapshot.update() implementation receiving the cond_data
1323 * returns true, which means that the trace array's cond_snapshot
1324 * update() operation used the cond_data to determine whether the
1325 * snapshot should be taken, and if it was, presumably saved it along
1326 * with the snapshot.
1327 */
1328 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1329 {
1330 tracing_snapshot_instance_cond(tr, cond_data);
1331 }
1332 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1333
1334 /**
1335 * tracing_cond_snapshot_data - get the user data associated with a snapshot
1336 * @tr: The tracing instance
1337 *
1338 * When the user enables a conditional snapshot using
1339 * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1340 * with the snapshot. This accessor is used to retrieve it.
1341 *
1342 * Should not be called from cond_snapshot.update(), since it takes
1343 * the tr->max_lock lock, which the code calling
1344 * cond_snapshot.update() already holds.
1345 *
1346 * Returns the cond_data associated with the trace array's snapshot.
1347 */
1348 void *tracing_cond_snapshot_data(struct trace_array *tr)
1349 {
1350 void *cond_data = NULL;
1351
1352 local_irq_disable();
1353 arch_spin_lock(&tr->max_lock);
1354
1355 if (tr->cond_snapshot)
1356 cond_data = tr->cond_snapshot->cond_data;
1357
1358 arch_spin_unlock(&tr->max_lock);
1359 local_irq_enable();
1360
1361 return cond_data;
1362 }
1363 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1364
1365 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1366 struct array_buffer *size_buf, int cpu_id);
1367 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1368
1369 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1370 {
1371 int order;
1372 int ret;
1373
1374 if (!tr->allocated_snapshot) {
1375
1376 /* Make the snapshot buffer have the same order as main buffer */
1377 order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
1378 ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
1379 if (ret < 0)
1380 return ret;
1381
1382 /* allocate spare buffer */
1383 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1384 &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1385 if (ret < 0)
1386 return ret;
1387
1388 tr->allocated_snapshot = true;
1389 }
1390
1391 return 0;
1392 }
1393
1394 static void free_snapshot(struct trace_array *tr)
1395 {
1396 /*
1397 * We don't free the ring buffer; instead, we resize it, because
1398 * the max_tr ring buffer has some state (e.g. ring->clock) and
1399 * we want to preserve it.
1400 */
1401 ring_buffer_subbuf_order_set(tr->max_buffer.buffer, 0);
1402 ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1403 set_buffer_entries(&tr->max_buffer, 1);
1404 tracing_reset_online_cpus(&tr->max_buffer);
1405 tr->allocated_snapshot = false;
1406 }
1407
1408 static int tracing_arm_snapshot_locked(struct trace_array *tr)
1409 {
1410 int ret;
1411
1412 lockdep_assert_held(&trace_types_lock);
1413
1414 spin_lock(&tr->snapshot_trigger_lock);
1415 if (tr->snapshot == UINT_MAX || tr->mapped) {
1416 spin_unlock(&tr->snapshot_trigger_lock);
1417 return -EBUSY;
1418 }
1419
1420 tr->snapshot++;
1421 spin_unlock(&tr->snapshot_trigger_lock);
1422
1423 ret = tracing_alloc_snapshot_instance(tr);
1424 if (ret) {
1425 spin_lock(&tr->snapshot_trigger_lock);
1426 tr->snapshot--;
1427 spin_unlock(&tr->snapshot_trigger_lock);
1428 }
1429
1430 return ret;
1431 }
1432
1433 int tracing_arm_snapshot(struct trace_array *tr)
1434 {
1435 int ret;
1436
1437 mutex_lock(&trace_types_lock);
1438 ret = tracing_arm_snapshot_locked(tr);
1439 mutex_unlock(&trace_types_lock);
1440
1441 return ret;
1442 }
1443
1444 void tracing_disarm_snapshot(struct trace_array *tr)
1445 {
1446 spin_lock(&tr->snapshot_trigger_lock);
1447 if (!WARN_ON(!tr->snapshot))
1448 tr->snapshot--;
1449 spin_unlock(&tr->snapshot_trigger_lock);
1450 }
1451
1452 /**
1453 * tracing_alloc_snapshot - allocate snapshot buffer.
1454 *
1455 * This only allocates the snapshot buffer if it isn't already
1456 * allocated - it doesn't also take a snapshot.
1457 *
1458 * This is meant to be used in cases where the snapshot buffer needs
1459 * to be set up for events that can't sleep but need to be able to
1460 * trigger a snapshot.
1461 */
1462 int tracing_alloc_snapshot(void)
1463 {
1464 struct trace_array *tr = &global_trace;
1465 int ret;
1466
1467 ret = tracing_alloc_snapshot_instance(tr);
1468 WARN_ON(ret < 0);
1469
1470 return ret;
1471 }
1472 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1473
1474 /**
1475 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1476 *
1477 * This is similar to tracing_snapshot(), but it will allocate the
1478 * snapshot buffer if it isn't already allocated. Use this only
1479 * where it is safe to sleep, as the allocation may sleep.
1480 *
1481 * This causes a swap between the snapshot buffer and the current live
1482 * tracing buffer. You can use this to take snapshots of the live
1483 * trace when some condition is triggered, but continue to trace.
1484 */
1485 void tracing_snapshot_alloc(void)
1486 {
1487 int ret;
1488
1489 ret = tracing_alloc_snapshot();
1490 if (ret < 0)
1491 return;
1492
1493 tracing_snapshot();
1494 }
1495 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
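/*
 * Sketch of in-kernel use of the snapshot API above (illustrative only;
 * "interesting_condition" is a made-up placeholder):
 *
 *   tracing_snapshot_alloc();       // may sleep; sets up the spare buffer
 *   ...
 *   if (interesting_condition)
 *           tracing_snapshot();     // swap the live buffer with the snapshot
 */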
1496
1497 /**
1498 * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1499 * @tr: The tracing instance
1500 * @cond_data: User data to associate with the snapshot
1501 * @update: Implementation of the cond_snapshot update function
1502 *
1503 * Check whether the conditional snapshot for the given instance has
1504 * already been enabled, or if the current tracer is already using a
1505 * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1506 * save the cond_data and update function inside.
1507 *
1508 * Returns 0 if successful, error otherwise.
1509 */
1510 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1511 cond_update_fn_t update)
1512 {
1513 struct cond_snapshot *cond_snapshot __free(kfree) =
1514 kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1515 int ret;
1516
1517 if (!cond_snapshot)
1518 return -ENOMEM;
1519
1520 cond_snapshot->cond_data = cond_data;
1521 cond_snapshot->update = update;
1522
1523 guard(mutex)(&trace_types_lock);
1524
1525 if (tr->current_trace->use_max_tr)
1526 return -EBUSY;
1527
1528 /*
1529 * The cond_snapshot can only change to NULL without the
1530 * trace_types_lock. We don't care if we race with it going
1531 * to NULL, but we want to make sure that it's not set to
1532 * something other than NULL when we get here, which we can
1533 * do safely with only holding the trace_types_lock and not
1534 * having to take the max_lock.
1535 */
1536 if (tr->cond_snapshot)
1537 return -EBUSY;
1538
1539 ret = tracing_arm_snapshot_locked(tr);
1540 if (ret)
1541 return ret;
1542
1543 local_irq_disable();
1544 arch_spin_lock(&tr->max_lock);
1545 tr->cond_snapshot = no_free_ptr(cond_snapshot);
1546 arch_spin_unlock(&tr->max_lock);
1547 local_irq_enable();
1548
1549 return 0;
1550 }
1551 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1552
1553 /**
1554 * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1555 * @tr: The tracing instance
1556 *
1557 * Check whether the conditional snapshot for the given instance is
1558 * enabled; if so, free the cond_snapshot associated with it,
1559 * otherwise return -EINVAL.
1560 *
1561 * Returns 0 if successful, error otherwise.
1562 */
1563 int tracing_snapshot_cond_disable(struct trace_array *tr)
1564 {
1565 int ret = 0;
1566
1567 local_irq_disable();
1568 arch_spin_lock(&tr->max_lock);
1569
1570 if (!tr->cond_snapshot)
1571 ret = -EINVAL;
1572 else {
1573 kfree(tr->cond_snapshot);
1574 tr->cond_snapshot = NULL;
1575 }
1576
1577 arch_spin_unlock(&tr->max_lock);
1578 local_irq_enable();
1579
1580 tracing_disarm_snapshot(tr);
1581
1582 return ret;
1583 }
1584 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1585 #else
1586 void tracing_snapshot(void)
1587 {
1588 WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1589 }
1590 EXPORT_SYMBOL_GPL(tracing_snapshot);
1591 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1592 {
1593 WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1594 }
1595 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1596 int tracing_alloc_snapshot(void)
1597 {
1598 WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1599 return -ENODEV;
1600 }
1601 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1602 void tracing_snapshot_alloc(void)
1603 {
1604 /* Give warning */
1605 tracing_snapshot();
1606 }
1607 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1608 void *tracing_cond_snapshot_data(struct trace_array *tr)
1609 {
1610 return NULL;
1611 }
1612 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1613 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1614 {
1615 return -ENODEV;
1616 }
1617 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1618 int tracing_snapshot_cond_disable(struct trace_array *tr)
1619 {
1620 return false;
1621 }
1622 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1623 #define free_snapshot(tr) do { } while (0)
1624 #define tracing_arm_snapshot_locked(tr) ({ -EBUSY; })
1625 #endif /* CONFIG_TRACER_SNAPSHOT */
1626
1627 void tracer_tracing_off(struct trace_array *tr)
1628 {
1629 if (tr->array_buffer.buffer)
1630 ring_buffer_record_off(tr->array_buffer.buffer);
1631 /*
1632 * This flag is looked at when buffers haven't been allocated
1633 * yet, or by some tracers (like irqsoff), that just want to
1634 * know if the ring buffer has been disabled, but it can handle
1635 * races of where it gets disabled but we still do a record.
1636 * As the check is in the fast path of the tracers, it is more
1637 * important to be fast than accurate.
1638 */
1639 tr->buffer_disabled = 1;
1640 }
1641
1642 /**
1643 * tracer_tracing_disable() - temporarily disable writes to the buffer
1644 * @tr: The trace array to disable its buffer for
1645 *
1646 * Expects tracer_tracing_enable() to re-enable tracing.
1647 * The difference between this and tracer_tracing_off() is that this
1648 * is a counter and can nest, whereas tracer_tracing_off() can
1649 * be called multiple times and a single tracer_tracing_on() will
1650 * enable it.
1651 */
1652 void tracer_tracing_disable(struct trace_array *tr)
1653 {
1654 if (WARN_ON_ONCE(!tr->array_buffer.buffer))
1655 return;
1656
1657 ring_buffer_record_disable(tr->array_buffer.buffer);
1658 }
1659
1660 /**
1661 * tracer_tracing_enable() - counter part of tracer_tracing_disable()
1662 * @tr: The trace array that had tracer_tracing_disable() called on it
1663 *
1664 * This is called after tracer_tracing_disable() has been called on @tr,
1665 * when it's safe to re-enable tracing.
1666 */
1667 void tracer_tracing_enable(struct trace_array *tr)
1668 {
1669 if (WARN_ON_ONCE(!tr->array_buffer.buffer))
1670 return;
1671
1672 ring_buffer_record_enable(tr->array_buffer.buffer);
1673 }
1674
1675 /**
1676 * tracing_off - turn off tracing buffers
1677 *
1678 * This function stops the tracing buffers from recording data.
1679 * It does not disable any overhead the tracers themselves may
1680 * be causing. This function simply causes all recording to
1681 * the ring buffers to fail.
1682 */
1683 void tracing_off(void)
1684 {
1685 tracer_tracing_off(&global_trace);
1686 }
1687 EXPORT_SYMBOL_GPL(tracing_off);
1688
1689 void disable_trace_on_warning(void)
1690 {
1691 if (__disable_trace_on_warning) {
1692 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1693 "Disabling tracing due to warning\n");
1694 tracing_off();
1695 }
1696 }
1697
1698 /**
1699 * tracer_tracing_is_on - show real state of ring buffer enabled
1700 * @tr : the trace array to know if ring buffer is enabled
1701 *
1702 * Shows real state of the ring buffer if it is enabled or not.
1703 */
1704 bool tracer_tracing_is_on(struct trace_array *tr)
1705 {
1706 if (tr->array_buffer.buffer)
1707 return ring_buffer_record_is_set_on(tr->array_buffer.buffer);
1708 return !tr->buffer_disabled;
1709 }
1710
1711 /**
1712 * tracing_is_on - show state of ring buffers enabled
1713 */
1714 int tracing_is_on(void)
1715 {
1716 return tracer_tracing_is_on(&global_trace);
1717 }
1718 EXPORT_SYMBOL_GPL(tracing_is_on);
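/*
 * Illustrative use of the on/off helpers, e.g. to bracket a region of
 * interest from kernel code (or from user space via the tracing_on file
 * in tracefs):
 *
 *   tracing_on();
 *   ... code of interest ...
 *   tracing_off();
 */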
1719
1720 static int __init set_buf_size(char *str)
1721 {
1722 unsigned long buf_size;
1723
1724 if (!str)
1725 return 0;
1726 buf_size = memparse(str, &str);
1727 /*
1728 * nr_entries can not be zero and the startup
1729 * tests require some buffer space. Therefore
1730 * ensure we have at least 4096 bytes of buffer.
1731 */
1732 trace_buf_size = max(4096UL, buf_size);
1733 return 1;
1734 }
1735 __setup("trace_buf_size=", set_buf_size);
1736
1737 static int __init set_tracing_thresh(char *str)
1738 {
1739 unsigned long threshold;
1740 int ret;
1741
1742 if (!str)
1743 return 0;
1744 ret = kstrtoul(str, 0, &threshold);
1745 if (ret < 0)
1746 return 0;
1747 tracing_thresh = threshold * 1000;
1748 return 1;
1749 }
1750 __setup("tracing_thresh=", set_tracing_thresh);
1751
1752 unsigned long nsecs_to_usecs(unsigned long nsecs)
1753 {
1754 return nsecs / 1000;
1755 }
1756
1757 /*
1758 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1759 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1760 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1761 * of strings in the order that the evals (enum) were defined.
1762 */
1763 #undef C
1764 #define C(a, b) b
1765
1766 /* These must match the bit positions in trace_iterator_flags */
1767 static const char *trace_options[] = {
1768 TRACE_FLAGS
1769 NULL
1770 };
1771
1772 static struct {
1773 u64 (*func)(void);
1774 const char *name;
1775 int in_ns; /* is this clock in nanoseconds? */
1776 } trace_clocks[] = {
1777 { trace_clock_local, "local", 1 },
1778 { trace_clock_global, "global", 1 },
1779 { trace_clock_counter, "counter", 0 },
1780 { trace_clock_jiffies, "uptime", 0 },
1781 { trace_clock, "perf", 1 },
1782 { ktime_get_mono_fast_ns, "mono", 1 },
1783 { ktime_get_raw_fast_ns, "mono_raw", 1 },
1784 { ktime_get_boot_fast_ns, "boot", 1 },
1785 { ktime_get_tai_fast_ns, "tai", 1 },
1786 ARCH_TRACE_CLOCKS
1787 };
1788
1789 bool trace_clock_in_ns(struct trace_array *tr)
1790 {
1791 if (trace_clocks[tr->clock_id].in_ns)
1792 return true;
1793
1794 return false;
1795 }
1796
1797 /*
1798 * trace_parser_get_init - gets the buffer for trace parser
1799 */
1800 int trace_parser_get_init(struct trace_parser *parser, int size)
1801 {
1802 memset(parser, 0, sizeof(*parser));
1803
1804 parser->buffer = kmalloc(size, GFP_KERNEL);
1805 if (!parser->buffer)
1806 return 1;
1807
1808 parser->size = size;
1809 return 0;
1810 }
1811
1812 /*
1813 * trace_parser_put - frees the buffer for trace parser
1814 */
1815 void trace_parser_put(struct trace_parser *parser)
1816 {
1817 kfree(parser->buffer);
1818 parser->buffer = NULL;
1819 }
1820
1821 /*
1822 * trace_get_user - reads the user input string separated by space
1823 * (matched by isspace(ch))
1824 *
1825 * For each string found the 'struct trace_parser' is updated,
1826 * and the function returns.
1827 *
1828 * Returns number of bytes read.
1829 *
1830 * See kernel/trace/trace.h for 'struct trace_parser' details.
1831 */
1832 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1833 size_t cnt, loff_t *ppos)
1834 {
1835 char ch;
1836 size_t read = 0;
1837 ssize_t ret;
1838
1839 if (!*ppos)
1840 trace_parser_clear(parser);
1841
1842 ret = get_user(ch, ubuf++);
1843 if (ret)
1844 goto out;
1845
1846 read++;
1847 cnt--;
1848
1849 /*
1850 * The parser is not finished with the last write,
1851 * continue reading the user input without skipping spaces.
1852 */
1853 if (!parser->cont) {
1854 /* skip white space */
1855 while (cnt && isspace(ch)) {
1856 ret = get_user(ch, ubuf++);
1857 if (ret)
1858 goto out;
1859 read++;
1860 cnt--;
1861 }
1862
1863 parser->idx = 0;
1864
1865 /* only spaces were written */
1866 if (isspace(ch) || !ch) {
1867 *ppos += read;
1868 ret = read;
1869 goto out;
1870 }
1871 }
1872
1873 /* read the non-space input */
1874 while (cnt && !isspace(ch) && ch) {
1875 if (parser->idx < parser->size - 1)
1876 parser->buffer[parser->idx++] = ch;
1877 else {
1878 ret = -EINVAL;
1879 goto out;
1880 }
1881 ret = get_user(ch, ubuf++);
1882 if (ret)
1883 goto out;
1884 read++;
1885 cnt--;
1886 }
1887
1888 /* We either got finished input or we have to wait for another call. */
1889 if (isspace(ch) || !ch) {
1890 parser->buffer[parser->idx] = 0;
1891 parser->cont = false;
1892 } else if (parser->idx < parser->size - 1) {
1893 parser->cont = true;
1894 parser->buffer[parser->idx++] = ch;
1895 /* Make sure the parsed string always terminates with '\0'. */
1896 parser->buffer[parser->idx] = 0;
1897 } else {
1898 ret = -EINVAL;
1899 goto out;
1900 }
1901
1902 *ppos += read;
1903 ret = read;
1904
1905 out:
1906 return ret;
1907 }
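/*
 * Illustrative sketch (editor's addition, not from the original file): a
 * tracefs write handler would typically drive trace_get_user() like this,
 * acting on each complete, NUL-terminated token once parser.cont is false.
 * do_something_with() is a hypothetical helper.
 *
 *	struct trace_parser parser;
 *	ssize_t ret;
 *
 *	if (trace_parser_get_init(&parser, PAGE_SIZE))
 *		return -ENOMEM;
 *	ret = trace_get_user(&parser, ubuf, cnt, ppos);
 *	if (ret >= 0 && trace_parser_loaded(&parser))
 *		do_something_with(parser.buffer);
 *	trace_parser_put(&parser);
 *	return ret;
 */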
1908
1909 /* TODO add a seq_buf_to_buffer() */
1910 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1911 {
1912 int len;
1913
1914 if (trace_seq_used(s) <= s->readpos)
1915 return -EBUSY;
1916
1917 len = trace_seq_used(s) - s->readpos;
1918 if (cnt > len)
1919 cnt = len;
1920 memcpy(buf, s->buffer + s->readpos, cnt);
1921
1922 s->readpos += cnt;
1923 return cnt;
1924 }
1925
1926 unsigned long __read_mostly tracing_thresh;
1927
1928 #ifdef CONFIG_TRACER_MAX_TRACE
1929 static const struct file_operations tracing_max_lat_fops;
1930
1931 #ifdef LATENCY_FS_NOTIFY
1932
1933 static struct workqueue_struct *fsnotify_wq;
1934
1935 static void latency_fsnotify_workfn(struct work_struct *work)
1936 {
1937 struct trace_array *tr = container_of(work, struct trace_array,
1938 fsnotify_work);
1939 fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1940 }
1941
1942 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1943 {
1944 struct trace_array *tr = container_of(iwork, struct trace_array,
1945 fsnotify_irqwork);
1946 queue_work(fsnotify_wq, &tr->fsnotify_work);
1947 }
1948
1949 static void trace_create_maxlat_file(struct trace_array *tr,
1950 struct dentry *d_tracer)
1951 {
1952 INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1953 init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1954 tr->d_max_latency = trace_create_file("tracing_max_latency",
1955 TRACE_MODE_WRITE,
1956 d_tracer, tr,
1957 &tracing_max_lat_fops);
1958 }
1959
1960 __init static int latency_fsnotify_init(void)
1961 {
1962 fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1963 WQ_UNBOUND | WQ_HIGHPRI, 0);
1964 if (!fsnotify_wq) {
1965 pr_err("Unable to allocate tr_max_lat_wq\n");
1966 return -ENOMEM;
1967 }
1968 return 0;
1969 }
1970
1971 late_initcall_sync(latency_fsnotify_init);
1972
1973 void latency_fsnotify(struct trace_array *tr)
1974 {
1975 if (!fsnotify_wq)
1976 return;
1977 /*
1978 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1979 * possible that we are called from __schedule() or do_idle(), which
1980 * could cause a deadlock.
1981 */
1982 irq_work_queue(&tr->fsnotify_irqwork);
1983 }
1984
1985 #else /* !LATENCY_FS_NOTIFY */
1986
1987 #define trace_create_maxlat_file(tr, d_tracer) \
1988 trace_create_file("tracing_max_latency", TRACE_MODE_WRITE, \
1989 d_tracer, tr, &tracing_max_lat_fops)
1990
1991 #endif
1992
1993 /*
1994 * Copy the new maximum trace into the separate maximum-trace
1995 * structure. (this way the maximum trace is permanently saved,
1996 * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1997 */
1998 static void
1999 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
2000 {
2001 struct array_buffer *trace_buf = &tr->array_buffer;
2002 struct array_buffer *max_buf = &tr->max_buffer;
2003 struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
2004 struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
2005
2006 max_buf->cpu = cpu;
2007 max_buf->time_start = data->preempt_timestamp;
2008
2009 max_data->saved_latency = tr->max_latency;
2010 max_data->critical_start = data->critical_start;
2011 max_data->critical_end = data->critical_end;
2012
2013 strscpy(max_data->comm, tsk->comm);
2014 max_data->pid = tsk->pid;
2015 /*
2016 * If tsk == current, then use current_uid(), as that does not use
2017 * RCU. The irq tracer can be called out of RCU scope.
2018 */
2019 if (tsk == current)
2020 max_data->uid = current_uid();
2021 else
2022 max_data->uid = task_uid(tsk);
2023
2024 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
2025 max_data->policy = tsk->policy;
2026 max_data->rt_priority = tsk->rt_priority;
2027
2028 /* record this task's comm */
2029 tracing_record_cmdline(tsk);
2030 latency_fsnotify(tr);
2031 }
2032
2033 /**
2034 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
2035 * @tr: the trace array to snapshot
2036 * @tsk: the task with the latency
2037 * @cpu: The cpu that initiated the trace.
2038 * @cond_data: User data associated with a conditional snapshot
2039 *
2040 * Flip the buffers between the @tr and the max_tr and record information
2041 * about which task was the cause of this latency.
2042 */
2043 void
2044 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
2045 void *cond_data)
2046 {
2047 if (tr->stop_count)
2048 return;
2049
2050 WARN_ON_ONCE(!irqs_disabled());
2051
2052 if (!tr->allocated_snapshot) {
2053 /* Only the nop tracer should hit this when disabling */
2054 WARN_ON_ONCE(tr->current_trace != &nop_trace);
2055 return;
2056 }
2057
2058 arch_spin_lock(&tr->max_lock);
2059
2060 /* Inherit the recordable setting from array_buffer */
2061 if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
2062 ring_buffer_record_on(tr->max_buffer.buffer);
2063 else
2064 ring_buffer_record_off(tr->max_buffer.buffer);
2065
2066 #ifdef CONFIG_TRACER_SNAPSHOT
2067 if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
2068 arch_spin_unlock(&tr->max_lock);
2069 return;
2070 }
2071 #endif
2072 swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
2073
2074 __update_max_tr(tr, tsk, cpu);
2075
2076 arch_spin_unlock(&tr->max_lock);
2077
2078 /* Any waiters on the old snapshot buffer need to wake up */
2079 ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
2080 }
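/*
 * Illustrative call site (editor's sketch, simplified; not taken verbatim
 * from any tracer): a latency tracer that has just measured a new maximum
 * would record it roughly like this, with interrupts disabled as checked by
 * the WARN_ON_ONCE() above:
 *
 *	if (latency > tr->max_latency) {
 *		tr->max_latency = latency;
 *		update_max_tr(tr, current, smp_processor_id(), NULL);
 *	}
 */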
2081
2082 /**
2083 * update_max_tr_single - only copy one trace over, and reset the rest
2084 * @tr: the trace array to snapshot
2085 * @tsk: task with the latency
2086 * @cpu: the cpu of the buffer to copy.
2087 *
2088 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
2089 */
2090 void
2091 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
2092 {
2093 int ret;
2094
2095 if (tr->stop_count)
2096 return;
2097
2098 WARN_ON_ONCE(!irqs_disabled());
2099 if (!tr->allocated_snapshot) {
2100 /* Only the nop tracer should hit this when disabling */
2101 WARN_ON_ONCE(tr->current_trace != &nop_trace);
2102 return;
2103 }
2104
2105 arch_spin_lock(&tr->max_lock);
2106
2107 ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
2108
2109 if (ret == -EBUSY) {
2110 /*
2111 * We failed to swap the buffer due to a commit taking
2112 * place on this CPU. We fail to record, but we reset
2113 * the max trace buffer (no one writes directly to it)
2114 * and flag that it failed.
2115 * Another possible reason is that a resize is in progress.
2116 */
2117 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
2118 "Failed to swap buffers due to commit or resize in progress\n");
2119 }
2120
2121 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
2122
2123 __update_max_tr(tr, tsk, cpu);
2124 arch_spin_unlock(&tr->max_lock);
2125 }
2126
2127 #endif /* CONFIG_TRACER_MAX_TRACE */
2128
2129 struct pipe_wait {
2130 struct trace_iterator *iter;
2131 int wait_index;
2132 };
2133
2134 static bool wait_pipe_cond(void *data)
2135 {
2136 struct pipe_wait *pwait = data;
2137 struct trace_iterator *iter = pwait->iter;
2138
2139 if (atomic_read_acquire(&iter->wait_index) != pwait->wait_index)
2140 return true;
2141
2142 return iter->closed;
2143 }
2144
2145 static int wait_on_pipe(struct trace_iterator *iter, int full)
2146 {
2147 struct pipe_wait pwait;
2148 int ret;
2149
2150 /* Iterators are static; they should be either filled or empty */
2151 if (trace_buffer_iter(iter, iter->cpu_file))
2152 return 0;
2153
2154 pwait.wait_index = atomic_read_acquire(&iter->wait_index);
2155 pwait.iter = iter;
2156
2157 ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full,
2158 wait_pipe_cond, &pwait);
2159
2160 #ifdef CONFIG_TRACER_MAX_TRACE
2161 /*
2162 * Make sure this is still the snapshot buffer, as if a snapshot were
2163 * to happen, this would now be the main buffer.
2164 */
2165 if (iter->snapshot)
2166 iter->array_buffer = &iter->tr->max_buffer;
2167 #endif
2168 return ret;
2169 }
2170
2171 #ifdef CONFIG_FTRACE_STARTUP_TEST
2172 static bool selftests_can_run;
2173
2174 struct trace_selftests {
2175 struct list_head list;
2176 struct tracer *type;
2177 };
2178
2179 static LIST_HEAD(postponed_selftests);
2180
2181 static int save_selftest(struct tracer *type)
2182 {
2183 struct trace_selftests *selftest;
2184
2185 selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
2186 if (!selftest)
2187 return -ENOMEM;
2188
2189 selftest->type = type;
2190 list_add(&selftest->list, &postponed_selftests);
2191 return 0;
2192 }
2193
2194 static int run_tracer_selftest(struct tracer *type)
2195 {
2196 struct trace_array *tr = &global_trace;
2197 struct tracer *saved_tracer = tr->current_trace;
2198 int ret;
2199
2200 if (!type->selftest || tracing_selftest_disabled)
2201 return 0;
2202
2203 /*
2204 * If a tracer registers early in boot up (before scheduling is
2205 * initialized and such), then do not run its selftests yet.
2206 * Instead, run it a little later in the boot process.
2207 */
2208 if (!selftests_can_run)
2209 return save_selftest(type);
2210
2211 if (!tracing_is_on()) {
2212 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
2213 type->name);
2214 return 0;
2215 }
2216
2217 /*
2218 * Run a selftest on this tracer.
2219 * Here we reset the trace buffer, and set the current
2220 * tracer to be this tracer. The tracer can then run some
2221 * internal tracing to verify that everything is in order.
2222 * If we fail, we do not register this tracer.
2223 */
2224 tracing_reset_online_cpus(&tr->array_buffer);
2225
2226 tr->current_trace = type;
2227
2228 #ifdef CONFIG_TRACER_MAX_TRACE
2229 if (type->use_max_tr) {
2230 /* If we expanded the buffers, make sure the max is expanded too */
2231 if (tr->ring_buffer_expanded)
2232 ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2233 RING_BUFFER_ALL_CPUS);
2234 tr->allocated_snapshot = true;
2235 }
2236 #endif
2237
2238 /* the test is responsible for initializing and enabling */
2239 pr_info("Testing tracer %s: ", type->name);
2240 ret = type->selftest(type, tr);
2241 /* the test is responsible for resetting too */
2242 tr->current_trace = saved_tracer;
2243 if (ret) {
2244 printk(KERN_CONT "FAILED!\n");
2245 /* Add the warning after printing 'FAILED' */
2246 WARN_ON(1);
2247 return -1;
2248 }
2249 /* Only reset on passing, to avoid touching corrupted buffers */
2250 tracing_reset_online_cpus(&tr->array_buffer);
2251
2252 #ifdef CONFIG_TRACER_MAX_TRACE
2253 if (type->use_max_tr) {
2254 tr->allocated_snapshot = false;
2255
2256 /* Shrink the max buffer again */
2257 if (tr->ring_buffer_expanded)
2258 ring_buffer_resize(tr->max_buffer.buffer, 1,
2259 RING_BUFFER_ALL_CPUS);
2260 }
2261 #endif
2262
2263 printk(KERN_CONT "PASSED\n");
2264 return 0;
2265 }
2266
2267 static int do_run_tracer_selftest(struct tracer *type)
2268 {
2269 int ret;
2270
2271 /*
2272 * Tests can take a long time, especially if they are run one after the
2273 * other, as does happen during bootup when all the tracers are
2274 * registered. This could cause the soft lockup watchdog to trigger.
2275 */
2276 cond_resched();
2277
2278 tracing_selftest_running = true;
2279 ret = run_tracer_selftest(type);
2280 tracing_selftest_running = false;
2281
2282 return ret;
2283 }
2284
2285 static __init int init_trace_selftests(void)
2286 {
2287 struct trace_selftests *p, *n;
2288 struct tracer *t, **last;
2289 int ret;
2290
2291 selftests_can_run = true;
2292
2293 guard(mutex)(&trace_types_lock);
2294
2295 if (list_empty(&postponed_selftests))
2296 return 0;
2297
2298 pr_info("Running postponed tracer tests:\n");
2299
2300 tracing_selftest_running = true;
2301 list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2302 /* This loop can take minutes when sanitizers are enabled, so
2303 * let's make sure we allow RCU processing.
2304 */
2305 cond_resched();
2306 ret = run_tracer_selftest(p->type);
2307 /* If the test fails, then warn and remove from available_tracers */
2308 if (ret < 0) {
2309 WARN(1, "tracer: %s failed selftest, disabling\n",
2310 p->type->name);
2311 last = &trace_types;
2312 for (t = trace_types; t; t = t->next) {
2313 if (t == p->type) {
2314 *last = t->next;
2315 break;
2316 }
2317 last = &t->next;
2318 }
2319 }
2320 list_del(&p->list);
2321 kfree(p);
2322 }
2323 tracing_selftest_running = false;
2324
2325 return 0;
2326 }
2327 core_initcall(init_trace_selftests);
2328 #else
2329 static inline int do_run_tracer_selftest(struct tracer *type)
2330 {
2331 return 0;
2332 }
2333 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2334
2335 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2336
2337 static void __init apply_trace_boot_options(void);
2338
2339 /**
2340 * register_tracer - register a tracer with the ftrace system.
2341 * @type: the plugin for the tracer
2342 *
2343 * Register a new plugin tracer.
2344 */
2345 int __init register_tracer(struct tracer *type)
2346 {
2347 struct tracer *t;
2348 int ret = 0;
2349
2350 if (!type->name) {
2351 pr_info("Tracer must have a name\n");
2352 return -1;
2353 }
2354
2355 if (strlen(type->name) >= MAX_TRACER_SIZE) {
2356 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2357 return -1;
2358 }
2359
2360 if (security_locked_down(LOCKDOWN_TRACEFS)) {
2361 pr_warn("Can not register tracer %s due to lockdown\n",
2362 type->name);
2363 return -EPERM;
2364 }
2365
2366 mutex_lock(&trace_types_lock);
2367
2368 for (t = trace_types; t; t = t->next) {
2369 if (strcmp(type->name, t->name) == 0) {
2370 /* already found */
2371 pr_info("Tracer %s already registered\n",
2372 type->name);
2373 ret = -1;
2374 goto out;
2375 }
2376 }
2377
2378 if (!type->set_flag)
2379 type->set_flag = &dummy_set_flag;
2380 if (!type->flags) {
2381 /*allocate a dummy tracer_flags*/
2382 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2383 if (!type->flags) {
2384 ret = -ENOMEM;
2385 goto out;
2386 }
2387 type->flags->val = 0;
2388 type->flags->opts = dummy_tracer_opt;
2389 } else
2390 if (!type->flags->opts)
2391 type->flags->opts = dummy_tracer_opt;
2392
2393 /* store the tracer for __set_tracer_option */
2394 type->flags->trace = type;
2395
2396 ret = do_run_tracer_selftest(type);
2397 if (ret < 0)
2398 goto out;
2399
2400 type->next = trace_types;
2401 trace_types = type;
2402 add_tracer_options(&global_trace, type);
2403
2404 out:
2405 mutex_unlock(&trace_types_lock);
2406
2407 if (ret || !default_bootup_tracer)
2408 goto out_unlock;
2409
2410 if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2411 goto out_unlock;
2412
2413 printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2414 /* Do we want this tracer to start on bootup? */
2415 tracing_set_tracer(&global_trace, type->name);
2416 default_bootup_tracer = NULL;
2417
2418 apply_trace_boot_options();
2419
2420 /* Disable other selftests, since running this boot-up tracer would break them. */
2421 disable_tracing_selftest("running a tracer");
2422
2423 out_unlock:
2424 return ret;
2425 }
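/*
 * Illustrative sketch (editor's addition, not from the original file): a
 * minimal built-in tracer registration. my_tracer_init()/my_tracer_reset()
 * are hypothetical callbacks; real tracers fill in more of struct tracer,
 * but only .name is checked explicitly above.
 *
 *	static struct tracer my_tracer __read_mostly = {
 *		.name	= "my_tracer",
 *		.init	= my_tracer_init,
 *		.reset	= my_tracer_reset,
 *	};
 *
 *	static int __init my_tracer_register(void)
 *	{
 *		return register_tracer(&my_tracer);
 *	}
 *	core_initcall(my_tracer_register);
 */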
2426
2427 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2428 {
2429 struct trace_buffer *buffer = buf->buffer;
2430
2431 if (!buffer)
2432 return;
2433
2434 ring_buffer_record_disable(buffer);
2435
2436 /* Make sure all commits have finished */
2437 synchronize_rcu();
2438 ring_buffer_reset_cpu(buffer, cpu);
2439
2440 ring_buffer_record_enable(buffer);
2441 }
2442
2443 void tracing_reset_online_cpus(struct array_buffer *buf)
2444 {
2445 struct trace_buffer *buffer = buf->buffer;
2446
2447 if (!buffer)
2448 return;
2449
2450 ring_buffer_record_disable(buffer);
2451
2452 /* Make sure all commits have finished */
2453 synchronize_rcu();
2454
2455 buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2456
2457 ring_buffer_reset_online_cpus(buffer);
2458
2459 ring_buffer_record_enable(buffer);
2460 }
2461
2462 static void tracing_reset_all_cpus(struct array_buffer *buf)
2463 {
2464 struct trace_buffer *buffer = buf->buffer;
2465
2466 if (!buffer)
2467 return;
2468
2469 ring_buffer_record_disable(buffer);
2470
2471 /* Make sure all commits have finished */
2472 synchronize_rcu();
2473
2474 buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2475
2476 ring_buffer_reset(buffer);
2477
2478 ring_buffer_record_enable(buffer);
2479 }
2480
2481 /* Must have trace_types_lock held */
2482 void tracing_reset_all_online_cpus_unlocked(void)
2483 {
2484 struct trace_array *tr;
2485
2486 lockdep_assert_held(&trace_types_lock);
2487
2488 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2489 if (!tr->clear_trace)
2490 continue;
2491 tr->clear_trace = false;
2492 tracing_reset_online_cpus(&tr->array_buffer);
2493 #ifdef CONFIG_TRACER_MAX_TRACE
2494 tracing_reset_online_cpus(&tr->max_buffer);
2495 #endif
2496 }
2497 }
2498
2499 void tracing_reset_all_online_cpus(void)
2500 {
2501 mutex_lock(&trace_types_lock);
2502 tracing_reset_all_online_cpus_unlocked();
2503 mutex_unlock(&trace_types_lock);
2504 }
2505
2506 int is_tracing_stopped(void)
2507 {
2508 return global_trace.stop_count;
2509 }
2510
2511 static void tracing_start_tr(struct trace_array *tr)
2512 {
2513 struct trace_buffer *buffer;
2514 unsigned long flags;
2515
2516 if (tracing_disabled)
2517 return;
2518
2519 raw_spin_lock_irqsave(&tr->start_lock, flags);
2520 if (--tr->stop_count) {
2521 if (WARN_ON_ONCE(tr->stop_count < 0)) {
2522 /* Someone screwed up their debugging */
2523 tr->stop_count = 0;
2524 }
2525 goto out;
2526 }
2527
2528 /* Prevent the buffers from switching */
2529 arch_spin_lock(&tr->max_lock);
2530
2531 buffer = tr->array_buffer.buffer;
2532 if (buffer)
2533 ring_buffer_record_enable(buffer);
2534
2535 #ifdef CONFIG_TRACER_MAX_TRACE
2536 buffer = tr->max_buffer.buffer;
2537 if (buffer)
2538 ring_buffer_record_enable(buffer);
2539 #endif
2540
2541 arch_spin_unlock(&tr->max_lock);
2542
2543 out:
2544 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2545 }
2546
2547 /**
2548 * tracing_start - quick start of the tracer
2549 *
2550 * If tracing is enabled but was stopped by tracing_stop,
2551 * this will start the tracer back up.
2552 */
2553 void tracing_start(void)
2554
2555 {
2556 return tracing_start_tr(&global_trace);
2557 }
2558
2559 static void tracing_stop_tr(struct trace_array *tr)
2560 {
2561 struct trace_buffer *buffer;
2562 unsigned long flags;
2563
2564 raw_spin_lock_irqsave(&tr->start_lock, flags);
2565 if (tr->stop_count++)
2566 goto out;
2567
2568 /* Prevent the buffers from switching */
2569 arch_spin_lock(&tr->max_lock);
2570
2571 buffer = tr->array_buffer.buffer;
2572 if (buffer)
2573 ring_buffer_record_disable(buffer);
2574
2575 #ifdef CONFIG_TRACER_MAX_TRACE
2576 buffer = tr->max_buffer.buffer;
2577 if (buffer)
2578 ring_buffer_record_disable(buffer);
2579 #endif
2580
2581 arch_spin_unlock(&tr->max_lock);
2582
2583 out:
2584 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2585 }
2586
2587 /**
2588 * tracing_stop - quick stop of the tracer
2589 *
2590 * Lightweight way to stop tracing. Use in conjunction with
2591 * tracing_start.
2592 */
2593 void tracing_stop(void)
2594 {
2595 return tracing_stop_tr(&global_trace);
2596 }
2597
2598 /*
2599 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2600 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2601 * simplifies those functions and keeps them in sync.
2602 */
2603 enum print_line_t trace_handle_return(struct trace_seq *s)
2604 {
2605 return trace_seq_has_overflowed(s) ?
2606 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2607 }
2608 EXPORT_SYMBOL_GPL(trace_handle_return);
2609
2610 static unsigned short migration_disable_value(void)
2611 {
2612 #if defined(CONFIG_SMP)
2613 return current->migration_disabled;
2614 #else
2615 return 0;
2616 #endif
2617 }
2618
2619 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2620 {
2621 unsigned int trace_flags = irqs_status;
2622 unsigned int pc;
2623
2624 pc = preempt_count();
2625
2626 if (pc & NMI_MASK)
2627 trace_flags |= TRACE_FLAG_NMI;
2628 if (pc & HARDIRQ_MASK)
2629 trace_flags |= TRACE_FLAG_HARDIRQ;
2630 if (in_serving_softirq())
2631 trace_flags |= TRACE_FLAG_SOFTIRQ;
2632 if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2633 trace_flags |= TRACE_FLAG_BH_OFF;
2634
2635 if (tif_need_resched())
2636 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2637 if (test_preempt_need_resched())
2638 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2639 if (IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY) && tif_test_bit(TIF_NEED_RESCHED_LAZY))
2640 trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY;
2641 return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2642 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2643 }
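/*
 * Editor's summary of the context word packed above (illustrative, derived
 * from the return expression):
 *
 *	bits  0- 3: preempt_count(), clamped to 15
 *	bits  4- 7: migration-disable depth, clamped to 15
 *	bits  8-15: unused
 *	bits 16-23: TRACE_FLAG_* bits (irq, NMI, softirq, resched state)
 */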
2644
2645 struct ring_buffer_event *
2646 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2647 int type,
2648 unsigned long len,
2649 unsigned int trace_ctx)
2650 {
2651 return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2652 }
2653
2654 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2655 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2656 static int trace_buffered_event_ref;
2657
2658 /**
2659 * trace_buffered_event_enable - enable buffering events
2660 *
2661 * When events are being filtered, it is quicker to use a temporary
2662 * buffer to write the event data into if it is likely
2663 * that it will not be committed. The discard of the ring buffer
2664 * is not as fast as committing, and is much slower than copying
2665 * a commit.
2666 *
2667 * When an event is to be filtered, allocate per cpu buffers to
2668 * write the event data into, and if the event is filtered and discarded
2669 * it is simply dropped, otherwise, the entire data is to be committed
2670 * in one shot.
2671 */
2672 void trace_buffered_event_enable(void)
2673 {
2674 struct ring_buffer_event *event;
2675 struct page *page;
2676 int cpu;
2677
2678 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2679
2680 if (trace_buffered_event_ref++)
2681 return;
2682
2683 for_each_tracing_cpu(cpu) {
2684 page = alloc_pages_node(cpu_to_node(cpu),
2685 GFP_KERNEL | __GFP_NORETRY, 0);
2686 /* This is just an optimization and can handle failures */
2687 if (!page) {
2688 pr_err("Failed to allocate event buffer\n");
2689 break;
2690 }
2691
2692 event = page_address(page);
2693 memset(event, 0, sizeof(*event));
2694
2695 per_cpu(trace_buffered_event, cpu) = event;
2696
2697 preempt_disable();
2698 if (cpu == smp_processor_id() &&
2699 __this_cpu_read(trace_buffered_event) !=
2700 per_cpu(trace_buffered_event, cpu))
2701 WARN_ON_ONCE(1);
2702 preempt_enable();
2703 }
2704 }
2705
2706 static void enable_trace_buffered_event(void *data)
2707 {
2708 this_cpu_dec(trace_buffered_event_cnt);
2709 }
2710
2711 static void disable_trace_buffered_event(void *data)
2712 {
2713 this_cpu_inc(trace_buffered_event_cnt);
2714 }
2715
2716 /**
2717 * trace_buffered_event_disable - disable buffering events
2718 *
2719 * When a filter is removed, it is faster to not use the buffered
2720 * events, and to commit directly into the ring buffer. Free up
2721 * the temp buffers when there are no more users. This requires
2722 * special synchronization with current events.
2723 */
2724 void trace_buffered_event_disable(void)
2725 {
2726 int cpu;
2727
2728 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2729
2730 if (WARN_ON_ONCE(!trace_buffered_event_ref))
2731 return;
2732
2733 if (--trace_buffered_event_ref)
2734 return;
2735
2736 /* For each CPU, set the buffer as used. */
2737 on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2738 NULL, true);
2739
2740 /* Wait for all current users to finish */
2741 synchronize_rcu();
2742
2743 for_each_tracing_cpu(cpu) {
2744 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2745 per_cpu(trace_buffered_event, cpu) = NULL;
2746 }
2747
2748 /*
2749 * Wait for all CPUs that potentially started checking if they can use
2750 * their event buffer only after the previous synchronize_rcu() call and
2751 * they still read a valid pointer from trace_buffered_event. It must be
2752 * ensured they don't see cleared trace_buffered_event_cnt else they
2753 * could wrongly decide to use the pointed-to buffer which is now freed.
2754 */
2755 synchronize_rcu();
2756
2757 /* For each CPU, relinquish the buffer */
2758 on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2759 true);
2760 }
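/*
 * Illustrative pairing (editor's sketch, not from the original file): callers
 * in the event enable/disable path hold event_mutex and bracket their work
 * with the two functions above, roughly:
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();		// first user of buffering
 *	...
 *	trace_buffered_event_disable();		// last user went away
 *	mutex_unlock(&event_mutex);
 *
 * The reference count makes nested enable/disable pairs safe.
 */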
2761
2762 static struct trace_buffer *temp_buffer;
2763
2764 struct ring_buffer_event *
2765 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2766 struct trace_event_file *trace_file,
2767 int type, unsigned long len,
2768 unsigned int trace_ctx)
2769 {
2770 struct ring_buffer_event *entry;
2771 struct trace_array *tr = trace_file->tr;
2772 int val;
2773
2774 *current_rb = tr->array_buffer.buffer;
2775
2776 if (!tr->no_filter_buffering_ref &&
2777 (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2778 preempt_disable_notrace();
2779 /*
2780 * Filtering is on, so try to use the per cpu buffer first.
2781 * This buffer will simulate a ring_buffer_event,
2782 * where the type_len is zero and the array[0] will
2783 * hold the full length.
2784 * (see include/linux/ring_buffer.h for details on
2785 * how the ring_buffer_event is structured).
2786 *
2787 * Using a temp buffer during filtering and copying it
2788 * on a matched filter is quicker than writing directly
2789 * into the ring buffer and then discarding it when
2790 * it doesn't match. That is because the discard
2791 * requires several atomic operations to get right.
2792 * Copying on match and doing nothing on a failed match
2793 * is still quicker than no copy on match, but having
2794 * to discard out of the ring buffer on a failed match.
2795 */
2796 if ((entry = __this_cpu_read(trace_buffered_event))) {
2797 int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2798
2799 val = this_cpu_inc_return(trace_buffered_event_cnt);
2800
2801 /*
2802 * Preemption is disabled, but interrupts and NMIs
2803 * can still come in now. If that happens after
2804 * the above increment, then it will have to go
2805 * back to the old method of allocating the event
2806 * on the ring buffer, and if the filter fails, it
2807 * will have to call ring_buffer_discard_commit()
2808 * to remove it.
2809 *
2810 * Need to also check the unlikely case that the
2811 * length is bigger than the temp buffer size.
2812 * If that happens, then the reserve is pretty much
2813 * guaranteed to fail, as the ring buffer currently
2814 * only allows events less than a page. But that may
2815 * change in the future, so let the ring buffer reserve
2816 * handle the failure in that case.
2817 */
2818 if (val == 1 && likely(len <= max_len)) {
2819 trace_event_setup(entry, type, trace_ctx);
2820 entry->array[0] = len;
2821 /* Return with preemption disabled */
2822 return entry;
2823 }
2824 this_cpu_dec(trace_buffered_event_cnt);
2825 }
2826 /* __trace_buffer_lock_reserve() disables preemption */
2827 preempt_enable_notrace();
2828 }
2829
2830 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2831 trace_ctx);
2832 /*
2833 * If tracing is off, but we have triggers enabled
2834 * we still need to look at the event data. Use the temp_buffer
2835 * to store the trace event for the trigger to use. It's recursion
2836 * safe and will not be recorded anywhere.
2837 */
2838 if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2839 *current_rb = temp_buffer;
2840 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2841 trace_ctx);
2842 }
2843 return entry;
2844 }
2845 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
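/*
 * Rough usage pattern (editor's sketch, simplified): the event code reserves
 * space, fills in the entry, and then commits it, conceptually:
 *
 *	entry = trace_event_buffer_lock_reserve(&buffer, trace_file,
 *						event_type, len, trace_ctx);
 *	if (entry) {
 *		// fill in the event fields
 *		trace_event_buffer_commit(fbuffer);
 *	}
 *
 * where fbuffer is the struct trace_event_buffer wrapping buffer, event and
 * trace_file; see trace_event_buffer_reserve() in trace_events.c for the
 * actual caller.
 */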
2846
2847 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2848 static DEFINE_MUTEX(tracepoint_printk_mutex);
2849
2850 static void output_printk(struct trace_event_buffer *fbuffer)
2851 {
2852 struct trace_event_call *event_call;
2853 struct trace_event_file *file;
2854 struct trace_event *event;
2855 unsigned long flags;
2856 struct trace_iterator *iter = tracepoint_print_iter;
2857
2858 /* We should never get here if iter is NULL */
2859 if (WARN_ON_ONCE(!iter))
2860 return;
2861
2862 event_call = fbuffer->trace_file->event_call;
2863 if (!event_call || !event_call->event.funcs ||
2864 !event_call->event.funcs->trace)
2865 return;
2866
2867 file = fbuffer->trace_file;
2868 if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2869 (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2870 !filter_match_preds(file->filter, fbuffer->entry)))
2871 return;
2872
2873 event = &fbuffer->trace_file->event_call->event;
2874
2875 raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2876 trace_seq_init(&iter->seq);
2877 iter->ent = fbuffer->entry;
2878 event_call->event.funcs->trace(iter, 0, event);
2879 trace_seq_putc(&iter->seq, 0);
2880 printk("%s", iter->seq.buffer);
2881
2882 raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2883 }
2884
2885 int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
2886 void *buffer, size_t *lenp,
2887 loff_t *ppos)
2888 {
2889 int save_tracepoint_printk;
2890 int ret;
2891
2892 guard(mutex)(&tracepoint_printk_mutex);
2893 save_tracepoint_printk = tracepoint_printk;
2894
2895 ret = proc_dointvec(table, write, buffer, lenp, ppos);
2896
2897 /*
2898 * This will force exiting early, as tracepoint_printk
2899 * is always zero when tracepoint_print_iter is not allocated
2900 */
2901 if (!tracepoint_print_iter)
2902 tracepoint_printk = 0;
2903
2904 if (save_tracepoint_printk == tracepoint_printk)
2905 return ret;
2906
2907 if (tracepoint_printk)
2908 static_key_enable(&tracepoint_printk_key.key);
2909 else
2910 static_key_disable(&tracepoint_printk_key.key);
2911
2912 return ret;
2913 }
2914
2915 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2916 {
2917 enum event_trigger_type tt = ETT_NONE;
2918 struct trace_event_file *file = fbuffer->trace_file;
2919
2920 if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2921 fbuffer->entry, &tt))
2922 goto discard;
2923
2924 if (static_key_false(&tracepoint_printk_key.key))
2925 output_printk(fbuffer);
2926
2927 if (static_branch_unlikely(&trace_event_exports_enabled))
2928 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2929
2930 trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2931 fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2932
2933 discard:
2934 if (tt)
2935 event_triggers_post_call(file, tt);
2936
2937 }
2938 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2939
2940 /*
2941 * Skip 3:
2942 *
2943 * trace_buffer_unlock_commit_regs()
2944 * trace_event_buffer_commit()
2945 * trace_event_raw_event_xxx()
2946 */
2947 # define STACK_SKIP 3
2948
2949 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2950 struct trace_buffer *buffer,
2951 struct ring_buffer_event *event,
2952 unsigned int trace_ctx,
2953 struct pt_regs *regs)
2954 {
2955 __buffer_unlock_commit(buffer, event);
2956
2957 /*
2958 * If regs is not set, then skip the necessary functions.
2959 * Note, we can still get here via blktrace, wakeup tracer
2960 * and mmiotrace, but that's ok if they lose a function or
2961 * two. They are not that meaningful.
2962 */
2963 ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2964 ftrace_trace_userstack(tr, buffer, trace_ctx);
2965 }
2966
2967 /*
2968 * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2969 */
2970 void
2971 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2972 struct ring_buffer_event *event)
2973 {
2974 __buffer_unlock_commit(buffer, event);
2975 }
2976
2977 void
2978 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2979 parent_ip, unsigned int trace_ctx, struct ftrace_regs *fregs)
2980 {
2981 struct trace_buffer *buffer = tr->array_buffer.buffer;
2982 struct ring_buffer_event *event;
2983 struct ftrace_entry *entry;
2984 int size = sizeof(*entry);
2985
2986 size += FTRACE_REGS_MAX_ARGS * !!fregs * sizeof(long);
2987
2988 event = __trace_buffer_lock_reserve(buffer, TRACE_FN, size,
2989 trace_ctx);
2990 if (!event)
2991 return;
2992 entry = ring_buffer_event_data(event);
2993 entry->ip = ip;
2994 entry->parent_ip = parent_ip;
2995
2996 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
2997 if (fregs) {
2998 for (int i = 0; i < FTRACE_REGS_MAX_ARGS; i++)
2999 entry->args[i] = ftrace_regs_get_argument(fregs, i);
3000 }
3001 #endif
3002
3003 if (static_branch_unlikely(&trace_function_exports_enabled))
3004 ftrace_exports(event, TRACE_EXPORT_FUNCTION);
3005 __buffer_unlock_commit(buffer, event);
3006 }
3007
3008 #ifdef CONFIG_STACKTRACE
3009
3010 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
3011 #define FTRACE_KSTACK_NESTING 4
3012
3013 #define FTRACE_KSTACK_ENTRIES (SZ_4K / FTRACE_KSTACK_NESTING)
3014
3015 struct ftrace_stack {
3016 unsigned long calls[FTRACE_KSTACK_ENTRIES];
3017 };
3018
3019
3020 struct ftrace_stacks {
3021 struct ftrace_stack stacks[FTRACE_KSTACK_NESTING];
3022 };
3023
3024 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3025 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3026
3027 static void __ftrace_trace_stack(struct trace_array *tr,
3028 struct trace_buffer *buffer,
3029 unsigned int trace_ctx,
3030 int skip, struct pt_regs *regs)
3031 {
3032 struct ring_buffer_event *event;
3033 unsigned int size, nr_entries;
3034 struct ftrace_stack *fstack;
3035 struct stack_entry *entry;
3036 int stackidx;
3037
3038 /*
3039 * Add one, for this function and the call to stack_trace_save().
3040 * If regs is set, then these functions will not be in the way.
3041 */
3042 #ifndef CONFIG_UNWINDER_ORC
3043 if (!regs)
3044 skip++;
3045 #endif
3046
3047 preempt_disable_notrace();
3048
3049 stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3050
3051 /* This should never happen. If it does, yell once and skip */
3052 if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3053 goto out;
3054
3055 /*
3056 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3057 * interrupt will either see the value pre increment or post
3058 * increment. If the interrupt happens pre increment it will have
3059 * restored the counter when it returns. We just need a barrier to
3060 * keep gcc from moving things around.
3061 */
3062 barrier();
3063
3064 fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3065 size = ARRAY_SIZE(fstack->calls);
3066
3067 if (regs) {
3068 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3069 size, skip);
3070 } else {
3071 nr_entries = stack_trace_save(fstack->calls, size, skip);
3072 }
3073
3074 #ifdef CONFIG_DYNAMIC_FTRACE
3075 /* Mark entry of stack trace as trampoline code */
3076 if (tr->ops && tr->ops->trampoline) {
3077 unsigned long tramp_start = tr->ops->trampoline;
3078 unsigned long tramp_end = tramp_start + tr->ops->trampoline_size;
3079 unsigned long *calls = fstack->calls;
3080
3081 for (int i = 0; i < nr_entries; i++) {
3082 if (calls[i] >= tramp_start && calls[i] < tramp_end)
3083 calls[i] = FTRACE_TRAMPOLINE_MARKER;
3084 }
3085 }
3086 #endif
3087
3088 event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3089 struct_size(entry, caller, nr_entries),
3090 trace_ctx);
3091 if (!event)
3092 goto out;
3093 entry = ring_buffer_event_data(event);
3094
3095 entry->size = nr_entries;
3096 memcpy(&entry->caller, fstack->calls,
3097 flex_array_size(entry, caller, nr_entries));
3098
3099 __buffer_unlock_commit(buffer, event);
3100
3101 out:
3102 /* Again, don't let gcc optimize things here */
3103 barrier();
3104 __this_cpu_dec(ftrace_stack_reserve);
3105 preempt_enable_notrace();
3106
3107 }
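/*
 * Editor's illustration of the nesting scheme above: ftrace_stack_reserve
 * picks one of the FTRACE_KSTACK_NESTING (4) per-CPU slots, so traces taken
 * from contexts that interrupt each other never overwrite a trace that is
 * still being saved:
 *
 *	context:   task -> softirq -> hardirq -> NMI
 *	stackidx:    0        1          2        3
 */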
3108
3109 static inline void ftrace_trace_stack(struct trace_array *tr,
3110 struct trace_buffer *buffer,
3111 unsigned int trace_ctx,
3112 int skip, struct pt_regs *regs)
3113 {
3114 if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3115 return;
3116
3117 __ftrace_trace_stack(tr, buffer, trace_ctx, skip, regs);
3118 }
3119
3120 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3121 int skip)
3122 {
3123 struct trace_buffer *buffer = tr->array_buffer.buffer;
3124
3125 if (rcu_is_watching()) {
3126 __ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
3127 return;
3128 }
3129
3130 if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3131 return;
3132
3133 /*
3134 * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3135 * but if the above rcu_is_watching() failed, then the NMI
3136 * triggered someplace critical, and ct_irq_enter() should
3137 * not be called from NMI.
3138 */
3139 if (unlikely(in_nmi()))
3140 return;
3141
3142 ct_irq_enter_irqson();
3143 __ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
3144 ct_irq_exit_irqson();
3145 }
3146
3147 /**
3148 * trace_dump_stack - record a stack back trace in the trace buffer
3149 * @skip: Number of functions to skip (helper handlers)
3150 */
3151 void trace_dump_stack(int skip)
3152 {
3153 if (tracing_disabled || tracing_selftest_running)
3154 return;
3155
3156 #ifndef CONFIG_UNWINDER_ORC
3157 /* Skip 1 to skip this function. */
3158 skip++;
3159 #endif
3160 __ftrace_trace_stack(printk_trace, printk_trace->array_buffer.buffer,
3161 tracing_gen_ctx(), skip, NULL);
3162 }
3163 EXPORT_SYMBOL_GPL(trace_dump_stack);
3164
3165 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3166 static DEFINE_PER_CPU(int, user_stack_count);
3167
3168 static void
3169 ftrace_trace_userstack(struct trace_array *tr,
3170 struct trace_buffer *buffer, unsigned int trace_ctx)
3171 {
3172 struct ring_buffer_event *event;
3173 struct userstack_entry *entry;
3174
3175 if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3176 return;
3177
3178 /*
3179 * NMIs cannot handle page faults, even with fixups.
3180 * Saving the user stack can (and often does) fault.
3181 */
3182 if (unlikely(in_nmi()))
3183 return;
3184
3185 /*
3186 * prevent recursion, since the user stack tracing may
3187 * trigger other kernel events.
3188 */
3189 preempt_disable();
3190 if (__this_cpu_read(user_stack_count))
3191 goto out;
3192
3193 __this_cpu_inc(user_stack_count);
3194
3195 event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3196 sizeof(*entry), trace_ctx);
3197 if (!event)
3198 goto out_drop_count;
3199 entry = ring_buffer_event_data(event);
3200
3201 entry->tgid = current->tgid;
3202 memset(&entry->caller, 0, sizeof(entry->caller));
3203
3204 stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3205 __buffer_unlock_commit(buffer, event);
3206
3207 out_drop_count:
3208 __this_cpu_dec(user_stack_count);
3209 out:
3210 preempt_enable();
3211 }
3212 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3213 static void ftrace_trace_userstack(struct trace_array *tr,
3214 struct trace_buffer *buffer,
3215 unsigned int trace_ctx)
3216 {
3217 }
3218 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3219
3220 #endif /* CONFIG_STACKTRACE */
3221
3222 static inline void
3223 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3224 unsigned long long delta)
3225 {
3226 entry->bottom_delta_ts = delta & U32_MAX;
3227 entry->top_delta_ts = (delta >> 32);
3228 }
3229
3230 void trace_last_func_repeats(struct trace_array *tr,
3231 struct trace_func_repeats *last_info,
3232 unsigned int trace_ctx)
3233 {
3234 struct trace_buffer *buffer = tr->array_buffer.buffer;
3235 struct func_repeats_entry *entry;
3236 struct ring_buffer_event *event;
3237 u64 delta;
3238
3239 event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3240 sizeof(*entry), trace_ctx);
3241 if (!event)
3242 return;
3243
3244 delta = ring_buffer_event_time_stamp(buffer, event) -
3245 last_info->ts_last_call;
3246
3247 entry = ring_buffer_event_data(event);
3248 entry->ip = last_info->ip;
3249 entry->parent_ip = last_info->parent_ip;
3250 entry->count = last_info->count;
3251 func_repeats_set_delta_ts(entry, delta);
3252
3253 __buffer_unlock_commit(buffer, event);
3254 }
3255
3256 /* created for use with alloc_percpu */
3257 struct trace_buffer_struct {
3258 int nesting;
3259 char buffer[4][TRACE_BUF_SIZE];
3260 };
3261
3262 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3263
3264 /*
3265 * This allows for lockless recording. If we're nested too deeply, then
3266 * this returns NULL.
3267 */
3268 static char *get_trace_buf(void)
3269 {
3270 struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3271
3272 if (!trace_percpu_buffer || buffer->nesting >= 4)
3273 return NULL;
3274
3275 buffer->nesting++;
3276
3277 /* Interrupts must see nesting incremented before we use the buffer */
3278 barrier();
3279 return &buffer->buffer[buffer->nesting - 1][0];
3280 }
3281
3282 static void put_trace_buf(void)
3283 {
3284 /* Don't let the decrement of nesting leak before this */
3285 barrier();
3286 this_cpu_dec(trace_percpu_buffer->nesting);
3287 }
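/*
 * Usage sketch (editor's addition): trace_vbprintk() and
 * __trace_array_vprintk() below use this pair around their formatting step:
 *
 *	tbuffer = get_trace_buf();
 *	if (!tbuffer)
 *		goto out;		// nested more than 4 deep, drop the print
 *	...format into tbuffer...
 *	put_trace_buf();
 *
 * The nesting counter lets an interrupt that lands between get and put use
 * its own slot of the per-CPU buffer instead of corrupting the current one.
 */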
3288
3289 static int alloc_percpu_trace_buffer(void)
3290 {
3291 struct trace_buffer_struct __percpu *buffers;
3292
3293 if (trace_percpu_buffer)
3294 return 0;
3295
3296 buffers = alloc_percpu(struct trace_buffer_struct);
3297 if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3298 return -ENOMEM;
3299
3300 trace_percpu_buffer = buffers;
3301 return 0;
3302 }
3303
3304 static int buffers_allocated;
3305
3306 void trace_printk_init_buffers(void)
3307 {
3308 if (buffers_allocated)
3309 return;
3310
3311 if (alloc_percpu_trace_buffer())
3312 return;
3313
3314 /* trace_printk() is for debug use only. Don't use it in production. */
3315
3316 pr_warn("\n");
3317 pr_warn("**********************************************************\n");
3318 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
3319 pr_warn("** **\n");
3320 pr_warn("** trace_printk() being used. Allocating extra memory. **\n");
3321 pr_warn("** **\n");
3322 pr_warn("** This means that this is a DEBUG kernel and it is **\n");
3323 pr_warn("** unsafe for production use. **\n");
3324 pr_warn("** **\n");
3325 pr_warn("** If you see this message and you are not debugging **\n");
3326 pr_warn("** the kernel, report this immediately to your vendor! **\n");
3327 pr_warn("** **\n");
3328 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
3329 pr_warn("**********************************************************\n");
3330
3331 /* Expand the buffers to set size */
3332 tracing_update_buffers(&global_trace);
3333
3334 buffers_allocated = 1;
3335
3336 /*
3337 * trace_printk_init_buffers() can be called by modules.
3338 * If that happens, then we need to start cmdline recording
3339 * directly here. If the global_trace.buffer is already
3340 * allocated here, then this was called by module code.
3341 */
3342 if (global_trace.array_buffer.buffer)
3343 tracing_start_cmdline_record();
3344 }
3345 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3346
3347 void trace_printk_start_comm(void)
3348 {
3349 /* Start tracing comms if trace printk is set */
3350 if (!buffers_allocated)
3351 return;
3352 tracing_start_cmdline_record();
3353 }
3354
3355 static void trace_printk_start_stop_comm(int enabled)
3356 {
3357 if (!buffers_allocated)
3358 return;
3359
3360 if (enabled)
3361 tracing_start_cmdline_record();
3362 else
3363 tracing_stop_cmdline_record();
3364 }
3365
3366 /**
3367 * trace_vbprintk - write binary msg to tracing buffer
3368 * @ip: The address of the caller
3369 * @fmt: The string format to write to the buffer
3370 * @args: Arguments for @fmt
3371 */
3372 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3373 {
3374 struct ring_buffer_event *event;
3375 struct trace_buffer *buffer;
3376 struct trace_array *tr = READ_ONCE(printk_trace);
3377 struct bprint_entry *entry;
3378 unsigned int trace_ctx;
3379 char *tbuffer;
3380 int len = 0, size;
3381
3382 if (!printk_binsafe(tr))
3383 return trace_vprintk(ip, fmt, args);
3384
3385 if (unlikely(tracing_selftest_running || tracing_disabled))
3386 return 0;
3387
3388 /* Don't pollute graph traces with trace_vprintk internals */
3389 pause_graph_tracing();
3390
3391 trace_ctx = tracing_gen_ctx();
3392 preempt_disable_notrace();
3393
3394 tbuffer = get_trace_buf();
3395 if (!tbuffer) {
3396 len = 0;
3397 goto out_nobuffer;
3398 }
3399
3400 len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3401
3402 if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3403 goto out_put;
3404
3405 size = sizeof(*entry) + sizeof(u32) * len;
3406 buffer = tr->array_buffer.buffer;
3407 ring_buffer_nest_start(buffer);
3408 event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3409 trace_ctx);
3410 if (!event)
3411 goto out;
3412 entry = ring_buffer_event_data(event);
3413 entry->ip = ip;
3414 entry->fmt = fmt;
3415
3416 memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3417 __buffer_unlock_commit(buffer, event);
3418 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3419
3420 out:
3421 ring_buffer_nest_end(buffer);
3422 out_put:
3423 put_trace_buf();
3424
3425 out_nobuffer:
3426 preempt_enable_notrace();
3427 unpause_graph_tracing();
3428
3429 return len;
3430 }
3431 EXPORT_SYMBOL_GPL(trace_vbprintk);
3432
3433 static __printf(3, 0)
3434 int __trace_array_vprintk(struct trace_buffer *buffer,
3435 unsigned long ip, const char *fmt, va_list args)
3436 {
3437 struct ring_buffer_event *event;
3438 int len = 0, size;
3439 struct print_entry *entry;
3440 unsigned int trace_ctx;
3441 char *tbuffer;
3442
3443 if (tracing_disabled)
3444 return 0;
3445
3446 /* Don't pollute graph traces with trace_vprintk internals */
3447 pause_graph_tracing();
3448
3449 trace_ctx = tracing_gen_ctx();
3450 preempt_disable_notrace();
3451
3452
3453 tbuffer = get_trace_buf();
3454 if (!tbuffer) {
3455 len = 0;
3456 goto out_nobuffer;
3457 }
3458
3459 len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3460
3461 size = sizeof(*entry) + len + 1;
3462 ring_buffer_nest_start(buffer);
3463 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3464 trace_ctx);
3465 if (!event)
3466 goto out;
3467 entry = ring_buffer_event_data(event);
3468 entry->ip = ip;
3469
3470 memcpy(&entry->buf, tbuffer, len + 1);
3471 __buffer_unlock_commit(buffer, event);
3472 ftrace_trace_stack(printk_trace, buffer, trace_ctx, 6, NULL);
3473
3474 out:
3475 ring_buffer_nest_end(buffer);
3476 put_trace_buf();
3477
3478 out_nobuffer:
3479 preempt_enable_notrace();
3480 unpause_graph_tracing();
3481
3482 return len;
3483 }
3484
3485 int trace_array_vprintk(struct trace_array *tr,
3486 unsigned long ip, const char *fmt, va_list args)
3487 {
3488 if (tracing_selftest_running && tr == &global_trace)
3489 return 0;
3490
3491 return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3492 }
3493
3494 /**
3495 * trace_array_printk - Print a message to a specific instance
3496 * @tr: The instance trace_array descriptor
3497 * @ip: The instruction pointer that this is called from.
3498 * @fmt: The format to print (printf format)
3499 *
3500 * If a subsystem sets up its own instance, it may printk strings
3501 * into its tracing instance buffer using this
3502 * function. Note, this function will not write into the top level
3503 * buffer (use trace_printk() for that), as writing into the top level
3504 * buffer should only have events that can be individually disabled.
3505 * trace_printk() is only used for debugging a kernel, and should never
3506 * be used in production code.
3507 *
3508 * trace_array_printk() can be used, as it will not add noise to the
3509 * top level tracing buffer.
3510 *
3511 * Note, trace_array_init_printk() must be called on @tr before this
3512 * can be used.
3513 */
3514 int trace_array_printk(struct trace_array *tr,
3515 unsigned long ip, const char *fmt, ...)
3516 {
3517 int ret;
3518 va_list ap;
3519
3520 if (!tr)
3521 return -ENOENT;
3522
3523 /* This is only allowed for created instances */
3524 if (tr == &global_trace)
3525 return 0;
3526
3527 if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3528 return 0;
3529
3530 va_start(ap, fmt);
3531 ret = trace_array_vprintk(tr, ip, fmt, ap);
3532 va_end(ap);
3533 return ret;
3534 }
3535 EXPORT_SYMBOL_GPL(trace_array_printk);
3536
3537 /**
3538 * trace_array_init_printk - Initialize buffers for trace_array_printk()
3539 * @tr: The trace array to initialize the buffers for
3540 *
3541 * As trace_array_printk() only writes into instances, they are OK to
3542 * have in the kernel (unlike trace_printk()). This needs to be called
3543 * before trace_array_printk() can be used on a trace_array.
3544 */
3545 int trace_array_init_printk(struct trace_array *tr)
3546 {
3547 if (!tr)
3548 return -ENOENT;
3549
3550 /* This is only allowed for created instances */
3551 if (tr == &global_trace)
3552 return -EINVAL;
3553
3554 return alloc_percpu_trace_buffer();
3555 }
3556 EXPORT_SYMBOL_GPL(trace_array_init_printk);
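/*
 * Example flow for an instance user (editor's sketch; error handling and the
 * instance lookup/creation call are elided, the instance name is made up,
 * and my_get_instance() is a hypothetical stand-in, e.g. for
 * trace_array_get_by_name()). Note the instance also needs its "printk"
 * trace option enabled for trace_array_printk() to record anything.
 *
 *	struct trace_array *tr = my_get_instance("my_subsys");
 *
 *	if (tr && !trace_array_init_printk(tr))
 *		trace_array_printk(tr, _THIS_IP_, "hello from %s\n", "my_subsys");
 */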
3557
3558 int trace_array_printk_buf(struct trace_buffer *buffer,
3559 unsigned long ip, const char *fmt, ...)
3560 {
3561 int ret;
3562 va_list ap;
3563
3564 if (!(printk_trace->trace_flags & TRACE_ITER_PRINTK))
3565 return 0;
3566
3567 va_start(ap, fmt);
3568 ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3569 va_end(ap);
3570 return ret;
3571 }
3572
3573 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3574 {
3575 return trace_array_vprintk(printk_trace, ip, fmt, args);
3576 }
3577 EXPORT_SYMBOL_GPL(trace_vprintk);
3578
3579 static void trace_iterator_increment(struct trace_iterator *iter)
3580 {
3581 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3582
3583 iter->idx++;
3584 if (buf_iter)
3585 ring_buffer_iter_advance(buf_iter);
3586 }
3587
3588 static struct trace_entry *
3589 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3590 unsigned long *lost_events)
3591 {
3592 struct ring_buffer_event *event;
3593 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3594
3595 if (buf_iter) {
3596 event = ring_buffer_iter_peek(buf_iter, ts);
3597 if (lost_events)
3598 *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3599 (unsigned long)-1 : 0;
3600 } else {
3601 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3602 lost_events);
3603 }
3604
3605 if (event) {
3606 iter->ent_size = ring_buffer_event_length(event);
3607 return ring_buffer_event_data(event);
3608 }
3609 iter->ent_size = 0;
3610 return NULL;
3611 }
3612
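/*
 * Merge the per-CPU buffers into one stream: peek at the next entry on
 * every tracing CPU (or just the one CPU for a per_cpu trace file) and
 * return the entry with the smallest timestamp, along with its CPU,
 * timestamp and lost-event count.
 */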
3613 static struct trace_entry *
3614 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3615 unsigned long *missing_events, u64 *ent_ts)
3616 {
3617 struct trace_buffer *buffer = iter->array_buffer->buffer;
3618 struct trace_entry *ent, *next = NULL;
3619 unsigned long lost_events = 0, next_lost = 0;
3620 int cpu_file = iter->cpu_file;
3621 u64 next_ts = 0, ts;
3622 int next_cpu = -1;
3623 int next_size = 0;
3624 int cpu;
3625
3626 /*
3627 * If we are in a per_cpu trace file, don't bother iterating over
3628 * all CPUs; peek at that CPU directly.
3629 */
3630 if (cpu_file > RING_BUFFER_ALL_CPUS) {
3631 if (ring_buffer_empty_cpu(buffer, cpu_file))
3632 return NULL;
3633 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3634 if (ent_cpu)
3635 *ent_cpu = cpu_file;
3636
3637 return ent;
3638 }
3639
3640 for_each_tracing_cpu(cpu) {
3641
3642 if (ring_buffer_empty_cpu(buffer, cpu))
3643 continue;
3644
3645 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3646
3647 /*
3648 * Pick the entry with the smallest timestamp:
3649 */
3650 if (ent && (!next || ts < next_ts)) {
3651 next = ent;
3652 next_cpu = cpu;
3653 next_ts = ts;
3654 next_lost = lost_events;
3655 next_size = iter->ent_size;
3656 }
3657 }
3658
3659 iter->ent_size = next_size;
3660
3661 if (ent_cpu)
3662 *ent_cpu = next_cpu;
3663
3664 if (ent_ts)
3665 *ent_ts = next_ts;
3666
3667 if (missing_events)
3668 *missing_events = next_lost;
3669
3670 return next;
3671 }
3672
3673 #define STATIC_FMT_BUF_SIZE 128
3674 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3675
3676 char *trace_iter_expand_format(struct trace_iterator *iter)
3677 {
3678 char *tmp;
3679
3680 /*
3681 * iter->tr is NULL when used with tp_printk, which makes
3682 * this get called where it is not safe to call krealloc().
3683 */
3684 if (!iter->tr || iter->fmt == static_fmt_buf)
3685 return NULL;
3686
3687 tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3688 GFP_KERNEL);
3689 if (tmp) {
3690 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3691 iter->fmt = tmp;
3692 }
3693
3694 return tmp;
3695 }
3696
3697 /* Returns true if the string is safe to dereference from an event */
3698 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3699 {
3700 unsigned long addr = (unsigned long)str;
3701 struct trace_event *trace_event;
3702 struct trace_event_call *event;
3703
3704 /* OK if part of the event data */
3705 if ((addr >= (unsigned long)iter->ent) &&
3706 (addr < (unsigned long)iter->ent + iter->ent_size))
3707 return true;
3708
3709 /* OK if part of the temp seq buffer */
3710 if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3711 (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE))
3712 return true;
3713
3714 /* Core rodata can not be freed */
3715 if (is_kernel_rodata(addr))
3716 return true;
3717
3718 if (trace_is_tracepoint_string(str))
3719 return true;
3720
3721 /*
3722 * Now this could be a module event, referencing core module
3723 * data, which is OK.
3724 */
3725 if (!iter->ent)
3726 return false;
3727
3728 trace_event = ftrace_find_event(iter->ent->type);
3729 if (!trace_event)
3730 return false;
3731
3732 event = container_of(trace_event, struct trace_event_call, event);
3733 if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3734 return false;
3735
3736 /* Would rather have rodata, but this will suffice */
3737 if (within_module_core(addr, event->module))
3738 return true;
3739
3740 return false;
3741 }
3742
3743 /**
3744 * ignore_event - Check dereferenced fields while writing to the seq buffer
3745 * @iter: The iterator that holds the seq buffer and the event being printed
3746 *
3747 * At boot up, test_event_printk() will flag any event that dereferences
3748 * a string with "%s" that does not exist in the ring buffer. It may still
3749 * be valid, as the string may point to a static string in the kernel
3750 * rodata that never gets freed. But if the string pointer is pointing
3751 * to something that was allocated, there's a chance that it can be freed
3752 * by the time the user reads the trace. This would cause a bad memory
3753 * access by the kernel and possibly crash the system.
3754 *
3755 * This function will check if the event has any fields flagged as needing
3756 * to be checked at runtime and perform those checks.
3757 *
3758 * If it is found that a field is unsafe, it will write into the @iter->seq
3759 * a message stating what was found to be unsafe.
3760 *
3761 * @return: true if the event is unsafe and should be ignored,
3762 * false otherwise.
3763 */
3764 bool ignore_event(struct trace_iterator *iter)
3765 {
3766 struct ftrace_event_field *field;
3767 struct trace_event *trace_event;
3768 struct trace_event_call *event;
3769 struct list_head *head;
3770 struct trace_seq *seq;
3771 const void *ptr;
3772
3773 trace_event = ftrace_find_event(iter->ent->type);
3774
3775 seq = &iter->seq;
3776
3777 if (!trace_event) {
3778 trace_seq_printf(seq, "EVENT ID %d NOT FOUND?\n", iter->ent->type);
3779 return true;
3780 }
3781
3782 event = container_of(trace_event, struct trace_event_call, event);
3783 if (!(event->flags & TRACE_EVENT_FL_TEST_STR))
3784 return false;
3785
3786 head = trace_get_fields(event);
3787 if (!head) {
3788 trace_seq_printf(seq, "FIELDS FOR EVENT '%s' NOT FOUND?\n",
3789 trace_event_name(event));
3790 return true;
3791 }
3792
3793 /* Offsets are from the iter->ent that points to the raw event */
3794 ptr = iter->ent;
3795
3796 list_for_each_entry(field, head, link) {
3797 const char *str;
3798 bool good;
3799
3800 if (!field->needs_test)
3801 continue;
3802
3803 str = *(const char **)(ptr + field->offset);
3804
3805 good = trace_safe_str(iter, str);
3806
3807 /*
3808 * If you hit this warning, it is likely that the
3809 * trace event in question used %s on a string that
3810 * was saved at the time of the event, but may not be
3811 * around when the trace is read. Use __string(),
3812 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3813 * instead. See samples/trace_events/trace-events-sample.h
3814 * for reference.
3815 */
3816 if (WARN_ONCE(!good, "event '%s' has unsafe pointer field '%s'",
3817 trace_event_name(event), field->name)) {
3818 trace_seq_printf(seq, "EVENT %s: HAS UNSAFE POINTER FIELD '%s'\n",
3819 trace_event_name(event), field->name);
3820 return true;
3821 }
3822 }
3823 return false;
3824 }
3825
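/*
 * Example of the rewrite done below (illustrative): with the hash-ptr
 * option cleared, a format such as "addr=%p mode=%pGp done=%d" is copied
 * into iter->fmt as "addr=%px mode=%pGp done=%d". Only a bare "%p" is
 * converted; "%%" and extended pointer formats like "%pGp" are left
 * untouched because the character after the 'p' is alphanumeric.
 */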
3826 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3827 {
3828 const char *p, *new_fmt;
3829 char *q;
3830
3831 if (WARN_ON_ONCE(!fmt))
3832 return fmt;
3833
3834 if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3835 return fmt;
3836
3837 p = fmt;
3838 new_fmt = q = iter->fmt;
3839 while (*p) {
3840 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3841 if (!trace_iter_expand_format(iter))
3842 return fmt;
3843
3844 q += iter->fmt - new_fmt;
3845 new_fmt = iter->fmt;
3846 }
3847
3848 *q++ = *p++;
3849
3850 /* Replace %p with %px */
3851 if (p[-1] == '%') {
3852 if (p[0] == '%') {
3853 *q++ = *p++;
3854 } else if (p[0] == 'p' && !isalnum(p[1])) {
3855 *q++ = *p++;
3856 *q++ = 'x';
3857 }
3858 }
3859 }
3860 *q = '\0';
3861
3862 return new_fmt;
3863 }
3864
3865 #define STATIC_TEMP_BUF_SIZE 128
3866 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3867
3868 /* Find the next real entry, without updating the iterator itself */
3869 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3870 int *ent_cpu, u64 *ent_ts)
3871 {
3872 /* __find_next_entry will reset ent_size */
3873 int ent_size = iter->ent_size;
3874 struct trace_entry *entry;
3875
3876 /*
3877 * If called from ftrace_dump(), then the iter->temp buffer
3878 * will be the static_temp_buf and not created from kmalloc.
3879 * If the entry size is greater than the buffer, we cannot
3880 * save it. Just return NULL in that case. This is only
3881 * used to add markers when two consecutive events' time
3882 * stamps have a large delta. See trace_print_lat_context().
3883 */
3884 if (iter->temp == static_temp_buf &&
3885 STATIC_TEMP_BUF_SIZE < ent_size)
3886 return NULL;
3887
3888 /*
3889 * The __find_next_entry() may call peek_next_entry(), which may
3890 * call ring_buffer_peek() that may make the contents of iter->ent
3891 * undefined. Need to copy iter->ent now.
3892 */
3893 if (iter->ent && iter->ent != iter->temp) {
3894 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3895 !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3896 void *temp;
3897 temp = kmalloc(iter->ent_size, GFP_KERNEL);
3898 if (!temp)
3899 return NULL;
3900 kfree(iter->temp);
3901 iter->temp = temp;
3902 iter->temp_size = iter->ent_size;
3903 }
3904 memcpy(iter->temp, iter->ent, iter->ent_size);
3905 iter->ent = iter->temp;
3906 }
3907 entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3908 /* Put back the original ent_size */
3909 iter->ent_size = ent_size;
3910
3911 return entry;
3912 }
3913
3914 /* Find the next real entry, and increment the iterator to the next entry */
3915 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3916 {
3917 iter->ent = __find_next_entry(iter, &iter->cpu,
3918 &iter->lost_events, &iter->ts);
3919
3920 if (iter->ent)
3921 trace_iterator_increment(iter);
3922
3923 return iter->ent ? iter : NULL;
3924 }
3925
3926 static void trace_consume(struct trace_iterator *iter)
3927 {
3928 ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3929 &iter->lost_events);
3930 }
3931
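/*
 * seq_file hooks for the "trace" file. A read calls s_start() once, then
 * alternates s_show()/s_next() until the seq buffer is full, and finishes
 * with s_stop(). iter->leftover carries a line that did not fit so it can
 * be emitted first on the next read.
 */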
3932 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3933 {
3934 struct trace_iterator *iter = m->private;
3935 int i = (int)*pos;
3936 void *ent;
3937
3938 WARN_ON_ONCE(iter->leftover);
3939
3940 (*pos)++;
3941
3942 /* can't go backwards */
3943 if (iter->idx > i)
3944 return NULL;
3945
3946 if (iter->idx < 0)
3947 ent = trace_find_next_entry_inc(iter);
3948 else
3949 ent = iter;
3950
3951 while (ent && iter->idx < i)
3952 ent = trace_find_next_entry_inc(iter);
3953
3954 iter->pos = *pos;
3955
3956 return ent;
3957 }
3958
3959 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3960 {
3961 struct ring_buffer_iter *buf_iter;
3962 unsigned long entries = 0;
3963 u64 ts;
3964
3965 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3966
3967 buf_iter = trace_buffer_iter(iter, cpu);
3968 if (!buf_iter)
3969 return;
3970
3971 ring_buffer_iter_reset(buf_iter);
3972
3973 /*
3974 * With the max latency tracers, it is possible that a reset
3975 * never took place on a cpu. This is evident from the timestamp
3976 * being before the start of the buffer.
3977 */
3978 while (ring_buffer_iter_peek(buf_iter, &ts)) {
3979 if (ts >= iter->array_buffer->time_start)
3980 break;
3981 entries++;
3982 ring_buffer_iter_advance(buf_iter);
3983 /* This could be a big loop */
3984 cond_resched();
3985 }
3986
3987 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3988 }
3989
3990 /*
3991 * The current tracer is copied to avoid taking a global lock
3992 * all around.
3993 */
3994 static void *s_start(struct seq_file *m, loff_t *pos)
3995 {
3996 struct trace_iterator *iter = m->private;
3997 struct trace_array *tr = iter->tr;
3998 int cpu_file = iter->cpu_file;
3999 void *p = NULL;
4000 loff_t l = 0;
4001 int cpu;
4002
4003 mutex_lock(&trace_types_lock);
4004 if (unlikely(tr->current_trace != iter->trace)) {
4005 /* Close iter->trace before switching to the new current tracer */
4006 if (iter->trace->close)
4007 iter->trace->close(iter);
4008 iter->trace = tr->current_trace;
4009 /* Reopen the new current tracer */
4010 if (iter->trace->open)
4011 iter->trace->open(iter);
4012 }
4013 mutex_unlock(&trace_types_lock);
4014
4015 #ifdef CONFIG_TRACER_MAX_TRACE
4016 if (iter->snapshot && iter->trace->use_max_tr)
4017 return ERR_PTR(-EBUSY);
4018 #endif
4019
4020 if (*pos != iter->pos) {
4021 iter->ent = NULL;
4022 iter->cpu = 0;
4023 iter->idx = -1;
4024
4025 if (cpu_file == RING_BUFFER_ALL_CPUS) {
4026 for_each_tracing_cpu(cpu)
4027 tracing_iter_reset(iter, cpu);
4028 } else
4029 tracing_iter_reset(iter, cpu_file);
4030
4031 iter->leftover = 0;
4032 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4033 ;
4034
4035 } else {
4036 /*
4037 * If we overflowed the seq_file before, then we want
4038 * to just reuse the trace_seq buffer again.
4039 */
4040 if (iter->leftover)
4041 p = iter;
4042 else {
4043 l = *pos - 1;
4044 p = s_next(m, p, &l);
4045 }
4046 }
4047
4048 trace_event_read_lock();
4049 trace_access_lock(cpu_file);
4050 return p;
4051 }
4052
4053 static void s_stop(struct seq_file *m, void *p)
4054 {
4055 struct trace_iterator *iter = m->private;
4056
4057 #ifdef CONFIG_TRACER_MAX_TRACE
4058 if (iter->snapshot && iter->trace->use_max_tr)
4059 return;
4060 #endif
4061
4062 trace_access_unlock(iter->cpu_file);
4063 trace_event_read_unlock();
4064 }
4065
4066 static void
4067 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4068 unsigned long *entries, int cpu)
4069 {
4070 unsigned long count;
4071
4072 count = ring_buffer_entries_cpu(buf->buffer, cpu);
4073 /*
4074 * If this buffer has skipped entries, then we hold all
4075 * entries for the trace and we need to ignore the
4076 * ones before the time stamp.
4077 */
4078 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4079 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4080 /* total is the same as the entries */
4081 *total = count;
4082 } else
4083 *total = count +
4084 ring_buffer_overrun_cpu(buf->buffer, cpu);
4085 *entries = count;
4086 }
4087
4088 static void
4089 get_total_entries(struct array_buffer *buf,
4090 unsigned long *total, unsigned long *entries)
4091 {
4092 unsigned long t, e;
4093 int cpu;
4094
4095 *total = 0;
4096 *entries = 0;
4097
4098 for_each_tracing_cpu(cpu) {
4099 get_total_entries_cpu(buf, &t, &e, cpu);
4100 *total += t;
4101 *entries += e;
4102 }
4103 }
4104
4105 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4106 {
4107 unsigned long total, entries;
4108
4109 if (!tr)
4110 tr = &global_trace;
4111
4112 get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4113
4114 return entries;
4115 }
4116
4117 unsigned long trace_total_entries(struct trace_array *tr)
4118 {
4119 unsigned long total, entries;
4120
4121 if (!tr)
4122 tr = &global_trace;
4123
4124 get_total_entries(&tr->array_buffer, &total, &entries);
4125
4126 return entries;
4127 }
4128
4129 static void print_lat_help_header(struct seq_file *m)
4130 {
4131 seq_puts(m, "# _------=> CPU# \n"
4132 "# / _-----=> irqs-off/BH-disabled\n"
4133 "# | / _----=> need-resched \n"
4134 "# || / _---=> hardirq/softirq \n"
4135 "# ||| / _--=> preempt-depth \n"
4136 "# |||| / _-=> migrate-disable \n"
4137 "# ||||| / delay \n"
4138 "# cmd pid |||||| time | caller \n"
4139 "# \\ / |||||| \\ | / \n");
4140 }
4141
4142 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4143 {
4144 unsigned long total;
4145 unsigned long entries;
4146
4147 get_total_entries(buf, &total, &entries);
4148 seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n",
4149 entries, total, num_online_cpus());
4150 seq_puts(m, "#\n");
4151 }
4152
4153 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4154 unsigned int flags)
4155 {
4156 bool tgid = flags & TRACE_ITER_RECORD_TGID;
4157
4158 print_event_info(buf, m);
4159
4160 seq_printf(m, "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? " TGID " : "");
4161 seq_printf(m, "# | | %s | | |\n", tgid ? " | " : "");
4162 }
4163
4164 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4165 unsigned int flags)
4166 {
4167 bool tgid = flags & TRACE_ITER_RECORD_TGID;
4168 static const char space[] = " ";
4169 int prec = tgid ? 12 : 2;
4170
4171 print_event_info(buf, m);
4172
4173 seq_printf(m, "# %.*s _-----=> irqs-off/BH-disabled\n", prec, space);
4174 seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space);
4175 seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space);
4176 seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space);
4177 seq_printf(m, "# %.*s||| / _-=> migrate-disable\n", prec, space);
4178 seq_printf(m, "# %.*s|||| / delay\n", prec, space);
4179 seq_printf(m, "# TASK-PID %.*s CPU# ||||| TIMESTAMP FUNCTION\n", prec, " TGID ");
4180 seq_printf(m, "# | | %.*s | ||||| | |\n", prec, " | ");
4181 }
4182
4183 void
4184 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4185 {
4186 unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4187 struct array_buffer *buf = iter->array_buffer;
4188 struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4189 struct tracer *type = iter->trace;
4190 unsigned long entries;
4191 unsigned long total;
4192 const char *name = type->name;
4193
4194 get_total_entries(buf, &total, &entries);
4195
4196 seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4197 name, init_utsname()->release);
4198 seq_puts(m, "# -----------------------------------"
4199 "---------------------------------\n");
4200 seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4201 " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4202 nsecs_to_usecs(data->saved_latency),
4203 entries,
4204 total,
4205 buf->cpu,
4206 preempt_model_str(),
4207 /* These are reserved for later use */
4208 0, 0, 0, 0);
4209 #ifdef CONFIG_SMP
4210 seq_printf(m, " #P:%d)\n", num_online_cpus());
4211 #else
4212 seq_puts(m, ")\n");
4213 #endif
4214 seq_puts(m, "# -----------------\n");
4215 seq_printf(m, "# | task: %.16s-%d "
4216 "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4217 data->comm, data->pid,
4218 from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4219 data->policy, data->rt_priority);
4220 seq_puts(m, "# -----------------\n");
4221
4222 if (data->critical_start) {
4223 seq_puts(m, "# => started at: ");
4224 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4225 trace_print_seq(m, &iter->seq);
4226 seq_puts(m, "\n# => ended at: ");
4227 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4228 trace_print_seq(m, &iter->seq);
4229 seq_puts(m, "\n#\n");
4230 }
4231
4232 seq_puts(m, "#\n");
4233 }
4234
4235 static void test_cpu_buff_start(struct trace_iterator *iter)
4236 {
4237 struct trace_seq *s = &iter->seq;
4238 struct trace_array *tr = iter->tr;
4239
4240 if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4241 return;
4242
4243 if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4244 return;
4245
4246 if (cpumask_available(iter->started) &&
4247 cpumask_test_cpu(iter->cpu, iter->started))
4248 return;
4249
4250 if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4251 return;
4252
4253 if (cpumask_available(iter->started))
4254 cpumask_set_cpu(iter->cpu, iter->started);
4255
4256 /* Don't print started cpu buffer for the first entry of the trace */
4257 if (iter->idx > 1)
4258 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4259 iter->cpu);
4260 }
4261
4262 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4263 {
4264 struct trace_array *tr = iter->tr;
4265 struct trace_seq *s = &iter->seq;
4266 unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4267 struct trace_entry *entry;
4268 struct trace_event *event;
4269
4270 entry = iter->ent;
4271
4272 test_cpu_buff_start(iter);
4273
4274 event = ftrace_find_event(entry->type);
4275
4276 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4277 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4278 trace_print_lat_context(iter);
4279 else
4280 trace_print_context(iter);
4281 }
4282
4283 if (trace_seq_has_overflowed(s))
4284 return TRACE_TYPE_PARTIAL_LINE;
4285
4286 if (event) {
4287 if (tr->trace_flags & TRACE_ITER_FIELDS)
4288 return print_event_fields(iter, event);
4289 /*
4290 * For TRACE_EVENT() events, the print_fmt is not
4291 * safe to use if the array has delta offsets.
4292 * Force printing via the fields.
4293 */
4294 if ((tr->text_delta) &&
4295 event->type > __TRACE_LAST_TYPE)
4296 return print_event_fields(iter, event);
4297
4298 return event->funcs->trace(iter, sym_flags, event);
4299 }
4300
4301 trace_seq_printf(s, "Unknown type %d\n", entry->type);
4302
4303 return trace_handle_return(s);
4304 }
4305
4306 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4307 {
4308 struct trace_array *tr = iter->tr;
4309 struct trace_seq *s = &iter->seq;
4310 struct trace_entry *entry;
4311 struct trace_event *event;
4312
4313 entry = iter->ent;
4314
4315 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4316 trace_seq_printf(s, "%d %d %llu ",
4317 entry->pid, iter->cpu, iter->ts);
4318
4319 if (trace_seq_has_overflowed(s))
4320 return TRACE_TYPE_PARTIAL_LINE;
4321
4322 event = ftrace_find_event(entry->type);
4323 if (event)
4324 return event->funcs->raw(iter, 0, event);
4325
4326 trace_seq_printf(s, "%d ?\n", entry->type);
4327
4328 return trace_handle_return(s);
4329 }
4330
4331 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4332 {
4333 struct trace_array *tr = iter->tr;
4334 struct trace_seq *s = &iter->seq;
4335 unsigned char newline = '\n';
4336 struct trace_entry *entry;
4337 struct trace_event *event;
4338
4339 entry = iter->ent;
4340
4341 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4342 SEQ_PUT_HEX_FIELD(s, entry->pid);
4343 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4344 SEQ_PUT_HEX_FIELD(s, iter->ts);
4345 if (trace_seq_has_overflowed(s))
4346 return TRACE_TYPE_PARTIAL_LINE;
4347 }
4348
4349 event = ftrace_find_event(entry->type);
4350 if (event) {
4351 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4352 if (ret != TRACE_TYPE_HANDLED)
4353 return ret;
4354 }
4355
4356 SEQ_PUT_FIELD(s, newline);
4357
4358 return trace_handle_return(s);
4359 }
4360
4361 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4362 {
4363 struct trace_array *tr = iter->tr;
4364 struct trace_seq *s = &iter->seq;
4365 struct trace_entry *entry;
4366 struct trace_event *event;
4367
4368 entry = iter->ent;
4369
4370 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4371 SEQ_PUT_FIELD(s, entry->pid);
4372 SEQ_PUT_FIELD(s, iter->cpu);
4373 SEQ_PUT_FIELD(s, iter->ts);
4374 if (trace_seq_has_overflowed(s))
4375 return TRACE_TYPE_PARTIAL_LINE;
4376 }
4377
4378 event = ftrace_find_event(entry->type);
4379 return event ? event->funcs->binary(iter, 0, event) :
4380 TRACE_TYPE_HANDLED;
4381 }
4382
4383 int trace_empty(struct trace_iterator *iter)
4384 {
4385 struct ring_buffer_iter *buf_iter;
4386 int cpu;
4387
4388 /* If we are looking at one CPU buffer, only check that one */
4389 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4390 cpu = iter->cpu_file;
4391 buf_iter = trace_buffer_iter(iter, cpu);
4392 if (buf_iter) {
4393 if (!ring_buffer_iter_empty(buf_iter))
4394 return 0;
4395 } else {
4396 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4397 return 0;
4398 }
4399 return 1;
4400 }
4401
4402 for_each_tracing_cpu(cpu) {
4403 buf_iter = trace_buffer_iter(iter, cpu);
4404 if (buf_iter) {
4405 if (!ring_buffer_iter_empty(buf_iter))
4406 return 0;
4407 } else {
4408 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4409 return 0;
4410 }
4411 }
4412
4413 return 1;
4414 }
4415
4416 /* Called with trace_event_read_lock() held. */
4417 enum print_line_t print_trace_line(struct trace_iterator *iter)
4418 {
4419 struct trace_array *tr = iter->tr;
4420 unsigned long trace_flags = tr->trace_flags;
4421 enum print_line_t ret;
4422
4423 if (iter->lost_events) {
4424 if (iter->lost_events == (unsigned long)-1)
4425 trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4426 iter->cpu);
4427 else
4428 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4429 iter->cpu, iter->lost_events);
4430 if (trace_seq_has_overflowed(&iter->seq))
4431 return TRACE_TYPE_PARTIAL_LINE;
4432 }
4433
4434 if (iter->trace && iter->trace->print_line) {
4435 ret = iter->trace->print_line(iter);
4436 if (ret != TRACE_TYPE_UNHANDLED)
4437 return ret;
4438 }
4439
4440 if (iter->ent->type == TRACE_BPUTS &&
4441 trace_flags & TRACE_ITER_PRINTK &&
4442 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4443 return trace_print_bputs_msg_only(iter);
4444
4445 if (iter->ent->type == TRACE_BPRINT &&
4446 trace_flags & TRACE_ITER_PRINTK &&
4447 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4448 return trace_print_bprintk_msg_only(iter);
4449
4450 if (iter->ent->type == TRACE_PRINT &&
4451 trace_flags & TRACE_ITER_PRINTK &&
4452 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4453 return trace_print_printk_msg_only(iter);
4454
4455 if (trace_flags & TRACE_ITER_BIN)
4456 return print_bin_fmt(iter);
4457
4458 if (trace_flags & TRACE_ITER_HEX)
4459 return print_hex_fmt(iter);
4460
4461 if (trace_flags & TRACE_ITER_RAW)
4462 return print_raw_fmt(iter);
4463
4464 return print_trace_fmt(iter);
4465 }
4466
4467 void trace_latency_header(struct seq_file *m)
4468 {
4469 struct trace_iterator *iter = m->private;
4470 struct trace_array *tr = iter->tr;
4471
4472 /* print nothing if the buffers are empty */
4473 if (trace_empty(iter))
4474 return;
4475
4476 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4477 print_trace_header(m, iter);
4478
4479 if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4480 print_lat_help_header(m);
4481 }
4482
4483 void trace_default_header(struct seq_file *m)
4484 {
4485 struct trace_iterator *iter = m->private;
4486 struct trace_array *tr = iter->tr;
4487 unsigned long trace_flags = tr->trace_flags;
4488
4489 if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4490 return;
4491
4492 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4493 /* print nothing if the buffers are empty */
4494 if (trace_empty(iter))
4495 return;
4496 print_trace_header(m, iter);
4497 if (!(trace_flags & TRACE_ITER_VERBOSE))
4498 print_lat_help_header(m);
4499 } else {
4500 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4501 if (trace_flags & TRACE_ITER_IRQ_INFO)
4502 print_func_help_header_irq(iter->array_buffer,
4503 m, trace_flags);
4504 else
4505 print_func_help_header(iter->array_buffer, m,
4506 trace_flags);
4507 }
4508 }
4509 }
4510
4511 static void test_ftrace_alive(struct seq_file *m)
4512 {
4513 if (!ftrace_is_dead())
4514 return;
4515 seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4516 "# MAY BE MISSING FUNCTION EVENTS\n");
4517 }
4518
4519 #ifdef CONFIG_TRACER_MAX_TRACE
4520 static void show_snapshot_main_help(struct seq_file *m)
4521 {
4522 seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4523 "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4524 "# Takes a snapshot of the main buffer.\n"
4525 "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4526 "# (Doesn't have to be '2'; works with any number that\n"
4527 "# is not a '0' or '1')\n");
4528 }
4529
4530 static void show_snapshot_percpu_help(struct seq_file *m)
4531 {
4532 seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4533 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4534 seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4535 "# Takes a snapshot of the main buffer for this cpu.\n");
4536 #else
4537 seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4538 "# Must use main snapshot file to allocate.\n");
4539 #endif
4540 seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4541 "# (Doesn't have to be '2'; works with any number that\n"
4542 "# is not a '0' or '1')\n");
4543 }
4544
4545 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4546 {
4547 if (iter->tr->allocated_snapshot)
4548 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4549 else
4550 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4551
4552 seq_puts(m, "# Snapshot commands:\n");
4553 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4554 show_snapshot_main_help(m);
4555 else
4556 show_snapshot_percpu_help(m);
4557 }
4558 #else
4559 /* Should never be called */
4560 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4561 #endif
4562
4563 static int s_show(struct seq_file *m, void *v)
4564 {
4565 struct trace_iterator *iter = v;
4566 int ret;
4567
4568 if (iter->ent == NULL) {
4569 if (iter->tr) {
4570 seq_printf(m, "# tracer: %s\n", iter->trace->name);
4571 seq_puts(m, "#\n");
4572 test_ftrace_alive(m);
4573 }
4574 if (iter->snapshot && trace_empty(iter))
4575 print_snapshot_help(m, iter);
4576 else if (iter->trace && iter->trace->print_header)
4577 iter->trace->print_header(m);
4578 else
4579 trace_default_header(m);
4580
4581 } else if (iter->leftover) {
4582 /*
4583 * If we filled the seq_file buffer earlier, we
4584 * want to just show it now.
4585 */
4586 ret = trace_print_seq(m, &iter->seq);
4587
4588 /* ret should this time be zero, but you never know */
4589 iter->leftover = ret;
4590
4591 } else {
4592 ret = print_trace_line(iter);
4593 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4594 iter->seq.full = 0;
4595 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
4596 }
4597 ret = trace_print_seq(m, &iter->seq);
4598 /*
4599 * If we overflow the seq_file buffer, then it will
4600 * ask us for this data again at start up.
4601 * Use that instead.
4602 * ret is 0 if seq_file write succeeded.
4603 * -1 otherwise.
4604 */
4605 iter->leftover = ret;
4606 }
4607
4608 return 0;
4609 }
4610
4611 /*
4612 * Should be used after trace_array_get(), trace_types_lock
4613 * ensures that i_cdev was already initialized.
4614 */
4615 static inline int tracing_get_cpu(struct inode *inode)
4616 {
4617 if (inode->i_cdev) /* See trace_create_cpu_file() */
4618 return (long)inode->i_cdev - 1;
4619 return RING_BUFFER_ALL_CPUS;
4620 }
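/*
 * Example for tracing_get_cpu() above (encoding implied by the "- 1" and
 * by trace_create_cpu_file(), which stores cpu + 1 in i_cdev): a per-CPU
 * file for CPU 2 has i_cdev == (void *)3 and yields 2, while a NULL
 * i_cdev means the file is not per-CPU and RING_BUFFER_ALL_CPUS is
 * returned.
 */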
4621
4622 static const struct seq_operations tracer_seq_ops = {
4623 .start = s_start,
4624 .next = s_next,
4625 .stop = s_stop,
4626 .show = s_show,
4627 };
4628
4629 /*
4630 * Note, as iter itself can be allocated and freed in different
4631 * ways, this function is only used to free its content, and not
4632 * the iterator itself. The only requirement on all the allocations
4633 * is that they must zero all fields (kzalloc), as freeing works with
4634 * either allocated content or NULL.
4635 */
4636 static void free_trace_iter_content(struct trace_iterator *iter)
4637 {
4638 /* The fmt is either NULL, allocated or points to static_fmt_buf */
4639 if (iter->fmt != static_fmt_buf)
4640 kfree(iter->fmt);
4641
4642 kfree(iter->temp);
4643 kfree(iter->buffer_iter);
4644 mutex_destroy(&iter->mutex);
4645 free_cpumask_var(iter->started);
4646 }
4647
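/*
 * Build a trace_iterator for a read of the "trace" file: allocate the
 * per-CPU ring buffer iterators, select the max/snapshot buffer when
 * appropriate, and, unless this is the snapshot file, pause tracing while
 * the buffer is dumped if the pause-on-trace option is set.
 */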
4648 static struct trace_iterator *
4649 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4650 {
4651 struct trace_array *tr = inode->i_private;
4652 struct trace_iterator *iter;
4653 int cpu;
4654
4655 if (tracing_disabled)
4656 return ERR_PTR(-ENODEV);
4657
4658 iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4659 if (!iter)
4660 return ERR_PTR(-ENOMEM);
4661
4662 iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4663 GFP_KERNEL);
4664 if (!iter->buffer_iter)
4665 goto release;
4666
4667 /*
4668 * trace_find_next_entry() may need to save off iter->ent.
4669 * It will place it into the iter->temp buffer. As most
4670 * events are less than 128, allocate a buffer of that size.
4671 * If one is greater, then trace_find_next_entry() will
4672 * allocate a new buffer to adjust for the bigger iter->ent.
4673 * It's not critical if it fails to get allocated here.
4674 */
4675 iter->temp = kmalloc(128, GFP_KERNEL);
4676 if (iter->temp)
4677 iter->temp_size = 128;
4678
4679 /*
4680 * trace_event_printf() may need to modify the given format
4681 * string to replace %p with %px so that it shows the real address
4682 * instead of a hashed value. However, that is only needed for
4683 * event tracing; other tracers may not need it. Defer the
4684 * allocation until it is needed.
4685 */
4686 iter->fmt = NULL;
4687 iter->fmt_size = 0;
4688
4689 mutex_lock(&trace_types_lock);
4690 iter->trace = tr->current_trace;
4691
4692 if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4693 goto fail;
4694
4695 iter->tr = tr;
4696
4697 #ifdef CONFIG_TRACER_MAX_TRACE
4698 /* Currently only the top directory has a snapshot */
4699 if (tr->current_trace->print_max || snapshot)
4700 iter->array_buffer = &tr->max_buffer;
4701 else
4702 #endif
4703 iter->array_buffer = &tr->array_buffer;
4704 iter->snapshot = snapshot;
4705 iter->pos = -1;
4706 iter->cpu_file = tracing_get_cpu(inode);
4707 mutex_init(&iter->mutex);
4708
4709 /* Notify the tracer early; before we stop tracing. */
4710 if (iter->trace->open)
4711 iter->trace->open(iter);
4712
4713 /* Annotate start of buffers if we had overruns */
4714 if (ring_buffer_overruns(iter->array_buffer->buffer))
4715 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4716
4717 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4718 if (trace_clocks[tr->clock_id].in_ns)
4719 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4720
4721 /*
4722 * If pause-on-trace is enabled, then stop the trace while
4723 * dumping, unless this is the "snapshot" file
4724 */
4725 if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4726 tracing_stop_tr(tr);
4727
4728 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4729 for_each_tracing_cpu(cpu) {
4730 iter->buffer_iter[cpu] =
4731 ring_buffer_read_start(iter->array_buffer->buffer,
4732 cpu, GFP_KERNEL);
4733 tracing_iter_reset(iter, cpu);
4734 }
4735 } else {
4736 cpu = iter->cpu_file;
4737 iter->buffer_iter[cpu] =
4738 ring_buffer_read_start(iter->array_buffer->buffer,
4739 cpu, GFP_KERNEL);
4740 tracing_iter_reset(iter, cpu);
4741 }
4742
4743 mutex_unlock(&trace_types_lock);
4744
4745 return iter;
4746
4747 fail:
4748 mutex_unlock(&trace_types_lock);
4749 free_trace_iter_content(iter);
4750 release:
4751 seq_release_private(inode, file);
4752 return ERR_PTR(-ENOMEM);
4753 }
4754
4755 int tracing_open_generic(struct inode *inode, struct file *filp)
4756 {
4757 int ret;
4758
4759 ret = tracing_check_open_get_tr(NULL);
4760 if (ret)
4761 return ret;
4762
4763 filp->private_data = inode->i_private;
4764 return 0;
4765 }
4766
4767 bool tracing_is_disabled(void)
4768 {
4769 return (tracing_disabled) ? true: false;
4770 }
4771
4772 /*
4773 * Open and update trace_array ref count.
4774 * Must have the current trace_array passed to it.
4775 */
4776 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4777 {
4778 struct trace_array *tr = inode->i_private;
4779 int ret;
4780
4781 ret = tracing_check_open_get_tr(tr);
4782 if (ret)
4783 return ret;
4784
4785 filp->private_data = inode->i_private;
4786
4787 return 0;
4788 }
4789
4790 /*
4791 * The private pointer of the inode is the trace_event_file.
4792 * Update the tr ref count associated to it.
4793 */
4794 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4795 {
4796 struct trace_event_file *file = inode->i_private;
4797 int ret;
4798
4799 ret = tracing_check_open_get_tr(file->tr);
4800 if (ret)
4801 return ret;
4802
4803 mutex_lock(&event_mutex);
4804
4805 /* Fail if the file is marked for removal */
4806 if (file->flags & EVENT_FILE_FL_FREED) {
4807 trace_array_put(file->tr);
4808 ret = -ENODEV;
4809 } else {
4810 event_file_get(file);
4811 }
4812
4813 mutex_unlock(&event_mutex);
4814 if (ret)
4815 return ret;
4816
4817 filp->private_data = inode->i_private;
4818
4819 return 0;
4820 }
4821
4822 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4823 {
4824 struct trace_event_file *file = inode->i_private;
4825
4826 trace_array_put(file->tr);
4827 event_file_put(file);
4828
4829 return 0;
4830 }
4831
4832 int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
4833 {
4834 tracing_release_file_tr(inode, filp);
4835 return single_release(inode, filp);
4836 }
4837
4838 static int tracing_mark_open(struct inode *inode, struct file *filp)
4839 {
4840 stream_open(inode, filp);
4841 return tracing_open_generic_tr(inode, filp);
4842 }
4843
4844 static int tracing_release(struct inode *inode, struct file *file)
4845 {
4846 struct trace_array *tr = inode->i_private;
4847 struct seq_file *m = file->private_data;
4848 struct trace_iterator *iter;
4849 int cpu;
4850
4851 if (!(file->f_mode & FMODE_READ)) {
4852 trace_array_put(tr);
4853 return 0;
4854 }
4855
4856 /* Writes do not use seq_file */
4857 iter = m->private;
4858 mutex_lock(&trace_types_lock);
4859
4860 for_each_tracing_cpu(cpu) {
4861 if (iter->buffer_iter[cpu])
4862 ring_buffer_read_finish(iter->buffer_iter[cpu]);
4863 }
4864
4865 if (iter->trace && iter->trace->close)
4866 iter->trace->close(iter);
4867
4868 if (!iter->snapshot && tr->stop_count)
4869 /* reenable tracing if it was previously enabled */
4870 tracing_start_tr(tr);
4871
4872 __trace_array_put(tr);
4873
4874 mutex_unlock(&trace_types_lock);
4875
4876 free_trace_iter_content(iter);
4877 seq_release_private(inode, file);
4878
4879 return 0;
4880 }
4881
4882 int tracing_release_generic_tr(struct inode *inode, struct file *file)
4883 {
4884 struct trace_array *tr = inode->i_private;
4885
4886 trace_array_put(tr);
4887 return 0;
4888 }
4889
4890 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4891 {
4892 struct trace_array *tr = inode->i_private;
4893
4894 trace_array_put(tr);
4895
4896 return single_release(inode, file);
4897 }
4898
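/*
 * Open handler for the "trace" file. Opening with O_TRUNC (e.g.
 * "echo > trace") clears the buffer contents; opening for read builds a
 * full iterator via __tracing_open().
 */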
4899 static int tracing_open(struct inode *inode, struct file *file)
4900 {
4901 struct trace_array *tr = inode->i_private;
4902 struct trace_iterator *iter;
4903 int ret;
4904
4905 ret = tracing_check_open_get_tr(tr);
4906 if (ret)
4907 return ret;
4908
4909 /* If this file was open for write, then erase contents */
4910 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4911 int cpu = tracing_get_cpu(inode);
4912 struct array_buffer *trace_buf = &tr->array_buffer;
4913
4914 #ifdef CONFIG_TRACER_MAX_TRACE
4915 if (tr->current_trace->print_max)
4916 trace_buf = &tr->max_buffer;
4917 #endif
4918
4919 if (cpu == RING_BUFFER_ALL_CPUS)
4920 tracing_reset_online_cpus(trace_buf);
4921 else
4922 tracing_reset_cpu(trace_buf, cpu);
4923 }
4924
4925 if (file->f_mode & FMODE_READ) {
4926 iter = __tracing_open(inode, file, false);
4927 if (IS_ERR(iter))
4928 ret = PTR_ERR(iter);
4929 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4930 iter->iter_flags |= TRACE_FILE_LAT_FMT;
4931 }
4932
4933 if (ret < 0)
4934 trace_array_put(tr);
4935
4936 return ret;
4937 }
4938
4939 /*
4940 * Some tracers are not suitable for instance buffers.
4941 * A tracer is always available for the global array (toplevel)
4942 * or if it explicitly states that it is.
4943 */
4944 static bool
4945 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4946 {
4947 #ifdef CONFIG_TRACER_SNAPSHOT
4948 /* arrays with mapped buffer range do not have snapshots */
4949 if (tr->range_addr_start && t->use_max_tr)
4950 return false;
4951 #endif
4952 return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4953 }
4954
4955 /* Find the next tracer that this trace array may use */
4956 static struct tracer *
4957 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4958 {
4959 while (t && !trace_ok_for_array(t, tr))
4960 t = t->next;
4961
4962 return t;
4963 }
4964
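/*
 * seq_file iterator for the "available_tracers" file: walk the global
 * trace_types list under trace_types_lock, skip tracers that are not
 * usable by this instance (see trace_ok_for_array() above), and print the
 * names space-separated on a single line.
 */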
4965 static void *
4966 t_next(struct seq_file *m, void *v, loff_t *pos)
4967 {
4968 struct trace_array *tr = m->private;
4969 struct tracer *t = v;
4970
4971 (*pos)++;
4972
4973 if (t)
4974 t = get_tracer_for_array(tr, t->next);
4975
4976 return t;
4977 }
4978
4979 static void *t_start(struct seq_file *m, loff_t *pos)
4980 {
4981 struct trace_array *tr = m->private;
4982 struct tracer *t;
4983 loff_t l = 0;
4984
4985 mutex_lock(&trace_types_lock);
4986
4987 t = get_tracer_for_array(tr, trace_types);
4988 for (; t && l < *pos; t = t_next(m, t, &l))
4989 ;
4990
4991 return t;
4992 }
4993
4994 static void t_stop(struct seq_file *m, void *p)
4995 {
4996 mutex_unlock(&trace_types_lock);
4997 }
4998
4999 static int t_show(struct seq_file *m, void *v)
5000 {
5001 struct tracer *t = v;
5002
5003 if (!t)
5004 return 0;
5005
5006 seq_puts(m, t->name);
5007 if (t->next)
5008 seq_putc(m, ' ');
5009 else
5010 seq_putc(m, '\n');
5011
5012 return 0;
5013 }
5014
5015 static const struct seq_operations show_traces_seq_ops = {
5016 .start = t_start,
5017 .next = t_next,
5018 .stop = t_stop,
5019 .show = t_show,
5020 };
5021
5022 static int show_traces_open(struct inode *inode, struct file *file)
5023 {
5024 struct trace_array *tr = inode->i_private;
5025 struct seq_file *m;
5026 int ret;
5027
5028 ret = tracing_check_open_get_tr(tr);
5029 if (ret)
5030 return ret;
5031
5032 ret = seq_open(file, &show_traces_seq_ops);
5033 if (ret) {
5034 trace_array_put(tr);
5035 return ret;
5036 }
5037
5038 m = file->private_data;
5039 m->private = tr;
5040
5041 return 0;
5042 }
5043
5044 static int tracing_seq_release(struct inode *inode, struct file *file)
5045 {
5046 struct trace_array *tr = inode->i_private;
5047
5048 trace_array_put(tr);
5049 return seq_release(inode, file);
5050 }
5051
5052 static ssize_t
5053 tracing_write_stub(struct file *filp, const char __user *ubuf,
5054 size_t count, loff_t *ppos)
5055 {
5056 return count;
5057 }
5058
5059 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5060 {
5061 int ret;
5062
5063 if (file->f_mode & FMODE_READ)
5064 ret = seq_lseek(file, offset, whence);
5065 else
5066 file->f_pos = ret = 0;
5067
5068 return ret;
5069 }
5070
5071 static const struct file_operations tracing_fops = {
5072 .open = tracing_open,
5073 .read = seq_read,
5074 .read_iter = seq_read_iter,
5075 .splice_read = copy_splice_read,
5076 .write = tracing_write_stub,
5077 .llseek = tracing_lseek,
5078 .release = tracing_release,
5079 };
5080
5081 static const struct file_operations show_traces_fops = {
5082 .open = show_traces_open,
5083 .read = seq_read,
5084 .llseek = seq_lseek,
5085 .release = tracing_seq_release,
5086 };
5087
5088 static ssize_t
5089 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5090 size_t count, loff_t *ppos)
5091 {
5092 struct trace_array *tr = file_inode(filp)->i_private;
5093 char *mask_str;
5094 int len;
5095
5096 len = snprintf(NULL, 0, "%*pb\n",
5097 cpumask_pr_args(tr->tracing_cpumask)) + 1;
5098 mask_str = kmalloc(len, GFP_KERNEL);
5099 if (!mask_str)
5100 return -ENOMEM;
5101
5102 len = snprintf(mask_str, len, "%*pb\n",
5103 cpumask_pr_args(tr->tracing_cpumask));
5104 if (len >= count) {
5105 count = -EINVAL;
5106 goto out_err;
5107 }
5108 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5109
5110 out_err:
5111 kfree(mask_str);
5112
5113 return count;
5114 }
5115
5116 int tracing_set_cpumask(struct trace_array *tr,
5117 cpumask_var_t tracing_cpumask_new)
5118 {
5119 int cpu;
5120
5121 if (!tr)
5122 return -EINVAL;
5123
5124 local_irq_disable();
5125 arch_spin_lock(&tr->max_lock);
5126 for_each_tracing_cpu(cpu) {
5127 /*
5128 * Increase/decrease the disabled counter if we are
5129 * about to flip a bit in the cpumask:
5130 */
5131 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5132 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5133 ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5134 #ifdef CONFIG_TRACER_MAX_TRACE
5135 ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5136 #endif
5137 }
5138 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5139 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5140 ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5141 #ifdef CONFIG_TRACER_MAX_TRACE
5142 ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5143 #endif
5144 }
5145 }
5146 arch_spin_unlock(&tr->max_lock);
5147 local_irq_enable();
5148
5149 cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5150
5151 return 0;
5152 }
5153
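/*
 * Example (illustrative): "echo 03 > tracing_cpumask" is parsed by
 * cpumask_parse_user() as a hex mask selecting CPUs 0-1; CPUs that leave
 * the mask have recording disabled in their per-CPU ring buffers by
 * tracing_set_cpumask() above.
 */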
5154 static ssize_t
5155 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5156 size_t count, loff_t *ppos)
5157 {
5158 struct trace_array *tr = file_inode(filp)->i_private;
5159 cpumask_var_t tracing_cpumask_new;
5160 int err;
5161
5162 if (count == 0 || count > KMALLOC_MAX_SIZE)
5163 return -EINVAL;
5164
5165 if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5166 return -ENOMEM;
5167
5168 err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5169 if (err)
5170 goto err_free;
5171
5172 err = tracing_set_cpumask(tr, tracing_cpumask_new);
5173 if (err)
5174 goto err_free;
5175
5176 free_cpumask_var(tracing_cpumask_new);
5177
5178 return count;
5179
5180 err_free:
5181 free_cpumask_var(tracing_cpumask_new);
5182
5183 return err;
5184 }
5185
5186 static const struct file_operations tracing_cpumask_fops = {
5187 .open = tracing_open_generic_tr,
5188 .read = tracing_cpumask_read,
5189 .write = tracing_cpumask_write,
5190 .release = tracing_release_generic_tr,
5191 .llseek = generic_file_llseek,
5192 };
5193
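/*
 * Example (illustrative): reading the "trace_options" file prints one
 * option per line, prefixed with "no" when cleared (e.g. "print-parent",
 * "nosym-offset"), followed by the current tracer's own flag options.
 */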
5194 static int tracing_trace_options_show(struct seq_file *m, void *v)
5195 {
5196 struct tracer_opt *trace_opts;
5197 struct trace_array *tr = m->private;
5198 u32 tracer_flags;
5199 int i;
5200
5201 guard(mutex)(&trace_types_lock);
5202
5203 tracer_flags = tr->current_trace->flags->val;
5204 trace_opts = tr->current_trace->flags->opts;
5205
5206 for (i = 0; trace_options[i]; i++) {
5207 if (tr->trace_flags & (1 << i))
5208 seq_printf(m, "%s\n", trace_options[i]);
5209 else
5210 seq_printf(m, "no%s\n", trace_options[i]);
5211 }
5212
5213 for (i = 0; trace_opts[i].name; i++) {
5214 if (tracer_flags & trace_opts[i].bit)
5215 seq_printf(m, "%s\n", trace_opts[i].name);
5216 else
5217 seq_printf(m, "no%s\n", trace_opts[i].name);
5218 }
5219
5220 return 0;
5221 }
5222
5223 static int __set_tracer_option(struct trace_array *tr,
5224 struct tracer_flags *tracer_flags,
5225 struct tracer_opt *opts, int neg)
5226 {
5227 struct tracer *trace = tracer_flags->trace;
5228 int ret;
5229
5230 ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5231 if (ret)
5232 return ret;
5233
5234 if (neg)
5235 tracer_flags->val &= ~opts->bit;
5236 else
5237 tracer_flags->val |= opts->bit;
5238 return 0;
5239 }
5240
5241 /* Try to assign a tracer specific option */
5242 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5243 {
5244 struct tracer *trace = tr->current_trace;
5245 struct tracer_flags *tracer_flags = trace->flags;
5246 struct tracer_opt *opts = NULL;
5247 int i;
5248
5249 for (i = 0; tracer_flags->opts[i].name; i++) {
5250 opts = &tracer_flags->opts[i];
5251
5252 if (strcmp(cmp, opts->name) == 0)
5253 return __set_tracer_option(tr, trace->flags, opts, neg);
5254 }
5255
5256 return -EINVAL;
5257 }
5258
5259 /* Some tracers require overwrite to stay enabled */
5260 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5261 {
5262 if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5263 return -1;
5264
5265 return 0;
5266 }
5267
5268 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5269 {
5270 if ((mask == TRACE_ITER_RECORD_TGID) ||
5271 (mask == TRACE_ITER_RECORD_CMD) ||
5272 (mask == TRACE_ITER_TRACE_PRINTK) ||
5273 (mask == TRACE_ITER_COPY_MARKER))
5274 lockdep_assert_held(&event_mutex);
5275
5276 /* do nothing if flag is already set */
5277 if (!!(tr->trace_flags & mask) == !!enabled)
5278 return 0;
5279
5280 /* Give the tracer a chance to approve the change */
5281 if (tr->current_trace->flag_changed)
5282 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5283 return -EINVAL;
5284
5285 if (mask == TRACE_ITER_TRACE_PRINTK) {
5286 if (enabled) {
5287 update_printk_trace(tr);
5288 } else {
5289 /*
5290 * The global_trace cannot clear this.
5291 * Its flag only gets cleared if another instance sets it.
5292 */
5293 if (printk_trace == &global_trace)
5294 return -EINVAL;
5295 /*
5296 * An instance must always have it set;
5297 * by default, that's the global_trace instance.
5298 */
5299 if (printk_trace == tr)
5300 update_printk_trace(&global_trace);
5301 }
5302 }
5303
5304 if (mask == TRACE_ITER_COPY_MARKER)
5305 update_marker_trace(tr, enabled);
5306
5307 if (enabled)
5308 tr->trace_flags |= mask;
5309 else
5310 tr->trace_flags &= ~mask;
5311
5312 if (mask == TRACE_ITER_RECORD_CMD)
5313 trace_event_enable_cmd_record(enabled);
5314
5315 if (mask == TRACE_ITER_RECORD_TGID) {
5316
5317 if (trace_alloc_tgid_map() < 0) {
5318 tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5319 return -ENOMEM;
5320 }
5321
5322 trace_event_enable_tgid_record(enabled);
5323 }
5324
5325 if (mask == TRACE_ITER_EVENT_FORK)
5326 trace_event_follow_fork(tr, enabled);
5327
5328 if (mask == TRACE_ITER_FUNC_FORK)
5329 ftrace_pid_follow_fork(tr, enabled);
5330
5331 if (mask == TRACE_ITER_OVERWRITE) {
5332 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5333 #ifdef CONFIG_TRACER_MAX_TRACE
5334 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5335 #endif
5336 }
5337
5338 if (mask == TRACE_ITER_PRINTK) {
5339 trace_printk_start_stop_comm(enabled);
5340 trace_printk_control(enabled);
5341 }
5342
5343 return 0;
5344 }
5345
5346 int trace_set_options(struct trace_array *tr, char *option)
5347 {
5348 char *cmp;
5349 int neg = 0;
5350 int ret;
5351 size_t orig_len = strlen(option);
5352 int len;
5353
5354 cmp = strstrip(option);
5355
5356 len = str_has_prefix(cmp, "no");
5357 if (len)
5358 neg = 1;
5359
5360 cmp += len;
5361
5362 mutex_lock(&event_mutex);
5363 mutex_lock(&trace_types_lock);
5364
5365 ret = match_string(trace_options, -1, cmp);
5366 /* If no option could be set, test the specific tracer options */
5367 if (ret < 0)
5368 ret = set_tracer_option(tr, cmp, neg);
5369 else
5370 ret = set_tracer_flag(tr, 1 << ret, !neg);
5371
5372 mutex_unlock(&trace_types_lock);
5373 mutex_unlock(&event_mutex);
5374
5375 /*
5376 * If the first trailing whitespace is replaced with '\0' by strstrip,
5377 * turn it back into a space.
5378 */
5379 if (orig_len > strlen(option))
5380 option[strlen(option)] = ' ';
5381
5382 return ret;
5383 }
5384
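/*
 * Example (illustrative): booting with "trace_options=sym-addr,noirq-info"
 * leaves that string in trace_boot_options_buf; each comma-separated token
 * is passed to trace_set_options() above, and the comma is put back so the
 * buffer can be parsed again later.
 */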
5385 static void __init apply_trace_boot_options(void)
5386 {
5387 char *buf = trace_boot_options_buf;
5388 char *option;
5389
5390 while (true) {
5391 option = strsep(&buf, ",");
5392
5393 if (!option)
5394 break;
5395
5396 if (*option)
5397 trace_set_options(&global_trace, option);
5398
5399 /* Put back the comma to allow this to be called again */
5400 if (buf)
5401 *(buf - 1) = ',';
5402 }
5403 }
5404
5405 static ssize_t
5406 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5407 size_t cnt, loff_t *ppos)
5408 {
5409 struct seq_file *m = filp->private_data;
5410 struct trace_array *tr = m->private;
5411 char buf[64];
5412 int ret;
5413
5414 if (cnt >= sizeof(buf))
5415 return -EINVAL;
5416
5417 if (copy_from_user(buf, ubuf, cnt))
5418 return -EFAULT;
5419
5420 buf[cnt] = 0;
5421
5422 ret = trace_set_options(tr, buf);
5423 if (ret < 0)
5424 return ret;
5425
5426 *ppos += cnt;
5427
5428 return cnt;
5429 }
5430
5431 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5432 {
5433 struct trace_array *tr = inode->i_private;
5434 int ret;
5435
5436 ret = tracing_check_open_get_tr(tr);
5437 if (ret)
5438 return ret;
5439
5440 ret = single_open(file, tracing_trace_options_show, inode->i_private);
5441 if (ret < 0)
5442 trace_array_put(tr);
5443
5444 return ret;
5445 }
5446
5447 static const struct file_operations tracing_iter_fops = {
5448 .open = tracing_trace_options_open,
5449 .read = seq_read,
5450 .llseek = seq_lseek,
5451 .release = tracing_single_release_tr,
5452 .write = tracing_trace_options_write,
5453 };
5454
5455 static const char readme_msg[] =
5456 "tracing mini-HOWTO:\n\n"
5457 "By default tracefs removes all OTH file permission bits.\n"
5458 "When mounting tracefs an optional group id can be specified\n"
5459 "which adds the group to every directory and file in tracefs:\n\n"
5460 "\t e.g. mount -t tracefs [-o [gid=<gid>]] nodev /sys/kernel/tracing\n\n"
5461 "# echo 0 > tracing_on : quick way to disable tracing\n"
5462 "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5463 " Important files:\n"
5464 " trace\t\t\t- The static contents of the buffer\n"
5465 "\t\t\t To clear the buffer write into this file: echo > trace\n"
5466 " trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5467 " current_tracer\t- function and latency tracers\n"
5468 " available_tracers\t- list of configured tracers for current_tracer\n"
5469 " error_log\t- error log for failed commands (that support it)\n"
5470 " buffer_size_kb\t- view and modify size of per cpu buffer\n"
5471 " buffer_total_size_kb - view total size of all cpu buffers\n\n"
5472 " trace_clock\t\t- change the clock used to order events\n"
5473 " local: Per cpu clock but may not be synced across CPUs\n"
5474 " global: Synced across CPUs but slows tracing down.\n"
5475 " counter: Not a clock, but just an increment\n"
5476 " uptime: Jiffy counter from time of boot\n"
5477 " perf: Same clock that perf events use\n"
5478 #ifdef CONFIG_X86_64
5479 " x86-tsc: TSC cycle counter\n"
5480 #endif
5481 "\n timestamp_mode\t- view the mode used to timestamp events\n"
5482 " delta: Delta difference against a buffer-wide timestamp\n"
5483 " absolute: Absolute (standalone) timestamp\n"
5484 "\n trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5485 "\n trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5486 " tracing_cpumask\t- Limit which CPUs to trace\n"
5487 " instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5488 "\t\t\t Remove sub-buffer with rmdir\n"
5489 " trace_options\t\t- Set format or modify how tracing happens\n"
5490 "\t\t\t Disable an option by prefixing 'no' to the\n"
5491 "\t\t\t option name\n"
5492 " saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5493 #ifdef CONFIG_DYNAMIC_FTRACE
5494 "\n available_filter_functions - list of functions that can be filtered on\n"
5495 " set_ftrace_filter\t- echo function name in here to only trace these\n"
5496 "\t\t\t functions\n"
5497 "\t accepts: func_full_name or glob-matching-pattern\n"
5498 "\t modules: Can select a group via module\n"
5499 "\t Format: :mod:<module-name>\n"
5500 "\t example: echo :mod:ext3 > set_ftrace_filter\n"
5501 "\t triggers: a command to perform when function is hit\n"
5502 "\t Format: <function>:<trigger>[:count]\n"
5503 "\t trigger: traceon, traceoff\n"
5504 "\t\t enable_event:<system>:<event>\n"
5505 "\t\t disable_event:<system>:<event>\n"
5506 #ifdef CONFIG_STACKTRACE
5507 "\t\t stacktrace\n"
5508 #endif
5509 #ifdef CONFIG_TRACER_SNAPSHOT
5510 "\t\t snapshot\n"
5511 #endif
5512 "\t\t dump\n"
5513 "\t\t cpudump\n"
5514 "\t example: echo do_fault:traceoff > set_ftrace_filter\n"
5515 "\t echo do_trap:traceoff:3 > set_ftrace_filter\n"
5516 "\t The first one will disable tracing every time do_fault is hit\n"
5517 "\t The second will disable tracing at most 3 times when do_trap is hit\n"
5518 "\t The first time do trap is hit and it disables tracing, the\n"
5519 "\t counter will decrement to 2. If tracing is already disabled,\n"
5520 "\t the counter will not decrement. It only decrements when the\n"
5521 "\t trigger did work\n"
5522 "\t To remove trigger without count:\n"
5523 "\t echo '!<function>:<trigger> > set_ftrace_filter\n"
5524 "\t To remove trigger with a count:\n"
5525 "\t echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5526 " set_ftrace_notrace\t- echo function name in here to never trace.\n"
5527 "\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5528 "\t modules: Can select a group via module command :mod:\n"
5529 "\t Does not accept triggers\n"
5530 #endif /* CONFIG_DYNAMIC_FTRACE */
5531 #ifdef CONFIG_FUNCTION_TRACER
5532 " set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5533 "\t\t (function)\n"
5534 " set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5535 "\t\t (function)\n"
5536 #endif
5537 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5538 " set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5539 " set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5540 " max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5541 #endif
5542 #ifdef CONFIG_TRACER_SNAPSHOT
5543 "\n snapshot\t\t- Like 'trace' but shows the content of the static\n"
5544 "\t\t\t snapshot buffer. Read the contents for more\n"
5545 "\t\t\t information\n"
5546 #endif
5547 #ifdef CONFIG_STACK_TRACER
5548 " stack_trace\t\t- Shows the max stack trace when active\n"
5549 " stack_max_size\t- Shows current max stack size that was traced\n"
5550 "\t\t\t Write into this file to reset the max size (trigger a\n"
5551 "\t\t\t new trace)\n"
5552 #ifdef CONFIG_DYNAMIC_FTRACE
5553 " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5554 "\t\t\t traces\n"
5555 #endif
5556 #endif /* CONFIG_STACK_TRACER */
5557 #ifdef CONFIG_DYNAMIC_EVENTS
5558 " dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5559 "\t\t\t Write into this file to define/undefine new trace events.\n"
5560 #endif
5561 #ifdef CONFIG_KPROBE_EVENTS
5562 " kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5563 "\t\t\t Write into this file to define/undefine new trace events.\n"
5564 #endif
5565 #ifdef CONFIG_UPROBE_EVENTS
5566 " uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5567 "\t\t\t Write into this file to define/undefine new trace events.\n"
5568 #endif
5569 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
5570 defined(CONFIG_FPROBE_EVENTS)
5571 "\t accepts: event-definitions (one definition per line)\n"
5572 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5573 "\t Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5574 "\t r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5575 #endif
5576 #ifdef CONFIG_FPROBE_EVENTS
5577 "\t f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
5578 "\t t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
5579 #endif
5580 #ifdef CONFIG_HIST_TRIGGERS
5581 "\t s:[synthetic/]<event> <field> [<field>]\n"
5582 #endif
5583 "\t e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5584 "\t -:[<group>/][<event>]\n"
5585 #ifdef CONFIG_KPROBE_EVENTS
5586 "\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5587 "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5588 #endif
5589 #ifdef CONFIG_UPROBE_EVENTS
5590 " place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5591 #endif
5592 "\t args: <name>=fetcharg[:type]\n"
5593 "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5594 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5595 "\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5596 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
5597 "\t <argname>[->field[->field|.field...]],\n"
5598 #endif
5599 #else
5600 "\t $stack<index>, $stack, $retval, $comm,\n"
5601 #endif
5602 "\t +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5603 "\t kernel return probes support: $retval, $arg<N>, $comm\n"
5604 "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
5605 "\t b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5606 "\t symstr, %pd/%pD, <type>\\[<array-size>\\]\n"
5607 #ifdef CONFIG_HIST_TRIGGERS
5608 "\t field: <stype> <name>;\n"
5609 "\t stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5610 "\t [unsigned] char/int/long\n"
5611 #endif
5612 "\t efield: For event probes ('e' types), the field is on of the fields\n"
5613 "\t of the <attached-group>/<attached-event>.\n"
5614 #endif
5615 " set_event\t\t- Enables events by name written into it\n"
5616 "\t\t\t Can enable module events via: :mod:<module>\n"
5617 " events/\t\t- Directory containing all trace event subsystems:\n"
5618 " enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5619 " events/<system>/\t- Directory containing all trace events for <system>:\n"
5620 " enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5621 "\t\t\t events\n"
5622 " filter\t\t- If set, only events passing filter are traced\n"
5623 " events/<system>/<event>/\t- Directory containing control files for\n"
5624 "\t\t\t <event>:\n"
5625 " enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5626 " filter\t\t- If set, only events passing filter are traced\n"
5627 " trigger\t\t- If set, a command to perform when event is hit\n"
5628 "\t Format: <trigger>[:count][if <filter>]\n"
5629 "\t trigger: traceon, traceoff\n"
5630 "\t enable_event:<system>:<event>\n"
5631 "\t disable_event:<system>:<event>\n"
5632 #ifdef CONFIG_HIST_TRIGGERS
5633 "\t enable_hist:<system>:<event>\n"
5634 "\t disable_hist:<system>:<event>\n"
5635 #endif
5636 #ifdef CONFIG_STACKTRACE
5637 "\t\t stacktrace\n"
5638 #endif
5639 #ifdef CONFIG_TRACER_SNAPSHOT
5640 "\t\t snapshot\n"
5641 #endif
5642 #ifdef CONFIG_HIST_TRIGGERS
5643 "\t\t hist (see below)\n"
5644 #endif
5645 "\t example: echo traceoff > events/block/block_unplug/trigger\n"
5646 "\t echo traceoff:3 > events/block/block_unplug/trigger\n"
5647 "\t echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5648 "\t events/block/block_unplug/trigger\n"
5649 "\t The first disables tracing every time block_unplug is hit.\n"
5650 "\t The second disables tracing the first 3 times block_unplug is hit.\n"
5651 "\t The third enables the kmalloc event the first 3 times block_unplug\n"
5652 "\t is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5653 "\t Like function triggers, the counter is only decremented if it\n"
5654 "\t enabled or disabled tracing.\n"
5655 "\t To remove a trigger without a count:\n"
5656 "\t echo '!<trigger> > <system>/<event>/trigger\n"
5657 "\t To remove a trigger with a count:\n"
5658 "\t echo '!<trigger>:0 > <system>/<event>/trigger\n"
5659 "\t Filters can be ignored when removing a trigger.\n"
5660 #ifdef CONFIG_HIST_TRIGGERS
5661 " hist trigger\t- If set, event hits are aggregated into a hash table\n"
5662 "\t Format: hist:keys=<field1[,field2,...]>\n"
5663 "\t [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5664 "\t [:values=<field1[,field2,...]>]\n"
5665 "\t [:sort=<field1[,field2,...]>]\n"
5666 "\t [:size=#entries]\n"
5667 "\t [:pause][:continue][:clear]\n"
5668 "\t [:name=histname1]\n"
5669 "\t [:nohitcount]\n"
5670 "\t [:<handler>.<action>]\n"
5671 "\t [if <filter>]\n\n"
5672 "\t Note, special fields can be used as well:\n"
5673 "\t common_timestamp - to record current timestamp\n"
5674 "\t common_cpu - to record the CPU the event happened on\n"
5675 "\n"
5676 "\t A hist trigger variable can be:\n"
5677 "\t - a reference to a field e.g. x=current_timestamp,\n"
5678 "\t - a reference to another variable e.g. y=$x,\n"
5679 "\t - a numeric literal: e.g. ms_per_sec=1000,\n"
5680 "\t - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5681 "\n"
5682 "\t hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5683 "\t multiplication(*) and division(/) operators. An operand can be either a\n"
5684 "\t variable reference, field or numeric literal.\n"
5685 "\n"
5686 "\t When a matching event is hit, an entry is added to a hash\n"
5687 "\t table using the key(s) and value(s) named, and the value of a\n"
5688 "\t sum called 'hitcount' is incremented. Keys and values\n"
5689 "\t correspond to fields in the event's format description. Keys\n"
5690 "\t can be any field, or the special string 'common_stacktrace'.\n"
5691 "\t Compound keys consisting of up to two fields can be specified\n"
5692 "\t by the 'keys' keyword. Values must correspond to numeric\n"
5693 "\t fields. Sort keys consisting of up to two fields can be\n"
5694 "\t specified using the 'sort' keyword. The sort direction can\n"
5695 "\t be modified by appending '.descending' or '.ascending' to a\n"
5696 "\t sort field. The 'size' parameter can be used to specify more\n"
5697 "\t or fewer than the default 2048 entries for the hashtable size.\n"
5698 "\t If a hist trigger is given a name using the 'name' parameter,\n"
5699 "\t its histogram data will be shared with other triggers of the\n"
5700 "\t same name, and trigger hits will update this common data.\n\n"
5701 "\t Reading the 'hist' file for the event will dump the hash\n"
5702 "\t table in its entirety to stdout. If there are multiple hist\n"
5703 "\t triggers attached to an event, there will be a table for each\n"
5704 "\t trigger in the output. The table displayed for a named\n"
5705 "\t trigger will be the same as any other instance having the\n"
5706 "\t same name. The default format used to display a given field\n"
5707 "\t can be modified by appending any of the following modifiers\n"
5708 "\t to the field name, as applicable:\n\n"
5709 "\t .hex display a number as a hex value\n"
5710 "\t .sym display an address as a symbol\n"
5711 "\t .sym-offset display an address as a symbol and offset\n"
5712 "\t .execname display a common_pid as a program name\n"
5713 "\t .syscall display a syscall id as a syscall name\n"
5714 "\t .log2 display log2 value rather than raw number\n"
5715 "\t .buckets=size display values in groups of size rather than raw number\n"
5716 "\t .usecs display a common_timestamp in microseconds\n"
5717 "\t .percent display a number of percentage value\n"
5718 "\t .graph display a bar-graph of a value\n\n"
5719 "\t The 'pause' parameter can be used to pause an existing hist\n"
5720 "\t trigger or to start a hist trigger but not log any events\n"
5721 "\t until told to do so. 'continue' can be used to start or\n"
5722 "\t restart a paused hist trigger.\n\n"
5723 "\t The 'clear' parameter will clear the contents of a running\n"
5724 "\t hist trigger and leave its current paused/active state\n"
5725 "\t unchanged.\n\n"
5726 "\t The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5727 "\t raw hitcount in the histogram.\n\n"
5728 "\t The enable_hist and disable_hist triggers can be used to\n"
5729 "\t have one event conditionally start and stop another event's\n"
5730 "\t already-attached hist trigger. The syntax is analogous to\n"
5731 "\t the enable_event and disable_event triggers.\n\n"
5732 "\t Hist trigger handlers and actions are executed whenever a\n"
5733 "\t a histogram entry is added or updated. They take the form:\n\n"
5734 "\t <handler>.<action>\n\n"
5735 "\t The available handlers are:\n\n"
5736 "\t onmatch(matching.event) - invoke on addition or update\n"
5737 "\t onmax(var) - invoke if var exceeds current max\n"
5738 "\t onchange(var) - invoke action if var changes\n\n"
5739 "\t The available actions are:\n\n"
5740 "\t trace(<synthetic_event>,param list) - generate synthetic event\n"
5741 "\t save(field,...) - save current event fields\n"
5742 #ifdef CONFIG_TRACER_SNAPSHOT
5743 "\t snapshot() - snapshot the trace buffer\n\n"
5744 #endif
5745 #ifdef CONFIG_SYNTH_EVENTS
5746 " events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5747 "\t Write into this file to define/undefine new synthetic events.\n"
5748 "\t example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5749 #endif
5750 #endif
5751 ;
5752
5753 static ssize_t
5754 tracing_readme_read(struct file *filp, char __user *ubuf,
5755 size_t cnt, loff_t *ppos)
5756 {
5757 return simple_read_from_buffer(ubuf, cnt, ppos,
5758 readme_msg, strlen(readme_msg));
5759 }
5760
5761 static const struct file_operations tracing_readme_fops = {
5762 .open = tracing_open_generic,
5763 .read = tracing_readme_read,
5764 .llseek = generic_file_llseek,
5765 };
5766
5767 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5768 static union trace_eval_map_item *
5769 update_eval_map(union trace_eval_map_item *ptr)
5770 {
5771 if (!ptr->map.eval_string) {
5772 if (ptr->tail.next) {
5773 ptr = ptr->tail.next;
5774 /* Set ptr to the next real item (skip head) */
5775 ptr++;
5776 } else
5777 return NULL;
5778 }
5779 return ptr;
5780 }
5781
5782 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5783 {
5784 union trace_eval_map_item *ptr = v;
5785
5786 /*
5787 * Paranoid! If ptr points to end, we don't want to increment past it.
5788 * This really should never happen.
5789 */
5790 (*pos)++;
5791 ptr = update_eval_map(ptr);
5792 if (WARN_ON_ONCE(!ptr))
5793 return NULL;
5794
5795 ptr++;
5796 ptr = update_eval_map(ptr);
5797
5798 return ptr;
5799 }
5800
5801 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5802 {
5803 union trace_eval_map_item *v;
5804 loff_t l = 0;
5805
5806 mutex_lock(&trace_eval_mutex);
5807
5808 v = trace_eval_maps;
5809 if (v)
5810 v++;
5811
5812 while (v && l < *pos) {
5813 v = eval_map_next(m, v, &l);
5814 }
5815
5816 return v;
5817 }
5818
5819 static void eval_map_stop(struct seq_file *m, void *v)
5820 {
5821 mutex_unlock(&trace_eval_mutex);
5822 }
5823
5824 static int eval_map_show(struct seq_file *m, void *v)
5825 {
5826 union trace_eval_map_item *ptr = v;
5827
5828 seq_printf(m, "%s %ld (%s)\n",
5829 ptr->map.eval_string, ptr->map.eval_value,
5830 ptr->map.system);
5831
5832 return 0;
5833 }
5834
5835 static const struct seq_operations tracing_eval_map_seq_ops = {
5836 .start = eval_map_start,
5837 .next = eval_map_next,
5838 .stop = eval_map_stop,
5839 .show = eval_map_show,
5840 };
5841
5842 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5843 {
5844 int ret;
5845
5846 ret = tracing_check_open_get_tr(NULL);
5847 if (ret)
5848 return ret;
5849
5850 return seq_open(filp, &tracing_eval_map_seq_ops);
5851 }
5852
5853 static const struct file_operations tracing_eval_map_fops = {
5854 .open = tracing_eval_map_open,
5855 .read = seq_read,
5856 .llseek = seq_lseek,
5857 .release = seq_release,
5858 };
5859
5860 static inline union trace_eval_map_item *
5861 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5862 {
5863 /* Return tail of array given the head */
5864 return ptr + ptr->head.length + 1;
5865 }
5866
5867 static void
5868 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5869 int len)
5870 {
5871 struct trace_eval_map **stop;
5872 struct trace_eval_map **map;
5873 union trace_eval_map_item *map_array;
5874 union trace_eval_map_item *ptr;
5875
5876 stop = start + len;
5877
5878 /*
5879 * The trace_eval_maps contains the map plus a head and tail item,
5880 * where the head holds the module and length of array, and the
5881 * tail holds a pointer to the next list.
5882 */
5883 map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5884 if (!map_array) {
5885 pr_warn("Unable to allocate trace eval mapping\n");
5886 return;
5887 }
5888
5889 guard(mutex)(&trace_eval_mutex);
5890
5891 if (!trace_eval_maps)
5892 trace_eval_maps = map_array;
5893 else {
5894 ptr = trace_eval_maps;
5895 for (;;) {
5896 ptr = trace_eval_jmp_to_tail(ptr);
5897 if (!ptr->tail.next)
5898 break;
5899 ptr = ptr->tail.next;
5900
5901 }
5902 ptr->tail.next = map_array;
5903 }
5904 map_array->head.mod = mod;
5905 map_array->head.length = len;
5906 map_array++;
5907
5908 for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5909 map_array->map = **map;
5910 map_array++;
5911 }
5912 memset(map_array, 0, sizeof(*map_array));
5913 }
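
/*
 * Illustrative layout of one map_array allocation built above, for a
 * module that exports three eval maps (indices relative to map_array):
 *
 *	[0] head: { .mod = mod, .length = 3 }
 *	[1] map:  first trace_eval_map
 *	[2] map:  second trace_eval_map
 *	[3] map:  third trace_eval_map
 *	[4] tail: { .next = NULL }	(cleared by the final memset)
 *
 * trace_eval_jmp_to_tail() lands on [4] by adding head.length + 1, and
 * eval_map_start()/update_eval_map() skip the head and tail items when
 * walking the chain for the eval_map file.
 */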
5914
5915 static void trace_create_eval_file(struct dentry *d_tracer)
5916 {
5917 trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
5918 NULL, &tracing_eval_map_fops);
5919 }
5920
5921 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5922 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5923 static inline void trace_insert_eval_map_file(struct module *mod,
5924 struct trace_eval_map **start, int len) { }
5925 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5926
5927 static void
5928 trace_event_update_with_eval_map(struct module *mod,
5929 struct trace_eval_map **start,
5930 int len)
5931 {
5932 struct trace_eval_map **map;
5933
5934 /* With no eval maps, only run the sanitizer if the btf_type_tag attr exists. */
5935 if (len <= 0) {
5936 if (!(IS_ENABLED(CONFIG_DEBUG_INFO_BTF) &&
5937 IS_ENABLED(CONFIG_PAHOLE_HAS_BTF_TAG) &&
5938 __has_attribute(btf_type_tag)))
5939 return;
5940 }
5941
5942 map = start;
5943
5944 trace_event_update_all(map, len);
5945
5946 if (len <= 0)
5947 return;
5948
5949 trace_insert_eval_map_file(mod, start, len);
5950 }
5951
5952 static ssize_t
5953 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5954 size_t cnt, loff_t *ppos)
5955 {
5956 struct trace_array *tr = filp->private_data;
5957 char buf[MAX_TRACER_SIZE+2];
5958 int r;
5959
5960 mutex_lock(&trace_types_lock);
5961 r = sprintf(buf, "%s\n", tr->current_trace->name);
5962 mutex_unlock(&trace_types_lock);
5963
5964 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5965 }
5966
5967 int tracer_init(struct tracer *t, struct trace_array *tr)
5968 {
5969 tracing_reset_online_cpus(&tr->array_buffer);
5970 return t->init(tr);
5971 }
5972
5973 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5974 {
5975 int cpu;
5976
5977 for_each_tracing_cpu(cpu)
5978 per_cpu_ptr(buf->data, cpu)->entries = val;
5979 }
5980
5981 static void update_buffer_entries(struct array_buffer *buf, int cpu)
5982 {
5983 if (cpu == RING_BUFFER_ALL_CPUS) {
5984 set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
5985 } else {
5986 per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
5987 }
5988 }
5989
5990 #ifdef CONFIG_TRACER_MAX_TRACE
5991 /* resize @trace_buf's buffer to the size of @size_buf's entries */
5992 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5993 struct array_buffer *size_buf, int cpu_id)
5994 {
5995 int cpu, ret = 0;
5996
5997 if (cpu_id == RING_BUFFER_ALL_CPUS) {
5998 for_each_tracing_cpu(cpu) {
5999 ret = ring_buffer_resize(trace_buf->buffer,
6000 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6001 if (ret < 0)
6002 break;
6003 per_cpu_ptr(trace_buf->data, cpu)->entries =
6004 per_cpu_ptr(size_buf->data, cpu)->entries;
6005 }
6006 } else {
6007 ret = ring_buffer_resize(trace_buf->buffer,
6008 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6009 if (ret == 0)
6010 per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6011 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6012 }
6013
6014 return ret;
6015 }
6016 #endif /* CONFIG_TRACER_MAX_TRACE */
6017
6018 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6019 unsigned long size, int cpu)
6020 {
6021 int ret;
6022
6023 /*
6024 * If kernel or user changes the size of the ring buffer
6025 * we use the size that was given, and we can forget about
6026 * expanding it later.
6027 */
6028 trace_set_ring_buffer_expanded(tr);
6029
6030 /* May be called before buffers are initialized */
6031 if (!tr->array_buffer.buffer)
6032 return 0;
6033
6034 /* Do not allow tracing while resizing ring buffer */
6035 tracing_stop_tr(tr);
6036
6037 ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6038 if (ret < 0)
6039 goto out_start;
6040
6041 #ifdef CONFIG_TRACER_MAX_TRACE
6042 if (!tr->allocated_snapshot)
6043 goto out;
6044
6045 ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6046 if (ret < 0) {
6047 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6048 &tr->array_buffer, cpu);
6049 if (r < 0) {
6050 /*
6051 * AARGH! We are left with different
6052 * size max buffer!!!!
6053 * The max buffer is our "snapshot" buffer.
6054 * When a tracer needs a snapshot (one of the
6055 * latency tracers), it swaps the max buffer
6056 * with the saved snapshot. We succeeded in updating
6057 * the size of the main buffer, but failed to
6058 * update the size of the max buffer. But when we tried
6059 * to reset the main buffer to the original size, we
6060 * failed there too. This is very unlikely to
6061 * happen, but if it does, warn and kill all
6062 * tracing.
6063 */
6064 WARN_ON(1);
6065 tracing_disabled = 1;
6066 }
6067 goto out_start;
6068 }
6069
6070 update_buffer_entries(&tr->max_buffer, cpu);
6071
6072 out:
6073 #endif /* CONFIG_TRACER_MAX_TRACE */
6074
6075 update_buffer_entries(&tr->array_buffer, cpu);
6076 out_start:
6077 tracing_start_tr(tr);
6078 return ret;
6079 }
6080
6081 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6082 unsigned long size, int cpu_id)
6083 {
6084 guard(mutex)(&trace_types_lock);
6085
6086 if (cpu_id != RING_BUFFER_ALL_CPUS) {
6087 /* make sure, this cpu is enabled in the mask */
6088 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask))
6089 return -EINVAL;
6090 }
6091
6092 return __tracing_resize_ring_buffer(tr, size, cpu_id);
6093 }
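
/*
 * User space reaches this resize path through the buffer_size_kb files,
 * e.g. (paths assume tracefs mounted at /sys/kernel/tracing):
 *
 *	# echo 4096 > /sys/kernel/tracing/buffer_size_kb
 *	# echo 1024 > /sys/kernel/tracing/per_cpu/cpu1/buffer_size_kb
 *
 * The first resizes every per-CPU buffer, the second only CPU 1; both
 * end up here via tracing_entries_write() further below.
 */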
6094
6095 struct trace_mod_entry {
6096 unsigned long mod_addr;
6097 char mod_name[MODULE_NAME_LEN];
6098 };
6099
6100 struct trace_scratch {
6101 unsigned int clock_id;
6102 unsigned long text_addr;
6103 unsigned long nr_entries;
6104 struct trace_mod_entry entries[];
6105 };
6106
6107 static DEFINE_MUTEX(scratch_mutex);
6108
6109 static int cmp_mod_entry(const void *key, const void *pivot)
6110 {
6111 unsigned long addr = (unsigned long)key;
6112 const struct trace_mod_entry *ent = pivot;
6113
6114 if (addr >= ent[0].mod_addr && addr < ent[1].mod_addr)
6115 return 0;
6116 else
6117 return addr - ent->mod_addr;
6118 }
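
/*
 * A small worked example of the comparator above, with made-up addresses:
 * given sorted entries with mod_addr 0x1000, 0x5000 and 0x9000, a lookup
 * of 0x5abc compared against the pair (0x5000, 0x9000) falls inside the
 * half-open range and returns 0, so the __inline_bsearch() in
 * trace_adjust_address() resolves to index 1.
 */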
6119
6120 /**
6121 * trace_adjust_address() - Adjust prev boot address to current address.
6122 * @tr: Persistent ring buffer's trace_array.
6123 * @addr: Address in @tr which is adjusted.
6124 */
6125 unsigned long trace_adjust_address(struct trace_array *tr, unsigned long addr)
6126 {
6127 struct trace_module_delta *module_delta;
6128 struct trace_scratch *tscratch;
6129 struct trace_mod_entry *entry;
6130 unsigned long raddr;
6131 int idx = 0, nr_entries;
6132
6133 /* If we don't have last boot delta, return the address */
6134 if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6135 return addr;
6136
6137 /* tr->module_delta must be protected by rcu. */
6138 guard(rcu)();
6139 tscratch = tr->scratch;
6140 /* if there is no tscratch, module_delta must be NULL. */
6141 module_delta = READ_ONCE(tr->module_delta);
6142 if (!module_delta || !tscratch->nr_entries ||
6143 tscratch->entries[0].mod_addr > addr) {
6144 raddr = addr + tr->text_delta;
6145 return __is_kernel(raddr) || is_kernel_core_data(raddr) ||
6146 is_kernel_rodata(raddr) ? raddr : addr;
6147 }
6148
6149 /* Note that entries must be sorted. */
6150 nr_entries = tscratch->nr_entries;
6151 if (nr_entries == 1 ||
6152 tscratch->entries[nr_entries - 1].mod_addr < addr)
6153 idx = nr_entries - 1;
6154 else {
6155 entry = __inline_bsearch((void *)addr,
6156 tscratch->entries,
6157 nr_entries - 1,
6158 sizeof(tscratch->entries[0]),
6159 cmp_mod_entry);
6160 if (entry)
6161 idx = entry - tscratch->entries;
6162 }
6163
6164 return addr + module_delta->delta[idx];
6165 }
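
/*
 * Sketch with made-up numbers: if a module's text was at
 * 0xffffffffc0100000 in the previous boot and is at 0xffffffffc0400000
 * now, module_delta->delta[idx] holds +0x300000, so an address of
 * 0xffffffffc0101234 recorded in the persistent buffer is reported as
 * 0xffffffffc0401234 for this boot.
 */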
6166
6167 #ifdef CONFIG_MODULES
6168 static int save_mod(struct module *mod, void *data)
6169 {
6170 struct trace_array *tr = data;
6171 struct trace_scratch *tscratch;
6172 struct trace_mod_entry *entry;
6173 unsigned int size;
6174
6175 tscratch = tr->scratch;
6176 if (!tscratch)
6177 return -1;
6178 size = tr->scratch_size;
6179
6180 if (struct_size(tscratch, entries, tscratch->nr_entries + 1) > size)
6181 return -1;
6182
6183 entry = &tscratch->entries[tscratch->nr_entries];
6184
6185 tscratch->nr_entries++;
6186
6187 entry->mod_addr = (unsigned long)mod->mem[MOD_TEXT].base;
6188 strscpy(entry->mod_name, mod->name);
6189
6190 return 0;
6191 }
6192 #else
6193 static int save_mod(struct module *mod, void *data)
6194 {
6195 return 0;
6196 }
6197 #endif
6198
6199 static void update_last_data(struct trace_array *tr)
6200 {
6201 struct trace_module_delta *module_delta;
6202 struct trace_scratch *tscratch;
6203
6204 if (!(tr->flags & TRACE_ARRAY_FL_BOOT))
6205 return;
6206
6207 if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6208 return;
6209
6210 /* Only if the buffer has previous boot data, clear and update it. */
6211 tr->flags &= ~TRACE_ARRAY_FL_LAST_BOOT;
6212
6213 /* Reset the module list and reload them */
6214 if (tr->scratch) {
6215 struct trace_scratch *tscratch = tr->scratch;
6216
6217 tscratch->clock_id = tr->clock_id;
6218 memset(tscratch->entries, 0,
6219 flex_array_size(tscratch, entries, tscratch->nr_entries));
6220 tscratch->nr_entries = 0;
6221
6222 guard(mutex)(&scratch_mutex);
6223 module_for_each_mod(save_mod, tr);
6224 }
6225
6226 /*
6227 * Need to clear all CPU buffers as there cannot be events
6228 * from the previous boot mixed with events with this boot
6229 * as that will cause a confusing trace. Need to clear all
6230 * CPU buffers, even for those that may currently be offline.
6231 */
6232 tracing_reset_all_cpus(&tr->array_buffer);
6233
6234 /* Using current data now */
6235 tr->text_delta = 0;
6236
6237 if (!tr->scratch)
6238 return;
6239
6240 tscratch = tr->scratch;
6241 module_delta = READ_ONCE(tr->module_delta);
6242 WRITE_ONCE(tr->module_delta, NULL);
6243 kfree_rcu(module_delta, rcu);
6244
6245 /* Set the persistent ring buffer meta data to this address */
6246 tscratch->text_addr = (unsigned long)_text;
6247 }
6248
6249 /**
6250 * tracing_update_buffers - used by tracing facility to expand ring buffers
6251 * @tr: The tracing instance
6252 *
6253 * To save memory when tracing is never used on a system with it
6254 * configured in, the ring buffers are set to a minimum size. But once
6255 * a user starts to use the tracing facility, they need to grow
6256 * to their default size.
6257 *
6258 * This function is to be called when a tracer is about to be used.
6259 */
6260 int tracing_update_buffers(struct trace_array *tr)
6261 {
6262 int ret = 0;
6263
6264 mutex_lock(&trace_types_lock);
6265
6266 update_last_data(tr);
6267
6268 if (!tr->ring_buffer_expanded)
6269 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6270 RING_BUFFER_ALL_CPUS);
6271 mutex_unlock(&trace_types_lock);
6272
6273 return ret;
6274 }
6275
6276 struct trace_option_dentry;
6277
6278 static void
6279 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6280
6281 /*
6282 * Used to clear out the tracer before deletion of an instance.
6283 * Must have trace_types_lock held.
6284 */
6285 static void tracing_set_nop(struct trace_array *tr)
6286 {
6287 if (tr->current_trace == &nop_trace)
6288 return;
6289
6290 tr->current_trace->enabled--;
6291
6292 if (tr->current_trace->reset)
6293 tr->current_trace->reset(tr);
6294
6295 tr->current_trace = &nop_trace;
6296 }
6297
6298 static bool tracer_options_updated;
6299
6300 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6301 {
6302 /* Only enable if the directory has been created already. */
6303 if (!tr->dir && !(tr->flags & TRACE_ARRAY_FL_GLOBAL))
6304 return;
6305
6306 /* Only create trace option files after update_tracer_options finishes */
6307 if (!tracer_options_updated)
6308 return;
6309
6310 create_trace_option_files(tr, t);
6311 }
6312
6313 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6314 {
6315 struct tracer *t;
6316 #ifdef CONFIG_TRACER_MAX_TRACE
6317 bool had_max_tr;
6318 #endif
6319 int ret;
6320
6321 guard(mutex)(&trace_types_lock);
6322
6323 update_last_data(tr);
6324
6325 if (!tr->ring_buffer_expanded) {
6326 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6327 RING_BUFFER_ALL_CPUS);
6328 if (ret < 0)
6329 return ret;
6330 ret = 0;
6331 }
6332
6333 for (t = trace_types; t; t = t->next) {
6334 if (strcmp(t->name, buf) == 0)
6335 break;
6336 }
6337 if (!t)
6338 return -EINVAL;
6339
6340 if (t == tr->current_trace)
6341 return 0;
6342
6343 #ifdef CONFIG_TRACER_SNAPSHOT
6344 if (t->use_max_tr) {
6345 local_irq_disable();
6346 arch_spin_lock(&tr->max_lock);
6347 ret = tr->cond_snapshot ? -EBUSY : 0;
6348 arch_spin_unlock(&tr->max_lock);
6349 local_irq_enable();
6350 if (ret)
6351 return ret;
6352 }
6353 #endif
6354 /* Some tracers won't work on kernel command line */
6355 if (system_state < SYSTEM_RUNNING && t->noboot) {
6356 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6357 t->name);
6358 return -EINVAL;
6359 }
6360
6361 /* Some tracers are only allowed for the top level buffer */
6362 if (!trace_ok_for_array(t, tr))
6363 return -EINVAL;
6364
6365 /* If trace pipe files are being read, we can't change the tracer */
6366 if (tr->trace_ref)
6367 return -EBUSY;
6368
6369 trace_branch_disable();
6370
6371 tr->current_trace->enabled--;
6372
6373 if (tr->current_trace->reset)
6374 tr->current_trace->reset(tr);
6375
6376 #ifdef CONFIG_TRACER_MAX_TRACE
6377 had_max_tr = tr->current_trace->use_max_tr;
6378
6379 /* Current trace needs to be nop_trace before synchronize_rcu */
6380 tr->current_trace = &nop_trace;
6381
6382 if (had_max_tr && !t->use_max_tr) {
6383 /*
6384 * We need to make sure that the update_max_tr sees that
6385 * current_trace changed to nop_trace to keep it from
6386 * swapping the buffers after we resize it.
6387 * update_max_tr() is called with interrupts disabled,
6388 * so a synchronize_rcu() is sufficient.
6389 */
6390 synchronize_rcu();
6391 free_snapshot(tr);
6392 tracing_disarm_snapshot(tr);
6393 }
6394
6395 if (!had_max_tr && t->use_max_tr) {
6396 ret = tracing_arm_snapshot_locked(tr);
6397 if (ret)
6398 return ret;
6399 }
6400 #else
6401 tr->current_trace = &nop_trace;
6402 #endif
6403
6404 if (t->init) {
6405 ret = tracer_init(t, tr);
6406 if (ret) {
6407 #ifdef CONFIG_TRACER_MAX_TRACE
6408 if (t->use_max_tr)
6409 tracing_disarm_snapshot(tr);
6410 #endif
6411 return ret;
6412 }
6413 }
6414
6415 tr->current_trace = t;
6416 tr->current_trace->enabled++;
6417 trace_branch_enable(tr);
6418
6419 return 0;
6420 }
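
/*
 * The usual way to exercise this path is the current_tracer file:
 *
 *	# cat /sys/kernel/tracing/available_tracers
 *	# echo function > /sys/kernel/tracing/current_tracer
 *	# echo nop > /sys/kernel/tracing/current_tracer
 *
 * Writing "nop" detaches the previous tracer; "function" is only an
 * example and depends on the configured tracers.
 */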
6421
6422 static ssize_t
6423 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6424 size_t cnt, loff_t *ppos)
6425 {
6426 struct trace_array *tr = filp->private_data;
6427 char buf[MAX_TRACER_SIZE+1];
6428 char *name;
6429 size_t ret;
6430 int err;
6431
6432 ret = cnt;
6433
6434 if (cnt > MAX_TRACER_SIZE)
6435 cnt = MAX_TRACER_SIZE;
6436
6437 if (copy_from_user(buf, ubuf, cnt))
6438 return -EFAULT;
6439
6440 buf[cnt] = 0;
6441
6442 name = strim(buf);
6443
6444 err = tracing_set_tracer(tr, name);
6445 if (err)
6446 return err;
6447
6448 *ppos += ret;
6449
6450 return ret;
6451 }
6452
6453 static ssize_t
6454 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6455 size_t cnt, loff_t *ppos)
6456 {
6457 char buf[64];
6458 int r;
6459
6460 r = snprintf(buf, sizeof(buf), "%ld\n",
6461 *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6462 if (r > sizeof(buf))
6463 r = sizeof(buf);
6464 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6465 }
6466
6467 static ssize_t
6468 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6469 size_t cnt, loff_t *ppos)
6470 {
6471 unsigned long val;
6472 int ret;
6473
6474 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6475 if (ret)
6476 return ret;
6477
6478 *ptr = val * 1000;
6479
6480 return cnt;
6481 }
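
/*
 * The files backed by these helpers are read and written in
 * microseconds while the stored values are nanoseconds, hence the
 * "* 1000" above and nsecs_to_usecs() in the read path. For example:
 *
 *	# echo 100 > /sys/kernel/tracing/tracing_thresh
 *
 * stores 100000 (nanoseconds) in tracing_thresh.
 */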
6482
6483 static ssize_t
6484 tracing_thresh_read(struct file *filp, char __user *ubuf,
6485 size_t cnt, loff_t *ppos)
6486 {
6487 return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6488 }
6489
6490 static ssize_t
6491 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6492 size_t cnt, loff_t *ppos)
6493 {
6494 struct trace_array *tr = filp->private_data;
6495 int ret;
6496
6497 guard(mutex)(&trace_types_lock);
6498 ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6499 if (ret < 0)
6500 return ret;
6501
6502 if (tr->current_trace->update_thresh) {
6503 ret = tr->current_trace->update_thresh(tr);
6504 if (ret < 0)
6505 return ret;
6506 }
6507
6508 return cnt;
6509 }
6510
6511 #ifdef CONFIG_TRACER_MAX_TRACE
6512
6513 static ssize_t
6514 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6515 size_t cnt, loff_t *ppos)
6516 {
6517 struct trace_array *tr = filp->private_data;
6518
6519 return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6520 }
6521
6522 static ssize_t
6523 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6524 size_t cnt, loff_t *ppos)
6525 {
6526 struct trace_array *tr = filp->private_data;
6527
6528 return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6529 }
6530
6531 #endif
6532
6533 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6534 {
6535 if (cpu == RING_BUFFER_ALL_CPUS) {
6536 if (cpumask_empty(tr->pipe_cpumask)) {
6537 cpumask_setall(tr->pipe_cpumask);
6538 return 0;
6539 }
6540 } else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6541 cpumask_set_cpu(cpu, tr->pipe_cpumask);
6542 return 0;
6543 }
6544 return -EBUSY;
6545 }
6546
6547 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6548 {
6549 if (cpu == RING_BUFFER_ALL_CPUS) {
6550 WARN_ON(!cpumask_full(tr->pipe_cpumask));
6551 cpumask_clear(tr->pipe_cpumask);
6552 } else {
6553 WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6554 cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6555 }
6556 }
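
/*
 * The pipe_cpumask bookkeeping above gives mutual exclusion between
 * consuming readers, e.g.:
 *
 *	# cat /sys/kernel/tracing/trace_pipe &
 *	# cat /sys/kernel/tracing/per_cpu/cpu0/trace_pipe
 *	cat: /sys/kernel/tracing/per_cpu/cpu0/trace_pipe: Device or resource busy
 *
 * The second open fails with -EBUSY until the first reader closes its
 * file, and the same applies in the other direction.
 */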
6557
6558 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6559 {
6560 struct trace_array *tr = inode->i_private;
6561 struct trace_iterator *iter;
6562 int cpu;
6563 int ret;
6564
6565 ret = tracing_check_open_get_tr(tr);
6566 if (ret)
6567 return ret;
6568
6569 mutex_lock(&trace_types_lock);
6570 cpu = tracing_get_cpu(inode);
6571 ret = open_pipe_on_cpu(tr, cpu);
6572 if (ret)
6573 goto fail_pipe_on_cpu;
6574
6575 /* create a buffer to store the information to pass to userspace */
6576 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6577 if (!iter) {
6578 ret = -ENOMEM;
6579 goto fail_alloc_iter;
6580 }
6581
6582 trace_seq_init(&iter->seq);
6583 iter->trace = tr->current_trace;
6584
6585 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6586 ret = -ENOMEM;
6587 goto fail;
6588 }
6589
6590 /* trace pipe does not show start of buffer */
6591 cpumask_setall(iter->started);
6592
6593 if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6594 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6595
6596 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6597 if (trace_clocks[tr->clock_id].in_ns)
6598 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6599
6600 iter->tr = tr;
6601 iter->array_buffer = &tr->array_buffer;
6602 iter->cpu_file = cpu;
6603 mutex_init(&iter->mutex);
6604 filp->private_data = iter;
6605
6606 if (iter->trace->pipe_open)
6607 iter->trace->pipe_open(iter);
6608
6609 nonseekable_open(inode, filp);
6610
6611 tr->trace_ref++;
6612
6613 mutex_unlock(&trace_types_lock);
6614 return ret;
6615
6616 fail:
6617 kfree(iter);
6618 fail_alloc_iter:
6619 close_pipe_on_cpu(tr, cpu);
6620 fail_pipe_on_cpu:
6621 __trace_array_put(tr);
6622 mutex_unlock(&trace_types_lock);
6623 return ret;
6624 }
6625
6626 static int tracing_release_pipe(struct inode *inode, struct file *file)
6627 {
6628 struct trace_iterator *iter = file->private_data;
6629 struct trace_array *tr = inode->i_private;
6630
6631 mutex_lock(&trace_types_lock);
6632
6633 tr->trace_ref--;
6634
6635 if (iter->trace->pipe_close)
6636 iter->trace->pipe_close(iter);
6637 close_pipe_on_cpu(tr, iter->cpu_file);
6638 mutex_unlock(&trace_types_lock);
6639
6640 free_trace_iter_content(iter);
6641 kfree(iter);
6642
6643 trace_array_put(tr);
6644
6645 return 0;
6646 }
6647
6648 static __poll_t
6649 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6650 {
6651 struct trace_array *tr = iter->tr;
6652
6653 /* Iterators are static, they should be filled or empty */
6654 if (trace_buffer_iter(iter, iter->cpu_file))
6655 return EPOLLIN | EPOLLRDNORM;
6656
6657 if (tr->trace_flags & TRACE_ITER_BLOCK)
6658 /*
6659 * Always select as readable when in blocking mode
6660 */
6661 return EPOLLIN | EPOLLRDNORM;
6662 else
6663 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6664 filp, poll_table, iter->tr->buffer_percent);
6665 }
6666
6667 static __poll_t
6668 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6669 {
6670 struct trace_iterator *iter = filp->private_data;
6671
6672 return trace_poll(iter, filp, poll_table);
6673 }
6674
6675 /* Must be called with iter->mutex held. */
6676 static int tracing_wait_pipe(struct file *filp)
6677 {
6678 struct trace_iterator *iter = filp->private_data;
6679 int ret;
6680
6681 while (trace_empty(iter)) {
6682
6683 if ((filp->f_flags & O_NONBLOCK)) {
6684 return -EAGAIN;
6685 }
6686
6687 /*
6688 * We block until we read something and tracing is disabled.
6689 * We still block if tracing is disabled, but we have never
6690 * read anything. This allows a user to cat this file, and
6691 * then enable tracing. But after we have read something,
6692 * we give an EOF when tracing is again disabled.
6693 *
6694 * iter->pos will be 0 if we haven't read anything.
6695 */
6696 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6697 break;
6698
6699 mutex_unlock(&iter->mutex);
6700
6701 ret = wait_on_pipe(iter, 0);
6702
6703 mutex_lock(&iter->mutex);
6704
6705 if (ret)
6706 return ret;
6707 }
6708
6709 return 1;
6710 }
6711
6712 static bool update_last_data_if_empty(struct trace_array *tr)
6713 {
6714 if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6715 return false;
6716
6717 if (!ring_buffer_empty(tr->array_buffer.buffer))
6718 return false;
6719
6720 /*
6721 * If the buffer contains the last boot data and all per-cpu
6722 * buffers are empty, reset it from the kernel side.
6723 */
6724 update_last_data(tr);
6725 return true;
6726 }
6727
6728 /*
6729 * Consumer reader.
6730 */
6731 static ssize_t
6732 tracing_read_pipe(struct file *filp, char __user *ubuf,
6733 size_t cnt, loff_t *ppos)
6734 {
6735 struct trace_iterator *iter = filp->private_data;
6736 ssize_t sret;
6737
6738 /*
6739 * Avoid more than one consumer on a single file descriptor
6740 * This is just a matter of trace coherency; the ring buffer itself
6741 * is protected.
6742 */
6743 guard(mutex)(&iter->mutex);
6744
6745 /* return any leftover data */
6746 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6747 if (sret != -EBUSY)
6748 return sret;
6749
6750 trace_seq_init(&iter->seq);
6751
6752 if (iter->trace->read) {
6753 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6754 if (sret)
6755 return sret;
6756 }
6757
6758 waitagain:
6759 if (update_last_data_if_empty(iter->tr))
6760 return 0;
6761
6762 sret = tracing_wait_pipe(filp);
6763 if (sret <= 0)
6764 return sret;
6765
6766 /* stop when tracing is finished */
6767 if (trace_empty(iter))
6768 return 0;
6769
6770 if (cnt >= TRACE_SEQ_BUFFER_SIZE)
6771 cnt = TRACE_SEQ_BUFFER_SIZE - 1;
6772
6773 /* reset all but tr, trace, and overruns */
6774 trace_iterator_reset(iter);
6775 cpumask_clear(iter->started);
6776 trace_seq_init(&iter->seq);
6777
6778 trace_event_read_lock();
6779 trace_access_lock(iter->cpu_file);
6780 while (trace_find_next_entry_inc(iter) != NULL) {
6781 enum print_line_t ret;
6782 int save_len = iter->seq.seq.len;
6783
6784 ret = print_trace_line(iter);
6785 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6786 /*
6787 * If one print_trace_line() fills the entire trace_seq in one shot,
6788 * trace_seq_to_user() will return -EBUSY because save_len == 0.
6789 * In this case, we need to consume it, otherwise the loop will peek
6790 * this event next time, resulting in an infinite loop.
6791 */
6792 if (save_len == 0) {
6793 iter->seq.full = 0;
6794 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6795 trace_consume(iter);
6796 break;
6797 }
6798
6799 /* In other cases, don't print partial lines */
6800 iter->seq.seq.len = save_len;
6801 break;
6802 }
6803 if (ret != TRACE_TYPE_NO_CONSUME)
6804 trace_consume(iter);
6805
6806 if (trace_seq_used(&iter->seq) >= cnt)
6807 break;
6808
6809 /*
6810 * Setting the full flag means we reached the trace_seq buffer
6811 * size and we should leave via the partial output condition above.
6812 * One of the trace_seq_* functions is not used properly.
6813 */
6814 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6815 iter->ent->type);
6816 }
6817 trace_access_unlock(iter->cpu_file);
6818 trace_event_read_unlock();
6819
6820 /* Now copy what we have to the user */
6821 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6822 if (iter->seq.readpos >= trace_seq_used(&iter->seq))
6823 trace_seq_init(&iter->seq);
6824
6825 /*
6826 * If there was nothing to send to user, in spite of consuming trace
6827 * entries, go back to wait for more entries.
6828 */
6829 if (sret == -EBUSY)
6830 goto waitagain;
6831
6832 return sret;
6833 }
6834
6835 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6836 unsigned int idx)
6837 {
6838 __free_page(spd->pages[idx]);
6839 }
6840
6841 static size_t
6842 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6843 {
6844 size_t count;
6845 int save_len;
6846 int ret;
6847
6848 /* Seq buffer is page-sized, exactly what we need. */
6849 for (;;) {
6850 save_len = iter->seq.seq.len;
6851 ret = print_trace_line(iter);
6852
6853 if (trace_seq_has_overflowed(&iter->seq)) {
6854 iter->seq.seq.len = save_len;
6855 break;
6856 }
6857
6858 /*
6859 * This should not be hit, because it should only
6860 * be set if the iter->seq overflowed. But check it
6861 * anyway to be safe.
6862 */
6863 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6864 iter->seq.seq.len = save_len;
6865 break;
6866 }
6867
6868 count = trace_seq_used(&iter->seq) - save_len;
6869 if (rem < count) {
6870 rem = 0;
6871 iter->seq.seq.len = save_len;
6872 break;
6873 }
6874
6875 if (ret != TRACE_TYPE_NO_CONSUME)
6876 trace_consume(iter);
6877 rem -= count;
6878 if (!trace_find_next_entry_inc(iter)) {
6879 rem = 0;
6880 iter->ent = NULL;
6881 break;
6882 }
6883 }
6884
6885 return rem;
6886 }
6887
6888 static ssize_t tracing_splice_read_pipe(struct file *filp,
6889 loff_t *ppos,
6890 struct pipe_inode_info *pipe,
6891 size_t len,
6892 unsigned int flags)
6893 {
6894 struct page *pages_def[PIPE_DEF_BUFFERS];
6895 struct partial_page partial_def[PIPE_DEF_BUFFERS];
6896 struct trace_iterator *iter = filp->private_data;
6897 struct splice_pipe_desc spd = {
6898 .pages = pages_def,
6899 .partial = partial_def,
6900 .nr_pages = 0, /* This gets updated below. */
6901 .nr_pages_max = PIPE_DEF_BUFFERS,
6902 .ops = &default_pipe_buf_ops,
6903 .spd_release = tracing_spd_release_pipe,
6904 };
6905 ssize_t ret;
6906 size_t rem;
6907 unsigned int i;
6908
6909 if (splice_grow_spd(pipe, &spd))
6910 return -ENOMEM;
6911
6912 mutex_lock(&iter->mutex);
6913
6914 if (iter->trace->splice_read) {
6915 ret = iter->trace->splice_read(iter, filp,
6916 ppos, pipe, len, flags);
6917 if (ret)
6918 goto out_err;
6919 }
6920
6921 ret = tracing_wait_pipe(filp);
6922 if (ret <= 0)
6923 goto out_err;
6924
6925 if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6926 ret = -EFAULT;
6927 goto out_err;
6928 }
6929
6930 trace_event_read_lock();
6931 trace_access_lock(iter->cpu_file);
6932
6933 /* Fill as many pages as possible. */
6934 for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6935 spd.pages[i] = alloc_page(GFP_KERNEL);
6936 if (!spd.pages[i])
6937 break;
6938
6939 rem = tracing_fill_pipe_page(rem, iter);
6940
6941 /* Copy the data into the page, so we can start over. */
6942 ret = trace_seq_to_buffer(&iter->seq,
6943 page_address(spd.pages[i]),
6944 min((size_t)trace_seq_used(&iter->seq),
6945 (size_t)PAGE_SIZE));
6946 if (ret < 0) {
6947 __free_page(spd.pages[i]);
6948 break;
6949 }
6950 spd.partial[i].offset = 0;
6951 spd.partial[i].len = ret;
6952
6953 trace_seq_init(&iter->seq);
6954 }
6955
6956 trace_access_unlock(iter->cpu_file);
6957 trace_event_read_unlock();
6958 mutex_unlock(&iter->mutex);
6959
6960 spd.nr_pages = i;
6961
6962 if (i)
6963 ret = splice_to_pipe(pipe, &spd);
6964 else
6965 ret = 0;
6966 out:
6967 splice_shrink_spd(&spd);
6968 return ret;
6969
6970 out_err:
6971 mutex_unlock(&iter->mutex);
6972 goto out;
6973 }
6974
6975 static ssize_t
6976 tracing_entries_read(struct file *filp, char __user *ubuf,
6977 size_t cnt, loff_t *ppos)
6978 {
6979 struct inode *inode = file_inode(filp);
6980 struct trace_array *tr = inode->i_private;
6981 int cpu = tracing_get_cpu(inode);
6982 char buf[64];
6983 int r = 0;
6984 ssize_t ret;
6985
6986 mutex_lock(&trace_types_lock);
6987
6988 if (cpu == RING_BUFFER_ALL_CPUS) {
6989 int cpu, buf_size_same;
6990 unsigned long size;
6991
6992 size = 0;
6993 buf_size_same = 1;
6994 /* check if all cpu sizes are the same */
6995 for_each_tracing_cpu(cpu) {
6996 /* fill in the size from first enabled cpu */
6997 if (size == 0)
6998 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6999 if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
7000 buf_size_same = 0;
7001 break;
7002 }
7003 }
7004
7005 if (buf_size_same) {
7006 if (!tr->ring_buffer_expanded)
7007 r = sprintf(buf, "%lu (expanded: %lu)\n",
7008 size >> 10,
7009 trace_buf_size >> 10);
7010 else
7011 r = sprintf(buf, "%lu\n", size >> 10);
7012 } else
7013 r = sprintf(buf, "X\n");
7014 } else
7015 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
7016
7017 mutex_unlock(&trace_types_lock);
7018
7019 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7020 return ret;
7021 }
7022
7023 static ssize_t
7024 tracing_entries_write(struct file *filp, const char __user *ubuf,
7025 size_t cnt, loff_t *ppos)
7026 {
7027 struct inode *inode = file_inode(filp);
7028 struct trace_array *tr = inode->i_private;
7029 unsigned long val;
7030 int ret;
7031
7032 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7033 if (ret)
7034 return ret;
7035
7036 /* must have at least 1 entry */
7037 if (!val)
7038 return -EINVAL;
7039
7040 /* value is in KB */
7041 val <<= 10;
7042 ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7043 if (ret < 0)
7044 return ret;
7045
7046 *ppos += cnt;
7047
7048 return cnt;
7049 }
7050
7051 static ssize_t
7052 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7053 size_t cnt, loff_t *ppos)
7054 {
7055 struct trace_array *tr = filp->private_data;
7056 char buf[64];
7057 int r, cpu;
7058 unsigned long size = 0, expanded_size = 0;
7059
7060 mutex_lock(&trace_types_lock);
7061 for_each_tracing_cpu(cpu) {
7062 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7063 if (!tr->ring_buffer_expanded)
7064 expanded_size += trace_buf_size >> 10;
7065 }
7066 if (tr->ring_buffer_expanded)
7067 r = sprintf(buf, "%lu\n", size);
7068 else
7069 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7070 mutex_unlock(&trace_types_lock);
7071
7072 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7073 }
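
/*
 * Example output (the numbers depend on the machine and configuration):
 * on a 4-CPU system whose buffers have not been expanded yet,
 * buffer_total_size_kb might read
 *
 *	28 (expanded: 5632)
 *
 * i.e. the summed current per-CPU sizes plus the total they would grow
 * to (nr_cpus * trace_buf_size >> 10) once tracing is first used.
 */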
7074
7075 #define LAST_BOOT_HEADER ((void *)1)
7076
7077 static void *l_next(struct seq_file *m, void *v, loff_t *pos)
7078 {
7079 struct trace_array *tr = m->private;
7080 struct trace_scratch *tscratch = tr->scratch;
7081 unsigned int index = *pos;
7082
7083 (*pos)++;
7084
7085 if (*pos == 1)
7086 return LAST_BOOT_HEADER;
7087
7088 /* Only show offsets of the last boot data */
7089 if (!tscratch || !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
7090 return NULL;
7091
7092 /* *pos 0 is for the header, 1 is for the first module */
7093 index--;
7094
7095 if (index >= tscratch->nr_entries)
7096 return NULL;
7097
7098 return &tscratch->entries[index];
7099 }
7100
7101 static void *l_start(struct seq_file *m, loff_t *pos)
7102 {
7103 mutex_lock(&scratch_mutex);
7104
7105 return l_next(m, NULL, pos);
7106 }
7107
7108 static void l_stop(struct seq_file *m, void *p)
7109 {
7110 mutex_unlock(&scratch_mutex);
7111 }
7112
7113 static void show_last_boot_header(struct seq_file *m, struct trace_array *tr)
7114 {
7115 struct trace_scratch *tscratch = tr->scratch;
7116
7117 /*
7118 * Do not leak KASLR address. This only shows the KASLR address of
7119 * the last boot. When the ring buffer is started, the LAST_BOOT
7120 * flag gets cleared, and this should only report "current".
7121 * Otherwise it shows the KASLR address from the previous boot which
7122 * should not be the same as the current boot.
7123 */
7124 if (tscratch && (tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
7125 seq_printf(m, "%lx\t[kernel]\n", tscratch->text_addr);
7126 else
7127 seq_puts(m, "# Current\n");
7128 }
7129
7130 static int l_show(struct seq_file *m, void *v)
7131 {
7132 struct trace_array *tr = m->private;
7133 struct trace_mod_entry *entry = v;
7134
7135 if (v == LAST_BOOT_HEADER) {
7136 show_last_boot_header(m, tr);
7137 return 0;
7138 }
7139
7140 seq_printf(m, "%lx\t%s\n", entry->mod_addr, entry->mod_name);
7141 return 0;
7142 }
7143
7144 static const struct seq_operations last_boot_seq_ops = {
7145 .start = l_start,
7146 .next = l_next,
7147 .stop = l_stop,
7148 .show = l_show,
7149 };
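
/*
 * Example output of the file backed by these seq_operations, with
 * illustrative addresses and module names:
 *
 *	ffffffff9e200000	[kernel]
 *	ffffffffc0a00000	ext4
 *	ffffffffc0900000	libahci
 *
 * or simply "# Current" once the last-boot data has been reset and the
 * TRACE_ARRAY_FL_LAST_BOOT flag is cleared.
 */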
7150
7151 static int tracing_last_boot_open(struct inode *inode, struct file *file)
7152 {
7153 struct trace_array *tr = inode->i_private;
7154 struct seq_file *m;
7155 int ret;
7156
7157 ret = tracing_check_open_get_tr(tr);
7158 if (ret)
7159 return ret;
7160
7161 ret = seq_open(file, &last_boot_seq_ops);
7162 if (ret) {
7163 trace_array_put(tr);
7164 return ret;
7165 }
7166
7167 m = file->private_data;
7168 m->private = tr;
7169
7170 return 0;
7171 }
7172
7173 static int tracing_buffer_meta_open(struct inode *inode, struct file *filp)
7174 {
7175 struct trace_array *tr = inode->i_private;
7176 int cpu = tracing_get_cpu(inode);
7177 int ret;
7178
7179 ret = tracing_check_open_get_tr(tr);
7180 if (ret)
7181 return ret;
7182
7183 ret = ring_buffer_meta_seq_init(filp, tr->array_buffer.buffer, cpu);
7184 if (ret < 0)
7185 __trace_array_put(tr);
7186 return ret;
7187 }
7188
7189 static ssize_t
7190 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7191 size_t cnt, loff_t *ppos)
7192 {
7193 /*
7194 * There is no need to read what the user has written; this function
7195 * only exists so that using "echo" on this file does not return an error.
7196 */
7197
7198 *ppos += cnt;
7199
7200 return cnt;
7201 }
7202
7203 static int
tracing_free_buffer_release(struct inode * inode,struct file * filp)7204 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7205 {
7206 struct trace_array *tr = inode->i_private;
7207
7208 /* Disable tracing if the "stop on free" option is set */
7209 if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7210 tracer_tracing_off(tr);
7211 /* resize the ring buffer to 0 */
7212 tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7213
7214 trace_array_put(tr);
7215
7216 return 0;
7217 }
7218
7219 #define TRACE_MARKER_MAX_SIZE 4096
7220
write_marker_to_buffer(struct trace_array * tr,const char __user * ubuf,size_t cnt,unsigned long ip)7221 static ssize_t write_marker_to_buffer(struct trace_array *tr, const char __user *ubuf,
7222 size_t cnt, unsigned long ip)
7223 {
7224 struct ring_buffer_event *event;
7225 enum event_trigger_type tt = ETT_NONE;
7226 struct trace_buffer *buffer;
7227 struct print_entry *entry;
7228 int meta_size;
7229 ssize_t written;
7230 size_t size;
7231 int len;
7232
7233 /* Used in tracing_mark_raw_write() as well */
7234 #define FAULTED_STR "<faulted>"
7235 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7236
7237 meta_size = sizeof(*entry) + 2; /* add '\0' and possible '\n' */
7238 again:
7239 size = cnt + meta_size;
7240
7241 /* If less than "<faulted>", then make sure we can still add that */
7242 if (cnt < FAULTED_SIZE)
7243 size += FAULTED_SIZE - cnt;
7244
7245 buffer = tr->array_buffer.buffer;
7246 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7247 tracing_gen_ctx());
7248 if (unlikely(!event)) {
7249 /*
7250 * If the size was greater than what was allowed, then
7251 * make it smaller and try again.
7252 */
7253 if (size > ring_buffer_max_event_size(buffer)) {
7254 /* A cnt smaller than FAULTED_SIZE should never exceed the max event size */
7255 if (WARN_ON_ONCE(cnt < FAULTED_SIZE))
7256 return -EBADF;
7257 cnt = ring_buffer_max_event_size(buffer) - meta_size;
7258 /* The above should only happen once */
7259 if (WARN_ON_ONCE(cnt + meta_size == size))
7260 return -EBADF;
7261 goto again;
7262 }
7263
7264 /* Ring buffer disabled, return as if not open for write */
7265 return -EBADF;
7266 }
7267
7268 entry = ring_buffer_event_data(event);
7269 entry->ip = ip;
7270
7271 len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7272 if (len) {
7273 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7274 cnt = FAULTED_SIZE;
7275 written = -EFAULT;
7276 } else
7277 written = cnt;
7278
7279 if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7280 /* do not add \n before testing triggers, but add \0 */
7281 entry->buf[cnt] = '\0';
7282 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7283 }
7284
7285 if (entry->buf[cnt - 1] != '\n') {
7286 entry->buf[cnt] = '\n';
7287 entry->buf[cnt + 1] = '\0';
7288 } else
7289 entry->buf[cnt] = '\0';
7290
7291 if (static_branch_unlikely(&trace_marker_exports_enabled))
7292 ftrace_exports(event, TRACE_EXPORT_MARKER);
7293 __buffer_unlock_commit(buffer, event);
7294
7295 if (tt)
7296 event_triggers_post_call(tr->trace_marker_file, tt);
7297
7298 return written;
7299 }
7300
7301 static ssize_t
tracing_mark_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * fpos)7302 tracing_mark_write(struct file *filp, const char __user *ubuf,
7303 size_t cnt, loff_t *fpos)
7304 {
7305 struct trace_array *tr = filp->private_data;
7306 ssize_t written = -ENODEV;
7307 unsigned long ip;
7308
7309 if (tracing_disabled)
7310 return -EINVAL;
7311
7312 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7313 return -EINVAL;
7314
7315 if ((ssize_t)cnt < 0)
7316 return -EINVAL;
7317
7318 if (cnt > TRACE_MARKER_MAX_SIZE)
7319 cnt = TRACE_MARKER_MAX_SIZE;
7320
7321 /* The selftests expect the recorded IP address to point into this function */
7322 ip = _THIS_IP_;
7323
7324 /* The global trace_marker can go to multiple instances */
7325 if (tr == &global_trace) {
7326 guard(rcu)();
7327 list_for_each_entry_rcu(tr, &marker_copies, marker_list) {
7328 written = write_marker_to_buffer(tr, ubuf, cnt, ip);
7329 if (written < 0)
7330 break;
7331 }
7332 } else {
7333 written = write_marker_to_buffer(tr, ubuf, cnt, ip);
7334 }
7335
7336 return written;
7337 }
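
/*
 * Minimal user-space sketch of feeding a string into the trace through
 * the trace_marker file.  The tracefs mount point is an assumption (it
 * is commonly /sys/kernel/tracing); everything else follows from
 * tracing_mark_write() above: one write() of up to TRACE_MARKER_MAX_SIZE
 * bytes, with a trailing newline added if missing.
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		const char msg[] = "hello from user space";
 *		int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
 *
 *		if (fd < 0)
 *			return 1;
 *		// One write() becomes one TRACE_PRINT event in the buffer.
 *		write(fd, msg, strlen(msg));
 *		close(fd);
 *		return 0;
 *	}
 */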
7338
write_raw_marker_to_buffer(struct trace_array * tr,const char __user * ubuf,size_t cnt)7339 static ssize_t write_raw_marker_to_buffer(struct trace_array *tr,
7340 const char __user *ubuf, size_t cnt)
7341 {
7342 struct ring_buffer_event *event;
7343 struct trace_buffer *buffer;
7344 struct raw_data_entry *entry;
7345 ssize_t written;
7346 int size;
7347 int len;
7348
7349 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7350
7351 size = sizeof(*entry) + cnt;
7352 if (cnt < FAULT_SIZE_ID)
7353 size += FAULT_SIZE_ID - cnt;
7354
7355 buffer = tr->array_buffer.buffer;
7356
7357 if (size > ring_buffer_max_event_size(buffer))
7358 return -EINVAL;
7359
7360 event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7361 tracing_gen_ctx());
7362 if (!event)
7363 /* Ring buffer disabled, return as if not open for write */
7364 return -EBADF;
7365
7366 entry = ring_buffer_event_data(event);
7367
7368 len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7369 if (len) {
7370 entry->id = -1;
7371 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7372 written = -EFAULT;
7373 } else
7374 written = cnt;
7375
7376 __buffer_unlock_commit(buffer, event);
7377
7378 return written;
7379 }
7380
7381 static ssize_t
tracing_mark_raw_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * fpos)7382 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7383 size_t cnt, loff_t *fpos)
7384 {
7385 struct trace_array *tr = filp->private_data;
7386 ssize_t written = -ENODEV;
7387
7388 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7389
7390 if (tracing_disabled)
7391 return -EINVAL;
7392
7393 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7394 return -EINVAL;
7395
7396 /* The marker must at least have a tag id */
7397 if (cnt < sizeof(unsigned int))
7398 return -EINVAL;
7399
7400 /* The global trace_marker_raw can go to multiple instances */
7401 if (tr == &global_trace) {
7402 guard(rcu)();
7403 list_for_each_entry_rcu(tr, &marker_copies, marker_list) {
7404 written = write_raw_marker_to_buffer(tr, ubuf, cnt);
7405 if (written < 0)
7406 break;
7407 }
7408 } else {
7409 written = write_raw_marker_to_buffer(tr, ubuf, cnt);
7410 }
7411
7412 return written;
7413 }
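
/*
 * Hedged user-space sketch for trace_marker_raw.  As enforced above, the
 * payload must start with at least a 4-byte tag id; the rest is opaque
 * binary data.  The tracefs path is an assumption about where it is
 * mounted, and the tag value is arbitrary.
 *
 *	#include <fcntl.h>
 *	#include <stdint.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		struct {
 *			uint32_t id;		// read back as entry->id
 *			char payload[16];	// opaque data following the id
 *		} raw = { .id = 42 };
 *		int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *
 *		if (fd < 0)
 *			return 1;
 *		memcpy(raw.payload, "binary blob", 12);
 *		write(fd, &raw, sizeof(raw));
 *		close(fd);
 *		return 0;
 *	}
 */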
7414
tracing_clock_show(struct seq_file * m,void * v)7415 static int tracing_clock_show(struct seq_file *m, void *v)
7416 {
7417 struct trace_array *tr = m->private;
7418 int i;
7419
7420 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7421 seq_printf(m,
7422 "%s%s%s%s", i ? " " : "",
7423 i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7424 i == tr->clock_id ? "]" : "");
7425 seq_putc(m, '\n');
7426
7427 return 0;
7428 }
7429
tracing_set_clock(struct trace_array * tr,const char * clockstr)7430 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7431 {
7432 int i;
7433
7434 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7435 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7436 break;
7437 }
7438 if (i == ARRAY_SIZE(trace_clocks))
7439 return -EINVAL;
7440
7441 mutex_lock(&trace_types_lock);
7442
7443 tr->clock_id = i;
7444
7445 ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7446
7447 /*
7448 * New clock may not be consistent with the previous clock.
7449 * Reset the buffer so that it doesn't have incomparable timestamps.
7450 */
7451 tracing_reset_online_cpus(&tr->array_buffer);
7452
7453 #ifdef CONFIG_TRACER_MAX_TRACE
7454 if (tr->max_buffer.buffer)
7455 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7456 tracing_reset_online_cpus(&tr->max_buffer);
7457 #endif
7458
7459 if (tr->scratch && !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) {
7460 struct trace_scratch *tscratch = tr->scratch;
7461
7462 tscratch->clock_id = i;
7463 }
7464
7465 mutex_unlock(&trace_types_lock);
7466
7467 return 0;
7468 }
7469
tracing_clock_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * fpos)7470 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7471 size_t cnt, loff_t *fpos)
7472 {
7473 struct seq_file *m = filp->private_data;
7474 struct trace_array *tr = m->private;
7475 char buf[64];
7476 const char *clockstr;
7477 int ret;
7478
7479 if (cnt >= sizeof(buf))
7480 return -EINVAL;
7481
7482 if (copy_from_user(buf, ubuf, cnt))
7483 return -EFAULT;
7484
7485 buf[cnt] = 0;
7486
7487 clockstr = strstrip(buf);
7488
7489 ret = tracing_set_clock(tr, clockstr);
7490 if (ret)
7491 return ret;
7492
7493 *fpos += cnt;
7494
7495 return cnt;
7496 }
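
/*
 * Selecting a clock from user space is just a short string write to the
 * trace_clock file; tracing_set_clock() matches it against
 * trace_clocks[].name.  A minimal sketch (the "mono" clock name and the
 * tracefs path are assumptions):
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	static int set_trace_clock(const char *name)
 *	{
 *		int fd = open("/sys/kernel/tracing/trace_clock", O_WRONLY);
 *		ssize_t ret;
 *
 *		if (fd < 0)
 *			return -1;
 *		ret = write(fd, name, strlen(name));
 *		close(fd);
 *		return ret < 0 ? -1 : 0;
 *	}
 *
 * Note that switching clocks resets the buffers, since timestamps from
 * the old and new clocks are not comparable.
 */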
7497
tracing_clock_open(struct inode * inode,struct file * file)7498 static int tracing_clock_open(struct inode *inode, struct file *file)
7499 {
7500 struct trace_array *tr = inode->i_private;
7501 int ret;
7502
7503 ret = tracing_check_open_get_tr(tr);
7504 if (ret)
7505 return ret;
7506
7507 ret = single_open(file, tracing_clock_show, inode->i_private);
7508 if (ret < 0)
7509 trace_array_put(tr);
7510
7511 return ret;
7512 }
7513
tracing_time_stamp_mode_show(struct seq_file * m,void * v)7514 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7515 {
7516 struct trace_array *tr = m->private;
7517
7518 mutex_lock(&trace_types_lock);
7519
7520 if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7521 seq_puts(m, "delta [absolute]\n");
7522 else
7523 seq_puts(m, "[delta] absolute\n");
7524
7525 mutex_unlock(&trace_types_lock);
7526
7527 return 0;
7528 }
7529
tracing_time_stamp_mode_open(struct inode * inode,struct file * file)7530 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7531 {
7532 struct trace_array *tr = inode->i_private;
7533 int ret;
7534
7535 ret = tracing_check_open_get_tr(tr);
7536 if (ret)
7537 return ret;
7538
7539 ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7540 if (ret < 0)
7541 trace_array_put(tr);
7542
7543 return ret;
7544 }
7545
tracing_event_time_stamp(struct trace_buffer * buffer,struct ring_buffer_event * rbe)7546 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7547 {
7548 if (rbe == this_cpu_read(trace_buffered_event))
7549 return ring_buffer_time_stamp(buffer);
7550
7551 return ring_buffer_event_time_stamp(buffer, rbe);
7552 }
7553
7554 /*
7555 * Enable or disable use of the per-CPU trace_buffered_event when possible.
7556 */
tracing_set_filter_buffering(struct trace_array * tr,bool set)7557 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7558 {
7559 guard(mutex)(&trace_types_lock);
7560
7561 if (set && tr->no_filter_buffering_ref++)
7562 return 0;
7563
7564 if (!set) {
7565 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref))
7566 return -EINVAL;
7567
7568 --tr->no_filter_buffering_ref;
7569 }
7570
7571 return 0;
7572 }
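
/*
 * Callers are expected to pair these calls: every
 * tracing_set_filter_buffering(tr, true) must eventually be matched by a
 * tracing_set_filter_buffering(tr, false), as sketched below for a
 * hypothetical caller:
 *
 *	tracing_set_filter_buffering(tr, true);	// bump no_filter_buffering_ref
 *	// ... install something that needs the real event timestamp ...
 *	tracing_set_filter_buffering(tr, false);	// drop the reference
 */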
7573
7574 struct ftrace_buffer_info {
7575 struct trace_iterator iter;
7576 void *spare;
7577 unsigned int spare_cpu;
7578 unsigned int spare_size;
7579 unsigned int read;
7580 };
7581
7582 #ifdef CONFIG_TRACER_SNAPSHOT
tracing_snapshot_open(struct inode * inode,struct file * file)7583 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7584 {
7585 struct trace_array *tr = inode->i_private;
7586 struct trace_iterator *iter;
7587 struct seq_file *m;
7588 int ret;
7589
7590 ret = tracing_check_open_get_tr(tr);
7591 if (ret)
7592 return ret;
7593
7594 if (file->f_mode & FMODE_READ) {
7595 iter = __tracing_open(inode, file, true);
7596 if (IS_ERR(iter))
7597 ret = PTR_ERR(iter);
7598 } else {
7599 /* Writes still need the seq_file to hold the private data */
7600 ret = -ENOMEM;
7601 m = kzalloc(sizeof(*m), GFP_KERNEL);
7602 if (!m)
7603 goto out;
7604 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7605 if (!iter) {
7606 kfree(m);
7607 goto out;
7608 }
7609 ret = 0;
7610
7611 iter->tr = tr;
7612 iter->array_buffer = &tr->max_buffer;
7613 iter->cpu_file = tracing_get_cpu(inode);
7614 m->private = iter;
7615 file->private_data = m;
7616 }
7617 out:
7618 if (ret < 0)
7619 trace_array_put(tr);
7620
7621 return ret;
7622 }
7623
tracing_swap_cpu_buffer(void * tr)7624 static void tracing_swap_cpu_buffer(void *tr)
7625 {
7626 update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7627 }
7628
7629 static ssize_t
tracing_snapshot_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)7630 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7631 loff_t *ppos)
7632 {
7633 struct seq_file *m = filp->private_data;
7634 struct trace_iterator *iter = m->private;
7635 struct trace_array *tr = iter->tr;
7636 unsigned long val;
7637 int ret;
7638
7639 ret = tracing_update_buffers(tr);
7640 if (ret < 0)
7641 return ret;
7642
7643 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7644 if (ret)
7645 return ret;
7646
7647 guard(mutex)(&trace_types_lock);
7648
7649 if (tr->current_trace->use_max_tr)
7650 return -EBUSY;
7651
7652 local_irq_disable();
7653 arch_spin_lock(&tr->max_lock);
7654 if (tr->cond_snapshot)
7655 ret = -EBUSY;
7656 arch_spin_unlock(&tr->max_lock);
7657 local_irq_enable();
7658 if (ret)
7659 return ret;
7660
7661 switch (val) {
7662 case 0:
7663 if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
7664 return -EINVAL;
7665 if (tr->allocated_snapshot)
7666 free_snapshot(tr);
7667 break;
7668 case 1:
7669 /* Only allow per-cpu swap if the ring buffer supports it */
7670 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7671 if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
7672 return -EINVAL;
7673 #endif
7674 if (tr->allocated_snapshot)
7675 ret = resize_buffer_duplicate_size(&tr->max_buffer,
7676 &tr->array_buffer, iter->cpu_file);
7677
7678 ret = tracing_arm_snapshot_locked(tr);
7679 if (ret)
7680 return ret;
7681
7682 /* Now, we're going to swap */
7683 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7684 local_irq_disable();
7685 update_max_tr(tr, current, smp_processor_id(), NULL);
7686 local_irq_enable();
7687 } else {
7688 smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7689 (void *)tr, 1);
7690 }
7691 tracing_disarm_snapshot(tr);
7692 break;
7693 default:
7694 if (tr->allocated_snapshot) {
7695 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7696 tracing_reset_online_cpus(&tr->max_buffer);
7697 else
7698 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7699 }
7700 break;
7701 }
7702
7703 if (ret >= 0) {
7704 *ppos += cnt;
7705 ret = cnt;
7706 }
7707
7708 return ret;
7709 }
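
/*
 * The value written to the snapshot file selects the action taken above:
 * "0" frees the allocated snapshot buffer, "1" allocates it if needed and
 * swaps it with the live buffer, and any other number simply clears the
 * snapshot buffer's contents.  A hedged user-space sketch (tracefs path
 * assumed):
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	static int take_snapshot(void)
 *	{
 *		int fd = open("/sys/kernel/tracing/snapshot", O_WRONLY);
 *		int ret = -1;
 *
 *		if (fd < 0)
 *			return -1;
 *		if (write(fd, "1", 1) == 1)	// swap live buffer into snapshot
 *			ret = 0;
 *		close(fd);
 *		return ret;
 *	}
 */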
7710
tracing_snapshot_release(struct inode * inode,struct file * file)7711 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7712 {
7713 struct seq_file *m = file->private_data;
7714 int ret;
7715
7716 ret = tracing_release(inode, file);
7717
7718 if (file->f_mode & FMODE_READ)
7719 return ret;
7720
7721 /* If write only, the seq_file is just a stub */
7722 if (m)
7723 kfree(m->private);
7724 kfree(m);
7725
7726 return 0;
7727 }
7728
7729 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7730 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7731 size_t count, loff_t *ppos);
7732 static int tracing_buffers_release(struct inode *inode, struct file *file);
7733 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7734 struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7735
snapshot_raw_open(struct inode * inode,struct file * filp)7736 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7737 {
7738 struct ftrace_buffer_info *info;
7739 int ret;
7740
7741 /* The following checks for tracefs lockdown */
7742 ret = tracing_buffers_open(inode, filp);
7743 if (ret < 0)
7744 return ret;
7745
7746 info = filp->private_data;
7747
7748 if (info->iter.trace->use_max_tr) {
7749 tracing_buffers_release(inode, filp);
7750 return -EBUSY;
7751 }
7752
7753 info->iter.snapshot = true;
7754 info->iter.array_buffer = &info->iter.tr->max_buffer;
7755
7756 return ret;
7757 }
7758
7759 #endif /* CONFIG_TRACER_SNAPSHOT */
7760
7761
7762 static const struct file_operations tracing_thresh_fops = {
7763 .open = tracing_open_generic,
7764 .read = tracing_thresh_read,
7765 .write = tracing_thresh_write,
7766 .llseek = generic_file_llseek,
7767 };
7768
7769 #ifdef CONFIG_TRACER_MAX_TRACE
7770 static const struct file_operations tracing_max_lat_fops = {
7771 .open = tracing_open_generic_tr,
7772 .read = tracing_max_lat_read,
7773 .write = tracing_max_lat_write,
7774 .llseek = generic_file_llseek,
7775 .release = tracing_release_generic_tr,
7776 };
7777 #endif
7778
7779 static const struct file_operations set_tracer_fops = {
7780 .open = tracing_open_generic_tr,
7781 .read = tracing_set_trace_read,
7782 .write = tracing_set_trace_write,
7783 .llseek = generic_file_llseek,
7784 .release = tracing_release_generic_tr,
7785 };
7786
7787 static const struct file_operations tracing_pipe_fops = {
7788 .open = tracing_open_pipe,
7789 .poll = tracing_poll_pipe,
7790 .read = tracing_read_pipe,
7791 .splice_read = tracing_splice_read_pipe,
7792 .release = tracing_release_pipe,
7793 };
7794
7795 static const struct file_operations tracing_entries_fops = {
7796 .open = tracing_open_generic_tr,
7797 .read = tracing_entries_read,
7798 .write = tracing_entries_write,
7799 .llseek = generic_file_llseek,
7800 .release = tracing_release_generic_tr,
7801 };
7802
7803 static const struct file_operations tracing_buffer_meta_fops = {
7804 .open = tracing_buffer_meta_open,
7805 .read = seq_read,
7806 .llseek = seq_lseek,
7807 .release = tracing_seq_release,
7808 };
7809
7810 static const struct file_operations tracing_total_entries_fops = {
7811 .open = tracing_open_generic_tr,
7812 .read = tracing_total_entries_read,
7813 .llseek = generic_file_llseek,
7814 .release = tracing_release_generic_tr,
7815 };
7816
7817 static const struct file_operations tracing_free_buffer_fops = {
7818 .open = tracing_open_generic_tr,
7819 .write = tracing_free_buffer_write,
7820 .release = tracing_free_buffer_release,
7821 };
7822
7823 static const struct file_operations tracing_mark_fops = {
7824 .open = tracing_mark_open,
7825 .write = tracing_mark_write,
7826 .release = tracing_release_generic_tr,
7827 };
7828
7829 static const struct file_operations tracing_mark_raw_fops = {
7830 .open = tracing_mark_open,
7831 .write = tracing_mark_raw_write,
7832 .release = tracing_release_generic_tr,
7833 };
7834
7835 static const struct file_operations trace_clock_fops = {
7836 .open = tracing_clock_open,
7837 .read = seq_read,
7838 .llseek = seq_lseek,
7839 .release = tracing_single_release_tr,
7840 .write = tracing_clock_write,
7841 };
7842
7843 static const struct file_operations trace_time_stamp_mode_fops = {
7844 .open = tracing_time_stamp_mode_open,
7845 .read = seq_read,
7846 .llseek = seq_lseek,
7847 .release = tracing_single_release_tr,
7848 };
7849
7850 static const struct file_operations last_boot_fops = {
7851 .open = tracing_last_boot_open,
7852 .read = seq_read,
7853 .llseek = seq_lseek,
7854 .release = tracing_seq_release,
7855 };
7856
7857 #ifdef CONFIG_TRACER_SNAPSHOT
7858 static const struct file_operations snapshot_fops = {
7859 .open = tracing_snapshot_open,
7860 .read = seq_read,
7861 .write = tracing_snapshot_write,
7862 .llseek = tracing_lseek,
7863 .release = tracing_snapshot_release,
7864 };
7865
7866 static const struct file_operations snapshot_raw_fops = {
7867 .open = snapshot_raw_open,
7868 .read = tracing_buffers_read,
7869 .release = tracing_buffers_release,
7870 .splice_read = tracing_buffers_splice_read,
7871 };
7872
7873 #endif /* CONFIG_TRACER_SNAPSHOT */
7874
7875 /*
7876 * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7877 * @filp: The active open file structure
7878 * @ubuf: The userspace provided buffer holding the value to write
7879 * @cnt: The number of bytes to write from @ubuf
7880 * @ppos: The current "file" position
7881 *
7882 * This function implements the write interface for a struct trace_min_max_param.
7883 * The filp->private_data must point to a trace_min_max_param structure that
7884 * defines where to write the value, the min and the max acceptable values,
7885 * and a lock to protect the write.
7886 */
7887 static ssize_t
trace_min_max_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)7888 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7889 {
7890 struct trace_min_max_param *param = filp->private_data;
7891 u64 val;
7892 int err;
7893
7894 if (!param)
7895 return -EFAULT;
7896
7897 err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7898 if (err)
7899 return err;
7900
7901 if (param->lock)
7902 mutex_lock(param->lock);
7903
7904 if (param->min && val < *param->min)
7905 err = -EINVAL;
7906
7907 if (param->max && val > *param->max)
7908 err = -EINVAL;
7909
7910 if (!err)
7911 *param->val = val;
7912
7913 if (param->lock)
7914 mutex_unlock(param->lock);
7915
7916 if (err)
7917 return err;
7918
7919 return cnt;
7920 }
7921
7922 /*
7923 * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7924 * @filp: The active open file structure
7925 * @ubuf: The userspace provided buffer to read value into
7926 * @cnt: The maximum number of bytes to read
7927 * @ppos: The current "file" position
7928 *
7929 * This function implements the read interface for a struct trace_min_max_param.
7930 * The filp->private_data must point to a trace_min_max_param struct with valid
7931 * data.
7932 */
7933 static ssize_t
trace_min_max_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)7934 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7935 {
7936 struct trace_min_max_param *param = filp->private_data;
7937 char buf[U64_STR_SIZE];
7938 int len;
7939 u64 val;
7940
7941 if (!param)
7942 return -EFAULT;
7943
7944 val = *param->val;
7945
7946 if (cnt > sizeof(buf))
7947 cnt = sizeof(buf);
7948
7949 len = snprintf(buf, sizeof(buf), "%llu\n", val);
7950
7951 return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7952 }
7953
7954 const struct file_operations trace_min_max_fops = {
7955 .open = tracing_open_generic,
7956 .read = trace_min_max_read,
7957 .write = trace_min_max_write,
7958 };
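
/*
 * A hypothetical in-kernel user of trace_min_max_fops: fill out a
 * struct trace_min_max_param with pointers to the value and its bounds
 * (plus an optional lock), then hand its address to trace_create_file()
 * as the file's private data.  The names below are illustrative only.
 *
 *	static u64 example_val = 50;
 *	static u64 example_min = 1;
 *	static u64 example_max = 100;
 *
 *	static struct trace_min_max_param example_param = {
 *		.lock	= NULL,		// or a mutex protecting the value
 *		.val	= &example_val,
 *		.min	= &example_min,
 *		.max	= &example_max,
 *	};
 *
 *	trace_create_file("example_value", TRACE_MODE_WRITE, parent,
 *			  &example_param, &trace_min_max_fops);
 */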
7959
7960 #define TRACING_LOG_ERRS_MAX 8
7961 #define TRACING_LOG_LOC_MAX 128
7962
7963 #define CMD_PREFIX " Command: "
7964
7965 struct err_info {
7966 const char **errs; /* ptr to loc-specific array of err strings */
7967 u8 type; /* index into errs -> specific err string */
7968 u16 pos; /* caret position */
7969 u64 ts;
7970 };
7971
7972 struct tracing_log_err {
7973 struct list_head list;
7974 struct err_info info;
7975 char loc[TRACING_LOG_LOC_MAX]; /* err location */
7976 char *cmd; /* what caused err */
7977 };
7978
7979 static DEFINE_MUTEX(tracing_err_log_lock);
7980
alloc_tracing_log_err(int len)7981 static struct tracing_log_err *alloc_tracing_log_err(int len)
7982 {
7983 struct tracing_log_err *err;
7984
7985 err = kzalloc(sizeof(*err), GFP_KERNEL);
7986 if (!err)
7987 return ERR_PTR(-ENOMEM);
7988
7989 err->cmd = kzalloc(len, GFP_KERNEL);
7990 if (!err->cmd) {
7991 kfree(err);
7992 return ERR_PTR(-ENOMEM);
7993 }
7994
7995 return err;
7996 }
7997
free_tracing_log_err(struct tracing_log_err * err)7998 static void free_tracing_log_err(struct tracing_log_err *err)
7999 {
8000 kfree(err->cmd);
8001 kfree(err);
8002 }
8003
get_tracing_log_err(struct trace_array * tr,int len)8004 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
8005 int len)
8006 {
8007 struct tracing_log_err *err;
8008 char *cmd;
8009
8010 if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
8011 err = alloc_tracing_log_err(len);
8012 if (PTR_ERR(err) != -ENOMEM)
8013 tr->n_err_log_entries++;
8014
8015 return err;
8016 }
8017 cmd = kzalloc(len, GFP_KERNEL);
8018 if (!cmd)
8019 return ERR_PTR(-ENOMEM);
8020 err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
8021 kfree(err->cmd);
8022 err->cmd = cmd;
8023 list_del(&err->list);
8024
8025 return err;
8026 }
8027
8028 /**
8029 * err_pos - find the position of a string within a command for error careting
8030 * @cmd: The tracing command that caused the error
8031 * @str: The string to position the caret at within @cmd
8032 *
8033 * Finds the position of the first occurrence of @str within @cmd. The
8034 * return value can be passed to tracing_log_err() for caret placement
8035 * within @cmd.
8036 *
8037 * Returns the index within @cmd of the first occurrence of @str or 0
8038 * if @str was not found.
8039 */
err_pos(char * cmd,const char * str)8040 unsigned int err_pos(char *cmd, const char *str)
8041 {
8042 char *found;
8043
8044 if (WARN_ON(!strlen(cmd)))
8045 return 0;
8046
8047 found = strstr(cmd, str);
8048 if (found)
8049 return found - cmd;
8050
8051 return 0;
8052 }
8053
8054 /**
8055 * tracing_log_err - write an error to the tracing error log
8056 * @tr: The associated trace array for the error (NULL for top level array)
8057 * @loc: A string describing where the error occurred
8058 * @cmd: The tracing command that caused the error
8059 * @errs: The array of loc-specific static error strings
8060 * @type: The index into errs[], which produces the specific static err string
8061 * @pos: The position the caret should be placed in the cmd
8062 *
8063 * Writes an error into tracing/error_log of the form:
8064 *
8065 * <loc>: error: <text>
8066 * Command: <cmd>
8067 * ^
8068 *
8069 * tracing/error_log is a small log file containing the last
8070 * TRACING_LOG_ERRS_MAX errors (8). Memory for errors isn't allocated
8071 * unless there has been a tracing error; the error log can be
8072 * cleared and have its memory freed by writing the empty string to
8073 * it in truncation mode, i.e. echo > tracing/error_log.
8074 *
8075 * NOTE: the @errs array along with the @type param are used to
8076 * produce a static error string - this string is not copied and saved
8077 * when the error is logged - only a pointer to it is saved. See
8078 * existing callers for examples of how static strings are typically
8079 * defined for use with tracing_log_err().
8080 */
tracing_log_err(struct trace_array * tr,const char * loc,const char * cmd,const char ** errs,u8 type,u16 pos)8081 void tracing_log_err(struct trace_array *tr,
8082 const char *loc, const char *cmd,
8083 const char **errs, u8 type, u16 pos)
8084 {
8085 struct tracing_log_err *err;
8086 int len = 0;
8087
8088 if (!tr)
8089 tr = &global_trace;
8090
8091 len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
8092
8093 guard(mutex)(&tracing_err_log_lock);
8094
8095 err = get_tracing_log_err(tr, len);
8096 if (PTR_ERR(err) == -ENOMEM)
8097 return;
8098
8099 snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
8100 snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
8101
8102 err->info.errs = errs;
8103 err->info.type = type;
8104 err->info.pos = pos;
8105 err->info.ts = local_clock();
8106
8107 list_add_tail(&err->list, &tr->err_log);
8108 }
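
/*
 * Sketch of how a hypothetical caller feeds this log.  The error strings
 * live in a static array, @type indexes into it, and err_pos() supplies
 * the caret position within the offending command:
 *
 *	static const char *example_errs[] = {
 *		"Field not found",
 *		"Duplicate field",
 *	};
 *
 *	static void example_report(struct trace_array *tr, char *cmd,
 *				   const char *bad_token)
 *	{
 *		tracing_log_err(tr, "example: parse", cmd, example_errs,
 *				0, err_pos(cmd, bad_token));
 *	}
 *
 * The resulting entry shows up in tracing/error_log in the
 * "<loc>: error: <text>" / "Command:" format documented above.
 */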
8109
clear_tracing_err_log(struct trace_array * tr)8110 static void clear_tracing_err_log(struct trace_array *tr)
8111 {
8112 struct tracing_log_err *err, *next;
8113
8114 mutex_lock(&tracing_err_log_lock);
8115 list_for_each_entry_safe(err, next, &tr->err_log, list) {
8116 list_del(&err->list);
8117 free_tracing_log_err(err);
8118 }
8119
8120 tr->n_err_log_entries = 0;
8121 mutex_unlock(&tracing_err_log_lock);
8122 }
8123
tracing_err_log_seq_start(struct seq_file * m,loff_t * pos)8124 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
8125 {
8126 struct trace_array *tr = m->private;
8127
8128 mutex_lock(&tracing_err_log_lock);
8129
8130 return seq_list_start(&tr->err_log, *pos);
8131 }
8132
tracing_err_log_seq_next(struct seq_file * m,void * v,loff_t * pos)8133 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
8134 {
8135 struct trace_array *tr = m->private;
8136
8137 return seq_list_next(v, &tr->err_log, pos);
8138 }
8139
tracing_err_log_seq_stop(struct seq_file * m,void * v)8140 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
8141 {
8142 mutex_unlock(&tracing_err_log_lock);
8143 }
8144
tracing_err_log_show_pos(struct seq_file * m,u16 pos)8145 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
8146 {
8147 u16 i;
8148
8149 for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
8150 seq_putc(m, ' ');
8151 for (i = 0; i < pos; i++)
8152 seq_putc(m, ' ');
8153 seq_puts(m, "^\n");
8154 }
8155
tracing_err_log_seq_show(struct seq_file * m,void * v)8156 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
8157 {
8158 struct tracing_log_err *err = v;
8159
8160 if (err) {
8161 const char *err_text = err->info.errs[err->info.type];
8162 u64 sec = err->info.ts;
8163 u32 nsec;
8164
8165 nsec = do_div(sec, NSEC_PER_SEC);
8166 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
8167 err->loc, err_text);
8168 seq_printf(m, "%s", err->cmd);
8169 tracing_err_log_show_pos(m, err->info.pos);
8170 }
8171
8172 return 0;
8173 }
8174
8175 static const struct seq_operations tracing_err_log_seq_ops = {
8176 .start = tracing_err_log_seq_start,
8177 .next = tracing_err_log_seq_next,
8178 .stop = tracing_err_log_seq_stop,
8179 .show = tracing_err_log_seq_show
8180 };
8181
tracing_err_log_open(struct inode * inode,struct file * file)8182 static int tracing_err_log_open(struct inode *inode, struct file *file)
8183 {
8184 struct trace_array *tr = inode->i_private;
8185 int ret = 0;
8186
8187 ret = tracing_check_open_get_tr(tr);
8188 if (ret)
8189 return ret;
8190
8191 /* If this file was opened for write, then erase contents */
8192 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8193 clear_tracing_err_log(tr);
8194
8195 if (file->f_mode & FMODE_READ) {
8196 ret = seq_open(file, &tracing_err_log_seq_ops);
8197 if (!ret) {
8198 struct seq_file *m = file->private_data;
8199 m->private = tr;
8200 } else {
8201 trace_array_put(tr);
8202 }
8203 }
8204 return ret;
8205 }
8206
tracing_err_log_write(struct file * file,const char __user * buffer,size_t count,loff_t * ppos)8207 static ssize_t tracing_err_log_write(struct file *file,
8208 const char __user *buffer,
8209 size_t count, loff_t *ppos)
8210 {
8211 return count;
8212 }
8213
tracing_err_log_release(struct inode * inode,struct file * file)8214 static int tracing_err_log_release(struct inode *inode, struct file *file)
8215 {
8216 struct trace_array *tr = inode->i_private;
8217
8218 trace_array_put(tr);
8219
8220 if (file->f_mode & FMODE_READ)
8221 seq_release(inode, file);
8222
8223 return 0;
8224 }
8225
8226 static const struct file_operations tracing_err_log_fops = {
8227 .open = tracing_err_log_open,
8228 .write = tracing_err_log_write,
8229 .read = seq_read,
8230 .llseek = tracing_lseek,
8231 .release = tracing_err_log_release,
8232 };
8233
tracing_buffers_open(struct inode * inode,struct file * filp)8234 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8235 {
8236 struct trace_array *tr = inode->i_private;
8237 struct ftrace_buffer_info *info;
8238 int ret;
8239
8240 ret = tracing_check_open_get_tr(tr);
8241 if (ret)
8242 return ret;
8243
8244 info = kvzalloc(sizeof(*info), GFP_KERNEL);
8245 if (!info) {
8246 trace_array_put(tr);
8247 return -ENOMEM;
8248 }
8249
8250 mutex_lock(&trace_types_lock);
8251
8252 info->iter.tr = tr;
8253 info->iter.cpu_file = tracing_get_cpu(inode);
8254 info->iter.trace = tr->current_trace;
8255 info->iter.array_buffer = &tr->array_buffer;
8256 info->spare = NULL;
8257 /* Force reading ring buffer for first read */
8258 info->read = (unsigned int)-1;
8259
8260 filp->private_data = info;
8261
8262 tr->trace_ref++;
8263
8264 mutex_unlock(&trace_types_lock);
8265
8266 ret = nonseekable_open(inode, filp);
8267 if (ret < 0)
8268 trace_array_put(tr);
8269
8270 return ret;
8271 }
8272
8273 static __poll_t
tracing_buffers_poll(struct file * filp,poll_table * poll_table)8274 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8275 {
8276 struct ftrace_buffer_info *info = filp->private_data;
8277 struct trace_iterator *iter = &info->iter;
8278
8279 return trace_poll(iter, filp, poll_table);
8280 }
8281
8282 static ssize_t
tracing_buffers_read(struct file * filp,char __user * ubuf,size_t count,loff_t * ppos)8283 tracing_buffers_read(struct file *filp, char __user *ubuf,
8284 size_t count, loff_t *ppos)
8285 {
8286 struct ftrace_buffer_info *info = filp->private_data;
8287 struct trace_iterator *iter = &info->iter;
8288 void *trace_data;
8289 int page_size;
8290 ssize_t ret = 0;
8291 ssize_t size;
8292
8293 if (!count)
8294 return 0;
8295
8296 #ifdef CONFIG_TRACER_MAX_TRACE
8297 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8298 return -EBUSY;
8299 #endif
8300
8301 page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8302
8303 /* Make sure the spare matches the current sub buffer size */
8304 if (info->spare) {
8305 if (page_size != info->spare_size) {
8306 ring_buffer_free_read_page(iter->array_buffer->buffer,
8307 info->spare_cpu, info->spare);
8308 info->spare = NULL;
8309 }
8310 }
8311
8312 if (!info->spare) {
8313 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8314 iter->cpu_file);
8315 if (IS_ERR(info->spare)) {
8316 ret = PTR_ERR(info->spare);
8317 info->spare = NULL;
8318 } else {
8319 info->spare_cpu = iter->cpu_file;
8320 info->spare_size = page_size;
8321 }
8322 }
8323 if (!info->spare)
8324 return ret;
8325
8326 /* Do we have previous read data to read? */
8327 if (info->read < page_size)
8328 goto read;
8329
8330 again:
8331 trace_access_lock(iter->cpu_file);
8332 ret = ring_buffer_read_page(iter->array_buffer->buffer,
8333 info->spare,
8334 count,
8335 iter->cpu_file, 0);
8336 trace_access_unlock(iter->cpu_file);
8337
8338 if (ret < 0) {
8339 if (trace_empty(iter) && !iter->closed) {
8340 if (update_last_data_if_empty(iter->tr))
8341 return 0;
8342
8343 if ((filp->f_flags & O_NONBLOCK))
8344 return -EAGAIN;
8345
8346 ret = wait_on_pipe(iter, 0);
8347 if (ret)
8348 return ret;
8349
8350 goto again;
8351 }
8352 return 0;
8353 }
8354
8355 info->read = 0;
8356 read:
8357 size = page_size - info->read;
8358 if (size > count)
8359 size = count;
8360 trace_data = ring_buffer_read_page_data(info->spare);
8361 ret = copy_to_user(ubuf, trace_data + info->read, size);
8362 if (ret == size)
8363 return -EFAULT;
8364
8365 size -= ret;
8366
8367 *ppos += size;
8368 info->read += size;
8369
8370 return size;
8371 }
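
/*
 * trace_pipe_raw hands out ring-buffer sub-buffers as raw binary data
 * rather than formatted text.  A hedged user-space sketch that drains
 * cpu0's buffer (the tracefs path and buffer size are illustrative):
 *
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		char buf[4096];
 *		ssize_t r;
 *		int fd;
 *
 *		fd = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw",
 *			  O_RDONLY | O_NONBLOCK);
 *		if (fd < 0)
 *			return 1;
 *		// With O_NONBLOCK the loop ends once the buffer is drained.
 *		while ((r = read(fd, buf, sizeof(buf))) > 0)
 *			fprintf(stderr, "read %zd raw bytes\n", r);
 *		close(fd);
 *		return 0;
 *	}
 */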
8372
tracing_buffers_flush(struct file * file,fl_owner_t id)8373 static int tracing_buffers_flush(struct file *file, fl_owner_t id)
8374 {
8375 struct ftrace_buffer_info *info = file->private_data;
8376 struct trace_iterator *iter = &info->iter;
8377
8378 iter->closed = true;
8379 /* Make sure the waiters see the new wait_index */
8380 (void)atomic_fetch_inc_release(&iter->wait_index);
8381
8382 ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8383
8384 return 0;
8385 }
8386
tracing_buffers_release(struct inode * inode,struct file * file)8387 static int tracing_buffers_release(struct inode *inode, struct file *file)
8388 {
8389 struct ftrace_buffer_info *info = file->private_data;
8390 struct trace_iterator *iter = &info->iter;
8391
8392 mutex_lock(&trace_types_lock);
8393
8394 iter->tr->trace_ref--;
8395
8396 __trace_array_put(iter->tr);
8397
8398 if (info->spare)
8399 ring_buffer_free_read_page(iter->array_buffer->buffer,
8400 info->spare_cpu, info->spare);
8401 kvfree(info);
8402
8403 mutex_unlock(&trace_types_lock);
8404
8405 return 0;
8406 }
8407
8408 struct buffer_ref {
8409 struct trace_buffer *buffer;
8410 void *page;
8411 int cpu;
8412 refcount_t refcount;
8413 };
8414
buffer_ref_release(struct buffer_ref * ref)8415 static void buffer_ref_release(struct buffer_ref *ref)
8416 {
8417 if (!refcount_dec_and_test(&ref->refcount))
8418 return;
8419 ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8420 kfree(ref);
8421 }
8422
buffer_pipe_buf_release(struct pipe_inode_info * pipe,struct pipe_buffer * buf)8423 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8424 struct pipe_buffer *buf)
8425 {
8426 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8427
8428 buffer_ref_release(ref);
8429 buf->private = 0;
8430 }
8431
buffer_pipe_buf_get(struct pipe_inode_info * pipe,struct pipe_buffer * buf)8432 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8433 struct pipe_buffer *buf)
8434 {
8435 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8436
8437 if (refcount_read(&ref->refcount) > INT_MAX/2)
8438 return false;
8439
8440 refcount_inc(&ref->refcount);
8441 return true;
8442 }
8443
8444 /* Pipe buffer operations for a ring buffer page reference. */
8445 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8446 .release = buffer_pipe_buf_release,
8447 .get = buffer_pipe_buf_get,
8448 };
8449
8450 /*
8451 * Callback from splice_to_pipe(); releases the pages left at the end
8452 * of the spd if we errored out while filling the pipe.
8453 */
buffer_spd_release(struct splice_pipe_desc * spd,unsigned int i)8454 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8455 {
8456 struct buffer_ref *ref =
8457 (struct buffer_ref *)spd->partial[i].private;
8458
8459 buffer_ref_release(ref);
8460 spd->partial[i].private = 0;
8461 }
8462
8463 static ssize_t
tracing_buffers_splice_read(struct file * file,loff_t * ppos,struct pipe_inode_info * pipe,size_t len,unsigned int flags)8464 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8465 struct pipe_inode_info *pipe, size_t len,
8466 unsigned int flags)
8467 {
8468 struct ftrace_buffer_info *info = file->private_data;
8469 struct trace_iterator *iter = &info->iter;
8470 struct partial_page partial_def[PIPE_DEF_BUFFERS];
8471 struct page *pages_def[PIPE_DEF_BUFFERS];
8472 struct splice_pipe_desc spd = {
8473 .pages = pages_def,
8474 .partial = partial_def,
8475 .nr_pages_max = PIPE_DEF_BUFFERS,
8476 .ops = &buffer_pipe_buf_ops,
8477 .spd_release = buffer_spd_release,
8478 };
8479 struct buffer_ref *ref;
8480 bool woken = false;
8481 int page_size;
8482 int entries, i;
8483 ssize_t ret = 0;
8484
8485 #ifdef CONFIG_TRACER_MAX_TRACE
8486 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8487 return -EBUSY;
8488 #endif
8489
8490 page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8491 if (*ppos & (page_size - 1))
8492 return -EINVAL;
8493
8494 if (len & (page_size - 1)) {
8495 if (len < page_size)
8496 return -EINVAL;
8497 len &= (~(page_size - 1));
8498 }
8499
8500 if (splice_grow_spd(pipe, &spd))
8501 return -ENOMEM;
8502
8503 again:
8504 trace_access_lock(iter->cpu_file);
8505 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8506
8507 for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) {
8508 struct page *page;
8509 int r;
8510
8511 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8512 if (!ref) {
8513 ret = -ENOMEM;
8514 break;
8515 }
8516
8517 refcount_set(&ref->refcount, 1);
8518 ref->buffer = iter->array_buffer->buffer;
8519 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8520 if (IS_ERR(ref->page)) {
8521 ret = PTR_ERR(ref->page);
8522 ref->page = NULL;
8523 kfree(ref);
8524 break;
8525 }
8526 ref->cpu = iter->cpu_file;
8527
8528 r = ring_buffer_read_page(ref->buffer, ref->page,
8529 len, iter->cpu_file, 1);
8530 if (r < 0) {
8531 ring_buffer_free_read_page(ref->buffer, ref->cpu,
8532 ref->page);
8533 kfree(ref);
8534 break;
8535 }
8536
8537 page = virt_to_page(ring_buffer_read_page_data(ref->page));
8538
8539 spd.pages[i] = page;
8540 spd.partial[i].len = page_size;
8541 spd.partial[i].offset = 0;
8542 spd.partial[i].private = (unsigned long)ref;
8543 spd.nr_pages++;
8544 *ppos += page_size;
8545
8546 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8547 }
8548
8549 trace_access_unlock(iter->cpu_file);
8550 spd.nr_pages = i;
8551
8552 /* did we read anything? */
8553 if (!spd.nr_pages) {
8554
8555 if (ret)
8556 goto out;
8557
8558 if (woken)
8559 goto out;
8560
8561 ret = -EAGAIN;
8562 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8563 goto out;
8564
8565 ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
8566 if (ret)
8567 goto out;
8568
8569 /* No need to wait after waking up when tracing is off */
8570 if (!tracer_tracing_is_on(iter->tr))
8571 goto out;
8572
8573 /* Iterate one more time to collect any new data then exit */
8574 woken = true;
8575
8576 goto again;
8577 }
8578
8579 ret = splice_to_pipe(pipe, &spd);
8580 out:
8581 splice_shrink_spd(&spd);
8582
8583 return ret;
8584 }
8585
tracing_buffers_ioctl(struct file * file,unsigned int cmd,unsigned long arg)8586 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8587 {
8588 struct ftrace_buffer_info *info = file->private_data;
8589 struct trace_iterator *iter = &info->iter;
8590 int err;
8591
8592 if (cmd == TRACE_MMAP_IOCTL_GET_READER) {
8593 if (!(file->f_flags & O_NONBLOCK)) {
8594 err = ring_buffer_wait(iter->array_buffer->buffer,
8595 iter->cpu_file,
8596 iter->tr->buffer_percent,
8597 NULL, NULL);
8598 if (err)
8599 return err;
8600 }
8601
8602 return ring_buffer_map_get_reader(iter->array_buffer->buffer,
8603 iter->cpu_file);
8604 } else if (cmd) {
8605 return -ENOTTY;
8606 }
8607
8608 /*
8609 * An ioctl call with cmd 0 on the ring buffer file wakes up all
8610 * waiters.
8611 */
8612 mutex_lock(&trace_types_lock);
8613
8614 /* Make sure the waiters see the new wait_index */
8615 (void)atomic_fetch_inc_release(&iter->wait_index);
8616
8617 ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8618
8619 mutex_unlock(&trace_types_lock);
8620 return 0;
8621 }
8622
8623 #ifdef CONFIG_TRACER_MAX_TRACE
get_snapshot_map(struct trace_array * tr)8624 static int get_snapshot_map(struct trace_array *tr)
8625 {
8626 int err = 0;
8627
8628 /*
8629 * Called with mmap_lock held. lockdep would complain if we took
8630 * trace_types_lock here, so use the more specific
8631 * snapshot_trigger_lock instead.
8632 */
8633 spin_lock(&tr->snapshot_trigger_lock);
8634
8635 if (tr->snapshot || tr->mapped == UINT_MAX)
8636 err = -EBUSY;
8637 else
8638 tr->mapped++;
8639
8640 spin_unlock(&tr->snapshot_trigger_lock);
8641
8642 /* Wait for update_max_tr() to observe iter->tr->mapped */
8643 if (tr->mapped == 1)
8644 synchronize_rcu();
8645
8646 return err;
8647
8648 }
put_snapshot_map(struct trace_array * tr)8649 static void put_snapshot_map(struct trace_array *tr)
8650 {
8651 spin_lock(&tr->snapshot_trigger_lock);
8652 if (!WARN_ON(!tr->mapped))
8653 tr->mapped--;
8654 spin_unlock(&tr->snapshot_trigger_lock);
8655 }
8656 #else
get_snapshot_map(struct trace_array * tr)8657 static inline int get_snapshot_map(struct trace_array *tr) { return 0; }
put_snapshot_map(struct trace_array * tr)8658 static inline void put_snapshot_map(struct trace_array *tr) { }
8659 #endif
8660
tracing_buffers_mmap_close(struct vm_area_struct * vma)8661 static void tracing_buffers_mmap_close(struct vm_area_struct *vma)
8662 {
8663 struct ftrace_buffer_info *info = vma->vm_file->private_data;
8664 struct trace_iterator *iter = &info->iter;
8665
8666 WARN_ON(ring_buffer_unmap(iter->array_buffer->buffer, iter->cpu_file));
8667 put_snapshot_map(iter->tr);
8668 }
8669
8670 static const struct vm_operations_struct tracing_buffers_vmops = {
8671 .close = tracing_buffers_mmap_close,
8672 };
8673
tracing_buffers_mmap(struct file * filp,struct vm_area_struct * vma)8674 static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma)
8675 {
8676 struct ftrace_buffer_info *info = filp->private_data;
8677 struct trace_iterator *iter = &info->iter;
8678 int ret = 0;
8679
8680 /* A memmap'ed buffer is not supported for user space mmap */
8681 if (iter->tr->flags & TRACE_ARRAY_FL_MEMMAP)
8682 return -ENODEV;
8683
8684 ret = get_snapshot_map(iter->tr);
8685 if (ret)
8686 return ret;
8687
8688 ret = ring_buffer_map(iter->array_buffer->buffer, iter->cpu_file, vma);
8689 if (ret)
8690 put_snapshot_map(iter->tr);
8691
8692 vma->vm_ops = &tracing_buffers_vmops;
8693
8694 return ret;
8695 }
8696
8697 static const struct file_operations tracing_buffers_fops = {
8698 .open = tracing_buffers_open,
8699 .read = tracing_buffers_read,
8700 .poll = tracing_buffers_poll,
8701 .release = tracing_buffers_release,
8702 .flush = tracing_buffers_flush,
8703 .splice_read = tracing_buffers_splice_read,
8704 .unlocked_ioctl = tracing_buffers_ioctl,
8705 .mmap = tracing_buffers_mmap,
8706 };
8707
8708 static ssize_t
tracing_stats_read(struct file * filp,char __user * ubuf,size_t count,loff_t * ppos)8709 tracing_stats_read(struct file *filp, char __user *ubuf,
8710 size_t count, loff_t *ppos)
8711 {
8712 struct inode *inode = file_inode(filp);
8713 struct trace_array *tr = inode->i_private;
8714 struct array_buffer *trace_buf = &tr->array_buffer;
8715 int cpu = tracing_get_cpu(inode);
8716 struct trace_seq *s;
8717 unsigned long cnt;
8718 unsigned long long t;
8719 unsigned long usec_rem;
8720
8721 s = kmalloc(sizeof(*s), GFP_KERNEL);
8722 if (!s)
8723 return -ENOMEM;
8724
8725 trace_seq_init(s);
8726
8727 cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8728 trace_seq_printf(s, "entries: %ld\n", cnt);
8729
8730 cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8731 trace_seq_printf(s, "overrun: %ld\n", cnt);
8732
8733 cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8734 trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8735
8736 cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8737 trace_seq_printf(s, "bytes: %ld\n", cnt);
8738
8739 if (trace_clocks[tr->clock_id].in_ns) {
8740 /* local or global for trace_clock */
8741 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8742 usec_rem = do_div(t, USEC_PER_SEC);
8743 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8744 t, usec_rem);
8745
8746 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8747 usec_rem = do_div(t, USEC_PER_SEC);
8748 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8749 } else {
8750 /* counter or tsc mode for trace_clock */
8751 trace_seq_printf(s, "oldest event ts: %llu\n",
8752 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8753
8754 trace_seq_printf(s, "now ts: %llu\n",
8755 ring_buffer_time_stamp(trace_buf->buffer));
8756 }
8757
8758 cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8759 trace_seq_printf(s, "dropped events: %ld\n", cnt);
8760
8761 cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8762 trace_seq_printf(s, "read events: %ld\n", cnt);
8763
8764 count = simple_read_from_buffer(ubuf, count, ppos,
8765 s->buffer, trace_seq_used(s));
8766
8767 kfree(s);
8768
8769 return count;
8770 }
8771
8772 static const struct file_operations tracing_stats_fops = {
8773 .open = tracing_open_generic_tr,
8774 .read = tracing_stats_read,
8775 .llseek = generic_file_llseek,
8776 .release = tracing_release_generic_tr,
8777 };
8778
8779 #ifdef CONFIG_DYNAMIC_FTRACE
8780
8781 static ssize_t
tracing_read_dyn_info(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)8782 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8783 size_t cnt, loff_t *ppos)
8784 {
8785 ssize_t ret;
8786 char *buf;
8787 int r;
8788
8789 /* 512 bytes should be plenty to hold the formatted output */
8790 #define DYN_INFO_BUF_SIZE 512
8791
8792 buf = kmalloc(DYN_INFO_BUF_SIZE, GFP_KERNEL);
8793 if (!buf)
8794 return -ENOMEM;
8795
8796 r = scnprintf(buf, DYN_INFO_BUF_SIZE,
8797 "%ld pages:%ld groups: %ld\n"
8798 "ftrace boot update time = %llu (ns)\n"
8799 "ftrace module total update time = %llu (ns)\n",
8800 ftrace_update_tot_cnt,
8801 ftrace_number_of_pages,
8802 ftrace_number_of_groups,
8803 ftrace_update_time,
8804 ftrace_total_mod_time);
8805
8806 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8807 kfree(buf);
8808 return ret;
8809 }
8810
8811 static const struct file_operations tracing_dyn_info_fops = {
8812 .open = tracing_open_generic,
8813 .read = tracing_read_dyn_info,
8814 .llseek = generic_file_llseek,
8815 };
8816 #endif /* CONFIG_DYNAMIC_FTRACE */
8817
8818 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8819 static void
ftrace_snapshot(unsigned long ip,unsigned long parent_ip,struct trace_array * tr,struct ftrace_probe_ops * ops,void * data)8820 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8821 struct trace_array *tr, struct ftrace_probe_ops *ops,
8822 void *data)
8823 {
8824 tracing_snapshot_instance(tr);
8825 }
8826
8827 static void
ftrace_count_snapshot(unsigned long ip,unsigned long parent_ip,struct trace_array * tr,struct ftrace_probe_ops * ops,void * data)8828 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8829 struct trace_array *tr, struct ftrace_probe_ops *ops,
8830 void *data)
8831 {
8832 struct ftrace_func_mapper *mapper = data;
8833 long *count = NULL;
8834
8835 if (mapper)
8836 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8837
8838 if (count) {
8839
8840 if (*count <= 0)
8841 return;
8842
8843 (*count)--;
8844 }
8845
8846 tracing_snapshot_instance(tr);
8847 }
8848
8849 static int
ftrace_snapshot_print(struct seq_file * m,unsigned long ip,struct ftrace_probe_ops * ops,void * data)8850 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8851 struct ftrace_probe_ops *ops, void *data)
8852 {
8853 struct ftrace_func_mapper *mapper = data;
8854 long *count = NULL;
8855
8856 seq_printf(m, "%ps:", (void *)ip);
8857
8858 seq_puts(m, "snapshot");
8859
8860 if (mapper)
8861 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8862
8863 if (count)
8864 seq_printf(m, ":count=%ld\n", *count);
8865 else
8866 seq_puts(m, ":unlimited\n");
8867
8868 return 0;
8869 }
8870
8871 static int
ftrace_snapshot_init(struct ftrace_probe_ops * ops,struct trace_array * tr,unsigned long ip,void * init_data,void ** data)8872 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8873 unsigned long ip, void *init_data, void **data)
8874 {
8875 struct ftrace_func_mapper *mapper = *data;
8876
8877 if (!mapper) {
8878 mapper = allocate_ftrace_func_mapper();
8879 if (!mapper)
8880 return -ENOMEM;
8881 *data = mapper;
8882 }
8883
8884 return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8885 }
8886
8887 static void
ftrace_snapshot_free(struct ftrace_probe_ops * ops,struct trace_array * tr,unsigned long ip,void * data)8888 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8889 unsigned long ip, void *data)
8890 {
8891 struct ftrace_func_mapper *mapper = data;
8892
8893 if (!ip) {
8894 if (!mapper)
8895 return;
8896 free_ftrace_func_mapper(mapper, NULL);
8897 return;
8898 }
8899
8900 ftrace_func_mapper_remove_ip(mapper, ip);
8901 }
8902
8903 static struct ftrace_probe_ops snapshot_probe_ops = {
8904 .func = ftrace_snapshot,
8905 .print = ftrace_snapshot_print,
8906 };
8907
8908 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8909 .func = ftrace_count_snapshot,
8910 .print = ftrace_snapshot_print,
8911 .init = ftrace_snapshot_init,
8912 .free = ftrace_snapshot_free,
8913 };
8914
8915 static int
ftrace_trace_snapshot_callback(struct trace_array * tr,struct ftrace_hash * hash,char * glob,char * cmd,char * param,int enable)8916 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8917 char *glob, char *cmd, char *param, int enable)
8918 {
8919 struct ftrace_probe_ops *ops;
8920 void *count = (void *)-1;
8921 char *number;
8922 int ret;
8923
8924 if (!tr)
8925 return -ENODEV;
8926
8927 /* hash funcs only work with set_ftrace_filter */
8928 if (!enable)
8929 return -EINVAL;
8930
8931 ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8932
8933 if (glob[0] == '!') {
8934 ret = unregister_ftrace_function_probe_func(glob+1, tr, ops);
8935 if (!ret)
8936 tracing_disarm_snapshot(tr);
8937
8938 return ret;
8939 }
8940
8941 if (!param)
8942 goto out_reg;
8943
8944 number = strsep(¶m, ":");
8945
8946 if (!strlen(number))
8947 goto out_reg;
8948
8949 /*
8950 * We use the callback data field (which is a pointer)
8951 * as our counter.
8952 */
8953 ret = kstrtoul(number, 0, (unsigned long *)&count);
8954 if (ret)
8955 return ret;
8956
8957 out_reg:
8958 ret = tracing_arm_snapshot(tr);
8959 if (ret < 0)
8960 goto out;
8961
8962 ret = register_ftrace_function_probe(glob, tr, ops, count);
8963 if (ret < 0)
8964 tracing_disarm_snapshot(tr);
8965 out:
8966 return ret < 0 ? ret : 0;
8967 }
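
/*
 * The command registered below is used through set_ftrace_filter.  The
 * glob/cmd/param split parsed above corresponds to strings of the form
 * "<function>:snapshot[:<count>]", and a leading '!' unregisters the
 * probe.  For example (illustrative function name, tracefs path assumed):
 *
 *	# snapshot on every hit of kfree()
 *	echo 'kfree:snapshot' > /sys/kernel/tracing/set_ftrace_filter
 *
 *	# snapshot only the next 5 hits
 *	echo 'kfree:snapshot:5' > /sys/kernel/tracing/set_ftrace_filter
 *
 *	# remove the probe again
 *	echo '!kfree:snapshot' > /sys/kernel/tracing/set_ftrace_filter
 */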
8968
8969 static struct ftrace_func_command ftrace_snapshot_cmd = {
8970 .name = "snapshot",
8971 .func = ftrace_trace_snapshot_callback,
8972 };
8973
register_snapshot_cmd(void)8974 static __init int register_snapshot_cmd(void)
8975 {
8976 return register_ftrace_command(&ftrace_snapshot_cmd);
8977 }
8978 #else
register_snapshot_cmd(void)8979 static inline __init int register_snapshot_cmd(void) { return 0; }
8980 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8981
tracing_get_dentry(struct trace_array * tr)8982 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8983 {
8984 /* Top directory uses NULL as the parent */
8985 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8986 return NULL;
8987
8988 if (WARN_ON(!tr->dir))
8989 return ERR_PTR(-ENODEV);
8990
8991 /* All sub buffers have a descriptor */
8992 return tr->dir;
8993 }
8994
tracing_dentry_percpu(struct trace_array * tr,int cpu)8995 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8996 {
8997 struct dentry *d_tracer;
8998
8999 if (tr->percpu_dir)
9000 return tr->percpu_dir;
9001
9002 d_tracer = tracing_get_dentry(tr);
9003 if (IS_ERR(d_tracer))
9004 return NULL;
9005
9006 tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
9007
9008 MEM_FAIL(!tr->percpu_dir,
9009 "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
9010
9011 return tr->percpu_dir;
9012 }
9013
9014 static struct dentry *
trace_create_cpu_file(const char * name,umode_t mode,struct dentry * parent,void * data,long cpu,const struct file_operations * fops)9015 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
9016 void *data, long cpu, const struct file_operations *fops)
9017 {
9018 struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
9019
9020 if (ret) /* See tracing_get_cpu() */
9021 d_inode(ret)->i_cdev = (void *)(cpu + 1);
9022 return ret;
9023 }
9024
9025 static void
tracing_init_tracefs_percpu(struct trace_array * tr,long cpu)9026 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
9027 {
9028 struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
9029 struct dentry *d_cpu;
9030 char cpu_dir[30]; /* 30 characters should be more than enough */
9031
9032 if (!d_percpu)
9033 return;
9034
9035 snprintf(cpu_dir, 30, "cpu%ld", cpu);
9036 d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
9037 if (!d_cpu) {
9038 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
9039 return;
9040 }
9041
9042 /* per cpu trace_pipe */
9043 trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
9044 tr, cpu, &tracing_pipe_fops);
9045
9046 /* per cpu trace */
9047 trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
9048 tr, cpu, &tracing_fops);
9049
9050 trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
9051 tr, cpu, &tracing_buffers_fops);
9052
9053 trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
9054 tr, cpu, &tracing_stats_fops);
9055
9056 trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
9057 tr, cpu, &tracing_entries_fops);
9058
9059 if (tr->range_addr_start)
9060 trace_create_cpu_file("buffer_meta", TRACE_MODE_READ, d_cpu,
9061 tr, cpu, &tracing_buffer_meta_fops);
9062 #ifdef CONFIG_TRACER_SNAPSHOT
9063 if (!tr->range_addr_start) {
9064 trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
9065 tr, cpu, &snapshot_fops);
9066
9067 trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
9068 tr, cpu, &snapshot_raw_fops);
9069 }
9070 #endif
9071 }
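
/*
 * For each possible CPU the function above populates a directory that,
 * for a normal (non boot-mapped) instance with CONFIG_TRACER_SNAPSHOT,
 * ends up looking roughly like:
 *
 *	per_cpu/cpu0/
 *		buffer_size_kb
 *		snapshot
 *		snapshot_raw
 *		stats
 *		trace
 *		trace_pipe
 *		trace_pipe_raw
 *
 * Boot-mapped instances (range_addr_start set) get buffer_meta instead
 * of the snapshot files.
 */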
9072
9073 #ifdef CONFIG_FTRACE_SELFTEST
9074 /* Let selftest have access to static functions in this file */
9075 #include "trace_selftest.c"
9076 #endif
9077
9078 static ssize_t
trace_options_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)9079 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
9080 loff_t *ppos)
9081 {
9082 struct trace_option_dentry *topt = filp->private_data;
9083 char *buf;
9084
9085 if (topt->flags->val & topt->opt->bit)
9086 buf = "1\n";
9087 else
9088 buf = "0\n";
9089
9090 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9091 }
9092
9093 static ssize_t
9094 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
9095 loff_t *ppos)
9096 {
9097 struct trace_option_dentry *topt = filp->private_data;
9098 unsigned long val;
9099 int ret;
9100
9101 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9102 if (ret)
9103 return ret;
9104
9105 if (val != 0 && val != 1)
9106 return -EINVAL;
9107
9108 if (!!(topt->flags->val & topt->opt->bit) != val) {
9109 mutex_lock(&trace_types_lock);
9110 ret = __set_tracer_option(topt->tr, topt->flags,
9111 topt->opt, !val);
9112 mutex_unlock(&trace_types_lock);
9113 if (ret)
9114 return ret;
9115 }
9116
9117 *ppos += cnt;
9118
9119 return cnt;
9120 }
9121
9122 static int tracing_open_options(struct inode *inode, struct file *filp)
9123 {
9124 struct trace_option_dentry *topt = inode->i_private;
9125 int ret;
9126
9127 ret = tracing_check_open_get_tr(topt->tr);
9128 if (ret)
9129 return ret;
9130
9131 filp->private_data = inode->i_private;
9132 return 0;
9133 }
9134
9135 static int tracing_release_options(struct inode *inode, struct file *file)
9136 {
9137 struct trace_option_dentry *topt = file->private_data;
9138
9139 trace_array_put(topt->tr);
9140 return 0;
9141 }
9142
9143 static const struct file_operations trace_options_fops = {
9144 .open = tracing_open_options,
9145 .read = trace_options_read,
9146 .write = trace_options_write,
9147 .llseek = generic_file_llseek,
9148 .release = tracing_release_options,
9149 };
9150
9151 /*
9152 * In order to pass in both the trace_array descriptor as well as the index
9153 * to the flag that the trace option file represents, the trace_array
9154 * has a character array of trace_flags_index[], which holds the index
9155 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
9156 * The address of this character array is passed to the flag option file
9157 * read/write callbacks.
9158 *
9159 * In order to extract both the index and the trace_array descriptor,
9160 * get_tr_index() uses the following algorithm.
9161 *
9162 * idx = *ptr;
9163 *
9164 * As the pointer itself points at the index entry whose value is the
9165 * index (remember index[1] == 1), dereferencing it yields the index.
9166 *
9167 * Then, to get the trace_array descriptor, subtracting that index
9168 * from the ptr gets us to the start of the index array itself.
9169 *
9170 * ptr - idx == &index[0]
9171 *
9172 * Then a simple container_of() from that pointer gets us to the
9173 * trace_array descriptor.
9174 */
9175 static void get_tr_index(void *data, struct trace_array **ptr,
9176 unsigned int *pindex)
9177 {
9178 *pindex = *(unsigned char *)data;
9179
9180 *ptr = container_of(data - *pindex, struct trace_array,
9181 trace_flags_index);
9182 }
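/*
 * A minimal sketch of the round trip described above, for a
 * hypothetical flag at bit 5:
 *
 *	void *data = &tr->trace_flags_index[5];	stored at file creation
 *	idx = *(unsigned char *)data;		reads back 5
 *	tr  = container_of(data - idx, struct trace_array, trace_flags_index);
 */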
9183
9184 static ssize_t
9185 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
9186 loff_t *ppos)
9187 {
9188 void *tr_index = filp->private_data;
9189 struct trace_array *tr;
9190 unsigned int index;
9191 char *buf;
9192
9193 get_tr_index(tr_index, &tr, &index);
9194
9195 if (tr->trace_flags & (1 << index))
9196 buf = "1\n";
9197 else
9198 buf = "0\n";
9199
9200 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9201 }
9202
9203 static ssize_t
9204 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
9205 loff_t *ppos)
9206 {
9207 void *tr_index = filp->private_data;
9208 struct trace_array *tr;
9209 unsigned int index;
9210 unsigned long val;
9211 int ret;
9212
9213 get_tr_index(tr_index, &tr, &index);
9214
9215 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9216 if (ret)
9217 return ret;
9218
9219 if (val != 0 && val != 1)
9220 return -EINVAL;
9221
9222 mutex_lock(&event_mutex);
9223 mutex_lock(&trace_types_lock);
9224 ret = set_tracer_flag(tr, 1 << index, val);
9225 mutex_unlock(&trace_types_lock);
9226 mutex_unlock(&event_mutex);
9227
9228 if (ret < 0)
9229 return ret;
9230
9231 *ppos += cnt;
9232
9233 return cnt;
9234 }
9235
9236 static const struct file_operations trace_options_core_fops = {
9237 .open = tracing_open_generic,
9238 .read = trace_options_core_read,
9239 .write = trace_options_core_write,
9240 .llseek = generic_file_llseek,
9241 };
9242
9243 struct dentry *trace_create_file(const char *name,
9244 umode_t mode,
9245 struct dentry *parent,
9246 void *data,
9247 const struct file_operations *fops)
9248 {
9249 struct dentry *ret;
9250
9251 ret = tracefs_create_file(name, mode, parent, data, fops);
9252 if (!ret)
9253 pr_warn("Could not create tracefs '%s' entry\n", name);
9254
9255 return ret;
9256 }
9257
9258
9259 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
9260 {
9261 struct dentry *d_tracer;
9262
9263 if (tr->options)
9264 return tr->options;
9265
9266 d_tracer = tracing_get_dentry(tr);
9267 if (IS_ERR(d_tracer))
9268 return NULL;
9269
9270 tr->options = tracefs_create_dir("options", d_tracer);
9271 if (!tr->options) {
9272 pr_warn("Could not create tracefs directory 'options'\n");
9273 return NULL;
9274 }
9275
9276 return tr->options;
9277 }
9278
9279 static void
9280 create_trace_option_file(struct trace_array *tr,
9281 struct trace_option_dentry *topt,
9282 struct tracer_flags *flags,
9283 struct tracer_opt *opt)
9284 {
9285 struct dentry *t_options;
9286
9287 t_options = trace_options_init_dentry(tr);
9288 if (!t_options)
9289 return;
9290
9291 topt->flags = flags;
9292 topt->opt = opt;
9293 topt->tr = tr;
9294
9295 topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
9296 t_options, topt, &trace_options_fops);
9297
9298 }
9299
9300 static void
9301 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
9302 {
9303 struct trace_option_dentry *topts;
9304 struct trace_options *tr_topts;
9305 struct tracer_flags *flags;
9306 struct tracer_opt *opts;
9307 int cnt;
9308 int i;
9309
9310 if (!tracer)
9311 return;
9312
9313 flags = tracer->flags;
9314
9315 if (!flags || !flags->opts)
9316 return;
9317
9318 /*
9319 * If this is an instance, only create flags for tracers
9320 * the instance may have.
9321 */
9322 if (!trace_ok_for_array(tracer, tr))
9323 return;
9324
9325 for (i = 0; i < tr->nr_topts; i++) {
9326 /* Make sure there are no duplicate flags. */
9327 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
9328 return;
9329 }
9330
9331 opts = flags->opts;
9332
9333 for (cnt = 0; opts[cnt].name; cnt++)
9334 ;
9335
9336 topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9337 if (!topts)
9338 return;
9339
9340 tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9341 GFP_KERNEL);
9342 if (!tr_topts) {
9343 kfree(topts);
9344 return;
9345 }
9346
9347 tr->topts = tr_topts;
9348 tr->topts[tr->nr_topts].tracer = tracer;
9349 tr->topts[tr->nr_topts].topts = topts;
9350 tr->nr_topts++;
9351
9352 for (cnt = 0; opts[cnt].name; cnt++) {
9353 create_trace_option_file(tr, &topts[cnt], flags,
9354 &opts[cnt]);
9355 MEM_FAIL(topts[cnt].entry == NULL,
9356 "Failed to create trace option: %s",
9357 opts[cnt].name);
9358 }
9359 }
9360
9361 static struct dentry *
9362 create_trace_option_core_file(struct trace_array *tr,
9363 const char *option, long index)
9364 {
9365 struct dentry *t_options;
9366
9367 t_options = trace_options_init_dentry(tr);
9368 if (!t_options)
9369 return NULL;
9370
9371 return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9372 (void *)&tr->trace_flags_index[index],
9373 &trace_options_core_fops);
9374 }
9375
9376 static void create_trace_options_dir(struct trace_array *tr)
9377 {
9378 struct dentry *t_options;
9379 bool top_level = tr == &global_trace;
9380 int i;
9381
9382 t_options = trace_options_init_dentry(tr);
9383 if (!t_options)
9384 return;
9385
9386 for (i = 0; trace_options[i]; i++) {
9387 if (top_level ||
9388 !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9389 create_trace_option_core_file(tr, trace_options[i], i);
9390 }
9391 }
9392
9393 static ssize_t
9394 rb_simple_read(struct file *filp, char __user *ubuf,
9395 size_t cnt, loff_t *ppos)
9396 {
9397 struct trace_array *tr = filp->private_data;
9398 char buf[64];
9399 int r;
9400
9401 r = tracer_tracing_is_on(tr);
9402 r = sprintf(buf, "%d\n", r);
9403
9404 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9405 }
9406
9407 static ssize_t
9408 rb_simple_write(struct file *filp, const char __user *ubuf,
9409 size_t cnt, loff_t *ppos)
9410 {
9411 struct trace_array *tr = filp->private_data;
9412 struct trace_buffer *buffer = tr->array_buffer.buffer;
9413 unsigned long val;
9414 int ret;
9415
9416 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9417 if (ret)
9418 return ret;
9419
9420 if (buffer) {
9421 mutex_lock(&trace_types_lock);
9422 if (!!val == tracer_tracing_is_on(tr)) {
9423 val = 0; /* do nothing */
9424 } else if (val) {
9425 tracer_tracing_on(tr);
9426 if (tr->current_trace->start)
9427 tr->current_trace->start(tr);
9428 } else {
9429 tracer_tracing_off(tr);
9430 if (tr->current_trace->stop)
9431 tr->current_trace->stop(tr);
9432 /* Wake up any waiters */
9433 ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9434 }
9435 mutex_unlock(&trace_types_lock);
9436 }
9437
9438 (*ppos)++;
9439
9440 return cnt;
9441 }
9442
9443 static const struct file_operations rb_simple_fops = {
9444 .open = tracing_open_generic_tr,
9445 .read = rb_simple_read,
9446 .write = rb_simple_write,
9447 .release = tracing_release_generic_tr,
9448 .llseek = default_llseek,
9449 };
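/*
 * These fops back the per-instance "tracing_on" file created in
 * init_tracer_tracefs() below.  The in-kernel equivalent of writing to
 * that file is roughly (a sketch):
 *
 *	tracer_tracing_off(tr);		stop recording into tr
 *	...				inspect the buffer
 *	tracer_tracing_on(tr);		resume recording
 *
 * with the difference that the file write also invokes the current
 * tracer's ->start()/->stop() callbacks and wakes up any waiters when
 * tracing is turned off.
 */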
9450
9451 static ssize_t
9452 buffer_percent_read(struct file *filp, char __user *ubuf,
9453 size_t cnt, loff_t *ppos)
9454 {
9455 struct trace_array *tr = filp->private_data;
9456 char buf[64];
9457 int r;
9458
9459 r = tr->buffer_percent;
9460 r = sprintf(buf, "%d\n", r);
9461
9462 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9463 }
9464
9465 static ssize_t
9466 buffer_percent_write(struct file *filp, const char __user *ubuf,
9467 size_t cnt, loff_t *ppos)
9468 {
9469 struct trace_array *tr = filp->private_data;
9470 unsigned long val;
9471 int ret;
9472
9473 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9474 if (ret)
9475 return ret;
9476
9477 if (val > 100)
9478 return -EINVAL;
9479
9480 tr->buffer_percent = val;
9481
9482 (*ppos)++;
9483
9484 return cnt;
9485 }
9486
9487 static const struct file_operations buffer_percent_fops = {
9488 .open = tracing_open_generic_tr,
9489 .read = buffer_percent_read,
9490 .write = buffer_percent_write,
9491 .release = tracing_release_generic_tr,
9492 .llseek = default_llseek,
9493 };
9494
9495 static ssize_t
9496 buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
9497 {
9498 struct trace_array *tr = filp->private_data;
9499 size_t size;
9500 char buf[64];
9501 int order;
9502 int r;
9503
9504 order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9505 size = (PAGE_SIZE << order) / 1024;
9506
9507 r = sprintf(buf, "%zd\n", size);
9508
9509 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9510 }
9511
9512 static ssize_t
9513 buffer_subbuf_size_write(struct file *filp, const char __user *ubuf,
9514 size_t cnt, loff_t *ppos)
9515 {
9516 struct trace_array *tr = filp->private_data;
9517 unsigned long val;
9518 int old_order;
9519 int order;
9520 int pages;
9521 int ret;
9522
9523 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9524 if (ret)
9525 return ret;
9526
9527 val *= 1024; /* value passed in is in KB */
9528
9529 pages = DIV_ROUND_UP(val, PAGE_SIZE);
9530 order = fls(pages - 1);
9531
9532 /* limit between 1 and 128 system pages */
9533 if (order < 0 || order > 7)
9534 return -EINVAL;
9535
9536 /* Do not allow tracing while changing the order of the ring buffer */
9537 tracing_stop_tr(tr);
9538
9539 old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9540 if (old_order == order)
9541 goto out;
9542
9543 ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order);
9544 if (ret)
9545 goto out;
9546
9547 #ifdef CONFIG_TRACER_MAX_TRACE
9548
9549 if (!tr->allocated_snapshot)
9550 goto out_max;
9551
9552 ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
9553 if (ret) {
9554 /* Put back the old order */
9555 cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order);
9556 if (WARN_ON_ONCE(cnt)) {
9557 /*
9558 * AARGH! We are left with different orders!
9559 * The max buffer is our "snapshot" buffer.
9560 * When a tracer needs a snapshot (one of the
9561 * latency tracers), it swaps the max buffer
9562 * with the saved snapshot. We succeeded in updating
9563 * the order of the main buffer, but failed to
9564 * update the order of the max buffer. But when we tried
9565 * to reset the main buffer to the original size, we
9566 * failed there too. This is very unlikely to
9567 * happen, but if it does, warn and kill all
9568 * tracing.
9569 */
9570 tracing_disabled = 1;
9571 }
9572 goto out;
9573 }
9574 out_max:
9575 #endif
9576 (*ppos)++;
9577 out:
9578 if (ret)
9579 cnt = ret;
9580 tracing_start_tr(tr);
9581 return cnt;
9582 }
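/*
 * Worked example of the mapping above, assuming 4K pages: writing 64
 * (KB) gives val = 65536, pages = DIV_ROUND_UP(65536, 4096) = 16 and
 * order = fls(15) = 4, so each sub-buffer becomes 16 pages (64 KB).
 * Sizes round up to the next power of two (5 KB -> order 1 -> 8 KB),
 * and anything above order 7 (512 KB here) is rejected with -EINVAL.
 */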
9583
9584 static const struct file_operations buffer_subbuf_size_fops = {
9585 .open = tracing_open_generic_tr,
9586 .read = buffer_subbuf_size_read,
9587 .write = buffer_subbuf_size_write,
9588 .release = tracing_release_generic_tr,
9589 .llseek = default_llseek,
9590 };
9591
9592 static struct dentry *trace_instance_dir;
9593
9594 static void
9595 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9596
9597 #ifdef CONFIG_MODULES
9598 static int make_mod_delta(struct module *mod, void *data)
9599 {
9600 struct trace_module_delta *module_delta;
9601 struct trace_scratch *tscratch;
9602 struct trace_mod_entry *entry;
9603 struct trace_array *tr = data;
9604 int i;
9605
9606 tscratch = tr->scratch;
9607 module_delta = READ_ONCE(tr->module_delta);
9608 for (i = 0; i < tscratch->nr_entries; i++) {
9609 entry = &tscratch->entries[i];
9610 if (strcmp(mod->name, entry->mod_name))
9611 continue;
9612 if (mod->state == MODULE_STATE_GOING)
9613 module_delta->delta[i] = 0;
9614 else
9615 module_delta->delta[i] = (unsigned long)mod->mem[MOD_TEXT].base
9616 - entry->mod_addr;
9617 break;
9618 }
9619 return 0;
9620 }
9621 #else
9622 static int make_mod_delta(struct module *mod, void *data)
9623 {
9624 return 0;
9625 }
9626 #endif
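/*
 * Sketch of how the delta is meant to be consumed (an assumption about
 * the persistent-buffer address translation, not a definition of it):
 * an address recorded by a previous boot is mapped to the current boot
 * with something like
 *
 *	addr_now = addr_in_buffer + module_delta->delta[i];
 *
 * where entry i is the module that contained addr_in_buffer.  A module
 * that is going away gets a delta of 0 so its addresses are left as is.
 */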
9627
9628 static int mod_addr_comp(const void *a, const void *b, const void *data)
9629 {
9630 const struct trace_mod_entry *e1 = a;
9631 const struct trace_mod_entry *e2 = b;
9632
9633 return e1->mod_addr > e2->mod_addr ? 1 : -1;
9634 }
9635
9636 static void setup_trace_scratch(struct trace_array *tr,
9637 struct trace_scratch *tscratch, unsigned int size)
9638 {
9639 struct trace_module_delta *module_delta;
9640 struct trace_mod_entry *entry;
9641 int i, nr_entries;
9642
9643 if (!tscratch)
9644 return;
9645
9646 tr->scratch = tscratch;
9647 tr->scratch_size = size;
9648
9649 if (tscratch->text_addr)
9650 tr->text_delta = (unsigned long)_text - tscratch->text_addr;
9651
9652 if (struct_size(tscratch, entries, tscratch->nr_entries) > size)
9653 goto reset;
9654
9655 /* Check if each module name is a valid string */
9656 for (i = 0; i < tscratch->nr_entries; i++) {
9657 int n;
9658
9659 entry = &tscratch->entries[i];
9660
9661 for (n = 0; n < MODULE_NAME_LEN; n++) {
9662 if (entry->mod_name[n] == '\0')
9663 break;
9664 if (!isprint(entry->mod_name[n]))
9665 goto reset;
9666 }
9667 if (n == MODULE_NAME_LEN)
9668 goto reset;
9669 }
9670
9671 /* Sort the entries so that we can find the appropriate module from an address. */
9672 nr_entries = tscratch->nr_entries;
9673 sort_r(tscratch->entries, nr_entries, sizeof(struct trace_mod_entry),
9674 mod_addr_comp, NULL, NULL);
9675
9676 if (IS_ENABLED(CONFIG_MODULES)) {
9677 module_delta = kzalloc(struct_size(module_delta, delta, nr_entries), GFP_KERNEL);
9678 if (!module_delta) {
9679 pr_info("module_delta allocation failed. Not able to decode module address.");
9680 goto reset;
9681 }
9682 init_rcu_head(&module_delta->rcu);
9683 } else
9684 module_delta = NULL;
9685 WRITE_ONCE(tr->module_delta, module_delta);
9686
9687 /* Scan modules to make text delta for modules. */
9688 module_for_each_mod(make_mod_delta, tr);
9689
9690 /* Set trace_clock to the same clock as the previous boot. */
9691 if (tscratch->clock_id != tr->clock_id) {
9692 if (tscratch->clock_id >= ARRAY_SIZE(trace_clocks) ||
9693 tracing_set_clock(tr, trace_clocks[tscratch->clock_id].name) < 0) {
9694 pr_info("the previous trace_clock info is not valid.");
9695 goto reset;
9696 }
9697 }
9698 return;
9699 reset:
9700 /* Invalid trace modules */
9701 memset(tscratch, 0, size);
9702 }
9703
9704 static int
9705 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9706 {
9707 enum ring_buffer_flags rb_flags;
9708 struct trace_scratch *tscratch;
9709 unsigned int scratch_size = 0;
9710
9711 rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9712
9713 buf->tr = tr;
9714
9715 if (tr->range_addr_start && tr->range_addr_size) {
9716 /* Add scratch buffer to handle 128 modules */
9717 buf->buffer = ring_buffer_alloc_range(size, rb_flags, 0,
9718 tr->range_addr_start,
9719 tr->range_addr_size,
9720 struct_size(tscratch, entries, 128));
9721
9722 tscratch = ring_buffer_meta_scratch(buf->buffer, &scratch_size);
9723 setup_trace_scratch(tr, tscratch, scratch_size);
9724
9725 /*
9726 * This is basically the same as a mapped buffer,
9727 * with the same restrictions.
9728 */
9729 tr->mapped++;
9730 } else {
9731 buf->buffer = ring_buffer_alloc(size, rb_flags);
9732 }
9733 if (!buf->buffer)
9734 return -ENOMEM;
9735
9736 buf->data = alloc_percpu(struct trace_array_cpu);
9737 if (!buf->data) {
9738 ring_buffer_free(buf->buffer);
9739 buf->buffer = NULL;
9740 return -ENOMEM;
9741 }
9742
9743 /* Allocate the first page for all buffers */
9744 set_buffer_entries(&tr->array_buffer,
9745 ring_buffer_size(tr->array_buffer.buffer, 0));
9746
9747 return 0;
9748 }
9749
9750 static void free_trace_buffer(struct array_buffer *buf)
9751 {
9752 if (buf->buffer) {
9753 ring_buffer_free(buf->buffer);
9754 buf->buffer = NULL;
9755 free_percpu(buf->data);
9756 buf->data = NULL;
9757 }
9758 }
9759
9760 static int allocate_trace_buffers(struct trace_array *tr, int size)
9761 {
9762 int ret;
9763
9764 ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9765 if (ret)
9766 return ret;
9767
9768 #ifdef CONFIG_TRACER_MAX_TRACE
9769 /* Fixed memory-mapped buffer trace arrays do not have snapshot buffers */
9770 if (tr->range_addr_start)
9771 return 0;
9772
9773 ret = allocate_trace_buffer(tr, &tr->max_buffer,
9774 allocate_snapshot ? size : 1);
9775 if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9776 free_trace_buffer(&tr->array_buffer);
9777 return -ENOMEM;
9778 }
9779 tr->allocated_snapshot = allocate_snapshot;
9780
9781 allocate_snapshot = false;
9782 #endif
9783
9784 return 0;
9785 }
9786
9787 static void free_trace_buffers(struct trace_array *tr)
9788 {
9789 if (!tr)
9790 return;
9791
9792 free_trace_buffer(&tr->array_buffer);
9793 kfree(tr->module_delta);
9794
9795 #ifdef CONFIG_TRACER_MAX_TRACE
9796 free_trace_buffer(&tr->max_buffer);
9797 #endif
9798 }
9799
9800 static void init_trace_flags_index(struct trace_array *tr)
9801 {
9802 int i;
9803
9804 /* Used by the trace options files */
9805 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9806 tr->trace_flags_index[i] = i;
9807 }
9808
9809 static void __update_tracer_options(struct trace_array *tr)
9810 {
9811 struct tracer *t;
9812
9813 for (t = trace_types; t; t = t->next)
9814 add_tracer_options(tr, t);
9815 }
9816
9817 static void update_tracer_options(struct trace_array *tr)
9818 {
9819 mutex_lock(&trace_types_lock);
9820 tracer_options_updated = true;
9821 __update_tracer_options(tr);
9822 mutex_unlock(&trace_types_lock);
9823 }
9824
9825 /* Must have trace_types_lock held */
9826 struct trace_array *trace_array_find(const char *instance)
9827 {
9828 struct trace_array *tr, *found = NULL;
9829
9830 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9831 if (tr->name && strcmp(tr->name, instance) == 0) {
9832 found = tr;
9833 break;
9834 }
9835 }
9836
9837 return found;
9838 }
9839
9840 struct trace_array *trace_array_find_get(const char *instance)
9841 {
9842 struct trace_array *tr;
9843
9844 mutex_lock(&trace_types_lock);
9845 tr = trace_array_find(instance);
9846 if (tr)
9847 tr->ref++;
9848 mutex_unlock(&trace_types_lock);
9849
9850 return tr;
9851 }
9852
9853 static int trace_array_create_dir(struct trace_array *tr)
9854 {
9855 int ret;
9856
9857 tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9858 if (!tr->dir)
9859 return -EINVAL;
9860
9861 ret = event_trace_add_tracer(tr->dir, tr);
9862 if (ret) {
9863 tracefs_remove(tr->dir);
9864 return ret;
9865 }
9866
9867 init_tracer_tracefs(tr, tr->dir);
9868 __update_tracer_options(tr);
9869
9870 return ret;
9871 }
9872
9873 static struct trace_array *
9874 trace_array_create_systems(const char *name, const char *systems,
9875 unsigned long range_addr_start,
9876 unsigned long range_addr_size)
9877 {
9878 struct trace_array *tr;
9879 int ret;
9880
9881 ret = -ENOMEM;
9882 tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9883 if (!tr)
9884 return ERR_PTR(ret);
9885
9886 tr->name = kstrdup(name, GFP_KERNEL);
9887 if (!tr->name)
9888 goto out_free_tr;
9889
9890 if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9891 goto out_free_tr;
9892
9893 if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
9894 goto out_free_tr;
9895
9896 if (systems) {
9897 tr->system_names = kstrdup_const(systems, GFP_KERNEL);
9898 if (!tr->system_names)
9899 goto out_free_tr;
9900 }
9901
9902 /* Only for boot up memory mapped ring buffers */
9903 tr->range_addr_start = range_addr_start;
9904 tr->range_addr_size = range_addr_size;
9905
9906 tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9907
9908 cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9909
9910 raw_spin_lock_init(&tr->start_lock);
9911
9912 tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9913 #ifdef CONFIG_TRACER_MAX_TRACE
9914 spin_lock_init(&tr->snapshot_trigger_lock);
9915 #endif
9916 tr->current_trace = &nop_trace;
9917
9918 INIT_LIST_HEAD(&tr->systems);
9919 INIT_LIST_HEAD(&tr->events);
9920 INIT_LIST_HEAD(&tr->hist_vars);
9921 INIT_LIST_HEAD(&tr->err_log);
9922 INIT_LIST_HEAD(&tr->marker_list);
9923
9924 #ifdef CONFIG_MODULES
9925 INIT_LIST_HEAD(&tr->mod_events);
9926 #endif
9927
9928 if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9929 goto out_free_tr;
9930
9931 /* The ring buffer is expanded by default */
9932 trace_set_ring_buffer_expanded(tr);
9933
9934 if (ftrace_allocate_ftrace_ops(tr) < 0)
9935 goto out_free_tr;
9936
9937 ftrace_init_trace_array(tr);
9938
9939 init_trace_flags_index(tr);
9940
9941 if (trace_instance_dir) {
9942 ret = trace_array_create_dir(tr);
9943 if (ret)
9944 goto out_free_tr;
9945 } else
9946 __trace_early_add_events(tr);
9947
9948 list_add(&tr->list, &ftrace_trace_arrays);
9949
9950 tr->ref++;
9951
9952 return tr;
9953
9954 out_free_tr:
9955 ftrace_free_ftrace_ops(tr);
9956 free_trace_buffers(tr);
9957 free_cpumask_var(tr->pipe_cpumask);
9958 free_cpumask_var(tr->tracing_cpumask);
9959 kfree_const(tr->system_names);
9960 kfree(tr->range_name);
9961 kfree(tr->name);
9962 kfree(tr);
9963
9964 return ERR_PTR(ret);
9965 }
9966
9967 static struct trace_array *trace_array_create(const char *name)
9968 {
9969 return trace_array_create_systems(name, NULL, 0, 0);
9970 }
9971
9972 static int instance_mkdir(const char *name)
9973 {
9974 struct trace_array *tr;
9975 int ret;
9976
9977 guard(mutex)(&event_mutex);
9978 guard(mutex)(&trace_types_lock);
9979
9980 ret = -EEXIST;
9981 if (trace_array_find(name))
9982 return -EEXIST;
9983
9984 tr = trace_array_create(name);
9985
9986 ret = PTR_ERR_OR_ZERO(tr);
9987
9988 return ret;
9989 }
9990
9991 #ifdef CONFIG_MMU
9992 static u64 map_pages(unsigned long start, unsigned long size)
9993 {
9994 unsigned long vmap_start, vmap_end;
9995 struct vm_struct *area;
9996 int ret;
9997
9998 area = get_vm_area(size, VM_IOREMAP);
9999 if (!area)
10000 return 0;
10001
10002 vmap_start = (unsigned long) area->addr;
10003 vmap_end = vmap_start + size;
10004
10005 ret = vmap_page_range(vmap_start, vmap_end,
10006 start, pgprot_nx(PAGE_KERNEL));
10007 if (ret < 0) {
10008 free_vm_area(area);
10009 return 0;
10010 }
10011
10012 return (u64)vmap_start;
10013 }
10014 #else
10015 static inline u64 map_pages(unsigned long start, unsigned long size)
10016 {
10017 return 0;
10018 }
10019 #endif
10020
10021 /**
10022 * trace_array_get_by_name - Create/Lookup a trace array, given its name.
10023 * @name: The name of the trace array to be looked up/created.
10024 * @systems: A list of systems to create event directories for (NULL for all)
10025 *
10026 * Returns pointer to trace array with given name.
10027 * NULL, if it cannot be created.
10028 *
10029 * NOTE: This function increments the reference counter associated with the
10030 * trace array returned. This makes sure it cannot be freed while in use.
10031 * Use trace_array_put() once the trace array is no longer needed.
10032 * If the trace_array is to be freed, trace_array_destroy() needs to
10033 * be called after the trace_array_put(), or simply let user space delete
10034 * it from the tracefs instances directory. But until the
10035 * trace_array_put() is called, user space can not delete it.
10036 *
10037 */
10038 struct trace_array *trace_array_get_by_name(const char *name, const char *systems)
10039 {
10040 struct trace_array *tr;
10041
10042 guard(mutex)(&event_mutex);
10043 guard(mutex)(&trace_types_lock);
10044
10045 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10046 if (tr->name && strcmp(tr->name, name) == 0) {
10047 tr->ref++;
10048 return tr;
10049 }
10050 }
10051
10052 tr = trace_array_create_systems(name, systems, 0, 0);
10053
10054 if (IS_ERR(tr))
10055 tr = NULL;
10056 else
10057 tr->ref++;
10058
10059 return tr;
10060 }
10061 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
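/*
 * A minimal usage sketch for in-kernel users ("my_inst" is a
 * hypothetical instance name, error handling elided):
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_inst", NULL);
 *	if (tr) {
 *		...			use the instance
 *		trace_array_put(tr);	drop the reference when done
 *	}
 *
 * The instance appears under the tracefs "instances" directory until it
 * is destroyed or removed from user space.
 */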
10062
10063 static int __remove_instance(struct trace_array *tr)
10064 {
10065 int i;
10066
10067 /* Reference counter for a newly created trace array = 1. */
10068 if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
10069 return -EBUSY;
10070
10071 list_del(&tr->list);
10072
10073 /* Disable all the flags that were enabled coming in */
10074 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
10075 if ((1 << i) & ZEROED_TRACE_FLAGS)
10076 set_tracer_flag(tr, 1 << i, 0);
10077 }
10078
10079 if (printk_trace == tr)
10080 update_printk_trace(&global_trace);
10081
10082 if (update_marker_trace(tr, 0))
10083 synchronize_rcu();
10084
10085 tracing_set_nop(tr);
10086 clear_ftrace_function_probes(tr);
10087 event_trace_del_tracer(tr);
10088 ftrace_clear_pids(tr);
10089 ftrace_destroy_function_files(tr);
10090 tracefs_remove(tr->dir);
10091 free_percpu(tr->last_func_repeats);
10092 free_trace_buffers(tr);
10093 clear_tracing_err_log(tr);
10094
10095 if (tr->range_name) {
10096 reserve_mem_release_by_name(tr->range_name);
10097 kfree(tr->range_name);
10098 }
10099
10100 for (i = 0; i < tr->nr_topts; i++) {
10101 kfree(tr->topts[i].topts);
10102 }
10103 kfree(tr->topts);
10104
10105 free_cpumask_var(tr->pipe_cpumask);
10106 free_cpumask_var(tr->tracing_cpumask);
10107 kfree_const(tr->system_names);
10108 kfree(tr->name);
10109 kfree(tr);
10110
10111 return 0;
10112 }
10113
10114 int trace_array_destroy(struct trace_array *this_tr)
10115 {
10116 struct trace_array *tr;
10117
10118 if (!this_tr)
10119 return -EINVAL;
10120
10121 guard(mutex)(&event_mutex);
10122 guard(mutex)(&trace_types_lock);
10123
10124
10125 /* Making sure trace array exists before destroying it. */
10126 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10127 if (tr == this_tr)
10128 return __remove_instance(tr);
10129 }
10130
10131 return -ENODEV;
10132 }
10133 EXPORT_SYMBOL_GPL(trace_array_destroy);
10134
10135 static int instance_rmdir(const char *name)
10136 {
10137 struct trace_array *tr;
10138
10139 guard(mutex)(&event_mutex);
10140 guard(mutex)(&trace_types_lock);
10141
10142 tr = trace_array_find(name);
10143 if (!tr)
10144 return -ENODEV;
10145
10146 return __remove_instance(tr);
10147 }
10148
10149 static __init void create_trace_instances(struct dentry *d_tracer)
10150 {
10151 struct trace_array *tr;
10152
10153 trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
10154 instance_mkdir,
10155 instance_rmdir);
10156 if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
10157 return;
10158
10159 guard(mutex)(&event_mutex);
10160 guard(mutex)(&trace_types_lock);
10161
10162 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10163 if (!tr->name)
10164 continue;
10165 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
10166 "Failed to create instance directory\n"))
10167 return;
10168 }
10169 }
10170
10171 static void
10172 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
10173 {
10174 int cpu;
10175
10176 trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
10177 tr, &show_traces_fops);
10178
10179 trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
10180 tr, &set_tracer_fops);
10181
10182 trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
10183 tr, &tracing_cpumask_fops);
10184
10185 trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
10186 tr, &tracing_iter_fops);
10187
10188 trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
10189 tr, &tracing_fops);
10190
10191 trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
10192 tr, &tracing_pipe_fops);
10193
10194 trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
10195 tr, &tracing_entries_fops);
10196
10197 trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
10198 tr, &tracing_total_entries_fops);
10199
10200 trace_create_file("free_buffer", 0200, d_tracer,
10201 tr, &tracing_free_buffer_fops);
10202
10203 trace_create_file("trace_marker", 0220, d_tracer,
10204 tr, &tracing_mark_fops);
10205
10206 tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");
10207
10208 trace_create_file("trace_marker_raw", 0220, d_tracer,
10209 tr, &tracing_mark_raw_fops);
10210
10211 trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
10212 &trace_clock_fops);
10213
10214 trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
10215 tr, &rb_simple_fops);
10216
10217 trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
10218 &trace_time_stamp_mode_fops);
10219
10220 tr->buffer_percent = 50;
10221
10222 trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
10223 tr, &buffer_percent_fops);
10224
10225 trace_create_file("buffer_subbuf_size_kb", TRACE_MODE_WRITE, d_tracer,
10226 tr, &buffer_subbuf_size_fops);
10227
10228 create_trace_options_dir(tr);
10229
10230 #ifdef CONFIG_TRACER_MAX_TRACE
10231 trace_create_maxlat_file(tr, d_tracer);
10232 #endif
10233
10234 if (ftrace_create_function_files(tr, d_tracer))
10235 MEM_FAIL(1, "Could not allocate function filter files");
10236
10237 if (tr->range_addr_start) {
10238 trace_create_file("last_boot_info", TRACE_MODE_READ, d_tracer,
10239 tr, &last_boot_fops);
10240 #ifdef CONFIG_TRACER_SNAPSHOT
10241 } else {
10242 trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
10243 tr, &snapshot_fops);
10244 #endif
10245 }
10246
10247 trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
10248 tr, &tracing_err_log_fops);
10249
10250 for_each_tracing_cpu(cpu)
10251 tracing_init_tracefs_percpu(tr, cpu);
10252
10253 ftrace_init_tracefs(tr, d_tracer);
10254 }
10255
10256 #ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED
10257 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
10258 {
10259 struct vfsmount *mnt;
10260 struct file_system_type *type;
10261 struct fs_context *fc;
10262 int ret;
10263
10264 /*
10265 * To maintain backward compatibility for tools that mount
10266 * debugfs to get to the tracing facility, tracefs is automatically
10267 * mounted to the debugfs/tracing directory.
10268 */
10269 type = get_fs_type("tracefs");
10270 if (!type)
10271 return NULL;
10272
10273 fc = fs_context_for_submount(type, mntpt);
10274 put_filesystem(type);
10275 if (IS_ERR(fc))
10276 return ERR_CAST(fc);
10277
10278 pr_warn("NOTICE: Automounting of tracing to debugfs is deprecated and will be removed in 2030\n");
10279
10280 ret = vfs_parse_fs_string(fc, "source",
10281 "tracefs", strlen("tracefs"));
10282 if (!ret)
10283 mnt = fc_mount(fc);
10284 else
10285 mnt = ERR_PTR(ret);
10286
10287 put_fs_context(fc);
10288 return mnt;
10289 }
10290 #endif
10291
10292 /**
10293 * tracing_init_dentry - initialize top level trace array
10294 *
10295 * This is called when creating files or directories in the tracing
10296 * directory. It is called via fs_initcall() by any of the boot up code
10297 * and expects to return the dentry of the top level tracing directory.
10298 */
10299 int tracing_init_dentry(void)
10300 {
10301 struct trace_array *tr = &global_trace;
10302
10303 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10304 pr_warn("Tracing disabled due to lockdown\n");
10305 return -EPERM;
10306 }
10307
10308 /* The top level trace array uses NULL as parent */
10309 if (tr->dir)
10310 return 0;
10311
10312 if (WARN_ON(!tracefs_initialized()))
10313 return -ENODEV;
10314
10315 #ifdef CONFIG_TRACEFS_AUTOMOUNT_DEPRECATED
10316 /*
10317 * As there may still be users that expect the tracing
10318 * files to exist in debugfs/tracing, we must automount
10319 * the tracefs file system there, so older tools still
10320 * work with the newer kernel.
10321 */
10322 tr->dir = debugfs_create_automount("tracing", NULL,
10323 trace_automount, NULL);
10324 #endif
10325
10326 return 0;
10327 }
10328
10329 extern struct trace_eval_map *__start_ftrace_eval_maps[];
10330 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
10331
10332 static struct workqueue_struct *eval_map_wq __initdata;
10333 static struct work_struct eval_map_work __initdata;
10334 static struct work_struct tracerfs_init_work __initdata;
10335
10336 static void __init eval_map_work_func(struct work_struct *work)
10337 {
10338 int len;
10339
10340 len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
10341 trace_event_update_with_eval_map(NULL, __start_ftrace_eval_maps, len);
10342 }
10343
10344 static int __init trace_eval_init(void)
10345 {
10346 INIT_WORK(&eval_map_work, eval_map_work_func);
10347
10348 eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
10349 if (!eval_map_wq) {
10350 pr_err("Unable to allocate eval_map_wq\n");
10351 /* Do work here */
10352 eval_map_work_func(&eval_map_work);
10353 return -ENOMEM;
10354 }
10355
10356 queue_work(eval_map_wq, &eval_map_work);
10357 return 0;
10358 }
10359
10360 subsys_initcall(trace_eval_init);
10361
10362 static int __init trace_eval_sync(void)
10363 {
10364 /* Make sure the eval map updates are finished */
10365 if (eval_map_wq)
10366 destroy_workqueue(eval_map_wq);
10367 return 0;
10368 }
10369
10370 late_initcall_sync(trace_eval_sync);
10371
10372
10373 #ifdef CONFIG_MODULES
10374
10375 bool module_exists(const char *module)
10376 {
10377 /* All modules have the symbol __this_module */
10378 static const char this_mod[] = "__this_module";
10379 char modname[MAX_PARAM_PREFIX_LEN + sizeof(this_mod) + 2];
10380 unsigned long val;
10381 int n;
10382
10383 n = snprintf(modname, sizeof(modname), "%s:%s", module, this_mod);
10384
10385 if (n > sizeof(modname) - 1)
10386 return false;
10387
10388 val = module_kallsyms_lookup_name(modname);
10389 return val != 0;
10390 }
10391
10392 static void trace_module_add_evals(struct module *mod)
10393 {
10394 /*
10395 * Modules with bad taint do not have events created, do
10396 * not bother with enums either.
10397 */
10398 if (trace_module_has_bad_taint(mod))
10399 return;
10400
10401 /* Even if there are no trace_evals, this needs to sanitize field types. */
10402 trace_event_update_with_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
10403 }
10404
10405 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
10406 static void trace_module_remove_evals(struct module *mod)
10407 {
10408 union trace_eval_map_item *map;
10409 union trace_eval_map_item **last = &trace_eval_maps;
10410
10411 if (!mod->num_trace_evals)
10412 return;
10413
10414 guard(mutex)(&trace_eval_mutex);
10415
10416 map = trace_eval_maps;
10417
10418 while (map) {
10419 if (map->head.mod == mod)
10420 break;
10421 map = trace_eval_jmp_to_tail(map);
10422 last = &map->tail.next;
10423 map = map->tail.next;
10424 }
10425 if (!map)
10426 return;
10427
10428 *last = trace_eval_jmp_to_tail(map)->tail.next;
10429 kfree(map);
10430 }
10431 #else
10432 static inline void trace_module_remove_evals(struct module *mod) { }
10433 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
10434
10435 static void trace_module_record(struct module *mod, bool add)
10436 {
10437 struct trace_array *tr;
10438 unsigned long flags;
10439
10440 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10441 flags = tr->flags & (TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT);
10442 /* Update any persistent trace array that has already been started */
10443 if (flags == TRACE_ARRAY_FL_BOOT && add) {
10444 guard(mutex)(&scratch_mutex);
10445 save_mod(mod, tr);
10446 } else if (flags & TRACE_ARRAY_FL_LAST_BOOT) {
10447 /* Update delta if the module was loaded in the previous boot */
10448 make_mod_delta(mod, tr);
10449 }
10450 }
10451 }
10452
10453 static int trace_module_notify(struct notifier_block *self,
10454 unsigned long val, void *data)
10455 {
10456 struct module *mod = data;
10457
10458 switch (val) {
10459 case MODULE_STATE_COMING:
10460 trace_module_add_evals(mod);
10461 trace_module_record(mod, true);
10462 break;
10463 case MODULE_STATE_GOING:
10464 trace_module_remove_evals(mod);
10465 trace_module_record(mod, false);
10466 break;
10467 }
10468
10469 return NOTIFY_OK;
10470 }
10471
10472 static struct notifier_block trace_module_nb = {
10473 .notifier_call = trace_module_notify,
10474 .priority = 0,
10475 };
10476 #endif /* CONFIG_MODULES */
10477
10478 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
10479 {
10480
10481 event_trace_init();
10482
10483 init_tracer_tracefs(&global_trace, NULL);
10484 ftrace_init_tracefs_toplevel(&global_trace, NULL);
10485
10486 trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
10487 &global_trace, &tracing_thresh_fops);
10488
10489 trace_create_file("README", TRACE_MODE_READ, NULL,
10490 NULL, &tracing_readme_fops);
10491
10492 trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
10493 NULL, &tracing_saved_cmdlines_fops);
10494
10495 trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
10496 NULL, &tracing_saved_cmdlines_size_fops);
10497
10498 trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
10499 NULL, &tracing_saved_tgids_fops);
10500
10501 trace_create_eval_file(NULL);
10502
10503 #ifdef CONFIG_MODULES
10504 register_module_notifier(&trace_module_nb);
10505 #endif
10506
10507 #ifdef CONFIG_DYNAMIC_FTRACE
10508 trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
10509 NULL, &tracing_dyn_info_fops);
10510 #endif
10511
10512 create_trace_instances(NULL);
10513
10514 update_tracer_options(&global_trace);
10515 }
10516
10517 static __init int tracer_init_tracefs(void)
10518 {
10519 int ret;
10520
10521 trace_access_lock_init();
10522
10523 ret = tracing_init_dentry();
10524 if (ret)
10525 return 0;
10526
10527 if (eval_map_wq) {
10528 INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
10529 queue_work(eval_map_wq, &tracerfs_init_work);
10530 } else {
10531 tracer_init_tracefs_work_func(NULL);
10532 }
10533
10534 rv_init_interface();
10535
10536 return 0;
10537 }
10538
10539 fs_initcall(tracer_init_tracefs);
10540
10541 static int trace_die_panic_handler(struct notifier_block *self,
10542 unsigned long ev, void *unused);
10543
10544 static struct notifier_block trace_panic_notifier = {
10545 .notifier_call = trace_die_panic_handler,
10546 .priority = INT_MAX - 1,
10547 };
10548
10549 static struct notifier_block trace_die_notifier = {
10550 .notifier_call = trace_die_panic_handler,
10551 .priority = INT_MAX - 1,
10552 };
10553
10554 /*
10555 * The idea is to execute the following die/panic callback early, in order
10556 * to avoid showing irrelevant information in the trace (like other panic
10557 * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
10558 * warnings get disabled (to prevent potential log flooding).
10559 */
10560 static int trace_die_panic_handler(struct notifier_block *self,
10561 unsigned long ev, void *unused)
10562 {
10563 if (!ftrace_dump_on_oops_enabled())
10564 return NOTIFY_DONE;
10565
10566 /* The die notifier requires DIE_OOPS to trigger */
10567 if (self == &trace_die_notifier && ev != DIE_OOPS)
10568 return NOTIFY_DONE;
10569
10570 ftrace_dump(DUMP_PARAM);
10571
10572 return NOTIFY_DONE;
10573 }
10574
10575 /*
10576 * printk is set to max of 1024, we really don't need it that big.
10577 * Nothing should be printing 1000 characters anyway.
10578 */
10579 #define TRACE_MAX_PRINT 1000
10580
10581 /*
10582 * Define here KERN_TRACE so that we have one place to modify
10583 * it if we decide to change what log level the ftrace dump
10584 * should be at.
10585 */
10586 #define KERN_TRACE KERN_EMERG
10587
10588 void
10589 trace_printk_seq(struct trace_seq *s)
10590 {
10591 /* Probably should print a warning here. */
10592 if (s->seq.len >= TRACE_MAX_PRINT)
10593 s->seq.len = TRACE_MAX_PRINT;
10594
10595 /*
10596 * More paranoid code. Although the buffer size is set to
10597 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
10598 * an extra layer of protection.
10599 */
10600 if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
10601 s->seq.len = s->seq.size - 1;
10602
10603 /* should be zero terminated, but we are paranoid. */
10604 s->buffer[s->seq.len] = 0;
10605
10606 printk(KERN_TRACE "%s", s->buffer);
10607
10608 trace_seq_init(s);
10609 }
10610
10611 static void trace_init_iter(struct trace_iterator *iter, struct trace_array *tr)
10612 {
10613 iter->tr = tr;
10614 iter->trace = iter->tr->current_trace;
10615 iter->cpu_file = RING_BUFFER_ALL_CPUS;
10616 iter->array_buffer = &tr->array_buffer;
10617
10618 if (iter->trace && iter->trace->open)
10619 iter->trace->open(iter);
10620
10621 /* Annotate start of buffers if we had overruns */
10622 if (ring_buffer_overruns(iter->array_buffer->buffer))
10623 iter->iter_flags |= TRACE_FILE_ANNOTATE;
10624
10625 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
10626 if (trace_clocks[iter->tr->clock_id].in_ns)
10627 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10628
10629 /* Can not use kmalloc for iter.temp and iter.fmt */
10630 iter->temp = static_temp_buf;
10631 iter->temp_size = STATIC_TEMP_BUF_SIZE;
10632 iter->fmt = static_fmt_buf;
10633 iter->fmt_size = STATIC_FMT_BUF_SIZE;
10634 }
10635
10636 void trace_init_global_iter(struct trace_iterator *iter)
10637 {
10638 trace_init_iter(iter, &global_trace);
10639 }
10640
10641 static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_mode)
10642 {
10643 /* use static because iter can be a bit big for the stack */
10644 static struct trace_iterator iter;
10645 unsigned int old_userobj;
10646 unsigned long flags;
10647 int cnt = 0;
10648
10649 /*
10650 * Always turn off tracing when we dump.
10651 * We don't need to show trace output of what happens
10652 * between multiple crashes.
10653 *
10654 * If the user does a sysrq-z, then they can re-enable
10655 * tracing with echo 1 > tracing_on.
10656 */
10657 tracer_tracing_off(tr);
10658
10659 local_irq_save(flags);
10660
10661 /* Simulate the iterator */
10662 trace_init_iter(&iter, tr);
10663
10664 /* While dumping, do not allow the buffer to be enabled */
10665 tracer_tracing_disable(tr);
10666
10667 old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
10668
10669 /* don't look at user memory in panic mode */
10670 tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
10671
10672 if (dump_mode == DUMP_ORIG)
10673 iter.cpu_file = raw_smp_processor_id();
10674 else
10675 iter.cpu_file = RING_BUFFER_ALL_CPUS;
10676
10677 if (tr == &global_trace)
10678 printk(KERN_TRACE "Dumping ftrace buffer:\n");
10679 else
10680 printk(KERN_TRACE "Dumping ftrace instance %s buffer:\n", tr->name);
10681
10682 /* Did function tracer already get disabled? */
10683 if (ftrace_is_dead()) {
10684 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10685 printk("# MAY BE MISSING FUNCTION EVENTS\n");
10686 }
10687
10688 /*
10689 * We need to stop all tracing on all CPUs to read
10690 * the next buffer. This is a bit expensive, but is
10691 * not done often. We fill in all that we can read,
10692 * and then release the locks again.
10693 */
10694
10695 while (!trace_empty(&iter)) {
10696
10697 if (!cnt)
10698 printk(KERN_TRACE "---------------------------------\n");
10699
10700 cnt++;
10701
10702 trace_iterator_reset(&iter);
10703 iter.iter_flags |= TRACE_FILE_LAT_FMT;
10704
10705 if (trace_find_next_entry_inc(&iter) != NULL) {
10706 int ret;
10707
10708 ret = print_trace_line(&iter);
10709 if (ret != TRACE_TYPE_NO_CONSUME)
10710 trace_consume(&iter);
10711 }
10712 touch_nmi_watchdog();
10713
10714 trace_printk_seq(&iter.seq);
10715 }
10716
10717 if (!cnt)
10718 printk(KERN_TRACE " (ftrace buffer empty)\n");
10719 else
10720 printk(KERN_TRACE "---------------------------------\n");
10721
10722 tr->trace_flags |= old_userobj;
10723
10724 tracer_tracing_enable(tr);
10725 local_irq_restore(flags);
10726 }
10727
10728 static void ftrace_dump_by_param(void)
10729 {
10730 bool first_param = true;
10731 char dump_param[MAX_TRACER_SIZE];
10732 char *buf, *token, *inst_name;
10733 struct trace_array *tr;
10734
10735 strscpy(dump_param, ftrace_dump_on_oops, MAX_TRACER_SIZE);
10736 buf = dump_param;
10737
10738 while ((token = strsep(&buf, ",")) != NULL) {
10739 if (first_param) {
10740 first_param = false;
10741 if (!strcmp("0", token))
10742 continue;
10743 else if (!strcmp("1", token)) {
10744 ftrace_dump_one(&global_trace, DUMP_ALL);
10745 continue;
10746 }
10747 else if (!strcmp("2", token) ||
10748 !strcmp("orig_cpu", token)) {
10749 ftrace_dump_one(&global_trace, DUMP_ORIG);
10750 continue;
10751 }
10752 }
10753
10754 inst_name = strsep(&token, "=");
10755 tr = trace_array_find(inst_name);
10756 if (!tr) {
10757 printk(KERN_TRACE "Instance %s not found\n", inst_name);
10758 continue;
10759 }
10760
10761 if (token && (!strcmp("2", token) ||
10762 !strcmp("orig_cpu", token)))
10763 ftrace_dump_one(tr, DUMP_ORIG);
10764 else
10765 ftrace_dump_one(tr, DUMP_ALL);
10766 }
10767 }
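/*
 * Example of the parameter format parsed above (the "foo" instance is
 * hypothetical): ftrace_dump_on_oops=1,foo=orig_cpu dumps all CPUs of
 * the global buffer and only the originating CPU of the "foo" instance.
 */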
10768
10769 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10770 {
10771 static atomic_t dump_running;
10772
10773 /* Only allow one dump user at a time. */
10774 if (atomic_inc_return(&dump_running) != 1) {
10775 atomic_dec(&dump_running);
10776 return;
10777 }
10778
10779 switch (oops_dump_mode) {
10780 case DUMP_ALL:
10781 ftrace_dump_one(&global_trace, DUMP_ALL);
10782 break;
10783 case DUMP_ORIG:
10784 ftrace_dump_one(&global_trace, DUMP_ORIG);
10785 break;
10786 case DUMP_PARAM:
10787 ftrace_dump_by_param();
10788 break;
10789 case DUMP_NONE:
10790 break;
10791 default:
10792 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10793 ftrace_dump_one(&global_trace, DUMP_ALL);
10794 }
10795
10796 atomic_dec(&dump_running);
10797 }
10798 EXPORT_SYMBOL_GPL(ftrace_dump);
10799
10800 #define WRITE_BUFSIZE 4096
10801
10802 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10803 size_t count, loff_t *ppos,
10804 int (*createfn)(const char *))
10805 {
10806 char *kbuf, *buf, *tmp;
10807 int ret = 0;
10808 size_t done = 0;
10809 size_t size;
10810
10811 kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10812 if (!kbuf)
10813 return -ENOMEM;
10814
10815 while (done < count) {
10816 size = count - done;
10817
10818 if (size >= WRITE_BUFSIZE)
10819 size = WRITE_BUFSIZE - 1;
10820
10821 if (copy_from_user(kbuf, buffer + done, size)) {
10822 ret = -EFAULT;
10823 goto out;
10824 }
10825 kbuf[size] = '\0';
10826 buf = kbuf;
10827 do {
10828 tmp = strchr(buf, '\n');
10829 if (tmp) {
10830 *tmp = '\0';
10831 size = tmp - buf + 1;
10832 } else {
10833 size = strlen(buf);
10834 if (done + size < count) {
10835 if (buf != kbuf)
10836 break;
10837 /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10838 pr_warn("Line length is too long: Should be less than %d\n",
10839 WRITE_BUFSIZE - 2);
10840 ret = -EINVAL;
10841 goto out;
10842 }
10843 }
10844 done += size;
10845
10846 /* Remove comments */
10847 tmp = strchr(buf, '#');
10848
10849 if (tmp)
10850 *tmp = '\0';
10851
10852 ret = createfn(buf);
10853 if (ret)
10854 goto out;
10855 buf += size;
10856
10857 } while (done < count);
10858 }
10859 ret = done;
10860
10861 out:
10862 kfree(kbuf);
10863
10864 return ret;
10865 }
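/*
 * A sketch of how a dynamic-event style file would typically hook this
 * up (the names below, including my_parse_one_line(), are hypothetical,
 * not an existing interface):
 *
 *	static int create_my_cmd(const char *raw_command)
 *	{
 *		return my_parse_one_line(raw_command);
 *	}
 *
 *	static ssize_t my_write(struct file *file, const char __user *ubuf,
 *				size_t cnt, loff_t *ppos)
 *	{
 *		return trace_parse_run_command(file, ubuf, cnt, ppos,
 *					       create_my_cmd);
 *	}
 */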
10866
10867 #ifdef CONFIG_TRACER_MAX_TRACE
10868 __init static bool tr_needs_alloc_snapshot(const char *name)
10869 {
10870 char *test;
10871 int len = strlen(name);
10872 bool ret;
10873
10874 if (!boot_snapshot_index)
10875 return false;
10876
10877 if (strncmp(name, boot_snapshot_info, len) == 0 &&
10878 boot_snapshot_info[len] == '\t')
10879 return true;
10880
10881 test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10882 if (!test)
10883 return false;
10884
10885 sprintf(test, "\t%s\t", name);
10886 ret = strstr(boot_snapshot_info, test) == NULL;
10887 kfree(test);
10888 return ret;
10889 }
10890
10891 __init static void do_allocate_snapshot(const char *name)
10892 {
10893 if (!tr_needs_alloc_snapshot(name))
10894 return;
10895
10896 /*
10897 * When allocate_snapshot is set, the next call to
10898 * allocate_trace_buffers() (called by trace_array_get_by_name())
10899 * will allocate the snapshot buffer. That will also clear
10900 * this flag.
10901 */
10902 allocate_snapshot = true;
10903 }
10904 #else
10905 static inline void do_allocate_snapshot(const char *name) { }
10906 #endif
10907
10908 __init static void enable_instances(void)
10909 {
10910 struct trace_array *tr;
10911 bool memmap_area = false;
10912 char *curr_str;
10913 char *name;
10914 char *str;
10915 char *tok;
10916
10917 /* A tab is always appended */
10918 boot_instance_info[boot_instance_index - 1] = '\0';
10919 str = boot_instance_info;
10920
10921 while ((curr_str = strsep(&str, "\t"))) {
10922 phys_addr_t start = 0;
10923 phys_addr_t size = 0;
10924 unsigned long addr = 0;
10925 bool traceprintk = false;
10926 bool traceoff = false;
10927 char *flag_delim;
10928 char *addr_delim;
10929 char *rname __free(kfree) = NULL;
10930
10931 tok = strsep(&curr_str, ",");
10932
10933 flag_delim = strchr(tok, '^');
10934 addr_delim = strchr(tok, '@');
10935
10936 if (addr_delim)
10937 *addr_delim++ = '\0';
10938
10939 if (flag_delim)
10940 *flag_delim++ = '\0';
10941
10942 name = tok;
10943
10944 if (flag_delim) {
10945 char *flag;
10946
10947 while ((flag = strsep(&flag_delim, "^"))) {
10948 if (strcmp(flag, "traceoff") == 0) {
10949 traceoff = true;
10950 } else if ((strcmp(flag, "printk") == 0) ||
10951 (strcmp(flag, "traceprintk") == 0) ||
10952 (strcmp(flag, "trace_printk") == 0)) {
10953 traceprintk = true;
10954 } else {
10955 pr_info("Tracing: Invalid instance flag '%s' for %s\n",
10956 flag, name);
10957 }
10958 }
10959 }
10960
10961 tok = addr_delim;
10962 if (tok && isdigit(*tok)) {
10963 start = memparse(tok, &tok);
10964 if (!start) {
10965 pr_warn("Tracing: Invalid boot instance address for %s\n",
10966 name);
10967 continue;
10968 }
10969 if (*tok != ':') {
10970 pr_warn("Tracing: No size specified for instance %s\n", name);
10971 continue;
10972 }
10973 tok++;
10974 size = memparse(tok, &tok);
10975 if (!size) {
10976 pr_warn("Tracing: Invalid boot instance size for %s\n",
10977 name);
10978 continue;
10979 }
10980 memmap_area = true;
10981 } else if (tok) {
10982 if (!reserve_mem_find_by_name(tok, &start, &size)) {
10983 start = 0;
10984 pr_warn("Failed to map boot instance %s to %s\n", name, tok);
10985 continue;
10986 }
10987 rname = kstrdup(tok, GFP_KERNEL);
10988 }
10989
10990 if (start) {
10991 /* Start and size must be page aligned */
10992 if (start & ~PAGE_MASK) {
10993 pr_warn("Tracing: mapping start addr %pa is not page aligned\n", &start);
10994 continue;
10995 }
10996 if (size & ~PAGE_MASK) {
10997 pr_warn("Tracing: mapping size %pa is not page aligned\n", &size);
10998 continue;
10999 }
11000
11001 if (memmap_area)
11002 addr = map_pages(start, size);
11003 else
11004 addr = (unsigned long)phys_to_virt(start);
11005 if (addr) {
11006 pr_info("Tracing: mapped boot instance %s at physical memory %pa of size 0x%lx\n",
11007 name, &start, (unsigned long)size);
11008 } else {
11009 pr_warn("Tracing: Failed to map boot instance %s\n", name);
11010 continue;
11011 }
11012 } else {
11013 /* Only non mapped buffers have snapshot buffers */
			if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
				do_allocate_snapshot(name);
		}

		tr = trace_array_create_systems(name, NULL, addr, size);
		if (IS_ERR(tr)) {
			pr_warn("Tracing: Failed to create instance buffer %s\n", curr_str);
			continue;
		}

		if (traceoff)
			tracer_tracing_off(tr);

		if (traceprintk)
			update_printk_trace(tr);

		/*
		 * memmap'd buffers can not be freed.
		 */
		if (memmap_area) {
			tr->flags |= TRACE_ARRAY_FL_MEMMAP;
			tr->ref++;
		}

		if (start) {
			tr->flags |= TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT;
			tr->range_name = no_free_ptr(rname);
		}

		while ((tok = strsep(&curr_str, ","))) {
			early_enable_events(tr, tok, true);
		}
	}
}

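/*
 * Boot-time set-up of the global trace array: allocate the ring buffer,
 * cpumasks and supporting state, and register the nop tracer. Called from
 * early_trace_init() below; the error labels at the end unwind whatever
 * was allocated so far in reverse order.
 */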
__init static int tracer_alloc_buffers(void)
{
	int ring_buf_size;
	int ret = -ENOMEM;

	if (security_locked_down(LOCKDOWN_TRACEFS)) {
		pr_warn("Tracing disabled due to lockdown\n");
		return -EPERM;
	}

	/*
	 * Make sure we don't accidentally add more trace options
	 * than we have bits for.
	 */
	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);

	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
		goto out;

	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
		goto out_free_buffer_mask;

	/* Only allocate trace_printk buffers if a trace_printk exists */
	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
		/* Must be called before global_trace.buffer is allocated */
		trace_printk_init_buffers();

	/* To save memory, keep the ring buffer size to its minimum */
	if (global_trace.ring_buffer_expanded)
		ring_buf_size = trace_buf_size;
	else
		ring_buf_size = 1;

	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);

	raw_spin_lock_init(&global_trace.start_lock);

	/*
	 * The prepare callback allocates some memory for the ring buffer. We
	 * don't free the buffer if the CPU goes down; if we did, the user
	 * would lose any trace that was in the buffer. The memory will be
	 * removed once the "instance" is removed.
	 */
	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
				      "trace/RB:prepare", trace_rb_cpu_prepare,
				      NULL);
	if (ret < 0)
		goto out_free_cpumask;
	/* Used for event triggers */
	ret = -ENOMEM;
	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
	if (!temp_buffer)
		goto out_rm_hp_state;

	if (trace_create_savedcmd() < 0)
		goto out_free_temp_buffer;

	if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
		goto out_free_savedcmd;

	/* TODO: make the number of buffers hot pluggable with CPUS */
	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
		goto out_free_pipe_cpumask;
	}
	if (global_trace.buffer_disabled)
		tracing_off();

	if (trace_boot_clock) {
		ret = tracing_set_clock(&global_trace, trace_boot_clock);
		if (ret < 0)
			pr_warn("Trace clock %s not defined, going back to default\n",
				trace_boot_clock);
	}

	/*
	 * register_tracer() might reference current_trace, so it
	 * needs to be set before we register anything. This is
	 * just a bootstrap of current_trace anyway.
	 */
	global_trace.current_trace = &nop_trace;

	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
#ifdef CONFIG_TRACER_MAX_TRACE
	spin_lock_init(&global_trace.snapshot_trigger_lock);
#endif
	ftrace_init_global_array_ops(&global_trace);

#ifdef CONFIG_MODULES
	INIT_LIST_HEAD(&global_trace.mod_events);
#endif

	init_trace_flags_index(&global_trace);

	register_tracer(&nop_trace);

	/* Function tracing may start here (via kernel command line) */
	init_function_trace();

	/* All seems OK, enable tracing */
	tracing_disabled = 0;

	atomic_notifier_chain_register(&panic_notifier_list,
				       &trace_panic_notifier);

	register_die_notifier(&trace_die_notifier);

	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;

	INIT_LIST_HEAD(&global_trace.systems);
	INIT_LIST_HEAD(&global_trace.events);
	INIT_LIST_HEAD(&global_trace.hist_vars);
	INIT_LIST_HEAD(&global_trace.err_log);
	list_add(&global_trace.marker_list, &marker_copies);
	list_add(&global_trace.list, &ftrace_trace_arrays);

	apply_trace_boot_options();

	register_snapshot_cmd();

	return 0;

out_free_pipe_cpumask:
	free_cpumask_var(global_trace.pipe_cpumask);
out_free_savedcmd:
	trace_free_saved_cmdlines_buffer();
out_free_temp_buffer:
	ring_buffer_free(temp_buffer);
out_rm_hp_state:
	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
out_free_cpumask:
	free_cpumask_var(global_trace.tracing_cpumask);
out_free_buffer_mask:
	free_cpumask_var(tracing_buffer_mask);
out:
	return ret;
}

#ifdef CONFIG_FUNCTION_TRACER
/* Used to set module cached ftrace filtering at boot up */
__init struct trace_array *trace_get_global_array(void)
{
	return &global_trace;
}
#endif

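/*
 * When a boot snapshot was requested on the kernel command line
 * (snapshot_at_boot is set earlier in this file), take a snapshot of every
 * instance that has a snapshot buffer allocated and mark it in the trace.
 */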
void __init ftrace_boot_snapshot(void)
{
#ifdef CONFIG_TRACER_MAX_TRACE
	struct trace_array *tr;

	if (!snapshot_at_boot)
		return;

	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
		if (!tr->allocated_snapshot)
			continue;

		tracing_snapshot_instance(tr);
		trace_array_puts(tr, "** Boot snapshot taken **\n");
	}
#endif
}

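/*
 * Rough initialization order (a sketch; the exact call sites live in
 * init/main.c): early_trace_init() runs first and allocates the global
 * trace buffers so trace_printk() can be used very early, trace_init()
 * then initializes the event subsystem and any boot-time instances, and
 * late_trace_init() below runs as a late initcall to finalize the default
 * clock and the boot-tracer state.
 */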
void __init early_trace_init(void)
{
	if (tracepoint_printk) {
		tracepoint_print_iter =
			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
		if (MEM_FAIL(!tracepoint_print_iter,
			     "Failed to allocate trace iterator\n"))
			tracepoint_printk = 0;
		else
			static_key_enable(&tracepoint_printk_key.key);
	}
	tracer_alloc_buffers();

	init_events();
}

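/*
 * Boot-time instances requested with trace_instance= (which sets
 * boot_instance_index while parsing the command line earlier in this file)
 * are created here, after trace_event_init() has set up the event
 * infrastructure they may enable events in.
 */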
void __init trace_init(void)
{
	trace_event_init();

	if (boot_instance_index)
		enable_instances();
}

__init static void clear_boot_tracer(void)
{
	/*
	 * The name of the default bootup tracer lives in an init section
	 * that is freed after boot. This function runs as a late initcall:
	 * if the boot tracer was never found and registered by now, clear
	 * the pointer so that a later tracer registration does not access
	 * memory that is about to be freed.
	 */
	if (!default_bootup_tracer)
		return;

	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
	       default_bootup_tracer);
	default_bootup_tracer = NULL;
}

#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
__init static void tracing_set_default_clock(void)
{
	/* sched_clock_stable() is determined in late_initcall */
	if (!trace_boot_clock && !sched_clock_stable()) {
		if (security_locked_down(LOCKDOWN_TRACEFS)) {
			pr_warn("Can not set tracing clock due to lockdown\n");
			return;
		}

		printk(KERN_WARNING
		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
		       "If you want to keep using the local clock, then add:\n"
		       "  \"trace_clock=local\"\n"
		       "on the kernel command line\n");
		tracing_set_clock(&global_trace, "global");
	}
}
#else
static inline void tracing_set_default_clock(void) { }
#endif

__init static int late_trace_init(void)
{
	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
		static_key_disable(&tracepoint_printk_key.key);
		tracepoint_printk = 0;
	}

	if (traceoff_after_boot)
		tracing_off();

	tracing_set_default_clock();
	clear_boot_tracer();
	return 0;
}

late_initcall_sync(late_trace_init);