xref: /linux/kernel/trace/trace_snapshot.c (revision e2683c8868d03382da7e1ce8453b543a043066d1)
1 // SPDX-License-Identifier: GPL-2.0
2 #include <linux/fsnotify.h>
3 
4 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
5 
6 #include "trace.h"
7 
8 /* Used if snapshot allocated at boot */
9 static bool allocate_snapshot;
10 static bool snapshot_at_boot;
11 
12 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
13 static int boot_snapshot_index;
14 
15 static int __init boot_alloc_snapshot(char *str)
16 {
17 	char *slot = boot_snapshot_info + boot_snapshot_index;
18 	int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
19 	int ret;
20 
21 	if (str[0] == '=') {
22 		str++;
23 		if (strlen(str) >= left)
24 			return -1;
25 
26 		ret = snprintf(slot, left, "%s\t", str);
27 		boot_snapshot_index += ret;
28 	} else {
29 		allocate_snapshot = true;
30 		/* We also need the main ring buffer expanded */
31 		trace_set_ring_buffer_expanded(NULL);
32 	}
33 	return 1;
34 }
35 __setup("alloc_snapshot", boot_alloc_snapshot);
36 
38 static int __init boot_snapshot(char *str)
39 {
40 	snapshot_at_boot = true;
41 	boot_alloc_snapshot(str);
42 	return 1;
43 }
44 __setup("ftrace_boot_snapshot", boot_snapshot);
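
/*
 * Illustrative boot command lines for the two parameters above (the
 * instance name "foo" is only an example):
 *
 *	alloc_snapshot			allocate the snapshot buffer at boot
 *	ftrace_boot_snapshot		also take a snapshot of each instance
 *					with a snapshot buffer once boot is done
 *	ftrace_boot_snapshot=foo	append "foo\t" to boot_snapshot_info so
 *					that only the listed instances are
 *					snapshotted
 *
 * Both parameters share boot_alloc_snapshot(), so the "=name" form is
 * accepted by either one.
 */
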
45 static void tracing_snapshot_instance_cond(struct trace_array *tr,
46 					   void *cond_data)
47 {
48 	unsigned long flags;
49 
50 	if (in_nmi()) {
51 		trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
52 		trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
53 		return;
54 	}
55 
56 	if (!tr->allocated_snapshot) {
57 		trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
58 		trace_array_puts(tr, "*** stopping trace here!   ***\n");
59 		tracer_tracing_off(tr);
60 		return;
61 	}
62 
63 	if (tr->mapped) {
64 		trace_array_puts(tr, "*** BUFFER MEMORY MAPPED ***\n");
65 		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
66 		return;
67 	}
68 
69 	/* Note: the snapshot cannot be used while the tracer itself is using it */
70 	if (tracer_uses_snapshot(tr->current_trace)) {
71 		trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
72 		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
73 		return;
74 	}
75 
76 	local_irq_save(flags);
77 	update_max_tr(tr, current, smp_processor_id(), cond_data);
78 	local_irq_restore(flags);
79 }
80 
81 void tracing_snapshot_instance(struct trace_array *tr)
82 {
83 	tracing_snapshot_instance_cond(tr, NULL);
84 }
85 
86 /**
87  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
88  * @tr:		The tracing instance to snapshot
89  * @cond_data:	The data to be tested conditionally, and possibly saved
90  *
91  * This is the same as tracing_snapshot() except that the snapshot is
92  * conditional: the snapshot is only taken if the trace array's
93  * cond_snapshot.update() implementation, invoked with @cond_data,
94  * returns true. In other words, the update() callback uses
95  * @cond_data to decide whether the snapshot should be taken, and
96  * presumably saves whatever state it needs along with the snapshot
97  * when it is.
98  */
99 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
100 {
101 	tracing_snapshot_instance_cond(tr, cond_data);
102 }
103 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
104 
105 /**
106  * tracing_cond_snapshot_data - get the user data associated with a snapshot
107  * @tr:		The tracing instance
108  *
109  * When the user enables a conditional snapshot using
110  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
111  * with the snapshot.  This accessor is used to retrieve it.
112  *
113  * Should not be called from cond_snapshot.update(), since it takes
114  * the tr->max_lock lock, which the code calling
115  * cond_snapshot.update() already holds.
116  *
117  * Returns the cond_data associated with the trace array's snapshot.
118  */
119 void *tracing_cond_snapshot_data(struct trace_array *tr)
120 {
121 	void *cond_data = NULL;
122 
123 	local_irq_disable();
124 	arch_spin_lock(&tr->max_lock);
125 
126 	if (tr->cond_snapshot)
127 		cond_data = tr->cond_snapshot->cond_data;
128 
129 	arch_spin_unlock(&tr->max_lock);
130 	local_irq_enable();
131 
132 	return cond_data;
133 }
134 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
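
/*
 * Illustrative use (a sketch; "struct my_data" and its "hits" member are
 * hypothetical): after a conditional snapshot has fired, the code that
 * enabled it can recover the data it registered:
 *
 *	struct my_data *d = tracing_cond_snapshot_data(tr);
 *
 *	if (d)
 *		pr_info("snapshot fired, hits=%llu\n", d->hits);
 */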
135 
136 /* resize @trace_buf to the size of @size_buf's entries */
137 int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
138 				 struct array_buffer *size_buf, int cpu_id)
139 {
140 	int cpu, ret = 0;
141 
142 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
143 		for_each_tracing_cpu(cpu) {
144 			ret = ring_buffer_resize(trace_buf->buffer,
145 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
146 			if (ret < 0)
147 				break;
148 			per_cpu_ptr(trace_buf->data, cpu)->entries =
149 				per_cpu_ptr(size_buf->data, cpu)->entries;
150 		}
151 	} else {
152 		ret = ring_buffer_resize(trace_buf->buffer,
153 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
154 		if (ret == 0)
155 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
156 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
157 	}
158 
159 	return ret;
160 }
161 
162 int tracing_alloc_snapshot_instance(struct trace_array *tr)
163 {
164 	int order;
165 	int ret;
166 
167 	if (!tr->allocated_snapshot) {
168 
169 		/* Make the snapshot buffer have the same order as main buffer */
170 		order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
171 		ret = ring_buffer_subbuf_order_set(tr->snapshot_buffer.buffer, order);
172 		if (ret < 0)
173 			return ret;
174 
175 		/* allocate spare buffer */
176 		ret = resize_buffer_duplicate_size(&tr->snapshot_buffer,
177 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
178 		if (ret < 0)
179 			return ret;
180 
181 		tr->allocated_snapshot = true;
182 	}
183 
184 	return 0;
185 }
186 
187 void free_snapshot(struct trace_array *tr)
188 {
189 	/*
190 	 * We don't free the ring buffer; instead, we resize it, because
191 	 * the snapshot ring buffer has some state (e.g. ring->clock) that
192 	 * we want to preserve.
193 	 */
194 	ring_buffer_subbuf_order_set(tr->snapshot_buffer.buffer, 0);
195 	ring_buffer_resize(tr->snapshot_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
196 	trace_set_buffer_entries(&tr->snapshot_buffer, 1);
197 	tracing_reset_online_cpus(&tr->snapshot_buffer);
198 	tr->allocated_snapshot = false;
199 }
200 
201 int tracing_arm_snapshot_locked(struct trace_array *tr)
202 {
203 	int ret;
204 
205 	lockdep_assert_held(&trace_types_lock);
206 
207 	spin_lock(&tr->snapshot_trigger_lock);
208 	if (tr->snapshot == UINT_MAX || tr->mapped) {
209 		spin_unlock(&tr->snapshot_trigger_lock);
210 		return -EBUSY;
211 	}
212 
213 	tr->snapshot++;
214 	spin_unlock(&tr->snapshot_trigger_lock);
215 
216 	ret = tracing_alloc_snapshot_instance(tr);
217 	if (ret) {
218 		spin_lock(&tr->snapshot_trigger_lock);
219 		tr->snapshot--;
220 		spin_unlock(&tr->snapshot_trigger_lock);
221 	}
222 
223 	return ret;
224 }
225 
226 int tracing_arm_snapshot(struct trace_array *tr)
227 {
228 	guard(mutex)(&trace_types_lock);
229 	return tracing_arm_snapshot_locked(tr);
230 }
231 
232 void tracing_disarm_snapshot(struct trace_array *tr)
233 {
234 	spin_lock(&tr->snapshot_trigger_lock);
235 	if (!WARN_ON(!tr->snapshot))
236 		tr->snapshot--;
237 	spin_unlock(&tr->snapshot_trigger_lock);
238 }
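
/*
 * Arming and disarming must stay balanced. A typical pattern (a sketch
 * with error handling elided; register_my_trigger() is hypothetical)
 * mirrors ftrace_trace_snapshot_callback() below:
 *
 *	ret = tracing_arm_snapshot(tr);
 *	if (ret)
 *		return ret;
 *
 *	ret = register_my_trigger(tr);
 *	if (ret)
 *		tracing_disarm_snapshot(tr);
 */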
239 
240 /**
241  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
242  *
243  * This is similar to tracing_snapshot(), but it will allocate the
244  * snapshot buffer if it isn't already allocated. Use this only
245  * where it is safe to sleep, as the allocation may sleep.
246  *
247  * This causes a swap between the snapshot buffer and the current live
248  * tracing buffer. You can use this to take snapshots of the live
249  * trace when some condition is triggered, but continue to trace.
250  */
251 void tracing_snapshot_alloc(void)
252 {
253 	int ret;
254 
255 	ret = tracing_alloc_snapshot();
256 	if (ret < 0)
257 		return;
258 
259 	tracing_snapshot();
260 }
261 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
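
/*
 * Illustrative use (a sketch; detected_anomaly() is hypothetical): from
 * process context, where the allocation may sleep, capture the trace as
 * soon as something looks wrong:
 *
 *	if (detected_anomaly())
 *		tracing_snapshot_alloc();
 *
 * From atomic context, allocate up front with tracing_alloc_snapshot()
 * and call tracing_snapshot() at the trigger point instead.
 */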
262 
263 /**
264  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
265  * @tr:		The tracing instance
266  * @cond_data:	User data to associate with the snapshot
267  * @update:	Implementation of the cond_snapshot update function
268  *
269  * Check whether the conditional snapshot for the given instance has
270  * already been enabled, or if the current tracer is already using a
271  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
272  * save the cond_data and update function inside.
273  *
274  * Returns 0 if successful, error otherwise.
275  */
276 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
277 				 cond_update_fn_t update)
278 {
279 	struct cond_snapshot *cond_snapshot __free(kfree) =
280 		kzalloc_obj(*cond_snapshot);
281 	int ret;
282 
283 	if (!cond_snapshot)
284 		return -ENOMEM;
285 
286 	cond_snapshot->cond_data = cond_data;
287 	cond_snapshot->update = update;
288 
289 	guard(mutex)(&trace_types_lock);
290 
291 	if (tracer_uses_snapshot(tr->current_trace))
292 		return -EBUSY;
293 
294 	/*
295 	 * The cond_snapshot can only change to NULL without the
296 	 * trace_types_lock. We don't care if we race with it going
297 	 * to NULL, but we want to make sure that it's not set to
298 	 * something other than NULL when we get here, which we can
299 	 * do safely with only holding the trace_types_lock and not
300 	 * having to take the max_lock.
301 	 */
302 	if (tr->cond_snapshot)
303 		return -EBUSY;
304 
305 	ret = tracing_arm_snapshot_locked(tr);
306 	if (ret)
307 		return ret;
308 
309 	local_irq_disable();
310 	arch_spin_lock(&tr->max_lock);
311 	tr->cond_snapshot = no_free_ptr(cond_snapshot);
312 	arch_spin_unlock(&tr->max_lock);
313 	local_irq_enable();
314 
315 	return 0;
316 }
317 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
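
/*
 * Illustrative use (a sketch; struct my_data, my_update() and the
 * threshold logic are hypothetical). The update() callback runs under
 * tr->max_lock, so it must not call tracing_cond_snapshot_data():
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		struct my_data *d = cond_data;
 *
 *		return ++d->hits >= d->threshold;
 *	}
 *
 *	static struct my_data data = { .threshold = 5 };
 *
 *	ret = tracing_snapshot_cond_enable(tr, &data, my_update);
 *	...
 *	tracing_snapshot_cond(tr, &data);
 *	...
 *	tracing_snapshot_cond_disable(tr);
 */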
318 
319 /**
320  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
321  * @tr:		The tracing instance
322  *
323  * Check whether the conditional snapshot for the given instance is
324  * enabled; if so, free the cond_snapshot associated with it,
325  * otherwise return -EINVAL.
326  *
327  * Returns 0 if successful, error otherwise.
328  */
329 int tracing_snapshot_cond_disable(struct trace_array *tr)
330 {
331 	int ret = 0;
332 
333 	local_irq_disable();
334 	arch_spin_lock(&tr->max_lock);
335 
336 	if (!tr->cond_snapshot)
337 		ret = -EINVAL;
338 	else {
339 		kfree(tr->cond_snapshot);
340 		tr->cond_snapshot = NULL;
341 	}
342 
343 	arch_spin_unlock(&tr->max_lock);
344 	local_irq_enable();
345 
346 	tracing_disarm_snapshot(tr);
347 
348 	return ret;
349 }
350 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
351 
352 #ifdef CONFIG_TRACER_MAX_TRACE
353 #ifdef LATENCY_FS_NOTIFY
354 static struct workqueue_struct *fsnotify_wq;
355 
356 static void latency_fsnotify_workfn(struct work_struct *work)
357 {
358 	struct trace_array *tr = container_of(work, struct trace_array,
359 					      fsnotify_work);
360 	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
361 }
362 
363 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
364 {
365 	struct trace_array *tr = container_of(iwork, struct trace_array,
366 					      fsnotify_irqwork);
367 	queue_work(fsnotify_wq, &tr->fsnotify_work);
368 }
369 
370 __init static int latency_fsnotify_init(void)
371 {
372 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
373 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
374 	if (!fsnotify_wq) {
375 		pr_err("Unable to allocate tr_max_lat_wq\n");
376 		return -ENOMEM;
377 	}
378 	return 0;
379 }
380 
381 late_initcall_sync(latency_fsnotify_init);
382 
383 void latency_fsnotify(struct trace_array *tr)
384 {
385 	if (!fsnotify_wq)
386 		return;
387 	/*
388 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
389 	 * possible that we are called from __schedule() or do_idle(), which
390 	 * could cause a deadlock.
391 	 */
392 	irq_work_queue(&tr->fsnotify_irqwork);
393 }
394 #endif /* LATENCY_FS_NOTIFY */
395 
396 static const struct file_operations tracing_max_lat_fops;
397 
398 void trace_create_maxlat_file(struct trace_array *tr,
399 			      struct dentry *d_tracer)
400 {
401 #ifdef LATENCY_FS_NOTIFY
402 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
403 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
404 #endif
405 	tr->d_max_latency = trace_create_file("tracing_max_latency",
406 					      TRACE_MODE_WRITE,
407 					      d_tracer, tr,
408 					      &tracing_max_lat_fops);
409 }
410 
411 /*
412  * Copy the new maximum trace into the separate maximum-trace
413  * structure. (This way the maximum trace is permanently saved
414  * for later retrieval via /sys/kernel/tracing/tracing_max_latency.)
415  */
416 static void
417 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
418 {
419 	struct array_buffer *trace_buf = &tr->array_buffer;
420 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
421 	struct array_buffer *max_buf = &tr->snapshot_buffer;
422 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
423 
424 	max_buf->cpu = cpu;
425 	max_buf->time_start = data->preempt_timestamp;
426 
427 	max_data->saved_latency = tr->max_latency;
428 	max_data->critical_start = data->critical_start;
429 	max_data->critical_end = data->critical_end;
430 
431 	strscpy(max_data->comm, tsk->comm);
432 	max_data->pid = tsk->pid;
433 	/*
434 	 * If tsk == current, then use current_uid(), as that does not use
435 	 * RCU. The irq tracer can be called out of RCU scope.
436 	 */
437 	if (tsk == current)
438 		max_data->uid = current_uid();
439 	else
440 		max_data->uid = task_uid(tsk);
441 
442 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
443 	max_data->policy = tsk->policy;
444 	max_data->rt_priority = tsk->rt_priority;
445 
446 	/* record this task's comm */
447 	tracing_record_cmdline(tsk);
448 	latency_fsnotify(tr);
449 }
450 #else
451 static inline void __update_max_tr(struct trace_array *tr,
452 				   struct task_struct *tsk, int cpu) { }
453 #endif /* CONFIG_TRACER_MAX_TRACE */
454 
455 /**
456  * update_max_tr - snapshot @tr's trace buffers into its snapshot buffer
457  * @tr: the trace array to snapshot
458  * @tsk: the task with the latency
459  * @cpu: The cpu that initiated the trace.
460  * @cond_data: User data associated with a conditional snapshot
461  *
462  * Flip @tr's live buffer with its snapshot buffer and record information
463  * about which task was the cause of this latency.
464  */
465 void
466 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
467 	      void *cond_data)
468 {
469 	if (tr->stop_count)
470 		return;
471 
472 	WARN_ON_ONCE(!irqs_disabled());
473 
474 	if (!tr->allocated_snapshot) {
475 		/* Only the nop tracer should hit this when disabling */
476 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
477 		return;
478 	}
479 
480 	arch_spin_lock(&tr->max_lock);
481 
482 	/* Inherit the recordable setting from array_buffer */
483 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
484 		ring_buffer_record_on(tr->snapshot_buffer.buffer);
485 	else
486 		ring_buffer_record_off(tr->snapshot_buffer.buffer);
487 
488 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
489 		arch_spin_unlock(&tr->max_lock);
490 		return;
491 	}
492 
493 	swap(tr->array_buffer.buffer, tr->snapshot_buffer.buffer);
494 
495 	__update_max_tr(tr, tsk, cpu);
496 
497 	arch_spin_unlock(&tr->max_lock);
498 
499 	/* Any waiters on the old snapshot buffer need to wake up */
500 	ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
501 }
502 
503 /**
504  * update_max_tr_single - only copy one trace over, and reset the rest
505  * @tr: tracer
506  * @tsk: task with the latency
507  * @cpu: the cpu of the buffer to copy.
508  *
509  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
510  */
511 void
512 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
513 {
514 	int ret;
515 
516 	if (tr->stop_count)
517 		return;
518 
519 	WARN_ON_ONCE(!irqs_disabled());
520 	if (!tr->allocated_snapshot) {
521 		/* Only the nop tracer should hit this when disabling */
522 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
523 		return;
524 	}
525 
526 	arch_spin_lock(&tr->max_lock);
527 
528 	ret = ring_buffer_swap_cpu(tr->snapshot_buffer.buffer, tr->array_buffer.buffer, cpu);
529 
530 	if (ret == -EBUSY) {
531 		/*
532 		 * We failed to swap the buffer, either due to a commit taking
533 		 * place on this CPU or because a resize is in progress. We
534 		 * fail to record the trace, but we write a message into the
535 		 * snapshot buffer (no one writes directly to it) to flag
536 		 * that the swap failed.
537 		 */
538 		trace_array_printk_buf(tr->snapshot_buffer.buffer, _THIS_IP_,
539 			"Failed to swap buffers due to commit or resize in progress\n");
540 	}
541 
542 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
543 
544 	__update_max_tr(tr, tsk, cpu);
545 	arch_spin_unlock(&tr->max_lock);
546 }
547 
548 static void show_snapshot_main_help(struct seq_file *m)
549 {
550 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
551 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
552 		    "#                      Takes a snapshot of the main buffer.\n"
553 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
554 		    "#                      (Doesn't have to be '2'; works with any number that\n"
555 		    "#                       is not a '0' or '1')\n");
556 }
557 
558 static void show_snapshot_percpu_help(struct seq_file *m)
559 {
560 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
561 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
562 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
563 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
564 #else
565 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
566 		    "#                     Must use main snapshot file to allocate.\n");
567 #endif
568 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
569 		    "#                      (Doesn't have to be '2'; works with any number that\n"
570 		    "#                       is not a '0' or '1')\n");
571 }
572 
573 void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
574 {
575 	if (iter->tr->allocated_snapshot)
576 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
577 	else
578 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
579 
580 	seq_puts(m, "# Snapshot commands:\n");
581 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
582 		show_snapshot_main_help(m);
583 	else
584 		show_snapshot_percpu_help(m);
585 }
586 
587 static int tracing_snapshot_open(struct inode *inode, struct file *file)
588 {
589 	struct trace_array *tr = inode->i_private;
590 	struct trace_iterator *iter;
591 	struct seq_file *m;
592 	int ret;
593 
594 	ret = tracing_check_open_get_tr(tr);
595 	if (ret)
596 		return ret;
597 
598 	if (file->f_mode & FMODE_READ) {
599 		iter = __tracing_open(inode, file, true);
600 		if (IS_ERR(iter))
601 			ret = PTR_ERR(iter);
602 	} else {
603 		/* Writes still need the seq_file to hold the private data */
604 		ret = -ENOMEM;
605 		m = kzalloc_obj(*m);
606 		if (!m)
607 			goto out;
608 		iter = kzalloc_obj(*iter);
609 		if (!iter) {
610 			kfree(m);
611 			goto out;
612 		}
613 		ret = 0;
614 
615 		iter->tr = tr;
616 		iter->array_buffer = &tr->snapshot_buffer;
617 		iter->cpu_file = tracing_get_cpu(inode);
618 		m->private = iter;
619 		file->private_data = m;
620 	}
621 out:
622 	if (ret < 0)
623 		trace_array_put(tr);
624 
625 	return ret;
626 }
627 
628 static void tracing_swap_cpu_buffer(void *tr)
629 {
630 	update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
631 }
632 
633 static ssize_t
634 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
635 		       loff_t *ppos)
636 {
637 	struct seq_file *m = filp->private_data;
638 	struct trace_iterator *iter = m->private;
639 	struct trace_array *tr = iter->tr;
640 	unsigned long val;
641 	int ret;
642 
643 	ret = tracing_update_buffers(tr);
644 	if (ret < 0)
645 		return ret;
646 
647 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
648 	if (ret)
649 		return ret;
650 
651 	guard(mutex)(&trace_types_lock);
652 
653 	if (tracer_uses_snapshot(tr->current_trace))
654 		return -EBUSY;
655 
656 	local_irq_disable();
657 	arch_spin_lock(&tr->max_lock);
658 	if (tr->cond_snapshot)
659 		ret = -EBUSY;
660 	arch_spin_unlock(&tr->max_lock);
661 	local_irq_enable();
662 	if (ret)
663 		return ret;
664 
665 	switch (val) {
666 	case 0:
667 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
668 			return -EINVAL;
669 		if (tr->allocated_snapshot)
670 			free_snapshot(tr);
671 		break;
672 	case 1:
673 /* Only allow per-cpu swap if the ring buffer supports it */
674 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
675 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
676 			return -EINVAL;
677 #endif
678 		if (tr->allocated_snapshot)
679 			ret = resize_buffer_duplicate_size(&tr->snapshot_buffer,
680 					&tr->array_buffer, iter->cpu_file);
681 
682 		ret = tracing_arm_snapshot_locked(tr);
683 		if (ret)
684 			return ret;
685 
686 		/* Now, we're going to swap */
687 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
688 			local_irq_disable();
689 			update_max_tr(tr, current, smp_processor_id(), NULL);
690 			local_irq_enable();
691 		} else {
692 			smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
693 						 (void *)tr, 1);
694 		}
695 		tracing_disarm_snapshot(tr);
696 		break;
697 	default:
698 		if (tr->allocated_snapshot) {
699 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
700 				tracing_reset_online_cpus(&tr->snapshot_buffer);
701 			else
702 				tracing_reset_cpu(&tr->snapshot_buffer, iter->cpu_file);
703 		}
704 		break;
705 	}
706 
707 	if (ret >= 0) {
708 		*ppos += cnt;
709 		ret = cnt;
710 	}
711 
712 	return ret;
713 }
714 
715 static int tracing_snapshot_release(struct inode *inode, struct file *file)
716 {
717 	struct seq_file *m = file->private_data;
718 	int ret;
719 
720 	ret = tracing_release(inode, file);
721 
722 	if (file->f_mode & FMODE_READ)
723 		return ret;
724 
725 	/* If write only, the seq_file is just a stub */
726 	if (m)
727 		kfree(m->private);
728 	kfree(m);
729 
730 	return 0;
731 }
732 
733 static int snapshot_raw_open(struct inode *inode, struct file *filp)
734 {
735 	struct ftrace_buffer_info *info;
736 	int ret;
737 
738 	/* The following checks for tracefs lockdown */
739 	ret = tracing_buffers_open(inode, filp);
740 	if (ret < 0)
741 		return ret;
742 
743 	info = filp->private_data;
744 
745 	if (tracer_uses_snapshot(info->iter.trace)) {
746 		tracing_buffers_release(inode, filp);
747 		return -EBUSY;
748 	}
749 
750 	info->iter.snapshot = true;
751 	info->iter.array_buffer = &info->iter.tr->snapshot_buffer;
752 
753 	return ret;
754 }
755 
756 const struct file_operations snapshot_fops = {
757 	.open		= tracing_snapshot_open,
758 	.read		= seq_read,
759 	.write		= tracing_snapshot_write,
760 	.llseek		= tracing_lseek,
761 	.release	= tracing_snapshot_release,
762 };
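
/*
 * These operations back /sys/kernel/tracing/snapshot (and the per-CPU
 * per_cpu/cpuN/snapshot files). Typical use from the shell:
 *
 *	echo 1 > /sys/kernel/tracing/snapshot	# allocate (if needed) and swap
 *	cat /sys/kernel/tracing/snapshot	# read the captured trace
 *	echo 0 > /sys/kernel/tracing/snapshot	# clear and free the buffer
 */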
763 
764 const struct file_operations snapshot_raw_fops = {
765 	.open		= snapshot_raw_open,
766 	.read		= tracing_buffers_read,
767 	.release	= tracing_buffers_release,
768 	.splice_read	= tracing_buffers_splice_read,
769 };
770 
771 #ifdef CONFIG_TRACER_MAX_TRACE
772 static ssize_t
773 tracing_max_lat_read(struct file *filp, char __user *ubuf,
774 		     size_t cnt, loff_t *ppos)
775 {
776 	struct trace_array *tr = filp->private_data;
777 
778 	return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
779 }
780 
781 static ssize_t
782 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
783 		      size_t cnt, loff_t *ppos)
784 {
785 	struct trace_array *tr = filp->private_data;
786 
787 	return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
788 }
789 
790 static const struct file_operations tracing_max_lat_fops = {
791 	.open		= tracing_open_generic_tr,
792 	.read		= tracing_max_lat_read,
793 	.write		= tracing_max_lat_write,
794 	.llseek		= generic_file_llseek,
795 	.release	= tracing_release_generic_tr,
796 };
797 #endif /* CONFIG_TRACER_MAX_TRACE */
798 
799 int get_snapshot_map(struct trace_array *tr)
800 {
801 	int err = 0;
802 
803 	/*
804 	 * Called with mmap_lock held. Lockdep would be unhappy if we took
805 	 * trace_types_lock here. Instead use the more specific
806 	 * snapshot_trigger_lock.
807 	 */
808 	spin_lock(&tr->snapshot_trigger_lock);
809 
810 	if (tr->snapshot || tr->mapped == UINT_MAX)
811 		err = -EBUSY;
812 	else
813 		tr->mapped++;
814 
815 	spin_unlock(&tr->snapshot_trigger_lock);
816 
817 	/* Wait for update_max_tr() to observe tr->mapped */
818 	if (tr->mapped == 1)
819 		synchronize_rcu();
820 
821 	return err;
823 }
824 
825 void put_snapshot_map(struct trace_array *tr)
826 {
827 	spin_lock(&tr->snapshot_trigger_lock);
828 	if (!WARN_ON(!tr->mapped))
829 		tr->mapped--;
830 	spin_unlock(&tr->snapshot_trigger_lock);
831 }
832 
833 #ifdef CONFIG_DYNAMIC_FTRACE
834 static void
835 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
836 		struct trace_array *tr, struct ftrace_probe_ops *ops,
837 		void *data)
838 {
839 	tracing_snapshot_instance(tr);
840 }
841 
842 static void
843 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
844 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
845 		      void *data)
846 {
847 	struct ftrace_func_mapper *mapper = data;
848 	long *count = NULL;
849 
850 	if (mapper)
851 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
852 
853 	if (count) {
855 		if (*count <= 0)
856 			return;
857 
858 		(*count)--;
859 	}
860 
861 	tracing_snapshot_instance(tr);
862 }
863 
864 static int
865 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
866 		      struct ftrace_probe_ops *ops, void *data)
867 {
868 	struct ftrace_func_mapper *mapper = data;
869 	long *count = NULL;
870 
871 	seq_printf(m, "%ps:", (void *)ip);
872 
873 	seq_puts(m, "snapshot");
874 
875 	if (mapper)
876 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
877 
878 	if (count)
879 		seq_printf(m, ":count=%ld\n", *count);
880 	else
881 		seq_puts(m, ":unlimited\n");
882 
883 	return 0;
884 }
885 
886 static int
887 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
888 		     unsigned long ip, void *init_data, void **data)
889 {
890 	struct ftrace_func_mapper *mapper = *data;
891 
892 	if (!mapper) {
893 		mapper = allocate_ftrace_func_mapper();
894 		if (!mapper)
895 			return -ENOMEM;
896 		*data = mapper;
897 	}
898 
899 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
900 }
901 
902 static void
903 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
904 		     unsigned long ip, void *data)
905 {
906 	struct ftrace_func_mapper *mapper = data;
907 
908 	if (!ip) {
909 		if (!mapper)
910 			return;
911 		free_ftrace_func_mapper(mapper, NULL);
912 		return;
913 	}
914 
915 	ftrace_func_mapper_remove_ip(mapper, ip);
916 }
917 
918 static struct ftrace_probe_ops snapshot_probe_ops = {
919 	.func			= ftrace_snapshot,
920 	.print			= ftrace_snapshot_print,
921 };
922 
923 static struct ftrace_probe_ops snapshot_count_probe_ops = {
924 	.func			= ftrace_count_snapshot,
925 	.print			= ftrace_snapshot_print,
926 	.init			= ftrace_snapshot_init,
927 	.free			= ftrace_snapshot_free,
928 };
929 
930 static int
931 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
932 			       char *glob, char *cmd, char *param, int enable)
933 {
934 	struct ftrace_probe_ops *ops;
935 	void *count = (void *)-1;
936 	char *number;
937 	int ret;
938 
939 	if (!tr)
940 		return -ENODEV;
941 
942 	/* hash funcs only work with set_ftrace_filter */
943 	if (!enable)
944 		return -EINVAL;
945 
946 	ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
947 
948 	if (glob[0] == '!') {
949 		ret = unregister_ftrace_function_probe_func(glob+1, tr, ops);
950 		if (!ret)
951 			tracing_disarm_snapshot(tr);
952 
953 		return ret;
954 	}
955 
956 	if (!param)
957 		goto out_reg;
958 
959 	number = strsep(&param, ":");
960 
961 	if (!strlen(number))
962 		goto out_reg;
963 
964 	/*
965 	 * We use the callback data field (which is a pointer)
966 	 * as our counter.
967 	 */
968 	ret = kstrtoul(number, 0, (unsigned long *)&count);
969 	if (ret)
970 		return ret;
971 
972  out_reg:
973 	ret = tracing_arm_snapshot(tr);
974 	if (ret < 0)
975 		return ret;
976 
977 	ret = register_ftrace_function_probe(glob, tr, ops, count);
978 	if (ret < 0)
979 		tracing_disarm_snapshot(tr);
980 
981 	return ret < 0 ? ret : 0;
982 }
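
/*
 * The callback above implements the "snapshot" command of
 * set_ftrace_filter. Illustrative shell usage ("schedule" is just a
 * sample function name):
 *
 *	echo 'schedule:snapshot' > set_ftrace_filter	# snapshot on every call
 *	echo 'schedule:snapshot:5' > set_ftrace_filter	# only the first 5 calls
 *	echo '!schedule:snapshot' >> set_ftrace_filter	# remove the probe
 */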
983 
984 static struct ftrace_func_command ftrace_snapshot_cmd = {
985 	.name			= "snapshot",
986 	.func			= ftrace_trace_snapshot_callback,
987 };
988 
989 __init int register_snapshot_cmd(void)
990 {
991 	return register_ftrace_command(&ftrace_snapshot_cmd);
992 }
993 #endif /* CONFIG_DYNAMIC_FTRACE */
994 
995 int trace_allocate_snapshot(struct trace_array *tr, int size)
996 {
997 	int ret;
998 
999 	/* Fixed (boot-mapped) buffer trace arrays do not have snapshot buffers */
1000 	if (tr->range_addr_start)
1001 		return 0;
1002 
1003 	/* allocate_snapshot can only be true during system boot */
1004 	ret = allocate_trace_buffer(tr, &tr->snapshot_buffer,
1005 				    allocate_snapshot ? size : 1);
1006 	if (ret < 0)
1007 		return -ENOMEM;
1008 
1009 	tr->allocated_snapshot = allocate_snapshot;
1010 
1011 	allocate_snapshot = false;
1012 	return 0;
1013 }
1014 
1015 __init static bool tr_needs_alloc_snapshot(const char *name)
1016 {
1017 	char *test;
1018 	int len = strlen(name);
1019 	bool ret;
1020 
1021 	if (!boot_snapshot_index)
1022 		return false;
1023 
1024 	if (strncmp(name, boot_snapshot_info, len) == 0 &&
1025 	    boot_snapshot_info[len] == '\t')
1026 		return true;
1027 
1028 	test = kmalloc(strlen(name) + 3, GFP_KERNEL);
1029 	if (!test)
1030 		return false;
1031 
1032 	sprintf(test, "\t%s\t", name);
1033 	ret = strstr(boot_snapshot_info, test) != NULL;
1034 	kfree(test);
1035 	return ret;
1036 }
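
/*
 * boot_snapshot_info is a sequence of tab-terminated names built by
 * boot_alloc_snapshot(): e.g. "ftrace_boot_snapshot=foo
 * ftrace_boot_snapshot=bar" yields "foo\tbar\t". The strncmp() above
 * matches a name at the head of the string ("foo"), while the "\t%s\t"
 * probe matches names appearing later in the list ("bar").
 */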
1037 
1038 __init void do_allocate_snapshot(const char *name)
1039 {
1040 	if (!tr_needs_alloc_snapshot(name))
1041 		return;
1042 
1043 	/*
1044 	 * When allocate_snapshot is set, the next call to
1045 	 * allocate_trace_buffers() (called by trace_array_get_by_name())
1046 	 * will allocate the snapshot buffer. That will also clear
1047 	 * this flag.
1048 	 */
1049 	allocate_snapshot = true;
1050 }
1051 
1052 void __init ftrace_boot_snapshot(void)
1053 {
1054 	struct trace_array *tr;
1055 
1056 	if (!snapshot_at_boot)
1057 		return;
1058 
1059 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1060 		if (!tr->allocated_snapshot)
1061 			continue;
1062 
1063 		tracing_snapshot_instance(tr);
1064 		trace_array_puts(tr, "** Boot snapshot taken **\n");
1065 	}
1066 }
1067