xref: /linux/kernel/trace/trace_sched_wakeup.c (revision 367b8112fe2ea5c39a7bb4d263dcdd9b612fae18)
1 /*
2  * trace task wakeup timings
3  *
4  * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Based on code from the latency_tracer, that is:
8  *
9  *  Copyright (C) 2004-2006 Ingo Molnar
10  *  Copyright (C) 2004 William Lee Irwin III
11  */
12 #include <linux/module.h>
13 #include <linux/fs.h>
14 #include <linux/debugfs.h>
15 #include <linux/kallsyms.h>
16 #include <linux/uaccess.h>
17 #include <linux/ftrace.h>
18 #include <trace/sched.h>
19 
20 #include "trace.h"
21 
/* Trace array this tracer records into; assigned in wakeup_tracer_init(). */
static struct trace_array	*wakeup_trace;
/* Non-zero while the probes should act; set/cleared by start/stop_wakeup_tracer(). */
static int __read_mostly	tracer_enabled;

/* Task whose wakeup is currently being timed (a reference is held), or NULL. */
static struct task_struct	*wakeup_task;
/* task_cpu() of wakeup_task sampled at wakeup time; -1 when nothing is timed. */
static int			wakeup_cpu;
/*
 * prio of wakeup_task. The type is unsigned, so the -1 stored when
 * nothing is being timed becomes the largest representable value.
 */
static unsigned			wakeup_prio = -1;

/* Serializes updates of wakeup_task, wakeup_cpu and wakeup_prio. */
static raw_spinlock_t wakeup_lock =
	(raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;

/* Defined further down; must be called with wakeup_lock held (see callers). */
static void __wakeup_reset(struct trace_array *tr);
33 
34 #ifdef CONFIG_FUNCTION_TRACER
/*
 * wakeup uses its own tracer function to keep the overhead down:
 *
 * Installed through trace_ops. ip and parent_ip are handed straight to
 * trace_function(); an entry is written only while a wakeup is being
 * timed and only on the CPU that task_cpu() reports for wakeup_task.
 */
static void
wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
{
	struct trace_array *tr = wakeup_trace;
	struct trace_array_cpu *data;
	unsigned long flags;
	long disabled;
	int resched;
	int cpu;
	int pc;

	/* Lockless fast path: no wakeup is currently being timed. */
	if (likely(!wakeup_task))
		return;

	/* Sample preempt count and resched state before disabling preemption. */
	pc = preempt_count();
	resched = need_resched();
	preempt_disable_notrace();

	cpu = raw_smp_processor_id();
	data = tr->data[cpu];
	/*
	 * Proceed only if this call took the per-cpu counter from 0 to 1;
	 * any other value means this CPU's data is already in use or was
	 * disabled by someone else.
	 */
	disabled = atomic_inc_return(&data->disabled);
	if (unlikely(disabled != 1))
		goto out;

	local_irq_save(flags);
	__raw_spin_lock(&wakeup_lock);

	/* Re-check under the lock: the task may have been reset meanwhile. */
	if (unlikely(!wakeup_task))
		goto unlock;

	/*
	 * The task can't disappear because it needs to
	 * wake up first, and we have the wakeup_lock.
	 */
	if (task_cpu(wakeup_task) != cpu)
		goto unlock;

	trace_function(tr, data, ip, parent_ip, flags, pc);

 unlock:
	__raw_spin_unlock(&wakeup_lock);
	local_irq_restore(flags);

 out:
	/* Balance the atomic_inc_return() above on every path. */
	atomic_dec(&data->disabled);

	/*
	 * To prevent recursion from the scheduler, if the
	 * resched flag was set before we entered, then
	 * don't reschedule.
	 */
	if (resched)
		preempt_enable_no_resched_notrace();
	else
		preempt_enable_notrace();
}
94 
/*
 * ftrace_ops handed to register_ftrace_function() /
 * unregister_ftrace_function() by start/stop_wakeup_tracer();
 * routes function-trace callbacks to wakeup_tracer_call().
 */
static struct ftrace_ops trace_ops __read_mostly =
{
	.func = wakeup_tracer_call,
};
99 #endif /* CONFIG_FUNCTION_TRACER */
100 
101 /*
102  * Should this new latency be reported/recorded?
103  */
104 static int report_latency(cycle_t delta)
105 {
106 	if (tracing_thresh) {
107 		if (delta < tracing_thresh)
108 			return 0;
109 	} else {
110 		if (delta <= tracing_max_latency)
111 			return 0;
112 	}
113 	return 1;
114 }
115 
116 static void notrace
117 probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
118 	struct task_struct *next)
119 {
120 	unsigned long latency = 0, t0 = 0, t1 = 0;
121 	struct trace_array_cpu *data;
122 	cycle_t T0, T1, delta;
123 	unsigned long flags;
124 	long disabled;
125 	int cpu;
126 	int pc;
127 
128 	tracing_record_cmdline(prev);
129 
130 	if (unlikely(!tracer_enabled))
131 		return;
132 
133 	/*
134 	 * When we start a new trace, we set wakeup_task to NULL
135 	 * and then set tracer_enabled = 1. We want to make sure
136 	 * that another CPU does not see the tracer_enabled = 1
137 	 * and the wakeup_task with an older task, that might
138 	 * actually be the same as next.
139 	 */
140 	smp_rmb();
141 
142 	if (next != wakeup_task)
143 		return;
144 
145 	pc = preempt_count();
146 
147 	/* The task we are waiting for is waking up */
148 	data = wakeup_trace->data[wakeup_cpu];
149 
150 	/* disable local data, not wakeup_cpu data */
151 	cpu = raw_smp_processor_id();
152 	disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled);
153 	if (likely(disabled != 1))
154 		goto out;
155 
156 	local_irq_save(flags);
157 	__raw_spin_lock(&wakeup_lock);
158 
159 	/* We could race with grabbing wakeup_lock */
160 	if (unlikely(!tracer_enabled || next != wakeup_task))
161 		goto out_unlock;
162 
163 	trace_function(wakeup_trace, data, CALLER_ADDR1, CALLER_ADDR2, flags, pc);
164 
165 	/*
166 	 * usecs conversion is slow so we try to delay the conversion
167 	 * as long as possible:
168 	 */
169 	T0 = data->preempt_timestamp;
170 	T1 = ftrace_now(cpu);
171 	delta = T1-T0;
172 
173 	if (!report_latency(delta))
174 		goto out_unlock;
175 
176 	latency = nsecs_to_usecs(delta);
177 
178 	tracing_max_latency = delta;
179 	t0 = nsecs_to_usecs(T0);
180 	t1 = nsecs_to_usecs(T1);
181 
182 	update_max_tr(wakeup_trace, wakeup_task, wakeup_cpu);
183 
184 out_unlock:
185 	__wakeup_reset(wakeup_trace);
186 	__raw_spin_unlock(&wakeup_lock);
187 	local_irq_restore(flags);
188 out:
189 	atomic_dec(&wakeup_trace->data[cpu]->disabled);
190 }
191 
192 static void __wakeup_reset(struct trace_array *tr)
193 {
194 	struct trace_array_cpu *data;
195 	int cpu;
196 
197 	for_each_possible_cpu(cpu) {
198 		data = tr->data[cpu];
199 		tracing_reset(tr, cpu);
200 	}
201 
202 	wakeup_cpu = -1;
203 	wakeup_prio = -1;
204 
205 	if (wakeup_task)
206 		put_task_struct(wakeup_task);
207 
208 	wakeup_task = NULL;
209 }
210 
211 static void wakeup_reset(struct trace_array *tr)
212 {
213 	unsigned long flags;
214 
215 	local_irq_save(flags);
216 	__raw_spin_lock(&wakeup_lock);
217 	__wakeup_reset(tr);
218 	__raw_spin_unlock(&wakeup_lock);
219 	local_irq_restore(flags);
220 }
221 
/*
 * Wakeup probe (registered for both sched_wakeup and sched_wakeup_new):
 * starts timing the wakeup of task p when it qualifies.
 *
 * p qualifies when it is an RT task and its prio value is numerically
 * lower than both the prio of the task already being timed
 * (wakeup_prio) and the prio of current. On success the previous
 * measurement is discarded, a reference on p is taken, and the start
 * timestamp plus a first trace entry are written to the buffer of
 * task_cpu(p).
 *
 * rq is unused here.
 */
static void
probe_wakeup(struct rq *rq, struct task_struct *p)
{
	int cpu = smp_processor_id();
	unsigned long flags;
	long disabled;
	int pc;

	if (likely(!tracer_enabled))
		return;

	tracing_record_cmdline(p);
	tracing_record_cmdline(current);

	/* Lockless filter; the wakeup_prio part is re-checked under the lock. */
	if (likely(!rt_task(p)) ||
			p->prio >= wakeup_prio ||
			p->prio >= current->prio)
		return;

	pc = preempt_count();
	/* Proceed only if this call took the local per-cpu counter from 0 to 1. */
	disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled);
	if (unlikely(disabled != 1))
		goto out;

	/* interrupts should be off from try_to_wake_up */
	__raw_spin_lock(&wakeup_lock);

	/* check for races. */
	if (!tracer_enabled || p->prio >= wakeup_prio)
		goto out_locked;

	/* reset the trace */
	__wakeup_reset(wakeup_trace);

	wakeup_cpu = task_cpu(p);
	wakeup_prio = p->prio;

	/* Hold a reference for as long as wakeup_task points at p. */
	wakeup_task = p;
	get_task_struct(wakeup_task);

	/* Only reads the current irq flags for the trace entry; does not change them. */
	local_save_flags(flags);

	/* Timestamp comes from the local cpu but is stored in wakeup_cpu's data. */
	wakeup_trace->data[wakeup_cpu]->preempt_timestamp = ftrace_now(cpu);
	trace_function(wakeup_trace, wakeup_trace->data[wakeup_cpu],
		       CALLER_ADDR1, CALLER_ADDR2, flags, pc);

out_locked:
	__raw_spin_unlock(&wakeup_lock);
out:
	/* Balance the atomic_inc_return() above. */
	atomic_dec(&wakeup_trace->data[cpu]->disabled);
}
273 
274 static void start_wakeup_tracer(struct trace_array *tr)
275 {
276 	int ret;
277 
278 	ret = register_trace_sched_wakeup(probe_wakeup);
279 	if (ret) {
280 		pr_info("wakeup trace: Couldn't activate tracepoint"
281 			" probe to kernel_sched_wakeup\n");
282 		return;
283 	}
284 
285 	ret = register_trace_sched_wakeup_new(probe_wakeup);
286 	if (ret) {
287 		pr_info("wakeup trace: Couldn't activate tracepoint"
288 			" probe to kernel_sched_wakeup_new\n");
289 		goto fail_deprobe;
290 	}
291 
292 	ret = register_trace_sched_switch(probe_wakeup_sched_switch);
293 	if (ret) {
294 		pr_info("sched trace: Couldn't activate tracepoint"
295 			" probe to kernel_sched_schedule\n");
296 		goto fail_deprobe_wake_new;
297 	}
298 
299 	wakeup_reset(tr);
300 
301 	/*
302 	 * Don't let the tracer_enabled = 1 show up before
303 	 * the wakeup_task is reset. This may be overkill since
304 	 * wakeup_reset does a spin_unlock after setting the
305 	 * wakeup_task to NULL, but I want to be safe.
306 	 * This is a slow path anyway.
307 	 */
308 	smp_wmb();
309 
310 	register_ftrace_function(&trace_ops);
311 
312 	tracer_enabled = 1;
313 
314 	return;
315 fail_deprobe_wake_new:
316 	unregister_trace_sched_wakeup_new(probe_wakeup);
317 fail_deprobe:
318 	unregister_trace_sched_wakeup(probe_wakeup);
319 }
320 
/*
 * Disable the wakeup tracer: clear tracer_enabled first so the probes
 * bail out early, then unregister the function-trace callback and the
 * three tracepoint probes in the reverse order of start_wakeup_tracer().
 *
 * tr is unused here.
 */
static void stop_wakeup_tracer(struct trace_array *tr)
{
	tracer_enabled = 0;
	unregister_ftrace_function(&trace_ops);
	unregister_trace_sched_switch(probe_wakeup_sched_switch);
	unregister_trace_sched_wakeup_new(probe_wakeup);
	unregister_trace_sched_wakeup(probe_wakeup);
}
329 
330 static void wakeup_tracer_init(struct trace_array *tr)
331 {
332 	wakeup_trace = tr;
333 
334 	if (tr->ctrl)
335 		start_wakeup_tracer(tr);
336 }
337 
338 static void wakeup_tracer_reset(struct trace_array *tr)
339 {
340 	if (tr->ctrl) {
341 		stop_wakeup_tracer(tr);
342 		/* make sure we put back any tasks we are tracing */
343 		wakeup_reset(tr);
344 	}
345 }
346 
347 static void wakeup_tracer_ctrl_update(struct trace_array *tr)
348 {
349 	if (tr->ctrl)
350 		start_wakeup_tracer(tr);
351 	else
352 		stop_wakeup_tracer(tr);
353 }
354 
355 static void wakeup_tracer_open(struct trace_iterator *iter)
356 {
357 	/* stop the trace while dumping */
358 	if (iter->tr->ctrl)
359 		stop_wakeup_tracer(iter->tr);
360 }
361 
362 static void wakeup_tracer_close(struct trace_iterator *iter)
363 {
364 	/* forget about any processes we were recording */
365 	if (iter->tr->ctrl)
366 		start_wakeup_tracer(iter->tr);
367 }
368 
/*
 * Descriptor for the "wakeup" tracer, wiring the callbacks defined in
 * this file; handed to register_tracer() by init_wakeup_tracer().
 * The selftest hook is only present with CONFIG_FTRACE_SELFTEST.
 */
static struct tracer wakeup_tracer __read_mostly =
{
	.name		= "wakeup",
	.init		= wakeup_tracer_init,
	.reset		= wakeup_tracer_reset,
	.open		= wakeup_tracer_open,
	.close		= wakeup_tracer_close,
	.ctrl_update	= wakeup_tracer_ctrl_update,
	.print_max	= 1,
#ifdef CONFIG_FTRACE_SELFTEST
	.selftest    = trace_selftest_startup_wakeup,
#endif
};
382 
383 __init static int init_wakeup_tracer(void)
384 {
385 	int ret;
386 
387 	ret = register_tracer(&wakeup_tracer);
388 	if (ret)
389 		return ret;
390 
391 	return 0;
392 }
393 device_initcall(init_wakeup_tracer);
394