/*
 * trace task wakeup timings
 *
 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
 *
 * Based on code from the latency_tracer, that is:
 *
 *  Copyright (C) 2004-2006 Ingo Molnar
 *  Copyright (C) 2004 William Lee Irwin III
 */
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/debugfs.h>
#include <linux/kallsyms.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <linux/marker.h>

#include "trace.h"

static struct trace_array	*wakeup_trace;
static int __read_mostly	tracer_enabled;

static struct task_struct	*wakeup_task;
static int			wakeup_cpu;
static unsigned			wakeup_prio = -1;

static DEFINE_SPINLOCK(wakeup_lock);

static void __wakeup_reset(struct trace_array *tr);

#ifdef CONFIG_FTRACE
/*
 * wakeup uses its own tracer function to keep the overhead down:
 */
static void
wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
{
	struct trace_array *tr = wakeup_trace;
	struct trace_array_cpu *data;
	unsigned long flags;
	long disabled;
	int resched;
	int cpu;

	if (likely(!wakeup_task))
		return;

	resched = need_resched();
	preempt_disable_notrace();

	cpu = raw_smp_processor_id();
	data = tr->data[cpu];
	disabled = atomic_inc_return(&data->disabled);
	if (unlikely(disabled != 1))
		goto out;

	spin_lock_irqsave(&wakeup_lock, flags);

	if (unlikely(!wakeup_task))
		goto unlock;

	/*
	 * The task can't disappear because it needs to
	 * wake up first, and we have the wakeup_lock.
	 */
	if (task_cpu(wakeup_task) != cpu)
		goto unlock;

	trace_function(tr, data, ip, parent_ip, flags);

unlock:
	spin_unlock_irqrestore(&wakeup_lock, flags);

out:
	atomic_dec(&data->disabled);

	/*
	 * To prevent recursion from the scheduler, if the
	 * resched flag was set before we entered, then
	 * don't reschedule.
	 */
	if (resched)
		preempt_enable_no_resched_notrace();
	else
		preempt_enable_notrace();
}

static struct ftrace_ops trace_ops __read_mostly =
{
	.func = wakeup_tracer_call,
};
#endif /* CONFIG_FTRACE */

/*
 * Should this new latency be reported/recorded?
 */
static int report_latency(cycle_t delta)
{
	if (tracing_thresh) {
		if (delta < tracing_thresh)
			return 0;
	} else {
		if (delta <= tracing_max_latency)
			return 0;
	}
	return 1;
}

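/*
 * Probe for the context-switch marker. It only does real work when the
 * task being switched in is the wakeup_task armed in wakeup_check_start()
 * below: it logs the switch, measures the delay since preempt_timestamp
 * was taken at wakeup time, records a new max latency when
 * report_latency() says so, and then resets the tracer so the next
 * wakeup can be tracked.
 */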
static void notrace
wakeup_sched_switch(void *private, void *rq, struct task_struct *prev,
	struct task_struct *next)
{
	unsigned long latency = 0, t0 = 0, t1 = 0;
	struct trace_array **ptr = private;
	struct trace_array *tr = *ptr;
	struct trace_array_cpu *data;
	cycle_t T0, T1, delta;
	unsigned long flags;
	long disabled;
	int cpu;

	if (unlikely(!tracer_enabled))
		return;

	/*
	 * When we start a new trace, we set wakeup_task to NULL
	 * and then set tracer_enabled = 1. We want to make sure
	 * that another CPU does not see the tracer_enabled = 1
	 * and the wakeup_task with an older task, that might
	 * actually be the same as next.
	 */
	smp_rmb();

	if (next != wakeup_task)
		return;

	/* The task we are waiting for is waking up */
	data = tr->data[wakeup_cpu];

	/* disable local data, not wakeup_cpu data */
	cpu = raw_smp_processor_id();
	disabled = atomic_inc_return(&tr->data[cpu]->disabled);
	if (unlikely(disabled != 1))
		goto out;

	spin_lock_irqsave(&wakeup_lock, flags);

	/* We could race with grabbing wakeup_lock */
	if (unlikely(!tracer_enabled || next != wakeup_task))
		goto out_unlock;

	trace_function(tr, data, CALLER_ADDR1, CALLER_ADDR2, flags);

	/*
	 * usecs conversion is slow so we try to delay the conversion
	 * as long as possible:
	 */
	T0 = data->preempt_timestamp;
	T1 = ftrace_now(cpu);
	delta = T1 - T0;

	if (!report_latency(delta))
		goto out_unlock;

	latency = nsecs_to_usecs(delta);

	tracing_max_latency = delta;
	t0 = nsecs_to_usecs(T0);
	t1 = nsecs_to_usecs(T1);

	update_max_tr(tr, wakeup_task, wakeup_cpu);

out_unlock:
	__wakeup_reset(tr);
	spin_unlock_irqrestore(&wakeup_lock, flags);
out:
	atomic_dec(&tr->data[cpu]->disabled);
}

static notrace void
sched_switch_callback(void *probe_data, void *call_data,
		      const char *format, va_list *args)
{
	struct task_struct *prev;
	struct task_struct *next;
	struct rq *__rq;

	/* skip prev_pid %d next_pid %d prev_state %ld */
	(void)va_arg(*args, int);
	(void)va_arg(*args, int);
	(void)va_arg(*args, long);
	__rq = va_arg(*args, typeof(__rq));
	prev = va_arg(*args, typeof(prev));
	next = va_arg(*args, typeof(next));

	tracing_record_cmdline(prev);

	/*
	 * If tracer_switch_func only points to the local
	 * switch func, it still needs the ptr passed to it.
	 */
	wakeup_sched_switch(probe_data, __rq, prev, next);
}

static void __wakeup_reset(struct trace_array *tr)
{
	struct trace_array_cpu *data;
	int cpu;

	assert_spin_locked(&wakeup_lock);

	for_each_possible_cpu(cpu) {
		data = tr->data[cpu];
		tracing_reset(data);
	}

	wakeup_cpu = -1;
	wakeup_prio = -1;

	if (wakeup_task)
		put_task_struct(wakeup_task);

	wakeup_task = NULL;
}

static void wakeup_reset(struct trace_array *tr)
{
	unsigned long flags;

	spin_lock_irqsave(&wakeup_lock, flags);
	__wakeup_reset(tr);
	spin_unlock_irqrestore(&wakeup_lock, flags);
}

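/*
 * Called from the wakeup marker probes. A newly woken task becomes the
 * traced wakeup_task only if it is an RT task with a numerically lower
 * (i.e. higher) priority than both the current wakeup_prio and the task
 * running on this CPU; in that case the trace is reset, the task is
 * pinned with get_task_struct() and its wakeup timestamp is recorded.
 */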
static void
wakeup_check_start(struct trace_array *tr, struct task_struct *p,
		   struct task_struct *curr)
{
	int cpu = smp_processor_id();
	unsigned long flags;
	long disabled;

	if (likely(!rt_task(p)) ||
			p->prio >= wakeup_prio ||
			p->prio >= curr->prio)
		return;

	disabled = atomic_inc_return(&tr->data[cpu]->disabled);
	if (unlikely(disabled != 1))
		goto out;

	/* interrupts should be off from try_to_wake_up */
	spin_lock(&wakeup_lock);

	/* check for races. */
	if (!tracer_enabled || p->prio >= wakeup_prio)
		goto out_locked;

	/* reset the trace */
	__wakeup_reset(tr);

	wakeup_cpu = task_cpu(p);
	wakeup_prio = p->prio;

	wakeup_task = p;
	get_task_struct(wakeup_task);

	local_save_flags(flags);

	tr->data[wakeup_cpu]->preempt_timestamp = ftrace_now(cpu);
	trace_function(tr, tr->data[wakeup_cpu],
		       CALLER_ADDR1, CALLER_ADDR2, flags);

out_locked:
	spin_unlock(&wakeup_lock);
out:
	atomic_dec(&tr->data[cpu]->disabled);
}

static notrace void
wake_up_callback(void *probe_data, void *call_data,
		 const char *format, va_list *args)
{
	struct trace_array **ptr = probe_data;
	struct trace_array *tr = *ptr;
	struct task_struct *curr;
	struct task_struct *task;
	struct rq *__rq;

	if (likely(!tracer_enabled))
		return;

	/* Skip pid %d state %ld */
	(void)va_arg(*args, int);
	(void)va_arg(*args, long);
	/* now get the meat: "rq %p task %p rq->curr %p" */
	__rq = va_arg(*args, typeof(__rq));
	task = va_arg(*args, typeof(task));
	curr = va_arg(*args, typeof(curr));

	tracing_record_cmdline(task);
	tracing_record_cmdline(curr);

	wakeup_check_start(tr, task, curr);
}

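/*
 * start_wakeup_tracer() attaches the scheduler markers (task wakeup,
 * wakeup of a new task, and context switch), resets the trace and only
 * then sets tracer_enabled; if a registration fails, the probes that
 * were already attached are unregistered in reverse order.
 * stop_wakeup_tracer() undoes all of this.
 */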
static void start_wakeup_tracer(struct trace_array *tr)
{
	int ret;

	ret = marker_probe_register("kernel_sched_wakeup",
			"pid %d state %ld ## rq %p task %p rq->curr %p",
			wake_up_callback,
			&wakeup_trace);
	if (ret) {
		pr_info("wakeup trace: Couldn't add marker"
			" probe to kernel_sched_wakeup\n");
		return;
	}

	ret = marker_probe_register("kernel_sched_wakeup_new",
			"pid %d state %ld ## rq %p task %p rq->curr %p",
			wake_up_callback,
			&wakeup_trace);
	if (ret) {
		pr_info("wakeup trace: Couldn't add marker"
			" probe to kernel_sched_wakeup_new\n");
		goto fail_deprobe;
	}

	ret = marker_probe_register("kernel_sched_schedule",
			"prev_pid %d next_pid %d prev_state %ld "
			"## rq %p prev %p next %p",
			sched_switch_callback,
			&wakeup_trace);
	if (ret) {
		pr_info("sched trace: Couldn't add marker"
			" probe to kernel_sched_schedule\n");
		goto fail_deprobe_wake_new;
	}

	wakeup_reset(tr);

	/*
	 * Don't let the tracer_enabled = 1 show up before
	 * the wakeup_task is reset. This may be overkill since
	 * wakeup_reset does a spin_unlock after setting the
	 * wakeup_task to NULL, but I want to be safe.
	 * This is a slow path anyway.
	 */
	smp_wmb();

	register_ftrace_function(&trace_ops);

	tracer_enabled = 1;

	return;
fail_deprobe_wake_new:
	marker_probe_unregister("kernel_sched_wakeup_new",
				wake_up_callback,
				&wakeup_trace);
fail_deprobe:
	marker_probe_unregister("kernel_sched_wakeup",
				wake_up_callback,
				&wakeup_trace);
}

static void stop_wakeup_tracer(struct trace_array *tr)
{
	tracer_enabled = 0;
	unregister_ftrace_function(&trace_ops);
	marker_probe_unregister("kernel_sched_schedule",
				sched_switch_callback,
				&wakeup_trace);
	marker_probe_unregister("kernel_sched_wakeup_new",
				wake_up_callback,
				&wakeup_trace);
	marker_probe_unregister("kernel_sched_wakeup",
				wake_up_callback,
				&wakeup_trace);
}

static void wakeup_tracer_init(struct trace_array *tr)
{
	wakeup_trace = tr;

	if (tr->ctrl)
		start_wakeup_tracer(tr);
}

static void wakeup_tracer_reset(struct trace_array *tr)
{
	if (tr->ctrl) {
		stop_wakeup_tracer(tr);
		/* make sure we put back any tasks we are tracing */
		wakeup_reset(tr);
	}
}

static void wakeup_tracer_ctrl_update(struct trace_array *tr)
{
	if (tr->ctrl)
		start_wakeup_tracer(tr);
	else
		stop_wakeup_tracer(tr);
}

static void wakeup_tracer_open(struct trace_iterator *iter)
{
	/* stop the trace while dumping */
	if (iter->tr->ctrl)
		stop_wakeup_tracer(iter->tr);
}

static void wakeup_tracer_close(struct trace_iterator *iter)
{
	/* forget about any processes we were recording */
	if (iter->tr->ctrl)
		start_wakeup_tracer(iter->tr);
}

static struct tracer wakeup_tracer __read_mostly =
{
	.name		= "wakeup",
	.init		= wakeup_tracer_init,
	.reset		= wakeup_tracer_reset,
	.open		= wakeup_tracer_open,
	.close		= wakeup_tracer_close,
	.ctrl_update	= wakeup_tracer_ctrl_update,
	.print_max	= 1,
#ifdef CONFIG_FTRACE_SELFTEST
	.selftest	= trace_selftest_startup_wakeup,
#endif
};

__init static int init_wakeup_tracer(void)
{
	int ret;

	ret = register_tracer(&wakeup_tracer);
	if (ret)
		return ret;

	return 0;
}
device_initcall(init_wakeup_tracer);
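/*
 * Rough usage sketch for kernels of this era (exact paths depend on
 * where debugfs is mounted and on the ftrace version):
 *
 *	echo wakeup > /sys/kernel/debug/tracing/current_tracer
 *	echo 1 > /sys/kernel/debug/tracing/tracing_enabled
 *	... run an RT workload ...
 *	cat /sys/kernel/debug/tracing/tracing_max_latency
 *	cat /sys/kernel/debug/tracing/latency_trace
 *
 * Writing tracing_enabled toggles tr->ctrl, which reaches
 * wakeup_tracer_ctrl_update() above.
 */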