kernel/trace/trace_sched_wakeup.c
/*
 * trace task wakeup timings
 *
 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
 *
 * Based on code from the latency_tracer, that is:
 *
 *  Copyright (C) 2004-2006 Ingo Molnar
 *  Copyright (C) 2004 William Lee Irwin III
 */
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/debugfs.h>
#include <linux/kallsyms.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <linux/marker.h>

#include "trace.h"

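/*
 * State of the trace currently being recorded: the task whose wakeup
 * latency is being measured, the CPU it was woken on, and its priority.
 * wakeup_lock serializes starting/resetting a trace against the
 * sched-switch probe that completes it.
 */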
static struct trace_array       *wakeup_trace;
static int __read_mostly        tracer_enabled;

static struct task_struct       *wakeup_task;
static int                      wakeup_cpu;
static unsigned                 wakeup_prio = -1;

static DEFINE_SPINLOCK(wakeup_lock);

static void __wakeup_reset(struct trace_array *tr);

/*
 * Should this new latency be reported/recorded?
 *
 * With a non-zero tracing_thresh, every latency at or above the
 * threshold is reported; otherwise only a new maximum is.
 */
static int report_latency(cycle_t delta)
{
        if (tracing_thresh) {
                if (delta < tracing_thresh)
                        return 0;
        } else {
                if (delta <= tracing_max_latency)
                        return 0;
        }
        return 1;
}

static void notrace
wakeup_sched_switch(void *private, void *rq, struct task_struct *prev,
        struct task_struct *next)
{
        unsigned long latency = 0, t0 = 0, t1 = 0;
        struct trace_array **ptr = private;
        struct trace_array *tr = *ptr;
        struct trace_array_cpu *data;
        cycle_t T0, T1, delta;
        unsigned long flags;
        long disabled;
        int cpu;

        if (unlikely(!tracer_enabled))
                return;

        /*
         * When we start a new trace, we set wakeup_task to NULL
         * and then set tracer_enabled = 1. We want to make sure
         * that another CPU does not see the tracer_enabled = 1
         * and the wakeup_task with an older task, that might
         * actually be the same as next.
         */
        smp_rmb();

        if (next != wakeup_task)
                return;

        /* The task we are waiting for is waking up */
        data = tr->data[wakeup_cpu];

        /* disable local data, not wakeup_cpu data */
        cpu = raw_smp_processor_id();
        disabled = atomic_inc_return(&tr->data[cpu]->disabled);
        if (likely(disabled != 1))
                goto out;

        spin_lock_irqsave(&wakeup_lock, flags);

        /* We could race with grabbing wakeup_lock */
        if (unlikely(!tracer_enabled || next != wakeup_task))
                goto out_unlock;

        trace_function(tr, data, CALLER_ADDR1, CALLER_ADDR2, flags);

        /*
         * usecs conversion is slow so we try to delay the conversion
         * as long as possible:
         */
        T0 = data->preempt_timestamp;
        T1 = ftrace_now(cpu);
        delta = T1-T0;

        if (!report_latency(delta))
                goto out_unlock;

        latency = nsecs_to_usecs(delta);

        tracing_max_latency = delta;
        t0 = nsecs_to_usecs(T0);
        t1 = nsecs_to_usecs(T1);

        update_max_tr(tr, wakeup_task, wakeup_cpu);

out_unlock:
        __wakeup_reset(tr);
        spin_unlock_irqrestore(&wakeup_lock, flags);
out:
        atomic_dec(&tr->data[cpu]->disabled);
}

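/*
 * Marker probe for the "kernel_sched_schedule" marker.  The arguments
 * are pulled from the va_list in the exact order given by the format
 * string registered in start_wakeup_tracer() below.
 */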
static notrace void
sched_switch_callback(void *probe_data, void *call_data,
                      const char *format, va_list *args)
{
        struct task_struct *prev;
        struct task_struct *next;
        struct rq *__rq;

        /* skip prev_pid %d next_pid %d prev_state %ld */
        (void)va_arg(*args, int);
        (void)va_arg(*args, int);
        (void)va_arg(*args, long);
        __rq = va_arg(*args, typeof(__rq));
        prev = va_arg(*args, typeof(prev));
        next = va_arg(*args, typeof(next));

        tracing_record_cmdline(prev);

        /*
         * If tracer_switch_func only points to the local
         * switch func, it still needs the ptr passed to it.
         */
        wakeup_sched_switch(probe_data, __rq, prev, next);
}

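/*
 * __wakeup_reset - reset the per-CPU buffers and forget the traced task.
 * Caller must hold wakeup_lock; wakeup_reset() below is the locked wrapper.
 */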
static void __wakeup_reset(struct trace_array *tr)
{
        struct trace_array_cpu *data;
        int cpu;

        assert_spin_locked(&wakeup_lock);

        for_each_possible_cpu(cpu) {
                data = tr->data[cpu];
                tracing_reset(data);
        }

        wakeup_cpu = -1;
        wakeup_prio = -1;

        if (wakeup_task)
                put_task_struct(wakeup_task);

        wakeup_task = NULL;
}

static void wakeup_reset(struct trace_array *tr)
{
        unsigned long flags;

        spin_lock_irqsave(&wakeup_lock, flags);
        __wakeup_reset(tr);
        spin_unlock_irqrestore(&wakeup_lock, flags);
}

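/*
 * Called on every wakeup: start a new trace only when an RT task wakes
 * that has a higher priority (lower ->prio value) than both the task
 * currently being traced and the task running on the waking CPU.
 */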
static void
wakeup_check_start(struct trace_array *tr, struct task_struct *p,
                   struct task_struct *curr)
{
        int cpu = smp_processor_id();
        unsigned long flags;
        long disabled;

        if (likely(!rt_task(p)) ||
                        p->prio >= wakeup_prio ||
                        p->prio >= curr->prio)
                return;

        disabled = atomic_inc_return(&tr->data[cpu]->disabled);
        if (unlikely(disabled != 1))
                goto out;

        /* interrupts should be off from try_to_wake_up */
        spin_lock(&wakeup_lock);

        /* check for races. */
        if (!tracer_enabled || p->prio >= wakeup_prio)
                goto out_locked;

        /* reset the trace */
        __wakeup_reset(tr);

        wakeup_cpu = task_cpu(p);
        wakeup_prio = p->prio;

        wakeup_task = p;
        get_task_struct(wakeup_task);

        local_save_flags(flags);

        tr->data[wakeup_cpu]->preempt_timestamp = ftrace_now(cpu);
        trace_function(tr, tr->data[wakeup_cpu],
                       CALLER_ADDR1, CALLER_ADDR2, flags);

out_locked:
        spin_unlock(&wakeup_lock);
out:
        atomic_dec(&tr->data[cpu]->disabled);
}

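/*
 * Marker probe for "kernel_sched_wakeup" and "kernel_sched_wakeup_new";
 * unpacks the va_list per the format registered in start_wakeup_tracer().
 */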
static notrace void
wake_up_callback(void *probe_data, void *call_data,
                 const char *format, va_list *args)
{
        struct trace_array **ptr = probe_data;
        struct trace_array *tr = *ptr;
        struct task_struct *curr;
        struct task_struct *task;
        struct rq *__rq;

        if (likely(!tracer_enabled))
                return;

        /* Skip pid %d state %ld */
        (void)va_arg(*args, int);
        (void)va_arg(*args, long);
        /* now get the meat: "rq %p task %p rq->curr %p" */
        __rq = va_arg(*args, typeof(__rq));
        task = va_arg(*args, typeof(task));
        curr = va_arg(*args, typeof(curr));

        tracing_record_cmdline(task);
        tracing_record_cmdline(curr);

        wakeup_check_start(tr, task, curr);
}

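/*
 * Register the three scheduler markers this tracer relies on.  On any
 * registration failure, unwind the probes registered so far.
 */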
static void start_wakeup_tracer(struct trace_array *tr)
{
        int ret;

        ret = marker_probe_register("kernel_sched_wakeup",
                        "pid %d state %ld ## rq %p task %p rq->curr %p",
                        wake_up_callback,
                        &wakeup_trace);
        if (ret) {
                pr_info("wakeup trace: Couldn't add marker"
                        " probe to kernel_sched_wakeup\n");
                return;
        }

        ret = marker_probe_register("kernel_sched_wakeup_new",
                        "pid %d state %ld ## rq %p task %p rq->curr %p",
                        wake_up_callback,
                        &wakeup_trace);
        if (ret) {
                pr_info("wakeup trace: Couldn't add marker"
                        " probe to kernel_sched_wakeup_new\n");
                goto fail_deprobe;
        }

        ret = marker_probe_register("kernel_sched_schedule",
                "prev_pid %d next_pid %d prev_state %ld "
                "## rq %p prev %p next %p",
                sched_switch_callback,
                &wakeup_trace);
        if (ret) {
                pr_info("sched trace: Couldn't add marker"
                        " probe to kernel_sched_schedule\n");
                goto fail_deprobe_wake_new;
        }

        wakeup_reset(tr);

        /*
         * Don't let the tracer_enabled = 1 show up before
         * the wakeup_task is reset. This may be overkill since
         * wakeup_reset does a spin_unlock after setting the
         * wakeup_task to NULL, but I want to be safe.
         * This is a slow path anyway.
         */
        smp_wmb();

        tracer_enabled = 1;

        return;
fail_deprobe_wake_new:
        marker_probe_unregister("kernel_sched_wakeup_new",
                                wake_up_callback,
                                &wakeup_trace);
fail_deprobe:
        marker_probe_unregister("kernel_sched_wakeup",
                                wake_up_callback,
                                &wakeup_trace);
}

static void stop_wakeup_tracer(struct trace_array *tr)
{
        tracer_enabled = 0;
        marker_probe_unregister("kernel_sched_schedule",
                                sched_switch_callback,
                                &wakeup_trace);
        marker_probe_unregister("kernel_sched_wakeup_new",
                                wake_up_callback,
                                &wakeup_trace);
        marker_probe_unregister("kernel_sched_wakeup",
                                wake_up_callback,
                                &wakeup_trace);
}

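/*
 * tr->ctrl is the user-visible on/off switch for the tracer; the
 * callbacks below start or stop tracing accordingly.
 */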
static void wakeup_tracer_init(struct trace_array *tr)
{
        wakeup_trace = tr;

        if (tr->ctrl)
                start_wakeup_tracer(tr);
}

static void wakeup_tracer_reset(struct trace_array *tr)
{
        if (tr->ctrl) {
                stop_wakeup_tracer(tr);
                /* make sure we put back any tasks we are tracing */
                wakeup_reset(tr);
        }
}

static void wakeup_tracer_ctrl_update(struct trace_array *tr)
{
        if (tr->ctrl)
                start_wakeup_tracer(tr);
        else
                stop_wakeup_tracer(tr);
}

static void wakeup_tracer_open(struct trace_iterator *iter)
{
        /* stop the trace while dumping */
        if (iter->tr->ctrl)
                stop_wakeup_tracer(iter->tr);
}

static void wakeup_tracer_close(struct trace_iterator *iter)
{
        /* forget about any processes we were recording */
        if (iter->tr->ctrl)
                start_wakeup_tracer(iter->tr);
}

static struct tracer wakeup_tracer __read_mostly =
{
        .name           = "wakeup",
        .init           = wakeup_tracer_init,
        .reset          = wakeup_tracer_reset,
        .open           = wakeup_tracer_open,
        .close          = wakeup_tracer_close,
        .ctrl_update    = wakeup_tracer_ctrl_update,
        .print_max      = 1,
#ifdef CONFIG_FTRACE_SELFTEST
        .selftest    = trace_selftest_startup_wakeup,
#endif
};

__init static int init_wakeup_tracer(void)
{
        int ret;

        ret = register_tracer(&wakeup_tracer);
        if (ret)
                return ret;

        return 0;
}
device_initcall(init_wakeup_tracer);