/**
 * @file cpu_buffer.c
 *
 * @remark Copyright 2002 OProfile authors
 * @remark Read the file COPYING
 *
 * @author John Levon <levon@movementarian.org>
 *
 * Each CPU has a local buffer that stores PC value/event
 * pairs. We also log context switches when we notice them.
 * Eventually each CPU's buffer is processed into the global
 * event buffer by sync_buffer().
 *
 * We use a local buffer for two reasons: an NMI or similar
 * interrupt cannot synchronise, and high sampling rates
 * would lead to catastrophic global synchronisation if
 * a global buffer was used.
 */
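
/*
 * Data-flow overview (a sketch, not a normative description): samples
 * are produced from interrupt/NMI context via the oprofile_add_*()
 * entry points below, which only ever advance head_pos.  The per-CPU
 * delayed work (wq_sync_buffer) periodically calls sync_buffer(),
 * which drains entries from the tail end.  That single-writer/
 * single-reader split is what allows logging without locks.
 */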

#include <linux/sched.h>
#include <linux/oprofile.h>
#include <linux/vmalloc.h>
#include <linux/errno.h>

#include "event_buffer.h"
#include "cpu_buffer.h"
#include "buffer_sync.h"
#include "oprof.h"

DEFINE_PER_CPU(struct oprofile_cpu_buffer, cpu_buffer);

static void wq_sync_buffer(struct work_struct *work);

#define DEFAULT_TIMER_EXPIRE (HZ / 10)
static int work_enabled;

void free_cpu_buffers(void)
{
        int i;

        for_each_online_cpu(i) {
                vfree(per_cpu(cpu_buffer, i).buffer);
                per_cpu(cpu_buffer, i).buffer = NULL;
        }
}

int alloc_cpu_buffers(void)
{
        int i;

        unsigned long buffer_size = fs_cpu_buffer_size;

        for_each_online_cpu(i) {
                struct oprofile_cpu_buffer *b = &per_cpu(cpu_buffer, i);

                b->buffer = vmalloc_node(sizeof(struct op_sample) * buffer_size,
                        cpu_to_node(i));
                if (!b->buffer)
                        goto fail;

                b->last_task = NULL;
                b->last_is_kernel = -1;
                b->tracing = 0;
                b->buffer_size = buffer_size;
                b->tail_pos = 0;
                b->head_pos = 0;
                b->sample_received = 0;
                b->sample_lost_overflow = 0;
                b->backtrace_aborted = 0;
                b->sample_invalid_eip = 0;
                b->cpu = i;
                INIT_DELAYED_WORK(&b->work, wq_sync_buffer);
        }
        return 0;

fail:
        free_cpu_buffers();
        return -ENOMEM;
}

void start_cpu_work(void)
{
        int i;

        work_enabled = 1;

        for_each_online_cpu(i) {
                struct oprofile_cpu_buffer *b = &per_cpu(cpu_buffer, i);

                /*
                 * Spread the work by 1 jiffy per cpu so they don't all
                 * fire at once.
                 */
                schedule_delayed_work_on(i, &b->work, DEFAULT_TIMER_EXPIRE + i);
        }
}

void end_cpu_work(void)
{
        int i;

        work_enabled = 0;

        for_each_online_cpu(i) {
                struct oprofile_cpu_buffer *b = &per_cpu(cpu_buffer, i);

                cancel_delayed_work(&b->work);
        }

        flush_scheduled_work();
}

/* Resets the cpu buffer to a sane state. */
void cpu_buffer_reset(struct oprofile_cpu_buffer * cpu_buf)
{
        /* reset these to invalid values; the next sample
         * collected will populate the buffer with proper
         * values to initialize the buffer
         */
        cpu_buf->last_is_kernel = -1;
        cpu_buf->last_task = NULL;
}

/* compute number of available slots in cpu_buffer queue */
static unsigned long nr_available_slots(struct oprofile_cpu_buffer const * b)
{
        unsigned long head = b->head_pos;
        unsigned long tail = b->tail_pos;

        if (tail > head)
                return (tail - head) - 1;

        return tail + (b->buffer_size - head) - 1;
}
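
/*
 * Illustrative counterpart to nr_available_slots(), not used by the
 * code in this file: a reader-side count of how many entries currently
 * sit between tail_pos and head_pos.  One slot is deliberately kept
 * free above so that head == tail always means "empty" rather than
 * "full".  A sketch only; the real consumer lives in buffer_sync.c.
 */
static inline unsigned long nr_filled_slots(struct oprofile_cpu_buffer const * b)
{
        unsigned long head = b->head_pos;
        unsigned long tail = b->tail_pos;

        if (head >= tail)
                return head - tail;

        return head + (b->buffer_size - tail);
}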

static void increment_head(struct oprofile_cpu_buffer * b)
{
        unsigned long new_head = b->head_pos + 1;

        /* Ensure anything written to the slot before we
         * increment is visible */
        wmb();

        if (new_head < b->buffer_size)
                b->head_pos = new_head;
        else
                b->head_pos = 0;
}

static inline void
add_sample(struct oprofile_cpu_buffer * cpu_buf,
           unsigned long pc, unsigned long event)
{
        struct op_sample * entry = &cpu_buf->buffer[cpu_buf->head_pos];
        entry->eip = pc;
        entry->event = event;
        increment_head(cpu_buf);
}

static inline void
add_code(struct oprofile_cpu_buffer * buffer, unsigned long value)
{
        add_sample(buffer, ESCAPE_CODE, value);
}

/* This must be safe from any context. It's safe writing here
 * because of the head/tail separation of the writer and reader
 * of the CPU buffer.
 *
 * is_kernel is needed because on some architectures you cannot
 * tell if you are in kernel or user space simply by looking at
 * pc. We tag this in the buffer by generating kernel enter/exit
 * events whenever is_kernel changes.
 */
static int log_sample(struct oprofile_cpu_buffer * cpu_buf, unsigned long pc,
                      int is_kernel, unsigned long event)
{
        struct task_struct * task;

        cpu_buf->sample_received++;

        if (pc == ESCAPE_CODE) {
                cpu_buf->sample_invalid_eip++;
                return 0;
        }

        if (nr_available_slots(cpu_buf) < 3) {
                cpu_buf->sample_lost_overflow++;
                return 0;
        }

        is_kernel = !!is_kernel;

        task = current;

        /* notice a switch from user->kernel or vice versa */
        if (cpu_buf->last_is_kernel != is_kernel) {
                cpu_buf->last_is_kernel = is_kernel;
                add_code(cpu_buf, is_kernel);
        }

        /* notice a task switch */
        if (cpu_buf->last_task != task) {
                cpu_buf->last_task = task;
                add_code(cpu_buf, (unsigned long)task);
        }

        add_sample(cpu_buf, pc, event);
        return 1;
}
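
/*
 * For illustration only (the exact decoding lives in buffer_sync.c):
 * after the code above, a stretch of the ring might contain records
 * such as
 *
 *      { ESCAPE_CODE, 1 }              kernel enter (is_kernel == 1)
 *      { ESCAPE_CODE, (task pointer) } task switch marker
 *      { pc, event }                   ordinary sample
 *      { pc, event }                   ordinary sample
 *
 * which is why log_sample() refuses a pc equal to ESCAPE_CODE and why
 * it requires at least three free slots before logging anything.
 */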

static int oprofile_begin_trace(struct oprofile_cpu_buffer * cpu_buf)
{
        if (nr_available_slots(cpu_buf) < 4) {
                cpu_buf->sample_lost_overflow++;
                return 0;
        }

        add_code(cpu_buf, CPU_TRACE_BEGIN);
        cpu_buf->tracing = 1;
        return 1;
}

static void oprofile_end_trace(struct oprofile_cpu_buffer * cpu_buf)
{
        cpu_buf->tracing = 0;
}

void oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs,
                                unsigned long event, int is_kernel)
{
        struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer);

        if (!backtrace_depth) {
                log_sample(cpu_buf, pc, is_kernel, event);
                return;
        }

        if (!oprofile_begin_trace(cpu_buf))
                return;

        /* If log_sample() fails we can't backtrace, since we lost the
         * source of this event */
        if (log_sample(cpu_buf, pc, is_kernel, event))
                oprofile_ops.backtrace(regs, backtrace_depth);
        oprofile_end_trace(cpu_buf);
}
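
/*
 * Sketch of what a backtraced sample ends up looking like in the ring
 * (illustrative; the consumer's interpretation is in buffer_sync.c):
 *
 *      { ESCAPE_CODE, CPU_TRACE_BEGIN }  from oprofile_begin_trace()
 *      { pc, event }                     the sample itself, possibly
 *                                        preceded by kernel/task markers
 *      { caller pc, 0 }                  one entry per frame, added by
 *      { caller pc, 0 }                  oprofile_add_trace() below
 *
 * oprofile_end_trace() only clears the tracing flag; it does not add
 * a record of its own.
 */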

void oprofile_add_sample(struct pt_regs * const regs, unsigned long event)
{
        int is_kernel = !user_mode(regs);
        unsigned long pc = profile_pc(regs);

        oprofile_add_ext_sample(pc, regs, event, is_kernel);
}

void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event)
{
        struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer);
        log_sample(cpu_buf, pc, is_kernel, event);
}

void oprofile_add_trace(unsigned long pc)
{
        struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer);

        if (!cpu_buf->tracing)
                return;

        if (nr_available_slots(cpu_buf) < 1) {
                cpu_buf->tracing = 0;
                cpu_buf->sample_lost_overflow++;
                return;
        }

        /* A broken frame can give an eip with the same value as an
         * escape code; abort the trace if we get it */
        if (pc == ESCAPE_CODE) {
                cpu_buf->tracing = 0;
                cpu_buf->backtrace_aborted++;
                return;
        }

        add_sample(cpu_buf, pc, 0);
}

/*
 * This serves to avoid cpu buffer overflow, and makes sure
 * the task mortuary progresses.
 *
 * By using schedule_delayed_work_on and then schedule_delayed_work
 * we guarantee this will stay on the correct cpu.
 */
static void wq_sync_buffer(struct work_struct *work)
{
        struct oprofile_cpu_buffer * b =
                container_of(work, struct oprofile_cpu_buffer, work.work);
        if (b->cpu != smp_processor_id()) {
                printk(KERN_DEBUG "WQ on CPU%d, prefer CPU%d\n",
                       smp_processor_id(), b->cpu);
        }
        sync_buffer(b->cpu);

        /* don't re-add the work if we're shutting down */
        if (work_enabled)
                schedule_delayed_work(&b->work, DEFAULT_TIMER_EXPIRE);
}