Merge branch 'for-linus' of git://oss.sgi.com/xfs/xfs
[pandora-kernel.git] / arch / powerpc / kernel / perf_callchain.c
1 /*
2  * Performance counter callchain support - powerpc architecture code
3  *
4  * Copyright © 2009 Paul Mackerras, IBM Corporation.
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version
9  * 2 of the License, or (at your option) any later version.
10  */
11 #include <linux/kernel.h>
12 #include <linux/sched.h>
13 #include <linux/perf_event.h>
14 #include <linux/percpu.h>
15 #include <linux/uaccess.h>
16 #include <linux/mm.h>
17 #include <asm/ptrace.h>
18 #include <asm/pgtable.h>
19 #include <asm/sigcontext.h>
20 #include <asm/ucontext.h>
21 #include <asm/vdso.h>
22 #ifdef CONFIG_PPC64
23 #include "ppc32.h"
24 #endif
25
26
27 /*
28  * Is sp valid as the address of the next kernel stack frame after prev_sp?
29  * The next frame may be in a different stack area but should not go
30  * back down in the same stack area.
31  */
32 static int valid_next_sp(unsigned long sp, unsigned long prev_sp)
33 {
34         if (sp & 0xf)
35                 return 0;               /* must be 16-byte aligned */
36         if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD))
37                 return 0;
38         if (sp >= prev_sp + STACK_FRAME_OVERHEAD)
39                 return 1;
40         /*
41          * sp could decrease when we jump off an interrupt stack
42          * back to the regular process stack.
43          */
44         if ((sp & ~(THREAD_SIZE - 1)) != (prev_sp & ~(THREAD_SIZE - 1)))
45                 return 1;
46         return 0;
47 }
48
49 void
50 perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
51 {
52         unsigned long sp, next_sp;
53         unsigned long next_ip;
54         unsigned long lr;
55         long level = 0;
56         unsigned long *fp;
57
58         lr = regs->link;
59         sp = regs->gpr[1];
60         perf_callchain_store(entry, regs->nip);
61
62         if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD))
63                 return;
64
65         for (;;) {
66                 fp = (unsigned long *) sp;
67                 next_sp = fp[0];
68
69                 if (next_sp == sp + STACK_INT_FRAME_SIZE &&
70                     fp[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) {
71                         /*
72                          * This looks like an interrupt frame for an
73                          * interrupt that occurred in the kernel
74                          */
75                         regs = (struct pt_regs *)(sp + STACK_FRAME_OVERHEAD);
76                         next_ip = regs->nip;
77                         lr = regs->link;
78                         level = 0;
79                         perf_callchain_store(entry, PERF_CONTEXT_KERNEL);
80
81                 } else {
82                         if (level == 0)
83                                 next_ip = lr;
84                         else
85                                 next_ip = fp[STACK_FRAME_LR_SAVE];
86
87                         /*
88                          * We can't tell which of the first two addresses
89                          * we get are valid, but we can filter out the
90                          * obviously bogus ones here.  We replace them
91                          * with 0 rather than removing them entirely so
92                          * that userspace can tell which is which.
93                          */
94                         if ((level == 1 && next_ip == lr) ||
95                             (level <= 1 && !kernel_text_address(next_ip)))
96                                 next_ip = 0;
97
98                         ++level;
99                 }
100
101                 perf_callchain_store(entry, next_ip);
102                 if (!valid_next_sp(next_sp, sp))
103                         return;
104                 sp = next_sp;
105         }
106 }
107
108 #ifdef CONFIG_PPC64
109 /*
110  * On 64-bit we don't want to invoke hash_page on user addresses from
111  * interrupt context, so if the access faults, we read the page tables
112  * to find which page (if any) is mapped and access it directly.
113  */
114 static int read_user_stack_slow(void __user *ptr, void *ret, int nb)
115 {
116         pgd_t *pgdir;
117         pte_t *ptep, pte;
118         unsigned shift;
119         unsigned long addr = (unsigned long) ptr;
120         unsigned long offset;
121         unsigned long pfn;
122         void *kaddr;
123
124         pgdir = current->mm->pgd;
125         if (!pgdir)
126                 return -EFAULT;
127
128         ptep = find_linux_pte_or_hugepte(pgdir, addr, &shift);
129         if (!shift)
130                 shift = PAGE_SHIFT;
131
132         /* align address to page boundary */
133         offset = addr & ((1UL << shift) - 1);
134         addr -= offset;
135
136         if (ptep == NULL)
137                 return -EFAULT;
138         pte = *ptep;
139         if (!pte_present(pte) || !(pte_val(pte) & _PAGE_USER))
140                 return -EFAULT;
141         pfn = pte_pfn(pte);
142         if (!page_is_ram(pfn))
143                 return -EFAULT;
144
145         /* no highmem to worry about here */
146         kaddr = pfn_to_kaddr(pfn);
147         memcpy(ret, kaddr + offset, nb);
148         return 0;
149 }
150
151 static int read_user_stack_64(unsigned long __user *ptr, unsigned long *ret)
152 {
153         if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned long) ||
154             ((unsigned long)ptr & 7))
155                 return -EFAULT;
156
157         if (!__get_user_inatomic(*ret, ptr))
158                 return 0;
159
160         return read_user_stack_slow(ptr, ret, 8);
161 }
162
163 static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret)
164 {
165         if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned int) ||
166             ((unsigned long)ptr & 3))
167                 return -EFAULT;
168
169         if (!__get_user_inatomic(*ret, ptr))
170                 return 0;
171
172         return read_user_stack_slow(ptr, ret, 4);
173 }
174
175 static inline int valid_user_sp(unsigned long sp, int is_64)
176 {
177         if (!sp || (sp & 7) || sp > (is_64 ? TASK_SIZE : 0x100000000UL) - 32)
178                 return 0;
179         return 1;
180 }
181
/*
 * 64-bit user processes use the same stack frame for RT and non-RT signals.
 *
 * This describes that frame as laid out from the user stack pointer, so
 * the stack walker can recognise it and pull the interrupted register
 * state out of it.  NOTE(review): the layout has to match what the signal
 * delivery code builds; confirm against that code before changing it.
 */
struct signal_frame_64 {
        char            dummy[__SIGNAL_FRAMESIZE];
        /* the walker reads the saved nip, link and r1 from uc.uc_mcontext.gp_regs */
        struct ucontext uc;
        unsigned long   unused[2];
        /* a return address equal to the address of tramp marks a signal frame */
        unsigned int    tramp[6];
        /* sanity check: pinfo must point at info and puc at uc of this frame */
        struct siginfo  *pinfo;
        void            *puc;
        struct siginfo  info;
        char            abigap[288];
};
195
196 static int is_sigreturn_64_address(unsigned long nip, unsigned long fp)
197 {
198         if (nip == fp + offsetof(struct signal_frame_64, tramp))
199                 return 1;
200         if (vdso64_rt_sigtramp && current->mm->context.vdso_base &&
201             nip == current->mm->context.vdso_base + vdso64_rt_sigtramp)
202                 return 1;
203         return 0;
204 }
205
206 /*
207  * Do some sanity checking on the signal frame pointed to by sp.
208  * We check the pinfo and puc pointers in the frame.
209  */
210 static int sane_signal_64_frame(unsigned long sp)
211 {
212         struct signal_frame_64 __user *sf;
213         unsigned long pinfo, puc;
214
215         sf = (struct signal_frame_64 __user *) sp;
216         if (read_user_stack_64((unsigned long __user *) &sf->pinfo, &pinfo) ||
217             read_user_stack_64((unsigned long __user *) &sf->puc, &puc))
218                 return 0;
219         return pinfo == (unsigned long) &sf->info &&
220                 puc == (unsigned long) &sf->uc;
221 }
222
223 static void perf_callchain_user_64(struct perf_callchain_entry *entry,
224                                    struct pt_regs *regs)
225 {
226         unsigned long sp, next_sp;
227         unsigned long next_ip;
228         unsigned long lr;
229         long level = 0;
230         struct signal_frame_64 __user *sigframe;
231         unsigned long __user *fp, *uregs;
232
233         next_ip = regs->nip;
234         lr = regs->link;
235         sp = regs->gpr[1];
236         perf_callchain_store(entry, next_ip);
237
238         for (;;) {
239                 fp = (unsigned long __user *) sp;
240                 if (!valid_user_sp(sp, 1) || read_user_stack_64(fp, &next_sp))
241                         return;
242                 if (level > 0 && read_user_stack_64(&fp[2], &next_ip))
243                         return;
244
245                 /*
246                  * Note: the next_sp - sp >= signal frame size check
247                  * is true when next_sp < sp, which can happen when
248                  * transitioning from an alternate signal stack to the
249                  * normal stack.
250                  */
251                 if (next_sp - sp >= sizeof(struct signal_frame_64) &&
252                     (is_sigreturn_64_address(next_ip, sp) ||
253                      (level <= 1 && is_sigreturn_64_address(lr, sp))) &&
254                     sane_signal_64_frame(sp)) {
255                         /*
256                          * This looks like an signal frame
257                          */
258                         sigframe = (struct signal_frame_64 __user *) sp;
259                         uregs = sigframe->uc.uc_mcontext.gp_regs;
260                         if (read_user_stack_64(&uregs[PT_NIP], &next_ip) ||
261                             read_user_stack_64(&uregs[PT_LNK], &lr) ||
262                             read_user_stack_64(&uregs[PT_R1], &sp))
263                                 return;
264                         level = 0;
265                         perf_callchain_store(entry, PERF_CONTEXT_USER);
266                         perf_callchain_store(entry, next_ip);
267                         continue;
268                 }
269
270                 if (level == 0)
271                         next_ip = lr;
272                 perf_callchain_store(entry, next_ip);
273                 ++level;
274                 sp = next_sp;
275         }
276 }
277
278 static inline int current_is_64bit(void)
279 {
280         /*
281          * We can't use test_thread_flag() here because we may be on an
282          * interrupt stack, and the thread flags don't get copied over
283          * from the thread_info on the main stack to the interrupt stack.
284          */
285         return !test_ti_thread_flag(task_thread_info(current), TIF_32BIT);
286 }
287
288 #else  /* CONFIG_PPC64 */
289 /*
290  * On 32-bit we just access the address and let hash_page create a
291  * HPTE if necessary, so there is no need to fall back to reading
292  * the page tables.  Since this is called at interrupt level,
293  * do_page_fault() won't treat a DSI as a page fault.
294  */
295 static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret)
296 {
297         if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned int) ||
298             ((unsigned long)ptr & 3))
299                 return -EFAULT;
300
301         return __get_user_inatomic(*ret, ptr);
302 }
303
/*
 * Stub for builds without CONFIG_PPC64: it records nothing.  It exists so
 * that perf_callchain_user() can reference the 64-bit walker
 * unconditionally; current_is_64bit() returns 0 in this configuration.
 */
static inline void perf_callchain_user_64(struct perf_callchain_entry *entry,
                                          struct pt_regs *regs)
{
}
308
/*
 * Without CONFIG_PPC64 the current task is never reported as 64-bit, so
 * perf_callchain_user() always takes the 32-bit path.
 */
static inline int current_is_64bit(void)
{
        return 0;
}
313
314 static inline int valid_user_sp(unsigned long sp, int is_64)
315 {
316         if (!sp || (sp & 7) || sp > TASK_SIZE - 32)
317                 return 0;
318         return 1;
319 }
320
/*
 * Without CONFIG_PPC64 the native signal definitions are used under the
 * "32" names that the 32-bit signal frame layouts below are written
 * against.
 */
#define __SIGNAL_FRAMESIZE32    __SIGNAL_FRAMESIZE
#define sigcontext32            sigcontext
#define mcontext32              mcontext
#define ucontext32              ucontext
#define compat_siginfo_t        struct siginfo
326
327 #endif /* CONFIG_PPC64 */
328
/*
 * Layout for non-RT signal frames
 *
 * Described from the user stack pointer so the stack walker can recognise
 * the frame and read the interrupted registers from mctx.mc_gregs.
 * NOTE(review): the layout has to match what the signal delivery code
 * builds; confirm against that code before changing it.
 */
struct signal_frame_32 {
        char                    dummy[__SIGNAL_FRAMESIZE32];
        /* sanity check: sctx.regs must hold the address of mctx */
        struct sigcontext32     sctx;
        /* a return address equal to the address of mctx.mc_pad marks this frame */
        struct mcontext32       mctx;
        int                     abigap[56];
};
338
/*
 * Layout for RT signal frames
 *
 * Described from the user stack pointer so the stack walker can recognise
 * the frame and read the interrupted registers from
 * uc.uc_mcontext.mc_gregs.
 * NOTE(review): the layout has to match what the signal delivery code
 * builds; confirm against that code before changing it.
 */
struct rt_signal_frame_32 {
        char                    dummy[__SIGNAL_FRAMESIZE32 + 16];
        compat_siginfo_t        info;
        /*
         * sanity check: uc.uc_regs must hold the address of uc.uc_mcontext;
         * a return address equal to the address of uc.uc_mcontext.mc_pad
         * marks this frame
         */
        struct ucontext32       uc;
        int                     abigap[56];
};
348
349 static int is_sigreturn_32_address(unsigned int nip, unsigned int fp)
350 {
351         if (nip == fp + offsetof(struct signal_frame_32, mctx.mc_pad))
352                 return 1;
353         if (vdso32_sigtramp && current->mm->context.vdso_base &&
354             nip == current->mm->context.vdso_base + vdso32_sigtramp)
355                 return 1;
356         return 0;
357 }
358
359 static int is_rt_sigreturn_32_address(unsigned int nip, unsigned int fp)
360 {
361         if (nip == fp + offsetof(struct rt_signal_frame_32,
362                                  uc.uc_mcontext.mc_pad))
363                 return 1;
364         if (vdso32_rt_sigtramp && current->mm->context.vdso_base &&
365             nip == current->mm->context.vdso_base + vdso32_rt_sigtramp)
366                 return 1;
367         return 0;
368 }
369
370 static int sane_signal_32_frame(unsigned int sp)
371 {
372         struct signal_frame_32 __user *sf;
373         unsigned int regs;
374
375         sf = (struct signal_frame_32 __user *) (unsigned long) sp;
376         if (read_user_stack_32((unsigned int __user *) &sf->sctx.regs, &regs))
377                 return 0;
378         return regs == (unsigned long) &sf->mctx;
379 }
380
381 static int sane_rt_signal_32_frame(unsigned int sp)
382 {
383         struct rt_signal_frame_32 __user *sf;
384         unsigned int regs;
385
386         sf = (struct rt_signal_frame_32 __user *) (unsigned long) sp;
387         if (read_user_stack_32((unsigned int __user *) &sf->uc.uc_regs, &regs))
388                 return 0;
389         return regs == (unsigned long) &sf->uc.uc_mcontext;
390 }
391
392 static unsigned int __user *signal_frame_32_regs(unsigned int sp,
393                                 unsigned int next_sp, unsigned int next_ip)
394 {
395         struct mcontext32 __user *mctx = NULL;
396         struct signal_frame_32 __user *sf;
397         struct rt_signal_frame_32 __user *rt_sf;
398
399         /*
400          * Note: the next_sp - sp >= signal frame size check
401          * is true when next_sp < sp, for example, when
402          * transitioning from an alternate signal stack to the
403          * normal stack.
404          */
405         if (next_sp - sp >= sizeof(struct signal_frame_32) &&
406             is_sigreturn_32_address(next_ip, sp) &&
407             sane_signal_32_frame(sp)) {
408                 sf = (struct signal_frame_32 __user *) (unsigned long) sp;
409                 mctx = &sf->mctx;
410         }
411
412         if (!mctx && next_sp - sp >= sizeof(struct rt_signal_frame_32) &&
413             is_rt_sigreturn_32_address(next_ip, sp) &&
414             sane_rt_signal_32_frame(sp)) {
415                 rt_sf = (struct rt_signal_frame_32 __user *) (unsigned long) sp;
416                 mctx = &rt_sf->uc.uc_mcontext;
417         }
418
419         if (!mctx)
420                 return NULL;
421         return mctx->mc_gregs;
422 }
423
424 static void perf_callchain_user_32(struct perf_callchain_entry *entry,
425                                    struct pt_regs *regs)
426 {
427         unsigned int sp, next_sp;
428         unsigned int next_ip;
429         unsigned int lr;
430         long level = 0;
431         unsigned int __user *fp, *uregs;
432
433         next_ip = regs->nip;
434         lr = regs->link;
435         sp = regs->gpr[1];
436         perf_callchain_store(entry, next_ip);
437
438         while (entry->nr < PERF_MAX_STACK_DEPTH) {
439                 fp = (unsigned int __user *) (unsigned long) sp;
440                 if (!valid_user_sp(sp, 0) || read_user_stack_32(fp, &next_sp))
441                         return;
442                 if (level > 0 && read_user_stack_32(&fp[1], &next_ip))
443                         return;
444
445                 uregs = signal_frame_32_regs(sp, next_sp, next_ip);
446                 if (!uregs && level <= 1)
447                         uregs = signal_frame_32_regs(sp, next_sp, lr);
448                 if (uregs) {
449                         /*
450                          * This looks like an signal frame, so restart
451                          * the stack trace with the values in it.
452                          */
453                         if (read_user_stack_32(&uregs[PT_NIP], &next_ip) ||
454                             read_user_stack_32(&uregs[PT_LNK], &lr) ||
455                             read_user_stack_32(&uregs[PT_R1], &sp))
456                                 return;
457                         level = 0;
458                         perf_callchain_store(entry, PERF_CONTEXT_USER);
459                         perf_callchain_store(entry, next_ip);
460                         continue;
461                 }
462
463                 if (level == 0)
464                         next_ip = lr;
465                 perf_callchain_store(entry, next_ip);
466                 ++level;
467                 sp = next_sp;
468         }
469 }
470
/*
 * Record the user call chain for regs, using the 64-bit or the 32-bit
 * stack walker according to current_is_64bit().
 */
void
perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
{
        if (!current_is_64bit()) {
                perf_callchain_user_32(entry, regs);
                return;
        }

        perf_callchain_user_64(entry, regs);
}