arch/powerpc/kernel/perf_callchain.c
/*
 * Performance counter callchain support - powerpc architecture code
 *
 * Copyright © 2009 Paul Mackerras, IBM Corporation.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/perf_counter.h>
#include <linux/percpu.h>
#include <linux/uaccess.h>
#include <linux/mm.h>
#include <asm/ptrace.h>
#include <asm/pgtable.h>
#include <asm/sigcontext.h>
#include <asm/ucontext.h>
#include <asm/vdso.h>
#ifdef CONFIG_PPC64
#include "ppc32.h"
#endif

/*
 * Store another value in a callchain_entry.
 */
static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip)
{
        unsigned int nr = entry->nr;

        if (nr < PERF_MAX_STACK_DEPTH) {
                entry->ip[nr] = ip;
                entry->nr = nr + 1;
        }
}

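/*
 * A captured chain is a flat array of u64 values: a PERF_CONTEXT_*
 * marker followed by instruction pointers, e.g.
 *   { PERF_CONTEXT_KERNEL, nip, caller, ..., PERF_CONTEXT_USER, ip, ... }
 * Values stored past PERF_MAX_STACK_DEPTH are silently dropped.
 */
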
39 /*
40  * Is sp valid as the address of the next kernel stack frame after prev_sp?
41  * The next frame may be in a different stack area but should not go
42  * back down in the same stack area.
43  */
44 static int valid_next_sp(unsigned long sp, unsigned long prev_sp)
45 {
46         if (sp & 0xf)
47                 return 0;               /* must be 16-byte aligned */
48         if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD))
49                 return 0;
50         if (sp >= prev_sp + STACK_FRAME_OVERHEAD)
51                 return 1;
52         /*
53          * sp could decrease when we jump off an interrupt stack
54          * back to the regular process stack.
55          */
56         if ((sp & ~(THREAD_SIZE - 1)) != (prev_sp & ~(THREAD_SIZE - 1)))
57                 return 1;
58         return 0;
59 }
60
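/*
 * The powerpc ABI links stack frames through a "back chain": the first
 * word of each frame (fp[0]) points to the caller's frame, and the
 * saved LR lives at a fixed offset (fp[STACK_FRAME_LR_SAVE]) within a
 * frame.  perf_callchain_kernel() below simply follows that chain.
 */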
static void perf_callchain_kernel(struct pt_regs *regs,
                                  struct perf_callchain_entry *entry)
{
        unsigned long sp, next_sp;
        unsigned long next_ip;
        unsigned long lr;
        long level = 0;
        unsigned long *fp;

        lr = regs->link;
        sp = regs->gpr[1];
        callchain_store(entry, PERF_CONTEXT_KERNEL);
        callchain_store(entry, regs->nip);

        if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD))
                return;

        for (;;) {
                fp = (unsigned long *) sp;
                next_sp = fp[0];

                if (next_sp == sp + STACK_INT_FRAME_SIZE &&
                    fp[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) {
                        /*
                         * This looks like an interrupt frame for an
                         * interrupt that occurred in the kernel
                         */
                        regs = (struct pt_regs *)(sp + STACK_FRAME_OVERHEAD);
                        next_ip = regs->nip;
                        lr = regs->link;
                        level = 0;
                        callchain_store(entry, PERF_CONTEXT_KERNEL);

                } else {
                        if (level == 0)
                                next_ip = lr;
                        else
                                next_ip = fp[STACK_FRAME_LR_SAVE];

                        /*
                         * We can't tell which of the first two addresses
                         * we get are valid, but we can filter out the
                         * obviously bogus ones here.  We replace them
                         * with 0 rather than removing them entirely so
                         * that userspace can tell which is which.
                         */
                        if ((level == 1 && next_ip == lr) ||
                            (level <= 1 && !kernel_text_address(next_ip)))
                                next_ip = 0;

                        ++level;
                }

                callchain_store(entry, next_ip);
                if (!valid_next_sp(next_sp, sp))
                        return;
                sp = next_sp;
        }
}

#ifdef CONFIG_PPC64

#ifdef CONFIG_HUGETLB_PAGE
#define is_huge_psize(pagesize) (HPAGE_SHIFT && mmu_huge_psizes[pagesize])
#else
#define is_huge_psize(pagesize) 0
#endif

/*
 * On 64-bit we don't want to invoke hash_page on user addresses from
 * interrupt context, so if the access faults, we read the page tables
 * to find which page (if any) is mapped and access it directly.
 */
static int read_user_stack_slow(void __user *ptr, void *ret, int nb)
{
        pgd_t *pgdir;
        pte_t *ptep, pte;
        int pagesize;
        unsigned long addr = (unsigned long) ptr;
        unsigned long offset;
        unsigned long pfn;
        void *kaddr;

        pgdir = current->mm->pgd;
        if (!pgdir)
                return -EFAULT;

        pagesize = get_slice_psize(current->mm, addr);

        /* align address to page boundary */
        offset = addr & ((1ul << mmu_psize_defs[pagesize].shift) - 1);
        addr -= offset;

        if (is_huge_psize(pagesize))
                ptep = huge_pte_offset(current->mm, addr);
        else
                ptep = find_linux_pte(pgdir, addr);

        if (ptep == NULL)
                return -EFAULT;
        pte = *ptep;
        if (!pte_present(pte) || !(pte_val(pte) & _PAGE_USER))
                return -EFAULT;
        pfn = pte_pfn(pte);
        if (!page_is_ram(pfn))
                return -EFAULT;

        /* no highmem to worry about here */
        kaddr = pfn_to_kaddr(pfn);
        memcpy(ret, kaddr + offset, nb);
        return 0;
}

static int read_user_stack_64(unsigned long __user *ptr, unsigned long *ret)
{
        if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned long) ||
            ((unsigned long)ptr & 7))
                return -EFAULT;

        if (!__get_user_inatomic(*ret, ptr))
                return 0;

        return read_user_stack_slow(ptr, ret, 8);
}

static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret)
{
        if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned int) ||
            ((unsigned long)ptr & 3))
                return -EFAULT;

        if (!__get_user_inatomic(*ret, ptr))
                return 0;

        return read_user_stack_slow(ptr, ret, 4);
}
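
/*
 * Both helpers above return 0 on success and -EFAULT on failure; they
 * try the cheap __get_user_inatomic() first and fall back to the
 * page-table walk only if that faults.
 */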

static inline int valid_user_sp(unsigned long sp, int is_64)
{
        if (!sp || (sp & 7) || sp > (is_64 ? TASK_SIZE : 0x100000000UL) - 32)
                return 0;
        return 1;
}
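
/*
 * The checks above require a non-NULL, 8-byte-aligned sp that leaves
 * at least 32 bytes (presumably room for a minimal stack frame) below
 * the top of the address space: TASK_SIZE for 64-bit tasks, the 4GB
 * boundary for 32-bit tasks.
 */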

/*
 * 64-bit user processes use the same stack frame for RT and non-RT signals.
 */
struct signal_frame_64 {
        char            dummy[__SIGNAL_FRAMESIZE];
        struct ucontext uc;
        unsigned long   unused[2];
        unsigned int    tramp[6];
        struct siginfo  *pinfo;
        void            *puc;
        struct siginfo  info;
        char            abigap[288];
};
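
/*
 * This layout mirrors the rt_sigframe that arch/powerpc/kernel/signal_64.c
 * pushes on the user stack; if that layout changes, this must too.
 */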

static int is_sigreturn_64_address(unsigned long nip, unsigned long fp)
{
        if (nip == fp + offsetof(struct signal_frame_64, tramp))
                return 1;
        if (vdso64_rt_sigtramp && current->mm->context.vdso_base &&
            nip == current->mm->context.vdso_base + vdso64_rt_sigtramp)
                return 1;
        return 0;
}
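
/*
 * The sigreturn trampoline can live either in the frame itself (the
 * tramp[] array) or in the vDSO, so both addresses are accepted.
 */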

/*
 * Do some sanity checking on the signal frame pointed to by sp.
 * We check the pinfo and puc pointers in the frame.
 */
static int sane_signal_64_frame(unsigned long sp)
{
        struct signal_frame_64 __user *sf;
        unsigned long pinfo, puc;

        sf = (struct signal_frame_64 __user *) sp;
        if (read_user_stack_64((unsigned long __user *) &sf->pinfo, &pinfo) ||
            read_user_stack_64((unsigned long __user *) &sf->puc, &puc))
                return 0;
        return pinfo == (unsigned long) &sf->info &&
                puc == (unsigned long) &sf->uc;
}

static void perf_callchain_user_64(struct pt_regs *regs,
                                   struct perf_callchain_entry *entry)
{
        unsigned long sp, next_sp;
        unsigned long next_ip;
        unsigned long lr;
        long level = 0;
        struct signal_frame_64 __user *sigframe;
        unsigned long __user *fp, *uregs;

        next_ip = regs->nip;
        lr = regs->link;
        sp = regs->gpr[1];
        callchain_store(entry, PERF_CONTEXT_USER);
        callchain_store(entry, next_ip);

        for (;;) {
                fp = (unsigned long __user *) sp;
                if (!valid_user_sp(sp, 1) || read_user_stack_64(fp, &next_sp))
                        return;
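                /* fp[2] is the LR save slot (SP + 16 in the 64-bit ABI) */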
                if (level > 0 && read_user_stack_64(&fp[2], &next_ip))
                        return;

                /*
                 * Note: because the subtraction is unsigned, the
                 * next_sp - sp >= signal frame size check also succeeds
                 * when next_sp < sp, which can happen when transitioning
                 * from an alternate signal stack to the normal stack.
                 */
                if (next_sp - sp >= sizeof(struct signal_frame_64) &&
                    (is_sigreturn_64_address(next_ip, sp) ||
                     (level <= 1 && is_sigreturn_64_address(lr, sp))) &&
                    sane_signal_64_frame(sp)) {
                        /*
                         * This looks like a signal frame
                         */
                        sigframe = (struct signal_frame_64 __user *) sp;
                        uregs = sigframe->uc.uc_mcontext.gp_regs;
                        if (read_user_stack_64(&uregs[PT_NIP], &next_ip) ||
                            read_user_stack_64(&uregs[PT_LNK], &lr) ||
                            read_user_stack_64(&uregs[PT_R1], &sp))
                                return;
                        level = 0;
                        callchain_store(entry, PERF_CONTEXT_USER);
                        callchain_store(entry, next_ip);
                        continue;
                }

                if (level == 0)
                        next_ip = lr;
                callchain_store(entry, next_ip);
                ++level;
                sp = next_sp;
        }
}

static inline int current_is_64bit(void)
{
        /*
         * We can't use test_thread_flag() here because we may be on an
         * interrupt stack, and the thread flags don't get copied over
         * from the thread_info on the main stack to the interrupt stack.
         */
        return !test_ti_thread_flag(task_thread_info(current), TIF_32BIT);
}

#else  /* CONFIG_PPC64 */
/*
 * On 32-bit we just access the address and let hash_page create a
 * HPTE if necessary, so there is no need to fall back to reading
 * the page tables.  Since this is called at interrupt level,
 * do_page_fault() won't treat a DSI as a page fault.
 */
static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret)
{
        if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned int) ||
            ((unsigned long)ptr & 3))
                return -EFAULT;

        return __get_user_inatomic(*ret, ptr);
}

static inline void perf_callchain_user_64(struct pt_regs *regs,
                                          struct perf_callchain_entry *entry)
{
}

static inline int current_is_64bit(void)
{
        return 0;
}

static inline int valid_user_sp(unsigned long sp, int is_64)
{
        if (!sp || (sp & 7) || sp > TASK_SIZE - 32)
                return 0;
        return 1;
}

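/*
 * On 32-bit kernels the "32" structures are just the native ones, so
 * alias the names that the common code below expects.
 */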
#define __SIGNAL_FRAMESIZE32    __SIGNAL_FRAMESIZE
#define sigcontext32            sigcontext
#define mcontext32              mcontext
#define ucontext32              ucontext
#define compat_siginfo_t        struct siginfo

#endif /* CONFIG_PPC64 */

/*
 * Layout for non-RT signal frames
 */
struct signal_frame_32 {
        char                    dummy[__SIGNAL_FRAMESIZE32];
        struct sigcontext32     sctx;
        struct mcontext32       mctx;
        int                     abigap[56];
};

/*
 * Layout for RT signal frames
 */
struct rt_signal_frame_32 {
        char                    dummy[__SIGNAL_FRAMESIZE32 + 16];
        compat_siginfo_t        info;
        struct ucontext32       uc;
        int                     abigap[56];
};

static int is_sigreturn_32_address(unsigned int nip, unsigned int fp)
{
        if (nip == fp + offsetof(struct signal_frame_32, mctx.mc_pad))
                return 1;
        if (vdso32_sigtramp && current->mm->context.vdso_base &&
            nip == current->mm->context.vdso_base + vdso32_sigtramp)
                return 1;
        return 0;
}

static int is_rt_sigreturn_32_address(unsigned int nip, unsigned int fp)
{
        if (nip == fp + offsetof(struct rt_signal_frame_32,
                                 uc.uc_mcontext.mc_pad))
                return 1;
        if (vdso32_rt_sigtramp && current->mm->context.vdso_base &&
            nip == current->mm->context.vdso_base + vdso32_rt_sigtramp)
                return 1;
        return 0;
}

static int sane_signal_32_frame(unsigned int sp)
{
        struct signal_frame_32 __user *sf;
        unsigned int regs;

        sf = (struct signal_frame_32 __user *) (unsigned long) sp;
        if (read_user_stack_32((unsigned int __user *) &sf->sctx.regs, &regs))
                return 0;
        return regs == (unsigned long) &sf->mctx;
}

static int sane_rt_signal_32_frame(unsigned int sp)
{
        struct rt_signal_frame_32 __user *sf;
        unsigned int regs;

        sf = (struct rt_signal_frame_32 __user *) (unsigned long) sp;
        if (read_user_stack_32((unsigned int __user *) &sf->uc.uc_regs, &regs))
                return 0;
        return regs == (unsigned long) &sf->uc.uc_mcontext;
}

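/*
 * If the frame at sp looks like a 32-bit signal frame (RT or non-RT),
 * return a pointer to the saved GP registers inside it; otherwise
 * return NULL.
 */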
static unsigned int __user *signal_frame_32_regs(unsigned int sp,
                                unsigned int next_sp, unsigned int next_ip)
{
        struct mcontext32 __user *mctx = NULL;
        struct signal_frame_32 __user *sf;
        struct rt_signal_frame_32 __user *rt_sf;

        /*
         * Note: because the subtraction is unsigned, the
         * next_sp - sp >= signal frame size check also succeeds
         * when next_sp < sp, for example, when transitioning from
         * an alternate signal stack to the normal stack.
         */
        if (next_sp - sp >= sizeof(struct signal_frame_32) &&
            is_sigreturn_32_address(next_ip, sp) &&
            sane_signal_32_frame(sp)) {
                sf = (struct signal_frame_32 __user *) (unsigned long) sp;
                mctx = &sf->mctx;
        }

        if (!mctx && next_sp - sp >= sizeof(struct rt_signal_frame_32) &&
            is_rt_sigreturn_32_address(next_ip, sp) &&
            sane_rt_signal_32_frame(sp)) {
                rt_sf = (struct rt_signal_frame_32 __user *) (unsigned long) sp;
                mctx = &rt_sf->uc.uc_mcontext;
        }

        if (!mctx)
                return NULL;
        return mctx->mc_gregs;
}

static void perf_callchain_user_32(struct pt_regs *regs,
                                   struct perf_callchain_entry *entry)
{
        unsigned int sp, next_sp;
        unsigned int next_ip;
        unsigned int lr;
        long level = 0;
        unsigned int __user *fp, *uregs;

        next_ip = regs->nip;
        lr = regs->link;
        sp = regs->gpr[1];
        callchain_store(entry, PERF_CONTEXT_USER);
        callchain_store(entry, next_ip);

        while (entry->nr < PERF_MAX_STACK_DEPTH) {
                fp = (unsigned int __user *) (unsigned long) sp;
                if (!valid_user_sp(sp, 0) || read_user_stack_32(fp, &next_sp))
                        return;
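                /* fp[1] is the LR save slot (SP + 4 in the 32-bit ABI) */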
                if (level > 0 && read_user_stack_32(&fp[1], &next_ip))
                        return;

                uregs = signal_frame_32_regs(sp, next_sp, next_ip);
                if (!uregs && level <= 1)
                        uregs = signal_frame_32_regs(sp, next_sp, lr);
                if (uregs) {
                        /*
                         * This looks like a signal frame, so restart
                         * the stack trace with the values in it.
                         */
                        if (read_user_stack_32(&uregs[PT_NIP], &next_ip) ||
                            read_user_stack_32(&uregs[PT_LNK], &lr) ||
                            read_user_stack_32(&uregs[PT_R1], &sp))
                                return;
                        level = 0;
                        callchain_store(entry, PERF_CONTEXT_USER);
                        callchain_store(entry, next_ip);
                        continue;
                }

                if (level == 0)
                        next_ip = lr;
                callchain_store(entry, next_ip);
                ++level;
                sp = next_sp;
        }
}

/*
 * Since we can't get PMU interrupts inside a PMU interrupt handler,
 * we don't need separate irq and nmi entries here.
 */
static DEFINE_PER_CPU(struct perf_callchain_entry, callchain);

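/*
 * Entry point called by the generic perf_counter code when a sampled
 * event requests a callchain (PERF_SAMPLE_CALLCHAIN).
 */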
struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
{
        struct perf_callchain_entry *entry = &__get_cpu_var(callchain);

        entry->nr = 0;

        if (current->pid == 0)          /* idle task? */
                return entry;

        if (!user_mode(regs)) {
                perf_callchain_kernel(regs, entry);
                if (current->mm)
                        regs = task_pt_regs(current);
                else
                        regs = NULL;
        }

        if (regs) {
                if (current_is_64bit())
                        perf_callchain_user_64(regs, entry);
                else
                        perf_callchain_user_32(regs, entry);
        }

        return entry;
}