[pandora-kernel.git] arch/x86/kernel/cpu/perf_counter.c
1 /*
2  * Performance counter x86 architecture code
3  *
4  *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
5  *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
6  *  Copyright (C) 2009 Jaswinder Singh Rajput
7  *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
8  *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
9  *
10  *  For licencing details see kernel-base/COPYING
11  */
12
13 #include <linux/perf_counter.h>
14 #include <linux/capability.h>
15 #include <linux/notifier.h>
16 #include <linux/hardirq.h>
17 #include <linux/kprobes.h>
18 #include <linux/module.h>
19 #include <linux/kdebug.h>
20 #include <linux/sched.h>
21 #include <linux/uaccess.h>
22 #include <linux/highmem.h>
23
24 #include <asm/apic.h>
25 #include <asm/stacktrace.h>
26 #include <asm/nmi.h>
27
28 static u64 perf_counter_mask __read_mostly;
29
30 struct cpu_hw_counters {
31         struct perf_counter     *counters[X86_PMC_IDX_MAX];
32         unsigned long           used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
33         unsigned long           active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
34         unsigned long           interrupts;
35         int                     enabled;
36 };
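/*
 * Note on the per-CPU state above: counters[] is indexed by PMC index,
 * used_mask records which PMC slots have been claimed by x86_pmu_enable(),
 * and active_mask records which of those are currently counting -- the
 * interrupt/NMI handlers only look at counters whose active bit is set.
 */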
37
38 /*
39  * struct x86_pmu - generic x86 pmu
40  */
41 struct x86_pmu {
42         const char      *name;
43         int             version;
44         int             (*handle_irq)(struct pt_regs *);
45         void            (*disable_all)(void);
46         void            (*enable_all)(void);
47         void            (*enable)(struct hw_perf_counter *, int);
48         void            (*disable)(struct hw_perf_counter *, int);
49         unsigned        eventsel;
50         unsigned        perfctr;
51         u64             (*event_map)(int);
52         u64             (*raw_event)(u64);
53         int             max_events;
54         int             num_counters;
55         int             num_counters_fixed;
56         int             counter_bits;
57         u64             counter_mask;
58         int             apic;
59         u64             max_period;
60         u64             intel_ctrl;
61 };
62
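/*
 * x86_pmu is the vendor dispatch table: p6_pmu_init(), intel_pmu_init()
 * and amd_pmu_init() copy their struct x86_pmu template into it at boot,
 * and the generic code calls through these function pointers afterwards.
 */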
63 static struct x86_pmu x86_pmu __read_mostly;
64
65 static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = {
66         .enabled = 1,
67 };
68
69 /*
70  * Not sure about some of these
71  */
72 static const u64 p6_perfmon_event_map[] =
73 {
74   [PERF_COUNT_HW_CPU_CYCLES]            = 0x0079,
75   [PERF_COUNT_HW_INSTRUCTIONS]          = 0x00c0,
76   [PERF_COUNT_HW_CACHE_REFERENCES]      = 0x0f2e,
77   [PERF_COUNT_HW_CACHE_MISSES]          = 0x012e,
78   [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]   = 0x00c4,
79   [PERF_COUNT_HW_BRANCH_MISSES]         = 0x00c5,
80   [PERF_COUNT_HW_BUS_CYCLES]            = 0x0062,
81 };
82
83 static u64 p6_pmu_event_map(int event)
84 {
85         return p6_perfmon_event_map[event];
86 }
87
88 /*
89  * Counter setting that is specified not to count anything.
90  * We use this to effectively disable a counter.
91  *
92  * L2_RQSTS with 0 MESI unit mask.
93  */
94 #define P6_NOP_COUNTER                  0x0000002EULL
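/* Written by p6_pmu_disable_counter() below in place of the real event. */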
95
96 static u64 p6_pmu_raw_event(u64 event)
97 {
98 #define P6_EVNTSEL_EVENT_MASK           0x000000FFULL
99 #define P6_EVNTSEL_UNIT_MASK            0x0000FF00ULL
100 #define P6_EVNTSEL_EDGE_MASK            0x00040000ULL
101 #define P6_EVNTSEL_INV_MASK             0x00800000ULL
102 #define P6_EVNTSEL_COUNTER_MASK         0xFF000000ULL
103
104 #define P6_EVNTSEL_MASK                 \
105         (P6_EVNTSEL_EVENT_MASK |        \
106          P6_EVNTSEL_UNIT_MASK  |        \
107          P6_EVNTSEL_EDGE_MASK  |        \
108          P6_EVNTSEL_INV_MASK   |        \
109          P6_EVNTSEL_COUNTER_MASK)
110
111         return event & P6_EVNTSEL_MASK;
112 }
113
114
115 /*
116  * Intel PerfMon v3. Used on Core2 and later.
117  */
118 static const u64 intel_perfmon_event_map[] =
119 {
120   [PERF_COUNT_HW_CPU_CYCLES]            = 0x003c,
121   [PERF_COUNT_HW_INSTRUCTIONS]          = 0x00c0,
122   [PERF_COUNT_HW_CACHE_REFERENCES]      = 0x4f2e,
123   [PERF_COUNT_HW_CACHE_MISSES]          = 0x412e,
124   [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]   = 0x00c4,
125   [PERF_COUNT_HW_BRANCH_MISSES]         = 0x00c5,
126   [PERF_COUNT_HW_BUS_CYCLES]            = 0x013c,
127 };
128
129 static u64 intel_pmu_event_map(int event)
130 {
131         return intel_perfmon_event_map[event];
132 }
133
134 /*
135  * Generalized hw caching related event table, filled
136  * in on a per model basis. A value of 0 means
137  * 'not supported', -1 means 'event makes no sense on
138  * this CPU', any other value means the raw event
139  * ID.
140  */
141
142 #define C(x) PERF_COUNT_HW_CACHE_##x
143
144 static u64 __read_mostly hw_cache_event_ids
145                                 [PERF_COUNT_HW_CACHE_MAX]
146                                 [PERF_COUNT_HW_CACHE_OP_MAX]
147                                 [PERF_COUNT_HW_CACHE_RESULT_MAX];
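/*
 * Illustrative example (not from this file): counting last-level-cache
 * read misses through the generalized cache events would use
 *
 *      attr.type   = PERF_TYPE_HW_CACHE;
 *      attr.config = C(LL) | (C(OP_READ) << 8) | (C(RESULT_MISS) << 16);
 *
 * which set_ext_hw_attr() below decodes into a lookup of
 * hw_cache_event_ids[C(LL)][C(OP_READ)][C(RESULT_MISS)].
 */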
148
149 static const u64 nehalem_hw_cache_event_ids
150                                 [PERF_COUNT_HW_CACHE_MAX]
151                                 [PERF_COUNT_HW_CACHE_OP_MAX]
152                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
153 {
154  [ C(L1D) ] = {
155         [ C(OP_READ) ] = {
156                 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI            */
157                 [ C(RESULT_MISS)   ] = 0x0140, /* L1D_CACHE_LD.I_STATE         */
158         },
159         [ C(OP_WRITE) ] = {
160                 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI            */
161                 [ C(RESULT_MISS)   ] = 0x0141, /* L1D_CACHE_ST.I_STATE         */
162         },
163         [ C(OP_PREFETCH) ] = {
164                 [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS        */
165                 [ C(RESULT_MISS)   ] = 0x024e, /* L1D_PREFETCH.MISS            */
166         },
167  },
168  [ C(L1I ) ] = {
169         [ C(OP_READ) ] = {
170                 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                    */
171                 [ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                   */
172         },
173         [ C(OP_WRITE) ] = {
174                 [ C(RESULT_ACCESS) ] = -1,
175                 [ C(RESULT_MISS)   ] = -1,
176         },
177         [ C(OP_PREFETCH) ] = {
178                 [ C(RESULT_ACCESS) ] = 0x0,
179                 [ C(RESULT_MISS)   ] = 0x0,
180         },
181  },
182  [ C(LL  ) ] = {
183         [ C(OP_READ) ] = {
184                 [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS               */
185                 [ C(RESULT_MISS)   ] = 0x0224, /* L2_RQSTS.LD_MISS             */
186         },
187         [ C(OP_WRITE) ] = {
188                 [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS                */
189                 [ C(RESULT_MISS)   ] = 0x0824, /* L2_RQSTS.RFO_MISS            */
190         },
191         [ C(OP_PREFETCH) ] = {
192                 [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference                */
193                 [ C(RESULT_MISS)   ] = 0x412e, /* LLC Misses                   */
194         },
195  },
196  [ C(DTLB) ] = {
197         [ C(OP_READ) ] = {
198                 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI   (alias)  */
199                 [ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.ANY         */
200         },
201         [ C(OP_WRITE) ] = {
202                 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI   (alias)  */
203                 [ C(RESULT_MISS)   ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS  */
204         },
205         [ C(OP_PREFETCH) ] = {
206                 [ C(RESULT_ACCESS) ] = 0x0,
207                 [ C(RESULT_MISS)   ] = 0x0,
208         },
209  },
210  [ C(ITLB) ] = {
211         [ C(OP_READ) ] = {
212                 [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P           */
213                 [ C(RESULT_MISS)   ] = 0x20c8, /* ITLB_MISS_RETIRED            */
214         },
215         [ C(OP_WRITE) ] = {
216                 [ C(RESULT_ACCESS) ] = -1,
217                 [ C(RESULT_MISS)   ] = -1,
218         },
219         [ C(OP_PREFETCH) ] = {
220                 [ C(RESULT_ACCESS) ] = -1,
221                 [ C(RESULT_MISS)   ] = -1,
222         },
223  },
224  [ C(BPU ) ] = {
225         [ C(OP_READ) ] = {
226                 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
227                 [ C(RESULT_MISS)   ] = 0x03e8, /* BPU_CLEARS.ANY               */
228         },
229         [ C(OP_WRITE) ] = {
230                 [ C(RESULT_ACCESS) ] = -1,
231                 [ C(RESULT_MISS)   ] = -1,
232         },
233         [ C(OP_PREFETCH) ] = {
234                 [ C(RESULT_ACCESS) ] = -1,
235                 [ C(RESULT_MISS)   ] = -1,
236         },
237  },
238 };
239
240 static const u64 core2_hw_cache_event_ids
241                                 [PERF_COUNT_HW_CACHE_MAX]
242                                 [PERF_COUNT_HW_CACHE_OP_MAX]
243                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
244 {
245  [ C(L1D) ] = {
246         [ C(OP_READ) ] = {
247                 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI          */
248                 [ C(RESULT_MISS)   ] = 0x0140, /* L1D_CACHE_LD.I_STATE       */
249         },
250         [ C(OP_WRITE) ] = {
251                 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI          */
252                 [ C(RESULT_MISS)   ] = 0x0141, /* L1D_CACHE_ST.I_STATE       */
253         },
254         [ C(OP_PREFETCH) ] = {
255                 [ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS      */
256                 [ C(RESULT_MISS)   ] = 0,
257         },
258  },
259  [ C(L1I ) ] = {
260         [ C(OP_READ) ] = {
261                 [ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS                  */
262                 [ C(RESULT_MISS)   ] = 0x0081, /* L1I.MISSES                 */
263         },
264         [ C(OP_WRITE) ] = {
265                 [ C(RESULT_ACCESS) ] = -1,
266                 [ C(RESULT_MISS)   ] = -1,
267         },
268         [ C(OP_PREFETCH) ] = {
269                 [ C(RESULT_ACCESS) ] = 0,
270                 [ C(RESULT_MISS)   ] = 0,
271         },
272  },
273  [ C(LL  ) ] = {
274         [ C(OP_READ) ] = {
275                 [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI                 */
276                 [ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE               */
277         },
278         [ C(OP_WRITE) ] = {
279                 [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI                 */
280                 [ C(RESULT_MISS)   ] = 0x412A, /* L2_ST.ISTATE               */
281         },
282         [ C(OP_PREFETCH) ] = {
283                 [ C(RESULT_ACCESS) ] = 0,
284                 [ C(RESULT_MISS)   ] = 0,
285         },
286  },
287  [ C(DTLB) ] = {
288         [ C(OP_READ) ] = {
289                 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI  (alias) */
290                 [ C(RESULT_MISS)   ] = 0x0208, /* DTLB_MISSES.MISS_LD        */
291         },
292         [ C(OP_WRITE) ] = {
293                 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI  (alias) */
294                 [ C(RESULT_MISS)   ] = 0x0808, /* DTLB_MISSES.MISS_ST        */
295         },
296         [ C(OP_PREFETCH) ] = {
297                 [ C(RESULT_ACCESS) ] = 0,
298                 [ C(RESULT_MISS)   ] = 0,
299         },
300  },
301  [ C(ITLB) ] = {
302         [ C(OP_READ) ] = {
303                 [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P         */
304                 [ C(RESULT_MISS)   ] = 0x1282, /* ITLBMISSES                 */
305         },
306         [ C(OP_WRITE) ] = {
307                 [ C(RESULT_ACCESS) ] = -1,
308                 [ C(RESULT_MISS)   ] = -1,
309         },
310         [ C(OP_PREFETCH) ] = {
311                 [ C(RESULT_ACCESS) ] = -1,
312                 [ C(RESULT_MISS)   ] = -1,
313         },
314  },
315  [ C(BPU ) ] = {
316         [ C(OP_READ) ] = {
317                 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY        */
318                 [ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED    */
319         },
320         [ C(OP_WRITE) ] = {
321                 [ C(RESULT_ACCESS) ] = -1,
322                 [ C(RESULT_MISS)   ] = -1,
323         },
324         [ C(OP_PREFETCH) ] = {
325                 [ C(RESULT_ACCESS) ] = -1,
326                 [ C(RESULT_MISS)   ] = -1,
327         },
328  },
329 };
330
331 static const u64 atom_hw_cache_event_ids
332                                 [PERF_COUNT_HW_CACHE_MAX]
333                                 [PERF_COUNT_HW_CACHE_OP_MAX]
334                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
335 {
336  [ C(L1D) ] = {
337         [ C(OP_READ) ] = {
338                 [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD               */
339                 [ C(RESULT_MISS)   ] = 0,
340         },
341         [ C(OP_WRITE) ] = {
342                 [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST               */
343                 [ C(RESULT_MISS)   ] = 0,
344         },
345         [ C(OP_PREFETCH) ] = {
346                 [ C(RESULT_ACCESS) ] = 0x0,
347                 [ C(RESULT_MISS)   ] = 0,
348         },
349  },
350  [ C(L1I ) ] = {
351         [ C(OP_READ) ] = {
352                 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                  */
353                 [ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                 */
354         },
355         [ C(OP_WRITE) ] = {
356                 [ C(RESULT_ACCESS) ] = -1,
357                 [ C(RESULT_MISS)   ] = -1,
358         },
359         [ C(OP_PREFETCH) ] = {
360                 [ C(RESULT_ACCESS) ] = 0,
361                 [ C(RESULT_MISS)   ] = 0,
362         },
363  },
364  [ C(LL  ) ] = {
365         [ C(OP_READ) ] = {
366                 [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI                 */
367                 [ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE               */
368         },
369         [ C(OP_WRITE) ] = {
370                 [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI                 */
371                 [ C(RESULT_MISS)   ] = 0x412A, /* L2_ST.ISTATE               */
372         },
373         [ C(OP_PREFETCH) ] = {
374                 [ C(RESULT_ACCESS) ] = 0,
375                 [ C(RESULT_MISS)   ] = 0,
376         },
377  },
378  [ C(DTLB) ] = {
379         [ C(OP_READ) ] = {
380                 [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI  (alias) */
381                 [ C(RESULT_MISS)   ] = 0x0508, /* DTLB_MISSES.MISS_LD        */
382         },
383         [ C(OP_WRITE) ] = {
384                 [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI  (alias) */
385                 [ C(RESULT_MISS)   ] = 0x0608, /* DTLB_MISSES.MISS_ST        */
386         },
387         [ C(OP_PREFETCH) ] = {
388                 [ C(RESULT_ACCESS) ] = 0,
389                 [ C(RESULT_MISS)   ] = 0,
390         },
391  },
392  [ C(ITLB) ] = {
393         [ C(OP_READ) ] = {
394                 [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P         */
395                 [ C(RESULT_MISS)   ] = 0x0282, /* ITLB.MISSES                */
396         },
397         [ C(OP_WRITE) ] = {
398                 [ C(RESULT_ACCESS) ] = -1,
399                 [ C(RESULT_MISS)   ] = -1,
400         },
401         [ C(OP_PREFETCH) ] = {
402                 [ C(RESULT_ACCESS) ] = -1,
403                 [ C(RESULT_MISS)   ] = -1,
404         },
405  },
406  [ C(BPU ) ] = {
407         [ C(OP_READ) ] = {
408                 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY        */
409                 [ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED    */
410         },
411         [ C(OP_WRITE) ] = {
412                 [ C(RESULT_ACCESS) ] = -1,
413                 [ C(RESULT_MISS)   ] = -1,
414         },
415         [ C(OP_PREFETCH) ] = {
416                 [ C(RESULT_ACCESS) ] = -1,
417                 [ C(RESULT_MISS)   ] = -1,
418         },
419  },
420 };
421
422 static u64 intel_pmu_raw_event(u64 event)
423 {
424 #define CORE_EVNTSEL_EVENT_MASK         0x000000FFULL
425 #define CORE_EVNTSEL_UNIT_MASK          0x0000FF00ULL
426 #define CORE_EVNTSEL_EDGE_MASK          0x00040000ULL
427 #define CORE_EVNTSEL_INV_MASK           0x00800000ULL
428 #define CORE_EVNTSEL_COUNTER_MASK       0xFF000000ULL
429
430 #define CORE_EVNTSEL_MASK               \
431         (CORE_EVNTSEL_EVENT_MASK |      \
432          CORE_EVNTSEL_UNIT_MASK  |      \
433          CORE_EVNTSEL_EDGE_MASK  |      \
434          CORE_EVNTSEL_INV_MASK  |       \
435          CORE_EVNTSEL_COUNTER_MASK)
436
437         return event & CORE_EVNTSEL_MASK;
438 }
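/*
 * Note on intel_pmu_raw_event() above: a raw config such as 0x004f2e
 * (event 0x2e, unit mask 0x4f, i.e. LLC references) passes through the
 * mask unchanged, while bits outside it -- notably the INT and enable
 * bits -- are stripped here and supplied by the kernel itself in
 * __hw_perf_counter_init() and the counter-enable paths.
 */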
439
440 static const u64 amd_hw_cache_event_ids
441                                 [PERF_COUNT_HW_CACHE_MAX]
442                                 [PERF_COUNT_HW_CACHE_OP_MAX]
443                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
444 {
445  [ C(L1D) ] = {
446         [ C(OP_READ) ] = {
447                 [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
448                 [ C(RESULT_MISS)   ] = 0x0041, /* Data Cache Misses          */
449         },
450         [ C(OP_WRITE) ] = {
451                 [ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */
452                 [ C(RESULT_MISS)   ] = 0,
453         },
454         [ C(OP_PREFETCH) ] = {
455                 [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts  */
456                 [ C(RESULT_MISS)   ] = 0x0167, /* Data Prefetcher :cancelled */
457         },
458  },
459  [ C(L1I ) ] = {
460         [ C(OP_READ) ] = {
461                 [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches  */
462                 [ C(RESULT_MISS)   ] = 0x0081, /* Instruction cache misses   */
463         },
464         [ C(OP_WRITE) ] = {
465                 [ C(RESULT_ACCESS) ] = -1,
466                 [ C(RESULT_MISS)   ] = -1,
467         },
468         [ C(OP_PREFETCH) ] = {
469                 [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
470                 [ C(RESULT_MISS)   ] = 0,
471         },
472  },
473  [ C(LL  ) ] = {
474         [ C(OP_READ) ] = {
475                 [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
476                 [ C(RESULT_MISS)   ] = 0x037E, /* L2 Cache Misses : IC+DC     */
477         },
478         [ C(OP_WRITE) ] = {
479                 [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback           */
480                 [ C(RESULT_MISS)   ] = 0,
481         },
482         [ C(OP_PREFETCH) ] = {
483                 [ C(RESULT_ACCESS) ] = 0,
484                 [ C(RESULT_MISS)   ] = 0,
485         },
486  },
487  [ C(DTLB) ] = {
488         [ C(OP_READ) ] = {
489                 [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
490                 [ C(RESULT_MISS)   ] = 0x0046, /* L1 DTLB and L2 DTLB Miss   */
491         },
492         [ C(OP_WRITE) ] = {
493                 [ C(RESULT_ACCESS) ] = 0,
494                 [ C(RESULT_MISS)   ] = 0,
495         },
496         [ C(OP_PREFETCH) ] = {
497                 [ C(RESULT_ACCESS) ] = 0,
498                 [ C(RESULT_MISS)   ] = 0,
499         },
500  },
501  [ C(ITLB) ] = {
502         [ C(OP_READ) ] = {
503                 [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fetches        */
504                 [ C(RESULT_MISS)   ] = 0x0085, /* Instr. fetch ITLB misses   */
505         },
506         [ C(OP_WRITE) ] = {
507                 [ C(RESULT_ACCESS) ] = -1,
508                 [ C(RESULT_MISS)   ] = -1,
509         },
510         [ C(OP_PREFETCH) ] = {
511                 [ C(RESULT_ACCESS) ] = -1,
512                 [ C(RESULT_MISS)   ] = -1,
513         },
514  },
515  [ C(BPU ) ] = {
516         [ C(OP_READ) ] = {
517                 [ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr.      */
518                 [ C(RESULT_MISS)   ] = 0x00c3, /* Retired Mispredicted BI    */
519         },
520         [ C(OP_WRITE) ] = {
521                 [ C(RESULT_ACCESS) ] = -1,
522                 [ C(RESULT_MISS)   ] = -1,
523         },
524         [ C(OP_PREFETCH) ] = {
525                 [ C(RESULT_ACCESS) ] = -1,
526                 [ C(RESULT_MISS)   ] = -1,
527         },
528  },
529 };
530
531 /*
532  * AMD Performance Monitor K7 and later.
533  */
534 static const u64 amd_perfmon_event_map[] =
535 {
536   [PERF_COUNT_HW_CPU_CYCLES]            = 0x0076,
537   [PERF_COUNT_HW_INSTRUCTIONS]          = 0x00c0,
538   [PERF_COUNT_HW_CACHE_REFERENCES]      = 0x0080,
539   [PERF_COUNT_HW_CACHE_MISSES]          = 0x0081,
540   [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]   = 0x00c4,
541   [PERF_COUNT_HW_BRANCH_MISSES]         = 0x00c5,
542 };
543
544 static u64 amd_pmu_event_map(int event)
545 {
546         return amd_perfmon_event_map[event];
547 }
548
549 static u64 amd_pmu_raw_event(u64 event)
550 {
551 #define K7_EVNTSEL_EVENT_MASK   0x7000000FFULL
552 #define K7_EVNTSEL_UNIT_MASK    0x00000FF00ULL
553 #define K7_EVNTSEL_EDGE_MASK    0x000040000ULL
554 #define K7_EVNTSEL_INV_MASK     0x000800000ULL
555 #define K7_EVNTSEL_COUNTER_MASK 0x0FF000000ULL
556
557 #define K7_EVNTSEL_MASK                 \
558         (K7_EVNTSEL_EVENT_MASK |        \
559          K7_EVNTSEL_UNIT_MASK  |        \
560          K7_EVNTSEL_EDGE_MASK  |        \
561          K7_EVNTSEL_INV_MASK   |        \
562          K7_EVNTSEL_COUNTER_MASK)
563
564         return event & K7_EVNTSEL_MASK;
565 }
566
567 /*
568  * Propagate counter elapsed time into the generic counter.
569  * Can only be executed on the CPU where the counter is active.
570  * Returns the delta events processed.
571  */
572 static u64
573 x86_perf_counter_update(struct perf_counter *counter,
574                         struct hw_perf_counter *hwc, int idx)
575 {
576         int shift = 64 - x86_pmu.counter_bits;
577         u64 prev_raw_count, new_raw_count;
578         s64 delta;
579
580         /*
581          * Careful: an NMI might modify the previous counter value.
582          *
583          * Our tactic to handle this is to first atomically read and
584          * exchange a new raw count - then add that new-prev delta
585          * count to the generic counter atomically:
586          */
587 again:
588         prev_raw_count = atomic64_read(&hwc->prev_count);
589         rdmsrl(hwc->counter_base + idx, new_raw_count);
590
591         if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
592                                         new_raw_count) != prev_raw_count)
593                 goto again;
594
595         /*
596          * Now we have the new raw value and have updated the prev
597          * timestamp already. We can now calculate the elapsed delta
598          * (counter-)time and add that to the generic counter.
599          *
600          * Careful, not all hw sign-extends above the physical width
601          * of the count.
602          */
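        /*
         * Worked example with 32-bit counters (shift == 32): if
         * prev_raw_count == 0xfffffff0 and the counter wrapped to
         * new_raw_count == 0x00000010, then
         * ((new << 32) - (prev << 32)) >> 32 == 0x20, the true delta.
         */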
603         delta = (new_raw_count << shift) - (prev_raw_count << shift);
604         delta >>= shift;
605
606         atomic64_add(delta, &counter->count);
607         atomic64_sub(delta, &hwc->period_left);
608
609         return new_raw_count;
610 }
611
612 static atomic_t active_counters;
613 static DEFINE_MUTEX(pmc_reserve_mutex);
614
615 static bool reserve_pmc_hardware(void)
616 {
617 #ifdef CONFIG_X86_LOCAL_APIC
618         int i;
619
620         if (nmi_watchdog == NMI_LOCAL_APIC)
621                 disable_lapic_nmi_watchdog();
622
623         for (i = 0; i < x86_pmu.num_counters; i++) {
624                 if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
625                         goto perfctr_fail;
626         }
627
628         for (i = 0; i < x86_pmu.num_counters; i++) {
629                 if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
630                         goto eventsel_fail;
631         }
632 #endif
633
634         return true;
635
636 #ifdef CONFIG_X86_LOCAL_APIC
637 eventsel_fail:
638         for (i--; i >= 0; i--)
639                 release_evntsel_nmi(x86_pmu.eventsel + i);
640
641         i = x86_pmu.num_counters;
642
643 perfctr_fail:
644         for (i--; i >= 0; i--)
645                 release_perfctr_nmi(x86_pmu.perfctr + i);
646
647         if (nmi_watchdog == NMI_LOCAL_APIC)
648                 enable_lapic_nmi_watchdog();
649
650         return false;
651 #endif
652 }
653
654 static void release_pmc_hardware(void)
655 {
656 #ifdef CONFIG_X86_LOCAL_APIC
657         int i;
658
659         for (i = 0; i < x86_pmu.num_counters; i++) {
660                 release_perfctr_nmi(x86_pmu.perfctr + i);
661                 release_evntsel_nmi(x86_pmu.eventsel + i);
662         }
663
664         if (nmi_watchdog == NMI_LOCAL_APIC)
665                 enable_lapic_nmi_watchdog();
666 #endif
667 }
668
669 static void hw_perf_counter_destroy(struct perf_counter *counter)
670 {
671         if (atomic_dec_and_mutex_lock(&active_counters, &pmc_reserve_mutex)) {
672                 release_pmc_hardware();
673                 mutex_unlock(&pmc_reserve_mutex);
674         }
675 }
676
677 static inline int x86_pmu_initialized(void)
678 {
679         return x86_pmu.handle_irq != NULL;
680 }
681
682 static inline int
683 set_ext_hw_attr(struct hw_perf_counter *hwc, struct perf_counter_attr *attr)
684 {
685         unsigned int cache_type, cache_op, cache_result;
686         u64 config, val;
687
688         config = attr->config;
689
690         cache_type = (config >>  0) & 0xff;
691         if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
692                 return -EINVAL;
693
694         cache_op = (config >>  8) & 0xff;
695         if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
696                 return -EINVAL;
697
698         cache_result = (config >> 16) & 0xff;
699         if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
700                 return -EINVAL;
701
702         val = hw_cache_event_ids[cache_type][cache_op][cache_result];
703
704         if (val == 0)
705                 return -ENOENT;
706
707         if (val == -1)
708                 return -EINVAL;
709
710         hwc->config |= val;
711
712         return 0;
713 }
714
715 /*
716  * Setup the hardware configuration for a given attr_type
717  */
718 static int __hw_perf_counter_init(struct perf_counter *counter)
719 {
720         struct perf_counter_attr *attr = &counter->attr;
721         struct hw_perf_counter *hwc = &counter->hw;
722         u64 config;
723         int err;
724
725         if (!x86_pmu_initialized())
726                 return -ENODEV;
727
728         err = 0;
729         if (!atomic_inc_not_zero(&active_counters)) {
730                 mutex_lock(&pmc_reserve_mutex);
731                 if (atomic_read(&active_counters) == 0 && !reserve_pmc_hardware())
732                         err = -EBUSY;
733                 else
734                         atomic_inc(&active_counters);
735                 mutex_unlock(&pmc_reserve_mutex);
736         }
737         if (err)
738                 return err;
739
740         /*
741          * Generate PMC IRQs:
742          * (keep 'enabled' bit clear for now)
743          */
744         hwc->config = ARCH_PERFMON_EVENTSEL_INT;
745
746         /*
747          * Count user and OS events unless requested not to.
748          */
749         if (!attr->exclude_user)
750                 hwc->config |= ARCH_PERFMON_EVENTSEL_USR;
751         if (!attr->exclude_kernel)
752                 hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
753
754         if (!hwc->sample_period) {
755                 hwc->sample_period = x86_pmu.max_period;
756                 hwc->last_period = hwc->sample_period;
757                 atomic64_set(&hwc->period_left, hwc->sample_period);
758         } else {
759                 /*
760                  * If we have a PMU initialized but no APIC
761                  * interrupts, we cannot sample hardware
762                  * counters (user-space has to fall back and
763                  * sample via a hrtimer based software counter):
764                  */
765                 if (!x86_pmu.apic)
766                         return -EOPNOTSUPP;
767         }
768
769         counter->destroy = hw_perf_counter_destroy;
770
771         /*
772          * Raw event types provide the config in the event structure
773          */
774         if (attr->type == PERF_TYPE_RAW) {
775                 hwc->config |= x86_pmu.raw_event(attr->config);
776                 return 0;
777         }
778
779         if (attr->type == PERF_TYPE_HW_CACHE)
780                 return set_ext_hw_attr(hwc, attr);
781
782         if (attr->config >= x86_pmu.max_events)
783                 return -EINVAL;
784
785         /*
786          * The generic map:
787          */
788         config = x86_pmu.event_map(attr->config);
789
790         if (config == 0)
791                 return -ENOENT;
792
793         if (config == -1LL)
794                 return -EINVAL;
795
796         hwc->config |= config;
797
798         return 0;
799 }
800
801 static void p6_pmu_disable_all(void)
802 {
803         struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
804         u64 val;
805
806         if (!cpuc->enabled)
807                 return;
808
809         cpuc->enabled = 0;
810         barrier();
811
812         /* p6 only has one enable register */
813         rdmsrl(MSR_P6_EVNTSEL0, val);
814         val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
815         wrmsrl(MSR_P6_EVNTSEL0, val);
816 }
817
818 static void intel_pmu_disable_all(void)
819 {
820         wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
821 }
822
823 static void amd_pmu_disable_all(void)
824 {
825         struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
826         int idx;
827
828         if (!cpuc->enabled)
829                 return;
830
831         cpuc->enabled = 0;
832         /*
833          * ensure we write the disable before we start disabling the
834          * counters proper, so that amd_pmu_enable_counter() does the
835          * right thing.
836          */
837         barrier();
838
839         for (idx = 0; idx < x86_pmu.num_counters; idx++) {
840                 u64 val;
841
842                 if (!test_bit(idx, cpuc->active_mask))
843                         continue;
844                 rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
845                 if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE))
846                         continue;
847                 val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
848                 wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
849         }
850 }
851
852 void hw_perf_disable(void)
853 {
854         if (!x86_pmu_initialized())
855                 return;
856         return x86_pmu.disable_all();
857 }
858
859 static void p6_pmu_enable_all(void)
860 {
861         struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
862         unsigned long val;
863
864         if (cpuc->enabled)
865                 return;
866
867         cpuc->enabled = 1;
868         barrier();
869
870         /* p6 only has one enable register */
871         rdmsrl(MSR_P6_EVNTSEL0, val);
872         val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
873         wrmsrl(MSR_P6_EVNTSEL0, val);
874 }
875
876 static void intel_pmu_enable_all(void)
877 {
878         wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
879 }
880
881 static void amd_pmu_enable_all(void)
882 {
883         struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
884         int idx;
885
886         if (cpuc->enabled)
887                 return;
888
889         cpuc->enabled = 1;
890         barrier();
891
892         for (idx = 0; idx < x86_pmu.num_counters; idx++) {
893                 struct perf_counter *counter = cpuc->counters[idx];
894                 u64 val;
895
896                 if (!test_bit(idx, cpuc->active_mask))
897                         continue;
898
899                 val = counter->hw.config;
900                 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
901                 wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
902         }
903 }
904
905 void hw_perf_enable(void)
906 {
907         if (!x86_pmu_initialized())
908                 return;
909         x86_pmu.enable_all();
910 }
911
912 static inline u64 intel_pmu_get_status(void)
913 {
914         u64 status;
915
916         rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
917
918         return status;
919 }
920
921 static inline void intel_pmu_ack_status(u64 ack)
922 {
923         wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
924 }
925
926 static inline void x86_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
927 {
928         (void)checking_wrmsrl(hwc->config_base + idx,
929                               hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE);
930 }
931
932 static inline void x86_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
933 {
934         (void)checking_wrmsrl(hwc->config_base + idx, hwc->config);
935 }
936
937 static inline void
938 intel_pmu_disable_fixed(struct hw_perf_counter *hwc, int __idx)
939 {
940         int idx = __idx - X86_PMC_IDX_FIXED;
941         u64 ctrl_val, mask;
942
943         mask = 0xfULL << (idx * 4);
944
945         rdmsrl(hwc->config_base, ctrl_val);
946         ctrl_val &= ~mask;
947         (void)checking_wrmsrl(hwc->config_base, ctrl_val);
948 }
949
950 static inline void
951 p6_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
952 {
953         struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
954         u64 val = P6_NOP_COUNTER;
955
956         if (cpuc->enabled)
957                 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
958
959         (void)checking_wrmsrl(hwc->config_base + idx, val);
960 }
961
962 static inline void
963 intel_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
964 {
965         if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
966                 intel_pmu_disable_fixed(hwc, idx);
967                 return;
968         }
969
970         x86_pmu_disable_counter(hwc, idx);
971 }
972
973 static inline void
974 amd_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
975 {
976         x86_pmu_disable_counter(hwc, idx);
977 }
978
979 static DEFINE_PER_CPU(u64, prev_left[X86_PMC_IDX_MAX]);
980
981 /*
982  * Set the next IRQ period, based on the hwc->period_left value.
983  * To be called with the counter disabled in hw:
984  */
985 static int
986 x86_perf_counter_set_period(struct perf_counter *counter,
987                              struct hw_perf_counter *hwc, int idx)
988 {
989         s64 left = atomic64_read(&hwc->period_left);
990         s64 period = hwc->sample_period;
991         int err, ret = 0;
992
993         /*
994          * If we are way outside a reasonable range then just skip forward:
995          */
996         if (unlikely(left <= -period)) {
997                 left = period;
998                 atomic64_set(&hwc->period_left, left);
999                 hwc->last_period = period;
1000                 ret = 1;
1001         }
1002
1003         if (unlikely(left <= 0)) {
1004                 left += period;
1005                 atomic64_set(&hwc->period_left, left);
1006                 hwc->last_period = period;
1007                 ret = 1;
1008         }
1009         /*
1010          * Quirk: certain CPUs don't like it if just 1 event is left:
1011          */
1012         if (unlikely(left < 2))
1013                 left = 2;
1014
1015         if (left > x86_pmu.max_period)
1016                 left = x86_pmu.max_period;
1017
1018         per_cpu(prev_left[idx], smp_processor_id()) = left;
1019
1020         /*
1021          * The hw counter starts counting from this counter offset,
1022          * mark it to be able to extract future deltas:
1023          */
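        /*
         * For example, with left == 100000 on a 32-bit wide counter the
         * MSR below is written with (u64)(-100000) & 0xffffffff ==
         * 0xfffe7960, so the PMC overflows (and raises a PMI) after
         * exactly 100000 further events.
         */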
1024         atomic64_set(&hwc->prev_count, (u64)-left);
1025
1026         err = checking_wrmsrl(hwc->counter_base + idx,
1027                              (u64)(-left) & x86_pmu.counter_mask);
1028
1029         perf_counter_update_userpage(counter);
1030
1031         return ret;
1032 }
1033
1034 static inline void
1035 intel_pmu_enable_fixed(struct hw_perf_counter *hwc, int __idx)
1036 {
1037         int idx = __idx - X86_PMC_IDX_FIXED;
1038         u64 ctrl_val, bits, mask;
1039         int err;
1040
1041         /*
1042          * Enable IRQ generation (0x8),
1043          * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
1044          * if requested:
1045          */
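        /*
         * e.g. for fixed counter 1, counting in both rings with PMI
         * enabled: bits == 0xb, which lands in the 4-bit field at bits
         * 4-7 of MSR_ARCH_PERFMON_FIXED_CTR_CTRL (mask == 0xf0).
         */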
1046         bits = 0x8ULL;
1047         if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
1048                 bits |= 0x2;
1049         if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
1050                 bits |= 0x1;
1051         bits <<= (idx * 4);
1052         mask = 0xfULL << (idx * 4);
1053
1054         rdmsrl(hwc->config_base, ctrl_val);
1055         ctrl_val &= ~mask;
1056         ctrl_val |= bits;
1057         err = checking_wrmsrl(hwc->config_base, ctrl_val);
1058 }
1059
1060 static void p6_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
1061 {
1062         struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
1063         u64 val;
1064
1065         val = hwc->config;
1066         if (cpuc->enabled)
1067                 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
1068
1069         (void)checking_wrmsrl(hwc->config_base + idx, val);
1070 }
1071
1072
1073 static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
1074 {
1075         if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
1076                 intel_pmu_enable_fixed(hwc, idx);
1077                 return;
1078         }
1079
1080         x86_pmu_enable_counter(hwc, idx);
1081 }
1082
1083 static void amd_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
1084 {
1085         struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
1086
1087         if (cpuc->enabled)
1088                 x86_pmu_enable_counter(hwc, idx);
1089 }
1090
1091 static int
1092 fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc)
1093 {
1094         unsigned int event;
1095
1096         if (!x86_pmu.num_counters_fixed)
1097                 return -1;
1098
1099         event = hwc->config & ARCH_PERFMON_EVENT_MASK;
1100
1101         if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS)))
1102                 return X86_PMC_IDX_FIXED_INSTRUCTIONS;
1103         if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES)))
1104                 return X86_PMC_IDX_FIXED_CPU_CYCLES;
1105         if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_BUS_CYCLES)))
1106                 return X86_PMC_IDX_FIXED_BUS_CYCLES;
1107
1108         return -1;
1109 }
1110
1111 /*
1112  * Find a PMC slot for the freshly enabled / scheduled in counter:
1113  */
1114 static int x86_pmu_enable(struct perf_counter *counter)
1115 {
1116         struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
1117         struct hw_perf_counter *hwc = &counter->hw;
1118         int idx;
1119
1120         idx = fixed_mode_idx(counter, hwc);
1121         if (idx >= 0) {
1122                 /*
1123                  * Try to get the fixed counter, if that is already taken
1124                  * then try to get a generic counter:
1125                  */
1126                 if (test_and_set_bit(idx, cpuc->used_mask))
1127                         goto try_generic;
1128
1129                 hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
1130                 /*
1131                  * We set it so that counter_base + idx in wrmsr/rdmsr maps to
1132                  * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
1133                  */
1134                 hwc->counter_base =
1135                         MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
1136                 hwc->idx = idx;
1137         } else {
1138                 idx = hwc->idx;
1139                 /* Try to get the previous generic counter again */
1140                 if (test_and_set_bit(idx, cpuc->used_mask)) {
1141 try_generic:
1142                         idx = find_first_zero_bit(cpuc->used_mask,
1143                                                   x86_pmu.num_counters);
1144                         if (idx == x86_pmu.num_counters)
1145                                 return -EAGAIN;
1146
1147                         set_bit(idx, cpuc->used_mask);
1148                         hwc->idx = idx;
1149                 }
1150                 hwc->config_base  = x86_pmu.eventsel;
1151                 hwc->counter_base = x86_pmu.perfctr;
1152         }
1153
1154         perf_counters_lapic_init();
1155
1156         x86_pmu.disable(hwc, idx);
1157
1158         cpuc->counters[idx] = counter;
1159         set_bit(idx, cpuc->active_mask);
1160
1161         x86_perf_counter_set_period(counter, hwc, idx);
1162         x86_pmu.enable(hwc, idx);
1163
1164         perf_counter_update_userpage(counter);
1165
1166         return 0;
1167 }
1168
1169 static void x86_pmu_unthrottle(struct perf_counter *counter)
1170 {
1171         struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
1172         struct hw_perf_counter *hwc = &counter->hw;
1173
1174         if (WARN_ON_ONCE(hwc->idx >= X86_PMC_IDX_MAX ||
1175                                 cpuc->counters[hwc->idx] != counter))
1176                 return;
1177
1178         x86_pmu.enable(hwc, hwc->idx);
1179 }
1180
1181 void perf_counter_print_debug(void)
1182 {
1183         u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
1184         struct cpu_hw_counters *cpuc;
1185         unsigned long flags;
1186         int cpu, idx;
1187
1188         if (!x86_pmu.num_counters)
1189                 return;
1190
1191         local_irq_save(flags);
1192
1193         cpu = smp_processor_id();
1194         cpuc = &per_cpu(cpu_hw_counters, cpu);
1195
1196         if (x86_pmu.version >= 2) {
1197                 rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
1198                 rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
1199                 rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
1200                 rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);
1201
1202                 pr_info("\n");
1203                 pr_info("CPU#%d: ctrl:       %016llx\n", cpu, ctrl);
1204                 pr_info("CPU#%d: status:     %016llx\n", cpu, status);
1205                 pr_info("CPU#%d: overflow:   %016llx\n", cpu, overflow);
1206                 pr_info("CPU#%d: fixed:      %016llx\n", cpu, fixed);
1207         }
1208         pr_info("CPU#%d: used:       %016llx\n", cpu, *(u64 *)cpuc->used_mask);
1209
1210         for (idx = 0; idx < x86_pmu.num_counters; idx++) {
1211                 rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
1212                 rdmsrl(x86_pmu.perfctr  + idx, pmc_count);
1213
1214                 prev_left = per_cpu(prev_left[idx], cpu);
1215
1216                 pr_info("CPU#%d:   gen-PMC%d ctrl:  %016llx\n",
1217                         cpu, idx, pmc_ctrl);
1218                 pr_info("CPU#%d:   gen-PMC%d count: %016llx\n",
1219                         cpu, idx, pmc_count);
1220                 pr_info("CPU#%d:   gen-PMC%d left:  %016llx\n",
1221                         cpu, idx, prev_left);
1222         }
1223         for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
1224                 rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
1225
1226                 pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
1227                         cpu, idx, pmc_count);
1228         }
1229         local_irq_restore(flags);
1230 }
1231
1232 static void x86_pmu_disable(struct perf_counter *counter)
1233 {
1234         struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
1235         struct hw_perf_counter *hwc = &counter->hw;
1236         int idx = hwc->idx;
1237
1238         /*
1239          * Must be done before we disable, otherwise the nmi handler
1240          * could reenable again:
1241          */
1242         clear_bit(idx, cpuc->active_mask);
1243         x86_pmu.disable(hwc, idx);
1244
1245         /*
1246          * Make sure the cleared pointer becomes visible before we
1247          * (potentially) free the counter:
1248          */
1249         barrier();
1250
1251         /*
1252          * Drain the remaining delta count out of a counter
1253          * that we are disabling:
1254          */
1255         x86_perf_counter_update(counter, hwc, idx);
1256         cpuc->counters[idx] = NULL;
1257         clear_bit(idx, cpuc->used_mask);
1258
1259         perf_counter_update_userpage(counter);
1260 }
1261
1262 /*
1263  * Save and restart an expired counter. Called by NMI contexts,
1264  * so it has to be careful about preempting normal counter ops:
1265  */
1266 static int intel_pmu_save_and_restart(struct perf_counter *counter)
1267 {
1268         struct hw_perf_counter *hwc = &counter->hw;
1269         int idx = hwc->idx;
1270         int ret;
1271
1272         x86_perf_counter_update(counter, hwc, idx);
1273         ret = x86_perf_counter_set_period(counter, hwc, idx);
1274
1275         if (counter->state == PERF_COUNTER_STATE_ACTIVE)
1276                 intel_pmu_enable_counter(hwc, idx);
1277
1278         return ret;
1279 }
1280
1281 static void intel_pmu_reset(void)
1282 {
1283         unsigned long flags;
1284         int idx;
1285
1286         if (!x86_pmu.num_counters)
1287                 return;
1288
1289         local_irq_save(flags);
1290
1291         printk("clearing PMU state on CPU#%d\n", smp_processor_id());
1292
1293         for (idx = 0; idx < x86_pmu.num_counters; idx++) {
1294                 checking_wrmsrl(x86_pmu.eventsel + idx, 0ull);
1295                 checking_wrmsrl(x86_pmu.perfctr  + idx, 0ull);
1296         }
1297         for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
1298                 checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
1299         }
1300
1301         local_irq_restore(flags);
1302 }
1303
1304 static int p6_pmu_handle_irq(struct pt_regs *regs)
1305 {
1306         struct perf_sample_data data;
1307         struct cpu_hw_counters *cpuc;
1308         struct perf_counter *counter;
1309         struct hw_perf_counter *hwc;
1310         int idx, handled = 0;
1311         u64 val;
1312
1313         data.regs = regs;
1314         data.addr = 0;
1315
1316         cpuc = &__get_cpu_var(cpu_hw_counters);
1317
1318         for (idx = 0; idx < x86_pmu.num_counters; idx++) {
1319                 if (!test_bit(idx, cpuc->active_mask))
1320                         continue;
1321
1322                 counter = cpuc->counters[idx];
1323                 hwc = &counter->hw;
1324
1325                 val = x86_perf_counter_update(counter, hwc, idx);
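                /*
                 * The counter was programmed to a negative offset; while
                 * its sign bit is still set it has not wrapped, i.e. this
                 * PMC did not overflow.
                 */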
1326                 if (val & (1ULL << (x86_pmu.counter_bits - 1)))
1327                         continue;
1328
1329                 /*
1330                  * counter overflow
1331                  */
1332                 handled         = 1;
1333                 data.period     = counter->hw.last_period;
1334
1335                 if (!x86_perf_counter_set_period(counter, hwc, idx))
1336                         continue;
1337
1338                 if (perf_counter_overflow(counter, 1, &data))
1339                         p6_pmu_disable_counter(hwc, idx);
1340         }
1341
1342         if (handled)
1343                 inc_irq_stat(apic_perf_irqs);
1344
1345         return handled;
1346 }
1347
1348 /*
1349  * This handler is triggered by the local APIC, so the APIC IRQ handling
1350  * rules apply:
1351  */
1352 static int intel_pmu_handle_irq(struct pt_regs *regs)
1353 {
1354         struct perf_sample_data data;
1355         struct cpu_hw_counters *cpuc;
1356         int bit, loops;
1357         u64 ack, status;
1358
1359         data.regs = regs;
1360         data.addr = 0;
1361
1362         cpuc = &__get_cpu_var(cpu_hw_counters);
1363
1364         perf_disable();
1365         status = intel_pmu_get_status();
1366         if (!status) {
1367                 perf_enable();
1368                 return 0;
1369         }
1370
1371         loops = 0;
1372 again:
1373         if (++loops > 100) {
1374                 WARN_ONCE(1, "perfcounters: irq loop stuck!\n");
1375                 perf_counter_print_debug();
1376                 intel_pmu_reset();
1377                 perf_enable();
1378                 return 1;
1379         }
1380
1381         inc_irq_stat(apic_perf_irqs);
1382         ack = status;
1383         for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
1384                 struct perf_counter *counter = cpuc->counters[bit];
1385
1386                 clear_bit(bit, (unsigned long *) &status);
1387                 if (!test_bit(bit, cpuc->active_mask))
1388                         continue;
1389
1390                 if (!intel_pmu_save_and_restart(counter))
1391                         continue;
1392
1393                 data.period = counter->hw.last_period;
1394
1395                 if (perf_counter_overflow(counter, 1, &data))
1396                         intel_pmu_disable_counter(&counter->hw, bit);
1397         }
1398
1399         intel_pmu_ack_status(ack);
1400
1401         /*
1402          * Repeat if there is more work to be done:
1403          */
1404         status = intel_pmu_get_status();
1405         if (status)
1406                 goto again;
1407
1408         perf_enable();
1409
1410         return 1;
1411 }
1412
1413 static int amd_pmu_handle_irq(struct pt_regs *regs)
1414 {
1415         struct perf_sample_data data;
1416         struct cpu_hw_counters *cpuc;
1417         struct perf_counter *counter;
1418         struct hw_perf_counter *hwc;
1419         int idx, handled = 0;
1420         u64 val;
1421
1422         data.regs = regs;
1423         data.addr = 0;
1424
1425         cpuc = &__get_cpu_var(cpu_hw_counters);
1426
1427         for (idx = 0; idx < x86_pmu.num_counters; idx++) {
1428                 if (!test_bit(idx, cpuc->active_mask))
1429                         continue;
1430
1431                 counter = cpuc->counters[idx];
1432                 hwc = &counter->hw;
1433
1434                 val = x86_perf_counter_update(counter, hwc, idx);
1435                 if (val & (1ULL << (x86_pmu.counter_bits - 1)))
1436                         continue;
1437
1438                 /*
1439                  * counter overflow
1440                  */
1441                 handled         = 1;
1442                 data.period     = counter->hw.last_period;
1443
1444                 if (!x86_perf_counter_set_period(counter, hwc, idx))
1445                         continue;
1446
1447                 if (perf_counter_overflow(counter, 1, &data))
1448                         amd_pmu_disable_counter(hwc, idx);
1449         }
1450
1451         if (handled)
1452                 inc_irq_stat(apic_perf_irqs);
1453
1454         return handled;
1455 }
1456
1457 void smp_perf_pending_interrupt(struct pt_regs *regs)
1458 {
1459         irq_enter();
1460         ack_APIC_irq();
1461         inc_irq_stat(apic_pending_irqs);
1462         perf_counter_do_pending();
1463         irq_exit();
1464 }
1465
1466 void set_perf_counter_pending(void)
1467 {
1468 #ifdef CONFIG_X86_LOCAL_APIC
1469         apic->send_IPI_self(LOCAL_PENDING_VECTOR);
1470 #endif
1471 }
1472
1473 void perf_counters_lapic_init(void)
1474 {
1475 #ifdef CONFIG_X86_LOCAL_APIC
1476         if (!x86_pmu.apic || !x86_pmu_initialized())
1477                 return;
1478
1479         /*
1480          * Always use NMI for PMU
1481          */
1482         apic_write(APIC_LVTPC, APIC_DM_NMI);
1483 #endif
1484 }
1485
1486 static int __kprobes
1487 perf_counter_nmi_handler(struct notifier_block *self,
1488                          unsigned long cmd, void *__args)
1489 {
1490         struct die_args *args = __args;
1491         struct pt_regs *regs;
1492
1493         if (!atomic_read(&active_counters))
1494                 return NOTIFY_DONE;
1495
1496         switch (cmd) {
1497         case DIE_NMI:
1498         case DIE_NMI_IPI:
1499                 break;
1500
1501         default:
1502                 return NOTIFY_DONE;
1503         }
1504
1505         regs = args->regs;
1506
1507 #ifdef CONFIG_X86_LOCAL_APIC
1508         apic_write(APIC_LVTPC, APIC_DM_NMI);
1509 #endif
1510         /*
1511          * Can't rely on the handled return value to say it was our NMI: two
1512          * counters could trigger 'simultaneously', raising two back-to-back NMIs.
1513          *
1514          * If the first NMI handles both, the latter will be empty and daze
1515          * the CPU.
1516          */
1517         x86_pmu.handle_irq(regs);
1518
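        /*
         * Hence we claim the NMI unconditionally (NOTIFY_STOP below)
         * rather than trusting the handler's return value.
         */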
1519         return NOTIFY_STOP;
1520 }
1521
1522 static __read_mostly struct notifier_block perf_counter_nmi_notifier = {
1523         .notifier_call          = perf_counter_nmi_handler,
1524         .next                   = NULL,
1525         .priority               = 1
1526 };
1527
1528 static struct x86_pmu p6_pmu = {
1529         .name                   = "p6",
1530         .handle_irq             = p6_pmu_handle_irq,
1531         .disable_all            = p6_pmu_disable_all,
1532         .enable_all             = p6_pmu_enable_all,
1533         .enable                 = p6_pmu_enable_counter,
1534         .disable                = p6_pmu_disable_counter,
1535         .eventsel               = MSR_P6_EVNTSEL0,
1536         .perfctr                = MSR_P6_PERFCTR0,
1537         .event_map              = p6_pmu_event_map,
1538         .raw_event              = p6_pmu_raw_event,
1539         .max_events             = ARRAY_SIZE(p6_perfmon_event_map),
1540         .apic                   = 1,
1541         .max_period             = (1ULL << 31) - 1,
1542         .version                = 0,
1543         .num_counters           = 2,
1544         /*
1545          * Counters have 40 bits implemented. However they are designed such
1546          * that bits [32-39] are sign extensions of bit 31. As such the
1547          * effective width of a counter for P6-like PMU is 32 bits only.
1548          *
1549          * See IA-32 Intel Architecture Software developer manual Vol 3B
1550          */
1551         .counter_bits           = 32,
1552         .counter_mask           = (1ULL << 32) - 1,
1553 };
1554
1555 static struct x86_pmu intel_pmu = {
1556         .name                   = "Intel",
1557         .handle_irq             = intel_pmu_handle_irq,
1558         .disable_all            = intel_pmu_disable_all,
1559         .enable_all             = intel_pmu_enable_all,
1560         .enable                 = intel_pmu_enable_counter,
1561         .disable                = intel_pmu_disable_counter,
1562         .eventsel               = MSR_ARCH_PERFMON_EVENTSEL0,
1563         .perfctr                = MSR_ARCH_PERFMON_PERFCTR0,
1564         .event_map              = intel_pmu_event_map,
1565         .raw_event              = intel_pmu_raw_event,
1566         .max_events             = ARRAY_SIZE(intel_perfmon_event_map),
1567         .apic                   = 1,
1568         /*
1569          * Intel PMCs cannot be accessed sanely above 32 bit width,
1570          * so we install an artificial 1<<31 period regardless of
1571          * the generic counter period:
1572          */
1573         .max_period             = (1ULL << 31) - 1,
1574 };
1575
1576 static struct x86_pmu amd_pmu = {
1577         .name                   = "AMD",
1578         .handle_irq             = amd_pmu_handle_irq,
1579         .disable_all            = amd_pmu_disable_all,
1580         .enable_all             = amd_pmu_enable_all,
1581         .enable                 = amd_pmu_enable_counter,
1582         .disable                = amd_pmu_disable_counter,
1583         .eventsel               = MSR_K7_EVNTSEL0,
1584         .perfctr                = MSR_K7_PERFCTR0,
1585         .event_map              = amd_pmu_event_map,
1586         .raw_event              = amd_pmu_raw_event,
1587         .max_events             = ARRAY_SIZE(amd_perfmon_event_map),
1588         .num_counters           = 4,
1589         .counter_bits           = 48,
1590         .counter_mask           = (1ULL << 48) - 1,
1591         .apic                   = 1,
1592         /* use highest bit to detect overflow */
1593         .max_period             = (1ULL << 47) - 1,
1594 };
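/*
 * Note: with 48-bit counters and max_period capped at 2^47 - 1, the top
 * bit of an AMD PMC remains usable as the overflow indicator tested in
 * amd_pmu_handle_irq().
 */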
1595
1596 static int p6_pmu_init(void)
1597 {
1598         switch (boot_cpu_data.x86_model) {
1599         case 1:
1600         case 3:  /* Pentium Pro */
1601         case 5:
1602         case 6:  /* Pentium II */
1603         case 7:
1604         case 8:
1605         case 11: /* Pentium III */
1606                 break;
1607         case 9:
1608         case 13:
1609                 /* Pentium M */
1610                 break;
1611         default:
1612                 pr_cont("unsupported p6 CPU model %d ",
1613                         boot_cpu_data.x86_model);
1614                 return -ENODEV;
1615         }
1616
1617         x86_pmu = p6_pmu;
1618
1619         if (!cpu_has_apic) {
1620                 pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n");
1621                 pr_info("no hardware sampling interrupt available.\n");
1622                 x86_pmu.apic = 0;
1623         }
1624
1625         return 0;
1626 }
1627
1628 static int intel_pmu_init(void)
1629 {
1630         union cpuid10_edx edx;
1631         union cpuid10_eax eax;
1632         unsigned int unused;
1633         unsigned int ebx;
1634         int version;
1635
1636         if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
1637                 /* check for P6 processor family */
1638                 if (boot_cpu_data.x86 == 6) {
1639                         return p6_pmu_init();
1640                 } else {
1641                         return -ENODEV;
1642                 }
1643         }
1644
1645         /*
1646          * Check whether the Architectural PerfMon supports
1647          * the Branch Misses Retired event.
1648          */
1649         cpuid(10, &eax.full, &ebx, &unused, &edx.full);
1650         if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
1651                 return -ENODEV;
1652
1653         version = eax.split.version_id;
1654         if (version < 2)
1655                 return -ENODEV;
1656
1657         x86_pmu                         = intel_pmu;
1658         x86_pmu.version                 = version;
1659         x86_pmu.num_counters            = eax.split.num_counters;
1660         x86_pmu.counter_bits            = eax.split.bit_width;
1661         x86_pmu.counter_mask            = (1ULL << eax.split.bit_width) - 1;
1662
1663         /*
1664          * Quirk: v2 perfmon does not report fixed-purpose counters, so
1665          * assume at least 3 counters:
1666          */
1667         x86_pmu.num_counters_fixed      = max((int)edx.split.num_counters_fixed, 3);
1668
1669         /*
1670          * Install the hw-cache-events table:
1671          */
1672         switch (boot_cpu_data.x86_model) {
1673         case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
1674         case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
1675         case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
1676         case 29: /* six-core 45 nm xeon "Dunnington" */
1677                 memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
1678                        sizeof(hw_cache_event_ids));
1679
1680                 pr_cont("Core2 events, ");
1681                 break;
1682         default: /* unknown models fall back to the Nehalem table */
1683         case 26: /* 45 nm Nehalem, "Core i7" */
1684                 memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
1685                        sizeof(hw_cache_event_ids));
1686
1687                 pr_cont("Nehalem/Corei7 events, ");
1688                 break;
1689         case 28:
1690                 memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
1691                        sizeof(hw_cache_event_ids));
1692
1693                 pr_cont("Atom events, ");
1694                 break;
1695         }
1696         return 0;
1697 }
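
/*
 * Worked example of the CPUID.0AH decode above (the eax value is only an
 * illustration, roughly what a Core-2-class CPU reports):
 *
 *	eax.full = 0x07280202
 *		-> version_id   = 0x02	(passes the version < 2 check)
 *		-> num_counters = 0x02
 *		-> bit_width    = 0x28	(40 bits)
 *		-> mask_length  = 0x07	(branch-misses event is reported)
 *
 * giving counter_mask = (1ULL << 40) - 1 = 0xffffffffff; a model 23 part
 * would then go on to select the Core2 hw-cache-events table.
 */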
1698
1699 static int amd_pmu_init(void)
1700 {
1701         /* Performance-monitoring supported from K7 and later: */
1702         if (boot_cpu_data.x86 < 6)
1703                 return -ENODEV;
1704
1705         x86_pmu = amd_pmu;
1706
1707         /* Events are common to all AMD CPUs */
1708         memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
1709                sizeof(hw_cache_event_ids));
1710
1711         return 0;
1712 }
1713
1714 void __init init_hw_perf_counters(void)
1715 {
1716         int err;
1717
1718         pr_info("Performance Counters: ");
1719
1720         switch (boot_cpu_data.x86_vendor) {
1721         case X86_VENDOR_INTEL:
1722                 err = intel_pmu_init();
1723                 break;
1724         case X86_VENDOR_AMD:
1725                 err = amd_pmu_init();
1726                 break;
1727         default:
1728                 return;
1729         }
1730         if (err != 0) {
1731                 pr_cont("no PMU driver, software counters only.\n");
1732                 return;
1733         }
1734
1735         pr_cont("%s PMU driver.\n", x86_pmu.name);
1736
1737         if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) {
1738                 WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!",
1739                      x86_pmu.num_counters, X86_PMC_MAX_GENERIC);
1740                 x86_pmu.num_counters = X86_PMC_MAX_GENERIC;
1741         }
1742         perf_counter_mask = (1 << x86_pmu.num_counters) - 1;
1743         perf_max_counters = x86_pmu.num_counters;
1744
1745         if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) {
1746                 WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!",
1747                      x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED);
1748                 x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED;
1749         }
1750
1751         perf_counter_mask |=
1752                 ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED;
1753         x86_pmu.intel_ctrl = perf_counter_mask;
1754
1755         perf_counters_lapic_init();
1756         register_die_notifier(&perf_counter_nmi_notifier);
1757
1758         pr_info("... version:                 %d\n",     x86_pmu.version);
1759         pr_info("... bit width:               %d\n",     x86_pmu.counter_bits);
1760         pr_info("... generic counters:        %d\n",     x86_pmu.num_counters);
1761         pr_info("... value mask:              %016Lx\n", x86_pmu.counter_mask);
1762         pr_info("... max period:              %016Lx\n", x86_pmu.max_period);
1763         pr_info("... fixed-purpose counters:  %d\n",     x86_pmu.num_counters_fixed);
1764         pr_info("... counter mask:            %016Lx\n", perf_counter_mask);
1765 }
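
/*
 * Example of the resulting perf_counter_mask, assuming a v2 Intel PMU
 * with 4 generic and 3 fixed counters (X86_PMC_IDX_FIXED is 32):
 *
 *	generic part:  (1 << 4) - 1              =         0xf
 *	fixed part:   ((1LL << 3) - 1) << 32     = 0x700000000
 *	perf_counter_mask                        = 0x70000000f
 *
 * On the Intel side this combined mask is also kept in x86_pmu.intel_ctrl
 * and ends up in MSR_CORE_PERF_GLOBAL_CTRL when the PMU is globally enabled.
 */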
1766
1767 static inline void x86_pmu_read(struct perf_counter *counter)
1768 {
1769         x86_perf_counter_update(counter, &counter->hw, counter->hw.idx);
1770 }
1771
1772 static const struct pmu pmu = {
1773         .enable         = x86_pmu_enable,
1774         .disable        = x86_pmu_disable,
1775         .read           = x86_pmu_read,
1776         .unthrottle     = x86_pmu_unthrottle,
1777 };
1778
1779 const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
1780 {
1781         int err;
1782
1783         err = __hw_perf_counter_init(counter);
1784         if (err)
1785                 return ERR_PTR(err);
1786
1787         return &pmu;
1788 }
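
/*
 * Rough sketch of how the generic perf_counter core drives the ops
 * returned above (simplified, error handling omitted):
 *
 *	const struct pmu *pmu = hw_perf_counter_init(counter);
 *
 *	if (!IS_ERR(pmu)) {
 *		pmu->enable(counter);		(start counting on this CPU)
 *		...
 *		pmu->read(counter);		(fold hw count into the counter)
 *		pmu->disable(counter);		(stop and save state)
 *	}
 *
 * x86_pmu_enable()/x86_pmu_disable() earlier in this file are what claim
 * and release a hardware counter slot behind these hooks.
 */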
1789
1790 /*
1791  * callchain support
1792  */
1793
1794 static inline
1795 void callchain_store(struct perf_callchain_entry *entry, u64 ip)
1796 {
1797         if (entry->nr < PERF_MAX_STACK_DEPTH)
1798                 entry->ip[entry->nr++] = ip;
1799 }
1800
1801 static DEFINE_PER_CPU(struct perf_callchain_entry, irq_entry);
1802 static DEFINE_PER_CPU(struct perf_callchain_entry, nmi_entry);
1803 static DEFINE_PER_CPU(int, in_nmi_frame);
1804
1805
1806 static void
1807 backtrace_warning_symbol(void *data, char *msg, unsigned long symbol)
1808 {
1809         /* Ignore warnings */
1810 }
1811
1812 static void backtrace_warning(void *data, char *msg)
1813 {
1814         /* Ignore warnings */
1815 }
1816
1817 static int backtrace_stack(void *data, char *name)
1818 {
1819         per_cpu(in_nmi_frame, smp_processor_id()) =
1820                         x86_is_stack_id(NMI_STACK, name);
1821
1822         return 0;
1823 }
1824
1825 static void backtrace_address(void *data, unsigned long addr, int reliable)
1826 {
1827         struct perf_callchain_entry *entry = data;
1828
1829         if (per_cpu(in_nmi_frame, smp_processor_id()))
1830                 return;
1831
1832         if (reliable)
1833                 callchain_store(entry, addr);
1834 }
1835
1836 static const struct stacktrace_ops backtrace_ops = {
1837         .warning                = backtrace_warning,
1838         .warning_symbol         = backtrace_warning_symbol,
1839         .stack                  = backtrace_stack,
1840         .address                = backtrace_address,
1841 };
1842
1843 #include "../dumpstack.h"
1844
1845 static void
1846 perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
1847 {
1848         callchain_store(entry, PERF_CONTEXT_KERNEL);
1849         callchain_store(entry, regs->ip);
1850
1851         dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry);
1852 }
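
/*
 * With backtrace_ops wired up as above, a kernel-side callchain entry
 * ends up looking roughly like this (addresses invented for illustration):
 *
 *	entry->ip[0] = PERF_CONTEXT_KERNEL;
 *	entry->ip[1] = regs->ip;		(the sampled instruction)
 *	entry->ip[2] = 0xffffffff8024a123;	(first reliable return address)
 *	...
 *
 * with frames that dump_trace() finds on the NMI stack dropped via the
 * in_nmi_frame flag set in backtrace_stack().
 */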
1853
1854 /*
1855  * Best-effort, GUP-based copy_from_user() that assumes IRQ or NMI context
1856  */
1857 static unsigned long
1858 copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
1859 {
1860         unsigned long offset, addr = (unsigned long)from;
1861         int type = in_nmi() ? KM_NMI : KM_IRQ0;
1862         unsigned long size, len = 0;
1863         struct page *page;
1864         void *map;
1865         int ret;
1866
1867         do {
1868                 ret = __get_user_pages_fast(addr, 1, 0, &page);
1869                 if (!ret)
1870                         break;
1871
1872                 offset = addr & (PAGE_SIZE - 1);
1873                 size = min(PAGE_SIZE - offset, n - len);
1874
1875                 map = kmap_atomic(page, type);
1876                 memcpy(to, map+offset, size);
1877                 kunmap_atomic(map, type);
1878                 put_page(page);
1879
1880                 len  += size;
1881                 to   += size;
1882                 addr += size;
1883
1884         } while (len < n);
1885
1886         return len;
1887 }
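
/*
 * Example of how the loop above splits a copy that crosses a page
 * boundary (assuming PAGE_SIZE == 4096): copying n = 96 bytes from
 * addr = 0xbfffefd0 takes two rounds,
 *
 *	1st: offset = 0xfd0, size = min(4096 - 0xfd0, 96)      = 48
 *	2nd: offset = 0x000, size = min(4096,         96 - 48) = 48
 *
 * and a short length is returned if __get_user_pages_fast() cannot pin
 * one of the pages (e.g. it is not mapped in), which the callers treat
 * as a truncated copy.
 */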
1888
1889 static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
1890 {
1891         unsigned long bytes;
1892
1893         bytes = copy_from_user_nmi(frame, fp, sizeof(*frame));
1894
1895         return bytes == sizeof(*frame);
1896 }
1897
1898 static void
1899 perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
1900 {
1901         struct stack_frame frame;
1902         const void __user *fp;
1903
1904         if (!user_mode(regs))
1905                 regs = task_pt_regs(current);
1906
1907         fp = (void __user *)regs->bp;
1908
1909         callchain_store(entry, PERF_CONTEXT_USER);
1910         callchain_store(entry, regs->ip);
1911
1912         while (entry->nr < PERF_MAX_STACK_DEPTH) {
1913                 frame.next_frame     = NULL;
1914                 frame.return_address = 0;
1915
1916                 if (!copy_stack_frame(fp, &frame))
1917                         break;
1918
1919                 if ((unsigned long)fp < regs->sp)
1920                         break;
1921
1922                 callchain_store(entry, frame.return_address);
1923                 fp = frame.next_frame;
1924         }
1925 }
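
/*
 * The user-space walk above assumes the classic frame-pointer layout,
 * i.e. that each saved frame starts with something like
 *
 *	struct stack_frame {
 *		const void __user	*next_frame;
 *		unsigned long		return_address;
 *	};
 *
 * so it only yields useful chains for code built with frame pointers;
 * the "(unsigned long)fp < regs->sp" test stops the walk once a next
 * pointer no longer points further up the stack.
 */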
1926
1927 static void
1928 perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
1929 {
1930         int is_user;
1931
1932         if (!regs)
1933                 return;
1934
1935         is_user = user_mode(regs);
1936
1937         if (!current || current->pid == 0)
1938                 return;
1939
1940         if (is_user && current->state != TASK_RUNNING)
1941                 return;
1942
1943         if (!is_user)
1944                 perf_callchain_kernel(regs, entry);
1945
1946         if (current->mm)
1947                 perf_callchain_user(regs, entry);
1948 }
1949
1950 struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
1951 {
1952         struct perf_callchain_entry *entry;
1953
1954         if (in_nmi())
1955                 entry = &__get_cpu_var(nmi_entry);
1956         else
1957                 entry = &__get_cpu_var(irq_entry);
1958
1959         entry->nr = 0;
1960
1961         perf_do_callchain(regs, entry);
1962
1963         return entry;
1964 }
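
/*
 * Usage note: perf_callchain() runs in the sampling fast path, and the
 * separate nmi_entry/irq_entry per-cpu buffers above are what keep an
 * NMI-time callchain from overwriting one that interrupt (or softirq)
 * context is still filling in; the returned pointer is only valid until
 * the current context is left.
 */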