Merge branch 'master' of /home/davem/src/GIT/linux-2.6/
[pandora-kernel.git] / arch / x86 / kernel / cpu / perf_counter.c
1 /*
2  * Performance counter x86 architecture code
3  *
4  *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
5  *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
6  *  Copyright (C) 2009 Jaswinder Singh Rajput
7  *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
8  *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
9  *
10  *  For licencing details see kernel-base/COPYING
11  */
12
13 #include <linux/perf_counter.h>
14 #include <linux/capability.h>
15 #include <linux/notifier.h>
16 #include <linux/hardirq.h>
17 #include <linux/kprobes.h>
18 #include <linux/module.h>
19 #include <linux/kdebug.h>
20 #include <linux/sched.h>
21 #include <linux/uaccess.h>
22 #include <linux/highmem.h>
23
24 #include <asm/apic.h>
25 #include <asm/stacktrace.h>
26 #include <asm/nmi.h>
27
/* Bitmask of all counters (generic + fixed) exposed by this PMU. */
static u64 perf_counter_mask __read_mostly;

/* Per-CPU state of the hardware performance counters. */
struct cpu_hw_counters {
	struct perf_counter	*counters[X86_PMC_IDX_MAX];	/* owner of each PMC slot */
	unsigned long		used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];	/* allocated slots */
	unsigned long		active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];	/* slots currently counting */
	unsigned long		interrupts;	/* PMU interrupt bookkeeping (consumed elsewhere) */
	int			enabled;	/* PMU logically enabled on this CPU */
};
37
/*
 * struct x86_pmu - generic x86 pmu
 *
 * One static instance (x86_pmu, below) is presumably filled in by the
 * vendor-specific init code (not visible in this chunk) with the
 * callbacks and counter geometry of the CPU at hand.
 */
struct x86_pmu {
	const char	*name;		/* human-readable PMU name */
	int		version;	/* architectural perfmon version */
	int		(*handle_irq)(struct pt_regs *);	/* PMI handler; NULL => not initialized */
	void		(*disable_all)(void);	/* globally stop all counters */
	void		(*enable_all)(void);	/* globally (re)start all counters */
	void		(*enable)(struct hw_perf_counter *, int);	/* start one counter */
	void		(*disable)(struct hw_perf_counter *, int);	/* stop one counter */
	unsigned	eventsel;	/* base MSR of the event-select registers */
	unsigned	perfctr;	/* base MSR of the counter registers */
	u64		(*event_map)(int);	/* generic event id -> raw hw encoding */
	u64		(*raw_event)(u64);	/* sanitize a user-supplied raw config */
	int		max_events;	/* number of entries in the event map */
	int		num_counters;	/* # of generic counters */
	int		num_counters_fixed;	/* # of fixed-function counters */
	int		counter_bits;	/* hardware width of a counter */
	u64		counter_mask;	/* mask of valid counter value bits */
	u64		max_period;	/* largest sample period supported */
	u64		intel_ctrl;	/* enable value for MSR_CORE_PERF_GLOBAL_CTRL */
};

/* The PMU backend active on this machine. */
static struct x86_pmu x86_pmu __read_mostly;

/* Start out with the PMU logically enabled on every CPU. */
static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = {
	.enabled = 1,
};
67
/*
 * Not sure about some of these
 *
 * Generic hardware event id -> P6 event-select encoding.  A value of
 * 0 means "not supported" and is rejected with -ENOENT at counter
 * init time (see __hw_perf_counter_init()).
 */
static const u64 p6_perfmon_event_map[] =
{
  [PERF_COUNT_HW_CPU_CYCLES]		= 0x0079,
  [PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
  [PERF_COUNT_HW_CACHE_REFERENCES]	= 0x0000,
  [PERF_COUNT_HW_CACHE_MISSES]		= 0x0000,
  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c4,
  [PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c5,
  [PERF_COUNT_HW_BUS_CYCLES]		= 0x0062,
};
81
82 static u64 p6_pmu_event_map(int event)
83 {
84         return p6_perfmon_event_map[event];
85 }
86
/*
 * Counter setting that is specified not to count anything.
 * We use this to effectively disable a counter.
 *
 * L2_RQSTS with 0 MESI unit mask.
 */
#define P6_NOP_COUNTER			0x0000002EULL

/*
 * Mask a user-supplied raw config down to the bits a P6 event-select
 * register actually implements (event, unit-mask, edge, invert and
 * counter-mask fields).  Everything else - notably the enable, INT
 * and OS/USR bits - is stripped so userspace cannot set them.
 */
static u64 p6_pmu_raw_event(u64 event)
{
#define P6_EVNTSEL_EVENT_MASK		0x000000FFULL
#define P6_EVNTSEL_UNIT_MASK		0x0000FF00ULL
#define P6_EVNTSEL_EDGE_MASK		0x00040000ULL
#define P6_EVNTSEL_INV_MASK		0x00800000ULL
#define P6_EVNTSEL_COUNTER_MASK		0xFF000000ULL

#define P6_EVNTSEL_MASK			\
	(P6_EVNTSEL_EVENT_MASK |	\
	 P6_EVNTSEL_UNIT_MASK  |	\
	 P6_EVNTSEL_EDGE_MASK  |	\
	 P6_EVNTSEL_INV_MASK   |	\
	 P6_EVNTSEL_COUNTER_MASK)

	return event & P6_EVNTSEL_MASK;
}
112
113
/*
 * Intel PerfMon v3. Used on Core2 and later.
 *
 * Generic hardware event id -> architectural event-select encoding
 * (low byte = event code, next byte = unit mask).
 */
static const u64 intel_perfmon_event_map[] =
{
  [PERF_COUNT_HW_CPU_CYCLES]		= 0x003c,
  [PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
  [PERF_COUNT_HW_CACHE_REFERENCES]	= 0x4f2e,
  [PERF_COUNT_HW_CACHE_MISSES]		= 0x412e,
  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c4,
  [PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c5,
  [PERF_COUNT_HW_BUS_CYCLES]		= 0x013c,
};
127
128 static u64 intel_pmu_event_map(int event)
129 {
130         return intel_perfmon_event_map[event];
131 }
132
/*
 * Generalized hw caching related event table, filled
 * in on a per model basis. A value of 0 means
 * 'not supported', -1 means 'event makes no sense on
 * this CPU', any other value means the raw event
 * ID.
 */

/* Shorthand for indexing the cache event tables below. */
#define C(x) PERF_COUNT_HW_CACHE_##x

/* Active table, copied from one of the per-model tables at init. */
static u64 __read_mostly hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX];
147
/* Cache event encodings for Intel Nehalem (0 = unsupported, -1 = nonsensical). */
static const u64 nehalem_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI            */
		[ C(RESULT_MISS)   ] = 0x0140, /* L1D_CACHE_LD.I_STATE         */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI            */
		[ C(RESULT_MISS)   ] = 0x0141, /* L1D_CACHE_ST.I_STATE         */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS        */
		[ C(RESULT_MISS)   ] = 0x024e, /* L1D_PREFETCH.MISS            */
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                    */
		[ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                   */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS)   ] = 0x0,
	},
 },
 [ C(LL  ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS               */
		[ C(RESULT_MISS)   ] = 0x0224, /* L2_RQSTS.LD_MISS             */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS                */
		[ C(RESULT_MISS)   ] = 0x0824, /* L2_RQSTS.RFO_MISS            */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference                */
		[ C(RESULT_MISS)   ] = 0x412e, /* LLC Misses                   */
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI   (alias)  */
		[ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.ANY         */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI   (alias)  */
		[ C(RESULT_MISS)   ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS  */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS)   ] = 0x0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P           */
		[ C(RESULT_MISS)   ] = 0x20c8, /* ITLB_MISS_RETIRED            */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
		[ C(RESULT_MISS)   ] = 0x03e8, /* BPU_CLEARS.ANY               */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
};
238
/* Cache event encodings for Intel Core2 (0 = unsupported, -1 = nonsensical). */
static const u64 core2_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI          */
		[ C(RESULT_MISS)   ] = 0x0140, /* L1D_CACHE_LD.I_STATE       */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI          */
		[ C(RESULT_MISS)   ] = 0x0141, /* L1D_CACHE_ST.I_STATE       */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS      */
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS                  */
		[ C(RESULT_MISS)   ] = 0x0081, /* L1I.MISSES                 */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(LL  ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI                 */
		[ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE               */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI                 */
		[ C(RESULT_MISS)   ] = 0x412A, /* L2_ST.ISTATE               */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI  (alias) */
		[ C(RESULT_MISS)   ] = 0x0208, /* DTLB_MISSES.MISS_LD        */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI  (alias) */
		[ C(RESULT_MISS)   ] = 0x0808, /* DTLB_MISSES.MISS_ST        */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P         */
		[ C(RESULT_MISS)   ] = 0x1282, /* ITLBMISSES                 */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY        */
		[ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED    */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
};
329
/* Cache event encodings for Intel Atom (0 = unsupported, -1 = nonsensical). */
static const u64 atom_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD               */
		[ C(RESULT_MISS)   ] = 0,
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST               */
		[ C(RESULT_MISS)   ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                  */
		[ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                 */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(LL  ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI                 */
		[ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE               */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI                 */
		[ C(RESULT_MISS)   ] = 0x412A, /* L2_ST.ISTATE               */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI  (alias) */
		[ C(RESULT_MISS)   ] = 0x0508, /* DTLB_MISSES.MISS_LD        */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI  (alias) */
		[ C(RESULT_MISS)   ] = 0x0608, /* DTLB_MISSES.MISS_ST        */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P         */
		[ C(RESULT_MISS)   ] = 0x0282, /* ITLB.MISSES                */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY        */
		[ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED    */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
};
420
/*
 * Mask a user-supplied raw config down to the bits a Core event-select
 * register implements (event, unit-mask, edge, invert and counter-mask
 * fields).  Privileged bits such as enable, INT and OS/USR are stripped.
 */
static u64 intel_pmu_raw_event(u64 event)
{
#define CORE_EVNTSEL_EVENT_MASK		0x000000FFULL
#define CORE_EVNTSEL_UNIT_MASK		0x0000FF00ULL
#define CORE_EVNTSEL_EDGE_MASK		0x00040000ULL
#define CORE_EVNTSEL_INV_MASK		0x00800000ULL
#define CORE_EVNTSEL_COUNTER_MASK	0xFF000000ULL

#define CORE_EVNTSEL_MASK		\
	(CORE_EVNTSEL_EVENT_MASK |	\
	 CORE_EVNTSEL_UNIT_MASK  |	\
	 CORE_EVNTSEL_EDGE_MASK  |	\
	 CORE_EVNTSEL_INV_MASK  |	\
	 CORE_EVNTSEL_COUNTER_MASK)

	return event & CORE_EVNTSEL_MASK;
}
438
/* Cache event encodings for AMD K7/K8+ (0 = unsupported, -1 = nonsensical). */
static const u64 amd_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
		[ C(RESULT_MISS)   ] = 0x0041, /* Data Cache Misses          */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */
		[ C(RESULT_MISS)   ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts  */
		[ C(RESULT_MISS)   ] = 0x0167, /* Data Prefetcher :cancelled */
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches  */
		[ C(RESULT_MISS)   ] = 0x0081, /* Instruction cache misses   */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(LL  ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
		[ C(RESULT_MISS)   ] = 0x037E, /* L2 Cache Misses : IC+DC     */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback           */
		[ C(RESULT_MISS)   ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
		[ C(RESULT_MISS)   ] = 0x0046, /* L1 DTLB and L2 DTLB Miss   */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fetches        */
		[ C(RESULT_MISS)   ] = 0x0085, /* Instr. fetch ITLB misses   */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr.      */
		[ C(RESULT_MISS)   ] = 0x00c3, /* Retired Mispredicted BI    */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
};
529
/*
 * AMD Performance Monitor K7 and later.
 *
 * Note: no PERF_COUNT_HW_BUS_CYCLES entry, so that id maps to 0
 * ("not supported") and is rejected at counter init time.
 */
static const u64 amd_perfmon_event_map[] =
{
  [PERF_COUNT_HW_CPU_CYCLES]		= 0x0076,
  [PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
  [PERF_COUNT_HW_CACHE_REFERENCES]	= 0x0080,
  [PERF_COUNT_HW_CACHE_MISSES]		= 0x0081,
  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c4,
  [PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c5,
};
542
543 static u64 amd_pmu_event_map(int event)
544 {
545         return amd_perfmon_event_map[event];
546 }
547
/*
 * Mask a user-supplied raw config down to the fields a K7+ event-select
 * register implements.  NOTE(review): unlike the Intel masks, the event
 * mask here is 0x7000000FF, i.e. it also passes bits 32-34 - presumably
 * the AMD extended event-select bits; confirm against the AMD BKDG/APM.
 */
static u64 amd_pmu_raw_event(u64 event)
{
#define K7_EVNTSEL_EVENT_MASK	0x7000000FFULL
#define K7_EVNTSEL_UNIT_MASK	0x00000FF00ULL
#define K7_EVNTSEL_EDGE_MASK	0x000040000ULL
#define K7_EVNTSEL_INV_MASK	0x000800000ULL
#define K7_EVNTSEL_COUNTER_MASK	0x0FF000000ULL

#define K7_EVNTSEL_MASK			\
	(K7_EVNTSEL_EVENT_MASK |	\
	 K7_EVNTSEL_UNIT_MASK  |	\
	 K7_EVNTSEL_EDGE_MASK  |	\
	 K7_EVNTSEL_INV_MASK   |	\
	 K7_EVNTSEL_COUNTER_MASK)

	return event & K7_EVNTSEL_MASK;
}
565
/*
 * Propagate counter elapsed time into the generic counter.
 * Can only be executed on the CPU where the counter is active.
 * Returns the delta events processed.
 */
static u64
x86_perf_counter_update(struct perf_counter *counter,
			struct hw_perf_counter *hwc, int idx)
{
	/* Shift used to sign-extend counters narrower than 64 bits. */
	int shift = 64 - x86_pmu.counter_bits;
	u64 prev_raw_count, new_raw_count;
	s64 delta;

	/*
	 * Careful: an NMI might modify the previous counter value.
	 *
	 * Our tactic to handle this is to first atomically read and
	 * exchange a new raw count - then add that new-prev delta
	 * count to the generic counter atomically:
	 */
again:
	prev_raw_count = atomic64_read(&hwc->prev_count);
	rdmsrl(hwc->counter_base + idx, new_raw_count);

	/* Lost the race against an NMI: reread and retry. */
	if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
					new_raw_count) != prev_raw_count)
		goto again;

	/*
	 * Now we have the new raw value and have updated the prev
	 * timestamp already. We can now calculate the elapsed delta
	 * (counter-)time and add that to the generic counter.
	 *
	 * Careful, not all hw sign-extends above the physical width
	 * of the count.
	 */
	delta = (new_raw_count << shift) - (prev_raw_count << shift);
	delta >>= shift;

	atomic64_add(delta, &counter->count);
	atomic64_sub(delta, &hwc->period_left);

	return new_raw_count;
}
610
/* Number of live counters; the PMC MSRs are reserved while non-zero. */
static atomic_t active_counters;
/* Serializes 0<->1 transitions of active_counters vs. (un)reserving PMCs. */
static DEFINE_MUTEX(pmc_reserve_mutex);

/*
 * Claim every perfctr and eventsel MSR via the NMI-watchdog reservation
 * layer.  Returns false - with everything released again and the lapic
 * NMI watchdog re-enabled - if any single register is already taken.
 */
static bool reserve_pmc_hardware(void)
{
	int i;

	/* The lapic NMI watchdog uses the same MSRs; stop it first. */
	if (nmi_watchdog == NMI_LOCAL_APIC)
		disable_lapic_nmi_watchdog();

	for (i = 0; i < x86_pmu.num_counters; i++) {
		if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
			goto perfctr_fail;
	}

	for (i = 0; i < x86_pmu.num_counters; i++) {
		if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
			goto eventsel_fail;
	}

	return true;

eventsel_fail:
	/* Undo the eventsels reserved so far ... */
	for (i--; i >= 0; i--)
		release_evntsel_nmi(x86_pmu.eventsel + i);

	/* ... then fall through to release every perfctr. */
	i = x86_pmu.num_counters;

perfctr_fail:
	for (i--; i >= 0; i--)
		release_perfctr_nmi(x86_pmu.perfctr + i);

	if (nmi_watchdog == NMI_LOCAL_APIC)
		enable_lapic_nmi_watchdog();

	return false;
}
648
649 static void release_pmc_hardware(void)
650 {
651         int i;
652
653         for (i = 0; i < x86_pmu.num_counters; i++) {
654                 release_perfctr_nmi(x86_pmu.perfctr + i);
655                 release_evntsel_nmi(x86_pmu.eventsel + i);
656         }
657
658         if (nmi_watchdog == NMI_LOCAL_APIC)
659                 enable_lapic_nmi_watchdog();
660 }
661
/*
 * Counter teardown callback: drop the active_counters reference and,
 * when the last counter goes away, release the PMC hardware.  The
 * mutex closes the race against a concurrent __hw_perf_counter_init().
 */
static void hw_perf_counter_destroy(struct perf_counter *counter)
{
	if (atomic_dec_and_mutex_lock(&active_counters, &pmc_reserve_mutex)) {
		release_pmc_hardware();
		mutex_unlock(&pmc_reserve_mutex);
	}
}
669
670 static inline int x86_pmu_initialized(void)
671 {
672         return x86_pmu.handle_irq != NULL;
673 }
674
675 static inline int
676 set_ext_hw_attr(struct hw_perf_counter *hwc, struct perf_counter_attr *attr)
677 {
678         unsigned int cache_type, cache_op, cache_result;
679         u64 config, val;
680
681         config = attr->config;
682
683         cache_type = (config >>  0) & 0xff;
684         if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
685                 return -EINVAL;
686
687         cache_op = (config >>  8) & 0xff;
688         if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
689                 return -EINVAL;
690
691         cache_result = (config >> 16) & 0xff;
692         if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
693                 return -EINVAL;
694
695         val = hw_cache_event_ids[cache_type][cache_op][cache_result];
696
697         if (val == 0)
698                 return -ENOENT;
699
700         if (val == -1)
701                 return -EINVAL;
702
703         hwc->config |= val;
704
705         return 0;
706 }
707
/*
 * Setup the hardware configuration for a given attr_type
 *
 * Validates the requested event, takes a reference on the PMC
 * hardware (reserving it on the 0->1 transition) and fills in
 * hwc->config with the raw event-select value, enable bit clear.
 *
 * Returns 0 on success, -ENODEV without a PMU driver, -EBUSY when
 * the PMC MSRs cannot be reserved, -ENOENT/-EINVAL for unsupported
 * or invalid event encodings.
 */
static int __hw_perf_counter_init(struct perf_counter *counter)
{
	struct perf_counter_attr *attr = &counter->attr;
	struct hw_perf_counter *hwc = &counter->hw;
	u64 config;
	int err;

	if (!x86_pmu_initialized())
		return -ENODEV;

	err = 0;
	/*
	 * Take a reference; the first counter must reserve the PMC
	 * hardware under pmc_reserve_mutex (paired with
	 * hw_perf_counter_destroy()).
	 */
	if (!atomic_inc_not_zero(&active_counters)) {
		mutex_lock(&pmc_reserve_mutex);
		if (atomic_read(&active_counters) == 0 && !reserve_pmc_hardware())
			err = -EBUSY;
		else
			atomic_inc(&active_counters);
		mutex_unlock(&pmc_reserve_mutex);
	}
	if (err)
		return err;

	/*
	 * Generate PMC IRQs:
	 * (keep 'enabled' bit clear for now)
	 */
	hwc->config = ARCH_PERFMON_EVENTSEL_INT;

	/*
	 * Count user and OS events unless requested not to.
	 */
	if (!attr->exclude_user)
		hwc->config |= ARCH_PERFMON_EVENTSEL_USR;
	if (!attr->exclude_kernel)
		hwc->config |= ARCH_PERFMON_EVENTSEL_OS;

	/* No period requested: default to the largest the hw supports. */
	if (!hwc->sample_period) {
		hwc->sample_period = x86_pmu.max_period;
		hwc->last_period = hwc->sample_period;
		atomic64_set(&hwc->period_left, hwc->sample_period);
	}

	counter->destroy = hw_perf_counter_destroy;

	/*
	 * Raw event type provide the config in the event structure
	 */
	if (attr->type == PERF_TYPE_RAW) {
		hwc->config |= x86_pmu.raw_event(attr->config);
		return 0;
	}

	if (attr->type == PERF_TYPE_HW_CACHE)
		return set_ext_hw_attr(hwc, attr);

	if (attr->config >= x86_pmu.max_events)
		return -EINVAL;

	/*
	 * The generic map:
	 */
	config = x86_pmu.event_map(attr->config);

	if (config == 0)
		return -ENOENT;

	if (config == -1LL)
		return -EINVAL;

	hwc->config |= config;

	return 0;
}
784
785 static void p6_pmu_disable_all(void)
786 {
787         struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
788         u64 val;
789
790         if (!cpuc->enabled)
791                 return;
792
793         cpuc->enabled = 0;
794         barrier();
795
796         /* p6 only has one enable register */
797         rdmsrl(MSR_P6_EVNTSEL0, val);
798         val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
799         wrmsrl(MSR_P6_EVNTSEL0, val);
800 }
801
/* Globally stop all counters by clearing the global-control MSR. */
static void intel_pmu_disable_all(void)
{
	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
}
806
/*
 * Globally stop the AMD PMU on this CPU.  AMD has no global control
 * register, so each active counter's enable bit is cleared one by one.
 */
static void amd_pmu_disable_all(void)
{
	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
	int idx;

	if (!cpuc->enabled)
		return;

	cpuc->enabled = 0;
	/*
	 * ensure we write the disable before we start disabling the
	 * counters proper, so that amd_pmu_enable_counter() does the
	 * right thing.
	 */
	barrier();

	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
		u64 val;

		if (!test_bit(idx, cpuc->active_mask))
			continue;
		rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
		/* Skip the MSR write if the counter is already disabled. */
		if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE))
			continue;
		val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
		wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
	}
}
835
836 void hw_perf_disable(void)
837 {
838         if (!x86_pmu_initialized())
839                 return;
840         return x86_pmu.disable_all();
841 }
842
843 static void p6_pmu_enable_all(void)
844 {
845         struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
846         unsigned long val;
847
848         if (cpuc->enabled)
849                 return;
850
851         cpuc->enabled = 1;
852         barrier();
853
854         /* p6 only has one enable register */
855         rdmsrl(MSR_P6_EVNTSEL0, val);
856         val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
857         wrmsrl(MSR_P6_EVNTSEL0, val);
858 }
859
/* Re-enable all counters by restoring the saved global-control value. */
static void intel_pmu_enable_all(void)
{
	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
}
864
/*
 * Globally (re)enable the AMD PMU on this CPU: set the enable bit of
 * every active counter's event-select MSR, rebuilt from the counter's
 * saved config.
 */
static void amd_pmu_enable_all(void)
{
	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
	int idx;

	if (cpuc->enabled)
		return;

	cpuc->enabled = 1;
	/* Publish 'enabled' before touching the per-counter MSRs. */
	barrier();

	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
		struct perf_counter *counter = cpuc->counters[idx];
		u64 val;

		if (!test_bit(idx, cpuc->active_mask))
			continue;

		val = counter->hw.config;
		val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
		wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
	}
}
888
889 void hw_perf_enable(void)
890 {
891         if (!x86_pmu_initialized())
892                 return;
893         x86_pmu.enable_all();
894 }
895
896 static inline u64 intel_pmu_get_status(void)
897 {
898         u64 status;
899
900         rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
901
902         return status;
903 }
904
/* Acknowledge (clear) the given overflow-status bits. */
static inline void intel_pmu_ack_status(u64 ack)
{
	wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
}
909
/* Write the counter's config with the enable bit set into its EVNTSEL. */
static inline void x86_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
{
	(void)checking_wrmsrl(hwc->config_base + idx,
			      hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE);
}
915
/* Write back the config without the enable bit, stopping the counter. */
static inline void x86_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
{
	(void)checking_wrmsrl(hwc->config_base + idx, hwc->config);
}
920
921 static inline void
922 intel_pmu_disable_fixed(struct hw_perf_counter *hwc, int __idx)
923 {
924         int idx = __idx - X86_PMC_IDX_FIXED;
925         u64 ctrl_val, mask;
926
927         mask = 0xfULL << (idx * 4);
928
929         rdmsrl(hwc->config_base, ctrl_val);
930         ctrl_val &= ~mask;
931         (void)checking_wrmsrl(hwc->config_base, ctrl_val);
932 }
933
934 static inline void
935 p6_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
936 {
937         struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
938         u64 val = P6_NOP_COUNTER;
939
940         if (cpuc->enabled)
941                 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
942
943         (void)checking_wrmsrl(hwc->config_base + idx, val);
944 }
945
946 static inline void
947 intel_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
948 {
949         if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
950                 intel_pmu_disable_fixed(hwc, idx);
951                 return;
952         }
953
954         x86_pmu_disable_counter(hwc, idx);
955 }
956
/* AMD counters are plain EVNTSEL/PERFCTR pairs; the generic path works. */
static inline void
amd_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
{
	x86_pmu_disable_counter(hwc, idx);
}
962
963 static DEFINE_PER_CPU(u64, prev_left[X86_PMC_IDX_MAX]);
964
965 /*
966  * Set the next IRQ period, based on the hwc->period_left value.
967  * To be called with the counter disabled in hw:
968  */
969 static int
970 x86_perf_counter_set_period(struct perf_counter *counter,
971                              struct hw_perf_counter *hwc, int idx)
972 {
973         s64 left = atomic64_read(&hwc->period_left);
974         s64 period = hwc->sample_period;
975         int err, ret = 0;
976
977         /*
978          * If we are way outside a reasoable range then just skip forward:
979          */
980         if (unlikely(left <= -period)) {
981                 left = period;
982                 atomic64_set(&hwc->period_left, left);
983                 hwc->last_period = period;
984                 ret = 1;
985         }
986
987         if (unlikely(left <= 0)) {
988                 left += period;
989                 atomic64_set(&hwc->period_left, left);
990                 hwc->last_period = period;
991                 ret = 1;
992         }
993         /*
994          * Quirk: certain CPUs dont like it if just 1 event is left:
995          */
996         if (unlikely(left < 2))
997                 left = 2;
998
999         if (left > x86_pmu.max_period)
1000                 left = x86_pmu.max_period;
1001
1002         per_cpu(prev_left[idx], smp_processor_id()) = left;
1003
1004         /*
1005          * The hw counter starts counting from this counter offset,
1006          * mark it to be able to extra future deltas:
1007          */
1008         atomic64_set(&hwc->prev_count, (u64)-left);
1009
1010         err = checking_wrmsrl(hwc->counter_base + idx,
1011                              (u64)(-left) & x86_pmu.counter_mask);
1012
1013         perf_counter_update_userpage(counter);
1014
1015         return ret;
1016 }
1017
1018 static inline void
1019 intel_pmu_enable_fixed(struct hw_perf_counter *hwc, int __idx)
1020 {
1021         int idx = __idx - X86_PMC_IDX_FIXED;
1022         u64 ctrl_val, bits, mask;
1023         int err;
1024
1025         /*
1026          * Enable IRQ generation (0x8),
1027          * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
1028          * if requested:
1029          */
1030         bits = 0x8ULL;
1031         if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
1032                 bits |= 0x2;
1033         if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
1034                 bits |= 0x1;
1035         bits <<= (idx * 4);
1036         mask = 0xfULL << (idx * 4);
1037
1038         rdmsrl(hwc->config_base, ctrl_val);
1039         ctrl_val &= ~mask;
1040         ctrl_val |= bits;
1041         err = checking_wrmsrl(hwc->config_base, ctrl_val);
1042 }
1043
1044 static void p6_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
1045 {
1046         struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
1047         u64 val;
1048
1049         val = hwc->config;
1050         if (cpuc->enabled)
1051                 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
1052
1053         (void)checking_wrmsrl(hwc->config_base + idx, val);
1054 }
1055
1056
1057 static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
1058 {
1059         if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
1060                 intel_pmu_enable_fixed(hwc, idx);
1061                 return;
1062         }
1063
1064         x86_pmu_enable_counter(hwc, idx);
1065 }
1066
/*
 * Arm one AMD counter.  AMD has no global enable MSR, so only write
 * the enable bit when the PMU is logically enabled; otherwise
 * amd_pmu_enable_all() will arm it later.
 */
static void amd_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
{
	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);

	if (cpuc->enabled)
		x86_pmu_enable_counter(hwc, idx);
}
1074
/*
 * Map a counter's event config onto a fixed-purpose counter index,
 * or return -1 when no fixed counter can count this event.
 * (Only instructions, cpu-cycles and bus-cycles have fixed counters.)
 */
static int
fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc)
{
	unsigned int event;

	if (!x86_pmu.num_counters_fixed)
		return -1;

	event = hwc->config & ARCH_PERFMON_EVENT_MASK;

	if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS)))
		return X86_PMC_IDX_FIXED_INSTRUCTIONS;
	if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES)))
		return X86_PMC_IDX_FIXED_CPU_CYCLES;
	if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_BUS_CYCLES)))
		return X86_PMC_IDX_FIXED_BUS_CYCLES;

	return -1;
}
1094
1095 /*
1096  * Find a PMC slot for the freshly enabled / scheduled in counter:
1097  */
1098 static int x86_pmu_enable(struct perf_counter *counter)
1099 {
1100         struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
1101         struct hw_perf_counter *hwc = &counter->hw;
1102         int idx;
1103
1104         idx = fixed_mode_idx(counter, hwc);
1105         if (idx >= 0) {
1106                 /*
1107                  * Try to get the fixed counter, if that is already taken
1108                  * then try to get a generic counter:
1109                  */
1110                 if (test_and_set_bit(idx, cpuc->used_mask))
1111                         goto try_generic;
1112
1113                 hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
1114                 /*
1115                  * We set it so that counter_base + idx in wrmsr/rdmsr maps to
1116                  * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
1117                  */
1118                 hwc->counter_base =
1119                         MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
1120                 hwc->idx = idx;
1121         } else {
1122                 idx = hwc->idx;
1123                 /* Try to get the previous generic counter again */
1124                 if (test_and_set_bit(idx, cpuc->used_mask)) {
1125 try_generic:
1126                         idx = find_first_zero_bit(cpuc->used_mask,
1127                                                   x86_pmu.num_counters);
1128                         if (idx == x86_pmu.num_counters)
1129                                 return -EAGAIN;
1130
1131                         set_bit(idx, cpuc->used_mask);
1132                         hwc->idx = idx;
1133                 }
1134                 hwc->config_base  = x86_pmu.eventsel;
1135                 hwc->counter_base = x86_pmu.perfctr;
1136         }
1137
1138         perf_counters_lapic_init();
1139
1140         x86_pmu.disable(hwc, idx);
1141
1142         cpuc->counters[idx] = counter;
1143         set_bit(idx, cpuc->active_mask);
1144
1145         x86_perf_counter_set_period(counter, hwc, idx);
1146         x86_pmu.enable(hwc, idx);
1147
1148         perf_counter_update_userpage(counter);
1149
1150         return 0;
1151 }
1152
/*
 * Re-enable a counter that the IRQ handler throttled (disabled on
 * overflow).  Sanity-check that the hw slot still belongs to this
 * counter before touching the hardware.
 */
static void x86_pmu_unthrottle(struct perf_counter *counter)
{
	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
	struct hw_perf_counter *hwc = &counter->hw;

	if (WARN_ON_ONCE(hwc->idx >= X86_PMC_IDX_MAX ||
				cpuc->counters[hwc->idx] != counter))
		return;

	x86_pmu.enable(hwc, hwc->idx);
}
1164
/*
 * Dump the complete PMU MSR state of the current CPU to the kernel
 * log: global control/status (perfmon v2+), every generic counter's
 * control/count/remaining-period, and every fixed counter's count.
 * Runs with IRQs disabled so the snapshot is consistent.
 */
void perf_counter_print_debug(void)
{
	u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
	struct cpu_hw_counters *cpuc;
	unsigned long flags;
	int cpu, idx;

	if (!x86_pmu.num_counters)
		return;

	local_irq_save(flags);

	cpu = smp_processor_id();
	cpuc = &per_cpu(cpu_hw_counters, cpu);

	/* The global-control MSRs only exist from perfmon version 2 on: */
	if (x86_pmu.version >= 2) {
		rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
		rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
		rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
		rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);

		pr_info("\n");
		pr_info("CPU#%d: ctrl:       %016llx\n", cpu, ctrl);
		pr_info("CPU#%d: status:     %016llx\n", cpu, status);
		pr_info("CPU#%d: overflow:   %016llx\n", cpu, overflow);
		pr_info("CPU#%d: fixed:      %016llx\n", cpu, fixed);
	}
	pr_info("CPU#%d: used:       %016llx\n", cpu, *(u64 *)cpuc->used_mask);

	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
		rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
		rdmsrl(x86_pmu.perfctr  + idx, pmc_count);

		prev_left = per_cpu(prev_left[idx], cpu);

		pr_info("CPU#%d:   gen-PMC%d ctrl:  %016llx\n",
			cpu, idx, pmc_ctrl);
		pr_info("CPU#%d:   gen-PMC%d count: %016llx\n",
			cpu, idx, pmc_count);
		pr_info("CPU#%d:   gen-PMC%d left:  %016llx\n",
			cpu, idx, prev_left);
	}
	for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
		rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);

		pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
			cpu, idx, pmc_count);
	}
	local_irq_restore(flags);
}
1215
/*
 * Schedule a counter out: stop the hardware slot, drain the final
 * delta into the generic counter and free the slot.  The ordering
 * against the NMI handler is load-bearing, see the comments below.
 */
static void x86_pmu_disable(struct perf_counter *counter)
{
	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
	struct hw_perf_counter *hwc = &counter->hw;
	int idx = hwc->idx;

	/*
	 * Must be done before we disable, otherwise the nmi handler
	 * could reenable again:
	 */
	clear_bit(idx, cpuc->active_mask);
	x86_pmu.disable(hwc, idx);

	/*
	 * Make sure the cleared pointer becomes visible before we
	 * (potentially) free the counter:
	 */
	barrier();

	/*
	 * Drain the remaining delta count out of a counter
	 * that we are disabling:
	 */
	x86_perf_counter_update(counter, hwc, idx);
	cpuc->counters[idx] = NULL;
	clear_bit(idx, cpuc->used_mask);

	perf_counter_update_userpage(counter);
}
1245
1246 /*
1247  * Save and restart an expired counter. Called by NMI contexts,
1248  * so it has to be careful about preempting normal counter ops:
1249  */
1250 static int intel_pmu_save_and_restart(struct perf_counter *counter)
1251 {
1252         struct hw_perf_counter *hwc = &counter->hw;
1253         int idx = hwc->idx;
1254         int ret;
1255
1256         x86_perf_counter_update(counter, hwc, idx);
1257         ret = x86_perf_counter_set_period(counter, hwc, idx);
1258
1259         if (counter->state == PERF_COUNTER_STATE_ACTIVE)
1260                 intel_pmu_enable_counter(hwc, idx);
1261
1262         return ret;
1263 }
1264
1265 static void intel_pmu_reset(void)
1266 {
1267         unsigned long flags;
1268         int idx;
1269
1270         if (!x86_pmu.num_counters)
1271                 return;
1272
1273         local_irq_save(flags);
1274
1275         printk("clearing PMU state on CPU#%d\n", smp_processor_id());
1276
1277         for (idx = 0; idx < x86_pmu.num_counters; idx++) {
1278                 checking_wrmsrl(x86_pmu.eventsel + idx, 0ull);
1279                 checking_wrmsrl(x86_pmu.perfctr  + idx, 0ull);
1280         }
1281         for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
1282                 checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
1283         }
1284
1285         local_irq_restore(flags);
1286 }
1287
/*
 * P6 PMI handler.  P6 has no global overflow-status MSR, so scan all
 * active counters and detect overflow from the counter value itself:
 * after an overflow the (sign-extended) value is small/positive, i.e.
 * its top implemented bit is clear.  Returns non-zero if any counter
 * overflowed (i.e. the NMI was ours).
 */
static int p6_pmu_handle_irq(struct pt_regs *regs)
{
	struct perf_sample_data data;
	struct cpu_hw_counters *cpuc;
	struct perf_counter *counter;
	struct hw_perf_counter *hwc;
	int idx, handled = 0;
	u64 val;

	data.regs = regs;
	data.addr = 0;

	cpuc = &__get_cpu_var(cpu_hw_counters);

	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
		if (!test_bit(idx, cpuc->active_mask))
			continue;

		counter = cpuc->counters[idx];
		hwc = &counter->hw;

		/* Top bit still set: counter has not wrapped, skip it. */
		val = x86_perf_counter_update(counter, hwc, idx);
		if (val & (1ULL << (x86_pmu.counter_bits - 1)))
			continue;

		/*
		 * counter overflow
		 */
		handled		= 1;
		data.period	= counter->hw.last_period;

		if (!x86_perf_counter_set_period(counter, hwc, idx))
			continue;

		/* Throttle the counter if the overflow handler asks for it: */
		if (perf_counter_overflow(counter, 1, &data))
			p6_pmu_disable_counter(hwc, idx);
	}

	if (handled)
		inc_irq_stat(apic_perf_irqs);

	return handled;
}
1331
1332 /*
1333  * This handler is triggered by the local APIC, so the APIC IRQ handling
1334  * rules apply:
1335  */
1336 static int intel_pmu_handle_irq(struct pt_regs *regs)
1337 {
1338         struct perf_sample_data data;
1339         struct cpu_hw_counters *cpuc;
1340         int bit, loops;
1341         u64 ack, status;
1342
1343         data.regs = regs;
1344         data.addr = 0;
1345
1346         cpuc = &__get_cpu_var(cpu_hw_counters);
1347
1348         perf_disable();
1349         status = intel_pmu_get_status();
1350         if (!status) {
1351                 perf_enable();
1352                 return 0;
1353         }
1354
1355         loops = 0;
1356 again:
1357         if (++loops > 100) {
1358                 WARN_ONCE(1, "perfcounters: irq loop stuck!\n");
1359                 perf_counter_print_debug();
1360                 intel_pmu_reset();
1361                 perf_enable();
1362                 return 1;
1363         }
1364
1365         inc_irq_stat(apic_perf_irqs);
1366         ack = status;
1367         for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
1368                 struct perf_counter *counter = cpuc->counters[bit];
1369
1370                 clear_bit(bit, (unsigned long *) &status);
1371                 if (!test_bit(bit, cpuc->active_mask))
1372                         continue;
1373
1374                 if (!intel_pmu_save_and_restart(counter))
1375                         continue;
1376
1377                 data.period = counter->hw.last_period;
1378
1379                 if (perf_counter_overflow(counter, 1, &data))
1380                         intel_pmu_disable_counter(&counter->hw, bit);
1381         }
1382
1383         intel_pmu_ack_status(ack);
1384
1385         /*
1386          * Repeat if there is more work to be done:
1387          */
1388         status = intel_pmu_get_status();
1389         if (status)
1390                 goto again;
1391
1392         perf_enable();
1393
1394         return 1;
1395 }
1396
/*
 * AMD PMI handler.  Like P6, AMD has no global overflow-status MSR:
 * scan all active counters and detect overflow from the top
 * implemented bit of the counter value being clear.  Returns non-zero
 * if any counter overflowed (i.e. the NMI was ours).
 */
static int amd_pmu_handle_irq(struct pt_regs *regs)
{
	struct perf_sample_data data;
	struct cpu_hw_counters *cpuc;
	struct perf_counter *counter;
	struct hw_perf_counter *hwc;
	int idx, handled = 0;
	u64 val;

	data.regs = regs;
	data.addr = 0;

	cpuc = &__get_cpu_var(cpu_hw_counters);

	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
		if (!test_bit(idx, cpuc->active_mask))
			continue;

		counter = cpuc->counters[idx];
		hwc = &counter->hw;

		/* Top bit still set: counter has not wrapped, skip it. */
		val = x86_perf_counter_update(counter, hwc, idx);
		if (val & (1ULL << (x86_pmu.counter_bits - 1)))
			continue;

		/*
		 * counter overflow
		 */
		handled		= 1;
		data.period	= counter->hw.last_period;

		if (!x86_perf_counter_set_period(counter, hwc, idx))
			continue;

		/* Throttle the counter if the overflow handler asks for it: */
		if (perf_counter_overflow(counter, 1, &data))
			amd_pmu_disable_counter(hwc, idx);
	}

	if (handled)
		inc_irq_stat(apic_perf_irqs);

	return handled;
}
1440
/*
 * Handler for the self-IPI'd "pending work" vector: runs the deferred
 * perf-counter work (wakeups etc.) in a regular IRQ context instead of
 * the NMI where it was raised.
 */
void smp_perf_pending_interrupt(struct pt_regs *regs)
{
	irq_enter();
	ack_APIC_irq();
	inc_irq_stat(apic_pending_irqs);
	perf_counter_do_pending();
	irq_exit();
}
1449
/* Raise the pending-work vector on this CPU via a self-IPI. */
void set_perf_counter_pending(void)
{
	apic->send_IPI_self(LOCAL_PENDING_VECTOR);
}
1454
/*
 * Route the local APIC performance-counter interrupt to NMI delivery,
 * so that counters keep working inside IRQ-disabled regions.
 */
void perf_counters_lapic_init(void)
{
	if (!x86_pmu_initialized())
		return;

	/*
	 * Always use NMI for PMU
	 */
	apic_write(APIC_LVTPC, APIC_DM_NMI);
}
1465
/*
 * die-notifier callback: dispatch PMU NMIs to the vendor handler.
 * Marked __kprobes so a kprobe in the NMI path can't recurse into us.
 */
static int __kprobes
perf_counter_nmi_handler(struct notifier_block *self,
			 unsigned long cmd, void *__args)
{
	struct die_args *args = __args;
	struct pt_regs *regs;

	/* Fast path out when no counters exist anywhere: */
	if (!atomic_read(&active_counters))
		return NOTIFY_DONE;

	switch (cmd) {
	case DIE_NMI:
	case DIE_NMI_IPI:
		break;

	default:
		return NOTIFY_DONE;
	}

	regs = args->regs;

	/* Re-arm the LVT entry; some APICs mask it on delivery: */
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	/*
	 * Can't rely on the handled return value to say it was our NMI, two
	 * counters could trigger 'simultaneously' raising two back-to-back NMIs.
	 *
	 * If the first NMI handles both, the latter will be empty and daze
	 * the CPU.
	 */
	x86_pmu.handle_irq(regs);

	return NOTIFY_STOP;
}
1499
/* Registered on the die-notifier chain; priority 1 runs us early. */
static __read_mostly struct notifier_block perf_counter_nmi_notifier = {
	.notifier_call		= perf_counter_nmi_handler,
	.next			= NULL,
	.priority		= 1
};
1505
/*
 * P6-family driver (Pentium Pro through Pentium M).  version 0 means
 * no architectural perfmon; counts and widths are hardwired.
 */
static struct x86_pmu p6_pmu = {
	.name			= "p6",
	.handle_irq		= p6_pmu_handle_irq,
	.disable_all		= p6_pmu_disable_all,
	.enable_all		= p6_pmu_enable_all,
	.enable			= p6_pmu_enable_counter,
	.disable		= p6_pmu_disable_counter,
	.eventsel		= MSR_P6_EVNTSEL0,
	.perfctr		= MSR_P6_PERFCTR0,
	.event_map		= p6_pmu_event_map,
	.raw_event		= p6_pmu_raw_event,
	.max_events		= ARRAY_SIZE(p6_perfmon_event_map),
	.max_period		= (1ULL << 31) - 1,
	.version		= 0,
	.num_counters		= 2,
	/*
	 * Counters have 40 bits implemented. However they are designed such
	 * that bits [32-39] are sign extensions of bit 31. As such the
	 * effective width of a counter for P6-like PMU is 32 bits only.
	 *
	 * See IA-32 Intel Architecture Software developer manual Vol 3B
	 */
	.counter_bits		= 32,
	.counter_mask		= (1ULL << 32) - 1,
};
1531
/*
 * Architectural-perfmon (v2+) Intel driver.  version, num_counters,
 * counter_bits and counter_mask are filled in from CPUID leaf 0xa by
 * intel_pmu_init().
 */
static struct x86_pmu intel_pmu = {
	.name			= "Intel",
	.handle_irq		= intel_pmu_handle_irq,
	.disable_all		= intel_pmu_disable_all,
	.enable_all		= intel_pmu_enable_all,
	.enable			= intel_pmu_enable_counter,
	.disable		= intel_pmu_disable_counter,
	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
	.event_map		= intel_pmu_event_map,
	.raw_event		= intel_pmu_raw_event,
	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
	/*
	 * Intel PMCs cannot be accessed sanely above 32 bit width,
	 * so we install an artificial 1<<31 period regardless of
	 * the generic counter period:
	 */
	.max_period		= (1ULL << 31) - 1,
};
1551
/* AMD K7+ driver: four fixed-width 48-bit counters, no global control. */
static struct x86_pmu amd_pmu = {
	.name			= "AMD",
	.handle_irq		= amd_pmu_handle_irq,
	.disable_all		= amd_pmu_disable_all,
	.enable_all		= amd_pmu_enable_all,
	.enable			= amd_pmu_enable_counter,
	.disable		= amd_pmu_disable_counter,
	.eventsel		= MSR_K7_EVNTSEL0,
	.perfctr		= MSR_K7_PERFCTR0,
	.event_map		= amd_pmu_event_map,
	.raw_event		= amd_pmu_raw_event,
	.max_events		= ARRAY_SIZE(amd_perfmon_event_map),
	.num_counters		= 4,
	.counter_bits		= 48,
	.counter_mask		= (1ULL << 48) - 1,
	/* use highest bit to detect overflow */
	.max_period		= (1ULL << 47) - 1,
};
1570
1571 static int p6_pmu_init(void)
1572 {
1573         switch (boot_cpu_data.x86_model) {
1574         case 1:
1575         case 3:  /* Pentium Pro */
1576         case 5:
1577         case 6:  /* Pentium II */
1578         case 7:
1579         case 8:
1580         case 11: /* Pentium III */
1581                 break;
1582         case 9:
1583         case 13:
1584                 /* Pentium M */
1585                 break;
1586         default:
1587                 pr_cont("unsupported p6 CPU model %d ",
1588                         boot_cpu_data.x86_model);
1589                 return -ENODEV;
1590         }
1591
1592         if (!cpu_has_apic) {
1593                 pr_info("no Local APIC, try rebooting with lapic");
1594                 return -ENODEV;
1595         }
1596
1597         x86_pmu                         = p6_pmu;
1598
1599         return 0;
1600 }
1601
1602 static int intel_pmu_init(void)
1603 {
1604         union cpuid10_edx edx;
1605         union cpuid10_eax eax;
1606         unsigned int unused;
1607         unsigned int ebx;
1608         int version;
1609
1610         if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
1611                 /* check for P6 processor family */
1612            if (boot_cpu_data.x86 == 6) {
1613                 return p6_pmu_init();
1614            } else {
1615                 return -ENODEV;
1616            }
1617         }
1618
1619         /*
1620          * Check whether the Architectural PerfMon supports
1621          * Branch Misses Retired Event or not.
1622          */
1623         cpuid(10, &eax.full, &ebx, &unused, &edx.full);
1624         if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
1625                 return -ENODEV;
1626
1627         version = eax.split.version_id;
1628         if (version < 2)
1629                 return -ENODEV;
1630
1631         x86_pmu                         = intel_pmu;
1632         x86_pmu.version                 = version;
1633         x86_pmu.num_counters            = eax.split.num_counters;
1634         x86_pmu.counter_bits            = eax.split.bit_width;
1635         x86_pmu.counter_mask            = (1ULL << eax.split.bit_width) - 1;
1636
1637         /*
1638          * Quirk: v2 perfmon does not report fixed-purpose counters, so
1639          * assume at least 3 counters:
1640          */
1641         x86_pmu.num_counters_fixed      = max((int)edx.split.num_counters_fixed, 3);
1642
1643         /*
1644          * Install the hw-cache-events table:
1645          */
1646         switch (boot_cpu_data.x86_model) {
1647         case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
1648         case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
1649         case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
1650         case 29: /* six-core 45 nm xeon "Dunnington" */
1651                 memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
1652                        sizeof(hw_cache_event_ids));
1653
1654                 pr_cont("Core2 events, ");
1655                 break;
1656         default:
1657         case 26:
1658                 memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
1659                        sizeof(hw_cache_event_ids));
1660
1661                 pr_cont("Nehalem/Corei7 events, ");
1662                 break;
1663         case 28:
1664                 memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
1665                        sizeof(hw_cache_event_ids));
1666
1667                 pr_cont("Atom events, ");
1668                 break;
1669         }
1670         return 0;
1671 }
1672
/*
 * Probe an AMD PMU and install the amd driver plus the shared
 * hw-cache-events table.  Returns 0 on success, -ENODEV on pre-K7.
 */
static int amd_pmu_init(void)
{
	/* Performance-monitoring supported from K7 and later: */
	if (boot_cpu_data.x86 < 6)
		return -ENODEV;

	x86_pmu = amd_pmu;

	/* Events are common for all AMDs */
	memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
	       sizeof(hw_cache_event_ids));

	return 0;
}
1687
1688 void __init init_hw_perf_counters(void)
1689 {
1690         int err;
1691
1692         pr_info("Performance Counters: ");
1693
1694         switch (boot_cpu_data.x86_vendor) {
1695         case X86_VENDOR_INTEL:
1696                 err = intel_pmu_init();
1697                 break;
1698         case X86_VENDOR_AMD:
1699                 err = amd_pmu_init();
1700                 break;
1701         default:
1702                 return;
1703         }
1704         if (err != 0) {
1705                 pr_cont("no PMU driver, software counters only.\n");
1706                 return;
1707         }
1708
1709         pr_cont("%s PMU driver.\n", x86_pmu.name);
1710
1711         if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) {
1712                 WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!",
1713                      x86_pmu.num_counters, X86_PMC_MAX_GENERIC);
1714                 x86_pmu.num_counters = X86_PMC_MAX_GENERIC;
1715         }
1716         perf_counter_mask = (1 << x86_pmu.num_counters) - 1;
1717         perf_max_counters = x86_pmu.num_counters;
1718
1719         if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) {
1720                 WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!",
1721                      x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED);
1722                 x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED;
1723         }
1724
1725         perf_counter_mask |=
1726                 ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED;
1727         x86_pmu.intel_ctrl = perf_counter_mask;
1728
1729         perf_counters_lapic_init();
1730         register_die_notifier(&perf_counter_nmi_notifier);
1731
1732         pr_info("... version:                 %d\n",     x86_pmu.version);
1733         pr_info("... bit width:               %d\n",     x86_pmu.counter_bits);
1734         pr_info("... generic counters:        %d\n",     x86_pmu.num_counters);
1735         pr_info("... value mask:              %016Lx\n", x86_pmu.counter_mask);
1736         pr_info("... max period:              %016Lx\n", x86_pmu.max_period);
1737         pr_info("... fixed-purpose counters:  %d\n",     x86_pmu.num_counters_fixed);
1738         pr_info("... counter mask:            %016Lx\n", perf_counter_mask);
1739 }
1740
/* Fold the current hw counter value into the generic counter. */
static inline void x86_pmu_read(struct perf_counter *counter)
{
	x86_perf_counter_update(counter, &counter->hw, counter->hw.idx);
}
1745
/* Generic-pmu ops vector handed out by hw_perf_counter_init(). */
static const struct pmu pmu = {
	.enable		= x86_pmu_enable,
	.disable	= x86_pmu_disable,
	.read		= x86_pmu_read,
	.unthrottle	= x86_pmu_unthrottle,
};
1752
/*
 * Arch hook called when a counter is created: validate/translate the
 * attributes into hw config and return the x86 pmu ops, or an
 * ERR_PTR() on failure.
 */
const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
{
	int err;

	err = __hw_perf_counter_init(counter);
	if (err)
		return ERR_PTR(err);

	return &pmu;
}
1763
1764 /*
1765  * callchain support
1766  */
1767
1768 static inline
1769 void callchain_store(struct perf_callchain_entry *entry, u64 ip)
1770 {
1771         if (entry->nr < PERF_MAX_STACK_DEPTH)
1772                 entry->ip[entry->nr++] = ip;
1773 }
1774
1775 static DEFINE_PER_CPU(struct perf_callchain_entry, irq_entry);
1776 static DEFINE_PER_CPU(struct perf_callchain_entry, nmi_entry);
1777 static DEFINE_PER_CPU(int, in_nmi_frame);
1778
1779
/* stacktrace_ops callback: warnings are irrelevant while sampling. */
static void
backtrace_warning_symbol(void *data, char *msg, unsigned long symbol)
{
	/* Ignore warnings */
}
1785
/* stacktrace_ops callback: warnings are irrelevant while sampling. */
static void backtrace_warning(void *data, char *msg)
{
	/* Ignore warnings */
}
1790
/*
 * stacktrace_ops callback on each stack switch: remember whether the
 * walker is currently on the NMI stack, so those frames get skipped.
 */
static int backtrace_stack(void *data, char *name)
{
	per_cpu(in_nmi_frame, smp_processor_id()) =
			x86_is_stack_id(NMI_STACK, name);

	return 0;
}
1798
1799 static void backtrace_address(void *data, unsigned long addr, int reliable)
1800 {
1801         struct perf_callchain_entry *entry = data;
1802
1803         if (per_cpu(in_nmi_frame, smp_processor_id()))
1804                 return;
1805
1806         if (reliable)
1807                 callchain_store(entry, addr);
1808 }
1809
/* Callbacks wired into dump_trace() for kernel callchain capture. */
static const struct stacktrace_ops backtrace_ops = {
	.warning		= backtrace_warning,
	.warning_symbol		= backtrace_warning_symbol,
	.stack			= backtrace_stack,
	.address		= backtrace_address,
};
1816
1817 #include "../dumpstack.h"
1818
/*
 * Capture a kernel-mode callchain into @entry, starting at the
 * interrupted instruction in @regs.
 */
static void
perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
{
	/* Tag the following addresses as kernel-space for the consumer. */
	callchain_store(entry, PERF_CONTEXT_KERNEL);
	/* The interrupted instruction is the first frame. */
	callchain_store(entry, regs->ip);

	/* Walk the kernel stack; backtrace_ops feeds addresses into @entry. */
	dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry);
}
1827
1828 /*
1829  * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
1830  */
static unsigned long
copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
{
	unsigned long offset, addr = (unsigned long)from;
	/* NMI and IRQ context must use distinct kmap_atomic slots. */
	int type = in_nmi() ? KM_NMI : KM_IRQ0;
	unsigned long size, len = 0;
	struct page *page;
	void *map;
	int ret;

	do {
		/* Pin the user page without sleeping or taking mmap_sem. */
		ret = __get_user_pages_fast(addr, 1, 0, &page);
		if (!ret)
			break;	/* page not present/accessible: stop early */

		/* Copy at most to the end of this page, and at most n - len. */
		offset = addr & (PAGE_SIZE - 1);
		size = min(PAGE_SIZE - offset, n - len);

		map = kmap_atomic(page, type);
		memcpy(to, map+offset, size);
		kunmap_atomic(map, type);
		put_page(page);	/* drop the reference taken by GUP */

		len  += size;
		to   += size;
		addr += size;

	} while (len < n);

	/* Number of bytes actually copied; may be short of n. */
	return len;
}
1862
1863 static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
1864 {
1865         unsigned long bytes;
1866
1867         bytes = copy_from_user_nmi(frame, fp, sizeof(*frame));
1868
1869         return bytes == sizeof(*frame);
1870 }
1871
1872 static void
1873 perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
1874 {
1875         struct stack_frame frame;
1876         const void __user *fp;
1877
1878         if (!user_mode(regs))
1879                 regs = task_pt_regs(current);
1880
1881         fp = (void __user *)regs->bp;
1882
1883         callchain_store(entry, PERF_CONTEXT_USER);
1884         callchain_store(entry, regs->ip);
1885
1886         while (entry->nr < PERF_MAX_STACK_DEPTH) {
1887                 frame.next_frame             = NULL;
1888                 frame.return_address = 0;
1889
1890                 if (!copy_stack_frame(fp, &frame))
1891                         break;
1892
1893                 if ((unsigned long)fp < regs->sp)
1894                         break;
1895
1896                 callchain_store(entry, frame.return_address);
1897                 fp = frame.next_frame;
1898         }
1899 }
1900
1901 static void
1902 perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
1903 {
1904         int is_user;
1905
1906         if (!regs)
1907                 return;
1908
1909         is_user = user_mode(regs);
1910
1911         if (!current || current->pid == 0)
1912                 return;
1913
1914         if (is_user && current->state != TASK_RUNNING)
1915                 return;
1916
1917         if (!is_user)
1918                 perf_callchain_kernel(regs, entry);
1919
1920         if (current->mm)
1921                 perf_callchain_user(regs, entry);
1922 }
1923
1924 struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
1925 {
1926         struct perf_callchain_entry *entry;
1927
1928         if (in_nmi())
1929                 entry = &__get_cpu_var(nmi_entry);
1930         else
1931                 entry = &__get_cpu_var(irq_entry);
1932
1933         entry->nr = 0;
1934
1935         perf_do_callchain(regs, entry);
1936
1937         return entry;
1938 }