pandora-kernel.git: arch/x86/kernel/cpu/perf_event_intel.c
1 /*
2  * Per core/cpu state
3  *
4  * Used to coordinate shared registers between HT threads or
5  * among events on a single PMU.
6  */
7
8 #include <linux/stddef.h>
9 #include <linux/types.h>
10 #include <linux/init.h>
11 #include <linux/slab.h>
12 #include <linux/export.h>
13
14 #include <asm/hardirq.h>
15 #include <asm/apic.h>
16
17 #include "perf_event.h"
18
19 /*
20  * Intel PerfMon, used on Core and later.
21  */
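/*
 * Each entry is a raw PERFEVTSEL encoding: the low byte is the event
 * select and the next byte the unit mask, e.g. 0x412e is event 0x2e
 * with umask 0x41, the architectural LLC-misses event.
 */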
22 static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly =
23 {
24   [PERF_COUNT_HW_CPU_CYCLES]            = 0x003c,
25   [PERF_COUNT_HW_INSTRUCTIONS]          = 0x00c0,
26   [PERF_COUNT_HW_CACHE_REFERENCES]      = 0x4f2e,
27   [PERF_COUNT_HW_CACHE_MISSES]          = 0x412e,
28   [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]   = 0x00c4,
29   [PERF_COUNT_HW_BRANCH_MISSES]         = 0x00c5,
30   [PERF_COUNT_HW_BUS_CYCLES]            = 0x013c,
31 };
32
33 static struct event_constraint intel_core_event_constraints[] __read_mostly =
34 {
35         INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
36         INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
37         INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
38         INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
39         INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
40         INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FP_COMP_INSTR_RET */
41         EVENT_CONSTRAINT_END
42 };
43
44 static struct event_constraint intel_core2_event_constraints[] __read_mostly =
45 {
46         FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
47         FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
48         /*
49          * Core2 has Fixed Counter 2 listed as CPU_CLK_UNHALTED.REF and event
50          * 0x013c as CPU_CLK_UNHALTED.BUS and specifies there is a fixed
51          * ratio between these counters.
52          */
53         /* FIXED_EVENT_CONSTRAINT(0x013c, 2),  CPU_CLK_UNHALTED.REF */
54         INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
55         INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
56         INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
57         INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
58         INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
59         INTEL_EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */
60         INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
61         INTEL_EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */
62         INTEL_EVENT_CONSTRAINT(0xc9, 0x1), /* ITLB_MISS_RETIRED (T30-9) */
63         INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */
64         EVENT_CONSTRAINT_END
65 };
66
67 static struct event_constraint intel_nehalem_event_constraints[] __read_mostly =
68 {
69         FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
70         FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
71         /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
72         INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
73         INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
74         INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
75         INTEL_EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */
76         INTEL_EVENT_CONSTRAINT(0x48, 0x3), /* L1D_PEND_MISS */
77         INTEL_EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */
78         INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
79         INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
80         EVENT_CONSTRAINT_END
81 };
82
83 static struct extra_reg intel_nehalem_extra_regs[] __read_mostly =
84 {
85         INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
86         EVENT_EXTRA_END
87 };
88
89 static struct event_constraint intel_westmere_event_constraints[] __read_mostly =
90 {
91         FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
92         FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
93         /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
94         INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
95         INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */
96         INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
97         INTEL_EVENT_CONSTRAINT(0xb3, 0x1), /* SNOOPQ_REQUEST_OUTSTANDING */
98         EVENT_CONSTRAINT_END
99 };
100
101 static struct event_constraint intel_snb_event_constraints[] __read_mostly =
102 {
103         FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
104         FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
105         /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
106         INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */
107         INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
108         INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
109         EVENT_CONSTRAINT_END
110 };
111
112 static struct extra_reg intel_westmere_extra_regs[] __read_mostly =
113 {
114         INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
115         INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1),
116         EVENT_EXTRA_END
117 };
118
119 static struct event_constraint intel_v1_event_constraints[] __read_mostly =
120 {
121         EVENT_CONSTRAINT_END
122 };
123
124 static struct event_constraint intel_gen_event_constraints[] __read_mostly =
125 {
126         FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
127         FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
128         /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
129         EVENT_CONSTRAINT_END
130 };
131
132 static struct extra_reg intel_snb_extra_regs[] __read_mostly = {
133         INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0),
134         INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1),
135         EVENT_EXTRA_END
136 };
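/*
 * Events 0xb7/0xbb are OFFCORE_RESPONSE_0/1; the actual request/response
 * selection is programmed into MSR_OFFCORE_RSP_0/1.  The third field of
 * each entry is the mask of bits that may be set in that extra MSR:
 * 16 bits on Nehalem/Westmere, 38 bits on Sandy Bridge.
 */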
137
138 static u64 intel_pmu_event_map(int hw_event)
139 {
140         return intel_perfmon_event_map[hw_event];
141 }
142
143 static __initconst const u64 snb_hw_cache_event_ids
144                                 [PERF_COUNT_HW_CACHE_MAX]
145                                 [PERF_COUNT_HW_CACHE_OP_MAX]
146                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
147 {
148  [ C(L1D) ] = {
149         [ C(OP_READ) ] = {
150                 [ C(RESULT_ACCESS) ] = 0xf1d0, /* MEM_UOP_RETIRED.LOADS        */
151                 [ C(RESULT_MISS)   ] = 0x0151, /* L1D.REPLACEMENT              */
152         },
153         [ C(OP_WRITE) ] = {
154                 [ C(RESULT_ACCESS) ] = 0xf2d0, /* MEM_UOP_RETIRED.STORES       */
155                 [ C(RESULT_MISS)   ] = 0x0851, /* L1D.ALL_M_REPLACEMENT        */
156         },
157         [ C(OP_PREFETCH) ] = {
158                 [ C(RESULT_ACCESS) ] = 0x0,
159                 [ C(RESULT_MISS)   ] = 0x024e, /* HW_PRE_REQ.DL1_MISS          */
160         },
161  },
162  [ C(L1I ) ] = {
163         [ C(OP_READ) ] = {
164                 [ C(RESULT_ACCESS) ] = 0x0,
165                 [ C(RESULT_MISS)   ] = 0x0280, /* ICACHE.MISSES */
166         },
167         [ C(OP_WRITE) ] = {
168                 [ C(RESULT_ACCESS) ] = -1,
169                 [ C(RESULT_MISS)   ] = -1,
170         },
171         [ C(OP_PREFETCH) ] = {
172                 [ C(RESULT_ACCESS) ] = 0x0,
173                 [ C(RESULT_MISS)   ] = 0x0,
174         },
175  },
176  [ C(LL  ) ] = {
177         [ C(OP_READ) ] = {
178                 /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
179                 [ C(RESULT_ACCESS) ] = 0x01b7,
180                 /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
181                 [ C(RESULT_MISS)   ] = 0x01b7,
182         },
183         [ C(OP_WRITE) ] = {
184                 /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
185                 [ C(RESULT_ACCESS) ] = 0x01b7,
186                 /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
187                 [ C(RESULT_MISS)   ] = 0x01b7,
188         },
189         [ C(OP_PREFETCH) ] = {
190                 /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
191                 [ C(RESULT_ACCESS) ] = 0x01b7,
192                 /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
193                 [ C(RESULT_MISS)   ] = 0x01b7,
194         },
195  },
196  [ C(DTLB) ] = {
197         [ C(OP_READ) ] = {
198                 [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOP_RETIRED.ALL_LOADS */
199                 [ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.CAUSES_A_WALK */
200         },
201         [ C(OP_WRITE) ] = {
202                 [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOP_RETIRED.ALL_STORES */
203                 [ C(RESULT_MISS)   ] = 0x0149, /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */
204         },
205         [ C(OP_PREFETCH) ] = {
206                 [ C(RESULT_ACCESS) ] = 0x0,
207                 [ C(RESULT_MISS)   ] = 0x0,
208         },
209  },
210  [ C(ITLB) ] = {
211         [ C(OP_READ) ] = {
212                 [ C(RESULT_ACCESS) ] = 0x1085, /* ITLB_MISSES.STLB_HIT         */
213                 [ C(RESULT_MISS)   ] = 0x0185, /* ITLB_MISSES.CAUSES_A_WALK    */
214         },
215         [ C(OP_WRITE) ] = {
216                 [ C(RESULT_ACCESS) ] = -1,
217                 [ C(RESULT_MISS)   ] = -1,
218         },
219         [ C(OP_PREFETCH) ] = {
220                 [ C(RESULT_ACCESS) ] = -1,
221                 [ C(RESULT_MISS)   ] = -1,
222         },
223  },
224  [ C(BPU ) ] = {
225         [ C(OP_READ) ] = {
226                 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
227                 [ C(RESULT_MISS)   ] = 0x00c5, /* BR_MISP_RETIRED.ALL_BRANCHES */
228         },
229         [ C(OP_WRITE) ] = {
230                 [ C(RESULT_ACCESS) ] = -1,
231                 [ C(RESULT_MISS)   ] = -1,
232         },
233         [ C(OP_PREFETCH) ] = {
234                 [ C(RESULT_ACCESS) ] = -1,
235                 [ C(RESULT_MISS)   ] = -1,
236         },
237  },
238  [ C(NODE) ] = {
239         [ C(OP_READ) ] = {
240                 [ C(RESULT_ACCESS) ] = -1,
241                 [ C(RESULT_MISS)   ] = -1,
242         },
243         [ C(OP_WRITE) ] = {
244                 [ C(RESULT_ACCESS) ] = -1,
245                 [ C(RESULT_MISS)   ] = -1,
246         },
247         [ C(OP_PREFETCH) ] = {
248                 [ C(RESULT_ACCESS) ] = -1,
249                 [ C(RESULT_MISS)   ] = -1,
250         },
251  },
252
253 };
254
255 static __initconst const u64 westmere_hw_cache_event_ids
256                                 [PERF_COUNT_HW_CACHE_MAX]
257                                 [PERF_COUNT_HW_CACHE_OP_MAX]
258                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
259 {
260  [ C(L1D) ] = {
261         [ C(OP_READ) ] = {
262                 [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS       */
263                 [ C(RESULT_MISS)   ] = 0x0151, /* L1D.REPL                     */
264         },
265         [ C(OP_WRITE) ] = {
266                 [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES      */
267                 [ C(RESULT_MISS)   ] = 0x0251, /* L1D.M_REPL                   */
268         },
269         [ C(OP_PREFETCH) ] = {
270                 [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS        */
271                 [ C(RESULT_MISS)   ] = 0x024e, /* L1D_PREFETCH.MISS            */
272         },
273  },
274  [ C(L1I ) ] = {
275         [ C(OP_READ) ] = {
276                 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                    */
277                 [ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                   */
278         },
279         [ C(OP_WRITE) ] = {
280                 [ C(RESULT_ACCESS) ] = -1,
281                 [ C(RESULT_MISS)   ] = -1,
282         },
283         [ C(OP_PREFETCH) ] = {
284                 [ C(RESULT_ACCESS) ] = 0x0,
285                 [ C(RESULT_MISS)   ] = 0x0,
286         },
287  },
288  [ C(LL  ) ] = {
289         [ C(OP_READ) ] = {
290                 /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
291                 [ C(RESULT_ACCESS) ] = 0x01b7,
292                 /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
293                 [ C(RESULT_MISS)   ] = 0x01b7,
294         },
295         /*
296          * Use RFO, not WRITEBACK, because a write miss would typically occur
297          * on RFO.
298          */
299         [ C(OP_WRITE) ] = {
300                 /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
301                 [ C(RESULT_ACCESS) ] = 0x01b7,
302                 /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
303                 [ C(RESULT_MISS)   ] = 0x01b7,
304         },
305         [ C(OP_PREFETCH) ] = {
306                 /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
307                 [ C(RESULT_ACCESS) ] = 0x01b7,
308                 /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
309                 [ C(RESULT_MISS)   ] = 0x01b7,
310         },
311  },
312  [ C(DTLB) ] = {
313         [ C(OP_READ) ] = {
314                 [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS       */
315                 [ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.ANY         */
316         },
317         [ C(OP_WRITE) ] = {
318                 [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES      */
319                 [ C(RESULT_MISS)   ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS  */
320         },
321         [ C(OP_PREFETCH) ] = {
322                 [ C(RESULT_ACCESS) ] = 0x0,
323                 [ C(RESULT_MISS)   ] = 0x0,
324         },
325  },
326  [ C(ITLB) ] = {
327         [ C(OP_READ) ] = {
328                 [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P           */
329                 [ C(RESULT_MISS)   ] = 0x0185, /* ITLB_MISSES.ANY              */
330         },
331         [ C(OP_WRITE) ] = {
332                 [ C(RESULT_ACCESS) ] = -1,
333                 [ C(RESULT_MISS)   ] = -1,
334         },
335         [ C(OP_PREFETCH) ] = {
336                 [ C(RESULT_ACCESS) ] = -1,
337                 [ C(RESULT_MISS)   ] = -1,
338         },
339  },
340  [ C(BPU ) ] = {
341         [ C(OP_READ) ] = {
342                 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
343                 [ C(RESULT_MISS)   ] = 0x03e8, /* BPU_CLEARS.ANY               */
344         },
345         [ C(OP_WRITE) ] = {
346                 [ C(RESULT_ACCESS) ] = -1,
347                 [ C(RESULT_MISS)   ] = -1,
348         },
349         [ C(OP_PREFETCH) ] = {
350                 [ C(RESULT_ACCESS) ] = -1,
351                 [ C(RESULT_MISS)   ] = -1,
352         },
353  },
354  [ C(NODE) ] = {
355         [ C(OP_READ) ] = {
356                 [ C(RESULT_ACCESS) ] = 0x01b7,
357                 [ C(RESULT_MISS)   ] = 0x01b7,
358         },
359         [ C(OP_WRITE) ] = {
360                 [ C(RESULT_ACCESS) ] = 0x01b7,
361                 [ C(RESULT_MISS)   ] = 0x01b7,
362         },
363         [ C(OP_PREFETCH) ] = {
364                 [ C(RESULT_ACCESS) ] = 0x01b7,
365                 [ C(RESULT_MISS)   ] = 0x01b7,
366         },
367  },
368 };
369
370 /*
371  * Nehalem/Westmere MSR_OFFCORE_RESPONSE bits;
372  * See IA32 SDM Vol 3B 30.6.1.3
373  */
374
375 #define NHM_DMND_DATA_RD        (1 << 0)
376 #define NHM_DMND_RFO            (1 << 1)
377 #define NHM_DMND_IFETCH         (1 << 2)
378 #define NHM_DMND_WB             (1 << 3)
379 #define NHM_PF_DATA_RD          (1 << 4)
380 #define NHM_PF_DATA_RFO         (1 << 5)
381 #define NHM_PF_IFETCH           (1 << 6)
382 #define NHM_OFFCORE_OTHER       (1 << 7)
383 #define NHM_UNCORE_HIT          (1 << 8)
384 #define NHM_OTHER_CORE_HIT_SNP  (1 << 9)
385 #define NHM_OTHER_CORE_HITM     (1 << 10)
386                                 /* reserved */
387 #define NHM_REMOTE_CACHE_FWD    (1 << 12)
388 #define NHM_REMOTE_DRAM         (1 << 13)
389 #define NHM_LOCAL_DRAM          (1 << 14)
390 #define NHM_NON_DRAM            (1 << 15)
391
392 #define NHM_ALL_DRAM            (NHM_REMOTE_DRAM|NHM_LOCAL_DRAM)
393
394 #define NHM_DMND_READ           (NHM_DMND_DATA_RD)
395 #define NHM_DMND_WRITE          (NHM_DMND_RFO|NHM_DMND_WB)
396 #define NHM_DMND_PREFETCH       (NHM_PF_DATA_RD|NHM_PF_DATA_RFO)
397
398 #define NHM_L3_HIT      (NHM_UNCORE_HIT|NHM_OTHER_CORE_HIT_SNP|NHM_OTHER_CORE_HITM)
399 #define NHM_L3_MISS     (NHM_NON_DRAM|NHM_ALL_DRAM|NHM_REMOTE_CACHE_FWD)
400 #define NHM_L3_ACCESS   (NHM_L3_HIT|NHM_L3_MISS)
401
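/*
 * These composites are the values programmed into MSR_OFFCORE_RSP_0
 * (via the 0xb7 extra reg) for the LL and NODE cache events below:
 * request-type bits ORed with response/source bits.
 */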
402 static __initconst const u64 nehalem_hw_cache_extra_regs
403                                 [PERF_COUNT_HW_CACHE_MAX]
404                                 [PERF_COUNT_HW_CACHE_OP_MAX]
405                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
406 {
407  [ C(LL  ) ] = {
408         [ C(OP_READ) ] = {
409                 [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_L3_ACCESS,
410                 [ C(RESULT_MISS)   ] = NHM_DMND_READ|NHM_L3_MISS,
411         },
412         [ C(OP_WRITE) ] = {
413                 [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_L3_ACCESS,
414                 [ C(RESULT_MISS)   ] = NHM_DMND_WRITE|NHM_L3_MISS,
415         },
416         [ C(OP_PREFETCH) ] = {
417                 [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_L3_ACCESS,
418                 [ C(RESULT_MISS)   ] = NHM_DMND_PREFETCH|NHM_L3_MISS,
419         },
420  },
421  [ C(NODE) ] = {
422         [ C(OP_READ) ] = {
423                 [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_ALL_DRAM,
424                 [ C(RESULT_MISS)   ] = NHM_DMND_READ|NHM_REMOTE_DRAM,
425         },
426         [ C(OP_WRITE) ] = {
427                 [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_ALL_DRAM,
428                 [ C(RESULT_MISS)   ] = NHM_DMND_WRITE|NHM_REMOTE_DRAM,
429         },
430         [ C(OP_PREFETCH) ] = {
431                 [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_ALL_DRAM,
432                 [ C(RESULT_MISS)   ] = NHM_DMND_PREFETCH|NHM_REMOTE_DRAM,
433         },
434  },
435 };
436
437 static __initconst const u64 nehalem_hw_cache_event_ids
438                                 [PERF_COUNT_HW_CACHE_MAX]
439                                 [PERF_COUNT_HW_CACHE_OP_MAX]
440                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
441 {
442  [ C(L1D) ] = {
443         [ C(OP_READ) ] = {
444                 [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS       */
445                 [ C(RESULT_MISS)   ] = 0x0151, /* L1D.REPL                     */
446         },
447         [ C(OP_WRITE) ] = {
448                 [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES      */
449                 [ C(RESULT_MISS)   ] = 0x0251, /* L1D.M_REPL                   */
450         },
451         [ C(OP_PREFETCH) ] = {
452                 [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS        */
453                 [ C(RESULT_MISS)   ] = 0x024e, /* L1D_PREFETCH.MISS            */
454         },
455  },
456  [ C(L1I ) ] = {
457         [ C(OP_READ) ] = {
458                 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                    */
459                 [ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                   */
460         },
461         [ C(OP_WRITE) ] = {
462                 [ C(RESULT_ACCESS) ] = -1,
463                 [ C(RESULT_MISS)   ] = -1,
464         },
465         [ C(OP_PREFETCH) ] = {
466                 [ C(RESULT_ACCESS) ] = 0x0,
467                 [ C(RESULT_MISS)   ] = 0x0,
468         },
469  },
470  [ C(LL  ) ] = {
471         [ C(OP_READ) ] = {
472                 /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
473                 [ C(RESULT_ACCESS) ] = 0x01b7,
474                 /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
475                 [ C(RESULT_MISS)   ] = 0x01b7,
476         },
477         /*
478          * Use RFO, not WRITEBACK, because a write miss would typically occur
479          * on RFO.
480          */
481         [ C(OP_WRITE) ] = {
482                 /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
483                 [ C(RESULT_ACCESS) ] = 0x01b7,
484                 /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
485                 [ C(RESULT_MISS)   ] = 0x01b7,
486         },
487         [ C(OP_PREFETCH) ] = {
488                 /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
489                 [ C(RESULT_ACCESS) ] = 0x01b7,
490                 /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
491                 [ C(RESULT_MISS)   ] = 0x01b7,
492         },
493  },
494  [ C(DTLB) ] = {
495         [ C(OP_READ) ] = {
496                 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI   (alias)  */
497                 [ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.ANY         */
498         },
499         [ C(OP_WRITE) ] = {
500                 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI   (alias)  */
501                 [ C(RESULT_MISS)   ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS  */
502         },
503         [ C(OP_PREFETCH) ] = {
504                 [ C(RESULT_ACCESS) ] = 0x0,
505                 [ C(RESULT_MISS)   ] = 0x0,
506         },
507  },
508  [ C(ITLB) ] = {
509         [ C(OP_READ) ] = {
510                 [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P           */
511                 [ C(RESULT_MISS)   ] = 0x20c8, /* ITLB_MISS_RETIRED            */
512         },
513         [ C(OP_WRITE) ] = {
514                 [ C(RESULT_ACCESS) ] = -1,
515                 [ C(RESULT_MISS)   ] = -1,
516         },
517         [ C(OP_PREFETCH) ] = {
518                 [ C(RESULT_ACCESS) ] = -1,
519                 [ C(RESULT_MISS)   ] = -1,
520         },
521  },
522  [ C(BPU ) ] = {
523         [ C(OP_READ) ] = {
524                 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
525                 [ C(RESULT_MISS)   ] = 0x03e8, /* BPU_CLEARS.ANY               */
526         },
527         [ C(OP_WRITE) ] = {
528                 [ C(RESULT_ACCESS) ] = -1,
529                 [ C(RESULT_MISS)   ] = -1,
530         },
531         [ C(OP_PREFETCH) ] = {
532                 [ C(RESULT_ACCESS) ] = -1,
533                 [ C(RESULT_MISS)   ] = -1,
534         },
535  },
536  [ C(NODE) ] = {
537         [ C(OP_READ) ] = {
538                 [ C(RESULT_ACCESS) ] = 0x01b7,
539                 [ C(RESULT_MISS)   ] = 0x01b7,
540         },
541         [ C(OP_WRITE) ] = {
542                 [ C(RESULT_ACCESS) ] = 0x01b7,
543                 [ C(RESULT_MISS)   ] = 0x01b7,
544         },
545         [ C(OP_PREFETCH) ] = {
546                 [ C(RESULT_ACCESS) ] = 0x01b7,
547                 [ C(RESULT_MISS)   ] = 0x01b7,
548         },
549  },
550 };
551
552 static __initconst const u64 core2_hw_cache_event_ids
553                                 [PERF_COUNT_HW_CACHE_MAX]
554                                 [PERF_COUNT_HW_CACHE_OP_MAX]
555                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
556 {
557  [ C(L1D) ] = {
558         [ C(OP_READ) ] = {
559                 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI          */
560                 [ C(RESULT_MISS)   ] = 0x0140, /* L1D_CACHE_LD.I_STATE       */
561         },
562         [ C(OP_WRITE) ] = {
563                 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI          */
564                 [ C(RESULT_MISS)   ] = 0x0141, /* L1D_CACHE_ST.I_STATE       */
565         },
566         [ C(OP_PREFETCH) ] = {
567                 [ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS      */
568                 [ C(RESULT_MISS)   ] = 0,
569         },
570  },
571  [ C(L1I ) ] = {
572         [ C(OP_READ) ] = {
573                 [ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS                  */
574                 [ C(RESULT_MISS)   ] = 0x0081, /* L1I.MISSES                 */
575         },
576         [ C(OP_WRITE) ] = {
577                 [ C(RESULT_ACCESS) ] = -1,
578                 [ C(RESULT_MISS)   ] = -1,
579         },
580         [ C(OP_PREFETCH) ] = {
581                 [ C(RESULT_ACCESS) ] = 0,
582                 [ C(RESULT_MISS)   ] = 0,
583         },
584  },
585  [ C(LL  ) ] = {
586         [ C(OP_READ) ] = {
587                 [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI                 */
588                 [ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE               */
589         },
590         [ C(OP_WRITE) ] = {
591                 [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI                 */
592                 [ C(RESULT_MISS)   ] = 0x412A, /* L2_ST.ISTATE               */
593         },
594         [ C(OP_PREFETCH) ] = {
595                 [ C(RESULT_ACCESS) ] = 0,
596                 [ C(RESULT_MISS)   ] = 0,
597         },
598  },
599  [ C(DTLB) ] = {
600         [ C(OP_READ) ] = {
601                 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI  (alias) */
602                 [ C(RESULT_MISS)   ] = 0x0208, /* DTLB_MISSES.MISS_LD        */
603         },
604         [ C(OP_WRITE) ] = {
605                 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI  (alias) */
606                 [ C(RESULT_MISS)   ] = 0x0808, /* DTLB_MISSES.MISS_ST        */
607         },
608         [ C(OP_PREFETCH) ] = {
609                 [ C(RESULT_ACCESS) ] = 0,
610                 [ C(RESULT_MISS)   ] = 0,
611         },
612  },
613  [ C(ITLB) ] = {
614         [ C(OP_READ) ] = {
615                 [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P         */
616                 [ C(RESULT_MISS)   ] = 0x1282, /* ITLBMISSES                 */
617         },
618         [ C(OP_WRITE) ] = {
619                 [ C(RESULT_ACCESS) ] = -1,
620                 [ C(RESULT_MISS)   ] = -1,
621         },
622         [ C(OP_PREFETCH) ] = {
623                 [ C(RESULT_ACCESS) ] = -1,
624                 [ C(RESULT_MISS)   ] = -1,
625         },
626  },
627  [ C(BPU ) ] = {
628         [ C(OP_READ) ] = {
629                 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY        */
630                 [ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED    */
631         },
632         [ C(OP_WRITE) ] = {
633                 [ C(RESULT_ACCESS) ] = -1,
634                 [ C(RESULT_MISS)   ] = -1,
635         },
636         [ C(OP_PREFETCH) ] = {
637                 [ C(RESULT_ACCESS) ] = -1,
638                 [ C(RESULT_MISS)   ] = -1,
639         },
640  },
641 };
642
643 static __initconst const u64 atom_hw_cache_event_ids
644                                 [PERF_COUNT_HW_CACHE_MAX]
645                                 [PERF_COUNT_HW_CACHE_OP_MAX]
646                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
647 {
648  [ C(L1D) ] = {
649         [ C(OP_READ) ] = {
650                 [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD               */
651                 [ C(RESULT_MISS)   ] = 0,
652         },
653         [ C(OP_WRITE) ] = {
654                 [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST               */
655                 [ C(RESULT_MISS)   ] = 0,
656         },
657         [ C(OP_PREFETCH) ] = {
658                 [ C(RESULT_ACCESS) ] = 0x0,
659                 [ C(RESULT_MISS)   ] = 0,
660         },
661  },
662  [ C(L1I ) ] = {
663         [ C(OP_READ) ] = {
664                 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                  */
665                 [ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                 */
666         },
667         [ C(OP_WRITE) ] = {
668                 [ C(RESULT_ACCESS) ] = -1,
669                 [ C(RESULT_MISS)   ] = -1,
670         },
671         [ C(OP_PREFETCH) ] = {
672                 [ C(RESULT_ACCESS) ] = 0,
673                 [ C(RESULT_MISS)   ] = 0,
674         },
675  },
676  [ C(LL  ) ] = {
677         [ C(OP_READ) ] = {
678                 [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI                 */
679                 [ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE               */
680         },
681         [ C(OP_WRITE) ] = {
682                 [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI                 */
683                 [ C(RESULT_MISS)   ] = 0x412A, /* L2_ST.ISTATE               */
684         },
685         [ C(OP_PREFETCH) ] = {
686                 [ C(RESULT_ACCESS) ] = 0,
687                 [ C(RESULT_MISS)   ] = 0,
688         },
689  },
690  [ C(DTLB) ] = {
691         [ C(OP_READ) ] = {
692                 [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI  (alias) */
693                 [ C(RESULT_MISS)   ] = 0x0508, /* DTLB_MISSES.MISS_LD        */
694         },
695         [ C(OP_WRITE) ] = {
696                 [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI  (alias) */
697                 [ C(RESULT_MISS)   ] = 0x0608, /* DTLB_MISSES.MISS_ST        */
698         },
699         [ C(OP_PREFETCH) ] = {
700                 [ C(RESULT_ACCESS) ] = 0,
701                 [ C(RESULT_MISS)   ] = 0,
702         },
703  },
704  [ C(ITLB) ] = {
705         [ C(OP_READ) ] = {
706                 [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P         */
707                 [ C(RESULT_MISS)   ] = 0x0282, /* ITLB.MISSES                */
708         },
709         [ C(OP_WRITE) ] = {
710                 [ C(RESULT_ACCESS) ] = -1,
711                 [ C(RESULT_MISS)   ] = -1,
712         },
713         [ C(OP_PREFETCH) ] = {
714                 [ C(RESULT_ACCESS) ] = -1,
715                 [ C(RESULT_MISS)   ] = -1,
716         },
717  },
718  [ C(BPU ) ] = {
719         [ C(OP_READ) ] = {
720                 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY        */
721                 [ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED    */
722         },
723         [ C(OP_WRITE) ] = {
724                 [ C(RESULT_ACCESS) ] = -1,
725                 [ C(RESULT_MISS)   ] = -1,
726         },
727         [ C(OP_PREFETCH) ] = {
728                 [ C(RESULT_ACCESS) ] = -1,
729                 [ C(RESULT_MISS)   ] = -1,
730         },
731  },
732 };
733
734 static void intel_pmu_disable_all(void)
735 {
736         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
737
738         wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
739
740         if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask))
741                 intel_pmu_disable_bts();
742
743         intel_pmu_pebs_disable_all();
744         intel_pmu_lbr_disable_all();
745 }
746
747 static void intel_pmu_enable_all(int added)
748 {
749         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
750
751         intel_pmu_pebs_enable_all();
752         intel_pmu_lbr_enable_all();
753         wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL,
754                         x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask);
755
756         if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
757                 struct perf_event *event =
758                         cpuc->events[X86_PMC_IDX_FIXED_BTS];
759
760                 if (WARN_ON_ONCE(!event))
761                         return;
762
763                 intel_pmu_enable_bts(event->hw.config);
764         }
765 }
766
767 /*
768  * Workaround for:
769  *   Intel Errata AAK100 (model 26)
770  *   Intel Errata AAP53  (model 30)
771  *   Intel Errata BD53   (model 44)
772  *
773  * The official story:
774  *   These chips need to be 'reset' when adding counters by programming the
775  *   magic three (non-counting) events 0x4300B5, 0x4300D2, and 0x4300B1 either
776  *   in sequence on the same PMC or on different PMCs.
777  *
778  * In practice it appears some of these events do in fact count, and
779  * we need to program all 4 events.
780  */
781 static void intel_pmu_nhm_workaround(void)
782 {
783         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
784         static const unsigned long nhm_magic[4] = {
785                 0x4300B5,
786                 0x4300D2,
787                 0x4300B1,
788                 0x4300B1
789         };
790         struct perf_event *event;
791         int i;
792
793         /*
794          * The errata requires the steps below:
795          * 1) Clear MSR_IA32_PEBS_ENABLE and MSR_CORE_PERF_GLOBAL_CTRL;
796          * 2) Configure 4 PERFEVTSELx with the magic events and clear
797          *    the corresponding PMCx;
798          * 3) Set bits 0-3 of MSR_CORE_PERF_GLOBAL_CTRL;
799          * 4) Clear MSR_CORE_PERF_GLOBAL_CTRL;
800          * 5) Clear the 4 pairs of PERFEVTSELx and PMCx;
801          */
802
803         /*
804          * The real steps we take are a little different from the above:
805          * A) To reduce MSR operations, we don't run step 1) as the MSRs
806          *    are already cleared before this function is called;
807          * B) Call x86_perf_event_update to save PMCx before configuring
808          *    PERFEVTSELx with the magic numbers;
809          * C) For step 5), we only clear a PERFEVTSELx if it is not
810          *    currently in use;
811          * D) Call x86_perf_event_set_period to restore PMCx.
812          */
813
814         /* We always operate on 4 pairs of PERF counters */
815         for (i = 0; i < 4; i++) {
816                 event = cpuc->events[i];
817                 if (event)
818                         x86_perf_event_update(event);
819         }
820
821         for (i = 0; i < 4; i++) {
822                 wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, nhm_magic[i]);
823                 wrmsrl(MSR_ARCH_PERFMON_PERFCTR0 + i, 0x0);
824         }
825
826         wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0xf);
827         wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0);
828
829         for (i = 0; i < 4; i++) {
830                 event = cpuc->events[i];
831
832                 if (event) {
833                         x86_perf_event_set_period(event);
834                         __x86_pmu_enable_event(&event->hw,
835                                         ARCH_PERFMON_EVENTSEL_ENABLE);
836                 } else
837                         wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, 0x0);
838         }
839 }
840
841 static void intel_pmu_nhm_enable_all(int added)
842 {
843         if (added)
844                 intel_pmu_nhm_workaround();
845         intel_pmu_enable_all(added);
846 }
847
848 static inline u64 intel_pmu_get_status(void)
849 {
850         u64 status;
851
852         rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
853
854         return status;
855 }
856
857 static inline void intel_pmu_ack_status(u64 ack)
858 {
859         wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
860 }
861
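/*
 * Each fixed counter <idx> is controlled by a 4-bit field at bits
 * [4*idx+3:4*idx] of MSR_ARCH_PERFMON_FIXED_CTR_CTRL (enable-OS,
 * enable-USR, ANY, enable-PMI).  Disabling simply clears that nibble.
 */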
862 static void intel_pmu_disable_fixed(struct hw_perf_event *hwc)
863 {
864         int idx = hwc->idx - X86_PMC_IDX_FIXED;
865         u64 ctrl_val, mask;
866
867         mask = 0xfULL << (idx * 4);
868
869         rdmsrl(hwc->config_base, ctrl_val);
870         ctrl_val &= ~mask;
871         wrmsrl(hwc->config_base, ctrl_val);
872 }
873
874 static void intel_pmu_disable_event(struct perf_event *event)
875 {
876         struct hw_perf_event *hwc = &event->hw;
877         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
878
879         if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) {
880                 intel_pmu_disable_bts();
881                 intel_pmu_drain_bts_buffer();
882                 return;
883         }
884
885         cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx);
886         cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx);
887
888         if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
889                 intel_pmu_disable_fixed(hwc);
890                 return;
891         }
892
893         x86_pmu_disable_event(event);
894
895         if (unlikely(event->attr.precise_ip))
896                 intel_pmu_pebs_disable(event);
897 }
898
899 static void intel_pmu_enable_fixed(struct hw_perf_event *hwc)
900 {
901         int idx = hwc->idx - X86_PMC_IDX_FIXED;
902         u64 ctrl_val, bits, mask;
903
904         /*
905          * Enable IRQ generation (0x8),
906          * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
907          * if requested:
908          */
909         bits = 0x8ULL;
910         if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
911                 bits |= 0x2;
912         if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
913                 bits |= 0x1;
914
915         /*
916          * ANY bit is supported in v3 and up
917          */
918         if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY)
919                 bits |= 0x4;
920
921         bits <<= (idx * 4);
922         mask = 0xfULL << (idx * 4);
923
924         rdmsrl(hwc->config_base, ctrl_val);
925         ctrl_val &= ~mask;
926         ctrl_val |= bits;
927         wrmsrl(hwc->config_base, ctrl_val);
928 }
929
930 static void intel_pmu_enable_event(struct perf_event *event)
931 {
932         struct hw_perf_event *hwc = &event->hw;
933         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
934
935         if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) {
936                 if (!__this_cpu_read(cpu_hw_events.enabled))
937                         return;
938
939                 intel_pmu_enable_bts(hwc->config);
940                 return;
941         }
942
943         if (event->attr.exclude_host)
944                 cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx);
945         if (event->attr.exclude_guest)
946                 cpuc->intel_ctrl_host_mask |= (1ull << hwc->idx);
947
948         if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
949                 intel_pmu_enable_fixed(hwc);
950                 return;
951         }
952
953         if (unlikely(event->attr.precise_ip))
954                 intel_pmu_pebs_enable(event);
955
956         __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
957 }
958
959 /*
960  * Save and restart an expired event. Called by NMI contexts,
961  * so it has to be careful about preempting normal event ops:
962  */
963 int intel_pmu_save_and_restart(struct perf_event *event)
964 {
965         x86_perf_event_update(event);
966         return x86_perf_event_set_period(event);
967 }
968
969 static void intel_pmu_reset(void)
970 {
971         struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
972         unsigned long flags;
973         int idx;
974
975         if (!x86_pmu.num_counters)
976                 return;
977
978         local_irq_save(flags);
979
980         printk("clearing PMU state on CPU#%d\n", smp_processor_id());
981
982         for (idx = 0; idx < x86_pmu.num_counters; idx++) {
983                 checking_wrmsrl(x86_pmu_config_addr(idx), 0ull);
984                 checking_wrmsrl(x86_pmu_event_addr(idx),  0ull);
985         }
986         for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++)
987                 checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
988
989         if (ds)
990                 ds->bts_index = ds->bts_buffer_base;
991
992         local_irq_restore(flags);
993 }
994
995 /*
996  * This handler is triggered by the local APIC, so the APIC IRQ handling
997  * rules apply:
998  */
999 static int intel_pmu_handle_irq(struct pt_regs *regs)
1000 {
1001         struct perf_sample_data data;
1002         struct cpu_hw_events *cpuc;
1003         int bit, loops;
1004         u64 status;
1005         int handled;
1006
1007         perf_sample_data_init(&data, 0);
1008
1009         cpuc = &__get_cpu_var(cpu_hw_events);
1010
1011         /*
1012          * Some chipsets need to unmask the LVTPC in a particular spot
1013          * inside the nmi handler.  As a result, the unmasking was pushed
1014          * into all the nmi handlers.
1015          *
1016          * This handler doesn't seem to have any issues with the unmasking
1017          * so it was left at the top.
1018          */
1019         apic_write(APIC_LVTPC, APIC_DM_NMI);
1020
1021         intel_pmu_disable_all();
1022         handled = intel_pmu_drain_bts_buffer();
1023         status = intel_pmu_get_status();
1024         if (!status) {
1025                 intel_pmu_enable_all(0);
1026                 return handled;
1027         }
1028
1029         loops = 0;
1030 again:
1031         intel_pmu_ack_status(status);
1032         if (++loops > 100) {
1033                 WARN_ONCE(1, "perfevents: irq loop stuck!\n");
1034                 perf_event_print_debug();
1035                 intel_pmu_reset();
1036                 goto done;
1037         }
1038
1039         inc_irq_stat(apic_perf_irqs);
1040
1041         intel_pmu_lbr_read();
1042
1043         /*
1044          * PEBS overflow sets bit 62 in the global status register
1045          */
1046         if (__test_and_clear_bit(62, (unsigned long *)&status)) {
1047                 handled++;
1048                 x86_pmu.drain_pebs(regs);
1049         }
1050
1051         for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
1052                 struct perf_event *event = cpuc->events[bit];
1053
1054                 handled++;
1055
1056                 if (!test_bit(bit, cpuc->active_mask))
1057                         continue;
1058
1059                 if (!intel_pmu_save_and_restart(event))
1060                         continue;
1061
1062                 data.period = event->hw.last_period;
1063
1064                 if (perf_event_overflow(event, &data, regs))
1065                         x86_pmu_stop(event, 0);
1066         }
1067
1068         /*
1069          * Repeat if there is more work to be done:
1070          */
1071         status = intel_pmu_get_status();
1072         if (status)
1073                 goto again;
1074
1075 done:
1076         intel_pmu_enable_all(0);
1077         return handled;
1078 }
1079
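/*
 * BTS can stand in for BR_INST_RETIRED sampling with period 1 (every
 * branch), so such events are steered onto the BTS fake counter; any
 * other period, or freq mode, keeps the event on a regular PMC.
 */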
1080 static struct event_constraint *
1081 intel_bts_constraints(struct perf_event *event)
1082 {
1083         struct hw_perf_event *hwc = &event->hw;
1084         unsigned int hw_event, bts_event;
1085
1086         if (event->attr.freq)
1087                 return NULL;
1088
1089         hw_event = hwc->config & INTEL_ARCH_EVENT_MASK;
1090         bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
1091
1092         if (unlikely(hw_event == bts_event && hwc->sample_period == 1))
1093                 return &bts_constraint;
1094
1095         return NULL;
1096 }
1097
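/*
 * 0x01b7 is OFFCORE_RESPONSE_0 (backed by MSR_OFFCORE_RSP_0) and 0x01bb
 * is OFFCORE_RESPONSE_1 (backed by MSR_OFFCORE_RSP_1).  On PMUs that
 * have both (ERF_HAS_RSP_1), an offcore event whose shared MSR is
 * already taken with a different config can be retargeted to the
 * sibling event/MSR pair instead of failing the allocation.
 */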
1098 static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
1099 {
1100         if (!(x86_pmu.er_flags & ERF_HAS_RSP_1))
1101                 return false;
1102
1103         if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) {
1104                 event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
1105                 event->hw.config |= 0x01bb;
1106                 event->hw.extra_reg.idx = EXTRA_REG_RSP_1;
1107                 event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
1108         } else if (event->hw.extra_reg.idx == EXTRA_REG_RSP_1) {
1109                 event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
1110                 event->hw.config |= 0x01b7;
1111                 event->hw.extra_reg.idx = EXTRA_REG_RSP_0;
1112                 event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
1113         }
1114
1115         if (event->hw.extra_reg.idx == orig_idx)
1116                 return false;
1117
1118         return true;
1119 }
1120
1121 /*
1122  * manage allocation of shared extra msr for certain events
1123  *
1124  * sharing can be:
1125  * per-cpu: to be shared between the various events on a single PMU
1126  * per-core: per-cpu + shared by HT threads
1127  */
1128 static struct event_constraint *
1129 __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
1130                                    struct perf_event *event)
1131 {
1132         struct event_constraint *c = &emptyconstraint;
1133         struct hw_perf_event_extra *reg = &event->hw.extra_reg;
1134         struct er_account *era;
1135         unsigned long flags;
1136         int orig_idx = reg->idx;
1137
1138         /* already allocated shared msr */
1139         if (reg->alloc)
1140                 return &unconstrained;
1141
1142 again:
1143         era = &cpuc->shared_regs->regs[reg->idx];
1144         /*
1145          * we use raw_spin_lock_irqsave() to avoid lockdep issues when
1146          * passing a fake cpuc
1147          */
1148         raw_spin_lock_irqsave(&era->lock, flags);
1149
1150         if (!atomic_read(&era->ref) || era->config == reg->config) {
1151
1152                 /* lock in msr value */
1153                 era->config = reg->config;
1154                 era->reg = reg->reg;
1155
1156                 /* one more user */
1157                 atomic_inc(&era->ref);
1158
1159                 /* no need to reallocate during incremental event scheduling */
1160                 reg->alloc = 1;
1161
1162                 /*
1163                  * All events using extra_reg are unconstrained.
1164                  * Avoids calling x86_get_event_constraints()
1165                  *
1166                  * Must revisit if extra_reg controlling events
1167                  * ever have constraints. Worst case we go through
1168                  * the regular event constraint table.
1169                  */
1170                 c = &unconstrained;
1171         } else if (intel_try_alt_er(event, orig_idx)) {
1172                 raw_spin_unlock(&era->lock);
1173                 goto again;
1174         }
1175         raw_spin_unlock_irqrestore(&era->lock, flags);
1176
1177         return c;
1178 }
1179
1180 static void
1181 __intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc,
1182                                    struct hw_perf_event_extra *reg)
1183 {
1184         struct er_account *era;
1185
1186         /*
1187          * Only put the constraint if the extra reg was actually
1188          * allocated.  This also takes care of events which do
1189          * not use an extra shared reg.
1190          */
1191         if (!reg->alloc)
1192                 return;
1193
1194         era = &cpuc->shared_regs->regs[reg->idx];
1195
1196         /* one fewer user */
1197         atomic_dec(&era->ref);
1198
1199         /* allocate again next time */
1200         reg->alloc = 0;
1201 }
1202
1203 static struct event_constraint *
1204 intel_shared_regs_constraints(struct cpu_hw_events *cpuc,
1205                               struct perf_event *event)
1206 {
1207         struct event_constraint *c = NULL;
1208
1209         if (event->hw.extra_reg.idx != EXTRA_REG_NONE)
1210                 c = __intel_shared_reg_get_constraints(cpuc, event);
1211
1212         return c;
1213 }
1214
1215 struct event_constraint *
1216 x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
1217 {
1218         struct event_constraint *c;
1219
1220         if (x86_pmu.event_constraints) {
1221                 for_each_event_constraint(c, x86_pmu.event_constraints) {
1222                         if ((event->hw.config & c->cmask) == c->code)
1223                                 return c;
1224                 }
1225         }
1226
1227         return &unconstrained;
1228 }
1229
1230 static struct event_constraint *
1231 intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
1232 {
1233         struct event_constraint *c;
1234
1235         c = intel_bts_constraints(event);
1236         if (c)
1237                 return c;
1238
1239         c = intel_pebs_constraints(event);
1240         if (c)
1241                 return c;
1242
1243         c = intel_shared_regs_constraints(cpuc, event);
1244         if (c)
1245                 return c;
1246
1247         return x86_get_event_constraints(cpuc, event);
1248 }
1249
1250 static void
1251 intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
1252                                         struct perf_event *event)
1253 {
1254         struct hw_perf_event_extra *reg;
1255
1256         reg = &event->hw.extra_reg;
1257         if (reg->idx != EXTRA_REG_NONE)
1258                 __intel_shared_reg_put_constraints(cpuc, reg);
1259 }
1260
1261 static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
1262                                         struct perf_event *event)
1263 {
1264         intel_put_shared_regs_event_constraints(cpuc, event);
1265 }
1266
1267 static int intel_pmu_hw_config(struct perf_event *event)
1268 {
1269         int ret = x86_pmu_hw_config(event);
1270
1271         if (ret)
1272                 return ret;
1273
1274         if (event->attr.precise_ip &&
1275             (event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
1276                 /*
1277                  * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
1278                  * (0x003c) so that we can use it with PEBS.
1279                  *
1280                  * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
1281                  * PEBS capable. However we can use INST_RETIRED.ANY_P
1282                  * (0x00c0), which is a PEBS capable event, to get the same
1283                  * count.
1284                  *
1285                  * With a counter mask, INST_RETIRED.ANY_P counts the number of
1286                  * cycles that retire at least CNTMASK instructions. By setting
1287                  * CNTMASK to a value (16) larger than the maximum number of
1288                  * instructions that can be retired per cycle (4) and then
1289                  * inverting the condition, we count all cycles that retire 16 or
1290                  * fewer instructions, which is every cycle.
1291                  *
1292                  * Thereby we gain a PEBS capable cycle counter.
1293                  */
1294                 u64 alt_config = 0x108000c0; /* INST_RETIRED.TOTAL_CYCLES */
1295
1296                 alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
1297                 event->hw.config = alt_config;
1298         }
1299
1300         if (event->attr.type != PERF_TYPE_RAW)
1301                 return 0;
1302
1303         if (!(event->attr.config & ARCH_PERFMON_EVENTSEL_ANY))
1304                 return 0;
1305
1306         if (x86_pmu.version < 3)
1307                 return -EINVAL;
1308
1309         if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
1310                 return -EACCES;
1311
1312         event->hw.config |= ARCH_PERFMON_EVENTSEL_ANY;
1313
1314         return 0;
1315 }
1316
1317 struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
1318 {
1319         if (x86_pmu.guest_get_msrs)
1320                 return x86_pmu.guest_get_msrs(nr);
1321         *nr = 0;
1322         return NULL;
1323 }
1324 EXPORT_SYMBOL_GPL(perf_guest_get_msrs);
1325
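/*
 * With the full Intel PMU only MSR_CORE_PERF_GLOBAL_CTRL needs to be
 * switched on guest entry/exit; the guest/host masks maintained by
 * intel_pmu_{enable,disable}_event select which counters stay enabled
 * on each side.
 */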
1326 static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr)
1327 {
1328         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1329         struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
1330
1331         arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL;
1332         arr[0].host = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask;
1333         arr[0].guest = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_host_mask;
1334
1335         *nr = 1;
1336         return arr;
1337 }
1338
1339 static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr)
1340 {
1341         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1342         struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
1343         int idx;
1344
1345         for (idx = 0; idx < x86_pmu.num_counters; idx++)  {
1346                 struct perf_event *event = cpuc->events[idx];
1347
1348                 arr[idx].msr = x86_pmu_config_addr(idx);
1349                 arr[idx].host = arr[idx].guest = 0;
1350
1351                 if (!test_bit(idx, cpuc->active_mask))
1352                         continue;
1353
1354                 arr[idx].host = arr[idx].guest =
1355                         event->hw.config | ARCH_PERFMON_EVENTSEL_ENABLE;
1356
1357                 if (event->attr.exclude_host)
1358                         arr[idx].host &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
1359                 else if (event->attr.exclude_guest)
1360                         arr[idx].guest &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
1361         }
1362
1363         *nr = x86_pmu.num_counters;
1364         return arr;
1365 }
1366
1367 static void core_pmu_enable_event(struct perf_event *event)
1368 {
1369         if (!event->attr.exclude_host)
1370                 x86_pmu_enable_event(event);
1371 }
1372
1373 static void core_pmu_enable_all(int added)
1374 {
1375         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1376         int idx;
1377
1378         for (idx = 0; idx < x86_pmu.num_counters; idx++) {
1379                 struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
1380
1381                 if (!test_bit(idx, cpuc->active_mask) ||
1382                                 cpuc->events[idx]->attr.exclude_host)
1383                         continue;
1384
1385                 __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
1386         }
1387 }
1388
1389 static __initconst const struct x86_pmu core_pmu = {
1390         .name                   = "core",
1391         .handle_irq             = x86_pmu_handle_irq,
1392         .disable_all            = x86_pmu_disable_all,
1393         .enable_all             = core_pmu_enable_all,
1394         .enable                 = core_pmu_enable_event,
1395         .disable                = x86_pmu_disable_event,
1396         .hw_config              = x86_pmu_hw_config,
1397         .schedule_events        = x86_schedule_events,
1398         .eventsel               = MSR_ARCH_PERFMON_EVENTSEL0,
1399         .perfctr                = MSR_ARCH_PERFMON_PERFCTR0,
1400         .event_map              = intel_pmu_event_map,
1401         .max_events             = ARRAY_SIZE(intel_perfmon_event_map),
1402         .apic                   = 1,
1403         /*
1404          * Intel PMCs cannot be accessed sanely above 32 bit width,
1405          * so we install an artificial 1<<31 period regardless of
1406          * the generic event period:
1407          */
1408         .max_period             = (1ULL << 31) - 1,
1409         .get_event_constraints  = intel_get_event_constraints,
1410         .put_event_constraints  = intel_put_event_constraints,
1411         .event_constraints      = intel_core_event_constraints,
1412         .guest_get_msrs         = core_guest_get_msrs,
1413 };
1414
1415 struct intel_shared_regs *allocate_shared_regs(int cpu)
1416 {
1417         struct intel_shared_regs *regs;
1418         int i;
1419
1420         regs = kzalloc_node(sizeof(struct intel_shared_regs),
1421                             GFP_KERNEL, cpu_to_node(cpu));
1422         if (regs) {
1423                 /*
1424                  * initialize the locks to keep lockdep happy
1425                  */
1426                 for (i = 0; i < EXTRA_REG_MAX; i++)
1427                         raw_spin_lock_init(&regs->regs[i].lock);
1428
1429                 regs->core_id = -1;
1430         }
1431         return regs;
1432 }
1433
1434 static int intel_pmu_cpu_prepare(int cpu)
1435 {
1436         struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
1437
1438         if (!x86_pmu.extra_regs)
1439                 return NOTIFY_OK;
1440
1441         cpuc->shared_regs = allocate_shared_regs(cpu);
1442         if (!cpuc->shared_regs)
1443                 return NOTIFY_BAD;
1444
1445         return NOTIFY_OK;
1446 }
1447
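/*
 * HT siblings share a single intel_shared_regs instance: the first CPU
 * of a core to come online keeps its allocation, later siblings adopt
 * it (queueing their own copy on kfree_on_online) so that the extra
 * MSRs are arbitrated core-wide, unless ERF_NO_HT_SHARING is set.
 */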
1448 static void intel_pmu_cpu_starting(int cpu)
1449 {
1450         struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
1451         int core_id = topology_core_id(cpu);
1452         int i;
1453
1454         init_debug_store_on_cpu(cpu);
1455         /*
1456          * Deal with CPUs that don't clear their LBRs on power-up.
1457          */
1458         intel_pmu_lbr_reset();
1459
1460         if (!cpuc->shared_regs || (x86_pmu.er_flags & ERF_NO_HT_SHARING))
1461                 return;
1462
1463         for_each_cpu(i, topology_thread_cpumask(cpu)) {
1464                 struct intel_shared_regs *pc;
1465
1466                 pc = per_cpu(cpu_hw_events, i).shared_regs;
1467                 if (pc && pc->core_id == core_id) {
1468                         cpuc->kfree_on_online = cpuc->shared_regs;
1469                         cpuc->shared_regs = pc;
1470                         break;
1471                 }
1472         }
1473
1474         cpuc->shared_regs->core_id = core_id;
1475         cpuc->shared_regs->refcnt++;
1476 }
1477
1478 static void intel_pmu_cpu_dying(int cpu)
1479 {
1480         struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
1481         struct intel_shared_regs *pc;
1482
1483         pc = cpuc->shared_regs;
1484         if (pc) {
1485                 if (pc->core_id == -1 || --pc->refcnt == 0)
1486                         kfree(pc);
1487                 cpuc->shared_regs = NULL;
1488         }
1489
1490         fini_debug_store_on_cpu(cpu);
1491 }
1492
1493 static __initconst const struct x86_pmu intel_pmu = {
1494         .name                   = "Intel",
1495         .handle_irq             = intel_pmu_handle_irq,
1496         .disable_all            = intel_pmu_disable_all,
1497         .enable_all             = intel_pmu_enable_all,
1498         .enable                 = intel_pmu_enable_event,
1499         .disable                = intel_pmu_disable_event,
1500         .hw_config              = intel_pmu_hw_config,
1501         .schedule_events        = x86_schedule_events,
1502         .eventsel               = MSR_ARCH_PERFMON_EVENTSEL0,
1503         .perfctr                = MSR_ARCH_PERFMON_PERFCTR0,
1504         .event_map              = intel_pmu_event_map,
1505         .max_events             = ARRAY_SIZE(intel_perfmon_event_map),
1506         .apic                   = 1,
1507         /*
1508          * Intel PMCs cannot be accessed sanely above 32-bit width,
1509          * so we install an artificial 1<<31 period regardless of
1510          * the generic event period:
1511          */
1512         .max_period             = (1ULL << 31) - 1,
1513         .get_event_constraints  = intel_get_event_constraints,
1514         .put_event_constraints  = intel_put_event_constraints,
1515
1516         .cpu_prepare            = intel_pmu_cpu_prepare,
1517         .cpu_starting           = intel_pmu_cpu_starting,
1518         .cpu_dying              = intel_pmu_cpu_dying,
1519         .guest_get_msrs         = intel_guest_get_msrs,
1520 };
1521
1522 static void intel_clovertown_quirks(void)
1523 {
1524         /*
1525          * PEBS is unreliable due to:
1526          *
1527          *   AJ67  - PEBS may experience CPL leaks
1528          *   AJ68  - PEBS PMI may be delayed by one event
1529          *   AJ69  - GLOBAL_STATUS[62] will only be set when DEBUGCTL[12] is set
1530          *   AJ106 - FREEZE_LBRS_ON_PMI doesn't work in combination with PEBS
1531          *
1532          * AJ67 could be worked around by restricting the OS/USR flags.
1533          * AJ69 could be worked around by setting PMU_FREEZE_ON_PMI.
1534          *
1535          * AJ106 could possibly be worked around by not allowing LBR
1536          *       usage from PEBS, including the fixup.
1537          * AJ68  could possibly be worked around by always programming
1538          *       a pebs_event_reset[0] value and coping with the lost events.
1539          *
1540          * But taken together it might just make sense to not enable PEBS on
1541          * these chips.
1542          */
1543         printk(KERN_WARNING "PEBS disabled due to CPU errata.\n");
1544         x86_pmu.pebs = 0;
1545         x86_pmu.pebs_constraints = NULL;
1546 }
1547
1548 __init int intel_pmu_init(void)
1549 {
1550         union cpuid10_edx edx;
1551         union cpuid10_eax eax;
1552         unsigned int unused;
1553         unsigned int ebx;
1554         int version;
1555
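             /*
              * Without architectural perfmon (CPUID leaf 0xA) fall back to
              * the model-specific P6 or P4/NetBurst drivers.
              */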
1556         if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
1557                 switch (boot_cpu_data.x86) {
1558                 case 0x6:
1559                         return p6_pmu_init();
1560                 case 0xf:
1561                         return p4_pmu_init();
1562                 }
1563                 return -ENODEV;
1564         }
1565
1566         /*
1567          * Check whether the Architectural PerfMon supports
1568          * Branch Misses Retired hw_event or not.
1569          */
1570         cpuid(10, &eax.full, &ebx, &unused, &edx.full);
1571         if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
1572                 return -ENODEV;
1573
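             /*
              * CPUID leaf 0xA (queried above) describes the architectural
              * PMU: EAX carries the version, the number of general-purpose
              * counters, their width and the event mask length; EDX carries
              * the fixed-counter geometry.  Version 2 added the global
              * control/status MSRs, so older PMUs get the simpler core
              * driver below.
              */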
1574         version = eax.split.version_id;
1575         if (version < 2)
1576                 x86_pmu = core_pmu;
1577         else
1578                 x86_pmu = intel_pmu;
1579
1580         x86_pmu.version                 = version;
1581         x86_pmu.num_counters            = eax.split.num_counters;
1582         x86_pmu.cntval_bits             = eax.split.bit_width;
1583         x86_pmu.cntval_mask             = (1ULL << eax.split.bit_width) - 1;
1584
1585         /*
1586          * Quirk: v2 perfmon does not report fixed-purpose events, so
1587          * assume at least 3 events:
1588          */
1589         if (version > 1)
1590                 x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3);
1591
1592         /*
1593          * v2 and above have a perf capabilities MSR
1594          */
1595         if (version > 1) {
1596                 u64 capabilities;
1597
1598                 rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities);
1599                 x86_pmu.intel_cap.capabilities = capabilities;
1600         }
1601
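             /* Detect the DS-area based features: BTS and PEBS. */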
1602         intel_ds_init();
1603
1604         /*
1605          * Install the hw-cache-events table and the rest of the
1605          * model-specific PMU setup:
1606          */
1607         switch (boot_cpu_data.x86_model) {
1608         case 14: /* 65 nm core solo/duo, "Yonah" */
1609                 pr_cont("Core events, ");
1610                 break;
1611
1612         case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
1613                 x86_pmu.quirks = intel_clovertown_quirks;
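                     /* fall through */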
1614         case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
1615         case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
1616         case 29: /* six-core 45 nm xeon "Dunnington" */
1617                 memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
1618                        sizeof(hw_cache_event_ids));
1619
1620                 intel_pmu_lbr_init_core();
1621
1622                 x86_pmu.event_constraints = intel_core2_event_constraints;
1623                 x86_pmu.pebs_constraints = intel_core2_pebs_event_constraints;
1624                 pr_cont("Core2 events, ");
1625                 break;
1626
1627         case 26: /* 45 nm nehalem, "Bloomfield" */
1628         case 30: /* 45 nm nehalem, "Lynnfield" */
1629         case 46: /* 45 nm nehalem-ex, "Beckton" */
1630                 memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
1631                        sizeof(hw_cache_event_ids));
1632                 memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
1633                        sizeof(hw_cache_extra_regs));
1634
1635                 intel_pmu_lbr_init_nhm();
1636
1637                 x86_pmu.event_constraints = intel_nehalem_event_constraints;
1638                 x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints;
1639                 x86_pmu.enable_all = intel_pmu_nhm_enable_all;
1640                 x86_pmu.extra_regs = intel_nehalem_extra_regs;
1641
1642                 /* UOPS_ISSUED.STALLED_CYCLES */
1643                 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e;
1644                 /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
1645                 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1;
1646
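                     /*
                      * CPUID.0AH:EBX flags architectural events that are
                      * not available; bit 6 covers Branch Misses Retired
                      * (erratum AAJ80 on these parts).
                      */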
1647                 if (ebx & 0x40) {
1648                         /*
1649                          * Erratum AAJ80 detected, we work it around by using
1650                          * the BR_MISP_EXEC.ANY event. This will over-count
1651                          * branch-misses, but it's still much better than the
1652                          * architectural event which is often completely bogus:
1653                          */
1654                         intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;
1655
1656                         pr_cont("erratum AAJ80 worked around, ");
1657                 }
1658                 pr_cont("Nehalem events, ");
1659                 break;
1660
1661         case 28: /* Atom */
1662                 memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
1663                        sizeof(hw_cache_event_ids));
1664
1665                 intel_pmu_lbr_init_atom();
1666
1667                 x86_pmu.event_constraints = intel_gen_event_constraints;
1668                 x86_pmu.pebs_constraints = intel_atom_pebs_event_constraints;
1669                 pr_cont("Atom events, ");
1670                 break;
1671
1672         case 37: /* 32 nm nehalem, "Clarkdale" */
1673         case 44: /* 32 nm nehalem, "Gulftown" */
1674         case 47: /* 32 nm Xeon E7 */
1675                 memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
1676                        sizeof(hw_cache_event_ids));
1677                 memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
1678                        sizeof(hw_cache_extra_regs));
1679
1680                 intel_pmu_lbr_init_nhm();
1681
1682                 x86_pmu.event_constraints = intel_westmere_event_constraints;
1683                 x86_pmu.enable_all = intel_pmu_nhm_enable_all;
1684                 x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints;
1685                 x86_pmu.extra_regs = intel_westmere_extra_regs;
1686                 x86_pmu.er_flags |= ERF_HAS_RSP_1;
1687
1688                 /* UOPS_ISSUED.STALLED_CYCLES */
1689                 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e;
1690                 /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
1691                 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1;
1692
1693                 pr_cont("Westmere events, ");
1694                 break;
1695
1696         case 42: /* SandyBridge */
1697         case 45: /* SandyBridge, "Romley-EP" */
1698                 memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
1699                        sizeof(hw_cache_event_ids));
1700
1701                 intel_pmu_lbr_init_nhm();
1702
1703                 x86_pmu.event_constraints = intel_snb_event_constraints;
1704                 x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints;
1705                 x86_pmu.extra_regs = intel_snb_extra_regs;
1706                 /* all extra regs are per-cpu when HT is on */
1707                 x86_pmu.er_flags |= ERF_HAS_RSP_1;
1708                 x86_pmu.er_flags |= ERF_NO_HT_SHARING;
1709
1710                 /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
1711                 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e;
1712                 /* UOPS_DISPATCHED.THREAD,c=1,i=1 to count stall cycles */
1713                 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x18001b1;
1714
1715                 pr_cont("SandyBridge events, ");
1716                 break;
1717
1718         default:
1719                 switch (x86_pmu.version) {
1720                 case 1:
1721                         x86_pmu.event_constraints = intel_v1_event_constraints;
1722                         pr_cont("generic architected perfmon v1, ");
1723                         break;
1724                 default:
1725                         /*
1726                          * default constraints for v2 and up
1727                          */
1728                         x86_pmu.event_constraints = intel_gen_event_constraints;
1729                         pr_cont("generic architected perfmon, ");
1730                         break;
1731                 }
1732         }
1733         return 0;
1734 }