arch/x86/kernel/cpu/perf_event_intel.c
1 #ifdef CONFIG_CPU_SUP_INTEL
2
3 #define MAX_EXTRA_REGS 2
4
5 /*
6  * Per register state.
7  */
8 struct er_account {
9         int                     ref;            /* reference count */
10         unsigned int            extra_reg;      /* extra MSR number */
11         u64                     extra_config;   /* extra MSR config */
12 };
13
14 /*
15  * Per core state
16  * This is used to coordinate shared registers between HT threads.
17  */
18 struct intel_percore {
19         raw_spinlock_t          lock;           /* protect structure */
20         struct er_account       regs[MAX_EXTRA_REGS];
21         int                     refcnt;         /* number of threads */
22         unsigned                core_id;
23 };
24
25 /*
26  * Intel PerfMon, used on Core and later.
27  */
28 static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly =
29 {
30   [PERF_COUNT_HW_CPU_CYCLES]            = 0x003c,
31   [PERF_COUNT_HW_INSTRUCTIONS]          = 0x00c0,
32   [PERF_COUNT_HW_CACHE_REFERENCES]      = 0x4f2e,
33   [PERF_COUNT_HW_CACHE_MISSES]          = 0x412e,
34   [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]   = 0x00c4,
35   [PERF_COUNT_HW_BRANCH_MISSES]         = 0x00c5,
36   [PERF_COUNT_HW_BUS_CYCLES]            = 0x013c,
37 };
38
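/*
 * Illustrative sketch, not part of the original file: each entry above packs
 * the architectural unit mask and event select as (umask << 8) | event.
 * For example 0x412e is event 0x2e with umask 0x41 (LLC misses), while
 * 0x4f2e is the same event with umask 0x4f (LLC references).  A hypothetical
 * helper that splits such a code could look like:
 */
#if 0
static inline void example_split_event_code(u64 code, u8 *event, u8 *umask)
{
	*event = code & 0xff;		/* event select, bits 0-7  */
	*umask = (code >> 8) & 0xff;	/* unit mask,    bits 8-15 */
}
#endif
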
39 static struct event_constraint intel_core_event_constraints[] =
40 {
41         INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
42         INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
43         INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
44         INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
45         INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
46         INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FP_COMP_INSTR_RET */
47         EVENT_CONSTRAINT_END
48 };
49
50 static struct event_constraint intel_core2_event_constraints[] =
51 {
52         FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
53         FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
54         /*
55          * Core2 has Fixed Counter 2 listed as CPU_CLK_UNHALTED.REF and event
56          * 0x013c as CPU_CLK_UNHALTED.BUS and specifies there is a fixed
57          * ratio between these counters.
58          */
59         /* FIXED_EVENT_CONSTRAINT(0x013c, 2),  CPU_CLK_UNHALTED.REF */
60         INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
61         INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
62         INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
63         INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
64         INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
65         INTEL_EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */
66         INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
67         INTEL_EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */
68         INTEL_EVENT_CONSTRAINT(0xc9, 0x1), /* ITLB_MISS_RETIRED (T30-9) */
69         INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */
70         EVENT_CONSTRAINT_END
71 };
72
73 static struct event_constraint intel_nehalem_event_constraints[] =
74 {
75         FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
76         FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
77         /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
78         INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
79         INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
80         INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
81         INTEL_EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */
82         INTEL_EVENT_CONSTRAINT(0x48, 0x3), /* L1D_PEND_MISS */
83         INTEL_EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */
84         INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
85         INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
86         EVENT_CONSTRAINT_END
87 };
88
89 static struct extra_reg intel_nehalem_extra_regs[] =
90 {
91         INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff),
92         EVENT_EXTRA_END
93 };
94
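/*
 * Illustrative note, not from the original source: the OFFCORE_RESPONSE
 * event (code 0xb7) only selects the counter; the request/response type
 * being measured is programmed into the companion MSR_OFFCORE_RSP_0.
 * The table above limits the programmable bits to the low 16 bits (mask
 * 0xffff), and the value itself comes from the event's extra_config,
 * which user space typically supplies via attr.config1 in this scheme.
 */
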
95 static struct event_constraint intel_nehalem_percore_constraints[] =
96 {
97         INTEL_EVENT_CONSTRAINT(0xb7, 0),
98         EVENT_CONSTRAINT_END
99 };
100
101 static struct event_constraint intel_westmere_event_constraints[] =
102 {
103         FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
104         FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
105         /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
106         INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
107         INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */
108         INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
109         INTEL_EVENT_CONSTRAINT(0xb3, 0x1), /* SNOOPQ_REQUEST_OUTSTANDING */
110         EVENT_CONSTRAINT_END
111 };
112
113 static struct event_constraint intel_snb_event_constraints[] =
114 {
115         FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
116         FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
117         /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
118         INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */
119         INTEL_EVENT_CONSTRAINT(0xb7, 0x1), /* OFF_CORE_RESPONSE_0 */
120         INTEL_EVENT_CONSTRAINT(0xbb, 0x8), /* OFF_CORE_RESPONSE_1 */
121         INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
122         INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
123         EVENT_CONSTRAINT_END
124 };
125
126 static struct extra_reg intel_westmere_extra_regs[] =
127 {
128         INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff),
129         INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff),
130         EVENT_EXTRA_END
131 };
132
133 static struct event_constraint intel_westmere_percore_constraints[] =
134 {
135         INTEL_EVENT_CONSTRAINT(0xb7, 0),
136         INTEL_EVENT_CONSTRAINT(0xbb, 0),
137         EVENT_CONSTRAINT_END
138 };
139
140 static struct event_constraint intel_gen_event_constraints[] =
141 {
142         FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
143         FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
144         /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
145         EVENT_CONSTRAINT_END
146 };
147
148 static u64 intel_pmu_event_map(int hw_event)
149 {
150         return intel_perfmon_event_map[hw_event];
151 }
152
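/*
 * Illustrative usage, not from the original source: for a generic hardware
 * event this simply returns the raw code from the table above, e.g.
 * intel_pmu_event_map(PERF_COUNT_HW_CACHE_MISSES) == 0x412e.
 */
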
153 static __initconst const u64 snb_hw_cache_event_ids
154                                 [PERF_COUNT_HW_CACHE_MAX]
155                                 [PERF_COUNT_HW_CACHE_OP_MAX]
156                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
157 {
158  [ C(L1D) ] = {
159         [ C(OP_READ) ] = {
160                 [ C(RESULT_ACCESS) ] = 0xf1d0, /* MEM_UOP_RETIRED.LOADS        */
161                 [ C(RESULT_MISS)   ] = 0x0151, /* L1D.REPLACEMENT              */
162         },
163         [ C(OP_WRITE) ] = {
164                 [ C(RESULT_ACCESS) ] = 0xf2d0, /* MEM_UOP_RETIRED.STORES       */
165                 [ C(RESULT_MISS)   ] = 0x0851, /* L1D.ALL_M_REPLACEMENT        */
166         },
167         [ C(OP_PREFETCH) ] = {
168                 [ C(RESULT_ACCESS) ] = 0x0,
169                 [ C(RESULT_MISS)   ] = 0x024e, /* HW_PRE_REQ.DL1_MISS          */
170         },
171  },
172  [ C(L1I ) ] = {
173         [ C(OP_READ) ] = {
174                 [ C(RESULT_ACCESS) ] = 0x0,
175                 [ C(RESULT_MISS)   ] = 0x0280, /* ICACHE.MISSES */
176         },
177         [ C(OP_WRITE) ] = {
178                 [ C(RESULT_ACCESS) ] = -1,
179                 [ C(RESULT_MISS)   ] = -1,
180         },
181         [ C(OP_PREFETCH) ] = {
182                 [ C(RESULT_ACCESS) ] = 0x0,
183                 [ C(RESULT_MISS)   ] = 0x0,
184         },
185  },
186  [ C(LL  ) ] = {
187         [ C(OP_READ) ] = {
188                 /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
189                 [ C(RESULT_ACCESS) ] = 0x01b7,
190                 /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
191                 [ C(RESULT_MISS)   ] = 0x01b7,
192         },
193         [ C(OP_WRITE) ] = {
194                 /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
195                 [ C(RESULT_ACCESS) ] = 0x01b7,
196                 /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
197                 [ C(RESULT_MISS)   ] = 0x01b7,
198         },
199         [ C(OP_PREFETCH) ] = {
200                 /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
201                 [ C(RESULT_ACCESS) ] = 0x01b7,
202                 /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
203                 [ C(RESULT_MISS)   ] = 0x01b7,
204         },
205  },
206  [ C(DTLB) ] = {
207         [ C(OP_READ) ] = {
208                 [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOP_RETIRED.ALL_LOADS */
209                 [ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.CAUSES_A_WALK */
210         },
211         [ C(OP_WRITE) ] = {
212                 [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOP_RETIRED.ALL_STORES */
213                 [ C(RESULT_MISS)   ] = 0x0149, /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */
214         },
215         [ C(OP_PREFETCH) ] = {
216                 [ C(RESULT_ACCESS) ] = 0x0,
217                 [ C(RESULT_MISS)   ] = 0x0,
218         },
219  },
220  [ C(ITLB) ] = {
221         [ C(OP_READ) ] = {
222                 [ C(RESULT_ACCESS) ] = 0x1085, /* ITLB_MISSES.STLB_HIT         */
223                 [ C(RESULT_MISS)   ] = 0x0185, /* ITLB_MISSES.CAUSES_A_WALK    */
224         },
225         [ C(OP_WRITE) ] = {
226                 [ C(RESULT_ACCESS) ] = -1,
227                 [ C(RESULT_MISS)   ] = -1,
228         },
229         [ C(OP_PREFETCH) ] = {
230                 [ C(RESULT_ACCESS) ] = -1,
231                 [ C(RESULT_MISS)   ] = -1,
232         },
233  },
234  [ C(BPU ) ] = {
235         [ C(OP_READ) ] = {
236                 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
237                 [ C(RESULT_MISS)   ] = 0x00c5, /* BR_MISP_RETIRED.ALL_BRANCHES */
238         },
239         [ C(OP_WRITE) ] = {
240                 [ C(RESULT_ACCESS) ] = -1,
241                 [ C(RESULT_MISS)   ] = -1,
242         },
243         [ C(OP_PREFETCH) ] = {
244                 [ C(RESULT_ACCESS) ] = -1,
245                 [ C(RESULT_MISS)   ] = -1,
246         },
247  },
248 };
249
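/*
 * Illustrative note, not from the original source: a PERF_TYPE_HW_CACHE
 * event encodes its attr.config as
 *	(cache id) | (operation << 8) | (result << 16)
 * and the generic code uses those three fields to index tables like the
 * one above.  For example an L1D read-miss event,
 * C(L1D) | (C(OP_READ) << 8) | (C(RESULT_MISS) << 16), maps to raw code
 * 0x0151 (L1D.REPLACEMENT) on SandyBridge.  An entry of -1 means the
 * combination is not supported; 0 means no suitable event is available.
 */
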
250 static __initconst const u64 westmere_hw_cache_event_ids
251                                 [PERF_COUNT_HW_CACHE_MAX]
252                                 [PERF_COUNT_HW_CACHE_OP_MAX]
253                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
254 {
255  [ C(L1D) ] = {
256         [ C(OP_READ) ] = {
257                 [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS       */
258                 [ C(RESULT_MISS)   ] = 0x0151, /* L1D.REPL                     */
259         },
260         [ C(OP_WRITE) ] = {
261                 [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES      */
262                 [ C(RESULT_MISS)   ] = 0x0251, /* L1D.M_REPL                   */
263         },
264         [ C(OP_PREFETCH) ] = {
265                 [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS        */
266                 [ C(RESULT_MISS)   ] = 0x024e, /* L1D_PREFETCH.MISS            */
267         },
268  },
269  [ C(L1I ) ] = {
270         [ C(OP_READ) ] = {
271                 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                    */
272                 [ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                   */
273         },
274         [ C(OP_WRITE) ] = {
275                 [ C(RESULT_ACCESS) ] = -1,
276                 [ C(RESULT_MISS)   ] = -1,
277         },
278         [ C(OP_PREFETCH) ] = {
279                 [ C(RESULT_ACCESS) ] = 0x0,
280                 [ C(RESULT_MISS)   ] = 0x0,
281         },
282  },
283  [ C(LL  ) ] = {
284         [ C(OP_READ) ] = {
285                 /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
286                 [ C(RESULT_ACCESS) ] = 0x01b7,
287                 /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
288                 [ C(RESULT_MISS)   ] = 0x01b7,
289         },
290         /*
291          * Use RFO, not WRITEBACK, because a write miss would typically occur
292          * on RFO.
293          */
294         [ C(OP_WRITE) ] = {
295                 /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
296                 [ C(RESULT_ACCESS) ] = 0x01b7,
297                 /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
298                 [ C(RESULT_MISS)   ] = 0x01b7,
299         },
300         [ C(OP_PREFETCH) ] = {
301                 /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
302                 [ C(RESULT_ACCESS) ] = 0x01b7,
303                 /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
304                 [ C(RESULT_MISS)   ] = 0x01b7,
305         },
306  },
307  [ C(DTLB) ] = {
308         [ C(OP_READ) ] = {
309                 [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS       */
310                 [ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.ANY         */
311         },
312         [ C(OP_WRITE) ] = {
313                 [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES      */
314                 [ C(RESULT_MISS)   ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS  */
315         },
316         [ C(OP_PREFETCH) ] = {
317                 [ C(RESULT_ACCESS) ] = 0x0,
318                 [ C(RESULT_MISS)   ] = 0x0,
319         },
320  },
321  [ C(ITLB) ] = {
322         [ C(OP_READ) ] = {
323                 [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P           */
324                 [ C(RESULT_MISS)   ] = 0x0185, /* ITLB_MISSES.ANY              */
325         },
326         [ C(OP_WRITE) ] = {
327                 [ C(RESULT_ACCESS) ] = -1,
328                 [ C(RESULT_MISS)   ] = -1,
329         },
330         [ C(OP_PREFETCH) ] = {
331                 [ C(RESULT_ACCESS) ] = -1,
332                 [ C(RESULT_MISS)   ] = -1,
333         },
334  },
335  [ C(BPU ) ] = {
336         [ C(OP_READ) ] = {
337                 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
338                 [ C(RESULT_MISS)   ] = 0x03e8, /* BPU_CLEARS.ANY               */
339         },
340         [ C(OP_WRITE) ] = {
341                 [ C(RESULT_ACCESS) ] = -1,
342                 [ C(RESULT_MISS)   ] = -1,
343         },
344         [ C(OP_PREFETCH) ] = {
345                 [ C(RESULT_ACCESS) ] = -1,
346                 [ C(RESULT_MISS)   ] = -1,
347         },
348  },
349 };
350
351 /*
352  * Nehalem/Westmere MSR_OFFCORE_RESPONSE bits;
353  * See IA32 SDM Vol 3B 30.6.1.3
354  */
355
356 #define NHM_DMND_DATA_RD        (1 << 0)
357 #define NHM_DMND_RFO            (1 << 1)
358 #define NHM_DMND_IFETCH         (1 << 2)
359 #define NHM_DMND_WB             (1 << 3)
360 #define NHM_PF_DATA_RD          (1 << 4)
361 #define NHM_PF_DATA_RFO         (1 << 5)
362 #define NHM_PF_IFETCH           (1 << 6)
363 #define NHM_OFFCORE_OTHER       (1 << 7)
364 #define NHM_UNCORE_HIT          (1 << 8)
365 #define NHM_OTHER_CORE_HIT_SNP  (1 << 9)
366 #define NHM_OTHER_CORE_HITM     (1 << 10)
367                                 /* reserved */
368 #define NHM_REMOTE_CACHE_FWD    (1 << 12)
369 #define NHM_REMOTE_DRAM         (1 << 13)
370 #define NHM_LOCAL_DRAM          (1 << 14)
371 #define NHM_NON_DRAM            (1 << 15)
372
373 #define NHM_ALL_DRAM            (NHM_REMOTE_DRAM|NHM_LOCAL_DRAM)
374
375 #define NHM_DMND_READ           (NHM_DMND_DATA_RD)
376 #define NHM_DMND_WRITE          (NHM_DMND_RFO|NHM_DMND_WB)
377 #define NHM_DMND_PREFETCH       (NHM_PF_DATA_RD|NHM_PF_DATA_RFO)
378
379 #define NHM_L3_HIT      (NHM_UNCORE_HIT|NHM_OTHER_CORE_HIT_SNP|NHM_OTHER_CORE_HITM)
380 #define NHM_L3_MISS     (NHM_NON_DRAM|NHM_ALL_DRAM|NHM_REMOTE_CACHE_FWD)
381 #define NHM_L3_ACCESS   (NHM_L3_HIT|NHM_L3_MISS)
382
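/*
 * Worked example, illustrative and not from the original source: the LL
 * read entries in nehalem_hw_cache_extra_regs below expand to
 *	NHM_DMND_READ | NHM_L3_ACCESS = 0x0001 | 0xf700 = 0xf701
 *	NHM_DMND_READ | NHM_L3_MISS   = 0x0001 | 0xf000 = 0xf001
 * i.e. demand data reads qualified by the L3 hit/miss response bits; this
 * is the value that ends up in MSR_OFFCORE_RSP_0.
 */
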
383 static __initconst const u64 nehalem_hw_cache_extra_regs
384                                 [PERF_COUNT_HW_CACHE_MAX]
385                                 [PERF_COUNT_HW_CACHE_OP_MAX]
386                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
387 {
388  [ C(LL  ) ] = {
389         [ C(OP_READ) ] = {
390                 [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_L3_ACCESS,
391                 [ C(RESULT_MISS)   ] = NHM_DMND_READ|NHM_L3_MISS,
392         },
393         [ C(OP_WRITE) ] = {
394                 [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_L3_ACCESS,
395                 [ C(RESULT_MISS)   ] = NHM_DMND_WRITE|NHM_L3_MISS,
396         },
397         [ C(OP_PREFETCH) ] = {
398                 [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_L3_ACCESS,
399                 [ C(RESULT_MISS)   ] = NHM_DMND_PREFETCH|NHM_L3_MISS,
400         },
401  }
402 };
403
404 static __initconst const u64 nehalem_hw_cache_event_ids
405                                 [PERF_COUNT_HW_CACHE_MAX]
406                                 [PERF_COUNT_HW_CACHE_OP_MAX]
407                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
408 {
409  [ C(L1D) ] = {
410         [ C(OP_READ) ] = {
411                 [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS       */
412                 [ C(RESULT_MISS)   ] = 0x0151, /* L1D.REPL                     */
413         },
414         [ C(OP_WRITE) ] = {
415                 [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES      */
416                 [ C(RESULT_MISS)   ] = 0x0251, /* L1D.M_REPL                   */
417         },
418         [ C(OP_PREFETCH) ] = {
419                 [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS        */
420                 [ C(RESULT_MISS)   ] = 0x024e, /* L1D_PREFETCH.MISS            */
421         },
422  },
423  [ C(L1I ) ] = {
424         [ C(OP_READ) ] = {
425                 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                    */
426                 [ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                   */
427         },
428         [ C(OP_WRITE) ] = {
429                 [ C(RESULT_ACCESS) ] = -1,
430                 [ C(RESULT_MISS)   ] = -1,
431         },
432         [ C(OP_PREFETCH) ] = {
433                 [ C(RESULT_ACCESS) ] = 0x0,
434                 [ C(RESULT_MISS)   ] = 0x0,
435         },
436  },
437  [ C(LL  ) ] = {
438         [ C(OP_READ) ] = {
439                 /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
440                 [ C(RESULT_ACCESS) ] = 0x01b7,
441                 /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
442                 [ C(RESULT_MISS)   ] = 0x01b7,
443         },
444         /*
445          * Use RFO, not WRITEBACK, because a write miss would typically occur
446          * on RFO.
447          */
448         [ C(OP_WRITE) ] = {
449                 /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
450                 [ C(RESULT_ACCESS) ] = 0x01b7,
451                 /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
452                 [ C(RESULT_MISS)   ] = 0x01b7,
453         },
454         [ C(OP_PREFETCH) ] = {
455                 /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
456                 [ C(RESULT_ACCESS) ] = 0x01b7,
457                 /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
458                 [ C(RESULT_MISS)   ] = 0x01b7,
459         },
460  },
461  [ C(DTLB) ] = {
462         [ C(OP_READ) ] = {
463                 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI   (alias)  */
464                 [ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.ANY         */
465         },
466         [ C(OP_WRITE) ] = {
467                 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI   (alias)  */
468                 [ C(RESULT_MISS)   ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS  */
469         },
470         [ C(OP_PREFETCH) ] = {
471                 [ C(RESULT_ACCESS) ] = 0x0,
472                 [ C(RESULT_MISS)   ] = 0x0,
473         },
474  },
475  [ C(ITLB) ] = {
476         [ C(OP_READ) ] = {
477                 [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P           */
478                 [ C(RESULT_MISS)   ] = 0x20c8, /* ITLB_MISS_RETIRED            */
479         },
480         [ C(OP_WRITE) ] = {
481                 [ C(RESULT_ACCESS) ] = -1,
482                 [ C(RESULT_MISS)   ] = -1,
483         },
484         [ C(OP_PREFETCH) ] = {
485                 [ C(RESULT_ACCESS) ] = -1,
486                 [ C(RESULT_MISS)   ] = -1,
487         },
488  },
489  [ C(BPU ) ] = {
490         [ C(OP_READ) ] = {
491                 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
492                 [ C(RESULT_MISS)   ] = 0x03e8, /* BPU_CLEARS.ANY               */
493         },
494         [ C(OP_WRITE) ] = {
495                 [ C(RESULT_ACCESS) ] = -1,
496                 [ C(RESULT_MISS)   ] = -1,
497         },
498         [ C(OP_PREFETCH) ] = {
499                 [ C(RESULT_ACCESS) ] = -1,
500                 [ C(RESULT_MISS)   ] = -1,
501         },
502  },
503 };
504
505 static __initconst const u64 core2_hw_cache_event_ids
506                                 [PERF_COUNT_HW_CACHE_MAX]
507                                 [PERF_COUNT_HW_CACHE_OP_MAX]
508                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
509 {
510  [ C(L1D) ] = {
511         [ C(OP_READ) ] = {
512                 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI          */
513                 [ C(RESULT_MISS)   ] = 0x0140, /* L1D_CACHE_LD.I_STATE       */
514         },
515         [ C(OP_WRITE) ] = {
516                 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI          */
517                 [ C(RESULT_MISS)   ] = 0x0141, /* L1D_CACHE_ST.I_STATE       */
518         },
519         [ C(OP_PREFETCH) ] = {
520                 [ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS      */
521                 [ C(RESULT_MISS)   ] = 0,
522         },
523  },
524  [ C(L1I ) ] = {
525         [ C(OP_READ) ] = {
526                 [ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS                  */
527                 [ C(RESULT_MISS)   ] = 0x0081, /* L1I.MISSES                 */
528         },
529         [ C(OP_WRITE) ] = {
530                 [ C(RESULT_ACCESS) ] = -1,
531                 [ C(RESULT_MISS)   ] = -1,
532         },
533         [ C(OP_PREFETCH) ] = {
534                 [ C(RESULT_ACCESS) ] = 0,
535                 [ C(RESULT_MISS)   ] = 0,
536         },
537  },
538  [ C(LL  ) ] = {
539         [ C(OP_READ) ] = {
540                 [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI                 */
541                 [ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE               */
542         },
543         [ C(OP_WRITE) ] = {
544                 [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI                 */
545                 [ C(RESULT_MISS)   ] = 0x412A, /* L2_ST.ISTATE               */
546         },
547         [ C(OP_PREFETCH) ] = {
548                 [ C(RESULT_ACCESS) ] = 0,
549                 [ C(RESULT_MISS)   ] = 0,
550         },
551  },
552  [ C(DTLB) ] = {
553         [ C(OP_READ) ] = {
554                 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI  (alias) */
555                 [ C(RESULT_MISS)   ] = 0x0208, /* DTLB_MISSES.MISS_LD        */
556         },
557         [ C(OP_WRITE) ] = {
558                 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI  (alias) */
559                 [ C(RESULT_MISS)   ] = 0x0808, /* DTLB_MISSES.MISS_ST        */
560         },
561         [ C(OP_PREFETCH) ] = {
562                 [ C(RESULT_ACCESS) ] = 0,
563                 [ C(RESULT_MISS)   ] = 0,
564         },
565  },
566  [ C(ITLB) ] = {
567         [ C(OP_READ) ] = {
568                 [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P         */
569                 [ C(RESULT_MISS)   ] = 0x1282, /* ITLBMISSES                 */
570         },
571         [ C(OP_WRITE) ] = {
572                 [ C(RESULT_ACCESS) ] = -1,
573                 [ C(RESULT_MISS)   ] = -1,
574         },
575         [ C(OP_PREFETCH) ] = {
576                 [ C(RESULT_ACCESS) ] = -1,
577                 [ C(RESULT_MISS)   ] = -1,
578         },
579  },
580  [ C(BPU ) ] = {
581         [ C(OP_READ) ] = {
582                 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY        */
583                 [ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED    */
584         },
585         [ C(OP_WRITE) ] = {
586                 [ C(RESULT_ACCESS) ] = -1,
587                 [ C(RESULT_MISS)   ] = -1,
588         },
589         [ C(OP_PREFETCH) ] = {
590                 [ C(RESULT_ACCESS) ] = -1,
591                 [ C(RESULT_MISS)   ] = -1,
592         },
593  },
594 };
595
596 static __initconst const u64 atom_hw_cache_event_ids
597                                 [PERF_COUNT_HW_CACHE_MAX]
598                                 [PERF_COUNT_HW_CACHE_OP_MAX]
599                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
600 {
601  [ C(L1D) ] = {
602         [ C(OP_READ) ] = {
603                 [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD               */
604                 [ C(RESULT_MISS)   ] = 0,
605         },
606         [ C(OP_WRITE) ] = {
607                 [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST               */
608                 [ C(RESULT_MISS)   ] = 0,
609         },
610         [ C(OP_PREFETCH) ] = {
611                 [ C(RESULT_ACCESS) ] = 0x0,
612                 [ C(RESULT_MISS)   ] = 0,
613         },
614  },
615  [ C(L1I ) ] = {
616         [ C(OP_READ) ] = {
617                 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                  */
618                 [ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                 */
619         },
620         [ C(OP_WRITE) ] = {
621                 [ C(RESULT_ACCESS) ] = -1,
622                 [ C(RESULT_MISS)   ] = -1,
623         },
624         [ C(OP_PREFETCH) ] = {
625                 [ C(RESULT_ACCESS) ] = 0,
626                 [ C(RESULT_MISS)   ] = 0,
627         },
628  },
629  [ C(LL  ) ] = {
630         [ C(OP_READ) ] = {
631                 [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI                 */
632                 [ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE               */
633         },
634         [ C(OP_WRITE) ] = {
635                 [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI                 */
636                 [ C(RESULT_MISS)   ] = 0x412A, /* L2_ST.ISTATE               */
637         },
638         [ C(OP_PREFETCH) ] = {
639                 [ C(RESULT_ACCESS) ] = 0,
640                 [ C(RESULT_MISS)   ] = 0,
641         },
642  },
643  [ C(DTLB) ] = {
644         [ C(OP_READ) ] = {
645                 [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI  (alias) */
646                 [ C(RESULT_MISS)   ] = 0x0508, /* DTLB_MISSES.MISS_LD        */
647         },
648         [ C(OP_WRITE) ] = {
649                 [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI  (alias) */
650                 [ C(RESULT_MISS)   ] = 0x0608, /* DTLB_MISSES.MISS_ST        */
651         },
652         [ C(OP_PREFETCH) ] = {
653                 [ C(RESULT_ACCESS) ] = 0,
654                 [ C(RESULT_MISS)   ] = 0,
655         },
656  },
657  [ C(ITLB) ] = {
658         [ C(OP_READ) ] = {
659                 [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P         */
660                 [ C(RESULT_MISS)   ] = 0x0282, /* ITLB.MISSES                */
661         },
662         [ C(OP_WRITE) ] = {
663                 [ C(RESULT_ACCESS) ] = -1,
664                 [ C(RESULT_MISS)   ] = -1,
665         },
666         [ C(OP_PREFETCH) ] = {
667                 [ C(RESULT_ACCESS) ] = -1,
668                 [ C(RESULT_MISS)   ] = -1,
669         },
670  },
671  [ C(BPU ) ] = {
672         [ C(OP_READ) ] = {
673                 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY        */
674                 [ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED    */
675         },
676         [ C(OP_WRITE) ] = {
677                 [ C(RESULT_ACCESS) ] = -1,
678                 [ C(RESULT_MISS)   ] = -1,
679         },
680         [ C(OP_PREFETCH) ] = {
681                 [ C(RESULT_ACCESS) ] = -1,
682                 [ C(RESULT_MISS)   ] = -1,
683         },
684  },
685 };
686
687 static void intel_pmu_disable_all(void)
688 {
689         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
690
691         wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
692
693         if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask))
694                 intel_pmu_disable_bts();
695
696         intel_pmu_pebs_disable_all();
697         intel_pmu_lbr_disable_all();
698 }
699
700 static void intel_pmu_enable_all(int added)
701 {
702         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
703
704         intel_pmu_pebs_enable_all();
705         intel_pmu_lbr_enable_all();
706         wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
707
708         if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
709                 struct perf_event *event =
710                         cpuc->events[X86_PMC_IDX_FIXED_BTS];
711
712                 if (WARN_ON_ONCE(!event))
713                         return;
714
715                 intel_pmu_enable_bts(event->hw.config);
716         }
717 }
718
719 /*
720  * Workaround for:
721  *   Intel Errata AAK100 (model 26)
722  *   Intel Errata AAP53  (model 30)
723  *   Intel Errata BD53   (model 44)
724  *
725  * The official story:
726  *   These chips need to be 'reset' when adding counters by programming the
727  *   magic three (non-counting) events 0x4300B5, 0x4300D2, and 0x4300B1 either
728  *   in sequence on the same PMC or on different PMCs.
729  *
730  * In practice it appears some of these events do in fact count, and
731  * we need to program all 4 events.
732  */
733 static void intel_pmu_nhm_workaround(void)
734 {
735         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
736         static const unsigned long nhm_magic[4] = {
737                 0x4300B5,
738                 0x4300D2,
739                 0x4300B1,
740                 0x4300B1
741         };
742         struct perf_event *event;
743         int i;
744
745         /*
746          * The erratum requires the following steps:
747          * 1) Clear MSR_IA32_PEBS_ENABLE and MSR_CORE_PERF_GLOBAL_CTRL;
748          * 2) Configure 4 PERFEVTSELx with the magic events and clear
749          *    the corresponding PMCx;
750          * 3) Set bits 0-3 of MSR_CORE_PERF_GLOBAL_CTRL;
751          * 4) Clear MSR_CORE_PERF_GLOBAL_CTRL;
752          * 5) Clear the 4 pairs of PERFEVTSELx and PMCx;
753          */
754
755         /*
756          * The steps we actually take differ slightly from the above:
757          * A) To reduce MSR operations, we skip step 1); those MSRs are
758          *    already cleared before this function is called;
759          * B) Call x86_perf_event_update to save PMCx before configuring
760          *    PERFEVTSELx with the magic number;
761          * C) For step 5), we only clear a PERFEVTSELx that is not
762          *    currently in use;
763          * D) Call x86_perf_event_set_period to restore PMCx;
764          */
765
766         /* We always operate on 4 pairs of performance counters */
767         for (i = 0; i < 4; i++) {
768                 event = cpuc->events[i];
769                 if (event)
770                         x86_perf_event_update(event);
771         }
772
773         for (i = 0; i < 4; i++) {
774                 wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, nhm_magic[i]);
775                 wrmsrl(MSR_ARCH_PERFMON_PERFCTR0 + i, 0x0);
776         }
777
778         wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0xf);
779         wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0);
780
781         for (i = 0; i < 4; i++) {
782                 event = cpuc->events[i];
783
784                 if (event) {
785                         x86_perf_event_set_period(event);
786                         __x86_pmu_enable_event(&event->hw,
787                                         ARCH_PERFMON_EVENTSEL_ENABLE);
788                 } else
789                         wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, 0x0);
790         }
791 }
792
793 static void intel_pmu_nhm_enable_all(int added)
794 {
795         if (added)
796                 intel_pmu_nhm_workaround();
797         intel_pmu_enable_all(added);
798 }
799
800 static inline u64 intel_pmu_get_status(void)
801 {
802         u64 status;
803
804         rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
805
806         return status;
807 }
808
809 static inline void intel_pmu_ack_status(u64 ack)
810 {
811         wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
812 }
813
814 static void intel_pmu_disable_fixed(struct hw_perf_event *hwc)
815 {
816         int idx = hwc->idx - X86_PMC_IDX_FIXED;
817         u64 ctrl_val, mask;
818
819         mask = 0xfULL << (idx * 4);
820
821         rdmsrl(hwc->config_base, ctrl_val);
822         ctrl_val &= ~mask;
823         wrmsrl(hwc->config_base, ctrl_val);
824 }
825
826 static void intel_pmu_disable_event(struct perf_event *event)
827 {
828         struct hw_perf_event *hwc = &event->hw;
829
830         if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) {
831                 intel_pmu_disable_bts();
832                 intel_pmu_drain_bts_buffer();
833                 return;
834         }
835
836         if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
837                 intel_pmu_disable_fixed(hwc);
838                 return;
839         }
840
841         x86_pmu_disable_event(event);
842
843         if (unlikely(event->attr.precise_ip))
844                 intel_pmu_pebs_disable(event);
845 }
846
847 static void intel_pmu_enable_fixed(struct hw_perf_event *hwc)
848 {
849         int idx = hwc->idx - X86_PMC_IDX_FIXED;
850         u64 ctrl_val, bits, mask;
851
852         /*
853          * Enable IRQ generation (0x8),
854          * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
855          * if requested:
856          */
857         bits = 0x8ULL;
858         if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
859                 bits |= 0x2;
860         if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
861                 bits |= 0x1;
862
863         /*
864          * ANY bit is supported in v3 and up
865          */
866         if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY)
867                 bits |= 0x4;
868
869         bits <<= (idx * 4);
870         mask = 0xfULL << (idx * 4);
871
872         rdmsrl(hwc->config_base, ctrl_val);
873         ctrl_val &= ~mask;
874         ctrl_val |= bits;
875         wrmsrl(hwc->config_base, ctrl_val);
876 }
877
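/*
 * Worked example, illustrative and not from the original source: each fixed
 * counter owns a 4-bit field in MSR_ARCH_PERFMON_FIXED_CTR_CTRL.  For fixed
 * counter 1 (idx == 1) with both USR and OS counting requested,
 * intel_pmu_enable_fixed() above builds bits = 0x8 | 0x2 | 0x1 = 0xb and
 * merges it at bit position idx * 4 == 4:
 *	ctrl_val = (ctrl_val & ~0xf0) | 0xb0;
 */
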
878 static void intel_pmu_enable_event(struct perf_event *event)
879 {
880         struct hw_perf_event *hwc = &event->hw;
881
882         if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) {
883                 if (!__this_cpu_read(cpu_hw_events.enabled))
884                         return;
885
886                 intel_pmu_enable_bts(hwc->config);
887                 return;
888         }
889
890         if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
891                 intel_pmu_enable_fixed(hwc);
892                 return;
893         }
894
895         if (unlikely(event->attr.precise_ip))
896                 intel_pmu_pebs_enable(event);
897
898         __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
899 }
900
901 /*
902  * Save and restart an expired event. Called from NMI context,
903  * so it has to be careful about preempting normal event ops:
904  */
905 static int intel_pmu_save_and_restart(struct perf_event *event)
906 {
907         x86_perf_event_update(event);
908         return x86_perf_event_set_period(event);
909 }
910
911 static void intel_pmu_reset(void)
912 {
913         struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
914         unsigned long flags;
915         int idx;
916
917         if (!x86_pmu.num_counters)
918                 return;
919
920         local_irq_save(flags);
921
922         printk("clearing PMU state on CPU#%d\n", smp_processor_id());
923
924         for (idx = 0; idx < x86_pmu.num_counters; idx++) {
925                 checking_wrmsrl(x86_pmu_config_addr(idx), 0ull);
926                 checking_wrmsrl(x86_pmu_event_addr(idx),  0ull);
927         }
928         for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++)
929                 checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
930
931         if (ds)
932                 ds->bts_index = ds->bts_buffer_base;
933
934         local_irq_restore(flags);
935 }
936
937 /*
938  * This handler is triggered by the local APIC, so the APIC IRQ handling
939  * rules apply:
940  */
941 static int intel_pmu_handle_irq(struct pt_regs *regs)
942 {
943         struct perf_sample_data data;
944         struct cpu_hw_events *cpuc;
945         int bit, loops;
946         u64 status;
947         int handled;
948
949         perf_sample_data_init(&data, 0);
950
951         cpuc = &__get_cpu_var(cpu_hw_events);
952
953         /*
954          * Some chipsets need to unmask the LVTPC in a particular spot
955          * inside the NMI handler.  As a result, the unmasking was pushed
956          * into all the NMI handlers.
957          *
958          * This handler doesn't seem to have any issues with the unmasking
959          * so it was left at the top.
960          */
961         apic_write(APIC_LVTPC, APIC_DM_NMI);
962
963         intel_pmu_disable_all();
964         handled = intel_pmu_drain_bts_buffer();
965         status = intel_pmu_get_status();
966         if (!status) {
967                 intel_pmu_enable_all(0);
968                 return handled;
969         }
970
971         loops = 0;
972 again:
973         intel_pmu_ack_status(status);
974         if (++loops > 100) {
975                 WARN_ONCE(1, "perfevents: irq loop stuck!\n");
976                 perf_event_print_debug();
977                 intel_pmu_reset();
978                 goto done;
979         }
980
981         inc_irq_stat(apic_perf_irqs);
982
983         intel_pmu_lbr_read();
984
985         /*
986          * PEBS overflow sets bit 62 in the global status register
987          */
988         if (__test_and_clear_bit(62, (unsigned long *)&status)) {
989                 handled++;
990                 x86_pmu.drain_pebs(regs);
991         }
992
993         for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
994                 struct perf_event *event = cpuc->events[bit];
995
996                 handled++;
997
998                 if (!test_bit(bit, cpuc->active_mask))
999                         continue;
1000
1001                 if (!intel_pmu_save_and_restart(event))
1002                         continue;
1003
1004                 data.period = event->hw.last_period;
1005
1006                 if (perf_event_overflow(event, 1, &data, regs))
1007                         x86_pmu_stop(event, 0);
1008         }
1009
1010         /*
1011          * Repeat if there is more work to be done:
1012          */
1013         status = intel_pmu_get_status();
1014         if (status)
1015                 goto again;
1016
1017 done:
1018         intel_pmu_enable_all(0);
1019         return handled;
1020 }
1021
1022 static struct event_constraint *
1023 intel_bts_constraints(struct perf_event *event)
1024 {
1025         struct hw_perf_event *hwc = &event->hw;
1026         unsigned int hw_event, bts_event;
1027
1028         if (event->attr.freq)
1029                 return NULL;
1030
1031         hw_event = hwc->config & INTEL_ARCH_EVENT_MASK;
1032         bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
1033
1034         if (unlikely(hw_event == bts_event && hwc->sample_period == 1))
1035                 return &bts_constraint;
1036
1037         return NULL;
1038 }
1039
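/*
 * Illustrative note, not from the original source: the BTS path above is
 * taken for a branch-instructions event sampled with a fixed period of 1,
 * e.g. attr.config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS (raw code 0x00c4),
 * attr.sample_period == 1 and attr.freq == 0; anything else falls back to
 * a normal counter.
 */
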
1040 static struct event_constraint *
1041 intel_percore_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
1042 {
1043         struct hw_perf_event *hwc = &event->hw;
1044         unsigned int e = hwc->config & ARCH_PERFMON_EVENTSEL_EVENT;
1045         struct event_constraint *c;
1046         struct intel_percore *pc;
1047         struct er_account *era;
1048         int i;
1049         int free_slot;
1050         int found;
1051
1052         if (!x86_pmu.percore_constraints || hwc->extra_alloc)
1053                 return NULL;
1054
1055         for (c = x86_pmu.percore_constraints; c->cmask; c++) {
1056                 if (e != c->code)
1057                         continue;
1058
1059                 /*
1060                  * Allocate resource per core.
1061                  */
1062                 pc = cpuc->per_core;
1063                 if (!pc)
1064                         break;
1065                 c = &emptyconstraint;
1066                 raw_spin_lock(&pc->lock);
1067                 free_slot = -1;
1068                 found = 0;
1069                 for (i = 0; i < MAX_EXTRA_REGS; i++) {
1070                         era = &pc->regs[i];
1071                         if (era->ref > 0 && hwc->extra_reg == era->extra_reg) {
1072                                 /* Allow sharing same config */
1073                                 if (hwc->extra_config == era->extra_config) {
1074                                         era->ref++;
1075                                         cpuc->percore_used = 1;
1076                                         hwc->extra_alloc = 1;
1077                                         c = NULL;
1078                                 }
1079                                 /* else conflict */
1080                                 found = 1;
1081                                 break;
1082                         } else if (era->ref == 0 && free_slot == -1)
1083                                 free_slot = i;
1084                 }
1085                 if (!found && free_slot != -1) {
1086                         era = &pc->regs[free_slot];
1087                         era->ref = 1;
1088                         era->extra_reg = hwc->extra_reg;
1089                         era->extra_config = hwc->extra_config;
1090                         cpuc->percore_used = 1;
1091                         hwc->extra_alloc = 1;
1092                         c = NULL;
1093                 }
1094                 raw_spin_unlock(&pc->lock);
1095                 return c;
1096         }
1097
1098         return NULL;
1099 }
1100
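/*
 * Illustrative note, not from the original source: with the scheme above,
 * two HT siblings measuring OFFCORE_RESPONSE_0 with the same extra_config
 * simply share the MSR (the er_account ref count goes to 2 and NULL is
 * returned, i.e. no extra constraint).  If their extra_config values
 * differ, the second event gets emptyconstraint and cannot be scheduled
 * until the first one releases the register.
 */
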
1101 static struct event_constraint *
1102 intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
1103 {
1104         struct event_constraint *c;
1105
1106         c = intel_bts_constraints(event);
1107         if (c)
1108                 return c;
1109
1110         c = intel_pebs_constraints(event);
1111         if (c)
1112                 return c;
1113
1114         c = intel_percore_constraints(cpuc, event);
1115         if (c)
1116                 return c;
1117
1118         return x86_get_event_constraints(cpuc, event);
1119 }
1120
1121 static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
1122                                         struct perf_event *event)
1123 {
1124         struct extra_reg *er;
1125         struct intel_percore *pc;
1126         struct er_account *era;
1127         struct hw_perf_event *hwc = &event->hw;
1128         int i, allref;
1129
1130         if (!cpuc->percore_used)
1131                 return;
1132
1133         for (er = x86_pmu.extra_regs; er->msr; er++) {
1134                 if (er->event != (hwc->config & er->config_mask))
1135                         continue;
1136
1137                 pc = cpuc->per_core;
1138                 raw_spin_lock(&pc->lock);
1139                 for (i = 0; i < MAX_EXTRA_REGS; i++) {
1140                         era = &pc->regs[i];
1141                         if (era->ref > 0 &&
1142                             era->extra_config == hwc->extra_config &&
1143                             era->extra_reg == er->msr) {
1144                                 era->ref--;
1145                                 hwc->extra_alloc = 0;
1146                                 break;
1147                         }
1148                 }
1149                 allref = 0;
1150                 for (i = 0; i < MAX_EXTRA_REGS; i++)
1151                         allref += pc->regs[i].ref;
1152                 if (allref == 0)
1153                         cpuc->percore_used = 0;
1154                 raw_spin_unlock(&pc->lock);
1155                 break;
1156         }
1157 }
1158
1159 static int intel_pmu_hw_config(struct perf_event *event)
1160 {
1161         int ret = x86_pmu_hw_config(event);
1162
1163         if (ret)
1164                 return ret;
1165
1166         if (event->attr.precise_ip &&
1167             (event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
1168                 /*
1169                  * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
1170                  * (0x003c) so that we can use it with PEBS.
1171                  *
1172                  * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
1173                  * PEBS capable. However we can use INST_RETIRED.ANY_P
1174                  * (0x00c0), which is a PEBS capable event, to get the same
1175                  * count.
1176                  *
1177                  * INST_RETIRED.ANY_P counts the number of cycles that retire
1178                  * at least CNTMASK instructions. By setting CNTMASK to a value
1179                  * (16) larger than the maximum number of instructions that can
1180                  * be retired per cycle (4) and then inverting the condition, we
1181                  * count all cycles that retire fewer than 16 instructions, which
1182                  * is every cycle.
1183                  *
1184                  * Thereby we gain a PEBS capable cycle counter.
1185                  */
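                /*
                 * Decoding the constant below (illustrative note, not from
                 * the original comment): 0x108000c0 is event 0xc0
                 * (INST_RETIRED.ANY_P) with umask 0x00, the INV bit (bit 23)
                 * set and CMASK = 0x10 (16) in bits 24-31, i.e. "cycles in
                 * which fewer than 16 instructions retired" -- which is
                 * every cycle.
                 */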
1186                 u64 alt_config = 0x108000c0; /* INST_RETIRED.TOTAL_CYCLES */
1187
1188                 alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
1189                 event->hw.config = alt_config;
1190         }
1191
1192         if (event->attr.type != PERF_TYPE_RAW)
1193                 return 0;
1194
1195         if (!(event->attr.config & ARCH_PERFMON_EVENTSEL_ANY))
1196                 return 0;
1197
1198         if (x86_pmu.version < 3)
1199                 return -EINVAL;
1200
1201         if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
1202                 return -EACCES;
1203
1204         event->hw.config |= ARCH_PERFMON_EVENTSEL_ANY;
1205
1206         return 0;
1207 }
1208
1209 static __initconst const struct x86_pmu core_pmu = {
1210         .name                   = "core",
1211         .handle_irq             = x86_pmu_handle_irq,
1212         .disable_all            = x86_pmu_disable_all,
1213         .enable_all             = x86_pmu_enable_all,
1214         .enable                 = x86_pmu_enable_event,
1215         .disable                = x86_pmu_disable_event,
1216         .hw_config              = x86_pmu_hw_config,
1217         .schedule_events        = x86_schedule_events,
1218         .eventsel               = MSR_ARCH_PERFMON_EVENTSEL0,
1219         .perfctr                = MSR_ARCH_PERFMON_PERFCTR0,
1220         .event_map              = intel_pmu_event_map,
1221         .max_events             = ARRAY_SIZE(intel_perfmon_event_map),
1222         .apic                   = 1,
1223         /*
1224          * Intel PMCs cannot be accessed sanely above 32 bit width,
1225          * so we install an artificial 1<<31 period regardless of
1226          * the generic event period:
1227          */
1228         .max_period             = (1ULL << 31) - 1,
1229         .get_event_constraints  = intel_get_event_constraints,
1230         .put_event_constraints  = intel_put_event_constraints,
1231         .event_constraints      = intel_core_event_constraints,
1232 };
1233
1234 static int intel_pmu_cpu_prepare(int cpu)
1235 {
1236         struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
1237
1238         if (!cpu_has_ht_siblings())
1239                 return NOTIFY_OK;
1240
1241         cpuc->per_core = kzalloc_node(sizeof(struct intel_percore),
1242                                       GFP_KERNEL, cpu_to_node(cpu));
1243         if (!cpuc->per_core)
1244                 return NOTIFY_BAD;
1245
1246         raw_spin_lock_init(&cpuc->per_core->lock);
1247         cpuc->per_core->core_id = -1;
1248         return NOTIFY_OK;
1249 }
1250
1251 static void intel_pmu_cpu_starting(int cpu)
1252 {
1253         struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
1254         int core_id = topology_core_id(cpu);
1255         int i;
1256
1257         init_debug_store_on_cpu(cpu);
1258         /*
1259          * Deal with CPUs that don't clear their LBRs on power-up.
1260          */
1261         intel_pmu_lbr_reset();
1262
1263         if (!cpu_has_ht_siblings())
1264                 return;
1265
1266         for_each_cpu(i, topology_thread_cpumask(cpu)) {
1267                 struct intel_percore *pc = per_cpu(cpu_hw_events, i).per_core;
1268
1269                 if (pc && pc->core_id == core_id) {
1270                         kfree(cpuc->per_core);
1271                         cpuc->per_core = pc;
1272                         break;
1273                 }
1274         }
1275
1276         cpuc->per_core->core_id = core_id;
1277         cpuc->per_core->refcnt++;
1278 }
1279
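/*
 * Illustrative note, not from the original source: on a two-thread core the
 * first sibling to come up finds no matching core_id, keeps the
 * intel_percore it allocated in intel_pmu_cpu_prepare() and sets refcnt to
 * 1; the second sibling finds the match, frees its own allocation, points
 * per_core at the shared structure and bumps refcnt to 2.
 */
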
1280 static void intel_pmu_cpu_dying(int cpu)
1281 {
1282         struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
1283         struct intel_percore *pc = cpuc->per_core;
1284
1285         if (pc) {
1286                 if (pc->core_id == -1 || --pc->refcnt == 0)
1287                         kfree(pc);
1288                 cpuc->per_core = NULL;
1289         }
1290
1291         fini_debug_store_on_cpu(cpu);
1292 }
1293
1294 static __initconst const struct x86_pmu intel_pmu = {
1295         .name                   = "Intel",
1296         .handle_irq             = intel_pmu_handle_irq,
1297         .disable_all            = intel_pmu_disable_all,
1298         .enable_all             = intel_pmu_enable_all,
1299         .enable                 = intel_pmu_enable_event,
1300         .disable                = intel_pmu_disable_event,
1301         .hw_config              = intel_pmu_hw_config,
1302         .schedule_events        = x86_schedule_events,
1303         .eventsel               = MSR_ARCH_PERFMON_EVENTSEL0,
1304         .perfctr                = MSR_ARCH_PERFMON_PERFCTR0,
1305         .event_map              = intel_pmu_event_map,
1306         .max_events             = ARRAY_SIZE(intel_perfmon_event_map),
1307         .apic                   = 1,
1308         /*
1309          * Intel PMCs cannot be accessed sanely above 32 bit width,
1310          * so we install an artificial 1<<31 period regardless of
1311          * the generic event period:
1312          */
1313         .max_period             = (1ULL << 31) - 1,
1314         .get_event_constraints  = intel_get_event_constraints,
1315         .put_event_constraints  = intel_put_event_constraints,
1316
1317         .cpu_prepare            = intel_pmu_cpu_prepare,
1318         .cpu_starting           = intel_pmu_cpu_starting,
1319         .cpu_dying              = intel_pmu_cpu_dying,
1320 };
1321
1322 static void intel_clovertown_quirks(void)
1323 {
1324         /*
1325          * PEBS is unreliable due to:
1326          *
1327          *   AJ67  - PEBS may experience CPL leaks
1328          *   AJ68  - PEBS PMI may be delayed by one event
1329          *   AJ69  - GLOBAL_STATUS[62] will only be set when DEBUGCTL[12]
1330          *   AJ106 - FREEZE_LBRS_ON_PMI doesn't work in combination with PEBS
1331          *
1332          * AJ67 could be worked around by restricting the OS/USR flags.
1333          * AJ69 could be worked around by setting PMU_FREEZE_ON_PMI.
1334          *
1335          * AJ106 could possibly be worked around by not allowing LBR
1336          *       usage from PEBS, including the fixup.
1337          * AJ68  could possibly be worked around by always programming
1338          *       a pebs_event_reset[0] value and coping with the lost events.
1339          *
1340          * But taken together it might just make sense to not enable PEBS on
1341          * these chips.
1342          */
1343         printk(KERN_WARNING "PEBS disabled due to CPU errata.\n");
1344         x86_pmu.pebs = 0;
1345         x86_pmu.pebs_constraints = NULL;
1346 }
1347
1348 static __init int intel_pmu_init(void)
1349 {
1350         union cpuid10_edx edx;
1351         union cpuid10_eax eax;
1352         unsigned int unused;
1353         unsigned int ebx;
1354         int version;
1355
1356         if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
1357                 switch (boot_cpu_data.x86) {
1358                 case 0x6:
1359                         return p6_pmu_init();
1360                 case 0xf:
1361                         return p4_pmu_init();
1362                 }
1363                 return -ENODEV;
1364         }
1365
1366         /*
1367          * Check whether the Architectural PerfMon supports
1368          * Branch Misses Retired hw_event or not.
1369          */
1370         cpuid(10, &eax.full, &ebx, &unused, &edx.full);
1371         if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
1372                 return -ENODEV;
1373
1374         version = eax.split.version_id;
1375         if (version < 2)
1376                 x86_pmu = core_pmu;
1377         else
1378                 x86_pmu = intel_pmu;
1379
1380         x86_pmu.version                 = version;
1381         x86_pmu.num_counters            = eax.split.num_counters;
1382         x86_pmu.cntval_bits             = eax.split.bit_width;
1383         x86_pmu.cntval_mask             = (1ULL << eax.split.bit_width) - 1;
1384
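        /*
         * Illustrative note, not from the original source: CPUID leaf 0xa
         * packs the architectural perfmon parameters as
         *	eax[7:0]   version_id          eax[15:8]  num_counters
         *	eax[23:16] bit_width           eax[31:24] mask_length
         *	edx[4:0]   num_counters_fixed  edx[12:5]  bit_width_fixed
         * which is what the cpuid10_eax/cpuid10_edx unions used above decode.
         */
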
1385         /*
1386          * Quirk: v2 perfmon does not report fixed-purpose events, so
1387          * assume at least 3 events:
1388          */
1389         if (version > 1)
1390                 x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3);
1391
1392         /*
1393          * v2 and above have a perf capabilities MSR
1394          */
1395         if (version > 1) {
1396                 u64 capabilities;
1397
1398                 rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities);
1399                 x86_pmu.intel_cap.capabilities = capabilities;
1400         }
1401
1402         intel_ds_init();
1403
1404         /*
1405          * Install the hw-cache-events table:
1406          */
1407         switch (boot_cpu_data.x86_model) {
1408         case 14: /* 65 nm core solo/duo, "Yonah" */
1409                 pr_cont("Core events, ");
1410                 break;
1411
1412         case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
1413                 x86_pmu.quirks = intel_clovertown_quirks;
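                /* fall through */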
1414         case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
1415         case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
1416         case 29: /* six-core 45 nm xeon "Dunnington" */
1417                 memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
1418                        sizeof(hw_cache_event_ids));
1419
1420                 intel_pmu_lbr_init_core();
1421
1422                 x86_pmu.event_constraints = intel_core2_event_constraints;
1423                 x86_pmu.pebs_constraints = intel_core2_pebs_event_constraints;
1424                 pr_cont("Core2 events, ");
1425                 break;
1426
1427         case 26: /* 45 nm nehalem, "Bloomfield" */
1428         case 30: /* 45 nm nehalem, "Lynnfield" */
1429         case 46: /* 45 nm nehalem-ex, "Beckton" */
1430                 memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
1431                        sizeof(hw_cache_event_ids));
1432                 memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
1433                        sizeof(hw_cache_extra_regs));
1434
1435                 intel_pmu_lbr_init_nhm();
1436
1437                 x86_pmu.event_constraints = intel_nehalem_event_constraints;
1438                 x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints;
1439                 x86_pmu.percore_constraints = intel_nehalem_percore_constraints;
1440                 x86_pmu.enable_all = intel_pmu_nhm_enable_all;
1441                 x86_pmu.extra_regs = intel_nehalem_extra_regs;
1442
1443                 if (ebx & 0x40) {
1444                         /*
1445                          * Erratum AAJ80 detected, we work it around by using
1446                          * the BR_MISP_EXEC.ANY event. This will over-count
1447                          * branch-misses, but it's still much better than the
1448                          * architectural event which is often completely bogus:
1449                          */
1450                         intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;
1451
1452                         pr_cont("erratum AAJ80 worked around, ");
1453                 }
1454                 pr_cont("Nehalem events, ");
1455                 break;
1456
1457         case 28: /* Atom */
1458                 memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
1459                        sizeof(hw_cache_event_ids));
1460
1461                 intel_pmu_lbr_init_atom();
1462
1463                 x86_pmu.event_constraints = intel_gen_event_constraints;
1464                 x86_pmu.pebs_constraints = intel_atom_pebs_event_constraints;
1465                 pr_cont("Atom events, ");
1466                 break;
1467
1468         case 37: /* 32 nm nehalem, "Clarkdale" */
1469         case 44: /* 32 nm nehalem, "Gulftown" */
1470         case 47: /* 32 nm Xeon E7 */
1471                 memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
1472                        sizeof(hw_cache_event_ids));
1473                 memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
1474                        sizeof(hw_cache_extra_regs));
1475
1476                 intel_pmu_lbr_init_nhm();
1477
1478                 x86_pmu.event_constraints = intel_westmere_event_constraints;
1479                 x86_pmu.percore_constraints = intel_westmere_percore_constraints;
1480                 x86_pmu.enable_all = intel_pmu_nhm_enable_all;
1481                 x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints;
1482                 x86_pmu.extra_regs = intel_westmere_extra_regs;
1483                 pr_cont("Westmere events, ");
1484                 break;
1485
1486         case 42: /* SandyBridge */
1487                 memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
1488                        sizeof(hw_cache_event_ids));
1489
1490                 intel_pmu_lbr_init_nhm();
1491
1492                 x86_pmu.event_constraints = intel_snb_event_constraints;
1493                 x86_pmu.pebs_constraints = intel_snb_pebs_events;
1494                 pr_cont("SandyBridge events, ");
1495                 break;
1496
1497         default:
1498                 /*
1499                  * default constraints for v2 and up
1500                  */
1501                 x86_pmu.event_constraints = intel_gen_event_constraints;
1502                 pr_cont("generic architected perfmon, ");
1503         }
1504         return 0;
1505 }
1506
1507 #else /* CONFIG_CPU_SUP_INTEL */
1508
1509 static int intel_pmu_init(void)
1510 {
1511         return 0;
1512 }
1513
1514 #endif /* CONFIG_CPU_SUP_INTEL */