x86: use pr_info in perf_counter.c
[pandora-kernel.git] arch/x86/kernel/cpu/perf_counter.c
/*
 * Performance counter x86 architecture code
 *
 *  Copyright(C) 2008 Thomas Gleixner <tglx@linutronix.de>
 *  Copyright(C) 2008 Red Hat, Inc., Ingo Molnar
 *  Copyright(C) 2009 Jaswinder Singh Rajput
 *
 *  For licensing details see kernel-base/COPYING
 */

#include <linux/perf_counter.h>
#include <linux/capability.h>
#include <linux/notifier.h>
#include <linux/hardirq.h>
#include <linux/kprobes.h>
#include <linux/module.h>
#include <linux/kdebug.h>
#include <linux/sched.h>

#include <asm/perf_counter.h>
#include <asm/apic.h>

static bool perf_counters_initialized __read_mostly;

/*
 * Number of (generic) HW counters:
 */
static int nr_counters_generic __read_mostly;
static u64 perf_counter_mask __read_mostly;
static u64 counter_value_mask __read_mostly;

static int nr_counters_fixed __read_mostly;

struct cpu_hw_counters {
        struct perf_counter     *counters[X86_PMC_IDX_MAX];
        unsigned long           used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
        unsigned long           interrupts;
        u64                     global_enable;
};

/*
 * struct pmc_x86_ops - performance counter x86 ops
 */
struct pmc_x86_ops {
        u64             (*save_disable_all)(void);
        void            (*restore_all)(u64 ctrl);
        unsigned        eventsel;
        unsigned        perfctr;
        int             (*event_map)(int event);
        int             max_events;
};

static struct pmc_x86_ops *pmc_ops;

static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters);

/*
 * Intel PerfMon v3. Used on Core2 and later.
 */
static const int intel_perfmon_event_map[] =
{
  [PERF_COUNT_CPU_CYCLES]               = 0x003c,
  [PERF_COUNT_INSTRUCTIONS]             = 0x00c0,
  [PERF_COUNT_CACHE_REFERENCES]         = 0x4f2e,
  [PERF_COUNT_CACHE_MISSES]             = 0x412e,
  [PERF_COUNT_BRANCH_INSTRUCTIONS]      = 0x00c4,
  [PERF_COUNT_BRANCH_MISSES]            = 0x00c5,
  [PERF_COUNT_BUS_CYCLES]               = 0x013c,
};

static int pmc_intel_event_map(int event)
{
        return intel_perfmon_event_map[event];
}

/*
 * AMD Performance Monitor K7 and later.
 */
static const int amd_perfmon_event_map[] =
{
  [PERF_COUNT_CPU_CYCLES]               = 0x0076,
  [PERF_COUNT_INSTRUCTIONS]             = 0x00c0,
  [PERF_COUNT_CACHE_REFERENCES]         = 0x0080,
  [PERF_COUNT_CACHE_MISSES]             = 0x0081,
  [PERF_COUNT_BRANCH_INSTRUCTIONS]      = 0x00c4,
  [PERF_COUNT_BRANCH_MISSES]            = 0x00c5,
};

static int pmc_amd_event_map(int event)
{
        return amd_perfmon_event_map[event];
}

/*
 * Propagate counter elapsed time into the generic counter.
 * Can only be executed on the CPU where the counter is active.
 * Returns the delta events processed.
 */
static void
x86_perf_counter_update(struct perf_counter *counter,
                        struct hw_perf_counter *hwc, int idx)
{
        u64 prev_raw_count, new_raw_count, delta;

        /*
         * Careful: an NMI might modify the previous counter value.
         *
         * Our tactic to handle this is to first atomically read and
         * exchange a new raw count - then add that new-prev delta
         * count to the generic counter atomically:
         */
again:
        prev_raw_count = atomic64_read(&hwc->prev_count);
        rdmsrl(hwc->counter_base + idx, new_raw_count);

        if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
                                        new_raw_count) != prev_raw_count)
                goto again;

        /*
         * Now we have the new raw value and have updated the prev
         * timestamp already. We can now calculate the elapsed delta
         * (counter-)time and add that to the generic counter.
         *
         * Careful, not all hw sign-extends above the physical width
         * of the count, so we do that by clipping the delta to 32 bits:
         */
        delta = (u64)(u32)((s32)new_raw_count - (s32)prev_raw_count);

        atomic64_add(delta, &counter->count);
        atomic64_sub(delta, &hwc->period_left);
}
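
/*
 * Worked example of the clipped-delta update above: if the previous
 * raw count was 0xfffffff0 and the counter has since wrapped to
 * 0x00000010, then (s32)0x00000010 - (s32)0xfffffff0 = 0x20, so 32
 * events are added to counter->count even though the raw MSR value
 * went "backwards". Casting through u32 keeps the delta positive for
 * counters that do not sign-extend beyond their physical width.
 */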

/*
 * Setup the hardware configuration for a given hw_event_type
 */
static int __hw_perf_counter_init(struct perf_counter *counter)
{
        struct perf_counter_hw_event *hw_event = &counter->hw_event;
        struct hw_perf_counter *hwc = &counter->hw;

        if (unlikely(!perf_counters_initialized))
                return -EINVAL;

        /*
         * Generate PMC IRQs:
         * (keep 'enabled' bit clear for now)
         */
        hwc->config = ARCH_PERFMON_EVENTSEL_INT;

        /*
         * Count user and OS events unless requested not to.
         */
        if (!hw_event->exclude_user)
                hwc->config |= ARCH_PERFMON_EVENTSEL_USR;
        if (!hw_event->exclude_kernel)
                hwc->config |= ARCH_PERFMON_EVENTSEL_OS;

        /*
         * If privileged enough, allow NMI events:
         */
        hwc->nmi = 0;
        if (capable(CAP_SYS_ADMIN) && hw_event->nmi)
                hwc->nmi = 1;

        hwc->irq_period         = hw_event->irq_period;
        /*
         * Intel PMCs cannot be accessed sanely above 32 bit width,
         * so we install an artificial 1<<31 period regardless of
         * the generic counter period:
         */
        if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
                if ((s64)hwc->irq_period <= 0 || hwc->irq_period > 0x7FFFFFFF)
                        hwc->irq_period = 0x7FFFFFFF;

        atomic64_set(&hwc->period_left, hwc->irq_period);

        /*
         * Raw event types provide the config directly in the event structure
         */
        if (hw_event->raw) {
                hwc->config |= hw_event->type;
        } else {
                if (hw_event->type >= pmc_ops->max_events)
                        return -EINVAL;
                /*
                 * The generic map:
                 */
                hwc->config |= pmc_ops->event_map(hw_event->type);
        }
        counter->wakeup_pending = 0;

        return 0;
}

static u64 pmc_intel_save_disable_all(void)
{
        u64 ctrl;

        rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
        wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);

        return ctrl;
}

static u64 pmc_amd_save_disable_all(void)
{
        int idx;
        u64 val, ctrl = 0;

        for (idx = 0; idx < nr_counters_generic; idx++) {
                rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
                if (val & ARCH_PERFMON_EVENTSEL0_ENABLE)
                        ctrl |= (1 << idx);
                val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
                wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
        }

        return ctrl;
}

u64 hw_perf_save_disable(void)
{
        if (unlikely(!perf_counters_initialized))
                return 0;

        return pmc_ops->save_disable_all();
}
EXPORT_SYMBOL_GPL(hw_perf_save_disable);

static void pmc_intel_restore_all(u64 ctrl)
{
        wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
}

static void pmc_amd_restore_all(u64 ctrl)
{
        u64 val;
        int idx;

        for (idx = 0; idx < nr_counters_generic; idx++) {
                if (ctrl & (1 << idx)) {
                        rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
                        val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
                        wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
                }
        }
}

void hw_perf_restore(u64 ctrl)
{
        if (unlikely(!perf_counters_initialized))
                return;

        pmc_ops->restore_all(ctrl);
}
EXPORT_SYMBOL_GPL(hw_perf_restore);

static inline void
__pmc_fixed_disable(struct perf_counter *counter,
                    struct hw_perf_counter *hwc, unsigned int __idx)
{
        int idx = __idx - X86_PMC_IDX_FIXED;
        u64 ctrl_val, mask;
        int err;

        mask = 0xfULL << (idx * 4);

        rdmsrl(hwc->config_base, ctrl_val);
        ctrl_val &= ~mask;
        err = checking_wrmsrl(hwc->config_base, ctrl_val);
}

static inline void
__pmc_generic_disable(struct perf_counter *counter,
                           struct hw_perf_counter *hwc, unsigned int idx)
{
        if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL))
                __pmc_fixed_disable(counter, hwc, idx);
        else
                wrmsr_safe(hwc->config_base + idx, hwc->config, 0);
}

static DEFINE_PER_CPU(u64, prev_left[X86_PMC_IDX_MAX]);

/*
 * Set the next IRQ period, based on the hwc->period_left value.
 * To be called with the counter disabled in hw:
 */
static void
__hw_perf_counter_set_period(struct perf_counter *counter,
                             struct hw_perf_counter *hwc, int idx)
{
        s64 left = atomic64_read(&hwc->period_left);
        s32 period = hwc->irq_period;
        int err;

        /*
         * If we are way outside a reasonable range then just skip forward:
         */
        if (unlikely(left <= -period)) {
                left = period;
                atomic64_set(&hwc->period_left, left);
        }

        if (unlikely(left <= 0)) {
                left += period;
                atomic64_set(&hwc->period_left, left);
        }

        per_cpu(prev_left[idx], smp_processor_id()) = left;

        /*
         * The hw counter starts counting from this counter offset,
         * mark it to be able to extract future deltas:
         */
        atomic64_set(&hwc->prev_count, (u64)-left);

        err = checking_wrmsrl(hwc->counter_base + idx,
                             (u64)(-left) & counter_value_mask);
}
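
/*
 * Illustration: programming the PMC with -left means the hardware
 * counts up from (2^width - left) and raises its overflow interrupt
 * after roughly 'left' more events. For example, with left == 0x7fffffff
 * on a 48-bit-wide counter, the MSR is written with
 * (-0x7fffffff) & counter_value_mask == 0xffff80000001, so the next
 * PMI fires after about 2^31 events.
 */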

static inline void
__pmc_fixed_enable(struct perf_counter *counter,
                   struct hw_perf_counter *hwc, unsigned int __idx)
{
        int idx = __idx - X86_PMC_IDX_FIXED;
        u64 ctrl_val, bits, mask;
        int err;

        /*
         * Enable IRQ generation (0x8),
         * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
         * if requested:
         */
        bits = 0x8ULL;
        if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
                bits |= 0x2;
        if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
                bits |= 0x1;
        bits <<= (idx * 4);
        mask = 0xfULL << (idx * 4);

        rdmsrl(hwc->config_base, ctrl_val);
        ctrl_val &= ~mask;
        ctrl_val |= bits;
        err = checking_wrmsrl(hwc->config_base, ctrl_val);
}
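
/*
 * Layout note: MSR_ARCH_PERFMON_FIXED_CTR_CTRL dedicates one 4-bit
 * field per fixed counter, so for fixed counter idx the code above
 * builds e.g. 0xb (PMI | user | OS) and shifts it by idx * 4; the 0xf
 * mask clears only that counter's field before the new bits are
 * merged in.
 */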

static void
__pmc_generic_enable(struct perf_counter *counter,
                          struct hw_perf_counter *hwc, int idx)
{
        if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL))
                __pmc_fixed_enable(counter, hwc, idx);
        else
                wrmsr(hwc->config_base + idx,
                      hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE, 0);
}

static int
fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc)
{
        unsigned int event;

        if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
                return -1;

        if (unlikely(hwc->nmi))
                return -1;

        event = hwc->config & ARCH_PERFMON_EVENT_MASK;

        if (unlikely(event == pmc_ops->event_map(PERF_COUNT_INSTRUCTIONS)))
                return X86_PMC_IDX_FIXED_INSTRUCTIONS;
        if (unlikely(event == pmc_ops->event_map(PERF_COUNT_CPU_CYCLES)))
                return X86_PMC_IDX_FIXED_CPU_CYCLES;
        if (unlikely(event == pmc_ops->event_map(PERF_COUNT_BUS_CYCLES)))
                return X86_PMC_IDX_FIXED_BUS_CYCLES;

        return -1;
}

/*
 * Find a PMC slot for the freshly enabled / scheduled in counter:
 */
static int pmc_generic_enable(struct perf_counter *counter)
{
        struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
        struct hw_perf_counter *hwc = &counter->hw;
        int idx;

        idx = fixed_mode_idx(counter, hwc);
        if (idx >= 0) {
                /*
                 * Try to get the fixed counter, if that is already taken
                 * then try to get a generic counter:
                 */
                if (test_and_set_bit(idx, cpuc->used))
                        goto try_generic;

                hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
                /*
                 * We set it so that counter_base + idx in wrmsr/rdmsr maps to
                 * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
                 */
                hwc->counter_base =
                        MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
                hwc->idx = idx;
        } else {
                idx = hwc->idx;
                /* Try to get the previous generic counter again */
                if (test_and_set_bit(idx, cpuc->used)) {
try_generic:
                        idx = find_first_zero_bit(cpuc->used, nr_counters_generic);
                        if (idx == nr_counters_generic)
                                return -EAGAIN;

                        set_bit(idx, cpuc->used);
                        hwc->idx = idx;
                }
                hwc->config_base  = pmc_ops->eventsel;
                hwc->counter_base = pmc_ops->perfctr;
        }

        perf_counters_lapic_init(hwc->nmi);

        __pmc_generic_disable(counter, hwc, idx);

        cpuc->counters[idx] = counter;
        /*
         * Make it visible before enabling the hw:
         */
        smp_wmb();

        __hw_perf_counter_set_period(counter, hwc, idx);
        __pmc_generic_enable(counter, hwc, idx);

        return 0;
}

void perf_counter_print_debug(void)
{
        u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
        struct cpu_hw_counters *cpuc;
        int cpu, idx;

        if (!nr_counters_generic)
                return;

        local_irq_disable();

        cpu = smp_processor_id();
        cpuc = &per_cpu(cpu_hw_counters, cpu);

        if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
                rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
                rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
                rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
                rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);

                pr_info("\n");
                pr_info("CPU#%d: ctrl:       %016llx\n", cpu, ctrl);
                pr_info("CPU#%d: status:     %016llx\n", cpu, status);
                pr_info("CPU#%d: overflow:   %016llx\n", cpu, overflow);
                pr_info("CPU#%d: fixed:      %016llx\n", cpu, fixed);
        }
        pr_info("CPU#%d: used:       %016llx\n", cpu, *(u64 *)cpuc->used);

        for (idx = 0; idx < nr_counters_generic; idx++) {
                rdmsrl(pmc_ops->eventsel + idx, pmc_ctrl);
                rdmsrl(pmc_ops->perfctr  + idx, pmc_count);

                prev_left = per_cpu(prev_left[idx], cpu);

                pr_info("CPU#%d:   gen-PMC%d ctrl:  %016llx\n",
                        cpu, idx, pmc_ctrl);
                pr_info("CPU#%d:   gen-PMC%d count: %016llx\n",
                        cpu, idx, pmc_count);
                pr_info("CPU#%d:   gen-PMC%d left:  %016llx\n",
                        cpu, idx, prev_left);
        }
        for (idx = 0; idx < nr_counters_fixed; idx++) {
                rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);

                pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
                        cpu, idx, pmc_count);
        }
        local_irq_enable();
}

static void pmc_generic_disable(struct perf_counter *counter)
{
        struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
        struct hw_perf_counter *hwc = &counter->hw;
        unsigned int idx = hwc->idx;

        __pmc_generic_disable(counter, hwc, idx);

        clear_bit(idx, cpuc->used);
        cpuc->counters[idx] = NULL;
        /*
         * Make sure the cleared pointer becomes visible before we
         * (potentially) free the counter:
         */
        smp_wmb();

        /*
         * Drain the remaining delta count out of a counter
         * that we are disabling:
         */
        x86_perf_counter_update(counter, hwc, idx);
}

static void perf_store_irq_data(struct perf_counter *counter, u64 data)
{
        struct perf_data *irqdata = counter->irqdata;

        if (irqdata->len > PERF_DATA_BUFLEN - sizeof(u64)) {
                irqdata->overrun++;
        } else {
                u64 *p = (u64 *) &irqdata->data[irqdata->len];

                *p = data;
                irqdata->len += sizeof(u64);
        }
}

/*
 * Save and restart an expired counter. Called by NMI contexts,
 * so it has to be careful about preempting normal counter ops:
 */
static void perf_save_and_restart(struct perf_counter *counter)
{
        struct hw_perf_counter *hwc = &counter->hw;
        int idx = hwc->idx;

        x86_perf_counter_update(counter, hwc, idx);
        __hw_perf_counter_set_period(counter, hwc, idx);

        if (counter->state == PERF_COUNTER_STATE_ACTIVE)
                __pmc_generic_enable(counter, hwc, idx);
}

static void
perf_handle_group(struct perf_counter *sibling, u64 *status, u64 *overflown)
{
        struct perf_counter *counter, *group_leader = sibling->group_leader;

        /*
         * Store sibling timestamps (if any):
         */
        list_for_each_entry(counter, &group_leader->sibling_list, list_entry) {

                x86_perf_counter_update(counter, &counter->hw, counter->hw.idx);
                perf_store_irq_data(sibling, counter->hw_event.type);
                perf_store_irq_data(sibling, atomic64_read(&counter->count));
        }
}

/*
 * Maximum interrupt frequency of 100KHz per CPU
 */
#define PERFMON_MAX_INTERRUPTS (100000/HZ)
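
/*
 * Note: the throttle limit is expressed per timer tick, so with
 * HZ == 1000 (an example configuration) this evaluates to 100 PMIs per
 * tick, i.e. roughly 100,000 interrupts per second per CPU before
 * __smp_perf_counter_interrupt() stops re-enabling the counters and
 * perf_counter_unthrottle() has to clear the throttle.
 */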

/*
 * This handler is triggered by the local APIC, so the APIC IRQ handling
 * rules apply:
 */
static void __smp_perf_counter_interrupt(struct pt_regs *regs, int nmi)
{
        int bit, cpu = smp_processor_id();
        u64 ack, status;
        struct cpu_hw_counters *cpuc = &per_cpu(cpu_hw_counters, cpu);

        rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, cpuc->global_enable);

        /* Disable counters globally */
        wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
        ack_APIC_irq();

        rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
        if (!status)
                goto out;

again:
        inc_irq_stat(apic_perf_irqs);
        ack = status;
        for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
                struct perf_counter *counter = cpuc->counters[bit];

                clear_bit(bit, (unsigned long *) &status);
                if (!counter)
                        continue;

                perf_save_and_restart(counter);

                switch (counter->hw_event.record_type) {
                case PERF_RECORD_SIMPLE:
                        continue;
                case PERF_RECORD_IRQ:
                        perf_store_irq_data(counter, instruction_pointer(regs));
                        break;
                case PERF_RECORD_GROUP:
                        perf_handle_group(counter, &status, &ack);
                        break;
                }
                /*
                 * From NMI context we cannot call into the scheduler to
                 * do a task wakeup - but we mark these counters as
                 * wakeup_pending and initiate a wakeup callback:
                 */
                if (nmi) {
                        counter->wakeup_pending = 1;
                        set_tsk_thread_flag(current, TIF_PERF_COUNTERS);
                } else {
                        wake_up(&counter->waitq);
                }
        }

        wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);

        /*
         * Repeat if there is more work to be done:
         */
        rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
        if (status)
                goto again;
out:
        /*
         * Restore - do not reenable when global enable is off or throttled:
         */
        if (++cpuc->interrupts < PERFMON_MAX_INTERRUPTS)
                wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, cpuc->global_enable);
}

void perf_counter_unthrottle(void)
{
        struct cpu_hw_counters *cpuc;
        u64 global_enable;

        if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
                return;

        if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
                return;

        if (unlikely(!perf_counters_initialized))
                return;

        cpuc = &per_cpu(cpu_hw_counters, smp_processor_id());
        if (cpuc->interrupts >= PERFMON_MAX_INTERRUPTS) {
                if (printk_ratelimit())
                        printk(KERN_WARNING "PERFMON: max interrupts exceeded!\n");
                wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, cpuc->global_enable);
        }
        rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, global_enable);
        if (unlikely(cpuc->global_enable && !global_enable))
                wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, cpuc->global_enable);
        cpuc->interrupts = 0;
}

void smp_perf_counter_interrupt(struct pt_regs *regs)
{
        irq_enter();
        apic_write(APIC_LVTPC, LOCAL_PERF_VECTOR);
        __smp_perf_counter_interrupt(regs, 0);

        irq_exit();
}

/*
 * This handler is triggered by NMI contexts:
 */
void perf_counter_notify(struct pt_regs *regs)
{
        struct cpu_hw_counters *cpuc;
        unsigned long flags;
        int bit, cpu;

        local_irq_save(flags);
        cpu = smp_processor_id();
        cpuc = &per_cpu(cpu_hw_counters, cpu);

        for_each_bit(bit, cpuc->used, X86_PMC_IDX_MAX) {
                struct perf_counter *counter = cpuc->counters[bit];

                if (!counter)
                        continue;

                if (counter->wakeup_pending) {
                        counter->wakeup_pending = 0;
                        wake_up(&counter->waitq);
                }
        }

        local_irq_restore(flags);
}

void perf_counters_lapic_init(int nmi)
{
        u32 apic_val;

        if (!perf_counters_initialized)
                return;
        /*
         * Enable the performance counter vector in the APIC LVT:
         */
        apic_val = apic_read(APIC_LVTERR);

        apic_write(APIC_LVTERR, apic_val | APIC_LVT_MASKED);
        if (nmi)
                apic_write(APIC_LVTPC, APIC_DM_NMI);
        else
                apic_write(APIC_LVTPC, LOCAL_PERF_VECTOR);
        apic_write(APIC_LVTERR, apic_val);
}

static int __kprobes
perf_counter_nmi_handler(struct notifier_block *self,
                         unsigned long cmd, void *__args)
{
        struct die_args *args = __args;
        struct pt_regs *regs;

        if (likely(cmd != DIE_NMI_IPI))
                return NOTIFY_DONE;

        regs = args->regs;

        apic_write(APIC_LVTPC, APIC_DM_NMI);
        __smp_perf_counter_interrupt(regs, 1);

        return NOTIFY_STOP;
}

static __read_mostly struct notifier_block perf_counter_nmi_notifier = {
        .notifier_call          = perf_counter_nmi_handler,
        .next                   = NULL,
        .priority               = 1
};

static struct pmc_x86_ops pmc_intel_ops = {
        .save_disable_all       = pmc_intel_save_disable_all,
        .restore_all            = pmc_intel_restore_all,
        .eventsel               = MSR_ARCH_PERFMON_EVENTSEL0,
        .perfctr                = MSR_ARCH_PERFMON_PERFCTR0,
        .event_map              = pmc_intel_event_map,
        .max_events             = ARRAY_SIZE(intel_perfmon_event_map),
};

static struct pmc_x86_ops pmc_amd_ops = {
        .save_disable_all       = pmc_amd_save_disable_all,
        .restore_all            = pmc_amd_restore_all,
        .eventsel               = MSR_K7_EVNTSEL0,
        .perfctr                = MSR_K7_PERFCTR0,
        .event_map              = pmc_amd_event_map,
        .max_events             = ARRAY_SIZE(amd_perfmon_event_map),
};

static struct pmc_x86_ops *pmc_intel_init(void)
{
        union cpuid10_eax eax;
        unsigned int ebx;
        unsigned int unused;
        union cpuid10_edx edx;

        /*
         * Check whether the Architectural PerfMon supports
         * Branch Misses Retired Event or not.
         */
        cpuid(10, &eax.full, &ebx, &unused, &edx.full);
        if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
                return NULL;

        pr_info("Intel Performance Monitoring support detected.\n");
        pr_info("... version:         %d\n", eax.split.version_id);
        pr_info("... bit width:       %d\n", eax.split.bit_width);
        pr_info("... mask length:     %d\n", eax.split.mask_length);

        nr_counters_generic = eax.split.num_counters;
        nr_counters_fixed = edx.split.num_counters_fixed;
        counter_value_mask = (1ULL << eax.split.bit_width) - 1;

        return &pmc_intel_ops;
}

static struct pmc_x86_ops *pmc_amd_init(void)
{
        nr_counters_generic = 4;
        nr_counters_fixed = 0;

        pr_info("AMD Performance Monitoring support detected.\n");

        return &pmc_amd_ops;
}

void __init init_hw_perf_counters(void)
{
        if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
                return;

        switch (boot_cpu_data.x86_vendor) {
        case X86_VENDOR_INTEL:
                pmc_ops = pmc_intel_init();
                break;
        case X86_VENDOR_AMD:
                pmc_ops = pmc_amd_init();
                break;
        }
        if (!pmc_ops)
                return;

        pr_info("... num counters:    %d\n", nr_counters_generic);
        if (nr_counters_generic > X86_PMC_MAX_GENERIC) {
                nr_counters_generic = X86_PMC_MAX_GENERIC;
                WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!",
                        nr_counters_generic, X86_PMC_MAX_GENERIC);
        }
        perf_counter_mask = (1 << nr_counters_generic) - 1;
        perf_max_counters = nr_counters_generic;

        pr_info("... value mask:      %016Lx\n", counter_value_mask);

        if (nr_counters_fixed > X86_PMC_MAX_FIXED) {
                nr_counters_fixed = X86_PMC_MAX_FIXED;
                WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!",
                        nr_counters_fixed, X86_PMC_MAX_FIXED);
        }
        pr_info("... fixed counters:  %d\n", nr_counters_fixed);

        perf_counter_mask |= ((1LL << nr_counters_fixed)-1) << X86_PMC_IDX_FIXED;

        pr_info("... counter mask:    %016Lx\n", perf_counter_mask);
        perf_counters_initialized = true;

        perf_counters_lapic_init(0);
        register_die_notifier(&perf_counter_nmi_notifier);
}

static void pmc_generic_read(struct perf_counter *counter)
{
        x86_perf_counter_update(counter, &counter->hw, counter->hw.idx);
}

static const struct hw_perf_counter_ops x86_perf_counter_ops = {
        .enable         = pmc_generic_enable,
        .disable        = pmc_generic_disable,
        .read           = pmc_generic_read,
};

const struct hw_perf_counter_ops *
hw_perf_counter_init(struct perf_counter *counter)
{
        int err;

        err = __hw_perf_counter_init(counter);
        if (err)
                return NULL;

        return &x86_perf_counter_ops;
}