/*
 * local apic based NMI watchdog for various CPUs.
 *
 * This file also handles reservation of performance counters for coordination
 * with other users (like oprofile).
 *
 * Note that these events normally don't tick when the CPU idles. This means
 * the frequency varies with CPU load.
 *
 * Original code for K7/P6 written by Keith Owens
 *
 */

#include <linux/percpu.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/bitops.h>
#include <linux/smp.h>
#include <linux/nmi.h>
#include <linux/kprobes.h>

#include <asm/apic.h>
#include <asm/perf_event.h>

struct nmi_watchdog_ctlblk {
        unsigned int cccr_msr;     /* P4 only: the CCCR gating the counter */
        unsigned int perfctr_msr;  /* the MSR to reset in NMI handler */
        unsigned int evntsel_msr;  /* the MSR to select the events to handle */
};

/* Interface defining a CPU specific perfctr watchdog */
struct wd_ops {
        int (*reserve)(void);
        void (*unreserve)(void);
        int (*setup)(unsigned nmi_hz);
        void (*rearm)(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz);
        void (*stop)(void);
        unsigned perfctr;       /* MSR of the counter to program */
        unsigned evntsel;       /* MSR of the matching event select */
        u64 checkbit;           /* bit that is set while the counter runs */
};

static const struct wd_ops *wd_ops;

/*
 * this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and its
 * offset from MSR_P4_BSU_ESCR0.
 *
 * It will be the max for all platforms (for now)
 */
#define NMI_MAX_COUNTER_BITS 66
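/* (MSR_P4_CRU_ESCR5 - MSR_P4_BSU_ESCR0 = 0x3e1 - 0x3a0 = 65, so bits 0..65) */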

/*
 * perfctr_nmi_owner tracks the ownership of the perfctr registers;
 * evntsel_nmi_owner tracks the ownership of the event selection registers.
 * Different performance counters / event selections may be reserved by
 * different subsystems; this reservation system just tries to coordinate
 * things a little.
 */
static DECLARE_BITMAP(perfctr_nmi_owner, NMI_MAX_COUNTER_BITS);
static DECLARE_BITMAP(evntsel_nmi_owner, NMI_MAX_COUNTER_BITS);

static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);

/*
 * converts an msr to an appropriate reservation bit, e.g. on AMD
 * MSR_K7_PERFCTR0 maps to bit 0, MSR_K7_PERFCTR1 to bit 1, and so on
 */
static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
{
        /* returns the bit offset of the performance counter register */
        switch (boot_cpu_data.x86_vendor) {
        case X86_VENDOR_AMD:
                return msr - MSR_K7_PERFCTR0;
        case X86_VENDOR_INTEL:
                if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
                        return msr - MSR_ARCH_PERFMON_PERFCTR0;

                switch (boot_cpu_data.x86) {
                case 6:
                        return msr - MSR_P6_PERFCTR0;
                case 15:
                        return msr - MSR_P4_BPU_PERFCTR0;
                }
        }
        return 0;
}

/*
 * converts an msr to an appropriate reservation bit
 * returns the bit offset of the event selection register
 */
static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
{
        switch (boot_cpu_data.x86_vendor) {
        case X86_VENDOR_AMD:
                return msr - MSR_K7_EVNTSEL0;
        case X86_VENDOR_INTEL:
                if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
                        return msr - MSR_ARCH_PERFMON_EVENTSEL0;

                switch (boot_cpu_data.x86) {
                case 6:
                        return msr - MSR_P6_EVNTSEL0;
                case 15:
                        return msr - MSR_P4_BSU_ESCR0;
                }
        }
        return 0;
}

/* checks for a bit availability (hack for oprofile) */
int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
{
        BUG_ON(counter >= NMI_MAX_COUNTER_BITS);

        return !test_bit(counter, perfctr_nmi_owner);
}
EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);

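/*
 * Reserve a counter MSR. Returns 1 on success, and also for MSRs that
 * fall outside the bitmap (those are not managed by this allocator);
 * returns 0 only when another subsystem already owns the counter.
 */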
int reserve_perfctr_nmi(unsigned int msr)
{
        unsigned int counter;

        counter = nmi_perfctr_msr_to_bit(msr);
        /* register not managed by the allocator? */
        if (counter >= NMI_MAX_COUNTER_BITS)
                return 1;

        if (!test_and_set_bit(counter, perfctr_nmi_owner))
                return 1;
        return 0;
}
EXPORT_SYMBOL(reserve_perfctr_nmi);

void release_perfctr_nmi(unsigned int msr)
{
        unsigned int counter;

        counter = nmi_perfctr_msr_to_bit(msr);
        /* register not managed by the allocator? */
        if (counter >= NMI_MAX_COUNTER_BITS)
                return;

        clear_bit(counter, perfctr_nmi_owner);
}
EXPORT_SYMBOL(release_perfctr_nmi);

int reserve_evntsel_nmi(unsigned int msr)
{
        unsigned int counter;

        counter = nmi_evntsel_msr_to_bit(msr);
        /* register not managed by the allocator? */
        if (counter >= NMI_MAX_COUNTER_BITS)
                return 1;

        if (!test_and_set_bit(counter, evntsel_nmi_owner))
                return 1;
        return 0;
}
EXPORT_SYMBOL(reserve_evntsel_nmi);

void release_evntsel_nmi(unsigned int msr)
{
        unsigned int counter;

        counter = nmi_evntsel_msr_to_bit(msr);
        /* register not managed by the allocator? */
        if (counter >= NMI_MAX_COUNTER_BITS)
                return;

        clear_bit(counter, evntsel_nmi_owner);
}
EXPORT_SYMBOL(release_evntsel_nmi);

void disable_lapic_nmi_watchdog(void)
{
        BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);

        if (atomic_read(&nmi_active) <= 0)
                return;

        on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);

        if (wd_ops)
                wd_ops->unreserve();

        BUG_ON(atomic_read(&nmi_active) != 0);
}

void enable_lapic_nmi_watchdog(void)
{
        BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);

        /* are we already enabled? */
        if (atomic_read(&nmi_active) != 0)
                return;

        /* are we lapic aware? */
        if (!wd_ops)
                return;
        if (!wd_ops->reserve()) {
                printk(KERN_ERR "NMI watchdog: cannot reserve perfctrs\n");
                return;
        }

        on_each_cpu(setup_apic_nmi_watchdog, NULL, 1);
        touch_nmi_watchdog();
}

/*
 * Activate the NMI watchdog via the local APIC.
 */

static unsigned int adjust_for_32bit_ctr(unsigned int hz)
{
        u64 counter_val;
        unsigned int retval = hz;

        /*
         * On Intel CPUs with P6/ARCH_PERFMON only 32 bits in the counter
         * are writable, with higher bits sign extending from bit 31.
         * Since we program the counter with a negated count, its magnitude
         * must fit in 31 bits so that bit 31 (and thus bits 32 and up)
         * stays set. Find the lowest nmi_hz for which that holds.
         */
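        /*
         * Example: on a 3 GHz CPU with nmi_hz == 1, counter_val is
         * 3,000,000,000 > 0x7fffffff, so nmi_hz is raised to
         * 3e9 / 0x7fffffff + 1 = 2 and the period fits in 31 bits.
         */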
        counter_val = (u64)cpu_khz * 1000;
        do_div(counter_val, retval);
        if (counter_val > 0x7fffffffULL) {
                u64 count = (u64)cpu_khz * 1000;
                do_div(count, 0x7fffffffUL);
                retval = count + 1;
        }
        return retval;
}

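/*
 * The perfctr counts up and raises an NMI when it overflows, so program
 * it with -(cpu_khz * 1000 / nmi_hz): after that many unhalted cycles it
 * wraps past zero and fires, giving about nmi_hz NMIs/sec under full load.
 */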
static void write_watchdog_counter(unsigned int perfctr_msr,
                                const char *descr, unsigned nmi_hz)
{
        u64 count = (u64)cpu_khz * 1000;

        do_div(count, nmi_hz);
        if (descr)
                pr_debug("setting %s to -0x%08Lx\n", descr, count);
        wrmsrl(perfctr_msr, 0 - count);
}

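/*
 * Variant for counters where only the low 32 bits are writable
 * (P6/ARCH_PERFMON): the hardware sign-extends from bit 31, so writing
 * the 32-bit negated count sets all the upper bits as well; see
 * adjust_for_32bit_ctr() for the matching nmi_hz adjustment.
 */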
static void write_watchdog_counter32(unsigned int perfctr_msr,
                                const char *descr, unsigned nmi_hz)
{
        u64 count = (u64)cpu_khz * 1000;

        do_div(count, nmi_hz);
        if (descr)
                pr_debug("setting %s to -0x%08Lx\n", descr, count);
        wrmsr(perfctr_msr, (u32)(-count), 0);
}

/*
 * AMD K7/K8/Family10h/Family11h support.
 * AMD keeps this interface nicely stable, so there is not much variety.
 */
#define K7_EVNTSEL_ENABLE       (1 << 22)
#define K7_EVNTSEL_INT          (1 << 20)
#define K7_EVNTSEL_OS           (1 << 17)
#define K7_EVNTSEL_USR          (1 << 16)
#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING    0x76
#define K7_NMI_EVENT            K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING

static int setup_k7_watchdog(unsigned nmi_hz)
{
        unsigned int perfctr_msr, evntsel_msr;
        unsigned int evntsel;
        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

        perfctr_msr = wd_ops->perfctr;
        evntsel_msr = wd_ops->evntsel;

        wrmsrl(perfctr_msr, 0UL);

        evntsel = K7_EVNTSEL_INT
                | K7_EVNTSEL_OS
                | K7_EVNTSEL_USR
                | K7_NMI_EVENT;

        /* setup the timer */
        wrmsr(evntsel_msr, evntsel, 0);
        write_watchdog_counter(perfctr_msr, "K7_PERFCTR0", nmi_hz);

        /* initialize the wd struct before enabling */
        wd->perfctr_msr = perfctr_msr;
        wd->evntsel_msr = evntsel_msr;
        wd->cccr_msr = 0;  /* unused */

        /* ok, everything is initialized, announce that we're set */
        cpu_nmi_set_wd_enabled();

        apic_write(APIC_LVTPC, APIC_DM_NMI);
        evntsel |= K7_EVNTSEL_ENABLE;
        wrmsr(evntsel_msr, evntsel, 0);

        return 1;
}

static void single_msr_stop_watchdog(void)
{
        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

        wrmsr(wd->evntsel_msr, 0, 0);
}

static int single_msr_reserve(void)
{
        if (!reserve_perfctr_nmi(wd_ops->perfctr))
                return 0;

        if (!reserve_evntsel_nmi(wd_ops->evntsel)) {
                release_perfctr_nmi(wd_ops->perfctr);
                return 0;
        }
        return 1;
}

static void single_msr_unreserve(void)
{
        release_evntsel_nmi(wd_ops->evntsel);
        release_perfctr_nmi(wd_ops->perfctr);
}

static void __kprobes
single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
{
        /* start the cycle over again */
        write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
}

static const struct wd_ops k7_wd_ops = {
        .reserve        = single_msr_reserve,
        .unreserve      = single_msr_unreserve,
        .setup          = setup_k7_watchdog,
        .rearm          = single_msr_rearm,
        .stop           = single_msr_stop_watchdog,
        .perfctr        = MSR_K7_PERFCTR0,
        .evntsel        = MSR_K7_EVNTSEL0,
        .checkbit       = 1ULL << 47,   /* MSB of the 48-bit AMD counters */
};

/*
 * Intel Model 6 (PPro+,P2,P3,P-M,Core1)
 */
#define P6_EVNTSEL0_ENABLE      (1 << 22)
#define P6_EVNTSEL_INT          (1 << 20)
#define P6_EVNTSEL_OS           (1 << 17)
#define P6_EVNTSEL_USR          (1 << 16)
#define P6_EVENT_CPU_CLOCKS_NOT_HALTED  0x79
#define P6_NMI_EVENT            P6_EVENT_CPU_CLOCKS_NOT_HALTED

static int setup_p6_watchdog(unsigned nmi_hz)
{
        unsigned int perfctr_msr, evntsel_msr;
        unsigned int evntsel;
        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

        perfctr_msr = wd_ops->perfctr;
        evntsel_msr = wd_ops->evntsel;

        /* KVM doesn't implement this MSR */
        if (wrmsr_safe(perfctr_msr, 0, 0) < 0)
                return 0;

        evntsel = P6_EVNTSEL_INT
                | P6_EVNTSEL_OS
                | P6_EVNTSEL_USR
                | P6_NMI_EVENT;

        /* setup the timer */
        wrmsr(evntsel_msr, evntsel, 0);
        nmi_hz = adjust_for_32bit_ctr(nmi_hz);
        write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0", nmi_hz);

        /* initialize the wd struct before enabling */
        wd->perfctr_msr = perfctr_msr;
        wd->evntsel_msr = evntsel_msr;
        wd->cccr_msr = 0;  /* unused */

        /* ok, everything is initialized, announce that we're set */
        cpu_nmi_set_wd_enabled();

        apic_write(APIC_LVTPC, APIC_DM_NMI);
        evntsel |= P6_EVNTSEL0_ENABLE;
        wrmsr(evntsel_msr, evntsel, 0);

        return 1;
}

static void __kprobes p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
{
        /*
         * P6-based Pentium M needs to re-unmask
         * the apic vector, and it doesn't hurt
         * the other P6 variants.
         * ArchPerfmon/Core Duo also needs this.
         */
        apic_write(APIC_LVTPC, APIC_DM_NMI);

        /* P6/ARCH_PERFMON has 32 bit counter writes */
        write_watchdog_counter32(wd->perfctr_msr, NULL, nmi_hz);
}

static const struct wd_ops p6_wd_ops = {
        .reserve        = single_msr_reserve,
        .unreserve      = single_msr_unreserve,
        .setup          = setup_p6_watchdog,
        .rearm          = p6_rearm,
        .stop           = single_msr_stop_watchdog,
        .perfctr        = MSR_P6_PERFCTR0,
        .evntsel        = MSR_P6_EVNTSEL0,
        .checkbit       = 1ULL << 39,   /* MSB of the 40-bit P6 counters */
};

/*
 * Intel P4 performance counters.
 * By far the most complicated of all.
 */
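/*
 * Each P4 counter is driven by a CCCR (counter configuration control
 * register) that enables it and selects one of several ESCRs (event
 * selection control registers), which in turn pick the actual event.
 * That is why the P4 code programs three MSRs where the others need two.
 */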
#define MSR_P4_MISC_ENABLE_PERF_AVAIL   (1 << 7)
#define P4_ESCR_EVENT_SELECT(N) ((N) << 25)
#define P4_ESCR_OS              (1 << 3)
#define P4_ESCR_USR             (1 << 2)
#define P4_CCCR_OVF_PMI0        (1 << 26)
#define P4_CCCR_OVF_PMI1        (1 << 27)
#define P4_CCCR_THRESHOLD(N)    ((N) << 20)
#define P4_CCCR_COMPLEMENT      (1 << 19)
#define P4_CCCR_COMPARE         (1 << 18)
#define P4_CCCR_REQUIRED        (3 << 16)
#define P4_CCCR_ESCR_SELECT(N)  ((N) << 13)
#define P4_CCCR_ENABLE          (1 << 12)
#define P4_CCCR_OVF             (1 << 31)

#define P4_CONTROLS 18
static unsigned int p4_controls[P4_CONTROLS] = {
        MSR_P4_BPU_CCCR0,
        MSR_P4_BPU_CCCR1,
        MSR_P4_BPU_CCCR2,
        MSR_P4_BPU_CCCR3,
        MSR_P4_MS_CCCR0,
        MSR_P4_MS_CCCR1,
        MSR_P4_MS_CCCR2,
        MSR_P4_MS_CCCR3,
        MSR_P4_FLAME_CCCR0,
        MSR_P4_FLAME_CCCR1,
        MSR_P4_FLAME_CCCR2,
        MSR_P4_FLAME_CCCR3,
        MSR_P4_IQ_CCCR0,
        MSR_P4_IQ_CCCR1,
        MSR_P4_IQ_CCCR2,
        MSR_P4_IQ_CCCR3,
        MSR_P4_IQ_CCCR4,
        MSR_P4_IQ_CCCR5,
};
/*
 * Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
 * CRU_ESCR0 (with any non-null event selector) through a complemented
 * max threshold. [IA32-Vol3, Section 14.9.9]
 */
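/*
 * With COMPARE set the counter only advances on cycles where the
 * threshold test passes; COMPLEMENT turns "above 15" into "at most 15",
 * which is true on every cycle, so the counter ticks once per clock.
 */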
static int setup_p4_watchdog(unsigned nmi_hz)
{
        unsigned int perfctr_msr, evntsel_msr, cccr_msr;
        unsigned int evntsel, cccr_val;
        unsigned int misc_enable, dummy;
        unsigned int ht_num;
        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

        rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
        if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
                return 0;

#ifdef CONFIG_SMP
        /* detect which hyperthread we are on */
        if (smp_num_siblings == 2) {
                unsigned int ebx, apicid;

                ebx = cpuid_ebx(1);
                apicid = (ebx >> 24) & 0xff;
                ht_num = apicid & 1;
        } else
#endif
                ht_num = 0;

        /*
         * performance counters are shared resources;
         * assign each hyperthread its own set
         * (re-use the ESCR0 register, seems safe
         * and keeps the cccr_val the same)
         */
        if (!ht_num) {
                /* logical cpu 0 */
                perfctr_msr = MSR_P4_IQ_PERFCTR0;
                evntsel_msr = MSR_P4_CRU_ESCR0;
                cccr_msr = MSR_P4_IQ_CCCR0;
                cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);

                /*
                 * If we're on the kdump kernel or in another situation, we
                 * may still have other performance counter registers set to
                 * interrupt and they'll keep interrupting forever because
                 * of the P4_CCCR_OVF quirk. So we need to ACK all the
                 * pending interrupts and disable all the registers here,
                 * before reenabling the NMI delivery. Refer to p4_rearm()
                 * about the P4_CCCR_OVF quirk.
                 */
                if (reset_devices) {
                        unsigned int low, high;
                        int i;

                        for (i = 0; i < P4_CONTROLS; i++) {
                                rdmsr(p4_controls[i], low, high);
                                low &= ~(P4_CCCR_ENABLE | P4_CCCR_OVF);
                                wrmsr(p4_controls[i], low, high);
                        }
                }
        } else {
                /* logical cpu 1 */
                perfctr_msr = MSR_P4_IQ_PERFCTR1;
                evntsel_msr = MSR_P4_CRU_ESCR0;
                cccr_msr = MSR_P4_IQ_CCCR1;

                /* Pentium 4 D processors don't support P4_CCCR_OVF_PMI1 */
                if (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_mask == 4)
                        cccr_val = P4_CCCR_OVF_PMI0;
                else
                        cccr_val = P4_CCCR_OVF_PMI1;
                cccr_val |= P4_CCCR_ESCR_SELECT(4);
        }

        evntsel = P4_ESCR_EVENT_SELECT(0x3F)
                | P4_ESCR_OS
                | P4_ESCR_USR;

        cccr_val |= P4_CCCR_THRESHOLD(15)
                 | P4_CCCR_COMPLEMENT
                 | P4_CCCR_COMPARE
                 | P4_CCCR_REQUIRED;

        wrmsr(evntsel_msr, evntsel, 0);
        wrmsr(cccr_msr, cccr_val, 0);
        write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz);

        wd->perfctr_msr = perfctr_msr;
        wd->evntsel_msr = evntsel_msr;
        wd->cccr_msr = cccr_msr;

        /* ok, everything is initialized, announce that we're set */
        cpu_nmi_set_wd_enabled();

        apic_write(APIC_LVTPC, APIC_DM_NMI);
        cccr_val |= P4_CCCR_ENABLE;
        wrmsr(cccr_msr, cccr_val, 0);
        return 1;
}

static void stop_p4_watchdog(void)
{
        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

        wrmsr(wd->cccr_msr, 0, 0);
        wrmsr(wd->evntsel_msr, 0, 0);
}

static int p4_reserve(void)
{
        if (!reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR0))
                return 0;
#ifdef CONFIG_SMP
        if (smp_num_siblings > 1 && !reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR1))
                goto fail1;
#endif
        if (!reserve_evntsel_nmi(MSR_P4_CRU_ESCR0))
                goto fail2;
        /* RED-PEN why is ESCR1 not reserved here? */
        return 1;
 fail2:
#ifdef CONFIG_SMP
        if (smp_num_siblings > 1)
                release_perfctr_nmi(MSR_P4_IQ_PERFCTR1);
 fail1:
#endif
        release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
        return 0;
}

static void p4_unreserve(void)
{
#ifdef CONFIG_SMP
        if (smp_num_siblings > 1)
                release_perfctr_nmi(MSR_P4_IQ_PERFCTR1);
#endif
        release_evntsel_nmi(MSR_P4_CRU_ESCR0);
        release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
}

static void __kprobes p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
{
        u64 dummy;

        /*
         * P4 quirks:
         * - An overflown perfctr will assert its interrupt
         *   until the OVF flag in its CCCR is cleared.
         * - LVTPC is masked on interrupt and must be
         *   unmasked by the LVTPC handler.
         */
        rdmsrl(wd->cccr_msr, dummy);
        dummy &= ~P4_CCCR_OVF;
        wrmsrl(wd->cccr_msr, dummy);
        apic_write(APIC_LVTPC, APIC_DM_NMI);
        /* start the cycle over again */
        write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
}

static const struct wd_ops p4_wd_ops = {
        .reserve        = p4_reserve,
        .unreserve      = p4_unreserve,
        .setup          = setup_p4_watchdog,
        .rearm          = p4_rearm,
        .stop           = stop_p4_watchdog,
        /* RED-PEN this is wrong for the other sibling */
        .perfctr        = MSR_P4_BPU_PERFCTR0,
        .evntsel        = MSR_P4_BSU_ESCR0,
        .checkbit       = 1ULL << 39,   /* MSB of the 40-bit P4 counters */
};

/*
 * Watchdog using the Intel architected PerfMon.
 * Used for Core2 and hopefully all future Intel CPUs.
 */
#define ARCH_PERFMON_NMI_EVENT_SEL      ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
#define ARCH_PERFMON_NMI_EVENT_UMASK    ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK

/* not const: checkbit and the counter MSRs are patched at runtime */
static struct wd_ops intel_arch_wd_ops;

static int setup_intel_arch_watchdog(unsigned nmi_hz)
{
        unsigned int ebx;
        union cpuid10_eax eax;
        unsigned int unused;
        unsigned int perfctr_msr, evntsel_msr;
        unsigned int evntsel;
        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

        /*
         * Check whether the Architectural PerfMon supports
         * Unhalted Core Cycles Event or not.
         * NOTE: Corresponding bit = 0 in ebx indicates event present.
         */
        cpuid(10, &(eax.full), &ebx, &unused, &unused);
        if ((eax.split.mask_length <
                        (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
            (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
                return 0;

        perfctr_msr = wd_ops->perfctr;
        evntsel_msr = wd_ops->evntsel;

        wrmsrl(perfctr_msr, 0UL);

        evntsel = ARCH_PERFMON_EVENTSEL_INT
                | ARCH_PERFMON_EVENTSEL_OS
                | ARCH_PERFMON_EVENTSEL_USR
                | ARCH_PERFMON_NMI_EVENT_SEL
                | ARCH_PERFMON_NMI_EVENT_UMASK;

        /* setup the timer */
        wrmsr(evntsel_msr, evntsel, 0);
        nmi_hz = adjust_for_32bit_ctr(nmi_hz);
        write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz);

        wd->perfctr_msr = perfctr_msr;
        wd->evntsel_msr = evntsel_msr;
        wd->cccr_msr = 0;  /* unused */

        /* ok, everything is initialized, announce that we're set */
        cpu_nmi_set_wd_enabled();

        apic_write(APIC_LVTPC, APIC_DM_NMI);
        evntsel |= ARCH_PERFMON_EVENTSEL_ENABLE;
        wrmsr(evntsel_msr, evntsel, 0);
        intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1);
        return 1;
}

static struct wd_ops intel_arch_wd_ops __read_mostly = {
        .reserve        = single_msr_reserve,
        .unreserve      = single_msr_unreserve,
        .setup          = setup_intel_arch_watchdog,
        .rearm          = p6_rearm,
        .stop           = single_msr_stop_watchdog,
        /* counter 1 by default; probe_nmi_watchdog() switches CPUs with
         * the perfctr1 enable-bit errata over to counter 0 */
        .perfctr        = MSR_ARCH_PERFMON_PERFCTR1,
        .evntsel        = MSR_ARCH_PERFMON_EVENTSEL1,
};

static void probe_nmi_watchdog(void)
{
        switch (boot_cpu_data.x86_vendor) {
        case X86_VENDOR_AMD:
                if (boot_cpu_data.x86 == 6 ||
                    (boot_cpu_data.x86 >= 0xf && boot_cpu_data.x86 <= 0x15))
                        wd_ops = &k7_wd_ops;
                return;
        case X86_VENDOR_INTEL:
                /*
                 * Work around CPUs where perfctr1 doesn't have a working
                 * enable bit, as described in the following errata:
                 * AE49 Core Duo and Intel Core Solo 65 nm
                 * AN49 Intel Pentium Dual-Core
                 * AF49 Dual-Core Intel Xeon Processor LV
                 */
                if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) ||
                    (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 15 &&
                     boot_cpu_data.x86_mask == 4)) {
                        intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0;
                        intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0;
                }
                if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
                        wd_ops = &intel_arch_wd_ops;
                        break;
                }
                switch (boot_cpu_data.x86) {
                case 6:
                        if (boot_cpu_data.x86_model > 13)
                                return;

                        wd_ops = &p6_wd_ops;
                        break;
                case 15:
                        wd_ops = &p4_wd_ops;
                        break;
                default:
                        return;
                }
                break;
        }
}

/* Interface to nmi.c */

int lapic_watchdog_init(unsigned nmi_hz)
{
        if (!wd_ops) {
                probe_nmi_watchdog();
                if (!wd_ops) {
                        printk(KERN_INFO "NMI watchdog: CPU not supported\n");
                        return -1;
                }

                if (!wd_ops->reserve()) {
                        printk(KERN_ERR
                                "NMI watchdog: cannot reserve perfctrs\n");
                        return -1;
                }
        }

        if (!wd_ops->setup(nmi_hz)) {
                printk(KERN_ERR "Cannot setup NMI watchdog on CPU %d\n",
                       raw_smp_processor_id());
                return -1;
        }

        return 0;
}

void lapic_watchdog_stop(void)
{
        if (wd_ops)
                wd_ops->stop();
}

unsigned lapic_adjust_nmi_hz(unsigned hz)
{
        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

        if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
            wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR1)
                hz = adjust_for_32bit_ctr(hz);
        return hz;
}

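/*
 * Decide whether an incoming NMI came from our watchdog counter: the
 * counter was programmed with a negative value, so wd_ops->checkbit
 * (its top bit) stays set while it counts up and is clear once it has
 * overflowed and raised the NMI. Returns 1 and rearms if it was ours.
 */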
int __kprobes lapic_wd_event(unsigned nmi_hz)
{
        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
        u64 ctr;

        rdmsrl(wd->perfctr_msr, ctr);
        if (ctr & wd_ops->checkbit) /* perfctr still running? */
                return 0;

        wd_ops->rearm(wd, nmi_hz);
        return 1;
}