[PATCH] i386: Add SMP support on i386 to reservation framework
[pandora-kernel.git] / arch/i386/kernel/nmi.c
1 /*
2  *  linux/arch/i386/nmi.c
3  *
4  *  NMI watchdog support on APIC systems
5  *
6  *  Started by Ingo Molnar <mingo@redhat.com>
7  *
8  *  Fixes:
9  *  Mikael Pettersson   : AMD K7 support for local APIC NMI watchdog.
10  *  Mikael Pettersson   : Power Management for local APIC NMI watchdog.
11  *  Mikael Pettersson   : Pentium 4 support for local APIC NMI watchdog.
12  *  Pavel Machek and
13  *  Mikael Pettersson   : PM converted to driver model. Disable/enable API.
14  */
15
16 #include <linux/config.h>
17 #include <linux/delay.h>
18 #include <linux/interrupt.h>
19 #include <linux/module.h>
20 #include <linux/nmi.h>
21 #include <linux/sysdev.h>
22 #include <linux/sysctl.h>
23 #include <linux/percpu.h>
24
25 #include <asm/smp.h>
26 #include <asm/nmi.h>
27 #include <asm/kdebug.h>
28
29 #include "mach_traps.h"
30
31 /* perfctr_nmi_owner tracks the ownership of the perfctr registers:
32  * evntsel_nmi_owner tracks the ownership of the event selection registers.
33  * - different performance counters / event selectors may be reserved by
34  *   different subsystems; this reservation system just tries to coordinate
35  *   things a little
36  */
37 static DEFINE_PER_CPU(unsigned long, perfctr_nmi_owner);
38 static DEFINE_PER_CPU(unsigned long, evntsel_nmi_owner[3]);
39
40 /* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and its
41  * offset from MSR_P4_BSU_ESCR0.  It will be the max for all platforms (for now)
42  */
43 #define NMI_MAX_COUNTER_BITS 66
44
45 /*
46  * lapic_nmi_owner tracks the ownership of the lapic NMI hardware:
47  * - it may be reserved by some other driver, or not
48  * - when not reserved by some other driver, it may be used for
49  *   the NMI watchdog, or not
50  *
51  * This is maintained separately from nmi_active because the NMI
52  * watchdog may also be driven from the I/O APIC timer.
53  */
54 static DEFINE_SPINLOCK(lapic_nmi_owner_lock);
55 static unsigned int lapic_nmi_owner;
56 #define LAPIC_NMI_WATCHDOG      (1<<0)
57 #define LAPIC_NMI_RESERVED      (1<<1)
58
59 /* nmi_active:
60  * >0: the lapic NMI watchdog is active, but can be disabled
61  * <0: the lapic NMI watchdog has not been set up, and cannot
62  *     be enabled
63  *  0: the lapic NMI watchdog is disabled, but can be enabled
64  */
65 atomic_t nmi_active = ATOMIC_INIT(0);           /* oprofile uses this */
66
67 unsigned int nmi_watchdog = NMI_DEFAULT;
68 static unsigned int nmi_hz = HZ;
69
70 struct nmi_watchdog_ctlblk {
71         int enabled;
72         u64 check_bit;
73         unsigned int cccr_msr;
74         unsigned int perfctr_msr;  /* the MSR to reset in NMI handler */
75         unsigned int evntsel_msr;  /* the MSR to select the events to handle */
76 };
77 static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);
78
79 /* local prototypes */
80 static void stop_apic_nmi_watchdog(void *unused);
81 static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu);
82
83 extern void show_registers(struct pt_regs *regs);
84 extern int unknown_nmi_panic;
85
86 /* converts an msr to an appropriate reservation bit */
87 static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
88 {
89         /* returns the bit offset of the performance counter register */
90         switch (boot_cpu_data.x86_vendor) {
91         case X86_VENDOR_AMD:
92                 return (msr - MSR_K7_PERFCTR0);
93         case X86_VENDOR_INTEL:
94                 switch (boot_cpu_data.x86) {
95                 case 6:
96                         return (msr - MSR_P6_PERFCTR0);
97                 case 15:
98                         return (msr - MSR_P4_BPU_PERFCTR0);
99                 }
100         }
101         return 0;
102 }
103
104 /* converts an msr to an appropriate reservation bit */
105 static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
106 {
107         /* returns the bit offset of the event selection register */
108         switch (boot_cpu_data.x86_vendor) {
109         case X86_VENDOR_AMD:
110                 return (msr - MSR_K7_EVNTSEL0);
111         case X86_VENDOR_INTEL:
112                 switch (boot_cpu_data.x86) {
113                 case 6:
114                         return (msr - MSR_P6_EVNTSEL0);
115                 case 15:
116                         return (msr - MSR_P4_BSU_ESCR0);
117                 }
118         }
119         return 0;
120 }
121
122 /* checks the availability of a bit (hack for oprofile) */
123 int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
124 {
125         BUG_ON(counter > NMI_MAX_COUNTER_BITS);
126
127         return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner)));
128 }
129
130 /* checks an MSR for availability */
131 int avail_to_resrv_perfctr_nmi(unsigned int msr)
132 {
133         unsigned int counter;
134
135         counter = nmi_perfctr_msr_to_bit(msr);
136         BUG_ON(counter > NMI_MAX_COUNTER_BITS);
137
138         return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner)));
139 }
140
141 int reserve_perfctr_nmi(unsigned int msr)
142 {
143         unsigned int counter;
144
145         counter = nmi_perfctr_msr_to_bit(msr);
146         BUG_ON(counter > NMI_MAX_COUNTER_BITS);
147
148         if (!test_and_set_bit(counter, &__get_cpu_var(perfctr_nmi_owner)))
149                 return 1;
150         return 0;
151 }
152
153 void release_perfctr_nmi(unsigned int msr)
154 {
155         unsigned int counter;
156
157         counter = nmi_perfctr_msr_to_bit(msr);
158         BUG_ON(counter > NMI_MAX_COUNTER_BITS);
159
160         clear_bit(counter, &__get_cpu_var(perfctr_nmi_owner));
161 }
162
163 int reserve_evntsel_nmi(unsigned int msr)
164 {
165         unsigned int counter;
166
167         counter = nmi_evntsel_msr_to_bit(msr);
168         BUG_ON(counter > NMI_MAX_COUNTER_BITS);
169
170         if (!test_and_set_bit(counter, &__get_cpu_var(evntsel_nmi_owner)[0]))
171                 return 1;
172         return 0;
173 }
174
175 void release_evntsel_nmi(unsigned int msr)
176 {
177         unsigned int counter;
178
179         counter = nmi_evntsel_msr_to_bit(msr);
180         BUG_ON(counter > NMI_MAX_COUNTER_BITS);
181
182         clear_bit(counter, &__get_cpu_var(evntsel_nmi_owner)[0]);
183 }
184
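/*
 * Illustrative usage sketch (not part of the original file; the helper name
 * is hypothetical): how another subsystem, e.g. a profiler, would go through
 * the reservation API above before programming a counter, using the K7 MSRs
 * purely as an example.
 */
#if 0   /* example only */
static int example_claim_k7_counter0(void)
{
        if (!reserve_perfctr_nmi(MSR_K7_PERFCTR0))
                return -EBUSY;          /* counter already owned */
        if (!reserve_evntsel_nmi(MSR_K7_EVNTSEL0)) {
                release_perfctr_nmi(MSR_K7_PERFCTR0);
                return -EBUSY;          /* event selector already owned */
        }
        /* ... program MSR_K7_EVNTSEL0 / MSR_K7_PERFCTR0 here ... */
        return 0;
}
#endif
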
185 static __cpuinit inline int nmi_known_cpu(void)
186 {
187         switch (boot_cpu_data.x86_vendor) {
188         case X86_VENDOR_AMD:
189                 return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6));
190         case X86_VENDOR_INTEL:
191                 return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6));
192         }
193         return 0;
194 }
195
196 #ifdef CONFIG_SMP
197 /* The performance counters used by NMI_LOCAL_APIC don't trigger when
198  * the CPU is idle. To make sure the NMI watchdog really ticks on all
199  * CPUs during the test, make them busy.
200  */
201 static __init void nmi_cpu_busy(void *data)
202 {
203         volatile int *endflag = data;
204         local_irq_enable_in_hardirq();
205         /* Intentionally don't use cpu_relax here. This is
206            to make sure that the performance counter really ticks,
207            even if there is a simulator or similar that catches the
208            pause instruction. On a real HT machine this is fine because
209            all other CPUs are busy with "useless" delay loops and don't
210            care if they get somewhat less cycles. */
211         while (*endflag == 0)
212                 barrier();
213 }
214 #endif
215
216 static int __init check_nmi_watchdog(void)
217 {
218         volatile int endflag = 0;
219         unsigned int *prev_nmi_count;
220         int cpu;
221
222         if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DEFAULT))
223                 return 0;
224
225         if (!atomic_read(&nmi_active))
226                 return 0;
227
228         prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
229         if (!prev_nmi_count)
230                 return -1;
231
232         printk(KERN_INFO "Testing NMI watchdog ... ");
233
234         if (nmi_watchdog == NMI_LOCAL_APIC)
235                 smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0);
236
237         for_each_possible_cpu(cpu)
238                 prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count;
239         local_irq_enable();
240         mdelay((10*1000)/nmi_hz); /* wait 10 ticks */
241
242         for_each_possible_cpu(cpu) {
243 #ifdef CONFIG_SMP
244                 /* Check cpu_callin_map here because that is set
245                    after the timer is started. */
246                 if (!cpu_isset(cpu, cpu_callin_map))
247                         continue;
248 #endif
249                 if (!per_cpu(nmi_watchdog_ctlblk, cpu).enabled)
250                         continue;
251                 if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) {
252                         printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n",
253                                 cpu,
254                                 prev_nmi_count[cpu],
255                                 nmi_count(cpu));
256                         per_cpu(nmi_watchdog_ctlblk, cpu).enabled = 0;
257                         atomic_dec(&nmi_active);
258                 }
259         }
260         if (!atomic_read(&nmi_active)) {
261                 kfree(prev_nmi_count);
262                 atomic_set(&nmi_active, -1);
263                 return -1;
264         }
265         endflag = 1;
266         printk("OK.\n");
267
268         /* now that we know it works we can reduce NMI frequency to
269            something more reasonable; makes a difference in some configs */
270         if (nmi_watchdog == NMI_LOCAL_APIC)
271                 nmi_hz = 1;
272
273         kfree(prev_nmi_count);
274         return 0;
275 }
276 /* This needs to happen later in boot so counters are working */
277 late_initcall(check_nmi_watchdog);
278
279 static int __init setup_nmi_watchdog(char *str)
280 {
281         int nmi;
282
283         get_option(&str, &nmi);
284
285         if ((nmi >= NMI_INVALID) || (nmi < NMI_NONE))
286                 return 0;
287         /*
288          * If any other x86 CPU has a local APIC, then
289          * please test the NMI stuff there and send me the
290          * missing bits. Right now Intel P6/P4 and AMD K7 only.
291          */
292         if ((nmi == NMI_LOCAL_APIC) && (nmi_known_cpu() == 0))
293                 return 0;  /* no lapic support */
294         nmi_watchdog = nmi;
295         return 1;
296 }
297
298 __setup("nmi_watchdog=", setup_nmi_watchdog);
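
/*
 * Example (illustrative, not from this file): the mode parsed above comes
 * from the kernel command line, e.g. "nmi_watchdog=1" for the I/O APIC
 * timer based watchdog or "nmi_watchdog=2" for the local APIC/perfctr based
 * one (this assumes the usual NMI_IO_APIC == 1 and NMI_LOCAL_APIC == 2
 * values from <asm/nmi.h>).
 */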
299
300 static void disable_lapic_nmi_watchdog(void)
301 {
302         BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
303
304         if (atomic_read(&nmi_active) <= 0)
305                 return;
306
307         on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);
308
309         BUG_ON(atomic_read(&nmi_active) != 0);
310 }
311
312 static void enable_lapic_nmi_watchdog(void)
313 {
314         BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
315
316         /* are we already enabled */
317         if (atomic_read(&nmi_active) != 0)
318                 return;
319
320         /* are we lapic aware */
321         if (nmi_known_cpu() <= 0)
322                 return;
323
324         on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
325         touch_nmi_watchdog();
326 }
327
328 int reserve_lapic_nmi(void)
329 {
330         unsigned int old_owner;
331
332         spin_lock(&lapic_nmi_owner_lock);
333         old_owner = lapic_nmi_owner;
334         lapic_nmi_owner |= LAPIC_NMI_RESERVED;
335         spin_unlock(&lapic_nmi_owner_lock);
336         if (old_owner & LAPIC_NMI_RESERVED)
337                 return -EBUSY;
338         if (old_owner & LAPIC_NMI_WATCHDOG)
339                 disable_lapic_nmi_watchdog();
340         return 0;
341 }
342
343 void release_lapic_nmi(void)
344 {
345         unsigned int new_owner;
346
347         spin_lock(&lapic_nmi_owner_lock);
348         new_owner = lapic_nmi_owner & ~LAPIC_NMI_RESERVED;
349         lapic_nmi_owner = new_owner;
350         spin_unlock(&lapic_nmi_owner_lock);
351         if (new_owner & LAPIC_NMI_WATCHDOG)
352                 enable_lapic_nmi_watchdog();
353 }
354
355 void disable_timer_nmi_watchdog(void)
356 {
357         BUG_ON(nmi_watchdog != NMI_IO_APIC);
358
359         if (atomic_read(&nmi_active) <= 0)
360                 return;
361
362         disable_irq(0);
363         on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);
364
365         BUG_ON(atomic_read(&nmi_active) != 0);
366 }
367
368 void enable_timer_nmi_watchdog(void)
369 {
370         BUG_ON(nmi_watchdog != NMI_IO_APIC);
371
372         if (atomic_read(&nmi_active) == 0) {
373                 touch_nmi_watchdog();
374                 on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
375                 enable_irq(0);
376         }
377 }
378
379 #ifdef CONFIG_PM
380
381 static int nmi_pm_active; /* nmi_active before suspend */
382
383 static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
384 {
385         nmi_pm_active = atomic_read(&nmi_active);
386         disable_lapic_nmi_watchdog();
387         return 0;
388 }
389
390 static int lapic_nmi_resume(struct sys_device *dev)
391 {
392         if (nmi_pm_active > 0)
393                 enable_lapic_nmi_watchdog();
394         return 0;
395 }
396
397
398 static struct sysdev_class nmi_sysclass = {
399         set_kset_name("lapic_nmi"),
400         .resume         = lapic_nmi_resume,
401         .suspend        = lapic_nmi_suspend,
402 };
403
404 static struct sys_device device_lapic_nmi = {
405         .id     = 0,
406         .cls    = &nmi_sysclass,
407 };
408
409 static int __init init_lapic_nmi_sysfs(void)
410 {
411         int error;
412
413         /* should really be a BUG_ON but because this is an
414          * init call, it just doesn't work.  -dcz
415          */
416         if (nmi_watchdog != NMI_LOCAL_APIC)
417                 return 0;
418
419         if ( atomic_read(&nmi_active) < 0 )
420                 return 0;
421
422         error = sysdev_class_register(&nmi_sysclass);
423         if (!error)
424                 error = sysdev_register(&device_lapic_nmi);
425         return error;
426 }
427 /* must come after the local APIC's device_initcall() */
428 late_initcall(init_lapic_nmi_sysfs);
429
430 #endif  /* CONFIG_PM */
431
432 /*
433  * Activate the NMI watchdog via the local APIC.
434  * Original code written by Keith Owens.
435  */
436
437 static void write_watchdog_counter(unsigned int perfctr_msr, const char *descr)
438 {
439         u64 count = (u64)cpu_khz * 1000;
440
441         do_div(count, nmi_hz);
442         if (descr)
443                 Dprintk("setting %s to -0x%08Lx\n", descr, count);
444         wrmsrl(perfctr_msr, 0 - count);
445 }
446
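/*
 * Worked example (illustrative figures, not from this file): on a
 * hypothetical 2 GHz CPU, cpu_khz is 2000000, so with nmi_hz == HZ == 1000
 * the counter is loaded with -(2000000 * 1000 / 1000) = -2000000 and
 * overflows (raising the watchdog NMI) roughly 1000 times per second.
 * Once check_nmi_watchdog() lowers nmi_hz to 1, the reload value becomes
 * -2000000000 and the NMI fires only about once per second.
 */
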
447 /* Note that these events don't tick when the CPU idles. This means
448    the frequency varies with CPU load. */
449
450 #define K7_EVNTSEL_ENABLE       (1 << 22)
451 #define K7_EVNTSEL_INT          (1 << 20)
452 #define K7_EVNTSEL_OS           (1 << 17)
453 #define K7_EVNTSEL_USR          (1 << 16)
454 #define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING    0x76
455 #define K7_NMI_EVENT            K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
456
457 static int setup_k7_watchdog(void)
458 {
459         unsigned int perfctr_msr, evntsel_msr;
460         unsigned int evntsel;
461         struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
462
463         perfctr_msr = MSR_K7_PERFCTR0;
464         evntsel_msr = MSR_K7_EVNTSEL0;
465         if (!reserve_perfctr_nmi(perfctr_msr))
466                 goto fail;
467
468         if (!reserve_evntsel_nmi(evntsel_msr))
469                 goto fail1;
470
471         wrmsrl(perfctr_msr, 0UL);
472
473         evntsel = K7_EVNTSEL_INT
474                 | K7_EVNTSEL_OS
475                 | K7_EVNTSEL_USR
476                 | K7_NMI_EVENT;
477
478         /* setup the timer */
479         wrmsr(evntsel_msr, evntsel, 0);
480         write_watchdog_counter(perfctr_msr, "K7_PERFCTR0");
481         apic_write(APIC_LVTPC, APIC_DM_NMI);
482         evntsel |= K7_EVNTSEL_ENABLE;
483         wrmsr(evntsel_msr, evntsel, 0);
484
485         wd->perfctr_msr = perfctr_msr;
486         wd->evntsel_msr = evntsel_msr;
487         wd->cccr_msr = 0;  /* unused */
488         wd->check_bit = 1ULL<<63;
489         return 1;
490 fail1:
491         release_perfctr_nmi(perfctr_msr);
492 fail:
493         return 0;
494 }
495
496 static void stop_k7_watchdog(void)
497 {
498         struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
499
500         wrmsr(wd->evntsel_msr, 0, 0);
501
502         release_evntsel_nmi(wd->evntsel_msr);
503         release_perfctr_nmi(wd->perfctr_msr);
504 }
505
506 #define P6_EVNTSEL0_ENABLE      (1 << 22)
507 #define P6_EVNTSEL_INT          (1 << 20)
508 #define P6_EVNTSEL_OS           (1 << 17)
509 #define P6_EVNTSEL_USR          (1 << 16)
510 #define P6_EVENT_CPU_CLOCKS_NOT_HALTED  0x79
511 #define P6_NMI_EVENT            P6_EVENT_CPU_CLOCKS_NOT_HALTED
512
513 static int setup_p6_watchdog(void)
514 {
515         unsigned int perfctr_msr, evntsel_msr;
516         unsigned int evntsel;
517         struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
518
519         perfctr_msr = MSR_P6_PERFCTR0;
520         evntsel_msr = MSR_P6_EVNTSEL0;
521         if (!reserve_perfctr_nmi(perfctr_msr))
522                 goto fail;
523
524         if (!reserve_evntsel_nmi(evntsel_msr))
525                 goto fail1;
526
527         wrmsrl(perfctr_msr, 0UL);
528
529         evntsel = P6_EVNTSEL_INT
530                 | P6_EVNTSEL_OS
531                 | P6_EVNTSEL_USR
532                 | P6_NMI_EVENT;
533
534         /* setup the timer */
535         wrmsr(evntsel_msr, evntsel, 0);
536         write_watchdog_counter(perfctr_msr, "P6_PERFCTR0");
537         apic_write(APIC_LVTPC, APIC_DM_NMI);
538         evntsel |= P6_EVNTSEL0_ENABLE;
539         wrmsr(evntsel_msr, evntsel, 0);
540
541         wd->perfctr_msr = perfctr_msr;
542         wd->evntsel_msr = evntsel_msr;
543         wd->cccr_msr = 0;  /* unused */
544         wd->check_bit = 1ULL<<39;
545         return 1;
546 fail1:
547         release_perfctr_nmi(perfctr_msr);
548 fail:
549         return 0;
550 }
551
552 static void stop_p6_watchdog(void)
553 {
554         struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
555
556         wrmsr(wd->evntsel_msr, 0, 0);
557
558         release_evntsel_nmi(wd->evntsel_msr);
559         release_perfctr_nmi(wd->perfctr_msr);
560 }
561
562 /* Note that these events don't tick when the CPU idles. This means
563    the frequency varies with CPU load. */
564
565 #define MSR_P4_MISC_ENABLE_PERF_AVAIL   (1<<7)
566 #define P4_ESCR_EVENT_SELECT(N) ((N)<<25)
567 #define P4_ESCR_OS              (1<<3)
568 #define P4_ESCR_USR             (1<<2)
569 #define P4_CCCR_OVF_PMI0        (1<<26)
570 #define P4_CCCR_OVF_PMI1        (1<<27)
571 #define P4_CCCR_THRESHOLD(N)    ((N)<<20)
572 #define P4_CCCR_COMPLEMENT      (1<<19)
573 #define P4_CCCR_COMPARE         (1<<18)
574 #define P4_CCCR_REQUIRED        (3<<16)
575 #define P4_CCCR_ESCR_SELECT(N)  ((N)<<13)
576 #define P4_CCCR_ENABLE          (1<<12)
577 #define P4_CCCR_OVF             (1<<31)
578 /* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
579    CRU_ESCR0 (with any non-null event selector) through a complemented
580    max threshold. [IA32-Vol3, Section 14.9.9] */
581
582 static int setup_p4_watchdog(void)
583 {
584         unsigned int perfctr_msr, evntsel_msr, cccr_msr;
585         unsigned int evntsel, cccr_val;
586         unsigned int misc_enable, dummy;
587         unsigned int ht_num;
588         struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
589
590         rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
591         if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
592                 return 0;
593
594 #ifdef CONFIG_SMP
595         /* detect which hyperthread we are on */
596         if (smp_num_siblings == 2) {
597                 unsigned int ebx, apicid;
598
599                 ebx = cpuid_ebx(1);
600                 apicid = (ebx >> 24) & 0xff;
601                 ht_num = apicid & 1;
602         } else
603 #endif
604                 ht_num = 0;
605
606         /* performance counters are shared resources;
607          * assign each hyperthread its own set
608          * (re-using the ESCR0 register seems safe
609          * and keeps the cccr_val the same)
610          */
611         if (!ht_num) {
612                 /* logical cpu 0 */
613                 perfctr_msr = MSR_P4_IQ_PERFCTR0;
614                 evntsel_msr = MSR_P4_CRU_ESCR0;
615                 cccr_msr = MSR_P4_IQ_CCCR0;
616                 cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
617         } else {
618                 /* logical cpu 1 */
619                 perfctr_msr = MSR_P4_IQ_PERFCTR1;
620                 evntsel_msr = MSR_P4_CRU_ESCR0;
621                 cccr_msr = MSR_P4_IQ_CCCR1;
622                 cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4);
623         }
624
625         if (!reserve_perfctr_nmi(perfctr_msr))
626                 goto fail;
627
628         if (!reserve_evntsel_nmi(evntsel_msr))
629                 goto fail1;
630
631         evntsel = P4_ESCR_EVENT_SELECT(0x3F)
632                 | P4_ESCR_OS
633                 | P4_ESCR_USR;
634
635         cccr_val |= P4_CCCR_THRESHOLD(15)
636                  | P4_CCCR_COMPLEMENT
637                  | P4_CCCR_COMPARE
638                  | P4_CCCR_REQUIRED;
639
640         wrmsr(evntsel_msr, evntsel, 0);
641         wrmsr(cccr_msr, cccr_val, 0);
642         write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0");
643         apic_write(APIC_LVTPC, APIC_DM_NMI);
644         cccr_val |= P4_CCCR_ENABLE;
645         wrmsr(cccr_msr, cccr_val, 0);
646         wd->perfctr_msr = perfctr_msr;
647         wd->evntsel_msr = evntsel_msr;
648         wd->cccr_msr = cccr_msr;
649         wd->check_bit = 1ULL<<39;
650         return 1;
651 fail1:
652         release_perfctr_nmi(perfctr_msr);
653 fail:
654         return 0;
655 }
656
657 static void stop_p4_watchdog(void)
658 {
659         struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
660
661         wrmsr(wd->cccr_msr, 0, 0);
662         wrmsr(wd->evntsel_msr, 0, 0);
663
664         release_evntsel_nmi(wd->evntsel_msr);
665         release_perfctr_nmi(wd->perfctr_msr);
666 }
667
668 void setup_apic_nmi_watchdog (void *unused)
669 {
670         /* only support LOCAL and IO APICs for now */
671         if ((nmi_watchdog != NMI_LOCAL_APIC) &&
672             (nmi_watchdog != NMI_IO_APIC))
673                 return;
674
675         if (nmi_watchdog == NMI_LOCAL_APIC) {
676                 switch (boot_cpu_data.x86_vendor) {
677                 case X86_VENDOR_AMD:
678                         if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15)
679                                 return;
680                         if (!setup_k7_watchdog())
681                                 return;
682                         break;
683                 case X86_VENDOR_INTEL:
684                         switch (boot_cpu_data.x86) {
685                         case 6:
686                                 if (boot_cpu_data.x86_model > 0xd)
687                                         return;
688
689                                 if (!setup_p6_watchdog())
690                                         return;
691                                 break;
692                         case 15:
693                                 if (boot_cpu_data.x86_model > 0x4)
694                                         return;
695
696                                 if (!setup_p4_watchdog())
697                                         return;
698                                 break;
699                         default:
700                                 return;
701                         }
702                         break;
703                 default:
704                         return;
705                 }
706         }
707         __get_cpu_var(nmi_watchdog_ctlblk.enabled) = 1;
708         atomic_inc(&nmi_active);
709 }
710
711 static void stop_apic_nmi_watchdog(void *unused)
712 {
713         /* only support LOCAL and IO APICs for now */
714         if ((nmi_watchdog != NMI_LOCAL_APIC) &&
715             (nmi_watchdog != NMI_IO_APIC))
716                 return;
717
718         if (nmi_watchdog == NMI_LOCAL_APIC) {
719                 switch (boot_cpu_data.x86_vendor) {
720                 case X86_VENDOR_AMD:
721                         stop_k7_watchdog();
722                         break;
723                 case X86_VENDOR_INTEL:
724                         switch (boot_cpu_data.x86) {
725                         case 6:
726                                 if (boot_cpu_data.x86_model > 0xd)
727                                         break;
728                                 stop_p6_watchdog();
729                                 break;
730                         case 15:
731                                 if (boot_cpu_data.x86_model > 0x4)
732                                         break;
733                                 stop_p4_watchdog();
734                                 break;
735                         }
736                         break;
737                 default:
738                         return;
739                 }
740         }
741         __get_cpu_var(nmi_watchdog_ctlblk.enabled) = 0;
742         atomic_dec(&nmi_active);
743 }
744
745 /*
746  * the best way to detect whether a CPU has a 'hard lockup' problem
747  * is to check its local APIC timer IRQ counts. If they are not
748  * changing then that CPU has some problem.
749  *
750  * as these watchdog NMI IRQs are generated on every CPU, we only
751  * have to check the current processor.
752  *
753  * since NMIs don't listen to _any_ locks, we have to be extremely
754  * careful not to rely on unsafe variables. The printk might lock
755  * up though, so we have to break up any console locks first ...
756  * [when there will be more tty-related locks, break them up
757  *  here too!]
758  */
759
760 static unsigned int
761         last_irq_sums [NR_CPUS],
762         alert_counter [NR_CPUS];
763
764 void touch_nmi_watchdog (void)
765 {
766         int i;
767
768         /*
769          * Just reset the alert counters, (other CPUs might be
770          * spinning on locks we hold):
771          */
772         for_each_possible_cpu(i)
773                 alert_counter[i] = 0;
774
775         /*
776          * Tickle the softlockup detector too:
777          */
778         touch_softlockup_watchdog();
779 }
780 EXPORT_SYMBOL(touch_nmi_watchdog);
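
/*
 * Illustrative caller (not from this file; the helpers named below are
 * hypothetical): long-running kernel loops that legitimately keep a CPU
 * busy poke the watchdog like this so it does not fire:
 */
#if 0   /* example only */
        while (!done_with_slow_work()) {
                do_a_slow_chunk_of_work();
                touch_nmi_watchdog();   /* resets every CPU's alert_counter */
        }
#endif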
781
782 extern void die_nmi(struct pt_regs *, const char *msg);
783
784 void nmi_watchdog_tick (struct pt_regs * regs, unsigned reason)
785 {
786
787         /*
788          * Since current_thread_info() is always on the stack, and we
789          * always switch the stack NMI-atomically, it's safe to use
790          * smp_processor_id().
791          */
792         unsigned int sum;
793         int touched = 0;
794         int cpu = smp_processor_id();
795         struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
796         u64 dummy;
797
798         /* check for other users first */
799         if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
800                         == NOTIFY_STOP) {
801                 touched = 1;
802         }
803
804         sum = per_cpu(irq_stat, cpu).apic_timer_irqs;
805
806         /* if the apic timer isn't firing, this cpu isn't doing much */
807         if (!touched && last_irq_sums[cpu] == sum) {
808                 /*
809                  * Ayiee, looks like this CPU is stuck ...
810                  * wait a few IRQs (5 seconds) before doing the oops ...
811                  */
812                 alert_counter[cpu]++;
813                 if (alert_counter[cpu] == 5*nmi_hz)
814                         /*
815                          * die_nmi will return ONLY if NOTIFY_STOP happens..
816                          */
817                         die_nmi(regs, "BUG: NMI Watchdog detected LOCKUP");
818         } else {
819                 last_irq_sums[cpu] = sum;
820                 alert_counter[cpu] = 0;
821         }
822         /* see if the nmi watchdog went off */
823         if (wd->enabled) {
824                 if (nmi_watchdog == NMI_LOCAL_APIC) {
825                         rdmsrl(wd->perfctr_msr, dummy);
826                         if (dummy & wd->check_bit){
827                                 /* this wasn't a watchdog timer interrupt */
828                                 goto done;
829                         }
830
831                         /* only Intel P4 uses the cccr msr */
832                         if (wd->cccr_msr != 0) {
833                                 /*
834                                  * P4 quirks:
835                                  * - An overflown perfctr will assert its interrupt
836                                  *   until the OVF flag in its CCCR is cleared.
837                                  * - LVTPC is masked on interrupt and must be
838                                  *   unmasked by the LVTPC handler.
839                                  */
840                                 rdmsrl(wd->cccr_msr, dummy);
841                                 dummy &= ~P4_CCCR_OVF;
842                                 wrmsrl(wd->cccr_msr, dummy);
843                                 apic_write(APIC_LVTPC, APIC_DM_NMI);
844                         }
845                         else if (wd->perfctr_msr == MSR_P6_PERFCTR0) {
846                                 /* Only P6 based Pentium M needs to re-unmask
847                                  * the apic vector, but it doesn't hurt
848                                  * other P6 variants */
849                                 apic_write(APIC_LVTPC, APIC_DM_NMI);
850                         }
851                         /* start the cycle over again */
852                         write_watchdog_counter(wd->perfctr_msr, NULL);
853                 }
854         }
855 done:
856         return;
857 }
858
859 #ifdef CONFIG_SYSCTL
860
861 static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
862 {
863         unsigned char reason = get_nmi_reason();
864         char buf[64];
865
866         if (!(reason & 0xc0)) {
867                 sprintf(buf, "NMI received for unknown reason %02x\n", reason);
868                 die_nmi(regs, buf);
869         }
870         return 0;
871 }
872
873 /*
874  * proc handler for /proc/sys/kernel/unknown_nmi_panic
875  */
876 int proc_unknown_nmi_panic(ctl_table *table, int write, struct file *file,
877                         void __user *buffer, size_t *length, loff_t *ppos)
878 {
879         int old_state;
880
881         old_state = unknown_nmi_panic;
882         proc_dointvec(table, write, file, buffer, length, ppos);
883         if (!!old_state == !!unknown_nmi_panic)
884                 return 0;
885
886         if (unknown_nmi_panic) {
887                 if (reserve_lapic_nmi() < 0) {
888                         unknown_nmi_panic = 0;
889                         return -EBUSY;
890                 } else {
891                         set_nmi_callback(unknown_nmi_panic_callback);
892                 }
893         } else {
894                 release_lapic_nmi();
895                 unset_nmi_callback();
896         }
897         return 0;
898 }
899
900 #endif
901
902 EXPORT_SYMBOL(nmi_active);
903 EXPORT_SYMBOL(nmi_watchdog);
904 EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi);
905 EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
906 EXPORT_SYMBOL(reserve_perfctr_nmi);
907 EXPORT_SYMBOL(release_perfctr_nmi);
908 EXPORT_SYMBOL(reserve_evntsel_nmi);
909 EXPORT_SYMBOL(release_evntsel_nmi);
910 EXPORT_SYMBOL(reserve_lapic_nmi);
911 EXPORT_SYMBOL(release_lapic_nmi);
912 EXPORT_SYMBOL(disable_timer_nmi_watchdog);
913 EXPORT_SYMBOL(enable_timer_nmi_watchdog);