2 * linux/arch/i386/nmi.c
4 * NMI watchdog support on APIC systems
6 * Started by Ingo Molnar <mingo@redhat.com>
9 * Mikael Pettersson : AMD K7 support for local APIC NMI watchdog.
10 * Mikael Pettersson : Power Management for local APIC NMI watchdog.
11 * Mikael Pettersson : Pentium 4 support for local APIC NMI watchdog.
13 * Mikael Pettersson : PM converted to driver model. Disable/enable API.
16 #include <linux/config.h>
17 #include <linux/delay.h>
18 #include <linux/interrupt.h>
19 #include <linux/module.h>
20 #include <linux/nmi.h>
21 #include <linux/sysdev.h>
22 #include <linux/sysctl.h>
23 #include <linux/percpu.h>
28 #include "mach_traps.h"
30 unsigned int nmi_watchdog = NMI_NONE;
31 extern int unknown_nmi_panic;
32 static unsigned int nmi_hz = HZ;
33 static unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */
34 static unsigned int nmi_p4_cccr_val;
35 extern void show_registers(struct pt_regs *regs);
37 /* perfctr_nmi_owner tracks the ownership of the perfctr registers:
38 * evtsel_nmi_owner tracks the ownership of the event selection
39 * - different performance counters/ event selection may be reserved for
40 * different subsystems this reservation system just tries to coordinate
43 static DEFINE_PER_CPU(unsigned long, perfctr_nmi_owner);
44 static DEFINE_PER_CPU(unsigned long, evntsel_nmi_owner[3]);
46 /* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's
47 * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now)
49 #define NMI_MAX_COUNTER_BITS 66
/*
 * lapic_nmi_owner tracks the ownership of the lapic NMI hardware:
 * - it may be reserved by some other driver, or not
 * - when not reserved by some other driver, it may be used for
 *   the NMI watchdog, or not
 *
 * This is maintained separately from nmi_active because the NMI
 * watchdog may also be driven from the I/O APIC timer.
 */
static DEFINE_SPINLOCK(lapic_nmi_owner_lock);
static unsigned int lapic_nmi_owner;
#define LAPIC_NMI_WATCHDOG	(1<<0)
#define LAPIC_NMI_RESERVED	(1<<1)

/* nmi_active:
 * +1: the lapic NMI watchdog is active, but can be disabled
 *  0: the lapic NMI watchdog has not been set up, and cannot
 *     be enabled
 * -1: the lapic NMI watchdog is disabled, but can be enabled
 */
int nmi_active;
/* AMD K7: event-select MSR bit fields and the event used as a clock. */
#define K7_EVNTSEL_ENABLE	(1 << 22)
#define K7_EVNTSEL_INT		(1 << 20)
#define K7_EVNTSEL_OS		(1 << 17)
#define K7_EVNTSEL_USR		(1 << 16)
#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING	0x76
#define K7_NMI_EVENT		K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING

/* Intel P6: event-select MSR bit fields and the event used as a clock. */
#define P6_EVNTSEL0_ENABLE	(1 << 22)
#define P6_EVNTSEL_INT		(1 << 20)
#define P6_EVNTSEL_OS		(1 << 17)
#define P6_EVNTSEL_USR		(1 << 16)
#define P6_EVENT_CPU_CLOCKS_NOT_HALTED	0x79
#define P6_NMI_EVENT		P6_EVENT_CPU_CLOCKS_NOT_HALTED

/* Intel P4: performance-monitoring MSRs and ESCR/CCCR field encodings. */
#define MSR_P4_MISC_ENABLE	0x1A0
#define MSR_P4_MISC_ENABLE_PERF_AVAIL	(1<<7)
#define MSR_P4_MISC_ENABLE_PEBS_UNAVAIL	(1<<12)
#define MSR_P4_PERFCTR0		0x300
#define MSR_P4_CCCR0		0x360
#define P4_ESCR_EVENT_SELECT(N)	((N)<<25)
#define P4_ESCR_OS		(1<<3)
#define P4_ESCR_USR		(1<<2)
#define P4_CCCR_OVF_PMI0	(1<<26)
#define P4_CCCR_OVF_PMI1	(1<<27)
#define P4_CCCR_THRESHOLD(N)	((N)<<20)
#define P4_CCCR_COMPLEMENT	(1<<19)
#define P4_CCCR_COMPARE		(1<<18)
#define P4_CCCR_REQUIRED	(3<<16)
#define P4_CCCR_ESCR_SELECT(N)	((N)<<13)
#define P4_CCCR_ENABLE		(1<<12)
/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
   CRU_ESCR0 (with any non-null event selector) through a complemented
   max threshold. [IA32-Vol3, Section 14.9.9] */
#define MSR_P4_IQ_COUNTER0	0x30C
#define P4_NMI_CRU_ESCR0	(P4_ESCR_EVENT_SELECT(0x3F)|P4_ESCR_OS|P4_ESCR_USR)
#define P4_NMI_IQ_CCCR0	\
	(P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT|	\
	 P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE)
112 /* converts an msr to an appropriate reservation bit */
113 static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
115 /* returns the bit offset of the performance counter register */
116 switch (boot_cpu_data.x86_vendor) {
118 return (msr - MSR_K7_PERFCTR0);
119 case X86_VENDOR_INTEL:
120 switch (boot_cpu_data.x86) {
122 return (msr - MSR_P6_PERFCTR0);
124 return (msr - MSR_P4_BPU_PERFCTR0);
130 /* converts an msr to an appropriate reservation bit */
131 static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
133 /* returns the bit offset of the event selection register */
134 switch (boot_cpu_data.x86_vendor) {
136 return (msr - MSR_K7_EVNTSEL0);
137 case X86_VENDOR_INTEL:
138 switch (boot_cpu_data.x86) {
140 return (msr - MSR_P6_EVNTSEL0);
142 return (msr - MSR_P4_BSU_ESCR0);
148 /* checks for a bit availability (hack for oprofile) */
149 int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
151 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
153 return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner)));
156 /* checks the an msr for availability */
157 int avail_to_resrv_perfctr_nmi(unsigned int msr)
159 unsigned int counter;
161 counter = nmi_perfctr_msr_to_bit(msr);
162 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
164 return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner)));
167 int reserve_perfctr_nmi(unsigned int msr)
169 unsigned int counter;
171 counter = nmi_perfctr_msr_to_bit(msr);
172 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
174 if (!test_and_set_bit(counter, &__get_cpu_var(perfctr_nmi_owner)))
179 void release_perfctr_nmi(unsigned int msr)
181 unsigned int counter;
183 counter = nmi_perfctr_msr_to_bit(msr);
184 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
186 clear_bit(counter, &__get_cpu_var(perfctr_nmi_owner));
189 int reserve_evntsel_nmi(unsigned int msr)
191 unsigned int counter;
193 counter = nmi_evntsel_msr_to_bit(msr);
194 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
196 if (!test_and_set_bit(counter, &__get_cpu_var(evntsel_nmi_owner)[0]))
201 void release_evntsel_nmi(unsigned int msr)
203 unsigned int counter;
205 counter = nmi_evntsel_msr_to_bit(msr);
206 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
208 clear_bit(counter, &__get_cpu_var(evntsel_nmi_owner)[0]);
212 /* The performance counters used by NMI_LOCAL_APIC don't trigger when
213 * the CPU is idle. To make sure the NMI watchdog really ticks on all
214 * CPUs during the test make them busy.
216 static __init void nmi_cpu_busy(void *data)
218 volatile int *endflag = data;
219 local_irq_enable_in_hardirq();
220 /* Intentionally don't use cpu_relax here. This is
221 to make sure that the performance counter really ticks,
222 even if there is a simulator or similar that catches the
223 pause instruction. On a real HT machine this is fine because
224 all other CPUs are busy with "useless" delay loops and don't
225 care if they get somewhat less cycles. */
226 while (*endflag == 0)
231 static int __init check_nmi_watchdog(void)
233 volatile int endflag = 0;
234 unsigned int *prev_nmi_count;
237 if (nmi_watchdog == NMI_NONE)
240 prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
244 printk(KERN_INFO "Testing NMI watchdog ... ");
246 if (nmi_watchdog == NMI_LOCAL_APIC)
247 smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0);
249 for_each_possible_cpu(cpu)
250 prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count;
252 mdelay((10*1000)/nmi_hz); // wait 10 ticks
254 for_each_possible_cpu(cpu) {
256 /* Check cpu_callin_map here because that is set
257 after the timer is started. */
258 if (!cpu_isset(cpu, cpu_callin_map))
261 if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) {
263 printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n",
268 lapic_nmi_owner &= ~LAPIC_NMI_WATCHDOG;
269 kfree(prev_nmi_count);
276 /* now that we know it works we can reduce NMI frequency to
277 something more reasonable; makes a difference in some configs */
278 if (nmi_watchdog == NMI_LOCAL_APIC)
281 kfree(prev_nmi_count);
284 /* This needs to happen later in boot so counters are working */
285 late_initcall(check_nmi_watchdog);
287 static int __init setup_nmi_watchdog(char *str)
291 get_option(&str, &nmi);
293 if (nmi >= NMI_INVALID)
298 * If any other x86 CPU has a local APIC, then
299 * please test the NMI stuff there and send me the
300 * missing bits. Right now Intel P6/P4 and AMD K7 only.
302 if ((nmi == NMI_LOCAL_APIC) &&
303 (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
304 (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15))
306 if ((nmi == NMI_LOCAL_APIC) &&
307 (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) &&
308 (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15))
311 * We can enable the IO-APIC watchdog
314 if (nmi == NMI_IO_APIC) {
321 __setup("nmi_watchdog=", setup_nmi_watchdog);
323 static void disable_lapic_nmi_watchdog(void)
327 switch (boot_cpu_data.x86_vendor) {
329 wrmsr(MSR_K7_EVNTSEL0, 0, 0);
331 case X86_VENDOR_INTEL:
332 switch (boot_cpu_data.x86) {
334 if (boot_cpu_data.x86_model > 0xd)
337 wrmsr(MSR_P6_EVNTSEL0, 0, 0);
340 if (boot_cpu_data.x86_model > 0x4)
343 wrmsr(MSR_P4_IQ_CCCR0, 0, 0);
344 wrmsr(MSR_P4_CRU_ESCR0, 0, 0);
350 /* tell do_nmi() and others that we're not active any more */
354 static void enable_lapic_nmi_watchdog(void)
356 if (nmi_active < 0) {
357 nmi_watchdog = NMI_LOCAL_APIC;
358 setup_apic_nmi_watchdog();
362 int reserve_lapic_nmi(void)
364 unsigned int old_owner;
366 spin_lock(&lapic_nmi_owner_lock);
367 old_owner = lapic_nmi_owner;
368 lapic_nmi_owner |= LAPIC_NMI_RESERVED;
369 spin_unlock(&lapic_nmi_owner_lock);
370 if (old_owner & LAPIC_NMI_RESERVED)
372 if (old_owner & LAPIC_NMI_WATCHDOG)
373 disable_lapic_nmi_watchdog();
377 void release_lapic_nmi(void)
379 unsigned int new_owner;
381 spin_lock(&lapic_nmi_owner_lock);
382 new_owner = lapic_nmi_owner & ~LAPIC_NMI_RESERVED;
383 lapic_nmi_owner = new_owner;
384 spin_unlock(&lapic_nmi_owner_lock);
385 if (new_owner & LAPIC_NMI_WATCHDOG)
386 enable_lapic_nmi_watchdog();
389 void disable_timer_nmi_watchdog(void)
391 if ((nmi_watchdog != NMI_IO_APIC) || (nmi_active <= 0))
394 unset_nmi_callback();
396 nmi_watchdog = NMI_NONE;
399 void enable_timer_nmi_watchdog(void)
401 if (nmi_active < 0) {
402 nmi_watchdog = NMI_IO_APIC;
403 touch_nmi_watchdog();
410 static int nmi_pm_active; /* nmi_active before suspend */
412 static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
414 nmi_pm_active = nmi_active;
415 disable_lapic_nmi_watchdog();
419 static int lapic_nmi_resume(struct sys_device *dev)
421 if (nmi_pm_active > 0)
422 enable_lapic_nmi_watchdog();
427 static struct sysdev_class nmi_sysclass = {
428 set_kset_name("lapic_nmi"),
429 .resume = lapic_nmi_resume,
430 .suspend = lapic_nmi_suspend,
433 static struct sys_device device_lapic_nmi = {
435 .cls = &nmi_sysclass,
438 static int __init init_lapic_nmi_sysfs(void)
442 if (nmi_active == 0 || nmi_watchdog != NMI_LOCAL_APIC)
445 error = sysdev_class_register(&nmi_sysclass);
447 error = sysdev_register(&device_lapic_nmi);
450 /* must come after the local APIC's device_initcall() */
451 late_initcall(init_lapic_nmi_sysfs);
453 #endif /* CONFIG_PM */
456 * Activate the NMI watchdog via the local APIC.
457 * Original code written by Keith Owens.
460 static void write_watchdog_counter(const char *descr)
462 u64 count = (u64)cpu_khz * 1000;
464 do_div(count, nmi_hz);
466 Dprintk("setting %s to -0x%08Lx\n", descr, count);
467 wrmsrl(nmi_perfctr_msr, 0 - count);
470 static int setup_k7_watchdog(void)
472 unsigned int evntsel;
474 nmi_perfctr_msr = MSR_K7_PERFCTR0;
476 if (!reserve_perfctr_nmi(nmi_perfctr_msr))
479 if (!reserve_evntsel_nmi(MSR_K7_EVNTSEL0))
482 wrmsrl(MSR_K7_PERFCTR0, 0UL);
484 evntsel = K7_EVNTSEL_INT
489 wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
490 write_watchdog_counter("K7_PERFCTR0");
491 apic_write(APIC_LVTPC, APIC_DM_NMI);
492 evntsel |= K7_EVNTSEL_ENABLE;
493 wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
496 release_perfctr_nmi(nmi_perfctr_msr);
501 static int setup_p6_watchdog(void)
503 unsigned int evntsel;
505 nmi_perfctr_msr = MSR_P6_PERFCTR0;
507 if (!reserve_perfctr_nmi(nmi_perfctr_msr))
510 if (!reserve_evntsel_nmi(MSR_P6_EVNTSEL0))
513 evntsel = P6_EVNTSEL_INT
518 wrmsr(MSR_P6_EVNTSEL0, evntsel, 0);
519 write_watchdog_counter("P6_PERFCTR0");
520 apic_write(APIC_LVTPC, APIC_DM_NMI);
521 evntsel |= P6_EVNTSEL0_ENABLE;
522 wrmsr(MSR_P6_EVNTSEL0, evntsel, 0);
525 release_perfctr_nmi(nmi_perfctr_msr);
530 static int setup_p4_watchdog(void)
532 unsigned int misc_enable, dummy;
534 rdmsr(MSR_P4_MISC_ENABLE, misc_enable, dummy);
535 if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
538 nmi_perfctr_msr = MSR_P4_IQ_COUNTER0;
539 nmi_p4_cccr_val = P4_NMI_IQ_CCCR0;
541 if (smp_num_siblings == 2)
542 nmi_p4_cccr_val |= P4_CCCR_OVF_PMI1;
545 if (!reserve_perfctr_nmi(nmi_perfctr_msr))
548 if (!reserve_evntsel_nmi(MSR_P4_CRU_ESCR0))
551 wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0);
552 wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0);
553 write_watchdog_counter("P4_IQ_COUNTER0");
554 apic_write(APIC_LVTPC, APIC_DM_NMI);
555 wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
558 release_perfctr_nmi(nmi_perfctr_msr);
563 void setup_apic_nmi_watchdog (void)
565 switch (boot_cpu_data.x86_vendor) {
567 if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15)
569 if (!setup_k7_watchdog())
572 case X86_VENDOR_INTEL:
573 switch (boot_cpu_data.x86) {
575 if (boot_cpu_data.x86_model > 0xd)
578 if(!setup_p6_watchdog())
582 if (boot_cpu_data.x86_model > 0x4)
585 if (!setup_p4_watchdog())
595 lapic_nmi_owner = LAPIC_NMI_WATCHDOG;
600 * the best way to detect whether a CPU has a 'hard lockup' problem
601 * is to check it's local APIC timer IRQ counts. If they are not
602 * changing then that CPU has some problem.
604 * as these watchdog NMI IRQs are generated on every CPU, we only
605 * have to check the current processor.
607 * since NMIs don't listen to _any_ locks, we have to be extremely
608 * careful not to rely on unsafe variables. The printk might lock
609 * up though, so we have to break up any console locks first ...
610 * [when there will be more tty-related locks, break them up
615 last_irq_sums [NR_CPUS],
616 alert_counter [NR_CPUS];
618 void touch_nmi_watchdog (void)
623 * Just reset the alert counters, (other CPUs might be
624 * spinning on locks we hold):
626 for_each_possible_cpu(i)
627 alert_counter[i] = 0;
630 * Tickle the softlockup detector too:
632 touch_softlockup_watchdog();
634 EXPORT_SYMBOL(touch_nmi_watchdog);
636 extern void die_nmi(struct pt_regs *, const char *msg);
638 void nmi_watchdog_tick (struct pt_regs * regs)
642 * Since current_thread_info()-> is always on the stack, and we
643 * always switch the stack NMI-atomically, it's safe to use
644 * smp_processor_id().
647 int cpu = smp_processor_id();
649 sum = per_cpu(irq_stat, cpu).apic_timer_irqs;
651 if (last_irq_sums[cpu] == sum) {
653 * Ayiee, looks like this CPU is stuck ...
654 * wait a few IRQs (5 seconds) before doing the oops ...
656 alert_counter[cpu]++;
657 if (alert_counter[cpu] == 5*nmi_hz)
659 * die_nmi will return ONLY if NOTIFY_STOP happens..
661 die_nmi(regs, "BUG: NMI Watchdog detected LOCKUP");
663 last_irq_sums[cpu] = sum;
664 alert_counter[cpu] = 0;
666 if (nmi_perfctr_msr) {
667 if (nmi_perfctr_msr == MSR_P4_IQ_COUNTER0) {
670 * - An overflown perfctr will assert its interrupt
671 * until the OVF flag in its CCCR is cleared.
672 * - LVTPC is masked on interrupt and must be
673 * unmasked by the LVTPC handler.
675 wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
676 apic_write(APIC_LVTPC, APIC_DM_NMI);
678 else if (nmi_perfctr_msr == MSR_P6_PERFCTR0) {
679 /* Only P6 based Pentium M need to re-unmask
680 * the apic vector but it doesn't hurt
681 * other P6 variant */
682 apic_write(APIC_LVTPC, APIC_DM_NMI);
684 write_watchdog_counter(NULL);
/* NMI callback installed when the unknown_nmi_panic sysctl is set: panic
 * (via die_nmi) on any NMI that has no known hardware reason bits set. */
static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
{
	unsigned char reason = get_nmi_reason();
	char buf[64];

	if (!(reason & 0xc0)) {
		sprintf(buf, "NMI received for unknown reason %02x\n", reason);
		die_nmi(regs, buf);
	}
	return 0;
}
703 * proc handler for /proc/sys/kernel/unknown_nmi_panic
705 int proc_unknown_nmi_panic(ctl_table *table, int write, struct file *file,
706 void __user *buffer, size_t *length, loff_t *ppos)
710 old_state = unknown_nmi_panic;
711 proc_dointvec(table, write, file, buffer, length, ppos);
712 if (!!old_state == !!unknown_nmi_panic)
715 if (unknown_nmi_panic) {
716 if (reserve_lapic_nmi() < 0) {
717 unknown_nmi_panic = 0;
720 set_nmi_callback(unknown_nmi_panic_callback);
724 unset_nmi_callback();
/* Exported watchdog state and perfctr/lapic reservation API for other drivers
 * (e.g. oprofile) that share the performance-monitoring hardware. */
EXPORT_SYMBOL(nmi_active);
EXPORT_SYMBOL(nmi_watchdog);
EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi);
EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
EXPORT_SYMBOL(reserve_perfctr_nmi);
EXPORT_SYMBOL(release_perfctr_nmi);
EXPORT_SYMBOL(reserve_evntsel_nmi);
EXPORT_SYMBOL(release_evntsel_nmi);
EXPORT_SYMBOL(reserve_lapic_nmi);
EXPORT_SYMBOL(release_lapic_nmi);
EXPORT_SYMBOL(disable_timer_nmi_watchdog);
EXPORT_SYMBOL(enable_timer_nmi_watchdog);