/*
 *  linux/arch/i386/nmi.c
 *
 *  NMI watchdog support on APIC systems
 *
 *  Started by Ingo Molnar <mingo@redhat.com>
 *
 *  Mikael Pettersson	: AMD K7 support for local APIC NMI watchdog.
 *  Mikael Pettersson	: Power Management for local APIC NMI watchdog.
 *  Mikael Pettersson	: Pentium 4 support for local APIC NMI watchdog.
 *  Mikael Pettersson	: PM converted to driver model. Disable/enable API.
 */
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/nmi.h>
#include <linux/sysdev.h>
#include <linux/sysctl.h>
#include <linux/percpu.h>
#include <linux/dmi.h>
#include <linux/kprobes.h>
#include <linux/cpumask.h>

#include <asm/smp.h>
#include <asm/nmi.h>
#include <asm/kdebug.h>
#include <asm/intel_arch_perfmon.h>

#include "mach_traps.h"
int unknown_nmi_panic;
int nmi_watchdog_enabled;
/* perfctr_nmi_owner tracks the ownership of the perfctr registers:
 * evtsel_nmi_owner tracks the ownership of the event selection
 *   - different performance counters/event selects may be reserved by
 *     different subsystems; this reservation system just tries to
 *     coordinate things a little
 */
static DEFINE_PER_CPU(unsigned long, perfctr_nmi_owner);
static DEFINE_PER_CPU(unsigned long, evntsel_nmi_owner[3]);
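/*
 * For illustration only (not part of the original flow): a typical user of
 * this reservation API, such as oprofile or the watchdog setup code below,
 * does roughly
 *
 *	if (!reserve_perfctr_nmi(MSR_K7_PERFCTR0))
 *		return 0;
 *	if (!reserve_evntsel_nmi(MSR_K7_EVNTSEL0)) {
 *		release_perfctr_nmi(MSR_K7_PERFCTR0);
 *		return 0;
 *	}
 *	... program the counter and event select MSRs ...
 *
 * The reserve functions return 1 on success and 0 if the bit was already
 * taken, so this is cooperative coordination rather than hard enforcement.
 */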
static cpumask_t backtrace_mask = CPU_MASK_NONE;
/* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and its
 * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now)
 */
#define NMI_MAX_COUNTER_BITS 66
/* nmi_active:
 * >0: the lapic NMI watchdog is active, but can be disabled
 * <0: the lapic NMI watchdog has not been set up, and cannot
 *     be enabled
 *  0: the lapic NMI watchdog is disabled, but can be enabled
 */
atomic_t nmi_active = ATOMIC_INIT(0);		/* oprofile uses this */

unsigned int nmi_watchdog = NMI_DEFAULT;
static unsigned int nmi_hz = HZ;
struct nmi_watchdog_ctlblk {
	int enabled;
	u64 check_bit;

	unsigned int cccr_msr;
	unsigned int perfctr_msr;	/* the MSR to reset in NMI handler */
	unsigned int evntsel_msr;	/* the MSR to select the events to handle */
};
static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);
/* local prototypes */
static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu);

extern void show_registers(struct pt_regs *regs);
extern int unknown_nmi_panic;
/* converts an msr to an appropriate reservation bit */
static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
{
	/* returns the bit offset of the performance counter register */
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
		return (msr - MSR_K7_PERFCTR0);
	case X86_VENDOR_INTEL:
		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
			return (msr - MSR_ARCH_PERFMON_PERFCTR0);

		switch (boot_cpu_data.x86) {
		case 6:
			return (msr - MSR_P6_PERFCTR0);
		case 15:
			return (msr - MSR_P4_BPU_PERFCTR0);
		}
	}
	return 0;
}
/* converts an msr to an appropriate reservation bit */
static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
{
	/* returns the bit offset of the event selection register */
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
		return (msr - MSR_K7_EVNTSEL0);
	case X86_VENDOR_INTEL:
		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
			return (msr - MSR_ARCH_PERFMON_EVENTSEL0);

		switch (boot_cpu_data.x86) {
		case 6:
			return (msr - MSR_P6_EVNTSEL0);
		case 15:
			return (msr - MSR_P4_BSU_ESCR0);
		}
	}
	return 0;
}
/* checks whether a bit is available (hack for oprofile) */
int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
{
	BUG_ON(counter > NMI_MAX_COUNTER_BITS);

	return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner)));
}
/* checks an msr for availability */
int avail_to_resrv_perfctr_nmi(unsigned int msr)
{
	unsigned int counter;

	counter = nmi_perfctr_msr_to_bit(msr);
	BUG_ON(counter > NMI_MAX_COUNTER_BITS);

	return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner)));
}
int reserve_perfctr_nmi(unsigned int msr)
{
	unsigned int counter;

	counter = nmi_perfctr_msr_to_bit(msr);
	BUG_ON(counter > NMI_MAX_COUNTER_BITS);

	if (!test_and_set_bit(counter, &__get_cpu_var(perfctr_nmi_owner)))
		return 1;
	return 0;
}
void release_perfctr_nmi(unsigned int msr)
{
	unsigned int counter;

	counter = nmi_perfctr_msr_to_bit(msr);
	BUG_ON(counter > NMI_MAX_COUNTER_BITS);

	clear_bit(counter, &__get_cpu_var(perfctr_nmi_owner));
}
int reserve_evntsel_nmi(unsigned int msr)
{
	unsigned int counter;

	counter = nmi_evntsel_msr_to_bit(msr);
	BUG_ON(counter > NMI_MAX_COUNTER_BITS);

	if (!test_and_set_bit(counter, &__get_cpu_var(evntsel_nmi_owner)[0]))
		return 1;
	return 0;
}
void release_evntsel_nmi(unsigned int msr)
{
	unsigned int counter;

	counter = nmi_evntsel_msr_to_bit(msr);
	BUG_ON(counter > NMI_MAX_COUNTER_BITS);

	clear_bit(counter, &__get_cpu_var(evntsel_nmi_owner)[0]);
}
static __cpuinit inline int nmi_known_cpu(void)
{
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
		return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6)
			|| (boot_cpu_data.x86 == 16));
	case X86_VENDOR_INTEL:
		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
			return 1;
		else
			return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6));
	}
	return 0;
}
static int endflag __initdata = 0;

/* The performance counters used by NMI_LOCAL_APIC don't trigger when
 * the CPU is idle. To make sure the NMI watchdog really ticks on all
 * CPUs during the test, make them busy.
 */
static __init void nmi_cpu_busy(void *data)
{
	local_irq_enable_in_hardirq();
	/* Intentionally don't use cpu_relax here. This is
	   to make sure that the performance counter really ticks,
	   even if there is a simulator or similar that catches the
	   pause instruction. On a real HT machine this is fine because
	   all other CPUs are busy with "useless" delay loops and don't
	   care if they get somewhat less cycles. */
	while (endflag == 0)
		mb();
}
static unsigned int adjust_for_32bit_ctr(unsigned int hz)
{
	u64 counter_val;
	unsigned int retval = hz;

	/*
	 * On Intel CPUs with P6/ARCH_PERFMON only 32 bits in the counter
	 * are writable, with higher bits sign extending from bit 31.
	 * So we can only program the counter with 31 bit values; bit 31
	 * must be set so that bits 32 and up sign-extend to 1.
	 * Find the appropriate nmi_hz.
	 */
	counter_val = (u64)cpu_khz * 1000;
	do_div(counter_val, retval);
	if (counter_val > 0x7fffffffULL) {
		u64 count = (u64)cpu_khz * 1000;
		do_div(count, 0x7fffffffUL);
		retval = count + 1;
	}
	return retval;
}
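/*
 * Worked example (the CPU speed is only an assumed illustration): on a
 * hypothetical 3 GHz CPU, cpu_khz == 3000000, so one second's worth of
 * cycles is 3,000,000,000, which exceeds the 31-bit limit of 0x7fffffff
 * (2,147,483,647).  If nmi_hz were 1, the per-tick count would not fit,
 * so the function returns 3000000000 / 0x7fffffff + 1 == 2, i.e. the
 * watchdog must fire at least twice per second.  With the default
 * nmi_hz == HZ the per-tick count is only a few million cycles, it fits
 * easily, and hz is returned unchanged.
 */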
static int __init check_nmi_watchdog(void)
{
	unsigned int *prev_nmi_count;
	int cpu;

	/* Enable NMI watchdog for newer systems.
	   Probably safe on most older systems too, but let's be careful.
	   IBM ThinkPads use INT10 inside SMM and that allows early NMI inside SMM
	   which hangs the system. Disable watchdog for all thinkpads */
	if (nmi_watchdog == NMI_DEFAULT && dmi_get_year(DMI_BIOS_DATE) >= 2004 &&
	    !dmi_name_in_vendors("ThinkPad"))
		nmi_watchdog = NMI_LOCAL_APIC;

	if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DEFAULT))
		return 0;

	if (!atomic_read(&nmi_active))
		return 0;

	prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
	if (!prev_nmi_count)
		return -1;

	printk(KERN_INFO "Testing NMI watchdog ... ");

	if (nmi_watchdog == NMI_LOCAL_APIC)
		smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0);

	for_each_possible_cpu(cpu)
		prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count;
	mdelay((10*1000)/nmi_hz); /* wait 10 ticks */

	for_each_possible_cpu(cpu) {
#ifdef CONFIG_SMP
		/* Check cpu_callin_map here because that is set
		   after the timer is started. */
		if (!cpu_isset(cpu, cpu_callin_map))
			continue;
#endif
		if (!per_cpu(nmi_watchdog_ctlblk, cpu).enabled)
			continue;
		if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) {
			printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n",
				cpu, prev_nmi_count[cpu], nmi_count(cpu));
			per_cpu(nmi_watchdog_ctlblk, cpu).enabled = 0;
			atomic_dec(&nmi_active);
		}
	}
	if (!atomic_read(&nmi_active)) {
		kfree(prev_nmi_count);
		atomic_set(&nmi_active, -1);
		return -1;
	}
	endflag = 1;
	printk("OK.\n");

	/* now that we know it works we can reduce NMI frequency to
	   something more reasonable; makes a difference in some configs */
	if (nmi_watchdog == NMI_LOCAL_APIC) {
		struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

		nmi_hz = 1;
		if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
		    wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
			nmi_hz = adjust_for_32bit_ctr(nmi_hz);
		}
	}

	kfree(prev_nmi_count);
	return 0;
}
/* This needs to happen later in boot so counters are working */
late_initcall(check_nmi_watchdog);
static int __init setup_nmi_watchdog(char *str)
{
	int nmi;

	get_option(&str, &nmi);

	if ((nmi >= NMI_INVALID) || (nmi < NMI_NONE))
		return 0;

	nmi_watchdog = nmi;
	return 1;
}
__setup("nmi_watchdog=", setup_nmi_watchdog);
static void disable_lapic_nmi_watchdog(void)
{
	BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);

	if (atomic_read(&nmi_active) <= 0)
		return;

	on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);

	BUG_ON(atomic_read(&nmi_active) != 0);
}
static void enable_lapic_nmi_watchdog(void)
{
	BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);

	/* are we already enabled? */
	if (atomic_read(&nmi_active) != 0)
		return;

	/* are we lapic aware? */
	if (nmi_known_cpu() <= 0)
		return;

	on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
	touch_nmi_watchdog();
}
void disable_timer_nmi_watchdog(void)
{
	BUG_ON(nmi_watchdog != NMI_IO_APIC);

	if (atomic_read(&nmi_active) <= 0)
		return;

	disable_irq(0);
	on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);

	BUG_ON(atomic_read(&nmi_active) != 0);
}
void enable_timer_nmi_watchdog(void)
{
	BUG_ON(nmi_watchdog != NMI_IO_APIC);

	if (atomic_read(&nmi_active) == 0) {
		touch_nmi_watchdog();
		on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
		enable_irq(0);
	}
}
static void __acpi_nmi_disable(void *__unused)
{
	apic_write_around(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
}

/*
 * Disable timer based NMIs on all CPUs:
 */
void acpi_nmi_disable(void)
{
	if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
		on_each_cpu(__acpi_nmi_disable, NULL, 0, 1);
}

static void __acpi_nmi_enable(void *__unused)
{
	apic_write_around(APIC_LVT0, APIC_DM_NMI);
}

/*
 * Enable timer based NMIs on all CPUs:
 */
void acpi_nmi_enable(void)
{
	if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
		on_each_cpu(__acpi_nmi_enable, NULL, 0, 1);
}
#ifdef CONFIG_PM

static int nmi_pm_active; /* nmi_active before suspend */
static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
{
	/* only CPU0 goes here, other CPUs should be offline */
	nmi_pm_active = atomic_read(&nmi_active);
	stop_apic_nmi_watchdog(NULL);
	BUG_ON(atomic_read(&nmi_active) != 0);
	return 0;
}
static int lapic_nmi_resume(struct sys_device *dev)
{
	/* only CPU0 goes here, other CPUs should be offline */
	if (nmi_pm_active > 0) {
		setup_apic_nmi_watchdog(NULL);
		touch_nmi_watchdog();
	}
	return 0;
}
static struct sysdev_class nmi_sysclass = {
	set_kset_name("lapic_nmi"),
	.resume		= lapic_nmi_resume,
	.suspend	= lapic_nmi_suspend,
};

static struct sys_device device_lapic_nmi = {
	.id	= 0,
	.cls	= &nmi_sysclass,
};
static int __init init_lapic_nmi_sysfs(void)
{
	int error;

	/* should really be a BUG_ON but b/c this is an
	 * init call, it just doesn't work. -dcz
	 */
	if (nmi_watchdog != NMI_LOCAL_APIC)
		return 0;

	if (atomic_read(&nmi_active) < 0)
		return 0;

	error = sysdev_class_register(&nmi_sysclass);
	if (!error)
		error = sysdev_register(&device_lapic_nmi);
	return error;
}
/* must come after the local APIC's device_initcall() */
late_initcall(init_lapic_nmi_sysfs);
#endif	/* CONFIG_PM */

/*
 * Activate the NMI watchdog via the local APIC.
 * Original code written by Keith Owens.
 */
static void write_watchdog_counter(unsigned int perfctr_msr, const char *descr)
{
	u64 count = (u64)cpu_khz * 1000;

	do_div(count, nmi_hz);
	if (descr)
		Dprintk("setting %s to -0x%08Lx\n", descr, count);
	wrmsrl(perfctr_msr, 0 - count);
}

static void write_watchdog_counter32(unsigned int perfctr_msr,
			const char *descr)
{
	u64 count = (u64)cpu_khz * 1000;

	do_div(count, nmi_hz);
	if (descr)
		Dprintk("setting %s to -0x%08Lx\n", descr, count);
	wrmsr(perfctr_msr, (u32)(-count), 0);
}
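/*
 * For illustration (an assumed 2 GHz CPU, not a value from the source):
 * cpu_khz == 2000000, so count == 2000000 * 1000 / nmi_hz cycles.  Writing
 * -count makes the counter overflow, and raise a performance counter NMI,
 * after roughly count unhalted cycles, i.e. about nmi_hz times per second
 * on a busy CPU.  With nmi_hz == 1000 that is 2,000,000 cycles per tick.
 */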
/* Note that these events don't tick when the CPU idles. This means
   the frequency varies with CPU load. */

#define K7_EVNTSEL_ENABLE	(1 << 22)
#define K7_EVNTSEL_INT		(1 << 20)
#define K7_EVNTSEL_OS		(1 << 17)
#define K7_EVNTSEL_USR		(1 << 16)
#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING	0x76
#define K7_NMI_EVENT		K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
static int setup_k7_watchdog(void)
{
	unsigned int perfctr_msr, evntsel_msr;
	unsigned int evntsel;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	perfctr_msr = MSR_K7_PERFCTR0;
	evntsel_msr = MSR_K7_EVNTSEL0;
	if (!reserve_perfctr_nmi(perfctr_msr))
		goto fail;

	if (!reserve_evntsel_nmi(evntsel_msr))
		goto fail1;

	wrmsrl(perfctr_msr, 0UL);

	evntsel = K7_EVNTSEL_INT
		| K7_EVNTSEL_OS
		| K7_EVNTSEL_USR
		| K7_NMI_EVENT;

	/* setup the timer */
	wrmsr(evntsel_msr, evntsel, 0);
	write_watchdog_counter(perfctr_msr, "K7_PERFCTR0");
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	evntsel |= K7_EVNTSEL_ENABLE;
	wrmsr(evntsel_msr, evntsel, 0);

	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = 0;	/* unused */
	wd->check_bit = 1ULL << 63;
	return 1;
fail1:
	release_perfctr_nmi(perfctr_msr);
fail:
	return 0;
}
static void stop_k7_watchdog(void)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	wrmsr(wd->evntsel_msr, 0, 0);

	release_evntsel_nmi(wd->evntsel_msr);
	release_perfctr_nmi(wd->perfctr_msr);
}
#define P6_EVNTSEL0_ENABLE	(1 << 22)
#define P6_EVNTSEL_INT		(1 << 20)
#define P6_EVNTSEL_OS		(1 << 17)
#define P6_EVNTSEL_USR		(1 << 16)
#define P6_EVENT_CPU_CLOCKS_NOT_HALTED	0x79
#define P6_NMI_EVENT		P6_EVENT_CPU_CLOCKS_NOT_HALTED
static int setup_p6_watchdog(void)
{
	unsigned int perfctr_msr, evntsel_msr;
	unsigned int evntsel;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	perfctr_msr = MSR_P6_PERFCTR0;
	evntsel_msr = MSR_P6_EVNTSEL0;
	if (!reserve_perfctr_nmi(perfctr_msr))
		goto fail;

	if (!reserve_evntsel_nmi(evntsel_msr))
		goto fail1;

	wrmsrl(perfctr_msr, 0UL);

	evntsel = P6_EVNTSEL_INT
		| P6_EVNTSEL_OS
		| P6_EVNTSEL_USR
		| P6_NMI_EVENT;

	/* setup the timer */
	wrmsr(evntsel_msr, evntsel, 0);
	nmi_hz = adjust_for_32bit_ctr(nmi_hz);
	write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0");
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	evntsel |= P6_EVNTSEL0_ENABLE;
	wrmsr(evntsel_msr, evntsel, 0);

	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = 0;	/* unused */
	wd->check_bit = 1ULL << 39;
	return 1;
fail1:
	release_perfctr_nmi(perfctr_msr);
fail:
	return 0;
}
static void stop_p6_watchdog(void)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	wrmsr(wd->evntsel_msr, 0, 0);

	release_evntsel_nmi(wd->evntsel_msr);
	release_perfctr_nmi(wd->perfctr_msr);
}
/* Note that these events don't tick when the CPU idles. This means
   the frequency varies with CPU load. */

#define MSR_P4_MISC_ENABLE_PERF_AVAIL	(1<<7)
#define P4_ESCR_EVENT_SELECT(N)	((N)<<25)
#define P4_ESCR_OS		(1<<3)
#define P4_ESCR_USR		(1<<2)
#define P4_CCCR_OVF_PMI0	(1<<26)
#define P4_CCCR_OVF_PMI1	(1<<27)
#define P4_CCCR_THRESHOLD(N)	((N)<<20)
#define P4_CCCR_COMPLEMENT	(1<<19)
#define P4_CCCR_COMPARE		(1<<18)
#define P4_CCCR_REQUIRED	(3<<16)
#define P4_CCCR_ESCR_SELECT(N)	((N)<<13)
#define P4_CCCR_ENABLE		(1<<12)
#define P4_CCCR_OVF		(1<<31)
/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
   CRU_ESCR0 (with any non-null event selector) through a complemented
   max threshold. [IA32-Vol3, Section 14.9.9] */
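/*
 * Why this behaves like a clock (explanatory note, not original text):
 * with P4_CCCR_COMPARE and P4_CCCR_COMPLEMENT set and a threshold of 15,
 * the CCCR counts every cycle in which the filtered event count is less
 * than or equal to the threshold, which is every cycle, so IQ_COUNTER0
 * effectively counts CPU cycles regardless of which ESCR event is chosen.
 */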
static int setup_p4_watchdog(void)
{
	unsigned int perfctr_msr, evntsel_msr, cccr_msr;
	unsigned int evntsel, cccr_val;
	unsigned int misc_enable, dummy;
	unsigned int ht_num;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
	if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
		return 0;

#ifdef CONFIG_SMP
	/* detect which hyperthread we are on */
	if (smp_num_siblings == 2) {
		unsigned int ebx, apicid;

		ebx = cpuid_ebx(1);
		apicid = (ebx >> 24) & 0xff;
		ht_num = apicid & 1;
	} else
#endif
		ht_num = 0;

	/* performance counters are shared resources
	 * assign each hyperthread its own set
	 * (re-use the ESCR0 register, seems safe
	 * and keeps the cccr_val the same)
	 */
	if (!ht_num) {
		/* logical cpu 0 */
		perfctr_msr = MSR_P4_IQ_PERFCTR0;
		evntsel_msr = MSR_P4_CRU_ESCR0;
		cccr_msr = MSR_P4_IQ_CCCR0;
		cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
	} else {
		/* logical cpu 1 */
		perfctr_msr = MSR_P4_IQ_PERFCTR1;
		evntsel_msr = MSR_P4_CRU_ESCR0;
		cccr_msr = MSR_P4_IQ_CCCR1;
		cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4);
	}

	if (!reserve_perfctr_nmi(perfctr_msr))
		goto fail;

	if (!reserve_evntsel_nmi(evntsel_msr))
		goto fail1;

	evntsel = P4_ESCR_EVENT_SELECT(0x3F)
		| P4_ESCR_OS
		| P4_ESCR_USR;

	cccr_val |= P4_CCCR_THRESHOLD(15)
		 | P4_CCCR_COMPLEMENT
		 | P4_CCCR_COMPARE
		 | P4_CCCR_REQUIRED;

	wrmsr(evntsel_msr, evntsel, 0);
	wrmsr(cccr_msr, cccr_val, 0);
	write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0");
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	cccr_val |= P4_CCCR_ENABLE;
	wrmsr(cccr_msr, cccr_val, 0);
	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = cccr_msr;
	wd->check_bit = 1ULL << 39;
	return 1;
fail1:
	release_perfctr_nmi(perfctr_msr);
fail:
	return 0;
}
static void stop_p4_watchdog(void)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	wrmsr(wd->cccr_msr, 0, 0);
	wrmsr(wd->evntsel_msr, 0, 0);

	release_evntsel_nmi(wd->evntsel_msr);
	release_perfctr_nmi(wd->perfctr_msr);
}
#define ARCH_PERFMON_NMI_EVENT_SEL	ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
#define ARCH_PERFMON_NMI_EVENT_UMASK	ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
static int setup_intel_arch_watchdog(void)
{
	unsigned int ebx;
	union cpuid10_eax eax;
	unsigned int unused;
	unsigned int perfctr_msr, evntsel_msr;
	unsigned int evntsel;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	/*
	 * Check whether the Architectural PerfMon supports
	 * Unhalted Core Cycles Event or not.
	 * NOTE: Corresponding bit = 0 in ebx indicates event present.
	 */
	cpuid(10, &(eax.full), &ebx, &unused, &unused);
	if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
	    (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
		goto fail;

	perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0;
	evntsel_msr = MSR_ARCH_PERFMON_EVENTSEL0;

	if (!reserve_perfctr_nmi(perfctr_msr))
		goto fail;

	if (!reserve_evntsel_nmi(evntsel_msr))
		goto fail1;

	wrmsrl(perfctr_msr, 0UL);

	evntsel = ARCH_PERFMON_EVENTSEL_INT
		| ARCH_PERFMON_EVENTSEL_OS
		| ARCH_PERFMON_EVENTSEL_USR
		| ARCH_PERFMON_NMI_EVENT_SEL
		| ARCH_PERFMON_NMI_EVENT_UMASK;

	/* setup the timer */
	wrmsr(evntsel_msr, evntsel, 0);
	nmi_hz = adjust_for_32bit_ctr(nmi_hz);
	write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0");
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
	wrmsr(evntsel_msr, evntsel, 0);

	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = 0;	/* unused */
	wd->check_bit = 1ULL << (eax.split.bit_width - 1);
	return 1;
fail1:
	release_perfctr_nmi(perfctr_msr);
fail:
	return 0;
}
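/*
 * Note on check_bit (explanatory, based on the handler below): the counter
 * is programmed with a negative value, so its top implemented bit stays set
 * until it overflows.  A bit_width of 40, for example, gives check_bit ==
 * 1ULL << 39; nmi_watchdog_tick() tests that bit and treats the NMI as a
 * watchdog tick only once the bit has cleared, i.e. the counter wrapped.
 */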
static void stop_intel_arch_watchdog(void)
{
	unsigned int ebx;
	union cpuid10_eax eax;
	unsigned int unused;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	/*
	 * Check whether the Architectural PerfMon supports
	 * Unhalted Core Cycles Event or not.
	 * NOTE: Corresponding bit = 0 in ebx indicates event present.
	 */
	cpuid(10, &(eax.full), &ebx, &unused, &unused);
	if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
	    (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
		return;

	wrmsr(wd->evntsel_msr, 0, 0);
	release_evntsel_nmi(wd->evntsel_msr);
	release_perfctr_nmi(wd->perfctr_msr);
}
void setup_apic_nmi_watchdog(void *unused)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	/* only support LOCAL and IO APICs for now */
	if ((nmi_watchdog != NMI_LOCAL_APIC) &&
	    (nmi_watchdog != NMI_IO_APIC))
		return;

	if (wd->enabled == 1)
		return;

	/* cheap hack to support suspend/resume */
	/* if cpu0 is not active neither should the other cpus */
	if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0))
		return;

	if (nmi_watchdog == NMI_LOCAL_APIC) {
		switch (boot_cpu_data.x86_vendor) {
		case X86_VENDOR_AMD:
			if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 &&
			    boot_cpu_data.x86 != 16)
				return;
			if (!setup_k7_watchdog())
				return;
			break;
		case X86_VENDOR_INTEL:
			if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
				if (!setup_intel_arch_watchdog())
					return;
				break;
			}
			switch (boot_cpu_data.x86) {
			case 6:
				if (boot_cpu_data.x86_model > 0xd)
					return;
				if (!setup_p6_watchdog())
					return;
				break;
			case 15:
				if (boot_cpu_data.x86_model > 0x4)
					return;
				if (!setup_p4_watchdog())
					return;
				break;
			default:
				return;
			}
			break;
		default:
			return;
		}
	}
	wd->enabled = 1;
	atomic_inc(&nmi_active);
}
void stop_apic_nmi_watchdog(void *unused)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	/* only support LOCAL and IO APICs for now */
	if ((nmi_watchdog != NMI_LOCAL_APIC) &&
	    (nmi_watchdog != NMI_IO_APIC))
		return;

	if (wd->enabled == 0)
		return;

	if (nmi_watchdog == NMI_LOCAL_APIC) {
		switch (boot_cpu_data.x86_vendor) {
		case X86_VENDOR_AMD:
			stop_k7_watchdog();
			break;
		case X86_VENDOR_INTEL:
			if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
				stop_intel_arch_watchdog();
				break;
			}
			switch (boot_cpu_data.x86) {
			case 6:
				if (boot_cpu_data.x86_model > 0xd)
					break;
				stop_p6_watchdog();
				break;
			case 15:
				if (boot_cpu_data.x86_model > 0x4)
					break;
				stop_p4_watchdog();
				break;
			}
			break;
		default:
			return;
		}
	}
	wd->enabled = 0;
	atomic_dec(&nmi_active);
}
/*
 * the best way to detect whether a CPU has a 'hard lockup' problem
 * is to check its local APIC timer IRQ counts. If they are not
 * changing then that CPU has some problem.
 *
 * as these watchdog NMI IRQs are generated on every CPU, we only
 * have to check the current processor.
 *
 * since NMIs don't listen to _any_ locks, we have to be extremely
 * careful not to rely on unsafe variables. The printk might lock
 * up though, so we have to break up any console locks first ...
 * [when there will be more tty-related locks, break them up
 *  here too!]
 */

static unsigned int
	last_irq_sums [NR_CPUS],
	alert_counter [NR_CPUS];
void touch_nmi_watchdog(void)
{
	if (nmi_watchdog > 0) {
		unsigned cpu;

		/*
		 * Just reset the alert counters (other CPUs might be
		 * spinning on locks we hold):
		 */
		for_each_present_cpu(cpu)
			alert_counter[cpu] = 0;
	}

	/*
	 * Tickle the softlockup detector too:
	 */
	touch_softlockup_watchdog();
}
EXPORT_SYMBOL(touch_nmi_watchdog);
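/*
 * Usage sketch (illustrative only): code that legitimately keeps a CPU busy
 * for a long time is expected to poke the watchdog periodically, e.g.
 *
 *	while (waiting_for_slow_hardware()) {
 *		touch_nmi_watchdog();
 *		cpu_relax();
 *	}
 *
 * waiting_for_slow_hardware() is a made-up placeholder; the point is that
 * resetting alert_counter[] keeps the 5*nmi_hz lockup threshold in
 * nmi_watchdog_tick() from being reached.
 */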
extern void die_nmi(struct pt_regs *, const char *msg);

__kprobes int nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
{
	/*
	 * Since current_thread_info()-> is always on the stack, and we
	 * always switch the stack NMI-atomically, it's safe to use
	 * smp_processor_id().
	 */
	unsigned int sum;
	int touched = 0;
	int cpu = smp_processor_id();
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
	u64 dummy;
	int rc = 0;

	/* check for other users first */
	if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
			== NOTIFY_STOP) {
		rc = 1;
		touched = 1;
	}

	if (cpu_isset(cpu, backtrace_mask)) {
		static DEFINE_SPINLOCK(lock);	/* Serialise the printks */

		spin_lock(&lock);
		printk("NMI backtrace for cpu %d\n", cpu);
		dump_stack();
		spin_unlock(&lock);
		cpu_clear(cpu, backtrace_mask);
	}

	sum = per_cpu(irq_stat, cpu).apic_timer_irqs;

	/* if the apic timer isn't firing, this cpu isn't doing much */
	if (!touched && last_irq_sums[cpu] == sum) {
		/*
		 * Ayiee, looks like this CPU is stuck ...
		 * wait a few IRQs (5 seconds) before doing the oops ...
		 */
		alert_counter[cpu]++;
		if (alert_counter[cpu] == 5*nmi_hz)
			/*
			 * die_nmi will return ONLY if NOTIFY_STOP happens..
			 */
			die_nmi(regs, "BUG: NMI Watchdog detected LOCKUP");
	} else {
		last_irq_sums[cpu] = sum;
		alert_counter[cpu] = 0;
	}
	/* see if the nmi watchdog went off */
	if (!touched) {
		if (nmi_watchdog == NMI_LOCAL_APIC) {
			rdmsrl(wd->perfctr_msr, dummy);
			if (dummy & wd->check_bit) {
				/* this wasn't a watchdog timer interrupt */
				goto done;
			}

			/* only Intel P4 uses the cccr msr */
			if (wd->cccr_msr != 0) {
				/*
				 * P4 quirks:
				 * - An overflown perfctr will assert its interrupt
				 *   until the OVF flag in its CCCR is cleared.
				 * - LVTPC is masked on interrupt and must be
				 *   unmasked by the LVTPC handler.
				 */
				rdmsrl(wd->cccr_msr, dummy);
				dummy &= ~P4_CCCR_OVF;
				wrmsrl(wd->cccr_msr, dummy);
				apic_write(APIC_LVTPC, APIC_DM_NMI);
				/* start the cycle over again */
				write_watchdog_counter(wd->perfctr_msr, NULL);
			} else if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
				   wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
				/* P6 based Pentium M needs to re-unmask
				 * the apic vector, but it doesn't hurt
				 * other P6 variants.
				 * ArchPerfmon/Core Duo also needs this */
				apic_write(APIC_LVTPC, APIC_DM_NMI);
				/* P6/ARCH_PERFMON has 32 bit counter write */
				write_watchdog_counter32(wd->perfctr_msr, NULL);
			} else {
				/* start the cycle over again */
				write_watchdog_counter(wd->perfctr_msr, NULL);
			}
			rc = 1;
		} else if (nmi_watchdog == NMI_IO_APIC) {
			/* don't know how to accurately check for this.
			 * just assume it was a watchdog timer interrupt
			 * This matches the old behaviour.
			 */
			rc = 1;
		}
	}
done:
	return rc;
}
int do_nmi_callback(struct pt_regs *regs, int cpu)
{
#ifdef CONFIG_SYSCTL
	if (unknown_nmi_panic)
		return unknown_nmi_panic_callback(regs, cpu);
#endif
	return 0;
}
#ifdef CONFIG_SYSCTL

static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
{
	unsigned char reason = get_nmi_reason();
	char buf[64];

	sprintf(buf, "NMI received for unknown reason %02x\n", reason);
	die_nmi(regs, buf);
	return 0;
}
/*
 * proc handler for /proc/sys/kernel/nmi
 */
int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
			void __user *buffer, size_t *length, loff_t *ppos)
{
	int old_state;

	nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0;
	old_state = nmi_watchdog_enabled;
	proc_dointvec(table, write, file, buffer, length, ppos);
	if (!!old_state == !!nmi_watchdog_enabled)
		return 0;

	if (atomic_read(&nmi_active) < 0) {
		printk(KERN_WARNING "NMI watchdog is permanently disabled\n");
		return -EIO;
	}

	if (nmi_watchdog == NMI_DEFAULT) {
		if (nmi_known_cpu() > 0)
			nmi_watchdog = NMI_LOCAL_APIC;
		else
			nmi_watchdog = NMI_IO_APIC;
	}

	if (nmi_watchdog == NMI_LOCAL_APIC) {
		if (nmi_watchdog_enabled)
			enable_lapic_nmi_watchdog();
		else
			disable_lapic_nmi_watchdog();
	} else {
		printk(KERN_WARNING
			"NMI watchdog doesn't know what hardware to touch\n");
		return -EIO;
	}
	return 0;
}
#endif	/* CONFIG_SYSCTL */

void __trigger_all_cpu_backtrace(void)
{
	int i;

	backtrace_mask = cpu_online_map;
	/* Wait for up to 10 seconds for all CPUs to do the backtrace */
	for (i = 0; i < 10 * 1000; i++) {
		if (cpus_empty(backtrace_mask))
			break;
		mdelay(1);
	}
}
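/*
 * How the backtrace handshake works (summary of the code above, not new
 * behaviour): this function marks every online CPU in backtrace_mask and
 * then polls for up to 10 seconds.  Each CPU clears its own bit from
 * nmi_watchdog_tick() after printing "NMI backtrace for cpu N" and its
 * stack, so the loop ends as soon as all CPUs have responded.
 */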
EXPORT_SYMBOL(nmi_active);
EXPORT_SYMBOL(nmi_watchdog);
EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi);
EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
EXPORT_SYMBOL(reserve_perfctr_nmi);
EXPORT_SYMBOL(release_perfctr_nmi);
EXPORT_SYMBOL(reserve_evntsel_nmi);
EXPORT_SYMBOL(release_evntsel_nmi);
EXPORT_SYMBOL(disable_timer_nmi_watchdog);
EXPORT_SYMBOL(enable_timer_nmi_watchdog);