#include <linux/sysdev.h>
#include <linux/sysctl.h>
#include <linux/percpu.h>
+#include <linux/dmi.h>
+#include <linux/kprobes.h>
#include <asm/smp.h>
#include <asm/nmi.h>
#include <asm/kdebug.h>
+#include <asm/intel_arch_perfmon.h>
#include "mach_traps.h"
static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);
/* local prototypes */
-static void stop_apic_nmi_watchdog(void *unused);
static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu);
extern void show_registers(struct pt_regs *regs);
case X86_VENDOR_AMD:
return (msr - MSR_K7_PERFCTR0);
case X86_VENDOR_INTEL:
+ if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
+ return (msr - MSR_ARCH_PERFMON_PERFCTR0);
+
switch (boot_cpu_data.x86) {
case 6:
return (msr - MSR_P6_PERFCTR0);
case X86_VENDOR_AMD:
return (msr - MSR_K7_EVNTSEL0);
case X86_VENDOR_INTEL:
+ if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
+ return (msr - MSR_ARCH_PERFMON_EVENTSEL0);
+
switch (boot_cpu_data.x86) {
case 6:
return (msr - MSR_P6_EVNTSEL0);
case X86_VENDOR_AMD:
return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6));
case X86_VENDOR_INTEL:
- return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6));
+ if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
+ return 1;
+ else
+ return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6));
}
return 0;
}
unsigned int *prev_nmi_count;
int cpu;
+ /* Enable NMI watchdog for newer systems.
+ Actually it should be safe for most systems before 2004 too except
+ for some IBM systems that corrupt registers when NMI happens
+ during SMM. Unfortunately we don't have more exact information
+ on these and use this coarse check. */
+ if (nmi_watchdog == NMI_DEFAULT && dmi_get_year(DMI_BIOS_DATE) >= 2004)
+ nmi_watchdog = NMI_LOCAL_APIC;
+
if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DEFAULT))
return 0;
/* now that we know it works we can reduce NMI frequency to
something more reasonable; makes a difference in some configs */
- if (nmi_watchdog == NMI_LOCAL_APIC)
+ if (nmi_watchdog == NMI_LOCAL_APIC) {
+ struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
nmi_hz = 1;
+ /*
+ * On Intel CPUs with ARCH_PERFMON only 32 bits in the counter
+ * are writable, with higher bits sign extending from bit 31.
+ * So, we can only program the counter with 31 bit values and
+ * 32nd bit should be 1, for 33.. to be 1.
+ * Find the appropriate nmi_hz
+ */
+ if (wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0 &&
+ ((u64)cpu_khz * 1000) > 0x7fffffffULL) {
+ u64 count = (u64)cpu_khz * 1000;
+ do_div(count, 0x7fffffffUL);
+ nmi_hz = count + 1;
+ }
+ }
kfree(prev_nmi_count);
return 0;
static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
{
+ /* only CPU0 goes here, other CPUs should be offline */
nmi_pm_active = atomic_read(&nmi_active);
- disable_lapic_nmi_watchdog();
+ stop_apic_nmi_watchdog(NULL);
+ BUG_ON(atomic_read(&nmi_active) != 0);
return 0;
}
static int lapic_nmi_resume(struct sys_device *dev)
{
- if (nmi_pm_active > 0)
- enable_lapic_nmi_watchdog();
+ /* only CPU0 goes here, other CPUs should be offline */
+ if (nmi_pm_active > 0) {
+ setup_apic_nmi_watchdog(NULL);
+ touch_nmi_watchdog();
+ }
return 0;
}
release_perfctr_nmi(wd->perfctr_msr);
}
+#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
+#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
+
+static int setup_intel_arch_watchdog(void)
+{
+ unsigned int ebx;
+ union cpuid10_eax eax;
+ unsigned int unused;
+ unsigned int perfctr_msr, evntsel_msr;
+ unsigned int evntsel;
+ struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
+ /*
+ * Check whether the Architectural PerfMon supports
+ * Unhalted Core Cycles Event or not.
+ * NOTE: Corresponding bit = 0 in ebx indicates event present.
+ */
+ cpuid(10, &(eax.full), &ebx, &unused, &unused);
+ if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
+ (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
+ goto fail;
+
+ perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0;
+ evntsel_msr = MSR_ARCH_PERFMON_EVENTSEL0;
+
+ if (!reserve_perfctr_nmi(perfctr_msr))
+ goto fail;
+
+ if (!reserve_evntsel_nmi(evntsel_msr))
+ goto fail1;
+
+ wrmsrl(perfctr_msr, 0UL);
+
+ evntsel = ARCH_PERFMON_EVENTSEL_INT
+ | ARCH_PERFMON_EVENTSEL_OS
+ | ARCH_PERFMON_EVENTSEL_USR
+ | ARCH_PERFMON_NMI_EVENT_SEL
+ | ARCH_PERFMON_NMI_EVENT_UMASK;
+
+ /* setup the timer */
+ wrmsr(evntsel_msr, evntsel, 0);
+ write_watchdog_counter(perfctr_msr, "INTEL_ARCH_PERFCTR0");
+ apic_write(APIC_LVTPC, APIC_DM_NMI);
+ evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+ wrmsr(evntsel_msr, evntsel, 0);
+
+ wd->perfctr_msr = perfctr_msr;
+ wd->evntsel_msr = evntsel_msr;
+ wd->cccr_msr = 0; //unused
+ wd->check_bit = 1ULL << (eax.split.bit_width - 1);
+ return 1;
+fail1:
+ release_perfctr_nmi(perfctr_msr);
+fail:
+ return 0;
+}
+
+static void stop_intel_arch_watchdog(void)
+{
+ unsigned int ebx;
+ union cpuid10_eax eax;
+ unsigned int unused;
+ struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
+ /*
+ * Check whether the Architectural PerfMon supports
+ * Unhalted Core Cycles Event or not.
+ * NOTE: Corresponding bit = 0 in ebx indicates event present.
+ */
+ cpuid(10, &(eax.full), &ebx, &unused, &unused);
+ if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
+ (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
+ return;
+
+ wrmsr(wd->evntsel_msr, 0, 0);
+ release_evntsel_nmi(wd->evntsel_msr);
+ release_perfctr_nmi(wd->perfctr_msr);
+}
+
void setup_apic_nmi_watchdog (void *unused)
{
+ struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
/* only support LOCAL and IO APICs for now */
if ((nmi_watchdog != NMI_LOCAL_APIC) &&
(nmi_watchdog != NMI_IO_APIC))
return;
+ if (wd->enabled == 1)
+ return;
+
+ /* cheap hack to support suspend/resume */
+ /* if cpu0 is not active neither should the other cpus */
+ if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0))
+ return;
+
if (nmi_watchdog == NMI_LOCAL_APIC) {
switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_AMD:
return;
break;
case X86_VENDOR_INTEL:
+ if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
+ if (!setup_intel_arch_watchdog())
+ return;
+ break;
+ }
switch (boot_cpu_data.x86) {
case 6:
if (boot_cpu_data.x86_model > 0xd)
return;
}
}
- __get_cpu_var(nmi_watchdog_ctlblk.enabled) = 1;
+ wd->enabled = 1;
atomic_inc(&nmi_active);
}
-static void stop_apic_nmi_watchdog(void *unused)
+void stop_apic_nmi_watchdog(void *unused)
{
+ struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
/* only support LOCAL and IO APICs for now */
if ((nmi_watchdog != NMI_LOCAL_APIC) &&
(nmi_watchdog != NMI_IO_APIC))
return;
+ if (wd->enabled == 0)
+ return;
+
if (nmi_watchdog == NMI_LOCAL_APIC) {
switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_AMD:
stop_k7_watchdog();
break;
case X86_VENDOR_INTEL:
+ if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
+ stop_intel_arch_watchdog();
+ break;
+ }
switch (boot_cpu_data.x86) {
case 6:
if (boot_cpu_data.x86_model > 0xd)
return;
}
}
- __get_cpu_var(nmi_watchdog_ctlblk.enabled) = 0;
+ wd->enabled = 0;
atomic_dec(&nmi_active);
}
extern void die_nmi(struct pt_regs *, const char *msg);
-int nmi_watchdog_tick (struct pt_regs * regs, unsigned reason)
+__kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
{
/*
wrmsrl(wd->cccr_msr, dummy);
apic_write(APIC_LVTPC, APIC_DM_NMI);
}
- else if (wd->perfctr_msr == MSR_P6_PERFCTR0) {
- /* Only P6 based Pentium M need to re-unmask
+ else if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
+ wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
+ /* P6 based Pentium M need to re-unmask
* the apic vector but it doesn't hurt
- * other P6 variant */
+ * other P6 variant.
+ * ArchPerfom/Core Duo also needs this */
apic_write(APIC_LVTPC, APIC_DM_NMI);
}
/* start the cycle over again */
* This matches the old behaviour.
*/
rc = 1;
- } else
- printk(KERN_WARNING "Unknown enabled NMI hardware?!\n");
+ }
}
done:
return rc;
return 0;
}
+/*
+ * proc handler for /proc/sys/kernel/nmi
+ */
+int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
+ void __user *buffer, size_t *length, loff_t *ppos)
+{
+ int old_state;
+
+ nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0;
+ old_state = nmi_watchdog_enabled;
+ proc_dointvec(table, write, file, buffer, length, ppos);
+ if (!!old_state == !!nmi_watchdog_enabled)
+ return 0;
+
+ if (atomic_read(&nmi_active) < 0) {
+ printk( KERN_WARNING "NMI watchdog is permanently disabled\n");
+ return -EIO;
+ }
+
+ if (nmi_watchdog == NMI_DEFAULT) {
+ if (nmi_known_cpu() > 0)
+ nmi_watchdog = NMI_LOCAL_APIC;
+ else
+ nmi_watchdog = NMI_IO_APIC;
+ }
+
+ if (nmi_watchdog == NMI_LOCAL_APIC) {
+ if (nmi_watchdog_enabled)
+ enable_lapic_nmi_watchdog();
+ else
+ disable_lapic_nmi_watchdog();
+ } else {
+ printk( KERN_WARNING
+ "NMI watchdog doesn't know what hardware to touch\n");
+ return -EIO;
+ }
+ return 0;
+}
+
#endif
EXPORT_SYMBOL(nmi_active);