perf, x86: Fix Intel-nhm PMU programming errata workaround
author Zhang, Yanmin <yanmin_zhang@linux.intel.com>
Fri, 6 Aug 2010 05:39:08 +0000 (13:39 +0800)
committer Ingo Molnar <mingo@elte.hu>
Wed, 18 Aug 2010 09:17:39 +0000 (11:17 +0200)
Fix the Errata AAK100/AAP53/BD53 workaround. The officially documented
workaround we implemented in:

 11164cd: perf, x86: Add Nehelem PMU programming errata workaround

doesn't actually work fully and causes a stuck PMU state
under load and non-functioning perf profiling.

A functional workaround was found by trial & error.

Affects all Nehalem-class Intel PMUs.

Signed-off-by: Zhang Yanmin <yanmin_zhang@linux.intel.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1281073148.2125.63.camel@ymzhang.sh.intel.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <stable@kernel.org> # .35.x
Signed-off-by: Ingo Molnar <mingo@elte.hu>
arch/x86/kernel/cpu/perf_event_intel.c

index 214ac86..d8d86d0 100644 (file)
@@ -491,33 +491,78 @@ static void intel_pmu_enable_all(int added)
  *   Intel Errata AAP53  (model 30)
  *   Intel Errata BD53   (model 44)
  *
- * These chips need to be 'reset' when adding counters by programming
- * the magic three (non counting) events 0x4300D2, 0x4300B1 and 0x4300B5
- * either in sequence on the same PMC or on different PMCs.
+ * The official story:
+ *   These chips need to be 'reset' when adding counters by programming the
+ *   magic three (non-counting) events 0x4300B5, 0x4300D2, and 0x4300B1 either
+ *   in sequence on the same PMC or on different PMCs.
+ *
+ * In practice it appears some of these events do in fact count, and
+ * we need to program all 4 events.
  */
-static void intel_pmu_nhm_enable_all(int added)
+static void intel_pmu_nhm_workaround(void)
 {
-       if (added) {
-               struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-               int i;
+       struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+       static const unsigned long nhm_magic[4] = {
+               0x4300B5,
+               0x4300D2,
+               0x4300B1,
+               0x4300B1
+       };
+       struct perf_event *event;
+       int i;
+
+       /*
+        * The Errata requires the steps below:
+        * 1) Clear MSR_IA32_PEBS_ENABLE and MSR_CORE_PERF_GLOBAL_CTRL;
+        * 2) Configure 4 PERFEVTSELx with the magic events and clear
+        *    the corresponding PMCx;
+        * 3) set bit0~bit3 of MSR_CORE_PERF_GLOBAL_CTRL;
+        * 4) Clear MSR_CORE_PERF_GLOBAL_CTRL;
+        * 5) Clear 4 pairs of PERFEVTSELx and PMCx;
+        */
+
+       /*
+        * The real steps we choose are a little different from above.
+        * A) To reduce MSR operations, we don't run step 1) as they
+        *    are already cleared before this function is called;
+        * B) Call x86_perf_event_update to save PMCx before configuring
+        *    PERFEVTSELx with magic number;
+        * C) With step 5), we do clear only when the PERFEVTSELx is
+        *    not used currently.
+        * D) Call x86_perf_event_set_period to restore PMCx;
+        */
 
-               wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 0, 0x4300D2);
-               wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 1, 0x4300B1);
-               wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 2, 0x4300B5);
+       /* We always operate 4 pairs of PERF Counters */
+       for (i = 0; i < 4; i++) {
+               event = cpuc->events[i];
+               if (event)
+                       x86_perf_event_update(event);
+       }
 
-               wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x3);
-               wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0);
+       for (i = 0; i < 4; i++) {
+               wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, nhm_magic[i]);
+               wrmsrl(MSR_ARCH_PERFMON_PERFCTR0 + i, 0x0);
+       }
 
-               for (i = 0; i < 3; i++) {
-                       struct perf_event *event = cpuc->events[i];
+       wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0xf);
+       wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0);
 
-                       if (!event)
-                               continue;
+       for (i = 0; i < 4; i++) {
+               event = cpuc->events[i];
 
+               if (event) {
+                       x86_perf_event_set_period(event);
                        __x86_pmu_enable_event(&event->hw,
-                                              ARCH_PERFMON_EVENTSEL_ENABLE);
-               }
+                                       ARCH_PERFMON_EVENTSEL_ENABLE);
+               } else
+                       wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, 0x0);
        }
+}
+
+static void intel_pmu_nhm_enable_all(int added)
+{
+       if (added)
+               intel_pmu_nhm_workaround();
        intel_pmu_enable_all(added);
 }