perf, intel: Try alternative OFFCORE encodings

[pandora-kernel.git] / arch / x86 / kernel / cpu / perf_event.c
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c

index 3a0338b..c53d433 100644 (file)
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -44,6 +44,29 @@ do {                                                         \
  } while (0)
  #endif
  
+/*
+ *          |   NHM/WSM    |      SNB     |
+ * register -------------------------------
+ *          |  HT  | no HT |  HT  | no HT |
+ *-----------------------------------------
+ * offcore  | core | core  | cpu  | core  |
+ * lbr_sel  | core | core  | cpu  | core  |
+ * ld_lat   | cpu  | core  | cpu  | core  |
+ *-----------------------------------------
+ *
+ * Given that there is only a small number of shared regs,
+ * we can pre-allocate their slots in the per-cpu and
+ * per-core reg tables.
+ */
+enum extra_reg_type {
+       EXTRA_REG_NONE  = -1,   /* event uses no extra register */
+
+       EXTRA_REG_RSP_0 = 0,    /* offcore_response_0 */
+       EXTRA_REG_RSP_1 = 1,    /* offcore_response_1 */
+
+       EXTRA_REG_MAX           /* number of real entries above (excludes NONE) */
+};
+
  /*
   * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
   */
@@ -132,11 +155,10 @@ struct cpu_hw_events {
         struct perf_branch_entry        lbr_entries[MAX_LBR_ENTRIES];
  
         /*
-        * Intel percore register state.
-        * Coordinate shared resources between HT threads.
+        * manage shared (per-core, per-cpu) registers
+        * used on Intel NHM/WSM/SNB
          */
-       int                             percore_used; /* Used by this CPU? */
-       struct intel_percore            *per_core;
+       struct intel_shared_regs        *shared_regs;
  
         /*
          * AMD specific bits
@@ -186,27 +208,46 @@ struct cpu_hw_events {
  #define for_each_event_constraint(e, c)        \
         for ((e) = (c); (e)->weight; (e)++)
  
+/*
+ * Per register state: accounting for one extra (shared) MSR.
+ */
+struct er_account {
+       raw_spinlock_t          lock;   /* per-core: protect structure */
+       u64                     config; /* extra MSR config */
+       u64                     reg;    /* extra MSR number */
+       atomic_t                ref;    /* reference count */
+};
+
  /*
   * Extra registers for specific events.
+ *
   * Some events need large masks and require external MSRs.
- * Define a mapping to these extra registers.
+ * Those extra MSRs end up being shared for all events on
+ * a PMU and sometimes between the PMUs of sibling HT threads.
+ * In either case, the kernel needs to handle conflicting
+ * accesses to those extra, shared, regs. The data structure
+ * to manage those registers is stored in cpu_hw_events.
   */
  struct extra_reg {
         unsigned int            event;
         unsigned int            msr;
         u64                     config_mask;
         u64                     valid_mask;
+       int                     idx;  /* enum extra_reg_type value; per_xxx->regs[] reg index */
  };
  
-#define EVENT_EXTRA_REG(e, ms, m, vm) {        \
+#define EVENT_EXTRA_REG(e, ms, m, vm, i) {     \
         .event = (e),           \
         .msr = (ms),            \
         .config_mask = (m),     \
         .valid_mask = (vm),     \
+       .idx = EXTRA_REG_##i    \
         }
-#define INTEL_EVENT_EXTRA_REG(event, msr, vm)  \
-       EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm)
-#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0)
+
+#define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx)     \
+       EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, idx)
+
+#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0, RSP_0) /* all-zero table entry (EXTRA_REG_RSP_0 == 0) */
  
  union perf_capabilities {
         struct {
@@ -233,6 +274,7 @@ struct x86_pmu {
         void            (*enable_all)(int added);
         void            (*enable)(struct perf_event *);
         void            (*disable)(struct perf_event *);
+       void            (*hw_watchdog_set_attr)(struct perf_event_attr *attr);
         int             (*hw_config)(struct perf_event *event);
         int             (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
         unsigned        eventsel;
@@ -252,7 +294,6 @@ struct x86_pmu {
         void            (*put_event_constraints)(struct cpu_hw_events *cpuc,
                                                  struct perf_event *event);
         struct event_constraint *event_constraints;
-       struct event_constraint *percore_constraints;
         void            (*quirks)(void);
         int             perfctr_second_write;
  
@@ -286,8 +327,12 @@ struct x86_pmu {
          * Extra registers for events
          */
         struct extra_reg *extra_regs;
+       unsigned int er_flags;
  };
  
+#define ERF_NO_HT_SHARING      1
+#define ERF_HAS_RSP_1          2
+
  static struct x86_pmu x86_pmu __read_mostly;
  
  static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
@@ -315,6 +360,12 @@ static u64 __read_mostly hw_cache_extra_regs
                                 [PERF_COUNT_HW_CACHE_OP_MAX]
                                 [PERF_COUNT_HW_CACHE_RESULT_MAX];
  
+void hw_nmi_watchdog_set_attr(struct perf_event_attr *wd_attr)
+{
+       if (x86_pmu.hw_watchdog_set_attr)       /* optional per-PMU hook; skipped when NULL */
+               x86_pmu.hw_watchdog_set_attr(wd_attr);
+}
+
  /*
   * Propagate event elapsed time into the generic event.
   * Can only be executed on the CPU where the event is active.
@@ -393,10 +444,10 @@ static inline unsigned int x86_pmu_event_addr(int index)
   */
  static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
  {
+       struct hw_perf_event_extra *reg;
         struct extra_reg *er;
  
-       event->hw.extra_reg = 0;
-       event->hw.extra_config = 0;
+       reg = &event->hw.extra_reg;
  
         if (!x86_pmu.extra_regs)
                 return 0;
@@ -406,8 +457,10 @@ static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
                         continue;
                 if (event->attr.config1 & ~er->valid_mask)
                         return -EINVAL;
-               event->hw.extra_reg = er->msr;
-               event->hw.extra_config = event->attr.config1;
+
+               reg->idx = er->idx;
+               reg->config = event->attr.config1;
+               reg->reg = er->msr;
                 break;
         }
         return 0;
@@ -706,6 +759,9 @@ static int __x86_pmu_event_init(struct perf_event *event)
         event->hw.last_cpu = -1;
         event->hw.last_tag = ~0ULL;
  
+       /* mark unused */
+       event->hw.extra_reg.idx = EXTRA_REG_NONE;
+
         return x86_pmu.hw_config(event);
  }
  
@@ -747,8 +803,8 @@ static void x86_pmu_disable(struct pmu *pmu)
  static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
                                           u64 enable_mask)
  {
-       if (hwc->extra_reg)
-               wrmsrl(hwc->extra_reg, hwc->extra_config);
+       if (hwc->extra_reg.reg)
+               wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config);
         wrmsrl(hwc->config_base, hwc->config | enable_mask);
  }
  
@@ -1332,7 +1388,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
                 if (!x86_perf_event_set_period(event))
                         continue;
  
-               if (perf_event_overflow(event, 1, &data, regs))
+               if (perf_event_overflow(event, &data, regs))
                         x86_pmu_stop(event, 0);
         }
  
@@ -1637,6 +1693,40 @@ static int x86_pmu_commit_txn(struct pmu *pmu)
         perf_pmu_enable(pmu);
         return 0;
  }
+/*
+ * A fake_cpuc is used to validate events and event groups.
+ * Due to the extra reg logic, we also need to allocate a
+ * fake shared_regs structure. Otherwise, group events
+ * using extra regs may conflict without the kernel being
+ * able to catch this when the last event gets added to
+ * the group.
+ */
+static void free_fake_cpuc(struct cpu_hw_events *cpuc)
+{
+       kfree(cpuc->shared_regs);       /* may be NULL; kfree(NULL) is a no-op */
+       kfree(cpuc);
+}
+
+static struct cpu_hw_events *allocate_fake_cpuc(void)
+{
+       struct cpu_hw_events *cpuc;
+       int cpu = raw_smp_processor_id();       /* only passed to allocate_shared_regs() */
+
+       cpuc = kzalloc(sizeof(*cpuc), GFP_KERNEL);
+       if (!cpuc)
+               return ERR_PTR(-ENOMEM);        /* callers test with IS_ERR(); never returns NULL */
+
+       /* only needed if we have extra_regs; otherwise shared_regs stays NULL from kzalloc */
+       if (x86_pmu.extra_regs) {
+               cpuc->shared_regs = allocate_shared_regs(cpu);
+               if (!cpuc->shared_regs)
+                       goto error;
+       }
+       return cpuc;
+error:
+       free_fake_cpuc(cpuc);   /* releases cpuc itself; shared_regs is NULL here */
+       return ERR_PTR(-ENOMEM);
+}
  
  /*
   * validate that we can schedule this event
@@ -1647,9 +1737,9 @@ static int validate_event(struct perf_event *event)
         struct event_constraint *c;
         int ret = 0;
  
-       fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO);
-       if (!fake_cpuc)
-               return -ENOMEM;
+       fake_cpuc = allocate_fake_cpuc();
+       if (IS_ERR(fake_cpuc))
+               return PTR_ERR(fake_cpuc);
  
         c = x86_pmu.get_event_constraints(fake_cpuc, event);
  
@@ -1659,7 +1749,7 @@ static int validate_event(struct perf_event *event)
         if (x86_pmu.put_event_constraints)
                 x86_pmu.put_event_constraints(fake_cpuc, event);
  
-       kfree(fake_cpuc);
+       free_fake_cpuc(fake_cpuc);
  
         return ret;
  }
@@ -1679,36 +1769,32 @@ static int validate_group(struct perf_event *event)
  {
         struct perf_event *leader = event->group_leader;
         struct cpu_hw_events *fake_cpuc;
-       int ret, n;
-
-       ret = -ENOMEM;
-       fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO);
-       if (!fake_cpuc)
-               goto out;
+       int ret = -ENOSPC, n;
  
+       fake_cpuc = allocate_fake_cpuc();
+       if (IS_ERR(fake_cpuc))
+               return PTR_ERR(fake_cpuc);
         /*
          * the event is not yet connected with its
          * siblings therefore we must first collect
          * existing siblings, then add the new event
          * before we can simulate the scheduling
          */
-       ret = -ENOSPC;
         n = collect_events(fake_cpuc, leader, true);
         if (n < 0)
-               goto out_free;
+               goto out;
  
         fake_cpuc->n_events = n;
         n = collect_events(fake_cpuc, event, false);
         if (n < 0)
-               goto out_free;
+               goto out;
  
         fake_cpuc->n_events = n;
  
         ret = x86_pmu.schedule_events(fake_cpuc, n, NULL);
  
-out_free:
-       kfree(fake_cpuc);
  out:
+       free_fake_cpuc(fake_cpuc);
         return ret;
  }