perf/x86/intel: Add mem-loads/stores support for Haswell
[pandora-kernel.git] arch/x86/kernel/cpu/perf_event_intel.c
index f60d41f..a6eccf1 100644
@@ -13,6 +13,7 @@
 #include <linux/slab.h>
 #include <linux/export.h>
 
+#include <asm/cpufeature.h>
 #include <asm/hardirq.h>
 #include <asm/apic.h>
 
@@ -165,13 +166,13 @@ static struct extra_reg intel_snb_extra_regs[] __read_mostly = {
        INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3f807f8fffull, RSP_0),
        INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3f807f8fffull, RSP_1),
        INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
-       INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
        EVENT_EXTRA_END
 };
 
 static struct extra_reg intel_snbep_extra_regs[] __read_mostly = {
        INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0),
        INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1),
+       INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
        EVENT_EXTRA_END
 };
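
The hunk above removes an LDLAT entry that was duplicated in the SandyBridge table and adds the one SandyBridge-EP was missing. The LDLAT extra reg is what backs the "ldlat" format attribute (config1), programmed into MSR_PEBS_LD_LAT_THRESHOLD. A minimal user-space sketch of what this enables; the raw encoding 0x1cd (MEM_TRANS_RETIRED.LOAD_LATENCY) matches the table entries, the threshold and period are illustrative, and error handling is omitted:

        #include <linux/perf_event.h>
        #include <string.h>
        #include <sys/syscall.h>
        #include <unistd.h>

        /* Sample loads whose latency exceeds 64 cycles, recording the
         * data address and the latency (the sample "weight"). */
        static int open_load_latency_sampler(void)
        {
                struct perf_event_attr attr;

                memset(&attr, 0, sizeof(attr));
                attr.size          = sizeof(attr);
                attr.type          = PERF_TYPE_RAW;
                attr.config        = 0x1cd;
                attr.config1       = 64;        /* ldlat threshold, in cycles */
                attr.sample_period = 1000;
                attr.sample_type   = PERF_SAMPLE_IP | PERF_SAMPLE_ADDR |
                                     PERF_SAMPLE_WEIGHT;
                attr.precise_ip    = 2;         /* request PEBS */

                return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
        }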
 
@@ -190,6 +191,22 @@ struct attribute *snb_events_attrs[] = {
        NULL,
 };
 
+static struct event_constraint intel_hsw_event_constraints[] = {
+       FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+       FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+       FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
+       INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.* */
+       INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
+       INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
+       /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
+       INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4),
+       /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
+       INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4),
+       /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */
+       INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf),
+       EVENT_CONSTRAINT_END
+};
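
For reference, constraint tables are scanned in order and the first match wins: an entry applies when (config & cmask) == code, and its counter bitmask then limits where the event may be scheduled. INTEL_EVENT_CONSTRAINT() compares only the event-select byte, while INTEL_UEVENT_CONSTRAINT() also compares the umask, which is why the umask-qualified CYCLE_ACTIVITY entries need the latter. A sketch in the spirit of x86_get_event_constraints(), not a verbatim copy:

        static struct event_constraint *
        find_constraint(struct event_constraint *table, u64 config)
        {
                struct event_constraint *c;

                /* tables end with EVENT_CONSTRAINT_END (weight == 0) */
                for (c = table; c->weight; c++) {
                        if ((config & c->cmask) == c->code)
                                return c;   /* e.g. 0xcd: only counter 3 (0x8) */
                }
                return NULL;                /* caller treats as unconstrained */
        }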
+
 static u64 intel_pmu_event_map(int hw_event)
 {
        return intel_perfmon_event_map[hw_event];
@@ -872,7 +889,8 @@ static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event)
                return true;
 
        /* implicit branch sampling to correct PEBS skid */
-       if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
+       if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1 &&
+           x86_pmu.intel_cap.pebs_format < 2)
                return true;
 
        return false;
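
The added pebs_format test confines the implicit LBR fallback to pre-Haswell parts: from PEBS record format 2 onward the hardware reports the eventing IP itself, so branch records are no longer needed to correct the one-instruction skid. Assuming the record layout used by this era's perf_event_intel_ds.c, the Haswell-only tail looks roughly like:

        struct pebs_record_hsw_tail {
                u64 real_ip;    /* the eventing IP, reported without skid */
                u64 tsx_tuning; /* TSX abort information */
        };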
@@ -1167,15 +1185,11 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
        cpuc = &__get_cpu_var(cpu_hw_events);
 
        /*
-        * Some chipsets need to unmask the LVTPC in a particular spot
-        * inside the nmi handler.  As a result, the unmasking was pushed
-        * into all the nmi handlers.
-        *
-        * This handler doesn't seem to have any issues with the unmasking
-        * so it was left at the top.
+        * There is no known reason not to always do the late ACK,
+        * but keep it opt-in just in case.
         */
-       apic_write(APIC_LVTPC, APIC_DM_NMI);
-
+       if (!x86_pmu.late_ack)
+               apic_write(APIC_LVTPC, APIC_DM_NMI);
        intel_pmu_disable_all();
        handled = intel_pmu_drain_bts_buffer();
        status = intel_pmu_get_status();
@@ -1188,8 +1202,12 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
 again:
        intel_pmu_ack_status(status);
        if (++loops > 100) {
-               WARN_ONCE(1, "perfevents: irq loop stuck!\n");
-               perf_event_print_debug();
+               static bool warned = false;
+               if (!warned) {
+                       WARN(1, "perfevents: irq loop stuck!\n");
+                       perf_event_print_debug();
+                       warned = true;
+               }
                intel_pmu_reset();
                goto done;
        }
@@ -1235,6 +1253,13 @@ again:
 
 done:
        intel_pmu_enable_all(0);
+       /*
+        * Only unmask the NMI after the overflow counters
+        * have been reset. This avoids spurious NMIs on
+        * Haswell CPUs.
+        */
+       if (x86_pmu.late_ack)
+               apic_write(APIC_LVTPC, APIC_DM_NMI);
        return handled;
 }
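
Taken together, the two hunks in this function order the NMI path as: freeze, handle, unfreeze, and only then unmask. A condensed sketch, where process_overflows() is a hypothetical stand-in for the drain/ack/overflow loop, not a function in this file:

        static int intel_nmi_flow_sketch(void)
        {
                int handled;

                intel_pmu_disable_all();                /* freeze counters */
                handled = process_overflows();          /* drain BTS/PEBS, ack status */
                intel_pmu_enable_all(0);                /* unfreeze */
                apic_write(APIC_LVTPC, APIC_DM_NMI);    /* unmask LVTPC last */
                return handled;
        }

Unmasking the LVTPC while overflow bits are still set can immediately retrigger the NMI on Haswell, which is exactly the spurious-NMI case the comment above describes.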
 
@@ -1646,6 +1671,47 @@ static void core_pmu_enable_all(int added)
        }
 }
 
+static int hsw_hw_config(struct perf_event *event)
+{
+       int ret = intel_pmu_hw_config(event);
+
+       if (ret)
+               return ret;
+       if (!boot_cpu_has(X86_FEATURE_RTM) && !boot_cpu_has(X86_FEATURE_HLE))
+               return 0;
+       event->hw.config |= event->attr.config & (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED);
+
+       /*
+        * IN_TX/IN_TX-CP filters are not supported by the Haswell PMU with
+        * PEBS or in ANY thread mode. Since the results are nonsensical,
+        * forbid this combination.
+        */
+       if ((event->hw.config & (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED)) &&
+            ((event->hw.config & ARCH_PERFMON_EVENTSEL_ANY) ||
+             event->attr.precise_ip > 0))
+               return -EOPNOTSUPP;
+
+       return 0;
+}
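
What the two HSW_IN_TX* bits expose to user space, as a sketch: 0x3c is the architectural unhalted-core-cycles encoding, and bit 32 corresponds to the in_tx format attribute exported further down:

        #include <linux/perf_event.h>

        /* count core cycles, but only those spent inside a transaction */
        struct perf_event_attr attr = {
                .size   = sizeof(struct perf_event_attr),
                .type   = PERF_TYPE_RAW,
                .config = 0x3c | (1ULL << 32),  /* in_tx, config bit 32 */
        };

Per the check above, combining either bit with precise_ip > 0 or with the ANY-thread bit now fails cleanly with -EOPNOTSUPP rather than producing meaningless counts.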
+
+static struct event_constraint counter2_constraint =
+                       EVENT_CONSTRAINT(0, 0x4, 0);
+
+static struct event_constraint *
+hsw_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
+{
+       struct event_constraint *c = intel_get_event_constraints(cpuc, event);
+
+       /* Handle special quirk on in_tx_checkpointed only in counter 2 */
+       if (event->hw.config & HSW_IN_TX_CHECKPOINTED) {
+               if (c->idxmsk64 & (1U << 2))
+                       return &counter2_constraint;
+               return &emptyconstraint;
+       }
+
+       return c;
+}
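
Here 0x4 is a counter bitmask, 1 << 2, so counter2_constraint pins the event to general-purpose counter 2, the only counter where checkpointed counting works on Haswell. Expanding the macro makes the three fields explicit (a reading of EVENT_CONSTRAINT(code, idxmsk, cmask), not new code):

        /* code 0 with cmask 0: applies as-is; idxmsk 0x4: only counter 2.
         * If the event's normal constraint excludes counter 2, the empty
         * constraint leaves it unschedulable rather than mis-counted. */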
+
 PMU_FORMAT_ATTR(event, "config:0-7"    );
 PMU_FORMAT_ATTR(umask, "config:8-15"   );
 PMU_FORMAT_ATTR(edge,  "config:18"     );
@@ -1653,6 +1719,8 @@ PMU_FORMAT_ATTR(pc,       "config:19"     );
 PMU_FORMAT_ATTR(any,   "config:21"     ); /* v3 + */
 PMU_FORMAT_ATTR(inv,   "config:23"     );
 PMU_FORMAT_ATTR(cmask, "config:24-31"  );
+PMU_FORMAT_ATTR(in_tx,  "config:32");
+PMU_FORMAT_ATTR(in_tx_cp, "config:33");
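
Once registered, these appear in the cpu PMU's sysfs format directory, which is what lets the perf tool accept the new bits symbolically; an illustrative session, with paths as exported by the core perf code:

        /*
         * $ cat /sys/bus/event_source/devices/cpu/format/in_tx
         * config:32
         * $ perf stat -e 'cpu/event=0x3c,in_tx=1/' -- <workload>
         */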
 
 static struct attribute *intel_arch_formats_attr[] = {
        &format_attr_event.attr,
@@ -1807,6 +1875,8 @@ static struct attribute *intel_arch3_formats_attr[] = {
        &format_attr_any.attr,
        &format_attr_inv.attr,
        &format_attr_cmask.attr,
+       &format_attr_in_tx.attr,
+       &format_attr_in_tx_cp.attr,
 
        &format_attr_offcore_rsp.attr, /* XXX do NHM/WSM + SNB breakout */
        &format_attr_ldlat.attr, /* PEBS load latency */
@@ -1966,6 +2036,15 @@ static __init void intel_nehalem_quirk(void)
        }
 }
 
+EVENT_ATTR_STR(mem-loads,      mem_ld_hsw,     "event=0xcd,umask=0x1,ldlat=3");
+EVENT_ATTR_STR(mem-stores,     mem_st_hsw,     "event=0xd0,umask=0x82");
+
+static struct attribute *hsw_events_attrs[] = {
+       EVENT_PTR(mem_ld_hsw),
+       EVENT_PTR(mem_st_hsw),
+       NULL
+};
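
These strings surface as .../cpu/events/mem-loads and mem-stores, so "perf record -e cpu/mem-loads/" resolves to the encodings above. Roughly what EVENT_ATTR_STR() expands to, based on this era's perf_event.h (an approximate expansion, treat as a sketch):

        static struct perf_pmu_events_attr event_attr_mem_ld_hsw = {
                .attr      = __ATTR(mem-loads, 0444, events_sysfs_show, NULL),
                .id        = 0,
                .event_str = "event=0xcd,umask=0x1,ldlat=3",
        };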
+
 __init int intel_pmu_init(void)
 {
        union cpuid10_edx edx;
@@ -2189,6 +2268,30 @@ __init int intel_pmu_init(void)
                break;
 
 
+       case 60: /* Haswell Client */
+       case 70:
+       case 71:
+       case 63:
+               x86_pmu.late_ack = true;
+               memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids));
+               memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
+
+               intel_pmu_lbr_init_snb();
+
+               x86_pmu.event_constraints = intel_hsw_event_constraints;
+               x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints;
+               x86_pmu.extra_regs = intel_snb_extra_regs;
+               x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
+               /* all extra regs are per-cpu when HT is on */
+               x86_pmu.er_flags |= ERF_HAS_RSP_1;
+               x86_pmu.er_flags |= ERF_NO_HT_SHARING;
+
+               x86_pmu.hw_config = hsw_hw_config;
+               x86_pmu.get_event_constraints = hsw_get_event_constraints;
+               x86_pmu.cpu_events = hsw_events_attrs;
+               pr_cont("Haswell events, ");
+               break;
+
        default:
                switch (x86_pmu.version) {
                case 1:
@@ -2227,7 +2330,7 @@ __init int intel_pmu_init(void)
                 * counter, so do not extend mask to generic counters
                 */
                for_each_event_constraint(c, x86_pmu.event_constraints) {
-                       if (c->cmask != X86_RAW_EVENT_MASK
+                       if (c->cmask != FIXED_EVENT_FLAGS
                            || c->idxmsk64 == INTEL_PMC_MSK_FIXED_REF_CYCLES) {
                                continue;
                        }
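
The cmask switch is needed because FIXED_EVENT_CONSTRAINT() now installs FIXED_EVENT_FLAGS, which this series grows beyond X86_RAW_EVENT_MASK by the two HSW_IN_TX* bits, so the old comparison would no longer recognize fixed-counter constraints. Assuming the truncated loop body matches mainline of this era, the effect being guarded is:

        /* extend each fixed-counter constraint to the generic counters;
         * REF_CYCLES is skipped because it only exists as fixed counter 2 */
        c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
        c->weight += x86_pmu.num_counters;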