Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git...
author    Ingo Molnar <mingo@kernel.org>
          Wed, 12 Aug 2015 10:16:11 +0000 (12:16 +0200)
committer Ingo Molnar <mingo@kernel.org>
          Wed, 12 Aug 2015 10:16:11 +0000 (12:16 +0200)
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

User visible changes:

  - Introduce a 'srcfile' sort key (Andi Kleen):

    # perf record -F 10000 usleep 1
    # perf report --stdio --dsos '[kernel.vmlinux]' -s srcfile
    <SNIP>
    # Overhead  Source File
        26.49%  copy_page_64.S
         5.49%  signal.c
         0.51%  msr.h
    #

    It can be combined with other sort keys; for instance, experiment with
    '-s srcfile,symbol' (see the example after this list).

    There are some oddities with certain distros and with some specific DSOs
    that are still being investigated, so your mileage may vary.

  - Update the column width for the 'srcline' sort key (Arnaldo Carvalho de Melo)

  - Support a per-event 'freq' term (Namhyung Kim):

    $ perf record -e 'cpu/instructions,freq=1234/',cycles -c 1000 sleep 1
    $ perf evlist -F
    cpu/instructions,freq=1234/: sample_freq=1234
    cycles: sample_period=1000
    $
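
    As an illustrative follow-up to the 'srcfile' item above (a sketch only:
    the flags are the ones shown in that item, and the output, omitted here,
    will vary by workload and kernel):

    # perf report --stdio --dsos '[kernel.vmlinux]' -s srcfile,symbol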

Infrastructure changes:

  - Move the perf_counts struct and its functions into a separate object (Jiri Olsa)

  - Unset perf_event_attr::freq when period term is set (Jiri Olsa)

  - Move callchain option parsing code to util.c (Kan Liang)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
arch/x86/kernel/cpu/intel_pt.h
arch/x86/kernel/cpu/perf_event_intel.c
arch/x86/kernel/cpu/perf_event_intel_cqm.c
arch/x86/kernel/cpu/perf_event_intel_pt.c
arch/x86/kernel/cpu/perf_event_msr.c
kernel/events/core.c
kernel/events/ring_buffer.c

diff --git a/arch/x86/kernel/cpu/intel_pt.h b/arch/x86/kernel/cpu/intel_pt.h
index feb293e..336878a 100644
  */
 #define TOPA_PMI_MARGIN 512
 
-/*
- * Table of Physical Addresses bits
- */
-enum topa_sz {
-       TOPA_4K = 0,
-       TOPA_8K,
-       TOPA_16K,
-       TOPA_32K,
-       TOPA_64K,
-       TOPA_128K,
-       TOPA_256K,
-       TOPA_512K,
-       TOPA_1MB,
-       TOPA_2MB,
-       TOPA_4MB,
-       TOPA_8MB,
-       TOPA_16MB,
-       TOPA_32MB,
-       TOPA_64MB,
-       TOPA_128MB,
-       TOPA_SZ_END,
-};
+#define TOPA_SHIFT 12
 
-static inline unsigned int sizes(enum topa_sz tsz)
+static inline unsigned int sizes(unsigned int tsz)
 {
-       return 1 << (tsz + 12);
+       return 1 << (tsz + TOPA_SHIFT);
 };
 
 struct topa_entry {
@@ -66,8 +45,8 @@ struct topa_entry {
        u64     rsvd4   : 16;
 };
 
-#define TOPA_SHIFT 12
-#define PT_CPUID_LEAVES 2
+#define PT_CPUID_LEAVES                2
+#define PT_CPUID_REGS_NUM      4 /* number of registers (eax, ebx, ecx, edx) */
 
 enum pt_capabilities {
        PT_CAP_max_subleaf = 0,
@@ -85,7 +64,7 @@ enum pt_capabilities {
 
 struct pt_pmu {
        struct pmu              pmu;
-       u32                     caps[4 * PT_CPUID_LEAVES];
+       u32                     caps[PT_CPUID_REGS_NUM * PT_CPUID_LEAVES];
 };
 
 /**
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index a478e3c..3f124d5 100644
@@ -2758,7 +2758,7 @@ static int intel_pmu_cpu_prepare(int cpu)
        if (x86_pmu.extra_regs || x86_pmu.lbr_sel_map) {
                cpuc->shared_regs = allocate_shared_regs(cpu);
                if (!cpuc->shared_regs)
-                       return NOTIFY_BAD;
+                       goto err;
        }
 
        if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
@@ -2766,18 +2766,27 @@ static int intel_pmu_cpu_prepare(int cpu)
 
                cpuc->constraint_list = kzalloc(sz, GFP_KERNEL);
                if (!cpuc->constraint_list)
-                       return NOTIFY_BAD;
+                       goto err_shared_regs;
 
                cpuc->excl_cntrs = allocate_excl_cntrs(cpu);
-               if (!cpuc->excl_cntrs) {
-                       kfree(cpuc->constraint_list);
-                       kfree(cpuc->shared_regs);
-                       return NOTIFY_BAD;
-               }
+               if (!cpuc->excl_cntrs)
+                       goto err_constraint_list;
+
                cpuc->excl_thread_id = 0;
        }
 
        return NOTIFY_OK;
+
+err_constraint_list:
+       kfree(cpuc->constraint_list);
+       cpuc->constraint_list = NULL;
+
+err_shared_regs:
+       kfree(cpuc->shared_regs);
+       cpuc->shared_regs = NULL;
+
+err:
+       return NOTIFY_BAD;
 }
 
 static void intel_pmu_cpu_starting(int cpu)
diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
index 63eb68b..377e8f8 100644
@@ -1255,7 +1255,7 @@ static inline void cqm_pick_event_reader(int cpu)
        cpumask_set_cpu(cpu, &cqm_cpumask);
 }
 
-static void intel_cqm_cpu_prepare(unsigned int cpu)
+static void intel_cqm_cpu_starting(unsigned int cpu)
 {
        struct intel_pqr_state *state = &per_cpu(pqr_state, cpu);
        struct cpuinfo_x86 *c = &cpu_data(cpu);
@@ -1296,13 +1296,11 @@ static int intel_cqm_cpu_notifier(struct notifier_block *nb,
        unsigned int cpu  = (unsigned long)hcpu;
 
        switch (action & ~CPU_TASKS_FROZEN) {
-       case CPU_UP_PREPARE:
-               intel_cqm_cpu_prepare(cpu);
-               break;
        case CPU_DOWN_PREPARE:
                intel_cqm_cpu_exit(cpu);
                break;
        case CPU_STARTING:
+               intel_cqm_cpu_starting(cpu);
                cqm_pick_event_reader(cpu);
                break;
        }
@@ -1373,7 +1371,7 @@ static int __init intel_cqm_init(void)
                goto out;
 
        for_each_online_cpu(i) {
-               intel_cqm_cpu_prepare(i);
+               intel_cqm_cpu_starting(i);
                cqm_pick_event_reader(i);
        }
 
diff --git a/arch/x86/kernel/cpu/perf_event_intel_pt.c b/arch/x86/kernel/cpu/perf_event_intel_pt.c
index e20cfac..4216928 100644
@@ -79,7 +79,7 @@ static struct pt_cap_desc {
 static u32 pt_cap_get(enum pt_capabilities cap)
 {
        struct pt_cap_desc *cd = &pt_caps[cap];
-       u32 c = pt_pmu.caps[cd->leaf * 4 + cd->reg];
+       u32 c = pt_pmu.caps[cd->leaf * PT_CPUID_REGS_NUM + cd->reg];
        unsigned int shift = __ffs(cd->mask);
 
        return (c & cd->mask) >> shift;
@@ -145,10 +145,10 @@ static int __init pt_pmu_hw_init(void)
 
        for (i = 0; i < PT_CPUID_LEAVES; i++) {
                cpuid_count(20, i,
-                           &pt_pmu.caps[CR_EAX + i*4],
-                           &pt_pmu.caps[CR_EBX + i*4],
-                           &pt_pmu.caps[CR_ECX + i*4],
-                           &pt_pmu.caps[CR_EDX + i*4]);
+                           &pt_pmu.caps[CR_EAX + i*PT_CPUID_REGS_NUM],
+                           &pt_pmu.caps[CR_EBX + i*PT_CPUID_REGS_NUM],
+                           &pt_pmu.caps[CR_ECX + i*PT_CPUID_REGS_NUM],
+                           &pt_pmu.caps[CR_EDX + i*PT_CPUID_REGS_NUM]);
        }
 
        ret = -ENOMEM;
diff --git a/arch/x86/kernel/cpu/perf_event_msr.c b/arch/x86/kernel/cpu/perf_event_msr.c
index af216e9..b0dd2e8 100644
@@ -10,17 +10,63 @@ enum perf_msr_id {
        PERF_MSR_EVENT_MAX,
 };
 
+bool test_aperfmperf(int idx)
+{
+       return boot_cpu_has(X86_FEATURE_APERFMPERF);
+}
+
+bool test_intel(int idx)
+{
+       if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
+           boot_cpu_data.x86 != 6)
+               return false;
+
+       switch (boot_cpu_data.x86_model) {
+       case 30: /* 45nm Nehalem    */
+       case 26: /* 45nm Nehalem-EP */
+       case 46: /* 45nm Nehalem-EX */
+
+       case 37: /* 32nm Westmere    */
+       case 44: /* 32nm Westmere-EP */
+       case 47: /* 32nm Westmere-EX */
+
+       case 42: /* 32nm SandyBridge         */
+       case 45: /* 32nm SandyBridge-E/EN/EP */
+
+       case 58: /* 22nm IvyBridge       */
+       case 62: /* 22nm IvyBridge-EP/EX */
+
+       case 60: /* 22nm Haswell Core */
+       case 63: /* 22nm Haswell Server */
+       case 69: /* 22nm Haswell ULT */
+       case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
+
+       case 61: /* 14nm Broadwell Core-M */
+       case 86: /* 14nm Broadwell Xeon D */
+       case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
+       case 79: /* 14nm Broadwell Server */
+
+       case 55: /* 22nm Atom "Silvermont"                */
+       case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
+       case 76: /* 14nm Atom "Airmont"                   */
+               if (idx == PERF_MSR_SMI)
+                       return true;
+               break;
+
+       case 78: /* 14nm Skylake Mobile */
+       case 94: /* 14nm Skylake Desktop */
+               if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF)
+                       return true;
+               break;
+       }
+
+       return false;
+}
+
 struct perf_msr {
-       int     id;
        u64     msr;
-};
-
-static struct perf_msr msr[] = {
-       { PERF_MSR_TSC, 0 },
-       { PERF_MSR_APERF, MSR_IA32_APERF },
-       { PERF_MSR_MPERF, MSR_IA32_MPERF },
-       { PERF_MSR_PPERF, MSR_PPERF },
-       { PERF_MSR_SMI, MSR_SMI_COUNT },
+       struct  perf_pmu_events_attr *attr;
+       bool    (*test)(int idx);
 };
 
 PMU_EVENT_ATTR_STRING(tsc,   evattr_tsc,   "event=0x00");
@@ -29,8 +75,16 @@ PMU_EVENT_ATTR_STRING(mperf, evattr_mperf, "event=0x02");
 PMU_EVENT_ATTR_STRING(pperf, evattr_pperf, "event=0x03");
 PMU_EVENT_ATTR_STRING(smi,   evattr_smi,   "event=0x04");
 
+static struct perf_msr msr[] = {
+       [PERF_MSR_TSC]   = { 0,                 &evattr_tsc,    NULL,            },
+       [PERF_MSR_APERF] = { MSR_IA32_APERF,    &evattr_aperf,  test_aperfmperf, },
+       [PERF_MSR_MPERF] = { MSR_IA32_MPERF,    &evattr_mperf,  test_aperfmperf, },
+       [PERF_MSR_PPERF] = { MSR_PPERF,         &evattr_pperf,  test_intel,      },
+       [PERF_MSR_SMI]   = { MSR_SMI_COUNT,     &evattr_smi,    test_intel,      },
+};
+
 static struct attribute *events_attrs[PERF_MSR_EVENT_MAX + 1] = {
-       &evattr_tsc.attr.attr,
+       NULL,
 };
 
 static struct attribute_group events_attr_group = {
@@ -74,6 +128,9 @@ static int msr_event_init(struct perf_event *event)
            event->attr.sample_period) /* no sampling */
                return -EINVAL;
 
+       if (!msr[cfg].attr)
+               return -EINVAL;
+
        event->hw.idx = -1;
        event->hw.event_base = msr[cfg].msr;
        event->hw.config = cfg;
@@ -151,89 +208,32 @@ static struct pmu pmu_msr = {
        .capabilities   = PERF_PMU_CAP_NO_INTERRUPT,
 };
 
-static int __init intel_msr_init(int idx)
-{
-       if (boot_cpu_data.x86 != 6)
-               return 0;
-
-       switch (boot_cpu_data.x86_model) {
-       case 30: /* 45nm Nehalem    */
-       case 26: /* 45nm Nehalem-EP */
-       case 46: /* 45nm Nehalem-EX */
-
-       case 37: /* 32nm Westmere    */
-       case 44: /* 32nm Westmere-EP */
-       case 47: /* 32nm Westmere-EX */
-
-       case 42: /* 32nm SandyBridge         */
-       case 45: /* 32nm SandyBridge-E/EN/EP */
-
-       case 58: /* 22nm IvyBridge       */
-       case 62: /* 22nm IvyBridge-EP/EX */
-
-       case 60: /* 22nm Haswell Core */
-       case 63: /* 22nm Haswell Server */
-       case 69: /* 22nm Haswell ULT */
-       case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
-
-       case 61: /* 14nm Broadwell Core-M */
-       case 86: /* 14nm Broadwell Xeon D */
-       case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
-       case 79: /* 14nm Broadwell Server */
-               events_attrs[idx++] = &evattr_smi.attr.attr;
-               break;
-
-       case 78: /* 14nm Skylake Mobile */
-       case 94: /* 14nm Skylake Desktop */
-               events_attrs[idx++] = &evattr_pperf.attr.attr;
-               events_attrs[idx++] = &evattr_smi.attr.attr;
-               break;
-
-       case 55: /* 22nm Atom "Silvermont"                */
-       case 76: /* 14nm Atom "Airmont"                   */
-       case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
-               events_attrs[idx++] = &evattr_smi.attr.attr;
-               break;
-       }
-
-       events_attrs[idx] = NULL;
-
-       return 0;
-}
-
-static int __init amd_msr_init(int idx)
-{
-       return 0;
-}
-
 static int __init msr_init(void)
 {
-       int err;
-       int idx = 1;
+       int i, j = 0;
 
-       if (boot_cpu_has(X86_FEATURE_APERFMPERF)) {
-               events_attrs[idx++] = &evattr_aperf.attr.attr;
-               events_attrs[idx++] = &evattr_mperf.attr.attr;
-               events_attrs[idx] = NULL;
+       if (!boot_cpu_has(X86_FEATURE_TSC)) {
+               pr_cont("no MSR PMU driver.\n");
+               return 0;
        }
 
-       switch (boot_cpu_data.x86_vendor) {
-       case X86_VENDOR_INTEL:
-               err = intel_msr_init(idx);
-               break;
-
-       case X86_VENDOR_AMD:
-               err = amd_msr_init(idx);
-               break;
+       /* Probe the MSRs. */
+       for (i = PERF_MSR_TSC + 1; i < PERF_MSR_EVENT_MAX; i++) {
+               u64 val;
 
-       default:
-               err = -ENOTSUPP;
+               /*
+                * Virt sucks arse; you cannot tell if a R/O MSR is present :/
+                */
+               if (!msr[i].test(i) || rdmsrl_safe(msr[i].msr, &val))
+                       msr[i].attr = NULL;
        }
 
-       if (err != 0) {
-               pr_cont("no msr PMU driver.\n");
-               return 0;
+       /* List remaining MSRs in the sysfs attrs. */
+       for (i = 0; i < PERF_MSR_EVENT_MAX; i++) {
+               if (msr[i].attr)
+                       events_attrs[j++] = &msr[i].attr->attr.attr;
        }
+       events_attrs[j] = NULL;
 
        perf_pmu_register(&pmu_msr, "msr", -1);
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 77f9e5d..ae16867 100644
@@ -3972,28 +3972,21 @@ static void perf_event_for_each(struct perf_event *event,
                perf_event_for_each_child(sibling, func);
 }
 
-static int perf_event_period(struct perf_event *event, u64 __user *arg)
-{
-       struct perf_event_context *ctx = event->ctx;
-       int ret = 0, active;
+struct period_event {
+       struct perf_event *event;
        u64 value;
+};
 
-       if (!is_sampling_event(event))
-               return -EINVAL;
-
-       if (copy_from_user(&value, arg, sizeof(value)))
-               return -EFAULT;
-
-       if (!value)
-               return -EINVAL;
+static int __perf_event_period(void *info)
+{
+       struct period_event *pe = info;
+       struct perf_event *event = pe->event;
+       struct perf_event_context *ctx = event->ctx;
+       u64 value = pe->value;
+       bool active;
 
-       raw_spin_lock_irq(&ctx->lock);
+       raw_spin_lock(&ctx->lock);
        if (event->attr.freq) {
-               if (value > sysctl_perf_event_sample_rate) {
-                       ret = -EINVAL;
-                       goto unlock;
-               }
-
                event->attr.sample_freq = value;
        } else {
                event->attr.sample_period = value;
@@ -4012,11 +4005,53 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg)
                event->pmu->start(event, PERF_EF_RELOAD);
                perf_pmu_enable(ctx->pmu);
        }
+       raw_spin_unlock(&ctx->lock);
 
-unlock:
+       return 0;
+}
+
+static int perf_event_period(struct perf_event *event, u64 __user *arg)
+{
+       struct period_event pe = { .event = event, };
+       struct perf_event_context *ctx = event->ctx;
+       struct task_struct *task;
+       u64 value;
+
+       if (!is_sampling_event(event))
+               return -EINVAL;
+
+       if (copy_from_user(&value, arg, sizeof(value)))
+               return -EFAULT;
+
+       if (!value)
+               return -EINVAL;
+
+       if (event->attr.freq && value > sysctl_perf_event_sample_rate)
+               return -EINVAL;
+
+       task = ctx->task;
+       pe.value = value;
+
+       if (!task) {
+               cpu_function_call(event->cpu, __perf_event_period, &pe);
+               return 0;
+       }
+
+retry:
+       if (!task_function_call(task, __perf_event_period, &pe))
+               return 0;
+
+       raw_spin_lock_irq(&ctx->lock);
+       if (ctx->is_active) {
+               raw_spin_unlock_irq(&ctx->lock);
+               task = ctx->task;
+               goto retry;
+       }
+
+       __perf_event_period(&pe);
        raw_spin_unlock_irq(&ctx->lock);
 
-       return ret;
+       return 0;
 }
 
 static const struct file_operations perf_fops;
@@ -4754,12 +4789,20 @@ static const struct file_operations perf_fops = {
  * to user-space before waking everybody up.
  */
 
+static inline struct fasync_struct **perf_event_fasync(struct perf_event *event)
+{
+       /* only the parent has fasync state */
+       if (event->parent)
+               event = event->parent;
+       return &event->fasync;
+}
+
 void perf_event_wakeup(struct perf_event *event)
 {
        ring_buffer_wakeup(event);
 
        if (event->pending_kill) {
-               kill_fasync(&event->fasync, SIGIO, event->pending_kill);
+               kill_fasync(perf_event_fasync(event), SIGIO, event->pending_kill);
                event->pending_kill = 0;
        }
 }
@@ -6221,7 +6264,7 @@ static int __perf_event_overflow(struct perf_event *event,
        else
                perf_event_output(event, data, regs);
 
-       if (event->fasync && event->pending_kill) {
+       if (*perf_event_fasync(event) && event->pending_kill) {
                event->pending_wakeup = 1;
                irq_work_queue(&event->pending);
        }
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index b2be01b..182bc30 100644
@@ -437,7 +437,10 @@ static struct page *rb_alloc_aux_page(int node, int order)
 
        if (page && order) {
                /*
-                * Communicate the allocation size to the driver
+                * Communicate the allocation size to the driver:
+                * if we managed to secure a high-order allocation,
+                * set its first page's private to this order;
+                * !PagePrivate(page) means it's just a normal page.
                 */
                split_page(page, order);
                SetPagePrivate(page);
@@ -559,11 +562,13 @@ static void __rb_free_aux(struct ring_buffer *rb)
                rb->aux_priv = NULL;
        }
 
-       for (pg = 0; pg < rb->aux_nr_pages; pg++)
-               rb_free_aux_page(rb, pg);
+       if (rb->aux_nr_pages) {
+               for (pg = 0; pg < rb->aux_nr_pages; pg++)
+                       rb_free_aux_page(rb, pg);
 
-       kfree(rb->aux_pages);
-       rb->aux_nr_pages = 0;
+               kfree(rb->aux_pages);
+               rb->aux_nr_pages = 0;
+       }
 }
 
 void rb_free_aux(struct ring_buffer *rb)