Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git...
author    Ingo Molnar <mingo@kernel.org>
          Wed, 12 Aug 2015 10:16:11 +0000 (12:16 +0200)
committer Ingo Molnar <mingo@kernel.org>
          Wed, 12 Aug 2015 10:16:11 +0000 (12:16 +0200)
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

User visible changes:

  - Introduce a 'srcfile' sort key (Andi Kleen):

    # perf record -F 10000 usleep 1
    # perf report --stdio --dsos '[kernel.vmlinux]' -s srcfile
    <SNIP>
    # Overhead  Source File
        26.49%  copy_page_64.S
         5.49%  signal.c
         0.51%  msr.h
    #

    It can be combined with other sort keys; for instance, experiment with
    '-s srcfile,symbol' (see the example after this list).

    There are some oddities with certain distros and with some specific DSOs
    that are still being investigated, so your mileage may vary.

  - Update the column width for the 'srcline' sort key (Arnaldo Carvalho de Melo)

  - Support a per-event 'freq' term (Namhyung Kim):

    $ perf record -e 'cpu/instructions,freq=1234/',cycles -c 1000 sleep 1
    $ perf evlist -F
    cpu/instructions,freq=1234/: sample_freq=1234
    cycles: sample_period=1000
    $
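
    As an illustrative follow-up to the 'srcfile' item above (a sketch only:
    the flags are the ones shown in that item, and the output, omitted here,
    will vary by workload and kernel):

    # perf report --stdio --dsos '[kernel.vmlinux]' -s srcfile,symbol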

Infrastructure changes:

  - Move the perf_counts struct and its functions into a separate object (Jiri Olsa)

  - Unset perf_event_attr::freq when period term is set (Jiri Olsa)

  - Move callchain option parsing code to util.c (Kan Liang)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
arch/x86/kernel/cpu/intel_pt.h
arch/x86/kernel/cpu/perf_event_intel.c
arch/x86/kernel/cpu/perf_event_intel_cqm.c
arch/x86/kernel/cpu/perf_event_intel_pt.c
arch/x86/kernel/cpu/perf_event_msr.c
kernel/events/core.c
kernel/events/ring_buffer.c

diff --git a/arch/x86/kernel/cpu/intel_pt.h b/arch/x86/kernel/cpu/intel_pt.h
index feb293e..336878a 100644
  */
 #define TOPA_PMI_MARGIN 512
 
-/*
- * Table of Physical Addresses bits
- */
-enum topa_sz {
-       TOPA_4K = 0,
-       TOPA_8K,
-       TOPA_16K,
-       TOPA_32K,
-       TOPA_64K,
-       TOPA_128K,
-       TOPA_256K,
-       TOPA_512K,
-       TOPA_1MB,
-       TOPA_2MB,
-       TOPA_4MB,
-       TOPA_8MB,
-       TOPA_16MB,
-       TOPA_32MB,
-       TOPA_64MB,
-       TOPA_128MB,
-       TOPA_SZ_END,
-};
+#define TOPA_SHIFT 12
 
-static inline unsigned int sizes(enum topa_sz tsz)
+static inline unsigned int sizes(unsigned int tsz)
 {
-       return 1 << (tsz + 12);
+       return 1 << (tsz + TOPA_SHIFT);
 };
 
 struct topa_entry {
@@ -66,8 +45,8 @@ struct topa_entry {
        u64     rsvd4   : 16;
 };
 
-#define TOPA_SHIFT 12
-#define PT_CPUID_LEAVES 2
+#define PT_CPUID_LEAVES                2
+#define PT_CPUID_REGS_NUM      4 /* number of registers (eax, ebx, ecx, edx) */
 
 enum pt_capabilities {
        PT_CAP_max_subleaf = 0,
@@ -85,7 +64,7 @@ enum pt_capabilities {
 
 struct pt_pmu {
        struct pmu              pmu;
-       u32                     caps[4 * PT_CPUID_LEAVES];
+       u32                     caps[PT_CPUID_REGS_NUM * PT_CPUID_LEAVES];
 };
 
 /**
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index a478e3c..3f124d5 100644
@@ -2758,7 +2758,7 @@ static int intel_pmu_cpu_prepare(int cpu)
        if (x86_pmu.extra_regs || x86_pmu.lbr_sel_map) {
                cpuc->shared_regs = allocate_shared_regs(cpu);
                if (!cpuc->shared_regs)
-                       return NOTIFY_BAD;
+                       goto err;
        }
 
        if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
@@ -2766,18 +2766,27 @@ static int intel_pmu_cpu_prepare(int cpu)
 
                cpuc->constraint_list = kzalloc(sz, GFP_KERNEL);
                if (!cpuc->constraint_list)
-                       return NOTIFY_BAD;
+                       goto err_shared_regs;
 
                cpuc->excl_cntrs = allocate_excl_cntrs(cpu);
-               if (!cpuc->excl_cntrs) {
-                       kfree(cpuc->constraint_list);
-                       kfree(cpuc->shared_regs);
-                       return NOTIFY_BAD;
-               }
+               if (!cpuc->excl_cntrs)
+                       goto err_constraint_list;
+
                cpuc->excl_thread_id = 0;
        }
 
        return NOTIFY_OK;
+
+err_constraint_list:
+       kfree(cpuc->constraint_list);
+       cpuc->constraint_list = NULL;
+
+err_shared_regs:
+       kfree(cpuc->shared_regs);
+       cpuc->shared_regs = NULL;
+
+err:
+       return NOTIFY_BAD;
 }
 
 static void intel_pmu_cpu_starting(int cpu)
diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
index 63eb68b..377e8f8 100644
@@ -1255,7 +1255,7 @@ static inline void cqm_pick_event_reader(int cpu)
        cpumask_set_cpu(cpu, &cqm_cpumask);
 }
 
-static void intel_cqm_cpu_prepare(unsigned int cpu)
+static void intel_cqm_cpu_starting(unsigned int cpu)
 {
        struct intel_pqr_state *state = &per_cpu(pqr_state, cpu);
        struct cpuinfo_x86 *c = &cpu_data(cpu);
@@ -1296,13 +1296,11 @@ static int intel_cqm_cpu_notifier(struct notifier_block *nb,
        unsigned int cpu  = (unsigned long)hcpu;
 
        switch (action & ~CPU_TASKS_FROZEN) {
-       case CPU_UP_PREPARE:
-               intel_cqm_cpu_prepare(cpu);
-               break;
        case CPU_DOWN_PREPARE:
                intel_cqm_cpu_exit(cpu);
                break;
        case CPU_STARTING:
+               intel_cqm_cpu_starting(cpu);
                cqm_pick_event_reader(cpu);
                break;
        }
@@ -1373,7 +1371,7 @@ static int __init intel_cqm_init(void)
                goto out;
 
        for_each_online_cpu(i) {
-               intel_cqm_cpu_prepare(i);
+               intel_cqm_cpu_starting(i);
                cqm_pick_event_reader(i);
        }
 
diff --git a/arch/x86/kernel/cpu/perf_event_intel_pt.c b/arch/x86/kernel/cpu/perf_event_intel_pt.c
index e20cfac..4216928 100644
@@ -79,7 +79,7 @@ static struct pt_cap_desc {
 static u32 pt_cap_get(enum pt_capabilities cap)
 {
        struct pt_cap_desc *cd = &pt_caps[cap];
-       u32 c = pt_pmu.caps[cd->leaf * 4 + cd->reg];
+       u32 c = pt_pmu.caps[cd->leaf * PT_CPUID_REGS_NUM + cd->reg];
        unsigned int shift = __ffs(cd->mask);
 
        return (c & cd->mask) >> shift;
@@ -145,10 +145,10 @@ static int __init pt_pmu_hw_init(void)
 
        for (i = 0; i < PT_CPUID_LEAVES; i++) {
                cpuid_count(20, i,
-                           &pt_pmu.caps[CR_EAX + i*4],
-                           &pt_pmu.caps[CR_EBX + i*4],
-                           &pt_pmu.caps[CR_ECX + i*4],
-                           &pt_pmu.caps[CR_EDX + i*4]);
+                           &pt_pmu.caps[CR_EAX + i*PT_CPUID_REGS_NUM],
+                           &pt_pmu.caps[CR_EBX + i*PT_CPUID_REGS_NUM],
+                           &pt_pmu.caps[CR_ECX + i*PT_CPUID_REGS_NUM],
+                           &pt_pmu.caps[CR_EDX + i*PT_CPUID_REGS_NUM]);
        }
 
        ret = -ENOMEM;
diff --git a/arch/x86/kernel/cpu/perf_event_msr.c b/arch/x86/kernel/cpu/perf_event_msr.c
index af216e9..b0dd2e8 100644
@@ -10,17 +10,63 @@ enum perf_msr_id {
        PERF_MSR_EVENT_MAX,
 };
 
+bool test_aperfmperf(int idx)
+{
+       return boot_cpu_has(X86_FEATURE_APERFMPERF);
+}
+
+bool test_intel(int idx)
+{
+       if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
+           boot_cpu_data.x86 != 6)
+               return false;
+
+       switch (boot_cpu_data.x86_model) {
+       case 30: /* 45nm Nehalem    */
+       case 26: /* 45nm Nehalem-EP */
+       case 46: /* 45nm Nehalem-EX */
+
+       case 37: /* 32nm Westmere    */
+       case 44: /* 32nm Westmere-EP */
+       case 47: /* 32nm Westmere-EX */
+
+       case 42: /* 32nm SandyBridge         */
+       case 45: /* 32nm SandyBridge-E/EN/EP */
+
+       case 58: /* 22nm IvyBridge       */
+       case 62: /* 22nm IvyBridge-EP/EX */
+
+       case 60: /* 22nm Haswell Core */
+       case 63: /* 22nm Haswell Server */
+       case 69: /* 22nm Haswell ULT */
+       case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
+
+       case 61: /* 14nm Broadwell Core-M */
+       case 86: /* 14nm Broadwell Xeon D */
+       case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
+       case 79: /* 14nm Broadwell Server */
+
+       case 55: /* 22nm Atom "Silvermont"                */
+       case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
+       case 76: /* 14nm Atom "Airmont"                   */
+               if (idx == PERF_MSR_SMI)
+                       return true;
+               break;
+
+       case 78: /* 14nm Skylake Mobile */
+       case 94: /* 14nm Skylake Desktop */
+               if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF)
+                       return true;
+               break;
+       }
+
+       return false;
+}
+
 struct perf_msr {
-       int     id;
        u64     msr;
-};
-
-static struct perf_msr msr[] = {
-       { PERF_MSR_TSC, 0 },
-       { PERF_MSR_APERF, MSR_IA32_APERF },
-       { PERF_MSR_MPERF, MSR_IA32_MPERF },
-       { PERF_MSR_PPERF, MSR_PPERF },
-       { PERF_MSR_SMI, MSR_SMI_COUNT },
+       struct  perf_pmu_events_attr *attr;
+       bool    (*test)(int idx);
 };
 
 PMU_EVENT_ATTR_STRING(tsc,   evattr_tsc,   "event=0x00");
@@ -29,8 +75,16 @@ PMU_EVENT_ATTR_STRING(mperf, evattr_mperf, "event=0x02");
 PMU_EVENT_ATTR_STRING(pperf, evattr_pperf, "event=0x03");
 PMU_EVENT_ATTR_STRING(smi,   evattr_smi,   "event=0x04");
 
+static struct perf_msr msr[] = {
+       [PERF_MSR_TSC]   = { 0,                 &evattr_tsc,    NULL,            },
+       [PERF_MSR_APERF] = { MSR_IA32_APERF,    &evattr_aperf,  test_aperfmperf, },
+       [PERF_MSR_MPERF] = { MSR_IA32_MPERF,    &evattr_mperf,  test_aperfmperf, },
+       [PERF_MSR_PPERF] = { MSR_PPERF,         &evattr_pperf,  test_intel,      },
+       [PERF_MSR_SMI]   = { MSR_SMI_COUNT,     &evattr_smi,    test_intel,      },
+};
+
 static struct attribute *events_attrs[PERF_MSR_EVENT_MAX + 1] = {
-       &evattr_tsc.attr.attr,
+       NULL,
 };
 
 static struct attribute_group events_attr_group = {
@@ -74,6 +128,9 @@ static int msr_event_init(struct perf_event *event)
            event->attr.sample_period) /* no sampling */
                return -EINVAL;
 
+       if (!msr[cfg].attr)
+               return -EINVAL;
+
        event->hw.idx = -1;
        event->hw.event_base = msr[cfg].msr;
        event->hw.config = cfg;
@@ -151,89 +208,32 @@ static struct pmu pmu_msr = {
        .capabilities   = PERF_PMU_CAP_NO_INTERRUPT,
 };
 
-static int __init intel_msr_init(int idx)
-{
-       if (boot_cpu_data.x86 != 6)
-               return 0;
-
-       switch (boot_cpu_data.x86_model) {
-       case 30: /* 45nm Nehalem    */
-       case 26: /* 45nm Nehalem-EP */
-       case 46: /* 45nm Nehalem-EX */
-
-       case 37: /* 32nm Westmere    */
-       case 44: /* 32nm Westmere-EP */
-       case 47: /* 32nm Westmere-EX */
-
-       case 42: /* 32nm SandyBridge         */
-       case 45: /* 32nm SandyBridge-E/EN/EP */
-
-       case 58: /* 22nm IvyBridge       */
-       case 62: /* 22nm IvyBridge-EP/EX */
-
-       case 60: /* 22nm Haswell Core */
-       case 63: /* 22nm Haswell Server */
-       case 69: /* 22nm Haswell ULT */
-       case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
-
-       case 61: /* 14nm Broadwell Core-M */
-       case 86: /* 14nm Broadwell Xeon D */
-       case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */
-       case 79: /* 14nm Broadwell Server */
-               events_attrs[idx++] = &evattr_smi.attr.attr;
-               break;
-
-       case 78: /* 14nm Skylake Mobile */
-       case 94: /* 14nm Skylake Desktop */
-               events_attrs[idx++] = &evattr_pperf.attr.attr;
-               events_attrs[idx++] = &evattr_smi.attr.attr;
-               break;
-
-       case 55: /* 22nm Atom "Silvermont"                */
-       case 76: /* 14nm Atom "Airmont"                   */
-       case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */
-               events_attrs[idx++] = &evattr_smi.attr.attr;
-               break;
-       }
-
-       events_attrs[idx] = NULL;
-
-       return 0;
-}
-
-static int __init amd_msr_init(int idx)
-{
-       return 0;
-}
-
 static int __init msr_init(void)
 {
-       int err;
-       int idx = 1;
+       int i, j = 0;
 
-       if (boot_cpu_has(X86_FEATURE_APERFMPERF)) {
-               events_attrs[idx++] = &evattr_aperf.attr.attr;
-               events_attrs[idx++] = &evattr_mperf.attr.attr;
-               events_attrs[idx] = NULL;
+       if (!boot_cpu_has(X86_FEATURE_TSC)) {
+               pr_cont("no MSR PMU driver.\n");
+               return 0;
        }
 
-       switch (boot_cpu_data.x86_vendor) {
-       case X86_VENDOR_INTEL:
-               err = intel_msr_init(idx);
-               break;
-
-       case X86_VENDOR_AMD:
-               err = amd_msr_init(idx);
-               break;
+       /* Probe the MSRs. */
+       for (i = PERF_MSR_TSC + 1; i < PERF_MSR_EVENT_MAX; i++) {
+               u64 val;
 
-       default:
-               err = -ENOTSUPP;
+               /*
+                * Virt sucks arse; you cannot tell if a R/O MSR is present :/
+                */
+               if (!msr[i].test(i) || rdmsrl_safe(msr[i].msr, &val))
+                       msr[i].attr = NULL;
        }
 
-       if (err != 0) {
-               pr_cont("no msr PMU driver.\n");
-               return 0;
+       /* List remaining MSRs in the sysfs attrs. */
+       for (i = 0; i < PERF_MSR_EVENT_MAX; i++) {
+               if (msr[i].attr)
+                       events_attrs[j++] = &msr[i].attr->attr.attr;
        }
+       events_attrs[j] = NULL;
 
        perf_pmu_register(&pmu_msr, "msr", -1);
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 77f9e5d..ae16867 100644
@@ -3972,28 +3972,21 @@ static void perf_event_for_each(struct perf_event *event,
                perf_event_for_each_child(sibling, func);
 }
 
-static int perf_event_period(struct perf_event *event, u64 __user *arg)
-{
-       struct perf_event_context *ctx = event->ctx;
-       int ret = 0, active;
+struct period_event {
+       struct perf_event *event;
        u64 value;
+};
 
-       if (!is_sampling_event(event))
-               return -EINVAL;
-
-       if (copy_from_user(&value, arg, sizeof(value)))
-               return -EFAULT;
-
-       if (!value)
-               return -EINVAL;
+static int __perf_event_period(void *info)
+{
+       struct period_event *pe = info;
+       struct perf_event *event = pe->event;
+       struct perf_event_context *ctx = event->ctx;
+       u64 value = pe->value;
+       bool active;
 
-       raw_spin_lock_irq(&ctx->lock);
+       raw_spin_lock(&ctx->lock);
        if (event->attr.freq) {
-               if (value > sysctl_perf_event_sample_rate) {
-                       ret = -EINVAL;
-                       goto unlock;
-               }
-
                event->attr.sample_freq = value;
        } else {
                event->attr.sample_period = value;
@@ -4012,11 +4005,53 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg)
                event->pmu->start(event, PERF_EF_RELOAD);
                perf_pmu_enable(ctx->pmu);
        }
+       raw_spin_unlock(&ctx->lock);
 
-unlock:
+       return 0;
+}
+
+static int perf_event_period(struct perf_event *event, u64 __user *arg)
+{
+       struct period_event pe = { .event = event, };
+       struct perf_event_context *ctx = event->ctx;
+       struct task_struct *task;
+       u64 value;
+
+       if (!is_sampling_event(event))
+               return -EINVAL;
+
+       if (copy_from_user(&value, arg, sizeof(value)))
+               return -EFAULT;
+
+       if (!value)
+               return -EINVAL;
+
+       if (event->attr.freq && value > sysctl_perf_event_sample_rate)
+               return -EINVAL;
+
+       task = ctx->task;
+       pe.value = value;
+
+       if (!task) {
+               cpu_function_call(event->cpu, __perf_event_period, &pe);
+               return 0;
+       }
+
+retry:
+       if (!task_function_call(task, __perf_event_period, &pe))
+               return 0;
+
+       raw_spin_lock_irq(&ctx->lock);
+       if (ctx->is_active) {
+               raw_spin_unlock_irq(&ctx->lock);
+               task = ctx->task;
+               goto retry;
+       }
+
+       __perf_event_period(&pe);
        raw_spin_unlock_irq(&ctx->lock);
 
-       return ret;
+       return 0;
 }
 
 static const struct file_operations perf_fops;
@@ -4754,12 +4789,20 @@ static const struct file_operations perf_fops = {
  * to user-space before waking everybody up.
  */
 
+static inline struct fasync_struct **perf_event_fasync(struct perf_event *event)
+{
+       /* only the parent has fasync state */
+       if (event->parent)
+               event = event->parent;
+       return &event->fasync;
+}
+
 void perf_event_wakeup(struct perf_event *event)
 {
        ring_buffer_wakeup(event);
 
        if (event->pending_kill) {
-               kill_fasync(&event->fasync, SIGIO, event->pending_kill);
+               kill_fasync(perf_event_fasync(event), SIGIO, event->pending_kill);
                event->pending_kill = 0;
        }
 }
@@ -6221,7 +6264,7 @@ static int __perf_event_overflow(struct perf_event *event,
        else
                perf_event_output(event, data, regs);
 
-       if (event->fasync && event->pending_kill) {
+       if (*perf_event_fasync(event) && event->pending_kill) {
                event->pending_wakeup = 1;
                irq_work_queue(&event->pending);
        }
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index b2be01b..182bc30 100644
@@ -437,7 +437,10 @@ static struct page *rb_alloc_aux_page(int node, int order)
 
        if (page && order) {
                /*
-                * Communicate the allocation size to the driver
+                * Communicate the allocation size to the driver:
+                * if we managed to secure a high-order allocation,
+                * set its first page's private to this order;
+                * !PagePrivate(page) means it's just a normal page.
                 */
                split_page(page, order);
                SetPagePrivate(page);
@@ -559,11 +562,13 @@ static void __rb_free_aux(struct ring_buffer *rb)
                rb->aux_priv = NULL;
        }
 
-       for (pg = 0; pg < rb->aux_nr_pages; pg++)
-               rb_free_aux_page(rb, pg);
+       if (rb->aux_nr_pages) {
+               for (pg = 0; pg < rb->aux_nr_pages; pg++)
+                       rb_free_aux_page(rb, pg);
 
-       kfree(rb->aux_pages);
-       rb->aux_nr_pages = 0;
+               kfree(rb->aux_pages);
+               rb->aux_nr_pages = 0;
+       }
 }
 
 void rb_free_aux(struct ring_buffer *rb)