perf: Fix irq_work 'tail' recursion
[pandora-kernel.git] / kernel / events / core.c
index 5bbe443..4277095 100644 (file)
@@ -36,6 +36,7 @@
 #include <linux/perf_event.h>
 #include <linux/ftrace_event.h>
 #include <linux/hw_breakpoint.h>
+#include <linux/compat.h>
 
 #include "internal.h"
 
@@ -242,9 +243,9 @@ perf_cgroup_match(struct perf_event *event)
        return !event->cgrp || event->cgrp == cpuctx->cgrp;
 }
 
-static inline void perf_get_cgroup(struct perf_event *event)
+static inline bool perf_tryget_cgroup(struct perf_event *event)
 {
-       css_get(&event->cgrp->css);
+       return css_tryget(&event->cgrp->css);
 }
 
 static inline void perf_put_cgroup(struct perf_event *event)
@@ -360,6 +361,8 @@ void perf_cgroup_switch(struct task_struct *task, int mode)
 
        list_for_each_entry_rcu(pmu, &pmus, entry) {
                cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
+               if (cpuctx->unique_pmu != pmu)
+                       continue; /* ensure we process each cpuctx once */
 
                /*
                 * perf_cgroup_events says at least one
@@ -383,9 +386,10 @@ void perf_cgroup_switch(struct task_struct *task, int mode)
 
                        if (mode & PERF_CGROUP_SWIN) {
                                WARN_ON_ONCE(cpuctx->cgrp);
-                               /* set cgrp before ctxsw in to
-                                * allow event_filter_match() to not
-                                * have to pass task around
+                               /*
+                                * set cgrp before ctxsw in to allow
+                                * event_filter_match() to not have to pass
+                                * task around
                                 */
                                cpuctx->cgrp = perf_cgroup_from_task(task);
                                cpu_ctx_sched_in(cpuctx, EVENT_ALL, task);
@@ -473,7 +477,11 @@ static inline int perf_cgroup_connect(int fd, struct perf_event *event,
        event->cgrp = cgrp;
 
        /* must be done before we fput() the file */
-       perf_get_cgroup(event);
+       if (!perf_tryget_cgroup(event)) {
+               event->cgrp = NULL;
+               ret = -ENOENT;
+               goto out;
+       }
 
        /*
         * all events in a group must monitor
@@ -1173,6 +1181,11 @@ group_sched_out(struct perf_event *group_event,
                cpuctx->exclusive = 0;
 }
 
+struct remove_event {
+       struct perf_event *event;
+       bool detach_group;
+};
+
 /*
  * Cross CPU call to remove a performance event
  *
@@ -1181,12 +1194,15 @@ group_sched_out(struct perf_event *group_event,
  */
 static int __perf_remove_from_context(void *info)
 {
-       struct perf_event *event = info;
+       struct remove_event *re = info;
+       struct perf_event *event = re->event;
        struct perf_event_context *ctx = event->ctx;
        struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
 
        raw_spin_lock(&ctx->lock);
        event_sched_out(event, cpuctx, ctx);
+       if (re->detach_group)
+               perf_group_detach(event);
        list_del_event(event, ctx);
        if (!ctx->nr_events && cpuctx->task_ctx == ctx) {
                ctx->is_active = 0;
@@ -1211,10 +1227,14 @@ static int __perf_remove_from_context(void *info)
  * When called from perf_event_exit_task, it's OK because the
  * context has been detached from its task.
  */
-static void perf_remove_from_context(struct perf_event *event)
+static void perf_remove_from_context(struct perf_event *event, bool detach_group)
 {
        struct perf_event_context *ctx = event->ctx;
        struct task_struct *task = ctx->task;
+       struct remove_event re = {
+               .event = event,
+               .detach_group = detach_group,
+       };
 
        lockdep_assert_held(&ctx->mutex);
 
@@ -1223,12 +1243,12 @@ static void perf_remove_from_context(struct perf_event *event)
                 * Per cpu events are removed via an smp call and
                 * the removal is always successful.
                 */
-               cpu_function_call(event->cpu, __perf_remove_from_context, event);
+               cpu_function_call(event->cpu, __perf_remove_from_context, &re);
                return;
        }
 
 retry:
-       if (!task_function_call(task, __perf_remove_from_context, event))
+       if (!task_function_call(task, __perf_remove_from_context, &re))
                return;
 
        raw_spin_lock_irq(&ctx->lock);
@@ -1245,6 +1265,8 @@ retry:
         * Since the task isn't running, its safe to remove the event, us
         * holding the ctx->lock ensures the task won't get scheduled in.
         */
+       if (detach_group)
+               perf_group_detach(event);
        list_del_event(event, ctx);
        raw_spin_unlock_irq(&ctx->lock);
 }
@@ -1662,6 +1684,16 @@ retry:
         */
        if (ctx->is_active) {
                raw_spin_unlock_irq(&ctx->lock);
+               /*
+                * Reload the task pointer, it might have been changed by
+                * a concurrent perf_event_context_sched_out().
+                */
+               task = ctx->task;
+               /*
+                * Reload the task pointer, it might have been changed by
+                * a concurrent perf_event_context_sched_out().
+                */
+               task = ctx->task;
                goto retry;
        }
 
@@ -3039,10 +3071,7 @@ int perf_event_release_kernel(struct perf_event *event)
         *     to trigger the AB-BA case.
         */
        mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING);
-       raw_spin_lock_irq(&ctx->lock);
-       perf_group_detach(event);
-       raw_spin_unlock_irq(&ctx->lock);
-       perf_remove_from_context(event);
+       perf_remove_from_context(event, true);
        mutex_unlock(&ctx->mutex);
 
        free_event(event);
@@ -3416,6 +3445,25 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
        return 0;
 }
 
+#ifdef CONFIG_COMPAT
+static long perf_compat_ioctl(struct file *file, unsigned int cmd,
+                               unsigned long arg)
+{
+       switch (_IOC_NR(cmd)) {
+       case _IOC_NR(PERF_EVENT_IOC_SET_FILTER):
+               /* Fix up pointer size (usually 4 -> 8 in 32-on-64-bit case */
+               if (_IOC_SIZE(cmd) == sizeof(compat_uptr_t)) {
+                       cmd &= ~IOCSIZE_MASK;
+                       cmd |= sizeof(void *) << IOCSIZE_SHIFT;
+               }
+               break;
+       }
+       return perf_ioctl(file, cmd, arg);
+}
+#else
+# define perf_compat_ioctl NULL
+#endif
+
 int perf_event_task_enable(void)
 {
        struct perf_event *event;
@@ -3882,7 +3930,7 @@ static const struct file_operations perf_fops = {
        .read                   = perf_read,
        .poll                   = perf_poll,
        .unlocked_ioctl         = perf_ioctl,
-       .compat_ioctl           = perf_ioctl,
+       .compat_ioctl           = perf_compat_ioctl,
        .mmap                   = perf_mmap,
        .fasync                 = perf_fasync,
 };
@@ -3908,6 +3956,13 @@ static void perf_pending_event(struct irq_work *entry)
 {
        struct perf_event *event = container_of(entry,
                        struct perf_event, pending);
+       int rctx;
+
+       rctx = perf_swevent_get_recursion_context();
+       /*
+        * If we 'fail' here, that's OK, it means recursion is already disabled
+        * and we won't recurse 'further'.
+        */
 
        if (event->pending_disable) {
                event->pending_disable = 0;
@@ -3918,6 +3973,9 @@ static void perf_pending_event(struct irq_work *entry)
                event->pending_wakeup = 0;
                perf_event_wakeup(event);
        }
+
+       if (rctx >= 0)
+               perf_swevent_put_recursion_context(rctx);
 }
 
 /*
@@ -4377,7 +4435,7 @@ static void perf_event_task_event(struct perf_task_event *task_event)
        rcu_read_lock();
        list_for_each_entry_rcu(pmu, &pmus, entry) {
                cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
-               if (cpuctx->active_pmu != pmu)
+               if (cpuctx->unique_pmu != pmu)
                        goto next;
                perf_event_task_ctx(&cpuctx->ctx, task_event);
 
@@ -4523,7 +4581,7 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
        rcu_read_lock();
        list_for_each_entry_rcu(pmu, &pmus, entry) {
                cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
-               if (cpuctx->active_pmu != pmu)
+               if (cpuctx->unique_pmu != pmu)
                        goto next;
                perf_event_comm_ctx(&cpuctx->ctx, comm_event);
 
@@ -4719,7 +4777,7 @@ got_name:
        rcu_read_lock();
        list_for_each_entry_rcu(pmu, &pmus, entry) {
                cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
-               if (cpuctx->active_pmu != pmu)
+               if (cpuctx->unique_pmu != pmu)
                        goto next;
                perf_event_mmap_ctx(&cpuctx->ctx, mmap_event,
                                        vma->vm_flags & VM_EXEC);
@@ -4892,6 +4950,9 @@ struct swevent_htable {
 
        /* Recursion avoidance in each contexts */
        int                             recursion[PERF_NR_CONTEXTS];
+
+       /* Keeps track of cpu being initialized/exited */
+       bool                            online;
 };
 
 static DEFINE_PER_CPU(struct swevent_htable, swevent_htable);
@@ -5134,8 +5195,14 @@ static int perf_swevent_add(struct perf_event *event, int flags)
        hwc->state = !(flags & PERF_EF_START);
 
        head = find_swevent_head(swhash, event);
-       if (WARN_ON_ONCE(!head))
+       if (!head) {
+               /*
+                * We can race with cpu hotplug code. Do not
+                * WARN if the cpu just got unplugged.
+                */
+               WARN_ON_ONCE(swhash->online);
                return -EINVAL;
+       }
 
        hlist_add_head_rcu(&event->hlist_entry, head);
 
@@ -5741,8 +5808,8 @@ static void update_pmu_context(struct pmu *pmu, struct pmu *old_pmu)
 
                cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
 
-               if (cpuctx->active_pmu == old_pmu)
-                       cpuctx->active_pmu = pmu;
+               if (cpuctx->unique_pmu == old_pmu)
+                       cpuctx->unique_pmu = pmu;
        }
 }
 
@@ -5877,7 +5944,7 @@ skip_type:
                cpuctx->ctx.pmu = pmu;
                cpuctx->jiffies_interval = 1;
                INIT_LIST_HEAD(&cpuctx->rotation_list);
-               cpuctx->active_pmu = pmu;
+               cpuctx->unique_pmu = pmu;
        }
 
 got_cpu_context:
@@ -6294,6 +6361,9 @@ SYSCALL_DEFINE5(perf_event_open,
        if (attr.freq) {
                if (attr.sample_freq > sysctl_perf_event_sample_rate)
                        return -EINVAL;
+       } else {
+               if (attr.sample_period & (1ULL << 63))
+                       return -EINVAL;
        }
 
        /*
@@ -6440,7 +6510,7 @@ SYSCALL_DEFINE5(perf_event_open,
                struct perf_event_context *gctx = group_leader->ctx;
 
                mutex_lock(&gctx->mutex);
-               perf_remove_from_context(group_leader);
+               perf_remove_from_context(group_leader, false);
 
                /*
                 * Removing from the context ends up with disabled
@@ -6450,7 +6520,7 @@ SYSCALL_DEFINE5(perf_event_open,
                perf_event__state_init(group_leader);
                list_for_each_entry(sibling, &group_leader->sibling_list,
                                    group_entry) {
-                       perf_remove_from_context(sibling);
+                       perf_remove_from_context(sibling, false);
                        perf_event__state_init(sibling);
                        put_ctx(gctx);
                }
@@ -6603,13 +6673,7 @@ __perf_event_exit_task(struct perf_event *child_event,
                         struct perf_event_context *child_ctx,
                         struct task_struct *child)
 {
-       if (child_event->parent) {
-               raw_spin_lock_irq(&child_ctx->lock);
-               perf_group_detach(child_event);
-               raw_spin_unlock_irq(&child_ctx->lock);
-       }
-
-       perf_remove_from_context(child_event);
+       perf_remove_from_context(child_event, !!child_event->parent);
 
        /*
         * It can happen that the parent exits first, and has events
@@ -7047,8 +7111,10 @@ int perf_event_init_task(struct task_struct *child)
 
        for_each_task_context_nr(ctxn) {
                ret = perf_event_init_context(child, ctxn);
-               if (ret)
+               if (ret) {
+                       perf_event_free_task(child);
                        return ret;
+               }
        }
 
        return 0;
@@ -7071,6 +7137,7 @@ static void __cpuinit perf_event_init_cpu(int cpu)
        struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
 
        mutex_lock(&swhash->hlist_mutex);
+       swhash->online = true;
        if (swhash->hlist_refcount > 0) {
                struct swevent_hlist *hlist;
 
@@ -7093,15 +7160,15 @@ static void perf_pmu_rotate_stop(struct pmu *pmu)
 
 static void __perf_event_exit_context(void *__info)
 {
+       struct remove_event re = { .detach_group = false };
        struct perf_event_context *ctx = __info;
-       struct perf_event *event, *tmp;
 
        perf_pmu_rotate_stop(ctx->pmu);
 
-       list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry)
-               __perf_remove_from_context(event);
-       list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, group_entry)
-               __perf_remove_from_context(event);
+       rcu_read_lock();
+       list_for_each_entry_rcu(re.event, &ctx->event_list, event_entry)
+               __perf_remove_from_context(&re);
+       rcu_read_unlock();
 }
 
 static void perf_event_exit_cpu_context(int cpu)
@@ -7125,11 +7192,12 @@ static void perf_event_exit_cpu(int cpu)
 {
        struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
 
+       perf_event_exit_cpu_context(cpu);
+
        mutex_lock(&swhash->hlist_mutex);
+       swhash->online = false;
        swevent_hlist_release(swhash);
        mutex_unlock(&swhash->hlist_mutex);
-
-       perf_event_exit_cpu_context(cpu);
 }
 #else
 static inline void perf_event_exit_cpu(int cpu) { }