perf: Limit perf_event_attr::sample_period to 63 bits
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 58690af..f04d7b6 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -185,9 +185,6 @@ static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
 static void update_context_time(struct perf_event_context *ctx);
 static u64 perf_event_time(struct perf_event *event);
 
-static void ring_buffer_attach(struct perf_event *event,
-                              struct ring_buffer *rb);
-
 void __weak perf_event_print_debug(void)       { }
 
 extern __weak const char *perf_pmu_name(void)
@@ -245,9 +242,9 @@ perf_cgroup_match(struct perf_event *event)
        return !event->cgrp || event->cgrp == cpuctx->cgrp;
 }
 
-static inline void perf_get_cgroup(struct perf_event *event)
+static inline bool perf_tryget_cgroup(struct perf_event *event)
 {
-       css_get(&event->cgrp->css);
+       return css_tryget(&event->cgrp->css);
 }
 
 static inline void perf_put_cgroup(struct perf_event *event)
@@ -363,6 +360,8 @@ void perf_cgroup_switch(struct task_struct *task, int mode)
 
        list_for_each_entry_rcu(pmu, &pmus, entry) {
                cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
+               if (cpuctx->unique_pmu != pmu)
+                       continue; /* ensure we process each cpuctx once */
 
                /*
                 * perf_cgroup_events says at least one
@@ -386,9 +385,10 @@ void perf_cgroup_switch(struct task_struct *task, int mode)
 
                        if (mode & PERF_CGROUP_SWIN) {
                                WARN_ON_ONCE(cpuctx->cgrp);
-                               /* set cgrp before ctxsw in to
-                                * allow event_filter_match() to not
-                                * have to pass task around
+                               /*
+                                * set cgrp before ctxsw in to allow
+                                * event_filter_match() to not have to pass
+                                * task around
                                 */
                                cpuctx->cgrp = perf_cgroup_from_task(task);
                                cpu_ctx_sched_in(cpuctx, EVENT_ALL, task);
@@ -476,7 +476,11 @@ static inline int perf_cgroup_connect(int fd, struct perf_event *event,
        event->cgrp = cgrp;
 
        /* must be done before we fput() the file */
-       perf_get_cgroup(event);
+       if (!perf_tryget_cgroup(event)) {
+               event->cgrp = NULL;
+               ret = -ENOENT;
+               goto out;
+       }
 
        /*
         * all events in a group must monitor
@@ -714,8 +718,18 @@ perf_lock_task_context(struct task_struct *task, int ctxn, unsigned long *flags)
 {
        struct perf_event_context *ctx;
 
-       rcu_read_lock();
 retry:
+       /*
+        * One of the few rules of preemptible RCU is that one cannot do
+        * rcu_read_unlock() while holding a scheduler (or nested) lock when
+        * part of the read side critical section was preemptible -- see
+        * rcu_read_unlock_special().
+        *
+        * Since ctx->lock nests under rq->lock we must ensure the entire read
+        * side critical section is non-preemptible.
+        */
+       preempt_disable();
+       rcu_read_lock();
        ctx = rcu_dereference(task->perf_event_ctxp[ctxn]);
        if (ctx) {
                /*
@@ -731,6 +745,8 @@ retry:
                raw_spin_lock_irqsave(&ctx->lock, *flags);
                if (ctx != rcu_dereference(task->perf_event_ctxp[ctxn])) {
                        raw_spin_unlock_irqrestore(&ctx->lock, *flags);
+                       rcu_read_unlock();
+                       preempt_enable();
                        goto retry;
                }
 
@@ -740,6 +756,7 @@ retry:
                }
        }
        rcu_read_unlock();
+       preempt_enable();
        return ctx;
 }
 
@@ -889,6 +906,15 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
                ctx->nr_stat++;
 }
 
+/*
+ * Initialize event state based on perf_event_attr::disabled.
+ */
+static inline void perf_event__state_init(struct perf_event *event)
+{
+       event->state = event->attr.disabled ? PERF_EVENT_STATE_OFF :
+                                             PERF_EVENT_STATE_INACTIVE;
+}
+
 /*
  * Called at perf_event creation and when events are attached/detached from a
  * group.
@@ -1687,7 +1713,16 @@ static int __perf_event_enable(void *info)
        struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
        int err;
 
-       if (WARN_ON_ONCE(!ctx->is_active))
+       /*
+        * There's a time window between the 'ctx->is_active' check
+        * in perf_event_enable() and this point, during which we have:
+        *   - IRQs on
+        *   - ctx->lock unlocked
+        *
+        * so the task could be killed and 'ctx' deactivated
+        * by perf_event_exit_task().
+        */
+       if (!ctx->is_active)
                return -EINVAL;
 
        raw_spin_lock(&ctx->lock);
@@ -2939,6 +2974,7 @@ static void free_event_rcu(struct rcu_head *head)
 }
 
 static void ring_buffer_put(struct ring_buffer *rb);
+static void ring_buffer_detach(struct perf_event *event, struct ring_buffer *rb);
 
 static void free_event(struct perf_event *event)
 {
@@ -2962,8 +2998,22 @@ static void free_event(struct perf_event *event)
        }
 
        if (event->rb) {
-               ring_buffer_put(event->rb);
-               event->rb = NULL;
+               struct ring_buffer *rb;
+
+               /*
+                * Can happen when we close an event with redirected output.
+                *
+                * Since we have a 0 refcount, perf_mmap_close() will skip
+                * over us, possibly making our ring_buffer_put() the last.
+                */
+               mutex_lock(&event->mmap_mutex);
+               rb = event->rb;
+               if (rb) {
+                       rcu_assign_pointer(event->rb, NULL);
+                       ring_buffer_detach(event, rb);
+                       ring_buffer_put(rb); /* could be last */
+               }
+               mutex_unlock(&event->mmap_mutex);
        }
 
        if (is_cgroup_event(event))
@@ -3011,12 +3061,12 @@ EXPORT_SYMBOL_GPL(perf_event_release_kernel);
 /*
  * Called when the last reference to the file is gone.
  */
-static int perf_release(struct inode *inode, struct file *file)
+static void put_event(struct perf_event *event)
 {
-       struct perf_event *event = file->private_data;
        struct task_struct *owner;
 
-       file->private_data = NULL;
+       if (!atomic_long_dec_and_test(&event->refcount))
+               return;
 
        rcu_read_lock();
        owner = ACCESS_ONCE(event->owner);
@@ -3051,7 +3101,13 @@ static int perf_release(struct inode *inode, struct file *file)
                put_task_struct(owner);
        }
 
-       return perf_event_release_kernel(event);
+       perf_event_release_kernel(event);
+}
+
+static int perf_release(struct inode *inode, struct file *file)
+{
+       put_event(file->private_data);
+       return 0;
 }
 
 u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
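A note on the refactoring above: the old code pinned the event through its
struct file (event->filp), so the event died exactly when the file did. The
new put_event() decouples the two with an explicit refcount, initialized to 1
in perf_event_alloc() further below; inherit_event() takes an extra reference
on the parent. A toy user-space model of the counting scheme (illustrative
only, not part of the patch):

    #include <stdatomic.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct toy_event {
            atomic_long refcount;
    };

    /* Mirrors put_event(): only the 1 -> 0 transition frees. */
    static void toy_put_event(struct toy_event *event)
    {
            if (atomic_fetch_sub(&event->refcount, 1) != 1)
                    return;
            printf("last reference dropped, freeing\n");
            free(event);
    }

    int main(void)
    {
            struct toy_event *event = malloc(sizeof(*event));

            atomic_init(&event->refcount, 1);       /* the file's reference */
            atomic_fetch_add(&event->refcount, 1);  /* an inherited child's */

            toy_put_event(event);   /* perf_release(): child still holds it */
            toy_put_event(event);   /* sync_child_event(): last ref, freed  */
            return 0;
    }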
@@ -3195,30 +3251,13 @@ static unsigned int perf_poll(struct file *file, poll_table *wait)
        unsigned int events = POLL_HUP;
 
        /*
-        * Race between perf_event_set_output() and perf_poll(): perf_poll()
-        * grabs the rb reference but perf_event_set_output() overrides it.
-        * Here is the timeline for two threads T1, T2:
-        * t0: T1, rb = rcu_dereference(event->rb)
-        * t1: T2, old_rb = event->rb
-        * t2: T2, event->rb = new rb
-        * t3: T2, ring_buffer_detach(old_rb)
-        * t4: T1, ring_buffer_attach(rb1)
-        * t5: T1, poll_wait(event->waitq)
-        *
-        * To avoid this problem, we grab mmap_mutex in perf_poll()
-        * thereby ensuring that the assignment of the new ring buffer
-        * and the detachment of the old buffer appear atomic to perf_poll()
+        * Pin the event->rb by taking event->mmap_mutex; otherwise
+        * perf_event_set_output() can swizzle our rb and make us miss wakeups.
         */
        mutex_lock(&event->mmap_mutex);
-
-       rcu_read_lock();
-       rb = rcu_dereference(event->rb);
-       if (rb) {
-               ring_buffer_attach(event, rb);
+       rb = event->rb;
+       if (rb)
                events = atomic_xchg(&rb->poll, 0);
-       }
-       rcu_read_unlock();
-
        mutex_unlock(&event->mmap_mutex);
 
        poll_wait(file, &event->waitq, wait);
@@ -3304,7 +3343,7 @@ unlock:
 
 static const struct file_operations perf_fops;
 
-static struct perf_event *perf_fget_light(int fd, int *fput_needed)
+static struct file *perf_fget_light(int fd, int *fput_needed)
 {
        struct file *file;
 
@@ -3318,7 +3357,7 @@ static struct perf_event *perf_fget_light(int fd, int *fput_needed)
                return ERR_PTR(-EBADF);
        }
 
-       return file->private_data;
+       return file;
 }
 
 static int perf_event_set_output(struct perf_event *event,
@@ -3350,19 +3389,21 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 
        case PERF_EVENT_IOC_SET_OUTPUT:
        {
+               struct file *output_file = NULL;
                struct perf_event *output_event = NULL;
                int fput_needed = 0;
                int ret;
 
                if (arg != -1) {
-                       output_event = perf_fget_light(arg, &fput_needed);
-                       if (IS_ERR(output_event))
-                               return PTR_ERR(output_event);
+                       output_file = perf_fget_light(arg, &fput_needed);
+                       if (IS_ERR(output_file))
+                               return PTR_ERR(output_file);
+                       output_event = output_file->private_data;
                }
 
                ret = perf_event_set_output(event, output_event);
                if (output_event)
-                       fput_light(output_event->filp, fput_needed);
+                       fput_light(output_file, fput_needed);
 
                return ret;
        }
@@ -3530,16 +3571,12 @@ static void ring_buffer_attach(struct perf_event *event,
                return;
 
        spin_lock_irqsave(&rb->event_lock, flags);
-       if (!list_empty(&event->rb_entry))
-               goto unlock;
-
-       list_add(&event->rb_entry, &rb->event_list);
-unlock:
+       if (list_empty(&event->rb_entry))
+               list_add(&event->rb_entry, &rb->event_list);
        spin_unlock_irqrestore(&rb->event_lock, flags);
 }
 
-static void ring_buffer_detach(struct perf_event *event,
-                              struct ring_buffer *rb)
+static void ring_buffer_detach(struct perf_event *event, struct ring_buffer *rb)
 {
        unsigned long flags;
 
@@ -3558,13 +3595,10 @@ static void ring_buffer_wakeup(struct perf_event *event)
 
        rcu_read_lock();
        rb = rcu_dereference(event->rb);
-       if (!rb)
-               goto unlock;
-
-       list_for_each_entry_rcu(event, &rb->event_list, rb_entry)
-               wake_up_all(&event->waitq);
-
-unlock:
+       if (rb) {
+               list_for_each_entry_rcu(event, &rb->event_list, rb_entry)
+                       wake_up_all(&event->waitq);
+       }
        rcu_read_unlock();
 }
 
@@ -3593,18 +3627,10 @@ static struct ring_buffer *ring_buffer_get(struct perf_event *event)
 
 static void ring_buffer_put(struct ring_buffer *rb)
 {
-       struct perf_event *event, *n;
-       unsigned long flags;
-
        if (!atomic_dec_and_test(&rb->refcount))
                return;
 
-       spin_lock_irqsave(&rb->event_lock, flags);
-       list_for_each_entry_safe(event, n, &rb->event_list, rb_entry) {
-               list_del_init(&event->rb_entry);
-               wake_up_all(&event->waitq);
-       }
-       spin_unlock_irqrestore(&rb->event_lock, flags);
+       WARN_ON_ONCE(!list_empty(&rb->event_list));
 
        call_rcu(&rb->rcu_head, rb_free_rcu);
 }
@@ -3614,26 +3640,100 @@ static void perf_mmap_open(struct vm_area_struct *vma)
        struct perf_event *event = vma->vm_file->private_data;
 
        atomic_inc(&event->mmap_count);
+       atomic_inc(&event->rb->mmap_count);
 }
 
+/*
+ * A buffer can be mmap()ed multiple times; either directly through the same
+ * event, or through other events by use of perf_event_set_output().
+ *
+ * In order to undo the VM accounting done by perf_mmap() we need to destroy
+ * the buffer here, where we still have a VM context. This means we need
+ * to detach all events redirecting to us.
+ */
 static void perf_mmap_close(struct vm_area_struct *vma)
 {
        struct perf_event *event = vma->vm_file->private_data;
 
-       if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) {
-               unsigned long size = perf_data_size(event->rb);
-               struct user_struct *user = event->mmap_user;
-               struct ring_buffer *rb = event->rb;
+       struct ring_buffer *rb = event->rb;
+       struct user_struct *mmap_user = rb->mmap_user;
+       int mmap_locked = rb->mmap_locked;
+       unsigned long size = perf_data_size(rb);
+
+       atomic_dec(&rb->mmap_count);
+
+       if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex))
+               return;
 
-               atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm);
-               vma->vm_mm->pinned_vm -= event->mmap_locked;
-               rcu_assign_pointer(event->rb, NULL);
-               ring_buffer_detach(event, rb);
+       /* Detach current event from the buffer. */
+       rcu_assign_pointer(event->rb, NULL);
+       ring_buffer_detach(event, rb);
+       mutex_unlock(&event->mmap_mutex);
+
+       /* If there's still other mmap()s of this buffer, we're done. */
+       if (atomic_read(&rb->mmap_count)) {
+               ring_buffer_put(rb); /* can't be last */
+               return;
+       }
+
+       /*
+        * No other mmap()s, detach from all other events that might redirect
+        * into the now unreachable buffer. Somewhat complicated by the
+        * fact that rb::event_lock otherwise nests inside mmap_mutex.
+        */
+again:
+       rcu_read_lock();
+       list_for_each_entry_rcu(event, &rb->event_list, rb_entry) {
+               if (!atomic_long_inc_not_zero(&event->refcount)) {
+                       /*
+                        * This event is en route to free_event(), which will
+                        * detach it and remove it from the list.
+                        */
+                       continue;
+               }
+               rcu_read_unlock();
+
+               mutex_lock(&event->mmap_mutex);
+               /*
+                * Check we didn't race with perf_event_set_output() which can
+                * swizzle the rb from under us while we were waiting to
+                * acquire mmap_mutex.
+                *
+                * If we find a different rb, ignore this event; a later
+                * iteration will no longer find it on the list. We still
+                * have to restart the iteration to make sure we're not now
+                * iterating the wrong list.
+                */
+               if (event->rb == rb) {
+                       rcu_assign_pointer(event->rb, NULL);
+                       ring_buffer_detach(event, rb);
+                       ring_buffer_put(rb); /* can't be last, we still have one */
+               }
                mutex_unlock(&event->mmap_mutex);
+               put_event(event);
 
-               ring_buffer_put(rb);
-               free_uid(user);
+               /*
+                * Restart the iteration; either we're on the wrong list or
+                * we destroyed its integrity by deleting an entry.
+                */
+               goto again;
        }
+       rcu_read_unlock();
+
+       /*
+        * There may still be a few 0-ref events on the list; they'll get
+        * cleaned up by free_event() -- they also still hold their ref on
+        * the rb and will free it whenever they are done with it.
+        *
+        * Aside from that, this buffer is 'fully' detached and unmapped;
+        * undo the VM accounting.
+        */
+
+       atomic_long_sub((size >> PAGE_SHIFT) + 1, &mmap_user->locked_vm);
+       vma->vm_mm->pinned_vm -= mmap_locked;
+       free_uid(mmap_user);
+
+       ring_buffer_put(rb); /* could be last */
 }
 
 static const struct vm_operations_struct perf_mmap_vmops = {
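For orientation, here is a hedged user-space sketch (hypothetical test
program, error handling omitted; not part of the patch) of the buffer-sharing
situation the rewritten perf_mmap_close() above must unwind: a second event
redirected into the first event's ring buffer via PERF_EVENT_IOC_SET_OUTPUT,
so the final munmap() has to detach both events before undoing the VM
accounting:

    #include <linux/perf_event.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <sys/mman.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    static int open_sw_event(void)
    {
            struct perf_event_attr attr;

            memset(&attr, 0, sizeof(attr));
            attr.size = sizeof(attr);
            attr.type = PERF_TYPE_SOFTWARE;
            attr.config = PERF_COUNT_SW_CPU_CLOCK;
            return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
    }

    int main(void)
    {
            long psize = sysconf(_SC_PAGESIZE);
            int fd1 = open_sw_event();
            int fd2 = open_sw_event();

            /* Control page plus 2^n data pages, mapped through fd1. */
            void *base = mmap(NULL, 5 * psize, PROT_READ | PROT_WRITE,
                              MAP_SHARED, fd1, 0);

            /* fd2 now redirects its output into fd1's ring buffer. */
            ioctl(fd2, PERF_EVENT_IOC_SET_OUTPUT, fd1);

            /*
             * The munmap() must detach fd2 from the buffer as well,
             * otherwise the VM accounting is undone while fd2 still
             * uses the pages -- the case perf_mmap_close() handles.
             */
            munmap(base, 5 * psize);
            return close(fd1) | close(fd2);
    }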
@@ -3683,12 +3783,24 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
                return -EINVAL;
 
        WARN_ON_ONCE(event->ctx->parent_ctx);
+again:
        mutex_lock(&event->mmap_mutex);
        if (event->rb) {
-               if (event->rb->nr_pages == nr_pages)
-                       atomic_inc(&event->rb->refcount);
-               else
+               if (event->rb->nr_pages != nr_pages) {
                        ret = -EINVAL;
+                       goto unlock;
+               }
+
+               if (!atomic_inc_not_zero(&event->rb->mmap_count)) {
+                       /*
+                        * Raced against perf_mmap_close() through
+                        * perf_event_set_output(). Try again, hope for better
+                        * luck.
+                        */
+                       mutex_unlock(&event->mmap_mutex);
+                       goto again;
+               }
+
                goto unlock;
        }
 
@@ -3729,19 +3841,27 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
                ret = -ENOMEM;
                goto unlock;
        }
-       rcu_assign_pointer(event->rb, rb);
+
+       atomic_set(&rb->mmap_count, 1);
+       rb->mmap_locked = extra;
+       rb->mmap_user = get_current_user();
 
        atomic_long_add(user_extra, &user->locked_vm);
-       event->mmap_locked = extra;
-       event->mmap_user = get_current_user();
-       vma->vm_mm->pinned_vm += event->mmap_locked;
+       vma->vm_mm->pinned_vm += extra;
+
+       ring_buffer_attach(event, rb);
+       rcu_assign_pointer(event->rb, rb);
 
 unlock:
        if (!ret)
                atomic_inc(&event->mmap_count);
        mutex_unlock(&event->mmap_mutex);
 
-       vma->vm_flags |= VM_RESERVED;
+       /*
+        * Since pinned accounting is per-vm, we cannot allow fork() to
+        * copy our vma.
+        */
+       vma->vm_flags |= VM_DONTCOPY | VM_RESERVED;
        vma->vm_ops = &perf_mmap_vmops;
 
        return ret;
@@ -4264,7 +4384,7 @@ static void perf_event_task_event(struct perf_task_event *task_event)
        rcu_read_lock();
        list_for_each_entry_rcu(pmu, &pmus, entry) {
                cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
-               if (cpuctx->active_pmu != pmu)
+               if (cpuctx->unique_pmu != pmu)
                        goto next;
                perf_event_task_ctx(&cpuctx->ctx, task_event);
 
@@ -4410,7 +4530,7 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
        rcu_read_lock();
        list_for_each_entry_rcu(pmu, &pmus, entry) {
                cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
-               if (cpuctx->active_pmu != pmu)
+               if (cpuctx->unique_pmu != pmu)
                        goto next;
                perf_event_comm_ctx(&cpuctx->ctx, comm_event);
 
@@ -4606,7 +4726,7 @@ got_name:
        rcu_read_lock();
        list_for_each_entry_rcu(pmu, &pmus, entry) {
                cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
-               if (cpuctx->active_pmu != pmu)
+               if (cpuctx->unique_pmu != pmu)
                        goto next;
                perf_event_mmap_ctx(&cpuctx->ctx, mmap_event,
                                        vma->vm_flags & VM_EXEC);
@@ -5156,7 +5276,7 @@ static void sw_perf_event_destroy(struct perf_event *event)
 
 static int perf_swevent_init(struct perf_event *event)
 {
-       int event_id = event->attr.config;
+       u64 event_id = event->attr.config;
 
        if (event->attr.type != PERF_TYPE_SOFTWARE)
                return -ENOENT;
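The type change above is subtle but real: perf_event_attr::config is a u64,
and storing it in a plain int truncates it before the later PERF_COUNT_SW_MAX
range check, so an out-of-range config could masquerade as a valid software
event id (or go negative and index before the array). A standalone
illustration of the truncation, using made-up values:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            /* Not a valid software event id, but bit 0 survives the cast. */
            uint64_t config = (1ULL << 32) | 1;

            int truncated = (int)config;    /* old code: looks like id 1  */
            uint64_t full = config;         /* new code: fails range check */

            printf("truncated=%d full=0x%llx\n", truncated,
                   (unsigned long long)full);
            return 0;
    }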
@@ -5628,8 +5748,8 @@ static void update_pmu_context(struct pmu *pmu, struct pmu *old_pmu)
 
                cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
 
-               if (cpuctx->active_pmu == old_pmu)
-                       cpuctx->active_pmu = pmu;
+               if (cpuctx->unique_pmu == old_pmu)
+                       cpuctx->unique_pmu = pmu;
        }
 }
 
@@ -5748,6 +5868,7 @@ skip_type:
        if (pmu->pmu_cpu_context)
                goto got_cpu_context;
 
+       ret = -ENOMEM;
        pmu->pmu_cpu_context = alloc_percpu(struct perf_cpu_context);
        if (!pmu->pmu_cpu_context)
                goto free_dev;
@@ -5763,7 +5884,7 @@ skip_type:
                cpuctx->ctx.pmu = pmu;
                cpuctx->jiffies_interval = 1;
                INIT_LIST_HEAD(&cpuctx->rotation_list);
-               cpuctx->active_pmu = pmu;
+               cpuctx->unique_pmu = pmu;
        }
 
 got_cpu_context:
@@ -5912,6 +6033,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 
        mutex_init(&event->mmap_mutex);
 
+       atomic_long_set(&event->refcount, 1);
        event->cpu              = cpu;
        event->attr             = *attr;
        event->group_leader     = group_leader;
@@ -5944,8 +6066,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
        event->overflow_handler = overflow_handler;
        event->overflow_handler_context = context;
 
-       if (attr->disabled)
-               event->state = PERF_EVENT_STATE_OFF;
+       perf_event__state_init(event);
 
        pmu = NULL;
 
@@ -6104,6 +6225,8 @@ set:
        if (atomic_read(&event->mmap_count))
                goto unlock;
 
+       old_rb = event->rb;
+
        if (output_event) {
                /* get the rb we want to redirect to */
                rb = ring_buffer_get(output_event);
@@ -6111,16 +6234,28 @@ set:
                        goto unlock;
        }
 
-       old_rb = event->rb;
-       rcu_assign_pointer(event->rb, rb);
        if (old_rb)
                ring_buffer_detach(event, old_rb);
+
+       if (rb)
+               ring_buffer_attach(event, rb);
+
+       rcu_assign_pointer(event->rb, rb);
+
+       if (old_rb) {
+               ring_buffer_put(old_rb);
+               /*
+                * Since we detached the old rb before attaching the new one,
+                * we could have missed a wakeup. Provide it now.
+                */
+               wake_up_all(&event->waitq);
+       }
+
        ret = 0;
 unlock:
        mutex_unlock(&event->mmap_mutex);
 
-       if (old_rb)
-               ring_buffer_put(old_rb);
 out:
        return ret;
 }
@@ -6166,6 +6301,9 @@ SYSCALL_DEFINE5(perf_event_open,
        if (attr.freq) {
                if (attr.sample_freq > sysctl_perf_event_sample_rate)
                        return -EINVAL;
+       } else {
+               if (attr.sample_period & (1ULL << 63))
+                       return -EINVAL;
        }
 
        /*
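This hunk is the titular fix: the kernel does signed (s64) arithmetic on the
sample period in several places, so a period with bit 63 set would be treated
as negative. A hedged user-space check (hypothetical test program, not part
of the patch) that such a period is now rejected:

    #include <linux/perf_event.h>
    #include <errno.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    int main(void)
    {
            struct perf_event_attr attr;

            memset(&attr, 0, sizeof(attr));
            attr.size = sizeof(attr);
            attr.type = PERF_TYPE_SOFTWARE;
            attr.config = PERF_COUNT_SW_TASK_CLOCK;
            attr.sample_period = 1ULL << 63;  /* would go negative as s64 */

            if (syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0) < 0 &&
                errno == EINVAL) {
                    printf("64-bit sample_period rejected, as expected\n");
                    return 0;
            }
            fprintf(stderr, "kernel accepted a negative period?\n");
            return 1;
    }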
@@ -6182,12 +6320,12 @@ SYSCALL_DEFINE5(perf_event_open,
                return event_fd;
 
        if (group_fd != -1) {
-               group_leader = perf_fget_light(group_fd, &fput_needed);
-               if (IS_ERR(group_leader)) {
-                       err = PTR_ERR(group_leader);
+               group_file = perf_fget_light(group_fd, &fput_needed);
+               if (IS_ERR(group_file)) {
+                       err = PTR_ERR(group_file);
                        goto err_fd;
                }
-               group_file = group_leader->filp;
+               group_leader = group_file->private_data;
                if (flags & PERF_FLAG_FD_OUTPUT)
                        output_event = group_leader;
                if (flags & PERF_FLAG_FD_NO_GROUP)
@@ -6313,16 +6451,23 @@ SYSCALL_DEFINE5(perf_event_open,
 
                mutex_lock(&gctx->mutex);
                perf_remove_from_context(group_leader);
+
+                * Removing an event from the context leaves it disabled.
+                * What we want here is the event back in its initial
+                * startup state, ready to be added into a new context.
+                * startup state, ready to be add into new context.
+                */
+               perf_event__state_init(group_leader);
                list_for_each_entry(sibling, &group_leader->sibling_list,
                                    group_entry) {
                        perf_remove_from_context(sibling);
+                       perf_event__state_init(sibling);
                        put_ctx(gctx);
                }
                mutex_unlock(&gctx->mutex);
                put_ctx(gctx);
        }
 
-       event->filp = event_file;
        WARN_ON_ONCE(ctx->parent_ctx);
        mutex_lock(&ctx->mutex);
 
@@ -6412,7 +6557,6 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
                goto err_free;
        }
 
-       event->filp = NULL;
        WARN_ON_ONCE(ctx->parent_ctx);
        mutex_lock(&ctx->mutex);
        perf_install_in_context(ctx, event, cpu);
@@ -6461,7 +6605,7 @@ static void sync_child_event(struct perf_event *child_event,
         * Release the parent event, if this was the last
         * reference to it.
         */
-       fput(parent_event->filp);
+       put_event(parent_event);
 }
 
 static void
@@ -6537,9 +6681,8 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
         *
         *   __perf_event_exit_task()
         *     sync_child_event()
-        *       fput(parent_event->filp)
-        *         perf_release()
-        *           mutex_lock(&ctx->mutex)
+        *       put_event()
+        *         mutex_lock(&ctx->mutex)
         *
         * But since its the parent context it won't be the same instance.
         */
@@ -6607,7 +6750,7 @@ static void perf_free_event(struct perf_event *event,
        list_del_init(&event->child_list);
        mutex_unlock(&parent->child_mutex);
 
-       fput(parent->filp);
+       put_event(parent);
 
        perf_group_detach(event);
        list_del_event(event, ctx);
@@ -6687,6 +6830,12 @@ inherit_event(struct perf_event *parent_event,
                                           NULL, NULL);
        if (IS_ERR(child_event))
                return child_event;
+
+       if (!atomic_long_inc_not_zero(&parent_event->refcount)) {
+               free_event(child_event);
+               return NULL;
+       }
+
        get_ctx(child_ctx);
 
        /*
@@ -6727,14 +6876,6 @@ inherit_event(struct perf_event *parent_event,
        add_event_to_ctx(child_event, child_ctx);
        raw_spin_unlock_irqrestore(&child_ctx->lock, flags);
 
-       /*
-        * Get a reference to the parent filp - we will fput it
-        * when the child event exits. This is safe to do because
-        * we are in the parent and we know that the filp still
-        * exists and has a nonzero count:
-        */
-       atomic_long_inc(&parent_event->filp->f_count);
-
        /*
         * Link this into the parent event's child list
         */
@@ -6792,7 +6933,7 @@ inherit_task_group(struct perf_event *event, struct task_struct *parent,
                 * child.
                 */
 
-               child_ctx = alloc_perf_context(event->pmu, child);
+               child_ctx = alloc_perf_context(parent_ctx->pmu, child);
                if (!child_ctx)
                        return -ENOMEM;
 
@@ -6963,14 +7104,14 @@ static void perf_pmu_rotate_stop(struct pmu *pmu)
 static void __perf_event_exit_context(void *__info)
 {
        struct perf_event_context *ctx = __info;
-       struct perf_event *event, *tmp;
+       struct perf_event *event;
 
        perf_pmu_rotate_stop(ctx->pmu);
 
-       list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry)
-               __perf_remove_from_context(event);
-       list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, group_entry)
+       rcu_read_lock();
+       list_for_each_entry_rcu(event, &ctx->event_list, event_entry)
                __perf_remove_from_context(event);
+       rcu_read_unlock();
 }
 
 static void perf_event_exit_cpu_context(int cpu)
@@ -6994,11 +7135,11 @@ static void perf_event_exit_cpu(int cpu)
 {
        struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
 
+       perf_event_exit_cpu_context(cpu);
+
        mutex_lock(&swhash->hlist_mutex);
        swevent_hlist_release(swhash);
        mutex_unlock(&swhash->hlist_mutex);
-
-       perf_event_exit_cpu_context(cpu);
 }
 #else
 static inline void perf_event_exit_cpu(int cpu) { }