Merge branch 'stable-3.2' into pandora-3.2
[pandora-kernel.git] / kernel/events/core.c
index bc94278..beeac8e 100644
@@ -1344,7 +1344,7 @@ retry:
 /*
  * Cross CPU call to disable a performance event
  */
-static int __perf_event_disable(void *info)
+int __perf_event_disable(void *info)
 {
        struct perf_event *event = info;
        struct perf_event_context *ctx = event->ctx;
@@ -3277,50 +3277,77 @@ u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
 }
 EXPORT_SYMBOL_GPL(perf_event_read_value);
 
-static int perf_event_read_group(struct perf_event *event,
-                                  u64 read_format, char __user *buf)
+static void __perf_read_group_add(struct perf_event *leader,
+                                       u64 read_format, u64 *values)
 {
-       struct perf_event *leader = event->group_leader, *sub;
        struct perf_event_context *ctx = leader->ctx;
-       int n = 0, size = 0, ret;
+       struct perf_event *sub;
+       unsigned long flags;
+       int n = 1; /* skip @nr */
        u64 count, enabled, running;
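+       /*
+        * This runs once for @leader and once for every event on its
+        * child_list; @values accumulates the totals across those calls,
+        * hence the += accumulation below into the zeroed buffer.
+        */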
-       u64 values[5];
-
-       lockdep_assert_held(&ctx->mutex);
 
        count = perf_event_read_value(leader, &enabled, &running);
 
-       values[n++] = 1 + leader->nr_siblings;
+       /*
+        * Since we co-schedule groups, {enabled,running} times of siblings
+        * will be identical to those of the leader, so we only publish one
+        * set.
+        */
        if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
-               values[n++] = enabled;
+               values[n++] += enabled;
        if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
-               values[n++] = running;
+               values[n++] += running;
-       values[n++] = count;
+
+       /*
+        * Write the {count,id} tuple for the leader, then one tuple
+        * for every sibling.
+        */
+       values[n++] += count;
        if (read_format & PERF_FORMAT_ID)
                values[n++] = primary_event_id(leader);
 
-       size = n * sizeof(u64);
-
-       if (copy_to_user(buf, values, size))
-               return -EFAULT;
-
-       ret = size;
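+       /*
+        * ctx->lock keeps ->sibling_list stable while we walk it;
+        * group membership only changes with this lock held.
+        */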
+       raw_spin_lock_irqsave(&ctx->lock, flags);
 
        list_for_each_entry(sub, &leader->sibling_list, group_entry) {
-               n = 0;
-
-               values[n++] = perf_event_read_value(sub, &enabled, &running);
+               values[n++] += perf_event_read_value(sub, &enabled, &running);
                if (read_format & PERF_FORMAT_ID)
                        values[n++] = primary_event_id(sub);
+       }
 
-               size = n * sizeof(u64);
+       raw_spin_unlock_irqrestore(&ctx->lock, flags);
+}
 
-               if (copy_to_user(buf + ret, values, size)) {
-                       return -EFAULT;
-               }
+static int perf_event_read_group(struct perf_event *event,
+                                  u64 read_format, char __user *buf)
+{
+       struct perf_event *leader = event->group_leader, *child;
+       struct perf_event_context *ctx = leader->ctx;
+       int ret = event->read_size;
+       u64 *values;
 
-               ret += size;
-       }
+       lockdep_assert_held(&ctx->mutex);
+
+       values = kzalloc(event->read_size, GFP_KERNEL);
+       if (!values)
+               return -ENOMEM;
+
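+       /*
+        * The buffer copied back to userspace follows the
+        * PERF_FORMAT_GROUP read layout from perf_event.h:
+        *
+        *      { u64 nr;
+        *        { u64 time_enabled; }   && PERF_FORMAT_TOTAL_TIME_ENABLED
+        *        { u64 time_running; }   && PERF_FORMAT_TOTAL_TIME_RUNNING
+        *        { u64 value;
+        *          { u64 id; }           && PERF_FORMAT_ID
+        *        } cntr[nr];
+        *      }
+        */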
+       values[0] = 1 + leader->nr_siblings;
+
+       /*
+        * By locking the child_mutex of the leader we effectively
+        * lock the child lists of all siblings. XXX: explain how.
+        */
+       mutex_lock(&leader->child_mutex);
+
+       __perf_read_group_add(leader, read_format, values);
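+       /*
+        * Inherited events chain to the original parent event (see
+        * inherit_event()), so @leader's child_list is flat and a
+        * single pass covers all children.
+        */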
+       list_for_each_entry(child, &leader->child_list, child_list)
+               __perf_read_group_add(child, read_format, values);
+
+       mutex_unlock(&leader->child_mutex);
+
+       if (copy_to_user(buf, values, event->read_size))
+               ret = -EFAULT;
+
+       kfree(values);
 
        return ret;
 }
@@ -4240,9 +4267,6 @@ static void perf_output_read_one(struct perf_output_handle *handle,
        __output_copy(handle, values, n * sizeof(u64));
 }
 
-/*
- * XXX PERF_FORMAT_GROUP vs inherited events seems difficult.
- */
 static void perf_output_read_group(struct perf_output_handle *handle,
                            struct perf_event *event,
                            u64 enabled, u64 running)
@@ -4286,6 +4310,13 @@ static void perf_output_read_group(struct perf_output_handle *handle,
 #define PERF_FORMAT_TOTAL_TIMES (PERF_FORMAT_TOTAL_TIME_ENABLED|\
                                 PERF_FORMAT_TOTAL_TIME_RUNNING)
 
+/*
+ * XXX PERF_SAMPLE_READ vs inherited events seems difficult.
+ *
+ * The problem is that it's both hard and excessively expensive to iterate the
+ * child list, not to mention that it's impossible to IPI the children running
+ * on another CPU from interrupt/NMI context.
+ */
 static void perf_output_read(struct perf_output_handle *handle,
                             struct perf_event *event)
 {
@@ -6085,7 +6116,6 @@ skip_type:
                __perf_event_init_context(&cpuctx->ctx);
                lockdep_set_class(&cpuctx->ctx.mutex, &cpuctx_mutex);
                lockdep_set_class(&cpuctx->ctx.lock, &cpuctx_lock);
-               cpuctx->ctx.type = cpu_context;
                cpuctx->ctx.pmu = pmu;
                cpuctx->jiffies_interval = 1;
                INIT_LIST_HEAD(&cpuctx->rotation_list);
@@ -6284,9 +6314,10 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
        local64_set(&hwc->period_left, hwc->sample_period);
 
        /*
-        * we currently do not support PERF_FORMAT_GROUP on inherited events
+        * We currently do not support PERF_SAMPLE_READ on inherited events.
+        * See perf_output_read().
         */
-       if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP))
+       if (attr->inherit && (attr->sample_type & PERF_SAMPLE_READ))
                goto done;
 
        pmu = perf_init_event(event);
@@ -6661,16 +6692,27 @@ SYSCALL_DEFINE5(perf_event_open,
                if (group_leader->group_leader != group_leader)
                        goto err_context;
                /*
-                * Do not allow to attach to a group in a different
-                * task or CPU context:
+                * Make sure both events are for the same CPU;
+                * grouping events for different CPUs is broken, since
+                * you can never concurrently schedule them anyhow.
                 */
-               if (move_group) {
-                       if (group_leader->ctx->type != ctx->type)
-                               goto err_context;
-               } else {
-                       if (group_leader->ctx != ctx)
-                               goto err_context;
-               }
+               if (group_leader->cpu != event->cpu)
+                       goto err_context;
+
+               /*
+                * Make sure both events are on the same task, or are
+                * both per-CPU events.
+                */
+               if (group_leader->ctx->task != ctx->task)
+                       goto err_context;
+
+               /*
+                * Do not allow attaching to a group in a different task
+                * or CPU context. If we're moving SW events, we'll fix
+                * this up later, so allow it.
+                */
+               if (!move_group && group_leader->ctx != ctx)
+                       goto err_context;
 
                /*
                 * Only a group leader can be exclusive or pinned