/*
* Cross CPU call to disable a performance event
*/
-static int __perf_event_disable(void *info)
+int __perf_event_disable(void *info)
{
struct perf_event *event = info;
struct perf_event_context *ctx = event->ctx;
}
EXPORT_SYMBOL_GPL(perf_event_read_value);
-static int perf_event_read_group(struct perf_event *event,
- u64 read_format, char __user *buf)
+static void __perf_read_group_add(struct perf_event *leader,
+ u64 read_format, u64 *values)
{
- struct perf_event *leader = event->group_leader, *sub;
struct perf_event_context *ctx = leader->ctx;
- int n = 0, size = 0, ret;
+ struct perf_event *sub;
+ unsigned long flags;
+ int n = 1; /* skip @nr */
u64 count, enabled, running;
- u64 values[5];
-
- lockdep_assert_held(&ctx->mutex);
count = perf_event_read_value(leader, &enabled, &running);
- values[n++] = 1 + leader->nr_siblings;
+ /*
+ * Since we co-schedule groups, {enabled,running} times of siblings
+ * will be identical to those of the leader, so we only publish one
+ * set.
+ */
if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
values[n++] = enabled;
if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
values[n++] = running;
- values[n++] = count;
+
+ /*
+ * Write {count,id} tuples for every sibling.
+ */
+ values[n++] += count;
if (read_format & PERF_FORMAT_ID)
values[n++] = primary_event_id(leader);
- size = n * sizeof(u64);
-
- if (copy_to_user(buf, values, size))
- return -EFAULT;
-
- ret = size;
+ raw_spin_lock_irqsave(&ctx->lock, flags);
list_for_each_entry(sub, &leader->sibling_list, group_entry) {
- n = 0;
-
values[n++] = perf_event_read_value(sub, &enabled, &running);
if (read_format & PERF_FORMAT_ID)
values[n++] = primary_event_id(sub);
+ }
- size = n * sizeof(u64);
+ raw_spin_unlock_irqrestore(&ctx->lock, flags);
+}
- if (copy_to_user(buf + ret, values, size)) {
- return -EFAULT;
- }
+static int perf_event_read_group(struct perf_event *event,
+ u64 read_format, char __user *buf)
+{
+ struct perf_event *leader = event->group_leader, *child;
+ struct perf_event_context *ctx = leader->ctx;
+ int ret = event->read_size;
+ u64 *values;
- ret += size;
- }
+ lockdep_assert_held(&ctx->mutex);
+
+ values = kzalloc(event->read_size, GFP_KERNEL);
+ if (!values)
+ return -ENOMEM;
+
+ values[0] = 1 + leader->nr_siblings;
+
+ /*
+ * By locking the child_mutex of the leader we effectively
+ * lock the child list of all siblings.. XXX explain how.
+ */
+ mutex_lock(&leader->child_mutex);
+
+ __perf_read_group_add(leader, read_format, values);
+ list_for_each_entry(child, &leader->child_list, child_list)
+ __perf_read_group_add(child, read_format, values);
+
+ mutex_unlock(&leader->child_mutex);
+
+ if (copy_to_user(buf, values, event->read_size))
+ ret = -EFAULT;
+
+ kfree(values);
return ret;
}
__output_copy(handle, values, n * sizeof(u64));
}
-/*
- * XXX PERF_FORMAT_GROUP vs inherited events seems difficult.
- */
static void perf_output_read_group(struct perf_output_handle *handle,
struct perf_event *event,
u64 enabled, u64 running)
#define PERF_FORMAT_TOTAL_TIMES (PERF_FORMAT_TOTAL_TIME_ENABLED|\
PERF_FORMAT_TOTAL_TIME_RUNNING)
+/*
+ * XXX PERF_SAMPLE_READ vs inherited events seems difficult.
+ *
+ * The problem is that its both hard and excessively expensive to iterate the
+ * child list, not to mention that its impossible to IPI the children running
+ * on another CPU, from interrupt/NMI context.
+ */
static void perf_output_read(struct perf_output_handle *handle,
struct perf_event *event)
{
__perf_event_init_context(&cpuctx->ctx);
lockdep_set_class(&cpuctx->ctx.mutex, &cpuctx_mutex);
lockdep_set_class(&cpuctx->ctx.lock, &cpuctx_lock);
- cpuctx->ctx.type = cpu_context;
cpuctx->ctx.pmu = pmu;
cpuctx->jiffies_interval = 1;
INIT_LIST_HEAD(&cpuctx->rotation_list);
local64_set(&hwc->period_left, hwc->sample_period);
/*
- * we currently do not support PERF_FORMAT_GROUP on inherited events
+ * We currently do not support PERF_SAMPLE_READ on inherited events.
+ * See perf_output_read().
*/
- if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP))
+ if (attr->inherit && (attr->sample_type & PERF_SAMPLE_READ))
goto done;
pmu = perf_init_event(event);
if (group_leader->group_leader != group_leader)
goto err_context;
/*
- * Do not allow to attach to a group in a different
- * task or CPU context:
+ * Make sure we're both events for the same CPU;
+ * grouping events for different CPUs is broken; since
+ * you can never concurrently schedule them anyhow.
*/
- if (move_group) {
- if (group_leader->ctx->type != ctx->type)
- goto err_context;
- } else {
- if (group_leader->ctx != ctx)
- goto err_context;
- }
+ if (group_leader->cpu != event->cpu)
+ goto err_context;
+
+ /*
+ * Make sure we're both on the same task, or both
+ * per-CPU events.
+ */
+ if (group_leader->ctx->task != ctx->task)
+ goto err_context;
+
+ /*
+ * Do not allow to attach to a group in a different task
+ * or CPU context. If we're moving SW events, we'll fix
+ * this up later, so allow that.
+ */
+ if (!move_group && group_leader->ctx != ctx)
+ goto err_context;
/*
* Only a group leader can be exclusive or pinned