perf_counter: More aggressive frequency adjustment
authorPeter Zijlstra <a.p.zijlstra@chello.nl>
Wed, 10 Jun 2009 11:40:57 +0000 (13:40 +0200)
committerIngo Molnar <mingo@elte.hu>
Wed, 10 Jun 2009 14:55:26 +0000 (16:55 +0200)
Also employ the overflow handler to adjust the frequency, this results
in a stable frequency in about 40~50 samples, instead of that many ticks.

This also means we can start sampling at a sample period of 1 without
running head-first into the throttle.

It relies on sched_clock() to accurately measure the time difference
between the overflow NMIs.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
arch/x86/kernel/cpu/perf_counter.c
include/linux/perf_counter.h
kernel/perf_counter.c

index 49f2585..240ca56 100644 (file)
@@ -696,10 +696,11 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
        if (!attr->exclude_kernel)
                hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
 
-       if (!hwc->sample_period)
+       if (!hwc->sample_period) {
                hwc->sample_period = x86_pmu.max_period;
+               atomic64_set(&hwc->period_left, hwc->sample_period);
+       }
 
-       atomic64_set(&hwc->period_left, hwc->sample_period);
        counter->destroy = hw_perf_counter_destroy;
 
        /*
index 3586df8..282d8cc 100644 (file)
@@ -371,6 +371,7 @@ struct hw_perf_counter {
 
        u64                             freq_count;
        u64                             freq_interrupts;
+       u64                             freq_stamp;
 #endif
 };
 
index 5eacaaf..51c571e 100644 (file)
@@ -1184,13 +1184,33 @@ static void perf_counter_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu)
 static void perf_log_throttle(struct perf_counter *counter, int enable);
 static void perf_log_period(struct perf_counter *counter, u64 period);
 
-static void perf_adjust_freq(struct perf_counter_context *ctx)
+static void perf_adjust_period(struct perf_counter *counter, u64 events)
+{
+       struct hw_perf_counter *hwc = &counter->hw;
+       u64 period, sample_period;
+       s64 delta;
+
+       events *= hwc->sample_period;
+       period = div64_u64(events, counter->attr.sample_freq);
+
+       delta = (s64)(period - hwc->sample_period);
+       delta = (delta + 7) / 8; /* low pass filter */
+
+       sample_period = hwc->sample_period + delta;
+
+       if (!sample_period)
+               sample_period = 1;
+
+       perf_log_period(counter, sample_period);
+
+       hwc->sample_period = sample_period;
+}
+
+static void perf_ctx_adjust_freq(struct perf_counter_context *ctx)
 {
        struct perf_counter *counter;
        struct hw_perf_counter *hwc;
-       u64 interrupts, sample_period;
-       u64 events, period, freq;
-       s64 delta;
+       u64 interrupts, freq;
 
        spin_lock(&ctx->lock);
        list_for_each_entry(counter, &ctx->counter_list, list_entry) {
@@ -1202,6 +1222,9 @@ static void perf_adjust_freq(struct perf_counter_context *ctx)
                interrupts = hwc->interrupts;
                hwc->interrupts = 0;
 
+               /*
+                * unthrottle counters on the tick
+                */
                if (interrupts == MAX_INTERRUPTS) {
                        perf_log_throttle(counter, 1);
                        counter->pmu->unthrottle(counter);
@@ -1211,6 +1234,9 @@ static void perf_adjust_freq(struct perf_counter_context *ctx)
                if (!counter->attr.freq || !counter->attr.sample_freq)
                        continue;
 
+               /*
+                * if the specified freq < HZ then we need to skip ticks
+                */
                if (counter->attr.sample_freq < HZ) {
                        freq = counter->attr.sample_freq;
 
@@ -1226,20 +1252,20 @@ static void perf_adjust_freq(struct perf_counter_context *ctx)
                } else
                        freq = HZ;
 
-               events = freq * interrupts * hwc->sample_period;
-               period = div64_u64(events, counter->attr.sample_freq);
-
-               delta = (s64)(1 + period - hwc->sample_period);
-               delta >>= 1;
-
-               sample_period = hwc->sample_period + delta;
-
-               if (!sample_period)
-                       sample_period = 1;
+               perf_adjust_period(counter, freq * interrupts);
 
-               perf_log_period(counter, sample_period);
-
-               hwc->sample_period = sample_period;
+               /*
+                * In order to avoid being stalled by an (accidental) huge
+                * sample period, force reset the sample period if we didn't
+                * get any events in this freq period.
+                */
+               if (!interrupts) {
+                       perf_disable();
+                       counter->pmu->disable(counter);
+                       atomic_set(&hwc->period_left, 0);
+                       counter->pmu->enable(counter);
+                       perf_enable();
+               }
        }
        spin_unlock(&ctx->lock);
 }
@@ -1279,9 +1305,9 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu)
        cpuctx = &per_cpu(perf_cpu_context, cpu);
        ctx = curr->perf_counter_ctxp;
 
-       perf_adjust_freq(&cpuctx->ctx);
+       perf_ctx_adjust_freq(&cpuctx->ctx);
        if (ctx)
-               perf_adjust_freq(ctx);
+               perf_ctx_adjust_freq(ctx);
 
        perf_counter_cpu_sched_out(cpuctx);
        if (ctx)
@@ -1647,10 +1673,10 @@ static int perf_counter_period(struct perf_counter *counter, u64 __user *arg)
 
                counter->attr.sample_freq = value;
        } else {
+               perf_log_period(counter, value);
+
                counter->attr.sample_period = value;
                counter->hw.sample_period = value;
-
-               perf_log_period(counter, value);
        }
 unlock:
        spin_unlock_irq(&ctx->lock);
@@ -2853,35 +2879,41 @@ void __perf_counter_mmap(struct vm_area_struct *vma)
  * event flow.
  */
 
+struct freq_event {
+       struct perf_event_header        header;
+       u64                             time;
+       u64                             id;
+       u64                             period;
+};
+
 static void perf_log_period(struct perf_counter *counter, u64 period)
 {
        struct perf_output_handle handle;
+       struct freq_event event;
        int ret;
 
-       struct {
-               struct perf_event_header        header;
-               u64                             time;
-               u64                             id;
-               u64                             period;
-       } freq_event = {
+       if (counter->hw.sample_period == period)
+               return;
+
+       if (counter->attr.sample_type & PERF_SAMPLE_PERIOD)
+               return;
+
+       event = (struct freq_event) {
                .header = {
                        .type = PERF_EVENT_PERIOD,
                        .misc = 0,
-                       .size = sizeof(freq_event),
+                       .size = sizeof(event),
                },
                .time = sched_clock(),
                .id = counter->id,
                .period = period,
        };
 
-       if (counter->hw.sample_period == period)
-               return;
-
-       ret = perf_output_begin(&handle, counter, sizeof(freq_event), 0, 0);
+       ret = perf_output_begin(&handle, counter, sizeof(event), 1, 0);
        if (ret)
                return;
 
-       perf_output_put(&handle, freq_event);
+       perf_output_put(&handle, event);
        perf_output_end(&handle);
 }
 
@@ -2923,15 +2955,16 @@ int perf_counter_overflow(struct perf_counter *counter,
 {
        int events = atomic_read(&counter->event_limit);
        int throttle = counter->pmu->unthrottle != NULL;
+       struct hw_perf_counter *hwc = &counter->hw;
        int ret = 0;
 
        if (!throttle) {
-               counter->hw.interrupts++;
+               hwc->interrupts++;
        } else {
-               if (counter->hw.interrupts != MAX_INTERRUPTS) {
-                       counter->hw.interrupts++;
-                       if (HZ*counter->hw.interrupts > (u64)sysctl_perf_counter_limit) {
-                               counter->hw.interrupts = MAX_INTERRUPTS;
+               if (hwc->interrupts != MAX_INTERRUPTS) {
+                       hwc->interrupts++;
+                       if (HZ * hwc->interrupts > (u64)sysctl_perf_counter_limit) {
+                               hwc->interrupts = MAX_INTERRUPTS;
                                perf_log_throttle(counter, 0);
                                ret = 1;
                        }
@@ -2945,6 +2978,16 @@ int perf_counter_overflow(struct perf_counter *counter,
                }
        }
 
+       if (counter->attr.freq) {
+               u64 now = sched_clock();
+               s64 delta = now - hwc->freq_stamp;
+
+               hwc->freq_stamp = now;
+
+               if (delta > 0 && delta < TICK_NSEC)
+                       perf_adjust_period(counter, NSEC_PER_SEC / (int)delta);
+       }
+
        /*
         * XXX event_limit might not quite work as expected on inherited
         * counters
@@ -3379,7 +3422,6 @@ static const struct pmu *tp_perf_counter_init(struct perf_counter *counter)
                return NULL;
 
        counter->destroy = tp_perf_counter_destroy;
-       counter->hw.sample_period = counter->attr.sample_period;
 
        return &perf_ops_generic;
 }
@@ -3483,10 +3525,11 @@ perf_counter_alloc(struct perf_counter_attr *attr,
        pmu = NULL;
 
        hwc = &counter->hw;
+       hwc->sample_period = attr->sample_period;
        if (attr->freq && attr->sample_freq)
-               hwc->sample_period = div64_u64(TICK_NSEC, attr->sample_freq);
-       else
-               hwc->sample_period = attr->sample_period;
+               hwc->sample_period = 1;
+
+       atomic64_set(&hwc->period_left, hwc->sample_period);
 
        /*
         * we currently do not support PERF_SAMPLE_GROUP on inherited counters
@@ -3687,6 +3730,9 @@ inherit_counter(struct perf_counter *parent_counter,
        else
                child_counter->state = PERF_COUNTER_STATE_OFF;
 
+       if (parent_counter->attr.freq)
+               child_counter->hw.sample_period = parent_counter->hw.sample_period;
+
        /*
         * Link it up in the child's context:
         */