X-Git-Url: https://git.openpandora.org/cgi-bin/gitweb.cgi?p=pandora-kernel.git;a=blobdiff_plain;f=kernel%2Fsched.c;h=d42992bccdfae88569559f3e88f81bbcea8e9494;hp=089be8adb0746ac6680fc26dc7344fbc1a3697d2;hb=bc4016f48161454a9a8e5eb209b0693c6cde9f62;hpb=75e1056f5c57050415b64cb761a3acc35d91f013 diff --git a/kernel/sched.c b/kernel/sched.c index 089be8adb074..d42992bccdfa 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -491,6 +491,7 @@ struct rq { struct mm_struct *prev_mm; u64 clock; + u64 clock_task; atomic_t nr_iowait; @@ -518,6 +519,10 @@ struct rq { u64 avg_idle; #endif +#ifdef CONFIG_IRQ_TIME_ACCOUNTING + u64 prev_irq_time; +#endif + /* calc_load related fields */ unsigned long calc_load_update; long calc_load_active; @@ -641,10 +646,22 @@ static inline struct task_group *task_group(struct task_struct *p) #endif /* CONFIG_CGROUP_SCHED */ +static u64 irq_time_cpu(int cpu); +static void sched_irq_time_avg_update(struct rq *rq, u64 irq_time); + inline void update_rq_clock(struct rq *rq) { - if (!rq->skip_clock_update) - rq->clock = sched_clock_cpu(cpu_of(rq)); + if (!rq->skip_clock_update) { + int cpu = cpu_of(rq); + u64 irq_time; + + rq->clock = sched_clock_cpu(cpu); + irq_time = irq_time_cpu(cpu); + if (rq->clock - irq_time > rq->clock_task) + rq->clock_task = rq->clock - irq_time; + + sched_irq_time_avg_update(rq, irq_time); + } } /* @@ -1908,6 +1925,94 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int flags) dec_nr_running(rq); } +#ifdef CONFIG_IRQ_TIME_ACCOUNTING + +/* + * There are no locks covering percpu hardirq/softirq time. + * They are only modified in account_system_vtime, on corresponding CPU + * with interrupts disabled. So, writes are safe. + * They are read and saved off onto struct rq in update_rq_clock(). + * This may result in other CPU reading this CPU's irq time and can + * race with irq/account_system_vtime on this CPU. We would either get old + * or new value (or semi updated value on 32 bit) with a side effect of + * accounting a slice of irq time to wrong task when irq is in progress + * while we read rq->clock. That is a worthy compromise in place of having + * locks on each irq in account_system_time. + */ +static DEFINE_PER_CPU(u64, cpu_hardirq_time); +static DEFINE_PER_CPU(u64, cpu_softirq_time); + +static DEFINE_PER_CPU(u64, irq_start_time); +static int sched_clock_irqtime; + +void enable_sched_clock_irqtime(void) +{ + sched_clock_irqtime = 1; +} + +void disable_sched_clock_irqtime(void) +{ + sched_clock_irqtime = 0; +} + +static u64 irq_time_cpu(int cpu) +{ + if (!sched_clock_irqtime) + return 0; + + return per_cpu(cpu_softirq_time, cpu) + per_cpu(cpu_hardirq_time, cpu); +} + +void account_system_vtime(struct task_struct *curr) +{ + unsigned long flags; + int cpu; + u64 now, delta; + + if (!sched_clock_irqtime) + return; + + local_irq_save(flags); + + cpu = smp_processor_id(); + now = sched_clock_cpu(cpu); + delta = now - per_cpu(irq_start_time, cpu); + per_cpu(irq_start_time, cpu) = now; + /* + * We do not account for softirq time from ksoftirqd here. + * We want to continue accounting softirq time to ksoftirqd thread + * in that case, so as not to confuse scheduler with a special task + * that do not consume any time, but still wants to run. + */ + if (hardirq_count()) + per_cpu(cpu_hardirq_time, cpu) += delta; + else if (in_serving_softirq() && !(curr->flags & PF_KSOFTIRQD)) + per_cpu(cpu_softirq_time, cpu) += delta; + + local_irq_restore(flags); +} +EXPORT_SYMBOL_GPL(account_system_vtime); + +static void sched_irq_time_avg_update(struct rq *rq, u64 curr_irq_time) +{ + if (sched_clock_irqtime && sched_feat(NONIRQ_POWER)) { + u64 delta_irq = curr_irq_time - rq->prev_irq_time; + rq->prev_irq_time = curr_irq_time; + sched_rt_avg_update(rq, delta_irq); + } +} + +#else + +static u64 irq_time_cpu(int cpu) +{ + return 0; +} + +static void sched_irq_time_avg_update(struct rq *rq, u64 curr_irq_time) { } + +#endif + #include "sched_idletask.c" #include "sched_fair.c" #include "sched_rt.c" @@ -3273,7 +3378,7 @@ static u64 do_task_delta_exec(struct task_struct *p, struct rq *rq) if (task_current(rq, p)) { update_rq_clock(rq); - ns = rq->clock - p->se.exec_start; + ns = rq->clock_task - p->se.exec_start; if ((s64)ns < 0) ns = 0; } @@ -3609,7 +3714,7 @@ void scheduler_tick(void) curr->sched_class->task_tick(rq, curr, 0); raw_spin_unlock(&rq->lock); - perf_event_task_tick(curr); + perf_event_task_tick(); #ifdef CONFIG_SMP rq->idle_at_tick = idle_cpu(cpu); @@ -4667,7 +4772,7 @@ recheck: } if (user) { - retval = security_task_setscheduler(p, policy, param); + retval = security_task_setscheduler(p); if (retval) return retval; } @@ -4918,7 +5023,7 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) if (!check_same_owner(p) && !capable(CAP_SYS_NICE)) goto out_unlock; - retval = security_task_setscheduler(p, 0, NULL); + retval = security_task_setscheduler(p); if (retval) goto out_unlock; @@ -5368,7 +5473,19 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) idle->se.exec_start = sched_clock(); cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu)); + /* + * We're having a chicken and egg problem, even though we are + * holding rq->lock, the cpu isn't yet set to this cpu so the + * lockdep check in task_group() will fail. + * + * Similar case to sched_fork(). / Alternatively we could + * use task_rq_lock() here and obtain the other rq->lock. + * + * Silence PROVE_RCU + */ + rcu_read_lock(); __set_task_cpu(idle, cpu); + rcu_read_unlock(); rq->curr = rq->idle = idle; #if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)