sched: avoid large irq-latencies in smp-balancing

author Peter Zijlstra <a.p.zijlstra@chello.nl>

Fri, 9 Nov 2007 21:39:39 +0000 (22:39 +0100)

committer Ingo Molnar <mingo@elte.hu>

Fri, 9 Nov 2007 21:39:39 +0000 (22:39 +0100)
author Peter Zijlstra <a.p.zijlstra@chello.nl>
Fri, 9 Nov 2007 21:39:39 +0000 (22:39 +0100)
committer Ingo Molnar <mingo@elte.hu>
Fri, 9 Nov 2007 21:39:39 +0000 (22:39 +0100)
diff --git a/include/linux/sched.h b/include/linux/sched.h

index 93fd30d..2cc789f 100644 (file)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1466,6 +1466,7 @@ extern unsigned int sysctl_sched_batch_wakeup_granularity;
  extern unsigned int sysctl_sched_child_runs_first;
  extern unsigned int sysctl_sched_features;
  extern unsigned int sysctl_sched_migration_cost;
+extern unsigned int sysctl_sched_nr_migrate;
  
  int sched_nr_latency_handler(struct ctl_table *table, int write,
                 struct file *file, void __user *buffer, size_t *length,
diff --git a/kernel/sched.c b/kernel/sched.c

index 2a107e4..e195a42 100644 (file)
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -471,6 +471,12 @@ const_debug unsigned int sysctl_sched_features =
  
  #define sched_feat(x) (sysctl_sched_features & SCHED_FEAT_##x)
  
+/*
+ * Number of tasks to iterate in a single balance run.
+ * Limited because this is done with IRQs disabled.
+ */
+const_debug unsigned int sysctl_sched_nr_migrate = 32;
+
  /*
   * For kernel-internal use: high-speed (but slightly incorrect) per-cpu
   * clock constructed from sched_clock():
@@ -2235,7 +2241,7 @@ balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
               enum cpu_idle_type idle, int *all_pinned,
               int *this_best_prio, struct rq_iterator *iterator)
  {
-       int pulled = 0, pinned = 0, skip_for_load;
+       int loops = 0, pulled = 0, pinned = 0, skip_for_load;
         struct task_struct *p;
         long rem_load_move = max_load_move;
  
@@ -2249,10 +2255,10 @@ balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
          */
         p = iterator->start(iterator->arg);
  next:
-       if (!p)
+       if (!p || loops++ > sysctl_sched_nr_migrate)
                 goto out;
         /*
-        * To help distribute high priority tasks accross CPUs we don't
+        * To help distribute high priority tasks across CPUs we don't
          * skip a task if it will be the highest priority task (i.e. smallest
          * prio value) on its new queue regardless of its load weight
          */
@@ -2269,8 +2275,7 @@ next:
         rem_load_move -= p->se.load.weight;
  
         /*
-        * We only want to steal up to the prescribed number of tasks
-        * and the prescribed amount of weighted load.
+        * We only want to steal up to the prescribed amount of weighted load.
          */
         if (rem_load_move > 0) {
                 if (p->prio < *this_best_prio)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c

index adddf68..3a1744f 100644 (file)
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -301,6 +301,14 @@ static struct ctl_table kern_table[] = {
                 .mode           = 0644,
                 .proc_handler   = &proc_dointvec,
         },
+       {
+               .ctl_name       = CTL_UNNUMBERED,
+               .procname       = "sched_nr_migrate",
+               .data           = &sysctl_sched_nr_migrate,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644, /* octal, as in the sibling entries; decimal 644 is 01204 */
+               .proc_handler   = &proc_dointvec,
+       },
  #endif
         {
                 .ctl_name       = CTL_UNNUMBERED,
author	Peter Zijlstra <a.p.zijlstra@chello.nl>
	Fri, 9 Nov 2007 21:39:39 +0000 (22:39 +0100)
committer	Ingo Molnar <mingo@elte.hu>
	Fri, 9 Nov 2007 21:39:39 +0000 (22:39 +0100)
include/linux/sched.h		patch \| blob \| history
kernel/sched.c		patch \| blob \| history
kernel/sysctl.c		patch \| blob \| history