Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...

author Linus Torvalds <torvalds@linux-foundation.org>

Mon, 23 May 2011 19:53:48 +0000 (12:53 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Mon, 23 May 2011 19:53:48 +0000 (12:53 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Mon, 23 May 2011 19:53:48 +0000 (12:53 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 23 May 2011 19:53:48 +0000 (12:53 -0700)
diff --combined include/linux/sched.h

index 340f5ee,c34a718..aaf71e0
--- 1/include/linux/sched.h
--- 2/include/linux/sched.h
+++ b/include/linux/sched.h
@@@ -315,6 -315,7 +315,6 @@@ extern int proc_dowatchdog_thresh(struc
                                   void __user *buffer,
                                   size_t *lenp, loff_t *ppos);
   extern unsigned int  softlockup_panic;
- -extern int softlockup_thresh;
   void lockup_detector_init(void);
   #else
   static inline void touch_softlockup_watchdog(void)
@@@ -652,8 -653,9 +652,8 @@@ struct signal_struct 
    * Bits in flags field of signal_struct.
    */
   #define SIGNAL_STOP_STOPPED   0x00000001 /* job control stop in effect */
- -#define SIGNAL_STOP_DEQUEUED  0x00000002 /* stop signal dequeued */
- -#define SIGNAL_STOP_CONTINUED 0x00000004 /* SIGCONT since WCONTINUED reap */
- -#define SIGNAL_GROUP_EXIT     0x00000008 /* group exit in progress */
+ +#define SIGNAL_STOP_CONTINUED 0x00000002 /* SIGCONT since WCONTINUED reap */
+ +#define SIGNAL_GROUP_EXIT     0x00000004 /* group exit in progress */
   /*
    * Pending notifications to parent.
    */
@@@ -786,17 -788,39 +786,39 @@@ enum cpu_idle_type 
   };
   
   /*
-  * sched-domains (multiprocessor balancing) declarations:
+  * Increase resolution of nice-level calculations for 64-bit architectures.
+  * The extra resolution improves shares distribution and load balancing of
+  * low-weight task groups (eg. nice +19 on an autogroup), deeper taskgroup
+  * hierarchies, especially on larger systems. This is not a user-visible change
+  * and does not change the user-interface for setting shares/weights.
+  *
+  * We increase resolution only if we have enough bits to allow this increased
+  * resolution (i.e. BITS_PER_LONG > 32). The costs for increasing resolution
+  * when BITS_PER_LONG <= 32 are pretty high and the returns do not justify the
+  * increased costs.
    */
+ #if BITS_PER_LONG > 32
+ # define SCHED_LOAD_RESOLUTION        10
+ # define scale_load(w)                ((w) << SCHED_LOAD_RESOLUTION)
+ # define scale_load_down(w)   ((w) >> SCHED_LOAD_RESOLUTION)
+ #else
+ # define SCHED_LOAD_RESOLUTION        0
+ # define scale_load(w)                (w)
+ # define scale_load_down(w)   (w)
+ #endif
   
- /*
-  * Increase resolution of nice-level calculations:
-  */
- #define SCHED_LOAD_SHIFT      10
+ #define SCHED_LOAD_SHIFT      (10 + SCHED_LOAD_RESOLUTION)
   #define SCHED_LOAD_SCALE      (1L << SCHED_LOAD_SHIFT)
   
- #define SCHED_LOAD_SCALE_FUZZ SCHED_LOAD_SCALE
+ /*
+  * Increase resolution of cpu_power calculations
+  */
+ #define SCHED_POWER_SHIFT     10
+ #define SCHED_POWER_SCALE     (1L << SCHED_POWER_SHIFT)
   
+ /*
+  * sched-domains (multiprocessor balancing) declarations:
+  */
   #ifdef CONFIG_SMP
   #define SD_LOAD_BALANCE               0x0001  /* Do load balancing on this domain. */
   #define SD_BALANCE_NEWIDLE    0x0002  /* Balance when about to become idle */
@@@ -1249,7 -1273,6 +1271,7 @@@ struct task_struct 
         int exit_state;
         int exit_code, exit_signal;
         int pdeath_signal;  /*  The signal sent when the parent dies  */
+ +      unsigned int group_stop;        /* GROUP_STOP_*, siglock protected */
         /* ??? */
         unsigned int personality;
         unsigned did_exec:1;
@@@ -1770,17 -1793,6 +1792,17 @@@ extern void thread_group_times(struct t
   #define tsk_used_math(p) ((p)->flags & PF_USED_MATH)
   #define used_math() tsk_used_math(current)
   
+ +/*
+ + * task->group_stop flags
+ + */
+ +#define GROUP_STOP_SIGMASK    0xffff    /* signr of the last group stop */
+ +#define GROUP_STOP_PENDING    (1 << 16) /* task should stop for group stop */
+ +#define GROUP_STOP_CONSUME    (1 << 17) /* consume group stop count */
+ +#define GROUP_STOP_TRAPPING   (1 << 18) /* switching from STOPPED to TRACED */
+ +#define GROUP_STOP_DEQUEUED   (1 << 19) /* stop signal dequeued */
+ +
+ +extern void task_clear_group_stop_pending(struct task_struct *task);
+ +
   #ifdef CONFIG_PREEMPT_RCU
   
   #define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */
diff --combined kernel/sched.c

index 0516af4,bb504e1..2d12893
--- 1/kernel/sched.c
--- 2/kernel/sched.c
+++ b/kernel/sched.c
@@@ -293,7 -293,7 +293,7 @@@ static DEFINE_SPINLOCK(task_group_lock)
    *  limitation from this.)
    */
   #define MIN_SHARES    2
- #define MAX_SHARES    (1UL << 18)
+ #define MAX_SHARES    (1UL << (18 + SCHED_LOAD_RESOLUTION))
   
   static int root_task_group_load = ROOT_TASK_GROUP_LOAD;
   #endif
@@@ -1330,13 -1330,25 +1330,25 @@@ calc_delta_mine(unsigned long delta_exe
   {
         u64 tmp;
   
-       tmp = (u64)delta_exec * weight;
+       /*
+        * weight can be less than 2^SCHED_LOAD_RESOLUTION for task group sched
+        * entities since MIN_SHARES = 2. Treat weight as 1 if less than
+        * 2^SCHED_LOAD_RESOLUTION.
+        */
+       if (likely(weight > (1UL << SCHED_LOAD_RESOLUTION)))
+               tmp = (u64)delta_exec * scale_load_down(weight);
+       else
+               tmp = (u64)delta_exec;
   
         if (!lw->inv_weight) {
-               if (BITS_PER_LONG > 32 && unlikely(lw->weight >= WMULT_CONST))
+               unsigned long w = scale_load_down(lw->weight);
+ 
+               if (BITS_PER_LONG > 32 && unlikely(w >= WMULT_CONST))
                         lw->inv_weight = 1;
+               else if (unlikely(!w))
+                       lw->inv_weight = WMULT_CONST;
                 else
-                       lw->inv_weight = WMULT_CONST / lw->weight;
+                       lw->inv_weight = WMULT_CONST / w;
         }
   
         /*
@@@ -1778,17 -1790,20 +1790,20 @@@ static void dec_nr_running(struct rq *r
   
   static void set_load_weight(struct task_struct *p)
   {
+       int prio = p->static_prio - MAX_RT_PRIO;
+       struct load_weight *load = &p->se.load;
+ 
         /*
          * SCHED_IDLE tasks get minimal weight:
          */
         if (p->policy == SCHED_IDLE) {
-               p->se.load.weight = WEIGHT_IDLEPRIO;
-               p->se.load.inv_weight = WMULT_IDLEPRIO;
+               load->weight = scale_load(WEIGHT_IDLEPRIO);
+               load->inv_weight = WMULT_IDLEPRIO;
                 return;
         }
   
-       p->se.load.weight = prio_to_weight[p->static_prio - MAX_RT_PRIO];
-       p->se.load.inv_weight = prio_to_wmult[p->static_prio - MAX_RT_PRIO];
+       load->weight = scale_load(prio_to_weight[prio]);
+       load->inv_weight = prio_to_wmult[prio];
   }
   
   static void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
@@@ -2564,7 -2579,7 +2579,7 @@@ static void ttwu_queue(struct task_stru
   {
         struct rq *rq = cpu_rq(cpu);
   
- -#if defined(CONFIG_SMP) && defined(CONFIG_SCHED_TTWU_QUEUE)
+ +#if defined(CONFIG_SMP)
         if (sched_feat(TTWU_QUEUE) && cpu != smp_processor_id()) {
                 ttwu_queue_remote(p, cpu);
                 return;
@@@ -6527,7 -6542,7 +6542,7 @@@ static int sched_domain_debug_one(struc
                 cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group));
   
                 printk(KERN_CONT " %s", str);
-               if (group->cpu_power != SCHED_LOAD_SCALE) {
+               if (group->cpu_power != SCHED_POWER_SCALE) {
                         printk(KERN_CONT " (cpu_power = %d)",
                                 group->cpu_power);
                 }
@@@ -7902,7 -7917,7 +7917,7 @@@ void __init sched_init(void
   #ifdef CONFIG_SMP
                 rq->sd = NULL;
                 rq->rd = NULL;
-               rq->cpu_power = SCHED_LOAD_SCALE;
+               rq->cpu_power = SCHED_POWER_SCALE;
                 rq->post_schedule = 0;
                 rq->active_balance = 0;
                 rq->next_balance = jiffies;
@@@ -8806,14 -8821,14 +8821,14 @@@ cpu_cgroup_exit(struct cgroup_subsys *s
   static int cpu_shares_write_u64(struct cgroup *cgrp, struct cftype *cftype,
                                 u64 shareval)
   {
-       return sched_group_set_shares(cgroup_tg(cgrp), shareval);
+       return sched_group_set_shares(cgroup_tg(cgrp), scale_load(shareval));
   }
   
   static u64 cpu_shares_read_u64(struct cgroup *cgrp, struct cftype *cft)
   {
         struct task_group *tg = cgroup_tg(cgrp);
   
-       return (u64) tg->shares;
+       return (u64) scale_load_down(tg->shares);
   }
   #endif /* CONFIG_FAIR_GROUP_SCHED */
author	Linus Torvalds <torvalds@linux-foundation.org>
	Mon, 23 May 2011 19:53:48 +0000 (12:53 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Mon, 23 May 2011 19:53:48 +0000 (12:53 -0700)
		1	2
include/linux/sched.h	patch |	diff1 |	diff2 |	blob | history
kernel/sched.c	patch |	diff1 |	diff2 |	blob | history