Merge branch 'sched/for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip...

author Linus Torvalds <torvalds@linux-foundation.org>

Thu, 24 Jul 2008 02:36:53 +0000 (19:36 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Thu, 24 Jul 2008 02:36:53 +0000 (19:36 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 24 Jul 2008 02:36:53 +0000 (19:36 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 24 Jul 2008 02:36:53 +0000 (19:36 -0700)
diff --combined arch/x86/kernel/signal_32.c

index 07faaa5,e1fc7bd..6fb5bcd
--- 1/arch/x86/kernel/signal_32.c
--- 2/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@@ -212,7 -212,7 +212,7 @@@ asmlinkage unsigned long sys_sigreturn(
   
   badframe:
         if (show_unhandled_signals && printk_ratelimit()) {
- -              printk(KERN_INFO "%s%s[%d] bad frame in sigreturn frame:"
+ +              printk("%s%s[%d] bad frame in sigreturn frame:"
                         "%p ip:%lx sp:%lx oeax:%lx",
                     task_pid_nr(current) > 1 ? KERN_INFO : KERN_EMERG,
                     current->comm, task_pid_nr(current), frame, regs->ip,
@@@ -657,12 -657,15 +657,9 @@@ static void do_signal(struct pt_regs *r
   void
   do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
   {
- -      /* Pending single-step? */
- -      if (thread_info_flags & _TIF_SINGLESTEP) {
- -              regs->flags |= X86_EFLAGS_TF;
- -              clear_thread_flag(TIF_SINGLESTEP);
- -      }
- -
         /* deal with pending signal delivery */
         if (thread_info_flags & _TIF_SIGPENDING)
                 do_signal(regs);
   
-       if (thread_info_flags & _TIF_HRTICK_RESCHED)
-               hrtick_resched();
- 
         clear_thread_flag(TIF_IRET);
   }
diff --combined arch/x86/kernel/signal_64.c

index bf87684,88023fc..47c3d24
--- 1/arch/x86/kernel/signal_64.c
--- 2/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@@ -487,6 -487,12 +487,6 @@@ static void do_signal(struct pt_regs *r
   void do_notify_resume(struct pt_regs *regs, void *unused,
                       __u32 thread_info_flags)
   {
- -      /* Pending single-step? */
- -      if (thread_info_flags & _TIF_SINGLESTEP) {
- -              regs->flags |= X86_EFLAGS_TF;
- -              clear_thread_flag(TIF_SINGLESTEP);
- -      }
- -
   #ifdef CONFIG_X86_MCE
         /* notify userspace of pending MCEs */
         if (thread_info_flags & _TIF_MCE_NOTIFY)
@@@ -496,9 -502,6 +496,6 @@@
         /* deal with pending signal delivery */
         if (thread_info_flags & _TIF_SIGPENDING)
                 do_signal(regs);
- 
-       if (thread_info_flags & _TIF_HRTICK_RESCHED)
-               hrtick_resched();
   }
   
   void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
diff --combined include/asm-x86/thread_info.h

index 0a8f27d,d701263..3f2de10
--- 1/include/asm-x86/thread_info.h
--- 2/include/asm-x86/thread_info.h
+++ b/include/asm-x86/thread_info.h
@@@ -75,11 -75,12 +75,10 @@@ struct thread_info 
   #define TIF_NEED_RESCHED      3       /* rescheduling necessary */
   #define TIF_SINGLESTEP                4       /* reenable singlestep on user return*/
   #define TIF_IRET              5       /* force IRET */
- -#ifdef CONFIG_X86_32
   #define TIF_SYSCALL_EMU               6       /* syscall emulation active */
- -#endif
   #define TIF_SYSCALL_AUDIT     7       /* syscall auditing active */
   #define TIF_SECCOMP           8       /* secure computing */
   #define TIF_MCE_NOTIFY                10      /* notify userspace of an MCE */
- #define TIF_HRTICK_RESCHED    11      /* reprogram hrtick timer */
   #define TIF_NOTSC             16      /* TSC is not accessible in userland */
   #define TIF_IA32              17      /* 32bit process */
   #define TIF_FORK              18      /* ret_from_fork */
@@@ -98,11 -99,14 +97,10 @@@
   #define _TIF_SINGLESTEP               (1 << TIF_SINGLESTEP)
   #define _TIF_NEED_RESCHED     (1 << TIF_NEED_RESCHED)
   #define _TIF_IRET             (1 << TIF_IRET)
- -#ifdef CONFIG_X86_32
   #define _TIF_SYSCALL_EMU      (1 << TIF_SYSCALL_EMU)
- -#else
- -#define _TIF_SYSCALL_EMU      0
- -#endif
   #define _TIF_SYSCALL_AUDIT    (1 << TIF_SYSCALL_AUDIT)
   #define _TIF_SECCOMP          (1 << TIF_SECCOMP)
   #define _TIF_MCE_NOTIFY               (1 << TIF_MCE_NOTIFY)
- #define _TIF_HRTICK_RESCHED   (1 << TIF_HRTICK_RESCHED)
   #define _TIF_NOTSC            (1 << TIF_NOTSC)
   #define _TIF_IA32             (1 << TIF_IA32)
   #define _TIF_FORK             (1 << TIF_FORK)
@@@ -115,27 -119,18 +113,27 @@@
   #define _TIF_DS_AREA_MSR      (1 << TIF_DS_AREA_MSR)
   #define _TIF_BTS_TRACE_TS     (1 << TIF_BTS_TRACE_TS)
   
+ +/* work to do in syscall_trace_enter() */
+ +#define _TIF_WORK_SYSCALL_ENTRY       \
+ +      (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | \
+ +       _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | _TIF_SINGLESTEP)
+ +
+ +/* work to do in syscall_trace_leave() */
+ +#define _TIF_WORK_SYSCALL_EXIT        \
+ +      (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP)
+ +
   /* work to do on interrupt/exception return */
   #define _TIF_WORK_MASK                                                        \
         (0x0000FFFF &                                                   \
- -       ~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP|       \
- -       _TIF_SECCOMP|_TIF_SYSCALL_EMU))
+ +       ~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|                       \
+ +         _TIF_SINGLESTEP|_TIF_SECCOMP|_TIF_SYSCALL_EMU))
   
   /* work to do on any return to user space */
   #define _TIF_ALLWORK_MASK (0x0000FFFF & ~_TIF_SECCOMP)
   
   /* Only used for 64 bit */
   #define _TIF_DO_NOTIFY_MASK                                           \
-       (_TIF_SIGPENDING|_TIF_MCE_NOTIFY|_TIF_HRTICK_RESCHED)
- -      (_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY)
++      (_TIF_SIGPENDING|_TIF_MCE_NOTIFY)
   
   /* flags to check in __switch_to() */
   #define _TIF_WORK_CTXSW                                                       \
diff --combined include/linux/cpumask.h

index 30d59d1,d614d24..1b5c98e
--- 1/include/linux/cpumask.h
--- 2/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@@ -17,20 -17,6 +17,20 @@@
    * For details of cpus_onto(), see bitmap_onto in lib/bitmap.c.
    * For details of cpus_fold(), see bitmap_fold in lib/bitmap.c.
    *
+ + * . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
+ + * Note: The alternate operations with the suffix "_nr" are used
+ + *       to limit the range of the loop to nr_cpu_ids instead of
+ + *       NR_CPUS when NR_CPUS > 64 for performance reasons.
+ + *       If NR_CPUS is <= 64 then most assembler bitmask
+ + *       operators execute faster with a constant range, so
+ + *       the operator will continue to use NR_CPUS.
+ + *
+ + *       Another consideration is that nr_cpu_ids is initialized
+ + *       to NR_CPUS and isn't lowered until the possible cpus are
+ + *       discovered (including any disabled cpus).  So early uses
+ + *       will span the entire range of NR_CPUS.
+ + * . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
+ + *
    * The available cpumask operations are:
    *
    * void cpu_set(cpu, mask)            turn on bit 'cpu' in mask
@@@ -52,60 -38,18 +52,60 @@@
    * int cpus_empty(mask)                       Is mask empty (no bits sets)?
    * int cpus_full(mask)                        Is mask full (all bits sets)?
    * int cpus_weight(mask)              Hamming weigh - number of set bits
+ + * int cpus_weight_nr(mask)           Same using nr_cpu_ids instead of NR_CPUS
    *
    * void cpus_shift_right(dst, src, n) Shift right
    * void cpus_shift_left(dst, src, n)  Shift left
    *
    * int first_cpu(mask)                        Number lowest set bit, or NR_CPUS
    * int next_cpu(cpu, mask)            Next cpu past 'cpu', or NR_CPUS
+ + * int next_cpu_nr(cpu, mask)         Next cpu past 'cpu', or nr_cpu_ids
    *
    * cpumask_t cpumask_of_cpu(cpu)      Return cpumask with bit 'cpu' set
+ + *ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP
+ + * cpumask_of_cpu_ptr_declare(v)      Declares cpumask_t *v
+ + * cpumask_of_cpu_ptr_next(v, cpu)    Sets v = &cpumask_of_cpu_map[cpu]
+ + * cpumask_of_cpu_ptr(v, cpu)         Combines above two operations
+ + *else
+ + * cpumask_of_cpu_ptr_declare(v)      Declares cpumask_t _v and *v = &_v
+ + * cpumask_of_cpu_ptr_next(v, cpu)    Sets _v = cpumask_of_cpu(cpu)
+ + * cpumask_of_cpu_ptr(v, cpu)         Combines above two operations
+ + *endif
    * CPU_MASK_ALL                               Initializer - all bits set
    * CPU_MASK_NONE                      Initializer - no bits set
    * unsigned long *cpus_addr(mask)     Array of unsigned long's in mask
    *
+ + * CPUMASK_ALLOC kmalloc's a structure that is a composite of many cpumask_t
+ + * variables, and CPUMASK_PTR provides pointers to each field.
+ + *
+ + * The structure should be defined something like this:
+ + * struct my_cpumasks {
+ + *    cpumask_t mask1;
+ + *    cpumask_t mask2;
+ + * };
+ + *
+ + * Usage is then:
+ + *    CPUMASK_ALLOC(my_cpumasks);
+ + *    CPUMASK_PTR(mask1, my_cpumasks);
+ + *    CPUMASK_PTR(mask2, my_cpumasks);
+ + *
+ + *    --- DO NOT reference cpumask_t pointers until this check ---
+ + *    if (my_cpumasks == NULL)
+ + *            "kmalloc failed"...
+ + *
+ + * References are now pointers to the cpumask_t variables (*mask1, ...)
+ + *
+ + *if NR_CPUS > BITS_PER_LONG
+ + *   CPUMASK_ALLOC(m)                 Declares and allocates struct m *m =
+ + *                                            kmalloc(sizeof(*m), GFP_KERNEL)
+ + *   CPUMASK_FREE(m)                  Macro for kfree(m)
+ + *else
+ + *   CPUMASK_ALLOC(m)                 Declares struct m _m, *m = &_m
+ + *   CPUMASK_FREE(m)                  Nop
+ + *endif
+ + *   CPUMASK_PTR(v, m)                        Declares cpumask_t *v = &(m->v)
+ + * ------------------------------------------------------------------------
+ + *
    * int cpumask_scnprintf(buf, len, mask) Format cpumask for printing
    * int cpumask_parse_user(ubuf, ulen, mask)   Parse ascii string as cpumask
    * int cpulist_scnprintf(buf, len, mask) Format cpumask as list for printing
@@@ -115,8 -59,7 +115,8 @@@
    * void cpus_onto(dst, orig, relmap)  *dst = orig relative to relmap
    * void cpus_fold(dst, orig, sz)      dst bits = orig bits mod sz
    *
- - * for_each_cpu_mask(cpu, mask)               for-loop cpu over mask
+ + * for_each_cpu_mask(cpu, mask)               for-loop cpu over mask using NR_CPUS
+ + * for_each_cpu_mask_nr(cpu, mask)    for-loop cpu over mask using nr_cpu_ids
    *
    * int num_online_cpus()              Number of online CPUs
    * int num_possible_cpus()            Number of all possible CPUs
@@@ -273,19 -216,23 +273,19 @@@ static inline void __cpus_shift_left(cp
         bitmap_shift_left(dstp->bits, srcp->bits, n, nbits);
   }
   
- -#ifdef CONFIG_SMP
- -int __first_cpu(const cpumask_t *srcp);
- -#define first_cpu(src) __first_cpu(&(src))
- -int __next_cpu(int n, const cpumask_t *srcp);
- -#define next_cpu(n, src) __next_cpu((n), &(src))
- -#else
- -#define first_cpu(src)                ({ (void)(src); 0; })
- -#define next_cpu(n, src)      ({ (void)(src); 1; })
- -#endif
   
   #ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP
   extern cpumask_t *cpumask_of_cpu_map;
- -#define cpumask_of_cpu(cpu)    (cpumask_of_cpu_map[cpu])
- -
+ +#define cpumask_of_cpu(cpu)   (cpumask_of_cpu_map[cpu])
+ +#define       cpumask_of_cpu_ptr(v, cpu)                                      \
+ +              const cpumask_t *v = &cpumask_of_cpu(cpu)
+ +#define       cpumask_of_cpu_ptr_declare(v)                                   \
+ +              const cpumask_t *v
+ +#define cpumask_of_cpu_ptr_next(v, cpu)                                       \
+ +                                      v = &cpumask_of_cpu(cpu)
   #else
   #define cpumask_of_cpu(cpu)                                           \
- -(*({                                                                  \
+ +({                                                                    \
         typeof(_unused_cpumask_arg_) m;                                 \
         if (sizeof(m) == sizeof(unsigned long)) {                       \
                 m.bits[0] = 1UL<<(cpu);                                 \
@@@ -293,16 -240,8 +293,16 @@@
                 cpus_clear(m);                                          \
                 cpu_set((cpu), m);                                      \
         }                                                               \
- -      &m;                                                             \
- -}))
+ +      m;                                                              \
+ +})
+ +#define       cpumask_of_cpu_ptr(v, cpu)                                      \
+ +              cpumask_t _##v = cpumask_of_cpu(cpu);                   \
+ +              const cpumask_t *v = &_##v
+ +#define       cpumask_of_cpu_ptr_declare(v)                                   \
+ +              cpumask_t _##v;                                         \
+ +              const cpumask_t *v = &_##v
+ +#define cpumask_of_cpu_ptr_next(v, cpu)                                       \
+ +                                      _##v = cpumask_of_cpu(cpu)
   #endif
   
   #define CPU_MASK_LAST_WORD BITMAP_LAST_WORD_MASK(NR_CPUS)
@@@ -342,15 -281,6 +342,15 @@@ extern cpumask_t cpu_mask_all
   
   #define cpus_addr(src) ((src).bits)
   
+ +#if NR_CPUS > BITS_PER_LONG
+ +#define       CPUMASK_ALLOC(m)        struct m *m = kmalloc(sizeof(*m), GFP_KERNEL)
+ +#define       CPUMASK_FREE(m)         kfree(m)
+ +#else
+ +#define       CPUMASK_ALLOC(m)        struct m _m, *m = &_m
+ +#define       CPUMASK_FREE(m)
+ +#endif
+ +#define       CPUMASK_PTR(v, m)       cpumask_t *v = &(m->v)
+ +
   #define cpumask_scnprintf(buf, len, src) \
                         __cpumask_scnprintf((buf), (len), &(src), NR_CPUS)
   static inline int __cpumask_scnprintf(char *buf, int len,
@@@ -413,58 -343,30 +413,59 @@@ static inline void __cpus_fold(cpumask_
         bitmap_fold(dstp->bits, origp->bits, sz, nbits);
   }
   
- -#if NR_CPUS > 1
- -#define for_each_cpu_mask(cpu, mask)          \
- -      for ((cpu) = first_cpu(mask);           \
- -              (cpu) < NR_CPUS;                \
- -              (cpu) = next_cpu((cpu), (mask)))
- -#else /* NR_CPUS == 1 */
- -#define for_each_cpu_mask(cpu, mask)          \
+ +#if NR_CPUS == 1
+ +
+ +#define nr_cpu_ids            1
+ +#define first_cpu(src)                ({ (void)(src); 0; })
+ +#define next_cpu(n, src)      ({ (void)(src); 1; })
+ +#define any_online_cpu(mask)  0
+ +#define for_each_cpu_mask(cpu, mask)  \
         for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
- -#endif /* NR_CPUS */
+ +
+ +#else /* NR_CPUS > 1 */
+ +
+ +extern int nr_cpu_ids;
+ +int __first_cpu(const cpumask_t *srcp);
+ +int __next_cpu(int n, const cpumask_t *srcp);
+ +int __any_online_cpu(const cpumask_t *mask);
+ +
+ +#define first_cpu(src)                __first_cpu(&(src))
+ +#define next_cpu(n, src)      __next_cpu((n), &(src))
+ +#define any_online_cpu(mask) __any_online_cpu(&(mask))
+ +#define for_each_cpu_mask(cpu, mask)                  \
+ +      for ((cpu) = -1;                                \
+ +              (cpu) = next_cpu((cpu), (mask)),        \
+ +              (cpu) < NR_CPUS; )
+ +#endif
+ +
+ +#if NR_CPUS <= 64
   
   #define next_cpu_nr(n, src)           next_cpu(n, src)
   #define cpus_weight_nr(cpumask)               cpus_weight(cpumask)
   #define for_each_cpu_mask_nr(cpu, mask)       for_each_cpu_mask(cpu, mask)
   
+ +#else /* NR_CPUS > 64 */
+ +
+ +int __next_cpu_nr(int n, const cpumask_t *srcp);
+ +#define next_cpu_nr(n, src)   __next_cpu_nr((n), &(src))
+ +#define cpus_weight_nr(cpumask)       __cpus_weight(&(cpumask), nr_cpu_ids)
+ +#define for_each_cpu_mask_nr(cpu, mask)                       \
+ +      for ((cpu) = -1;                                \
+ +              (cpu) = next_cpu_nr((cpu), (mask)),     \
+ +              (cpu) < nr_cpu_ids; )
+ +
+ +#endif /* NR_CPUS > 64 */
+ +
   /*
    * The following particular system cpumasks and operations manage
-  * possible, present and online cpus.  Each of them is a fixed size
+  * possible, present, active and online cpus.  Each of them is a fixed size
    * bitmap of size NR_CPUS.
    *
    *  #ifdef CONFIG_HOTPLUG_CPU
    *     cpu_possible_map - has bit 'cpu' set iff cpu is populatable
    *     cpu_present_map  - has bit 'cpu' set iff cpu is populated
    *     cpu_online_map   - has bit 'cpu' set iff cpu available to scheduler
+  *     cpu_active_map   - has bit 'cpu' set iff cpu available to migration
    *  #else
    *     cpu_possible_map - has bit 'cpu' set iff cpu is populated
    *     cpu_present_map  - copy of cpu_possible_map
@@@ -515,14 -417,16 +516,16 @@@
   extern cpumask_t cpu_possible_map;
   extern cpumask_t cpu_online_map;
   extern cpumask_t cpu_present_map;
+ extern cpumask_t cpu_active_map;
   
   #if NR_CPUS > 1
- -#define num_online_cpus()     cpus_weight(cpu_online_map)
- -#define num_possible_cpus()   cpus_weight(cpu_possible_map)
- -#define num_present_cpus()    cpus_weight(cpu_present_map)
+ +#define num_online_cpus()     cpus_weight_nr(cpu_online_map)
+ +#define num_possible_cpus()   cpus_weight_nr(cpu_possible_map)
+ +#define num_present_cpus()    cpus_weight_nr(cpu_present_map)
   #define cpu_online(cpu)               cpu_isset((cpu), cpu_online_map)
   #define cpu_possible(cpu)     cpu_isset((cpu), cpu_possible_map)
   #define cpu_present(cpu)      cpu_isset((cpu), cpu_present_map)
+ #define cpu_active(cpu)               cpu_isset((cpu), cpu_active_map)
   #else
   #define num_online_cpus()     1
   #define num_possible_cpus()   1
@@@ -530,12 -434,22 +533,13 @@@
   #define cpu_online(cpu)               ((cpu) == 0)
   #define cpu_possible(cpu)     ((cpu) == 0)
   #define cpu_present(cpu)      ((cpu) == 0)
+ #define cpu_active(cpu)               ((cpu) == 0)
   #endif
   
   #define cpu_is_offline(cpu)   unlikely(!cpu_online(cpu))
   
- -#ifdef CONFIG_SMP
- -extern int nr_cpu_ids;
- -#define any_online_cpu(mask) __any_online_cpu(&(mask))
- -int __any_online_cpu(const cpumask_t *mask);
- -#else
- -#define nr_cpu_ids                    1
- -#define any_online_cpu(mask)          0
- -#endif
- -
- -#define for_each_possible_cpu(cpu)  for_each_cpu_mask((cpu), cpu_possible_map)
- -#define for_each_online_cpu(cpu)  for_each_cpu_mask((cpu), cpu_online_map)
- -#define for_each_present_cpu(cpu) for_each_cpu_mask((cpu), cpu_present_map)
+ +#define for_each_possible_cpu(cpu) for_each_cpu_mask_nr((cpu), cpu_possible_map)
+ +#define for_each_online_cpu(cpu)   for_each_cpu_mask_nr((cpu), cpu_online_map)
+ +#define for_each_present_cpu(cpu)  for_each_cpu_mask_nr((cpu), cpu_present_map)
   
   #endif /* __LINUX_CPUMASK_H */
diff --combined include/linux/sched.h

index af443a0,26da921..dc7e592
--- 1/include/linux/sched.h
--- 2/include/linux/sched.h
+++ b/include/linux/sched.h
@@@ -295,11 -295,10 +295,11 @@@ extern void softlockup_tick(void)
   extern void spawn_softlockup_task(void);
   extern void touch_softlockup_watchdog(void);
   extern void touch_all_softlockup_watchdogs(void);
- -extern unsigned long  softlockup_thresh;
+ +extern unsigned int  softlockup_panic;
   extern unsigned long sysctl_hung_task_check_count;
   extern unsigned long sysctl_hung_task_timeout_secs;
   extern unsigned long sysctl_hung_task_warnings;
+ +extern int softlockup_thresh;
   #else
   static inline void softlockup_tick(void)
   {
@@@ -825,7 -824,16 +825,16 @@@ extern void partition_sched_domains(in
                                     struct sched_domain_attr *dattr_new);
   extern int arch_reinit_sched_domains(void);
   
- #endif        /* CONFIG_SMP */
+ #else /* CONFIG_SMP */
+ 
+ struct sched_domain_attr;
+ 
+ static inline void
+ partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
+                       struct sched_domain_attr *dattr_new)
+ {
+ }
+ #endif        /* !CONFIG_SMP */
   
   struct io_context;                    /* See blkdev.h */
   #define NGROUPS_SMALL         32
diff --combined init/main.c

index 756eca4,dd25259..2769dc0
--- 1/init/main.c
--- 2/init/main.c
+++ b/init/main.c
@@@ -415,6 -415,13 +415,13 @@@ static void __init smp_init(void
   {
         unsigned int cpu;
   
+       /*
+        * Set up the current CPU as possible to migrate to.
+        * The other ones will be done by cpu_up/cpu_down()
+        */
+       cpu = smp_processor_id();
+       cpu_set(cpu, cpu_active_map);
+ 
         /* FIXME: This should be done in userspace --RR */
         for_each_present_cpu(cpu) {
                 if (num_online_cpus() >= setup_max_cpus)
@@@ -630,10 -637,9 +637,10 @@@ asmlinkage void __init start_kernel(voi
   
   #ifdef CONFIG_BLK_DEV_INITRD
         if (initrd_start && !initrd_below_start_ok &&
- -                      initrd_start < min_low_pfn << PAGE_SHIFT) {
+ +          page_to_pfn(virt_to_page(initrd_start)) < min_low_pfn) {
                 printk(KERN_CRIT "initrd overwritten (0x%08lx < 0x%08lx) - "
- -                  "disabling it.\n",initrd_start,min_low_pfn << PAGE_SHIFT);
+ +                  "disabling it.\n",
+ +                  page_to_pfn(virt_to_page(initrd_start)), min_low_pfn);
                 initrd_start = 0;
         }
   #endif
diff --combined kernel/cpu.c

index d26d0b0,033603c..2cc409c
--- 1/kernel/cpu.c
--- 2/kernel/cpu.c
+++ b/kernel/cpu.c
@@@ -64,6 -64,8 +64,8 @@@ void __init cpu_hotplug_init(void
         cpu_hotplug.refcount = 0;
   }
   
+ cpumask_t cpu_active_map;
+ 
   #ifdef CONFIG_HOTPLUG_CPU
   
   void get_online_cpus(void)
@@@ -291,11 -293,30 +293,30 @@@ int __ref cpu_down(unsigned int cpu
         int err = 0;
   
         cpu_maps_update_begin();
-       if (cpu_hotplug_disabled)
+ 
+       if (cpu_hotplug_disabled) {
                 err = -EBUSY;
-       else
-               err = _cpu_down(cpu, 0);
+               goto out;
+       }
+ 
+       cpu_clear(cpu, cpu_active_map);
+ 
+       /*
+        * Make sure that all cpus did the reschedule and are not
+        * using a stale version of the cpu_active_map.
+        * This is not strictly necessary because stop_machine()
+        * that we run down the line already provides the required
+        * synchronization. But it's really a side effect and we do not
+        * want to depend on the innards of the stop_machine here.
+        */
+       synchronize_sched();
   
+       err = _cpu_down(cpu, 0);
+ 
+       if (cpu_online(cpu))
+               cpu_set(cpu, cpu_active_map);
+ 
+ out:
         cpu_maps_update_done();
         return err;
   }
@@@ -355,11 -376,18 +376,18 @@@ int __cpuinit cpu_up(unsigned int cpu
         }
   
         cpu_maps_update_begin();
-       if (cpu_hotplug_disabled)
+ 
+       if (cpu_hotplug_disabled) {
                 err = -EBUSY;
-       else
-               err = _cpu_up(cpu, 0);
+               goto out;
+       }
+ 
+       err = _cpu_up(cpu, 0);
+ 
+       if (cpu_online(cpu))
+               cpu_set(cpu, cpu_active_map);
   
+ out:
         cpu_maps_update_done();
         return err;
   }
@@@ -413,7 -441,7 +441,7 @@@ void __ref enable_nonboot_cpus(void
                 goto out;
   
         printk("Enabling non-boot CPUs ...\n");
- -      for_each_cpu_mask(cpu, frozen_cpus) {
+ +      for_each_cpu_mask_nr(cpu, frozen_cpus) {
                 error = _cpu_up(cpu, 1);
                 if (!error) {
                         printk("CPU%d is up\n", cpu);
diff --combined kernel/cpuset.c

index d2cc67d,3c3ef02..d573891
--- 1/kernel/cpuset.c
--- 2/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@@ -564,7 -564,7 +564,7 @@@ update_domain_attr(struct sched_domain_
    *    partition_sched_domains().
    */
   
- static void rebuild_sched_domains(void)
+ void rebuild_sched_domains(void)
   {
         struct kfifo *q;        /* queue of cpusets to be scanned */
         struct cpuset *cp;      /* scans q */
@@@ -679,9 -679,7 +679,9 @@@ restart
                                 if (apn == b->pn) {
                                         cpus_or(*dp, *dp, b->cpus_allowed);
                                         b->pn = -1;
- -                                      update_domain_attr(dattr, b);
+ +                                      if (dattr)
+ +                                              update_domain_attr(dattr
+ +                                                                 + nslot, b);
                                 }
                         }
                         nslot++;
diff --combined kernel/sched.c

index df80bae,62b1b8e..6acf749
--- 1/kernel/sched.c
--- 2/kernel/sched.c
+++ b/kernel/sched.c
@@@ -571,8 -571,10 +571,10 @@@ struct rq 
   #endif
   
   #ifdef CONFIG_SCHED_HRTICK
-       unsigned long hrtick_flags;
-       ktime_t hrtick_expire;
+ #ifdef CONFIG_SMP
+       int hrtick_csd_pending;
+       struct call_single_data hrtick_csd;
+ #endif
         struct hrtimer hrtick_timer;
   #endif
   
@@@ -983,13 -985,6 +985,6 @@@ static struct rq *this_rq_lock(void
         return rq;
   }
   
- static void __resched_task(struct task_struct *p, int tif_bit);
- 
- static inline void resched_task(struct task_struct *p)
- {
-       __resched_task(p, TIF_NEED_RESCHED);
- }
- 
   #ifdef CONFIG_SCHED_HRTICK
   /*
    * Use HR-timers to deliver accurate preemption points.
@@@ -1001,25 -996,6 +996,6 @@@
    * When we get rescheduled we reprogram the hrtick_timer outside of the
    * rq->lock.
    */
- static inline void resched_hrt(struct task_struct *p)
- {
-       __resched_task(p, TIF_HRTICK_RESCHED);
- }
- 
- static inline void resched_rq(struct rq *rq)
- {
-       unsigned long flags;
- 
-       spin_lock_irqsave(&rq->lock, flags);
-       resched_task(rq->curr);
-       spin_unlock_irqrestore(&rq->lock, flags);
- }
- 
- enum {
-       HRTICK_SET,             /* re-programm hrtick_timer */
-       HRTICK_RESET,           /* not a new slice */
-       HRTICK_BLOCK,           /* stop hrtick operations */
- };
   
   /*
    * Use hrtick when:
@@@ -1030,72 -1006,17 +1006,17 @@@ static inline int hrtick_enabled(struc
   {
         if (!sched_feat(HRTICK))
                 return 0;
-       if (unlikely(test_bit(HRTICK_BLOCK, &rq->hrtick_flags)))
+       if (!cpu_active(cpu_of(rq)))
                 return 0;
         return hrtimer_is_hres_active(&rq->hrtick_timer);
   }
   
- /*
-  * Called to set the hrtick timer state.
-  *
-  * called with rq->lock held and irqs disabled
-  */
- static void hrtick_start(struct rq *rq, u64 delay, int reset)
- {
-       assert_spin_locked(&rq->lock);
- 
-       /*
-        * preempt at: now + delay
-        */
-       rq->hrtick_expire =
-               ktime_add_ns(rq->hrtick_timer.base->get_time(), delay);
-       /*
-        * indicate we need to program the timer
-        */
-       __set_bit(HRTICK_SET, &rq->hrtick_flags);
-       if (reset)
-               __set_bit(HRTICK_RESET, &rq->hrtick_flags);
- 
-       /*
-        * New slices are called from the schedule path and don't need a
-        * forced reschedule.
-        */
-       if (reset)
-               resched_hrt(rq->curr);
- }
- 
   static void hrtick_clear(struct rq *rq)
   {
         if (hrtimer_active(&rq->hrtick_timer))
                 hrtimer_cancel(&rq->hrtick_timer);
   }
   
- /*
-  * Update the timer from the possible pending state.
-  */
- static void hrtick_set(struct rq *rq)
- {
-       ktime_t time;
-       int set, reset;
-       unsigned long flags;
- 
-       WARN_ON_ONCE(cpu_of(rq) != smp_processor_id());
- 
-       spin_lock_irqsave(&rq->lock, flags);
-       set = __test_and_clear_bit(HRTICK_SET, &rq->hrtick_flags);
-       reset = __test_and_clear_bit(HRTICK_RESET, &rq->hrtick_flags);
-       time = rq->hrtick_expire;
-       clear_thread_flag(TIF_HRTICK_RESCHED);
-       spin_unlock_irqrestore(&rq->lock, flags);
- 
-       if (set) {
-               hrtimer_start(&rq->hrtick_timer, time, HRTIMER_MODE_ABS);
-               if (reset && !hrtimer_active(&rq->hrtick_timer))
-                       resched_rq(rq);
-       } else
-               hrtick_clear(rq);
- }
- 
   /*
    * High-resolution timer tick.
    * Runs from hardirq context with interrupts disabled.
@@@ -1115,27 -1036,37 +1036,37 @@@ static enum hrtimer_restart hrtick(stru
   }
   
   #ifdef CONFIG_SMP
- static void hotplug_hrtick_disable(int cpu)
+ /*
+  * called from hardirq (IPI) context
+  */
+ static void __hrtick_start(void *arg)
   {
-       struct rq *rq = cpu_rq(cpu);
-       unsigned long flags;
- 
-       spin_lock_irqsave(&rq->lock, flags);
-       rq->hrtick_flags = 0;
-       __set_bit(HRTICK_BLOCK, &rq->hrtick_flags);
-       spin_unlock_irqrestore(&rq->lock, flags);
+       struct rq *rq = arg;
   
-       hrtick_clear(rq);
+       spin_lock(&rq->lock);
+       hrtimer_restart(&rq->hrtick_timer);
+       rq->hrtick_csd_pending = 0;
+       spin_unlock(&rq->lock);
   }
   
- static void hotplug_hrtick_enable(int cpu)
+ /*
+  * Called to set the hrtick timer state.
+  *
+  * called with rq->lock held and irqs disabled
+  */
+ static void hrtick_start(struct rq *rq, u64 delay)
   {
-       struct rq *rq = cpu_rq(cpu);
-       unsigned long flags;
+       struct hrtimer *timer = &rq->hrtick_timer;
+       ktime_t time = ktime_add_ns(timer->base->get_time(), delay);
   
-       spin_lock_irqsave(&rq->lock, flags);
-       __clear_bit(HRTICK_BLOCK, &rq->hrtick_flags);
-       spin_unlock_irqrestore(&rq->lock, flags);
+       timer->expires = time;
+ 
+       if (rq == this_rq()) {
+               hrtimer_restart(timer);
+       } else if (!rq->hrtick_csd_pending) {
+               __smp_call_function_single(cpu_of(rq), &rq->hrtick_csd);
+               rq->hrtick_csd_pending = 1;
+       }
   }
   
   static int
@@@ -1150,16 -1081,7 +1081,7 @@@ hotplug_hrtick(struct notifier_block *n
         case CPU_DOWN_PREPARE_FROZEN:
         case CPU_DEAD:
         case CPU_DEAD_FROZEN:
-               hotplug_hrtick_disable(cpu);
-               return NOTIFY_OK;
- 
-       case CPU_UP_PREPARE:
-       case CPU_UP_PREPARE_FROZEN:
-       case CPU_DOWN_FAILED:
-       case CPU_DOWN_FAILED_FROZEN:
-       case CPU_ONLINE:
-       case CPU_ONLINE_FROZEN:
-               hotplug_hrtick_enable(cpu);
+               hrtick_clear(cpu_rq(cpu));
                 return NOTIFY_OK;
         }
   
@@@ -1170,46 -1092,45 +1092,45 @@@ static void init_hrtick(void
   {
         hotcpu_notifier(hotplug_hrtick, 0);
   }
- #endif /* CONFIG_SMP */
+ #else
+ /*
+  * Called to set the hrtick timer state.
+  *
+  * called with rq->lock held and irqs disabled
+  */
+ static void hrtick_start(struct rq *rq, u64 delay)
+ {
+       hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay), HRTIMER_MODE_REL);
+ }
   
- static void init_rq_hrtick(struct rq *rq)
+ static void init_hrtick(void)
   {
-       rq->hrtick_flags = 0;
-       hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-       rq->hrtick_timer.function = hrtick;
-       rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
   }
+ #endif /* CONFIG_SMP */
   
- void hrtick_resched(void)
+ static void init_rq_hrtick(struct rq *rq)
   {
-       struct rq *rq;
-       unsigned long flags;
+ #ifdef CONFIG_SMP
+       rq->hrtick_csd_pending = 0;
   
-       if (!test_thread_flag(TIF_HRTICK_RESCHED))
-               return;
+       rq->hrtick_csd.flags = 0;
+       rq->hrtick_csd.func = __hrtick_start;
+       rq->hrtick_csd.info = rq;
+ #endif
   
-       local_irq_save(flags);
-       rq = cpu_rq(smp_processor_id());
-       hrtick_set(rq);
-       local_irq_restore(flags);
+       hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+       rq->hrtick_timer.function = hrtick;
+       rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
   }
   #else
   static inline void hrtick_clear(struct rq *rq)
   {
   }
   
- static inline void hrtick_set(struct rq *rq)
- {
- }
- 
   static inline void init_rq_hrtick(struct rq *rq)
   {
   }
   
- void hrtick_resched(void)
- {
- }
- 
   static inline void init_hrtick(void)
   {
   }
@@@ -1228,16 -1149,16 +1149,16 @@@
   #define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG)
   #endif
   
- static void __resched_task(struct task_struct *p, int tif_bit)
+ static void resched_task(struct task_struct *p)
   {
         int cpu;
   
         assert_spin_locked(&task_rq(p)->lock);
   
-       if (unlikely(test_tsk_thread_flag(p, tif_bit)))
+       if (unlikely(test_tsk_thread_flag(p, TIF_NEED_RESCHED)))
                 return;
   
-       set_tsk_thread_flag(p, tif_bit);
+       set_tsk_thread_flag(p, TIF_NEED_RESCHED);
   
         cpu = task_cpu(p);
         if (cpu == smp_processor_id())
@@@ -1303,10 -1224,10 +1224,10 @@@ void wake_up_idle_cpu(int cpu
   #endif /* CONFIG_NO_HZ */
   
   #else /* !CONFIG_SMP */
- static void __resched_task(struct task_struct *p, int tif_bit)
+ static void resched_task(struct task_struct *p)
   {
         assert_spin_locked(&task_rq(p)->lock);
-       set_tsk_thread_flag(p, tif_bit);
+       set_tsk_need_resched(p);
   }
   #endif /* CONFIG_SMP */
   
@@@ -2108,7 -2029,7 +2029,7 @@@ find_idlest_group(struct sched_domain *
                 /* Tally up the load of all CPUs in the group */
                 avg_load = 0;
   
- -              for_each_cpu_mask(i, group->cpumask) {
+ +              for_each_cpu_mask_nr(i, group->cpumask) {
                         /* Bias balancing toward cpus of our domain */
                         if (local_group)
                                 load = source_load(i, load_idx);
@@@ -2150,7 -2071,7 +2071,7 @@@ find_idlest_cpu(struct sched_group *gro
         /* Traverse only the allowed CPUs */
         cpus_and(*tmp, group->cpumask, p->cpus_allowed);
   
- -      for_each_cpu_mask(i, *tmp) {
+ +      for_each_cpu_mask_nr(i, *tmp) {
                 load = weighted_cpuload(i);
   
                 if (load < min_load || (load == min_load && i == this_cpu)) {
@@@ -2881,7 -2802,7 +2802,7 @@@ static void sched_migrate_task(struct t
   
         rq = task_rq_lock(p, &flags);
         if (!cpu_isset(dest_cpu, p->cpus_allowed)
-           || unlikely(cpu_is_offline(dest_cpu)))
+           || unlikely(!cpu_active(dest_cpu)))
                 goto out;
   
         /* force the process onto the specified CPU */
@@@ -3168,7 -3089,7 +3089,7 @@@ find_busiest_group(struct sched_domain 
                 max_cpu_load = 0;
                 min_cpu_load = ~0UL;
   
- -              for_each_cpu_mask(i, group->cpumask) {
+ +              for_each_cpu_mask_nr(i, group->cpumask) {
                         struct rq *rq;
   
                         if (!cpu_isset(i, *cpus))
@@@ -3447,7 -3368,7 +3368,7 @@@ find_busiest_queue(struct sched_group *
         unsigned long max_load = 0;
         int i;
   
- -      for_each_cpu_mask(i, group->cpumask) {
+ +      for_each_cpu_mask_nr(i, group->cpumask) {
                 unsigned long wl;
   
                 if (!cpu_isset(i, *cpus))
@@@ -3849,7 -3770,7 +3770,7 @@@ int select_nohz_load_balancer(int stop_
                 /*
                  * If we are going offline and still the leader, give up!
                  */
-               if (cpu_is_offline(cpu) &&
+               if (!cpu_active(cpu) &&
                     atomic_read(&nohz.load_balancer) == cpu) {
                         if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu)
                                 BUG();
@@@ -3989,7 -3910,7 +3910,7 @@@ static void run_rebalance_domains(struc
                 int balance_cpu;
   
                 cpu_clear(this_cpu, cpus);
- -              for_each_cpu_mask(balance_cpu, cpus) {
+ +              for_each_cpu_mask_nr(balance_cpu, cpus) {
                         /*
                          * If this cpu gets work to do, stop the load balancing
                          * work being done for other cpus. Next load
@@@ -4395,7 -4316,7 +4316,7 @@@ asmlinkage void __sched schedule(void
         struct task_struct *prev, *next;
         unsigned long *switch_count;
         struct rq *rq;
-       int cpu, hrtick = sched_feat(HRTICK);
+       int cpu;
   
   need_resched:
         preempt_disable();
@@@ -4410,7 -4331,7 +4331,7 @@@ need_resched_nonpreemptible
   
         schedule_debug(prev);
   
-       if (hrtick)
+       if (sched_feat(HRTICK))
                 hrtick_clear(rq);
   
         /*
@@@ -4457,9 -4378,6 +4378,6 @@@
         } else
                 spin_unlock_irq(&rq->lock);
   
-       if (hrtick)
-               hrtick_set(rq);
- 
         if (unlikely(reacquire_kernel_lock(current) < 0))
                 goto need_resched_nonpreemptible;
   
@@@ -5876,7 -5794,7 +5794,7 @@@ static int __migrate_task(struct task_s
         struct rq *rq_dest, *rq_src;
         int ret = 0, on_rq;
   
-       if (unlikely(cpu_is_offline(dest_cpu)))
+       if (unlikely(!cpu_active(dest_cpu)))
                 return ret;
   
         rq_src = cpu_rq(src_cpu);
@@@ -6768,7 -6686,8 +6686,8 @@@ static cpumask_t cpu_isolated_map = CPU
   /* Setup the mask of cpus configured for isolated domains */
   static int __init isolated_cpu_setup(char *str)
   {
-       int ints[NR_CPUS], i;
+       static int __initdata ints[NR_CPUS];
+       int i;
   
         str = get_options(str, ARRAY_SIZE(ints), ints);
         cpus_clear(cpu_isolated_map);
@@@ -6802,7 -6721,7 +6721,7 @@@ init_sched_build_groups(const cpumask_
   
         cpus_clear(*covered);
   
- -      for_each_cpu_mask(i, *span) {
+ +      for_each_cpu_mask_nr(i, *span) {
                 struct sched_group *sg;
                 int group = group_fn(i, cpu_map, &sg, tmpmask);
                 int j;
@@@ -6813,7 -6732,7 +6732,7 @@@
                 cpus_clear(sg->cpumask);
                 sg->__cpu_power = 0;
   
- -              for_each_cpu_mask(j, *span) {
+ +              for_each_cpu_mask_nr(j, *span) {
                         if (group_fn(j, cpu_map, NULL, tmpmask) != group)
                                 continue;
   
@@@ -7013,7 -6932,7 +6932,7 @@@ static void init_numa_sched_groups_powe
         if (!sg)
                 return;
         do {
- -              for_each_cpu_mask(j, sg->cpumask) {
+ +              for_each_cpu_mask_nr(j, sg->cpumask) {
                         struct sched_domain *sd;
   
                         sd = &per_cpu(phys_domains, j);
@@@ -7038,7 -6957,7 +6957,7 @@@ static void free_sched_groups(const cpu
   {
         int cpu, i;
   
- -      for_each_cpu_mask(cpu, *cpu_map) {
+ +      for_each_cpu_mask_nr(cpu, *cpu_map) {
                 struct sched_group **sched_group_nodes
                         = sched_group_nodes_bycpu[cpu];
   
@@@ -7277,7 -7196,7 +7196,7 @@@ static int __build_sched_domains(const 
         /*
          * Set up domains for cpus specified by the cpu_map.
          */
- -      for_each_cpu_mask(i, *cpu_map) {
+ +      for_each_cpu_mask_nr(i, *cpu_map) {
                 struct sched_domain *sd = NULL, *p;
                 SCHED_CPUMASK_VAR(nodemask, allmasks);
   
@@@ -7344,7 -7263,7 +7263,7 @@@
   
   #ifdef CONFIG_SCHED_SMT
         /* Set up CPU (sibling) groups */
- -      for_each_cpu_mask(i, *cpu_map) {
+ +      for_each_cpu_mask_nr(i, *cpu_map) {
                 SCHED_CPUMASK_VAR(this_sibling_map, allmasks);
                 SCHED_CPUMASK_VAR(send_covered, allmasks);
   
@@@ -7361,7 -7280,7 +7280,7 @@@
   
   #ifdef CONFIG_SCHED_MC
         /* Set up multi-core groups */
- -      for_each_cpu_mask(i, *cpu_map) {
+ +      for_each_cpu_mask_nr(i, *cpu_map) {
                 SCHED_CPUMASK_VAR(this_core_map, allmasks);
                 SCHED_CPUMASK_VAR(send_covered, allmasks);
   
@@@ -7428,7 -7347,7 +7347,7 @@@
                         goto error;
                 }
                 sched_group_nodes[i] = sg;
- -              for_each_cpu_mask(j, *nodemask) {
+ +              for_each_cpu_mask_nr(j, *nodemask) {
                         struct sched_domain *sd;
   
                         sd = &per_cpu(node_domains, j);
@@@ -7474,21 -7393,21 +7393,21 @@@
   
         /* Calculate CPU power for physical packages and nodes */
   #ifdef CONFIG_SCHED_SMT
- -      for_each_cpu_mask(i, *cpu_map) {
+ +      for_each_cpu_mask_nr(i, *cpu_map) {
                 struct sched_domain *sd = &per_cpu(cpu_domains, i);
   
                 init_sched_groups_power(i, sd);
         }
   #endif
   #ifdef CONFIG_SCHED_MC
- -      for_each_cpu_mask(i, *cpu_map) {
+ +      for_each_cpu_mask_nr(i, *cpu_map) {
                 struct sched_domain *sd = &per_cpu(core_domains, i);
   
                 init_sched_groups_power(i, sd);
         }
   #endif
   
- -      for_each_cpu_mask(i, *cpu_map) {
+ +      for_each_cpu_mask_nr(i, *cpu_map) {
                 struct sched_domain *sd = &per_cpu(phys_domains, i);
   
                 init_sched_groups_power(i, sd);
@@@ -7508,7 -7427,7 +7427,7 @@@
   #endif
   
         /* Attach the domains */
- -      for_each_cpu_mask(i, *cpu_map) {
+ +      for_each_cpu_mask_nr(i, *cpu_map) {
                 struct sched_domain *sd;
   #ifdef CONFIG_SCHED_SMT
                 sd = &per_cpu(cpu_domains, i);
@@@ -7552,18 -7471,6 +7471,6 @@@ void __attribute__((weak)) arch_update_
   {
   }
   
- /*
-  * Free current domain masks.
-  * Called after all cpus are attached to NULL domain.
-  */
- static void free_sched_domains(void)
- {
-       ndoms_cur = 0;
-       if (doms_cur != &fallback_doms)
-               kfree(doms_cur);
-       doms_cur = &fallback_doms;
- }
- 
   /*
    * Set up scheduler domains and groups. Callers must hold the hotplug lock.
    * For now this just excludes isolated cpus, but could be used to
@@@ -7603,7 -7510,7 +7510,7 @@@ static void detach_destroy_domains(cons
   
         unregister_sched_domain_sysctl();
   
- -      for_each_cpu_mask(i, *cpu_map)
+ +      for_each_cpu_mask_nr(i, *cpu_map)
                 cpu_attach_domain(NULL, &def_root_domain, i);
         synchronize_sched();
         arch_destroy_sched_domains(cpu_map, &tmpmask);
@@@ -7642,7 -7549,7 +7549,7 @@@ static int dattrs_equal(struct sched_do
    * ownership of it and will kfree it when done with it. If the caller
    * failed the kmalloc call, then it can pass in doms_new == NULL,
    * and partition_sched_domains() will fallback to the single partition
-  * 'fallback_doms'.
+  * 'fallback_doms', it also forces the domains to be rebuilt.
    *
    * Call with hotplug lock held
    */
@@@ -7656,12 -7563,8 +7563,8 @@@ void partition_sched_domains(int ndoms_
         /* always unregister in case we don't destroy any domains */
         unregister_sched_domain_sysctl();
   
-       if (doms_new == NULL) {
-               ndoms_new = 1;
-               doms_new = &fallback_doms;
-               cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map);
-               dattr_new = NULL;
-       }
+       if (doms_new == NULL)
+               ndoms_new = 0;
   
         /* Destroy deleted domains */
         for (i = 0; i < ndoms_cur; i++) {
@@@ -7676,6 -7579,14 +7579,14 @@@ match1
                 ;
         }
   
+       if (doms_new == NULL) {
+               ndoms_cur = 0;
+               ndoms_new = 1;
+               doms_new = &fallback_doms;
+               cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map);
+               dattr_new = NULL;
+       }
+ 
         /* Build new domains */
         for (i = 0; i < ndoms_new; i++) {
                 for (j = 0; j < ndoms_cur; j++) {
@@@ -7706,17 -7617,10 +7617,10 @@@ match2
   #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
   int arch_reinit_sched_domains(void)
   {
-       int err;
- 
         get_online_cpus();
-       mutex_lock(&sched_domains_mutex);
-       detach_destroy_domains(&cpu_online_map);
-       free_sched_domains();
-       err = arch_init_sched_domains(&cpu_online_map);
-       mutex_unlock(&sched_domains_mutex);
+       rebuild_sched_domains();
         put_online_cpus();
- 
-       return err;
+       return 0;
   }
   
   static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt)
@@@ -7737,13 -7641,11 +7641,13 @@@
   }
   
   #ifdef CONFIG_SCHED_MC
- -static ssize_t sched_mc_power_savings_show(struct sys_device *dev, char *page)
+ +static ssize_t sched_mc_power_savings_show(struct sys_device *dev,
+ +                              struct sysdev_attribute *attr, char *page)
   {
         return sprintf(page, "%u\n", sched_mc_power_savings);
   }
   static ssize_t sched_mc_power_savings_store(struct sys_device *dev,
+ +                                          struct sysdev_attribute *attr,
                                             const char *buf, size_t count)
   {
         return sched_power_savings_store(buf, count, 0);
@@@ -7753,13 -7655,11 +7657,13 @@@ static SYSDEV_ATTR(sched_mc_power_savin
   #endif
   
   #ifdef CONFIG_SCHED_SMT
- -static ssize_t sched_smt_power_savings_show(struct sys_device *dev, char *page)
+ +static ssize_t sched_smt_power_savings_show(struct sys_device *dev,
+ +                              struct sysdev_attribute *attr, char *page)
   {
         return sprintf(page, "%u\n", sched_smt_power_savings);
   }
   static ssize_t sched_smt_power_savings_store(struct sys_device *dev,
+ +                                           struct sysdev_attribute *attr,
                                              const char *buf, size_t count)
   {
         return sched_power_savings_store(buf, count, 1);
@@@ -7786,14 -7686,30 +7690,30 @@@ int sched_create_sysfs_power_savings_en
   }
   #endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
   
+ #ifndef CONFIG_CPUSETS
   /*
-  * Force a reinitialization of the sched domains hierarchy. The domains
-  * and groups cannot be updated in place without racing with the balancing
-  * code, so we temporarily attach all running cpus to the NULL domain
-  * which will prevent rebalancing while the sched domains are recalculated.
+  * Add online and remove offline CPUs from the scheduler domains.
+  * When cpusets are enabled they take over this function.
    */
   static int update_sched_domains(struct notifier_block *nfb,
                                 unsigned long action, void *hcpu)
+ {
+       switch (action) {
+       case CPU_ONLINE:
+       case CPU_ONLINE_FROZEN:
+       case CPU_DEAD:
+       case CPU_DEAD_FROZEN:
+               partition_sched_domains(0, NULL, NULL);
+               return NOTIFY_OK;
+ 
+       default:
+               return NOTIFY_DONE;
+       }
+ }
+ #endif
+ 
+ static int update_runtime(struct notifier_block *nfb,
+                               unsigned long action, void *hcpu)
   {
         int cpu = (int)(long)hcpu;
   
@@@ -7801,44 -7717,18 +7721,18 @@@
         case CPU_DOWN_PREPARE:
         case CPU_DOWN_PREPARE_FROZEN:
                 disable_runtime(cpu_rq(cpu));
-               /* fall-through */
-       case CPU_UP_PREPARE:
-       case CPU_UP_PREPARE_FROZEN:
-               detach_destroy_domains(&cpu_online_map);
-               free_sched_domains();
                 return NOTIFY_OK;
   
- 
         case CPU_DOWN_FAILED:
         case CPU_DOWN_FAILED_FROZEN:
         case CPU_ONLINE:
         case CPU_ONLINE_FROZEN:
                 enable_runtime(cpu_rq(cpu));
-               /* fall-through */
-       case CPU_UP_CANCELED:
-       case CPU_UP_CANCELED_FROZEN:
-       case CPU_DEAD:
-       case CPU_DEAD_FROZEN:
-               /*
-                * Fall through and re-initialise the domains.
-                */
-               break;
+               return NOTIFY_OK;
+ 
         default:
                 return NOTIFY_DONE;
         }
- 
- #ifndef CONFIG_CPUSETS
-       /*
-        * Create default domain partitioning if cpusets are disabled.
-        * Otherwise we let cpusets rebuild the domains based on the
-        * current setup.
-        */
- 
-       /* The hotplug lock is already held by cpu_up/cpu_down */
-       arch_init_sched_domains(&cpu_online_map);
- #endif
- 
-       return NOTIFY_OK;
   }
   
   void __init sched_init_smp(void)
@@@ -7858,8 -7748,15 +7752,15 @@@
                 cpu_set(smp_processor_id(), non_isolated_cpus);
         mutex_unlock(&sched_domains_mutex);
         put_online_cpus();
+ 
+ #ifndef CONFIG_CPUSETS
         /* XXX: Theoretical race here - CPU may be hotplugged now */
         hotcpu_notifier(update_sched_domains, 0);
+ #endif
+ 
+       /* RT runtime code needs to handle some hotplug events */
+       hotcpu_notifier(update_runtime, 0);
+ 
         init_hrtick();
   
         /* Move init over to a non-isolated CPU */
diff --combined kernel/sched_fair.c

index bb61fe2,7f70026..cf2cd6c
--- 1/kernel/sched_fair.c
--- 2/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@@ -878,7 -878,6 +878,6 @@@ entity_tick(struct cfs_rq *cfs_rq, stru
   #ifdef CONFIG_SCHED_HRTICK
   static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
   {
-       int requeue = rq->curr == p;
         struct sched_entity *se = &p->se;
         struct cfs_rq *cfs_rq = cfs_rq_of(se);
   
@@@ -899,10 -898,10 +898,10 @@@
                  * Don't schedule slices shorter than 10000ns, that just
                  * doesn't make sense. Rely on vruntime for fairness.
                  */
-               if (!requeue)
+               if (rq->curr != p)
                         delta = max(10000LL, delta);
   
-               hrtick_start(rq, delta, requeue);
+               hrtick_start(rq, delta);
         }
   }
   #else /* !CONFIG_SCHED_HRTICK */
@@@ -1004,6 -1003,8 +1003,8 @@@ static void yield_task_fair(struct rq *
    * not idle and an idle cpu is available.  The span of cpus to
    * search starts with cpus closest then further out as needed,
    * so we always favor a closer, idle cpu.
+  * Domains may include CPUs that are not usable for migration,
+  * hence we need to mask them out (cpu_active_map)
    *
    * Returns the CPU we should wake onto.
    */
@@@ -1031,7 -1032,8 +1032,8 @@@ static int wake_idle(int cpu, struct ta
                     || ((sd->flags & SD_WAKE_IDLE_FAR)
                         && !task_hot(p, task_rq(p)->clock, sd))) {
                         cpus_and(tmp, sd->span, p->cpus_allowed);
- -                      for_each_cpu_mask(i, tmp) {
+                       cpus_and(tmp, tmp, cpu_active_map);
+ +                      for_each_cpu_mask_nr(i, tmp) {
                                 if (idle_cpu(i)) {
                                         if (i != task_cpu(p)) {
                                                 schedstat_inc(p,
diff --combined kernel/sched_rt.c

index 7c96147,24621ce..f85a763
--- 1/kernel/sched_rt.c
--- 2/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@@ -240,7 -240,7 +240,7 @@@ static int do_balance_runtime(struct rt
   
         spin_lock(&rt_b->rt_runtime_lock);
         rt_period = ktime_to_ns(rt_b->rt_period);
- -      for_each_cpu_mask(i, rd->span) {
+ +      for_each_cpu_mask_nr(i, rd->span) {
                 struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
                 s64 diff;
   
@@@ -505,7 -505,9 +505,9 @@@ void inc_rt_tasks(struct sched_rt_entit
         rt_rq->rt_nr_running++;
   #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
         if (rt_se_prio(rt_se) < rt_rq->highest_prio) {
+ #ifdef CONFIG_SMP
                 struct rq *rq = rq_of_rt_rq(rt_rq);
+ #endif
   
                 rt_rq->highest_prio = rt_se_prio(rt_se);
   #ifdef CONFIG_SMP
@@@ -599,11 -601,7 +601,7 @@@ static void __enqueue_rt_entity(struct 
         if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running))
                 return;
   
-       if (rt_se->nr_cpus_allowed == 1)
-               list_add(&rt_se->run_list, queue);
-       else
-               list_add_tail(&rt_se->run_list, queue);
- 
+       list_add_tail(&rt_se->run_list, queue);
         __set_bit(rt_se_prio(rt_se), array->bitmap);
   
         inc_rt_tasks(rt_se, rt_rq);
@@@ -688,32 -686,34 +686,34 @@@ static void dequeue_task_rt(struct rq *
    * Put task to the end of the run list without the overhead of dequeue
    * followed by enqueue.
    */
- static
- void requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se)
+ static void
+ requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se, int head)
   {
-       struct rt_prio_array *array = &rt_rq->active;
- 
         if (on_rt_rq(rt_se)) {
-               list_del_init(&rt_se->run_list);
-               list_add_tail(&rt_se->run_list,
-                             array->queue + rt_se_prio(rt_se));
+               struct rt_prio_array *array = &rt_rq->active;
+               struct list_head *queue = array->queue + rt_se_prio(rt_se);
+ 
+               if (head)
+                       list_move(&rt_se->run_list, queue);
+               else
+                       list_move_tail(&rt_se->run_list, queue);
         }
   }
   
- static void requeue_task_rt(struct rq *rq, struct task_struct *p)
+ static void requeue_task_rt(struct rq *rq, struct task_struct *p, int head)
   {
         struct sched_rt_entity *rt_se = &p->rt;
         struct rt_rq *rt_rq;
   
         for_each_sched_rt_entity(rt_se) {
                 rt_rq = rt_rq_of_se(rt_se);
-               requeue_rt_entity(rt_rq, rt_se);
+               requeue_rt_entity(rt_rq, rt_se, head);
         }
   }
   
   static void yield_task_rt(struct rq *rq)
   {
-       requeue_task_rt(rq, rq->curr);
+       requeue_task_rt(rq, rq->curr, 0);
   }
   
   #ifdef CONFIG_SMP
@@@ -753,6 -753,30 +753,30 @@@ static int select_task_rq_rt(struct tas
          */
         return task_cpu(p);
   }
+ 
+ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
+ {
+       cpumask_t mask;
+ 
+       if (rq->curr->rt.nr_cpus_allowed == 1)
+               return;
+ 
+       if (p->rt.nr_cpus_allowed != 1
+           && cpupri_find(&rq->rd->cpupri, p, &mask))
+               return;
+ 
+       if (!cpupri_find(&rq->rd->cpupri, rq->curr, &mask))
+               return;
+ 
+       /*
+        * There appears to be other cpus that can accept
+        * current and none to run 'p', so lets reschedule
+        * to try and push current away:
+        */
+       requeue_task_rt(rq, p, 1);
+       resched_task(rq->curr);
+ }
+ 
   #endif /* CONFIG_SMP */
   
   /*
@@@ -778,18 -802,8 +802,8 @@@ static void check_preempt_curr_rt(struc
          * to move current somewhere else, making room for our non-migratable
          * task.
          */
-       if((p->prio == rq->curr->prio)
-          && p->rt.nr_cpus_allowed == 1
-          && rq->curr->rt.nr_cpus_allowed != 1) {
-               cpumask_t mask;
- 
-               if (cpupri_find(&rq->rd->cpupri, rq->curr, &mask))
-                       /*
-                        * There appears to be other cpus that can accept
-                        * current, so lets reschedule to try and push it away
-                        */
-                       resched_task(rq->curr);
-       }
+       if (p->prio == rq->curr->prio && !need_resched())
+               check_preempt_equal_prio(rq, p);
   #endif
   }
   
@@@ -921,6 -935,13 +935,13 @@@ static int find_lowest_rq(struct task_s
         if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask))
                 return -1; /* No targets found */
   
+       /*
+        * Only consider CPUs that are usable for migration.
+        * I guess we might want to change cpupri_find() to ignore those
+        * in the first place.
+        */
+       cpus_and(*lowest_mask, *lowest_mask, cpu_active_map);
+ 
         /*
          * At this point we have built a mask of cpus representing the
          * lowest priority tasks in the system.  Now we want to elect
@@@ -1107,7 -1128,7 +1128,7 @@@ static int pull_rt_task(struct rq *this
   
         next = pick_next_task_rt(this_rq);
   
- -      for_each_cpu_mask(cpu, this_rq->rd->rto_mask) {
+ +      for_each_cpu_mask_nr(cpu, this_rq->rd->rto_mask) {
                 if (this_cpu == cpu)
                         continue;
   
@@@ -1415,7 -1436,7 +1436,7 @@@ static void task_tick_rt(struct rq *rq
          * on the queue:
          */
         if (p->rt.run_list.prev != p->rt.run_list.next) {
-               requeue_task_rt(rq, p);
+               requeue_task_rt(rq, p, 0);
                 set_tsk_need_resched(p);
         }
   }
author	Linus Torvalds <torvalds@linux-foundation.org>
	Thu, 24 Jul 2008 02:36:53 +0000 (19:36 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Thu, 24 Jul 2008 02:36:53 +0000 (19:36 -0700)
		1	2
arch/x86/kernel/signal_32.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/signal_64.c	patch \|	diff1 \|	diff2 \|	blob \| history
include/asm-x86/thread_info.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/cpumask.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/sched.h	patch \|	diff1 \|	diff2 \|	blob \| history
init/main.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/cpu.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/cpuset.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/sched.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/sched_fair.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/sched_rt.c	patch \|	diff1 \|	diff2 \|	blob \| history