Merge commit 'remotes/tip/x86/paravirt' into x86/untangle2

author Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Wed, 11 Feb 2009 19:52:22 +0000 (11:52 -0800)
committer Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Wed, 11 Feb 2009 19:52:22 +0000 (11:52 -0800)
diff --combined arch/arm/kernel/irq.c

index 363db18,4bb723e..45eacb5
--- 1/arch/arm/kernel/irq.c
--- 2/arch/arm/kernel/irq.c
+++ b/arch/arm/kernel/irq.c
@@@ -101,9 -101,14 +101,14 @@@ unlock
   /* Handle bad interrupts */
   static struct irq_desc bad_irq_desc = {
         .handle_irq = handle_bad_irq,
- -      .lock = SPIN_LOCK_UNLOCKED
+ +      .lock = __SPIN_LOCK_UNLOCKED(bad_irq_desc.lock),
   };
   
+ #ifdef CONFIG_CPUMASK_OFFSTACK
+ /* We are not allocating bad_irq_desc.affinity or .pending_mask */
+ #error "ARM architecture does not support CONFIG_CPUMASK_OFFSTACK."
+ #endif
+ 
   /*
    * do_IRQ handles all hardware IRQ's.  Decoded IRQs should not
    * come via this function.  Instead, they should provide their
@@@ -161,7 -166,7 +166,7 @@@ void __init init_IRQ(void
                 irq_desc[irq].status |= IRQ_NOREQUEST | IRQ_NOPROBE;
   
   #ifdef CONFIG_SMP
-       bad_irq_desc.affinity = CPU_MASK_ALL;
+       cpumask_setall(bad_irq_desc.affinity);
         bad_irq_desc.cpu = smp_processor_id();
   #endif
         init_arch_irq();
@@@ -191,15 -196,16 +196,16 @@@ void migrate_irqs(void
                 struct irq_desc *desc = irq_desc + i;
   
                 if (desc->cpu == cpu) {
-                       unsigned int newcpu = any_online_cpu(desc->affinity);
- 
-                       if (newcpu == NR_CPUS) {
+                       unsigned int newcpu = cpumask_any_and(desc->affinity,
+                                                             cpu_online_mask);
+                       if (newcpu >= nr_cpu_ids) {
                                 if (printk_ratelimit())
                                         printk(KERN_INFO "IRQ%u no longer affine to CPU%u\n",
                                                i, cpu);
   
-                               cpus_setall(desc->affinity);
-                               newcpu = any_online_cpu(desc->affinity);
+                               cpumask_setall(desc->affinity);
+                               newcpu = cpumask_any_and(desc->affinity,
+                                                        cpu_online_mask);
                         }
   
                         route_irq(desc, i, newcpu);
diff --combined arch/blackfin/kernel/irqchip.c

index 75724ee,5780d6d..23e9aa0
--- 1/arch/blackfin/kernel/irqchip.c
--- 2/arch/blackfin/kernel/irqchip.c
+++ b/arch/blackfin/kernel/irqchip.c
@@@ -35,7 -35,6 +35,7 @@@
   #include <linux/interrupt.h>
   #include <linux/irq.h>
   #include <asm/trace.h>
+ +#include <asm/pda.h>
   
   static atomic_t irq_err_count;
   static spinlock_t irq_controller_lock;
@@@ -70,6 -69,11 +70,11 @@@ static struct irq_desc bad_irq_desc = 
   #endif
   };
   
+ #ifdef CONFIG_CPUMASK_OFFSTACK
+ /* We are not allocating a variable-sized bad_irq_desc.affinity */
+ #error "Blackfin architecture does not support CONFIG_CPUMASK_OFFSTACK."
+ #endif
+ 
   int show_interrupts(struct seq_file *p, void *v)
   {
         int i = *(loff_t *) v, j;
@@@ -92,13 -96,8 +97,13 @@@
                 seq_putc(p, '\n');
    skip:
                 spin_unlock_irqrestore(&irq_desc[i].lock, flags);
- -      } else if (i == NR_IRQS)
+ +      } else if (i == NR_IRQS) {
+ +              seq_printf(p, "NMI: ");
+ +              for_each_online_cpu(j)
+ +                      seq_printf(p, "%10u ", cpu_pda[j].__nmi_count);
+ +              seq_printf(p, "     CORE  Non Maskable Interrupt\n");
                 seq_printf(p, "Err: %10u\n",  atomic_read(&irq_err_count));
+ +      }
         return 0;
   }
   
diff --combined arch/sparc/kernel/irq_64.c

index e289376,4ac5c65..3d2c6ba
--- 1/arch/sparc/kernel/irq_64.c
--- 2/arch/sparc/kernel/irq_64.c
+++ b/arch/sparc/kernel/irq_64.c
@@@ -196,11 -196,6 +196,11 @@@ int show_interrupts(struct seq_file *p
                 seq_putc(p, '\n');
   skip:
                 spin_unlock_irqrestore(&irq_desc[i].lock, flags);
+ +      } else if (i == NR_IRQS) {
+ +              seq_printf(p, "NMI: ");
+ +              for_each_online_cpu(j)
+ +                      seq_printf(p, "%10u ", cpu_data(j).__nmi_count);
+ +              seq_printf(p, "     Non-maskable interrupts\n");
         }
         return 0;
   }
@@@ -252,9 -247,10 +252,10 @@@ struct irq_handler_data 
   #ifdef CONFIG_SMP
   static int irq_choose_cpu(unsigned int virt_irq)
   {
-       cpumask_t mask = irq_desc[virt_irq].affinity;
+       cpumask_t mask;
         int cpuid;
   
+       cpumask_copy(&mask, irq_desc[virt_irq].affinity);
         if (cpus_equal(mask, CPU_MASK_ALL)) {
                 static int irq_rover;
                 static DEFINE_SPINLOCK(irq_rover_lock);
@@@ -783,6 -779,69 +784,6 @@@ void do_softirq(void
         local_irq_restore(flags);
   }
   
- -static void unhandled_perf_irq(struct pt_regs *regs)
- -{
- -      unsigned long pcr, pic;
- -
- -      read_pcr(pcr);
- -      read_pic(pic);
- -
- -      write_pcr(0);
- -
- -      printk(KERN_EMERG "CPU %d: Got unexpected perf counter IRQ.\n",
- -             smp_processor_id());
- -      printk(KERN_EMERG "CPU %d: PCR[%016lx] PIC[%016lx]\n",
- -             smp_processor_id(), pcr, pic);
- -}
- -
- -/* Almost a direct copy of the powerpc PMC code.  */
- -static DEFINE_SPINLOCK(perf_irq_lock);
- -static void *perf_irq_owner_caller; /* mostly for debugging */
- -static void (*perf_irq)(struct pt_regs *regs) = unhandled_perf_irq;
- -
- -/* Invoked from level 15 PIL handler in trap table.  */
- -void perfctr_irq(int irq, struct pt_regs *regs)
- -{
- -      clear_softint(1 << irq);
- -      perf_irq(regs);
- -}
- -
- -int register_perfctr_intr(void (*handler)(struct pt_regs *))
- -{
- -      int ret;
- -
- -      if (!handler)
- -              return -EINVAL;
- -
- -      spin_lock(&perf_irq_lock);
- -      if (perf_irq != unhandled_perf_irq) {
- -              printk(KERN_WARNING "register_perfctr_intr: "
- -                     "perf IRQ busy (reserved by caller %p)\n",
- -                     perf_irq_owner_caller);
- -              ret = -EBUSY;
- -              goto out;
- -      }
- -
- -      perf_irq_owner_caller = __builtin_return_address(0);
- -      perf_irq = handler;
- -
- -      ret = 0;
- -out:
- -      spin_unlock(&perf_irq_lock);
- -
- -      return ret;
- -}
- -EXPORT_SYMBOL_GPL(register_perfctr_intr);
- -
- -void release_perfctr_intr(void (*handler)(struct pt_regs *))
- -{
- -      spin_lock(&perf_irq_lock);
- -      perf_irq_owner_caller = NULL;
- -      perf_irq = unhandled_perf_irq;
- -      spin_unlock(&perf_irq_lock);
- -}
- -EXPORT_SYMBOL_GPL(release_perfctr_intr);
- -
   #ifdef CONFIG_HOTPLUG_CPU
   void fixup_irqs(void)
   {
@@@ -796,7 -855,7 +797,7 @@@
                     !(irq_desc[irq].status & IRQ_PER_CPU)) {
                         if (irq_desc[irq].chip->set_affinity)
                                 irq_desc[irq].chip->set_affinity(irq,
-                                       &irq_desc[irq].affinity);
+                                       irq_desc[irq].affinity);
                 }
                 spin_unlock_irqrestore(&irq_desc[irq].lock, flags);
         }
diff --combined arch/x86/ia32/ia32entry.S

index 5a0d76d,9c79b24..097a6b6
--- 1/arch/x86/ia32/ia32entry.S
--- 2/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@@ -112,8 -112,8 +112,8 @@@ ENTRY(ia32_sysenter_target
         CFI_DEF_CFA     rsp,0
         CFI_REGISTER    rsp,rbp
         SWAPGS_UNSAFE_STACK
-       movq    %gs:pda_kernelstack, %rsp
-       addq    $(PDA_STACKOFFSET),%rsp 
+       movq    PER_CPU_VAR(kernel_stack), %rsp
+       addq    $(KERNEL_STACK_OFFSET),%rsp
         /*
          * No need to follow this irqs on/off section: the syscall
          * disabled irqs, here we enable it straight after entry:
@@@ -273,13 -273,13 +273,13 @@@ ENDPROC(ia32_sysenter_target
   ENTRY(ia32_cstar_target)
         CFI_STARTPROC32 simple
         CFI_SIGNAL_FRAME
-       CFI_DEF_CFA     rsp,PDA_STACKOFFSET
+       CFI_DEF_CFA     rsp,KERNEL_STACK_OFFSET
         CFI_REGISTER    rip,rcx
         /*CFI_REGISTER  rflags,r11*/
         SWAPGS_UNSAFE_STACK
         movl    %esp,%r8d
         CFI_REGISTER    rsp,r8
-       movq    %gs:pda_kernelstack,%rsp
+       movq    PER_CPU_VAR(kernel_stack),%rsp
         /*
          * No need to follow this irqs on/off section: the syscall
          * disabled irqs and here we enable it straight after entry:
@@@ -418,9 -418,9 +418,9 @@@ ENTRY(ia32_syscall
         orl   $TS_COMPAT,TI_status(%r10)
         testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
         jnz ia32_tracesys
- -ia32_do_syscall:      
         cmpl $(IA32_NR_syscalls-1),%eax
- -      ja  int_ret_from_sys_call       /* ia32_tracesys has set RAX(%rsp) */
+ +      ja ia32_badsys
+ +ia32_do_call:
         IA32_ARG_FIXUP
         call *ia32_sys_call_table(,%rax,8) # xxx: rip relative
   ia32_sysret:
@@@ -435,9 -435,7 +435,9 @@@ ia32_tracesys
         call syscall_trace_enter
         LOAD_ARGS32 ARGOFFSET  /* reload args from stack in case ptrace changed it */
         RESTORE_REST
- -      jmp ia32_do_syscall
+ +      cmpl $(IA32_NR_syscalls-1),%eax
+ +      ja  int_ret_from_sys_call       /* ia32_tracesys has set RAX(%rsp) */
+ +      jmp ia32_do_call
   END(ia32_syscall)
   
   ia32_badsys:
diff --combined arch/x86/include/asm/page.h

index 823cc93,6b98108..4022699
--- 1/arch/x86/include/asm/page.h
--- 2/arch/x86/include/asm/page.h
+++ b/arch/x86/include/asm/page.h
@@@ -95,11 -95,6 +95,11 @@@ static inline pgdval_t native_pgd_val(p
         return pgd.pgd;
   }
   
+ +static inline pgdval_t pgd_flags(pgd_t pgd)
+ +{
+ +      return native_pgd_val(pgd) & PTE_FLAGS_MASK;
+ +}
+ +
   #if PAGETABLE_LEVELS >= 3
   #if PAGETABLE_LEVELS == 4
   typedef struct { pudval_t pud; } pud_t;
@@@ -122,11 -117,6 +122,11 @@@ static inline pudval_t native_pud_val(p
   }
   #endif        /* PAGETABLE_LEVELS == 4 */
   
+ +static inline pudval_t pud_flags(pud_t pud)
+ +{
+ +      return native_pud_val(pud) & PTE_FLAGS_MASK;
+ +}
+ +
   typedef struct { pmdval_t pmd; } pmd_t;
   
   static inline pmd_t native_make_pmd(pmdval_t val)
@@@ -138,7 -128,6 +138,7 @@@ static inline pmdval_t native_pmd_val(p
   {
         return pmd.pmd;
   }
+ +
   #else  /* PAGETABLE_LEVELS == 2 */
   #include <asm-generic/pgtable-nopmd.h>
   
@@@ -148,11 -137,6 +148,11 @@@ static inline pmdval_t native_pmd_val(p
   }
   #endif        /* PAGETABLE_LEVELS >= 3 */
   
+ +static inline pmdval_t pmd_flags(pmd_t pmd)
+ +{
+ +      return native_pmd_val(pmd) & PTE_FLAGS_MASK;
+ +}
+ +
   static inline pte_t native_make_pte(pteval_t val)
   {
         return (pte_t) { .pte = val };
@@@ -163,7 -147,7 +163,7 @@@ static inline pteval_t native_pte_val(p
         return pte.pte;
   }
   
- static inline pteval_t native_pte_flags(pte_t pte)
+ static inline pteval_t pte_flags(pte_t pte)
   {
         return native_pte_val(pte) & PTE_FLAGS_MASK;
   }
@@@ -189,7 -173,6 +189,6 @@@
   #endif
   
   #define pte_val(x)    native_pte_val(x)
- #define pte_flags(x)  native_pte_flags(x)
   #define __pte(x)      native_make_pte(x)
   
   #endif        /* CONFIG_PARAVIRT */
diff --combined arch/x86/include/asm/pgtable.h

index 1782053,6ceaef0..860f1b6
--- 1/arch/x86/include/asm/pgtable.h
--- 2/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@@ -1,8 -1,6 +1,8 @@@
   #ifndef _ASM_X86_PGTABLE_H
   #define _ASM_X86_PGTABLE_H
   
+ +#include <asm/page.h>
+ +
   #define FIRST_USER_ADDRESS    0
   
   #define _PAGE_BIT_PRESENT     0       /* is present */
@@@ -238,68 -236,82 +238,82 @@@ static inline unsigned long pte_pfn(pte
   
   static inline int pmd_large(pmd_t pte)
   {
- -      return (pmd_val(pte) & (_PAGE_PSE | _PAGE_PRESENT)) ==
+ +      return (pmd_flags(pte) & (_PAGE_PSE | _PAGE_PRESENT)) ==
                 (_PAGE_PSE | _PAGE_PRESENT);
   }
   
+ static inline pte_t pte_set_flags(pte_t pte, pteval_t set)
+ {
+       pteval_t v = native_pte_val(pte);
+ 
+       return native_make_pte(v | set);
+ }
+ 
+ static inline pte_t pte_clear_flags(pte_t pte, pteval_t clear)
+ {
+       pteval_t v = native_pte_val(pte);
+ 
+       return native_make_pte(v & ~clear);
+ }
+ 
   static inline pte_t pte_mkclean(pte_t pte)
   {
-       return __pte(pte_val(pte) & ~_PAGE_DIRTY);
+       return pte_clear_flags(pte, _PAGE_DIRTY);
   }
   
   static inline pte_t pte_mkold(pte_t pte)
   {
-       return __pte(pte_val(pte) & ~_PAGE_ACCESSED);
+       return pte_clear_flags(pte, _PAGE_ACCESSED);
   }
   
   static inline pte_t pte_wrprotect(pte_t pte)
   {
-       return __pte(pte_val(pte) & ~_PAGE_RW);
+       return pte_clear_flags(pte, _PAGE_RW);
   }
   
   static inline pte_t pte_mkexec(pte_t pte)
   {
-       return __pte(pte_val(pte) & ~_PAGE_NX);
+       return pte_clear_flags(pte, _PAGE_NX);
   }
   
   static inline pte_t pte_mkdirty(pte_t pte)
   {
-       return __pte(pte_val(pte) | _PAGE_DIRTY);
+       return pte_set_flags(pte, _PAGE_DIRTY);
   }
   
   static inline pte_t pte_mkyoung(pte_t pte)
   {
-       return __pte(pte_val(pte) | _PAGE_ACCESSED);
+       return pte_set_flags(pte, _PAGE_ACCESSED);
   }
   
   static inline pte_t pte_mkwrite(pte_t pte)
   {
-       return __pte(pte_val(pte) | _PAGE_RW);
+       return pte_set_flags(pte, _PAGE_RW);
   }
   
   static inline pte_t pte_mkhuge(pte_t pte)
   {
-       return __pte(pte_val(pte) | _PAGE_PSE);
+       return pte_set_flags(pte, _PAGE_PSE);
   }
   
   static inline pte_t pte_clrhuge(pte_t pte)
   {
-       return __pte(pte_val(pte) & ~_PAGE_PSE);
+       return pte_clear_flags(pte, _PAGE_PSE);
   }
   
   static inline pte_t pte_mkglobal(pte_t pte)
   {
-       return __pte(pte_val(pte) | _PAGE_GLOBAL);
+       return pte_set_flags(pte, _PAGE_GLOBAL);
   }
   
   static inline pte_t pte_clrglobal(pte_t pte)
   {
-       return __pte(pte_val(pte) & ~_PAGE_GLOBAL);
+       return pte_clear_flags(pte, _PAGE_GLOBAL);
   }
   
   static inline pte_t pte_mkspecial(pte_t pte)
   {
-       return __pte(pte_val(pte) | _PAGE_SPECIAL);
+       return pte_set_flags(pte, _PAGE_SPECIAL);
   }
   
   extern pteval_t __supported_pte_mask;
@@@ -439,190 -451,6 +453,190 @@@ static inline void __init paravirt_page
   # include "pgtable_64.h"
   #endif
   
+ +#ifndef __ASSEMBLY__
+ +#include <linux/mm_types.h>
+ +
+ +static inline int pte_none(pte_t pte)
+ +{
+ +      return !pte.pte;
+ +}
+ +
+ +#define __HAVE_ARCH_PTE_SAME
+ +static inline int pte_same(pte_t a, pte_t b)
+ +{
+ +      return a.pte == b.pte;
+ +}
+ +
+ +static inline int pte_present(pte_t a)
+ +{
+ +      return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE);
+ +}
+ +
+ +static inline int pmd_present(pmd_t pmd)
+ +{
+ +      return pmd_flags(pmd) & _PAGE_PRESENT;
+ +}
+ +
+ +static inline int pmd_none(pmd_t pmd)
+ +{
+ +      /* Only check low word on 32-bit platforms, since it might be
+ +         out of sync with upper half. */
+ +      return (unsigned long)native_pmd_val(pmd) == 0;
+ +}
+ +
+ +static inline unsigned long pmd_page_vaddr(pmd_t pmd)
+ +{
+ +      return (unsigned long)__va(pmd_val(pmd) & PTE_PFN_MASK);
+ +}
+ +
+ +/*
+ + * Currently stuck as a macro due to indirect forward reference to
+ + * linux/mmzone.h's __section_mem_map_addr() definition:
+ + */
+ +#define pmd_page(pmd) pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT)
+ +
+ +/*
+ + * the pmd page can be thought of an array like this: pmd_t[PTRS_PER_PMD]
+ + *
+ + * this macro returns the index of the entry in the pmd page which would
+ + * control the given virtual address
+ + */
+ +static inline unsigned pmd_index(unsigned long address)
+ +{
+ +      return (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1);
+ +}
+ +
+ +/*
+ + * Conversion functions: convert a page and protection to a page entry,
+ + * and a page entry and page directory to the page they refer to.
+ + *
+ + * (Currently stuck as a macro because of indirect forward reference
+ + * to linux/mm.h:page_to_nid())
+ + */
+ +#define mk_pte(page, pgprot)   pfn_pte(page_to_pfn(page), (pgprot))
+ +
+ +/*
+ + * the pte page can be thought of an array like this: pte_t[PTRS_PER_PTE]
+ + *
+ + * this function returns the index of the entry in the pte page which would
+ + * control the given virtual address
+ + */
+ +static inline unsigned pte_index(unsigned long address)
+ +{
+ +      return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
+ +}
+ +
+ +static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address)
+ +{
+ +      return (pte_t *)pmd_page_vaddr(*pmd) + pte_index(address);
+ +}
+ +
+ +static inline int pmd_bad(pmd_t pmd)
+ +{
+ +      return (pmd_flags(pmd) & ~_PAGE_USER) != _KERNPG_TABLE;
+ +}
+ +
+ +static inline unsigned long pages_to_mb(unsigned long npg)
+ +{
+ +      return npg >> (20 - PAGE_SHIFT);
+ +}
+ +
+ +#define io_remap_pfn_range(vma, vaddr, pfn, size, prot)       \
+ +      remap_pfn_range(vma, vaddr, pfn, size, prot)
+ +
+ +#if PAGETABLE_LEVELS == 2
+ +static inline int pud_large(pud_t pud)
+ +{
+ +      return 0;
+ +}
+ +#endif
+ +
+ +#if PAGETABLE_LEVELS > 2
+ +static inline int pud_none(pud_t pud)
+ +{
+ +      return native_pud_val(pud) == 0;
+ +}
+ +
+ +static inline int pud_present(pud_t pud)
+ +{
+ +      return pud_flags(pud) & _PAGE_PRESENT;
+ +}
+ +
+ +static inline unsigned long pud_page_vaddr(pud_t pud)
+ +{
+ +      return (unsigned long)__va((unsigned long)pud_val(pud) & PTE_PFN_MASK);
+ +}
+ +
+ +/*
+ + * Currently stuck as a macro due to indirect forward reference to
+ + * linux/mmzone.h's __section_mem_map_addr() definition:
+ + */
+ +#define pud_page(pud)         pfn_to_page(pud_val(pud) >> PAGE_SHIFT)
+ +
+ +/* Find an entry in the second-level page table.. */
+ +static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
+ +{
+ +      return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(address);
+ +}
+ +
+ +static inline unsigned long pmd_pfn(pmd_t pmd)
+ +{
+ +      return (pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT;
+ +}
+ +
+ +static inline int pud_large(pud_t pud)
+ +{
+ +      return (pud_flags(pud) & (_PAGE_PSE | _PAGE_PRESENT)) ==
+ +              (_PAGE_PSE | _PAGE_PRESENT);
+ +}
+ +
+ +static inline int pud_bad(pud_t pud)
+ +{
+ +      return (pud_flags(pud) & ~(_KERNPG_TABLE | _PAGE_USER)) != 0;
+ +}
+ +#endif        /* PAGETABLE_LEVELS > 2 */
+ +
+ +#if PAGETABLE_LEVELS > 3
+ +static inline int pgd_present(pgd_t pgd)
+ +{
+ +      return pgd_flags(pgd) & _PAGE_PRESENT;
+ +}
+ +
+ +static inline unsigned long pgd_page_vaddr(pgd_t pgd)
+ +{
+ +      return (unsigned long)__va((unsigned long)pgd_val(pgd) & PTE_PFN_MASK);
+ +}
+ +
+ +/*
+ + * Currently stuck as a macro due to indirect forward reference to
+ + * linux/mmzone.h's __section_mem_map_addr() definition:
+ + */
+ +#define pgd_page(pgd)         pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT)
+ +
+ +/* to find an entry in a page-table-directory. */
+ +static inline unsigned pud_index(unsigned long address)
+ +{
+ +      return (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1);
+ +}
+ +
+ +static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address)
+ +{
+ +      return (pud_t *)pgd_page_vaddr(*pgd) + pud_index(address);
+ +}
+ +
+ +static inline int pgd_bad(pgd_t pgd)
+ +{
+ +      return (pgd_flags(pgd) & ~_PAGE_USER) != _KERNPG_TABLE;
+ +}
+ +
+ +static inline int pgd_none(pgd_t pgd)
+ +{
+ +      return !native_pgd_val(pgd);
+ +}
+ +#endif        /* PAGETABLE_LEVELS > 3 */
+ +
+ +#endif        /* __ASSEMBLY__ */
+ +
   /*
    * the pgd page can be thought of an array like this: pgd_t[PTRS_PER_PGD]
    *
diff --combined arch/x86/include/asm/pgtable_64.h

index 100ac48,1df9637..1c4e247
--- 1/arch/x86/include/asm/pgtable_64.h
--- 2/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@@ -11,7 -11,6 +11,6 @@@
   #include <asm/processor.h>
   #include <linux/bitops.h>
   #include <linux/threads.h>
- #include <asm/pda.h>
   
   extern pud_t level3_kernel_pgt[512];
   extern pud_t level3_ident_pgt[512];
@@@ -67,6 -66,9 +66,6 @@@ extern void paging_init(void)
         printk("%s:%d: bad pgd %p(%016lx).\n",          \
                __FILE__, __LINE__, &(e), pgd_val(e))
   
- -#define pgd_none(x)   (!pgd_val(x))
- -#define pud_none(x)   (!pud_val(x))
- -
   struct mm_struct;
   
   void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte);
@@@ -131,6 -133,8 +130,6 @@@ static inline void native_pgd_clear(pgd
         native_set_pgd(pgd, native_make_pgd(0));
   }
   
- -#define pte_same(a, b)                ((a).pte == (b).pte)
- -
   #endif /* !__ASSEMBLY__ */
   
   #define PMD_SIZE      (_AC(1, UL) << PMD_SHIFT)
@@@ -151,6 -155,26 +150,6 @@@
   
   #ifndef __ASSEMBLY__
   
- -static inline int pgd_bad(pgd_t pgd)
- -{
- -      return (pgd_val(pgd) & ~(PTE_PFN_MASK | _PAGE_USER)) != _KERNPG_TABLE;
- -}
- -
- -static inline int pud_bad(pud_t pud)
- -{
- -      return (pud_val(pud) & ~(PTE_PFN_MASK | _PAGE_USER)) != _KERNPG_TABLE;
- -}
- -
- -static inline int pmd_bad(pmd_t pmd)
- -{
- -      return (pmd_val(pmd) & ~(PTE_PFN_MASK | _PAGE_USER)) != _KERNPG_TABLE;
- -}
- -
- -#define pte_none(x)   (!pte_val((x)))
- -#define pte_present(x)        (pte_val((x)) & (_PAGE_PRESENT | _PAGE_PROTNONE))
- -
- -#define pages_to_mb(x)        ((x) >> (20 - PAGE_SHIFT))   /* FIXME: is this right? */
- -
   /*
    * Conversion functions: convert a page and protection to a page entry,
    * and a page entry and page directory to the page they refer to.
@@@ -159,12 -183,41 +158,12 @@@
   /*
    * Level 4 access.
    */
- -#define pgd_page_vaddr(pgd)                                           \
- -      ((unsigned long)__va((unsigned long)pgd_val((pgd)) & PTE_PFN_MASK))
- -#define pgd_page(pgd)         (pfn_to_page(pgd_val((pgd)) >> PAGE_SHIFT))
- -#define pgd_present(pgd) (pgd_val(pgd) & _PAGE_PRESENT)
   static inline int pgd_large(pgd_t pgd) { return 0; }
   #define mk_kernel_pgd(address) __pgd((address) | _KERNPG_TABLE)
   
   /* PUD - Level3 access */
- -/* to find an entry in a page-table-directory. */
- -#define pud_page_vaddr(pud)                                           \
- -      ((unsigned long)__va(pud_val((pud)) & PHYSICAL_PAGE_MASK))
- -#define pud_page(pud) (pfn_to_page(pud_val((pud)) >> PAGE_SHIFT))
- -#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
- -#define pud_offset(pgd, address)                                      \
- -      ((pud_t *)pgd_page_vaddr(*(pgd)) + pud_index((address)))
- -#define pud_present(pud) (pud_val((pud)) & _PAGE_PRESENT)
- -
- -static inline int pud_large(pud_t pte)
- -{
- -      return (pud_val(pte) & (_PAGE_PSE | _PAGE_PRESENT)) ==
- -              (_PAGE_PSE | _PAGE_PRESENT);
- -}
   
   /* PMD  - Level 2 access */
- -#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val((pmd)) & PTE_PFN_MASK))
- -#define pmd_page(pmd)         (pfn_to_page(pmd_val((pmd)) >> PAGE_SHIFT))
- -
- -#define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
- -#define pmd_offset(dir, address) ((pmd_t *)pud_page_vaddr(*(dir)) + \
- -                                pmd_index(address))
- -#define pmd_none(x)   (!pmd_val((x)))
- -#define pmd_present(x)        (pmd_val((x)) & _PAGE_PRESENT)
- -#define pfn_pmd(nr, prot) (__pmd(((nr) << PAGE_SHIFT) | pgprot_val((prot))))
- -#define pmd_pfn(x)  ((pmd_val((x)) & __PHYSICAL_MASK) >> PAGE_SHIFT)
- -
   #define pte_to_pgoff(pte) ((pte_val((pte)) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT)
   #define pgoff_to_pte(off) ((pte_t) { .pte = ((off) << PAGE_SHIFT) |   \
                                             _PAGE_FILE })
@@@ -172,6 -225,13 +171,6 @@@
   
   /* PTE - Level 1 access. */
   
- -/* page, protection -> pte */
- -#define mk_pte(page, pgprot)  pfn_pte(page_to_pfn((page)), (pgprot))
- -
- -#define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
- -#define pte_offset_kernel(dir, address) ((pte_t *) pmd_page_vaddr(*(dir)) + \
- -                                       pte_index((address)))
- -
   /* x86-64 always has all page tables mapped. */
   #define pte_offset_map(dir, address) pte_offset_kernel((dir), (address))
   #define pte_offset_map_nested(dir, address) pte_offset_kernel((dir), (address))
@@@ -205,6 -265,9 +204,6 @@@ extern int direct_gbpages
   extern int kern_addr_valid(unsigned long addr);
   extern void cleanup_highmap(void);
   
- -#define io_remap_pfn_range(vma, vaddr, pfn, size, prot)       \
- -      remap_pfn_range(vma, vaddr, pfn, size, prot)
- -
   #define HAVE_ARCH_UNMAPPED_AREA
   #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
   
diff --combined arch/x86/kernel/acpi/sleep.c

index a60c1f3,4abff45..7c243a2
--- 1/arch/x86/kernel/acpi/sleep.c
--- 2/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@@ -101,6 -101,7 +101,7 @@@ int acpi_save_state_mem(void
         stack_start.sp = temp_stack + sizeof(temp_stack);
         early_gdt_descr.address =
                         (unsigned long)get_cpu_gdt_table(smp_processor_id());
+       initial_gs = per_cpu_offset(smp_processor_id());
   #endif
         initial_code = (unsigned long)wakeup_long64;
         saved_magic = 0x123456789abcdef0;
@@@ -156,11 -157,11 +157,11 @@@ static int __init acpi_sleep_setup(cha
   #ifdef CONFIG_HIBERNATION
                 if (strncmp(str, "s4_nohwsig", 10) == 0)
                         acpi_no_s4_hw_signature();
+ +              if (strncmp(str, "s4_nonvs", 8) == 0)
+ +                      acpi_s4_no_nvs();
   #endif
                 if (strncmp(str, "old_ordering", 12) == 0)
                         acpi_old_suspend_ordering();
- -              if (strncmp(str, "s4_nonvs", 8) == 0)
- -                      acpi_s4_no_nvs();
                 str = strchr(str, ',');
                 if (str != NULL)
                         str += strspn(str, ", \t");
diff --combined arch/x86/kernel/apic.c

index 115449f,c6f1564..383d827
--- 1/arch/x86/kernel/apic.c
--- 2/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@@ -60,6 -60,24 +60,24 @@@
   # error SPURIOUS_APIC_VECTOR definition error
   #endif
   
+ unsigned int num_processors;
+ unsigned disabled_cpus __cpuinitdata;
+ /* Processor that is doing the boot up */
+ unsigned int boot_cpu_physical_apicid = -1U;
+ EXPORT_SYMBOL(boot_cpu_physical_apicid);
+ unsigned int max_physical_apicid;
+ 
+ /* Bitmask of physically existing CPUs */
+ physid_mask_t phys_cpu_present_map;
+ 
+ /*
+  * Map cpu index to physical APIC ID
+  */
+ DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID);
+ DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID);
+ EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
+ EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
+ 
   #ifdef CONFIG_X86_32
   /*
    * Knob to control our willingness to enable the local APIC.
@@@ -1130,6 -1148,13 +1148,13 @@@ void __cpuinit setup_local_APIC(void
         unsigned int value;
         int i, j;
   
+       if (disable_apic) {
+ #ifdef CONFIG_X86_IO_APIC
+               disable_ioapic_setup();
+ #endif
+               return;
+       }
+ 
   #ifdef CONFIG_X86_32
         /* Pound the ESR really hard over the head with a big hammer - mbligh */
         if (lapic_is_integrated() && esr_disable) {
@@@ -1436,7 -1461,7 +1461,7 @@@ static int __init detect_init_APIC(void
         switch (boot_cpu_data.x86_vendor) {
         case X86_VENDOR_AMD:
                 if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1) ||
- -                  (boot_cpu_data.x86 == 15))
+ +                  (boot_cpu_data.x86 >= 15))
                         break;
                 goto no_apic;
         case X86_VENDOR_INTEL:
@@@ -1570,11 -1595,11 +1595,11 @@@ int apic_version[MAX_APICS]
   
   int __init APIC_init_uniprocessor(void)
   {
- #ifdef CONFIG_X86_64
         if (disable_apic) {
                 pr_info("Apic disabled\n");
                 return -1;
         }
+ #ifdef CONFIG_X86_64
         if (!cpu_has_apic) {
                 disable_apic = 1;
                 pr_info("Apic disabled by BIOS\n");
@@@ -1877,17 -1902,8 +1902,8 @@@ void __cpuinit generic_processor_info(i
   #endif
   
   #if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64)
-       /* are we being called early in kernel startup? */
-       if (early_per_cpu_ptr(x86_cpu_to_apicid)) {
-               u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
-               u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
- 
-               cpu_to_apicid[cpu] = apicid;
-               bios_cpu_apicid[cpu] = apicid;
-       } else {
-               per_cpu(x86_cpu_to_apicid, cpu) = apicid;
-               per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
-       }
+       early_per_cpu(x86_cpu_to_apicid, cpu) = apicid;
+       early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
   #endif
   
         set_cpu_possible(cpu, true);
diff --combined arch/x86/kernel/cpu/intel_cacheinfo.c

index da299eb,58527a9..7293508
--- 1/arch/x86/kernel/cpu/intel_cacheinfo.c
--- 2/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@@ -36,11 -36,8 +36,11 @@@ static struct _cache_table cache_table[
   {
         { 0x06, LVL_1_INST, 8 },        /* 4-way set assoc, 32 byte line size */
         { 0x08, LVL_1_INST, 16 },       /* 4-way set assoc, 32 byte line size */
+ +      { 0x09, LVL_1_INST, 32 },       /* 4-way set assoc, 64 byte line size */
         { 0x0a, LVL_1_DATA, 8 },        /* 2 way set assoc, 32 byte line size */
         { 0x0c, LVL_1_DATA, 16 },       /* 4-way set assoc, 32 byte line size */
+ +      { 0x0d, LVL_1_DATA, 16 },       /* 4-way set assoc, 64 byte line size */
+ +      { 0x21, LVL_2,      256 },      /* 8-way set assoc, 64 byte line size */
         { 0x22, LVL_3,      512 },      /* 4-way set assoc, sectored cache, 64 byte line size */
         { 0x23, LVL_3,      1024 },     /* 8-way set assoc, sectored cache, 64 byte line size */
         { 0x25, LVL_3,      2048 },     /* 8-way set assoc, sectored cache, 64 byte line size */
@@@ -88,18 -85,6 +88,18 @@@
         { 0x85, LVL_2,    2048 },       /* 8-way set assoc, 32 byte line size */
         { 0x86, LVL_2,     512 },       /* 4-way set assoc, 64 byte line size */
         { 0x87, LVL_2,    1024 },       /* 8-way set assoc, 64 byte line size */
+ +      { 0xd0, LVL_3,     512 },       /* 4-way set assoc, 64 byte line size */
+ +      { 0xd1, LVL_3,    1024 },       /* 4-way set assoc, 64 byte line size */
+ +      { 0xd2, LVL_3,    2048 },       /* 4-way set assoc, 64 byte line size */
+ +      { 0xd6, LVL_3,    1024 },       /* 8-way set assoc, 64 byte line size */
+ +      { 0xd7, LVL_3,    2048 },       /* 8-way set assoc, 64 byte line size */
+ +      { 0xd8, LVL_3,    4096 },       /* 12-way set assoc, 64 byte line size */
+ +      { 0xdc, LVL_3,    2048 },       /* 12-way set assoc, 64 byte line size */
+ +      { 0xdd, LVL_3,    4096 },       /* 12-way set assoc, 64 byte line size */
+ +      { 0xde, LVL_3,    8192 },       /* 12-way set assoc, 64 byte line size */
+ +      { 0xe2, LVL_3,    2048 },       /* 16-way set assoc, 64 byte line size */
+ +      { 0xe3, LVL_3,    4096 },       /* 16-way set assoc, 64 byte line size */
+ +      { 0xe4, LVL_3,    8192 },       /* 16-way set assoc, 64 byte line size */
         { 0x00, 0, 0}
   };
   
@@@ -147,7 -132,16 +147,16 @@@ struct _cpuid4_info 
         union _cpuid4_leaf_ecx ecx;
         unsigned long size;
         unsigned long can_disable;
-       cpumask_t shared_cpu_map;       /* future?: only cpus/node is needed */
+       DECLARE_BITMAP(shared_cpu_map, NR_CPUS);
+ };
+ 
+ /* subset of above _cpuid4_info w/o shared_cpu_map */
+ struct _cpuid4_info_regs {
+       union _cpuid4_leaf_eax eax;
+       union _cpuid4_leaf_ebx ebx;
+       union _cpuid4_leaf_ecx ecx;
+       unsigned long size;
+       unsigned long can_disable;
   };
   
   #ifdef CONFIG_PCI
@@@ -278,7 -272,7 +287,7 @@@ amd_cpuid4(int leaf, union _cpuid4_leaf
   }
   
   static void __cpuinit
- amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf)
+ amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf)
   {
         if (index < 3)
                 return;
@@@ -286,7 -280,8 +295,8 @@@
   }
   
   static int
- __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
+ __cpuinit cpuid4_cache_lookup_regs(int index,
+                                  struct _cpuid4_info_regs *this_leaf)
   {
         union _cpuid4_leaf_eax  eax;
         union _cpuid4_leaf_ebx  ebx;
@@@ -314,6 -309,15 +324,15 @@@
         return 0;
   }
   
+ static int
+ __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
+ {
+       struct _cpuid4_info_regs *leaf_regs =
+               (struct _cpuid4_info_regs *)this_leaf;
+ 
+       return cpuid4_cache_lookup_regs(index, leaf_regs);
+ }
+ 
   static int __cpuinit find_num_cache_leaves(void)
   {
         unsigned int            eax, ebx, ecx, edx;
@@@ -353,11 -357,10 +372,10 @@@ unsigned int __cpuinit init_intel_cache
                  * parameters cpuid leaf to find the cache details
                  */
                 for (i = 0; i < num_cache_leaves; i++) {
-                       struct _cpuid4_info this_leaf;
- 
+                       struct _cpuid4_info_regs this_leaf;
                         int retval;
   
-                       retval = cpuid4_cache_lookup(i, &this_leaf);
+                       retval = cpuid4_cache_lookup_regs(i, &this_leaf);
                         if (retval >= 0) {
                                 switch(this_leaf.eax.split.level) {
                                     case 1:
@@@ -506,17 -509,20 +524,20 @@@ static void __cpuinit cache_shared_cpu_
         num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing;
   
         if (num_threads_sharing == 1)
-               cpu_set(cpu, this_leaf->shared_cpu_map);
+               cpumask_set_cpu(cpu, to_cpumask(this_leaf->shared_cpu_map));
         else {
                 index_msb = get_count_order(num_threads_sharing);
   
                 for_each_online_cpu(i) {
                         if (cpu_data(i).apicid >> index_msb ==
                             c->apicid >> index_msb) {
-                               cpu_set(i, this_leaf->shared_cpu_map);
+                               cpumask_set_cpu(i,
+                                       to_cpumask(this_leaf->shared_cpu_map));
                                 if (i != cpu && per_cpu(cpuid4_info, i))  {
-                                       sibling_leaf = CPUID4_INFO_IDX(i, index);
-                                       cpu_set(cpu, sibling_leaf->shared_cpu_map);
+                                       sibling_leaf =
+                                               CPUID4_INFO_IDX(i, index);
+                                       cpumask_set_cpu(cpu, to_cpumask(
+                                               sibling_leaf->shared_cpu_map));
                                 }
                         }
                 }
@@@ -528,9 -534,10 +549,10 @@@ static void __cpuinit cache_remove_shar
         int sibling;
   
         this_leaf = CPUID4_INFO_IDX(cpu, index);
-       for_each_cpu_mask_nr(sibling, this_leaf->shared_cpu_map) {
+       for_each_cpu(sibling, to_cpumask(this_leaf->shared_cpu_map)) {
                 sibling_leaf = CPUID4_INFO_IDX(sibling, index);
-               cpu_clear(cpu, sibling_leaf->shared_cpu_map);
+               cpumask_clear_cpu(cpu,
+                                 to_cpumask(sibling_leaf->shared_cpu_map));
         }
   }
   #else
@@@ -635,8 -642,9 +657,9 @@@ static ssize_t show_shared_cpu_map_func
         int n = 0;
   
         if (len > 1) {
-               cpumask_t *mask = &this_leaf->shared_cpu_map;
+               const struct cpumask *mask;
   
+               mask = to_cpumask(this_leaf->shared_cpu_map);
                 n = type?
                         cpulist_scnprintf(buf, len-2, mask) :
                         cpumask_scnprintf(buf, len-2, mask);
@@@ -699,7 -707,8 +722,8 @@@ static struct pci_dev *get_k8_northbrid
   
   static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf)
   {
-       int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map));
+       const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map);
+       int node = cpu_to_node(cpumask_first(mask));
         struct pci_dev *dev = NULL;
         ssize_t ret = 0;
         int i;
@@@ -733,7 -742,8 +757,8 @@@ static ssize_
   store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf,
                     size_t count)
   {
-       int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map));
+       const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map);
+       int node = cpu_to_node(cpumask_first(mask));
         struct pci_dev *dev = NULL;
         unsigned int ret, index, val;
   
@@@ -878,7 -888,7 +903,7 @@@ err_out
         return -ENOMEM;
   }
   
- static cpumask_t cache_dev_map = CPU_MASK_NONE;
+ static DECLARE_BITMAP(cache_dev_map, NR_CPUS);
   
   /* Add/Remove cache interface for CPU device */
   static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
@@@ -918,7 -928,7 +943,7 @@@
                 }
                 kobject_uevent(&(this_object->kobj), KOBJ_ADD);
         }
-       cpu_set(cpu, cache_dev_map);
+       cpumask_set_cpu(cpu, to_cpumask(cache_dev_map));
   
         kobject_uevent(per_cpu(cache_kobject, cpu), KOBJ_ADD);
         return 0;
@@@ -931,9 -941,9 +956,9 @@@ static void __cpuinit cache_remove_dev(
   
         if (per_cpu(cpuid4_info, cpu) == NULL)
                 return;
-       if (!cpu_isset(cpu, cache_dev_map))
+       if (!cpumask_test_cpu(cpu, to_cpumask(cache_dev_map)))
                 return;
-       cpu_clear(cpu, cache_dev_map);
+       cpumask_clear_cpu(cpu, to_cpumask(cache_dev_map));
   
         for (i = 0; i < num_cache_leaves; i++)
                 kobject_put(&(INDEX_KOBJECT_PTR(cpu,i)->kobj));
diff --combined arch/x86/kernel/entry_64.S

index a134621,e4c9710..586bed6
--- 1/arch/x86/kernel/entry_64.S
--- 2/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@@ -52,6 -52,7 +52,7 @@@
   #include <asm/irqflags.h>
   #include <asm/paravirt.h>
   #include <asm/ftrace.h>
+ #include <asm/percpu.h>
   
   /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
   #include <linux/elf-em.h>
@@@ -209,7 -210,7 +210,7 @@@ ENTRY(native_usergs_sysret64
   
         /* %rsp:at FRAMEEND */
         .macro FIXUP_TOP_OF_STACK tmp offset=0
-       movq %gs:pda_oldrsp,\tmp
+       movq PER_CPU_VAR(old_rsp),\tmp
         movq \tmp,RSP+\offset(%rsp)
         movq $__USER_DS,SS+\offset(%rsp)
         movq $__USER_CS,CS+\offset(%rsp)
@@@ -220,7 -221,7 +221,7 @@@
   
         .macro RESTORE_TOP_OF_STACK tmp offset=0
         movq RSP+\offset(%rsp),\tmp
-       movq \tmp,%gs:pda_oldrsp
+       movq \tmp,PER_CPU_VAR(old_rsp)
         movq EFLAGS+\offset(%rsp),\tmp
         movq \tmp,R11+\offset(%rsp)
         .endm
@@@ -336,17 -337,16 +337,17 @@@ ENTRY(save_args
         je 1f
         SWAPGS
         /*
-        * irqcount is used to check if a CPU is already on an interrupt stack
+        * irq_count is used to check if a CPU is already on an interrupt stack
          * or not. While this is essentially redundant with preempt_count it is
          * a little cheaper to use a separate counter in the PDA (short of
          * moving irq_enter into assembly, which would be too much work)
          */
- 1:    incl %gs:pda_irqcount
+ 1:    incl PER_CPU_VAR(irq_count)
         jne 2f
         popq_cfi %rax                   /* move return address... */
-       mov %gs:pda_irqstackptr,%rsp
+       mov PER_CPU_VAR(irq_stack_ptr),%rsp
         EMPTY_FRAME 0
+ +      pushq_cfi %rbp                  /* backlink for unwinder */
         pushq_cfi %rax                  /* ... to the new stack */
         /*
          * We entered an interrupt context - irqs are off:
@@@ -468,7 -468,7 +469,7 @@@ END(ret_from_fork
   ENTRY(system_call)
         CFI_STARTPROC   simple
         CFI_SIGNAL_FRAME
-       CFI_DEF_CFA     rsp,PDA_STACKOFFSET
+       CFI_DEF_CFA     rsp,KERNEL_STACK_OFFSET
         CFI_REGISTER    rip,rcx
         /*CFI_REGISTER  rflags,r11*/
         SWAPGS_UNSAFE_STACK
@@@ -479,8 -479,8 +480,8 @@@
          */
   ENTRY(system_call_after_swapgs)
   
-       movq    %rsp,%gs:pda_oldrsp
-       movq    %gs:pda_kernelstack,%rsp
+       movq    %rsp,PER_CPU_VAR(old_rsp)
+       movq    PER_CPU_VAR(kernel_stack),%rsp
         /*
          * No need to follow this irqs off/on section - it's straight
          * and short:
@@@ -523,7 -523,7 +524,7 @@@ sysret_check
         CFI_REGISTER    rip,rcx
         RESTORE_ARGS 0,-ARG_SKIP,1
         /*CFI_REGISTER  rflags,r11*/
-       movq    %gs:pda_oldrsp, %rsp
+       movq    PER_CPU_VAR(old_rsp), %rsp
         USERGS_SYSRET64
   
         CFI_RESTORE_STATE
@@@ -833,11 -833,11 +834,11 @@@ common_interrupt
         XCPT_FRAME
         addq $-0x80,(%rsp)              /* Adjust vector to [-256,-1] range */
         interrupt do_IRQ
-       /* 0(%rsp): oldrsp-ARGOFFSET */
+       /* 0(%rsp): old_rsp-ARGOFFSET */
   ret_from_intr:
         DISABLE_INTERRUPTS(CLBR_NONE)
         TRACE_IRQS_OFF
-       decl %gs:pda_irqcount
+       decl PER_CPU_VAR(irq_count)
         leaveq
         CFI_DEF_CFA_REGISTER    rsp
         CFI_ADJUST_CFA_OFFSET   -8
@@@ -982,8 -982,10 +983,10 @@@ apicinterrupt IRQ_MOVE_CLEANUP_VECTOR 
         irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt
   #endif
   
+ #ifdef CONFIG_X86_UV
   apicinterrupt UV_BAU_MESSAGE \
         uv_bau_message_intr1 uv_bau_message_interrupt
+ #endif
   apicinterrupt LOCAL_TIMER_VECTOR \
         apic_timer_interrupt smp_apic_timer_interrupt
   
@@@ -1073,10 -1075,10 +1076,10 @@@ ENTRY(\sym
         TRACE_IRQS_OFF
         movq %rsp,%rdi          /* pt_regs pointer */
         xorl %esi,%esi          /* no error code */
-       movq %gs:pda_data_offset, %rbp
-       subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
+       PER_CPU(init_tss, %rbp)
+       subq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp)
         call \do_sym
-       addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
+       addq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp)
         jmp paranoid_exit       /* %ebx: no swapgs flag */
         CFI_ENDPROC
   END(\sym)
@@@ -1138,7 -1140,7 +1141,7 @@@ ENTRY(native_load_gs_index
         CFI_STARTPROC
         pushf
         CFI_ADJUST_CFA_OFFSET 8
-       DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI))
+       DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI)
         SWAPGS
   gs_change:
         movl %edi,%gs
@@@ -1260,14 -1262,14 +1263,14 @@@ ENTRY(call_softirq
         CFI_REL_OFFSET rbp,0
         mov  %rsp,%rbp
         CFI_DEF_CFA_REGISTER rbp
-       incl %gs:pda_irqcount
-       cmove %gs:pda_irqstackptr,%rsp
+       incl PER_CPU_VAR(irq_count)
+       cmove PER_CPU_VAR(irq_stack_ptr),%rsp
         push  %rbp                      # backlink for old unwinder
         call __do_softirq
         leaveq
         CFI_DEF_CFA_REGISTER    rsp
         CFI_ADJUST_CFA_OFFSET   -8
-       decl %gs:pda_irqcount
+       decl PER_CPU_VAR(irq_count)
         ret
         CFI_ENDPROC
   END(call_softirq)
@@@ -1297,15 -1299,15 +1300,15 @@@ ENTRY(xen_do_hypervisor_callback)   # d
         movq %rdi, %rsp            # we don't return, adjust the stack frame
         CFI_ENDPROC
         DEFAULT_FRAME
- 11:   incl %gs:pda_irqcount
+ 11:   incl PER_CPU_VAR(irq_count)
         movq %rsp,%rbp
         CFI_DEF_CFA_REGISTER rbp
-       cmovzq %gs:pda_irqstackptr,%rsp
+       cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
         pushq %rbp                      # backlink for old unwinder
         call xen_evtchn_do_upcall
         popq %rsp
         CFI_DEF_CFA_REGISTER rsp
-       decl %gs:pda_irqcount
+       decl PER_CPU_VAR(irq_count)
         jmp  error_exit
         CFI_ENDPROC
   END(do_hypervisor_callback)
diff --combined arch/x86/kernel/io_apic.c

index 9b0c480,e4d36bd..0a7f6d6
--- 1/arch/x86/kernel/io_apic.c
--- 2/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@@ -46,6 -46,7 +46,7 @@@
   #include <asm/idle.h>
   #include <asm/io.h>
   #include <asm/smp.h>
+ #include <asm/cpu.h>
   #include <asm/desc.h>
   #include <asm/proto.h>
   #include <asm/acpi.h>
@@@ -82,11 -83,11 +83,11 @@@ static DEFINE_SPINLOCK(vector_lock)
   int nr_ioapic_registers[MAX_IO_APICS];
   
   /* I/O APIC entries */
- struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
+ struct mpc_ioapic mp_ioapics[MAX_IO_APICS];
   int nr_ioapics;
   
   /* MP IRQ source entries */
- struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
+ struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES];
   
   /* # of MP IRQ source entries */
   int mp_irq_entries;
@@@ -356,7 -357,7 +357,7 @@@ set_extra_move_desc(struct irq_desc *de
   
         if (!cfg->move_in_progress) {
                 /* it means that domain is not changed */
-               if (!cpumask_intersects(&desc->affinity, mask))
+               if (!cpumask_intersects(desc->affinity, mask))
                         cfg->move_desc_pending = 1;
         }
   }
@@@ -386,7 -387,7 +387,7 @@@ struct io_apic 
   static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
   {
         return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
-               + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK);
+               + (mp_ioapics[idx].apicaddr & ~PAGE_MASK);
   }
   
   static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
@@@ -579,9 -580,9 +580,9 @@@ set_desc_affinity(struct irq_desc *desc
         if (assign_irq_vector(irq, cfg, mask))
                 return BAD_APICID;
   
-       cpumask_and(&desc->affinity, cfg->domain, mask);
+       cpumask_and(desc->affinity, cfg->domain, mask);
         set_extra_move_desc(desc, mask);
-       return cpu_mask_to_apicid_and(&desc->affinity, cpu_online_mask);
+       return cpu_mask_to_apicid_and(desc->affinity, cpu_online_mask);
   }
   
   static void
@@@ -944,10 -945,10 +945,10 @@@ static int find_irq_entry(int apic, in
         int i;
   
         for (i = 0; i < mp_irq_entries; i++)
-               if (mp_irqs[i].mp_irqtype == type &&
-                   (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid ||
-                    mp_irqs[i].mp_dstapic == MP_APIC_ALL) &&
-                   mp_irqs[i].mp_dstirq == pin)
+               if (mp_irqs[i].irqtype == type &&
+                   (mp_irqs[i].dstapic == mp_ioapics[apic].apicid ||
+                    mp_irqs[i].dstapic == MP_APIC_ALL) &&
+                   mp_irqs[i].dstirq == pin)
                         return i;
   
         return -1;
@@@ -961,13 -962,13 +962,13 @@@ static int __init find_isa_irq_pin(int 
         int i;
   
         for (i = 0; i < mp_irq_entries; i++) {
-               int lbus = mp_irqs[i].mp_srcbus;
+               int lbus = mp_irqs[i].srcbus;
   
                 if (test_bit(lbus, mp_bus_not_pci) &&
-                   (mp_irqs[i].mp_irqtype == type) &&
-                   (mp_irqs[i].mp_srcbusirq == irq))
+                   (mp_irqs[i].irqtype == type) &&
+                   (mp_irqs[i].srcbusirq == irq))
   
-                       return mp_irqs[i].mp_dstirq;
+                       return mp_irqs[i].dstirq;
         }
         return -1;
   }
@@@ -977,17 -978,17 +978,17 @@@ static int __init find_isa_irq_apic(in
         int i;
   
         for (i = 0; i < mp_irq_entries; i++) {
-               int lbus = mp_irqs[i].mp_srcbus;
+               int lbus = mp_irqs[i].srcbus;
   
                 if (test_bit(lbus, mp_bus_not_pci) &&
-                   (mp_irqs[i].mp_irqtype == type) &&
-                   (mp_irqs[i].mp_srcbusirq == irq))
+                   (mp_irqs[i].irqtype == type) &&
+                   (mp_irqs[i].srcbusirq == irq))
                         break;
         }
         if (i < mp_irq_entries) {
                 int apic;
                 for(apic = 0; apic < nr_ioapics; apic++) {
-                       if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic)
+                       if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic)
                                 return apic;
                 }
         }
@@@ -1012,23 -1013,23 +1013,23 @@@ int IO_APIC_get_PCI_irq_vector(int bus
                 return -1;
         }
         for (i = 0; i < mp_irq_entries; i++) {
-               int lbus = mp_irqs[i].mp_srcbus;
+               int lbus = mp_irqs[i].srcbus;
   
                 for (apic = 0; apic < nr_ioapics; apic++)
-                       if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic ||
-                           mp_irqs[i].mp_dstapic == MP_APIC_ALL)
+                       if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic ||
+                           mp_irqs[i].dstapic == MP_APIC_ALL)
                                 break;
   
                 if (!test_bit(lbus, mp_bus_not_pci) &&
-                   !mp_irqs[i].mp_irqtype &&
+                   !mp_irqs[i].irqtype &&
                     (bus == lbus) &&
-                   (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) {
-                       int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq);
+                   (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) {
+                       int irq = pin_2_irq(i, apic, mp_irqs[i].dstirq);
   
                         if (!(apic || IO_APIC_IRQ(irq)))
                                 continue;
   
-                       if (pin == (mp_irqs[i].mp_srcbusirq & 3))
+                       if (pin == (mp_irqs[i].srcbusirq & 3))
                                 return irq;
                         /*
                          * Use the first all-but-pin matching entry as a
@@@ -1071,7 -1072,7 +1072,7 @@@ static int EISA_ELCR(unsigned int irq
    * EISA conforming in the MP table, that means its trigger type must
    * be read in from the ELCR */
   
- #define default_EISA_trigger(idx)     (EISA_ELCR(mp_irqs[idx].mp_srcbusirq))
+ #define default_EISA_trigger(idx)     (EISA_ELCR(mp_irqs[idx].srcbusirq))
   #define default_EISA_polarity(idx)    default_ISA_polarity(idx)
   
   /* PCI interrupts are always polarity one level triggered,
@@@ -1088,13 -1089,13 +1089,13 @@@
   
   static int MPBIOS_polarity(int idx)
   {
-       int bus = mp_irqs[idx].mp_srcbus;
+       int bus = mp_irqs[idx].srcbus;
         int polarity;
   
         /*
          * Determine IRQ line polarity (high active or low active):
          */
-       switch (mp_irqs[idx].mp_irqflag & 3)
+       switch (mp_irqs[idx].irqflag & 3)
         {
                 case 0: /* conforms, ie. bus-type dependent polarity */
                         if (test_bit(bus, mp_bus_not_pci))
@@@ -1130,13 -1131,13 +1131,13 @@@
   
   static int MPBIOS_trigger(int idx)
   {
-       int bus = mp_irqs[idx].mp_srcbus;
+       int bus = mp_irqs[idx].srcbus;
         int trigger;
   
         /*
          * Determine IRQ trigger mode (edge or level sensitive):
          */
-       switch ((mp_irqs[idx].mp_irqflag>>2) & 3)
+       switch ((mp_irqs[idx].irqflag>>2) & 3)
         {
                 case 0: /* conforms, ie. bus-type dependent */
                         if (test_bit(bus, mp_bus_not_pci))
@@@ -1214,16 -1215,16 +1215,16 @@@ int (*ioapic_renumber_irq)(int ioapic, 
   static int pin_2_irq(int idx, int apic, int pin)
   {
         int irq, i;
-       int bus = mp_irqs[idx].mp_srcbus;
+       int bus = mp_irqs[idx].srcbus;
   
         /*
          * Debugging check, we are in big trouble if this message pops up!
          */
-       if (mp_irqs[idx].mp_dstirq != pin)
+       if (mp_irqs[idx].dstirq != pin)
                 printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
   
         if (test_bit(bus, mp_bus_not_pci)) {
-               irq = mp_irqs[idx].mp_srcbusirq;
+               irq = mp_irqs[idx].srcbusirq;
         } else {
                 /*
                  * PCI IRQs are mapped in order
@@@ -1566,14 -1567,14 +1567,14 @@@ static void setup_IO_APIC_irq(int apic
         apic_printk(APIC_VERBOSE,KERN_DEBUG
                     "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
                     "IRQ %d Mode:%i Active:%i)\n",
-                   apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector,
+                   apic, mp_ioapics[apic].apicid, pin, cfg->vector,
                     irq, trigger, polarity);
   
   
-       if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry,
+       if (setup_ioapic_entry(mp_ioapics[apic].apicid, irq, &entry,
                                dest, trigger, polarity, cfg->vector)) {
                 printk("Failed to setup ioapic entry for ioapic  %d, pin %d\n",
-                      mp_ioapics[apic].mp_apicid, pin);
+                      mp_ioapics[apic].apicid, pin);
                 __clear_irq_vector(irq, cfg);
                 return;
         }
@@@ -1604,12 -1605,10 +1605,10 @@@ static void __init setup_IO_APIC_irqs(v
                                         notcon = 1;
                                         apic_printk(APIC_VERBOSE,
                                                 KERN_DEBUG " %d-%d",
-                                               mp_ioapics[apic].mp_apicid,
-                                               pin);
+                                               mp_ioapics[apic].apicid, pin);
                                 } else
                                         apic_printk(APIC_VERBOSE, " %d-%d",
-                                               mp_ioapics[apic].mp_apicid,
-                                               pin);
+                                               mp_ioapics[apic].apicid, pin);
                                 continue;
                         }
                         if (notcon) {
@@@ -1699,7 -1698,7 +1698,7 @@@ __apicdebuginit(void) print_IO_APIC(voi
         printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
         for (i = 0; i < nr_ioapics; i++)
                 printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
-                      mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]);
+                      mp_ioapics[i].apicid, nr_ioapic_registers[i]);
   
         /*
          * We are a bit conservative about what we expect.  We have to
@@@ -1719,7 -1718,7 +1718,7 @@@
         spin_unlock_irqrestore(&ioapic_lock, flags);
   
         printk("\n");
-       printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid);
+       printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].apicid);
         printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
         printk(KERN_DEBUG ".......    : physical APIC id: %02X\n", reg_00.bits.ID);
         printk(KERN_DEBUG ".......    : Delivery Type: %X\n", reg_00.bits.delivery_type);
@@@ -2121,14 -2120,14 +2120,14 @@@ static void __init setup_ioapic_ids_fro
                 reg_00.raw = io_apic_read(apic, 0);
                 spin_unlock_irqrestore(&ioapic_lock, flags);
   
-               old_id = mp_ioapics[apic].mp_apicid;
+               old_id = mp_ioapics[apic].apicid;
   
-               if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) {
+               if (mp_ioapics[apic].apicid >= get_physical_broadcast()) {
                         printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
-                               apic, mp_ioapics[apic].mp_apicid);
+                               apic, mp_ioapics[apic].apicid);
                         printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
                                 reg_00.bits.ID);
-                       mp_ioapics[apic].mp_apicid = reg_00.bits.ID;
+                       mp_ioapics[apic].apicid = reg_00.bits.ID;
                 }
   
                 /*
@@@ -2137,9 -2136,9 +2136,9 @@@
                  * 'stuck on smp_invalidate_needed IPI wait' messages.
                  */
                 if (check_apicid_used(phys_id_present_map,
-                                       mp_ioapics[apic].mp_apicid)) {
+                                       mp_ioapics[apic].apicid)) {
                         printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
-                               apic, mp_ioapics[apic].mp_apicid);
+                               apic, mp_ioapics[apic].apicid);
                         for (i = 0; i < get_physical_broadcast(); i++)
                                 if (!physid_isset(i, phys_id_present_map))
                                         break;
@@@ -2148,13 -2147,13 +2147,13 @@@
                         printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
                                 i);
                         physid_set(i, phys_id_present_map);
-                       mp_ioapics[apic].mp_apicid = i;
+                       mp_ioapics[apic].apicid = i;
                 } else {
                         physid_mask_t tmp;
-                       tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid);
+                       tmp = apicid_to_cpu_present(mp_ioapics[apic].apicid);
                         apic_printk(APIC_VERBOSE, "Setting %d in the "
                                         "phys_id_present_map\n",
-                                       mp_ioapics[apic].mp_apicid);
+                                       mp_ioapics[apic].apicid);
                         physids_or(phys_id_present_map, phys_id_present_map, tmp);
                 }
   
@@@ -2163,11 -2162,11 +2162,11 @@@
                  * We need to adjust the IRQ routing table
                  * if the ID changed.
                  */
-               if (old_id != mp_ioapics[apic].mp_apicid)
+               if (old_id != mp_ioapics[apic].apicid)
                         for (i = 0; i < mp_irq_entries; i++)
-                               if (mp_irqs[i].mp_dstapic == old_id)
-                                       mp_irqs[i].mp_dstapic
-                                               = mp_ioapics[apic].mp_apicid;
+                               if (mp_irqs[i].dstapic == old_id)
+                                       mp_irqs[i].dstapic
+                                               = mp_ioapics[apic].apicid;
   
                 /*
                  * Read the right value from the MPC table and
@@@ -2175,9 -2174,9 +2174,9 @@@
                  */
                 apic_printk(APIC_VERBOSE, KERN_INFO
                         "...changing IO-APIC physical APIC ID to %d ...",
-                       mp_ioapics[apic].mp_apicid);
+                       mp_ioapics[apic].apicid);
   
-               reg_00.bits.ID = mp_ioapics[apic].mp_apicid;
+               reg_00.bits.ID = mp_ioapics[apic].apicid;
                 spin_lock_irqsave(&ioapic_lock, flags);
                 io_apic_write(apic, 0, reg_00.raw);
                 spin_unlock_irqrestore(&ioapic_lock, flags);
@@@ -2188,7 -2187,7 +2187,7 @@@
                 spin_lock_irqsave(&ioapic_lock, flags);
                 reg_00.raw = io_apic_read(apic, 0);
                 spin_unlock_irqrestore(&ioapic_lock, flags);
-               if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid)
+               if (reg_00.bits.ID != mp_ioapics[apic].apicid)
                         printk("could not set ID!\n");
                 else
                         apic_printk(APIC_VERBOSE, " ok.\n");
@@@ -2383,7 -2382,7 +2382,7 @@@ migrate_ioapic_irq_desc(struct irq_des
         if (cfg->move_in_progress)
                 send_cleanup_vector(cfg);
   
-       cpumask_copy(&desc->affinity, mask);
+       cpumask_copy(desc->affinity, mask);
   }
   
   static int migrate_irq_remapped_level_desc(struct irq_desc *desc)
@@@ -2405,11 -2404,11 +2404,11 @@@
         }
   
         /* everything is clear. we have right of way */
-       migrate_ioapic_irq_desc(desc, &desc->pending_mask);
+       migrate_ioapic_irq_desc(desc, desc->pending_mask);
   
         ret = 0;
         desc->status &= ~IRQ_MOVE_PENDING;
-       cpumask_clear(&desc->pending_mask);
+       cpumask_clear(desc->pending_mask);
   
   unmask:
         unmask_IO_APIC_irq_desc(desc);
@@@ -2434,7 -2433,7 +2433,7 @@@ static void ir_irq_migration(struct wor
                                 continue;
                         }
   
-                       desc->chip->set_affinity(irq, &desc->pending_mask);
+                       desc->chip->set_affinity(irq, desc->pending_mask);
                         spin_unlock_irqrestore(&desc->lock, flags);
                 }
         }
@@@ -2448,7 -2447,7 +2447,7 @@@ static void set_ir_ioapic_affinity_irq_
   {
         if (desc->status & IRQ_LEVEL) {
                 desc->status |= IRQ_MOVE_PENDING;
-               cpumask_copy(&desc->pending_mask, mask);
+               cpumask_copy(desc->pending_mask, mask);
                 migrate_irq_remapped_level_desc(desc);
                 return;
         }
@@@ -2516,7 -2515,7 +2515,7 @@@ static void irq_complete_move(struct ir
   
                 /* domain has not changed, but affinity did */
                 me = smp_processor_id();
-               if (cpu_isset(me, desc->affinity)) {
+               if (cpumask_test_cpu(me, desc->affinity)) {
                         *descp = desc = move_irq_desc(desc, me);
                         /* get the new one */
                         cfg = desc->chip_data;
@@@ -2528,15 -2527,14 +2527,15 @@@
   
         vector = ~get_irq_regs()->orig_ax;
         me = smp_processor_id();
+ +
+ +      if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) {
   #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
                 *descp = desc = move_irq_desc(desc, me);
                 /* get the new one */
                 cfg = desc->chip_data;
   #endif
- -
- -      if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
                 send_cleanup_vector(cfg);
+ +      }
   }
   #else
   static inline void irq_complete_move(struct irq_desc **descp) {}
@@@ -3118,8 -3116,8 +3117,8 @@@ static int ioapic_resume(struct sys_dev
   
         spin_lock_irqsave(&ioapic_lock, flags);
         reg_00.raw = io_apic_read(dev->id, 0);
-       if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) {
-               reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid;
+       if (reg_00.bits.ID != mp_ioapics[dev->id].apicid) {
+               reg_00.bits.ID = mp_ioapics[dev->id].apicid;
                 io_apic_write(dev->id, 0, reg_00.raw);
         }
         spin_unlock_irqrestore(&ioapic_lock, flags);
@@@ -3184,7 -3182,7 +3183,7 @@@ unsigned int create_irq_nr(unsigned in
   
         irq = 0;
         spin_lock_irqsave(&vector_lock, flags);
-       for (new = irq_want; new < NR_IRQS; new++) {
+       for (new = irq_want; new < nr_irqs; new++) {
                 if (platform_legacy_irq(new))
                         continue;
   
@@@ -3259,6 -3257,9 +3258,9 @@@ static int msi_compose_msg(struct pci_d
         int err;
         unsigned dest;
   
+       if (disable_apic)
+               return -ENXIO;
+ 
         cfg = irq_cfg(irq);
         err = assign_irq_vector(irq, cfg, TARGET_CPUS);
         if (err)
@@@ -3727,6 -3728,9 +3729,9 @@@ int arch_setup_ht_irq(unsigned int irq
         struct irq_cfg *cfg;
         int err;
   
+       if (disable_apic)
+               return -ENXIO;
+ 
         cfg = irq_cfg(irq);
         err = assign_irq_vector(irq, cfg, TARGET_CPUS);
         if (!err) {
@@@ -3761,7 -3765,7 +3766,7 @@@
   }
   #endif /* CONFIG_HT_IRQ */
   
- #ifdef CONFIG_X86_64
+ #ifdef CONFIG_X86_UV
   /*
    * Re-target the irq to the specified CPU and enable the specified MMR located
    * on the specified blade to allow the sending of MSIs to the specified CPU.
@@@ -3851,6 -3855,22 +3856,22 @@@ void __init probe_nr_irqs_gsi(void
                 nr_irqs_gsi = nr;
   }
   
+ #ifdef CONFIG_SPARSE_IRQ
+ int __init arch_probe_nr_irqs(void)
+ {
+       int nr;
+ 
+       nr = ((8 * nr_cpu_ids) > (32 * nr_ioapics) ?
+               (NR_VECTORS + (8 * nr_cpu_ids)) :
+               (NR_VECTORS + (32 * nr_ioapics)));
+ 
+       if (nr < nr_irqs && nr > nr_irqs_gsi)
+               nr_irqs = nr;
+ 
+       return 0;
+ }
+ #endif
+ 
   /* --------------------------------------------------------------------------
                             ACPI-based IOAPIC Configuration
      -------------------------------------------------------------------------- */
@@@ -3985,8 -4005,8 +4006,8 @@@ int acpi_get_override_irq(int bus_irq, 
                 return -1;
   
         for (i = 0; i < mp_irq_entries; i++)
-               if (mp_irqs[i].mp_irqtype == mp_INT &&
-                   mp_irqs[i].mp_srcbusirq == bus_irq)
+               if (mp_irqs[i].irqtype == mp_INT &&
+                   mp_irqs[i].srcbusirq == bus_irq)
                         break;
         if (i >= mp_irq_entries)
                 return -1;
@@@ -4040,7 -4060,7 +4061,7 @@@ void __init setup_ioapic_dest(void
                          */
                         if (desc->status &
                             (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
-                               mask = &desc->affinity;
+                               mask = desc->affinity;
                         else
                                 mask = TARGET_CPUS;
   
@@@ -4101,7 -4121,7 +4122,7 @@@ void __init ioapic_init_mappings(void
         ioapic_res = ioapic_setup_resources();
         for (i = 0; i < nr_ioapics; i++) {
                 if (smp_found_config) {
-                       ioapic_phys = mp_ioapics[i].mp_apicaddr;
+                       ioapic_phys = mp_ioapics[i].apicaddr;
   #ifdef CONFIG_X86_32
                         if (!ioapic_phys) {
                                 printk(KERN_ERR
diff --combined arch/x86/kernel/irqinit_32.c

index 10a09c2,bf629ca..22608eb
--- 1/arch/x86/kernel/irqinit_32.c
--- 2/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@@ -78,6 -78,15 +78,6 @@@ void __init init_ISA_irqs(void
         }
   }
   
- -/*
- - * IRQ2 is cascade interrupt to second interrupt controller
- - */
- -static struct irqaction irq2 = {
- -      .handler = no_action,
- -      .mask = CPU_MASK_NONE,
- -      .name = "cascade",
- -};
- -
   DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
         [0 ... IRQ0_VECTOR - 1] = -1,
         [IRQ0_VECTOR] = 0,
@@@ -140,8 -149,15 +140,15 @@@ void __init native_init_IRQ(void
          */
         alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
   
-       /* IPI for invalidation */
-       alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
+       /* IPIs for invalidation */
+       alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0);
+       alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1);
+       alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2);
+       alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3);
+       alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4);
+       alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5);
+       alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6);
+       alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7);
   
         /* IPI for generic function call */
         alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
@@@ -169,6 -185,9 +176,6 @@@
         alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
   #endif
   
- -      if (!acpi_ioapic)
- -              setup_irq(2, &irq2);
- -
         /* setup after call gates are initialised (usually add in
          * the architecture specific gates)
          */
diff --combined arch/x86/mach-voyager/setup.c

index d914a79,0ade625..66b7eb5
--- 1/arch/x86/mach-voyager/setup.c
--- 2/arch/x86/mach-voyager/setup.c
+++ b/arch/x86/mach-voyager/setup.c
@@@ -9,6 -9,7 +9,7 @@@
   #include <asm/e820.h>
   #include <asm/io.h>
   #include <asm/setup.h>
+ #include <asm/cpu.h>
   
   void __init pre_intr_init_hook(void)
   {
@@@ -33,23 -34,13 +34,23 @@@ void __init intr_init_hook(void
         setup_irq(2, &irq2);
   }
   
- -void __init pre_setup_arch_hook(void)
+ +static void voyager_disable_tsc(void)
   {
         /* Voyagers run their CPUs from independent clocks, so disable
          * the TSC code because we can't sync them */
         setup_clear_cpu_cap(X86_FEATURE_TSC);
   }
   
+ +void __init pre_setup_arch_hook(void)
+ +{
+ +      voyager_disable_tsc();
+ +}
+ +
+ +void __init pre_time_init_hook(void)
+ +{
+ +      voyager_disable_tsc();
+ +}
+ +
   void __init trap_init_hook(void)
   {
   }
diff --combined arch/x86/mach-voyager/voyager_smp.c

index 7ffcdee,58c7cac..98e3c2b
--- 1/arch/x86/mach-voyager/voyager_smp.c
--- 2/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@@ -81,7 -81,7 +81,7 @@@ static void enable_local_vic_irq(unsign
   static void disable_local_vic_irq(unsigned int irq);
   static void before_handle_vic_irq(unsigned int irq);
   static void after_handle_vic_irq(unsigned int irq);
- -static void set_vic_irq_affinity(unsigned int irq, cpumask_t mask);
+ +static void set_vic_irq_affinity(unsigned int irq, const struct cpumask *mask);
   static void ack_vic_irq(unsigned int irq);
   static void vic_enable_cpi(void);
   static void do_boot_cpu(__u8 cpuid);
@@@ -211,6 -211,8 +211,6 @@@ static __u32 cpu_booted_map
   static cpumask_t smp_commenced_mask = CPU_MASK_NONE;
   
   /* This is for the new dynamic CPU boot code */
- -cpumask_t cpu_callin_map = CPU_MASK_NONE;
- -cpumask_t cpu_callout_map = CPU_MASK_NONE;
   
   /* The per processor IRQ masks (these are usually kept in sync) */
   static __u16 vic_irq_mask[NR_CPUS] __cacheline_aligned;
@@@ -376,7 -378,7 +376,7 @@@ void __init find_smp_config(void
         cpus_addr(phys_cpu_present_map)[0] |=
             voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK +
                                        3) << 24;
- -      cpu_possible_map = phys_cpu_present_map;
+ +      init_cpu_possible(&phys_cpu_present_map);
         printk("VOYAGER SMP: phys_cpu_present_map = 0x%lx\n",
                cpus_addr(phys_cpu_present_map)[0]);
         /* Here we set up the VIC to enable SMP */
@@@ -400,7 -402,7 +400,7 @@@
              VOYAGER_SUS_IN_CONTROL_PORT);
   
         current_thread_info()->cpu = boot_cpu_id;
-       x86_write_percpu(cpu_number, boot_cpu_id);
+       percpu_write(cpu_number, boot_cpu_id);
   }
   
   /*
@@@ -528,7 -530,6 +528,6 @@@ static void __init do_boot_cpu(__u8 cpu
         /* init_tasks (in sched.c) is indexed logically */
         stack_start.sp = (void *)idle->thread.sp;
   
-       init_gdt(cpu);
         per_cpu(current_task, cpu) = idle;
         early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
         irq_ctx_init(cpu);
@@@ -1597,16 -1598,16 +1596,16 @@@ static void after_handle_vic_irq(unsign
    * change the mask and then do an interrupt enable CPI to re-enable on
    * the selected processors */
   
- -void set_vic_irq_affinity(unsigned int irq, cpumask_t mask)
+ +void set_vic_irq_affinity(unsigned int irq, const struct cpumask *mask)
   {
         /* Only extended processors handle interrupts */
         unsigned long real_mask;
         unsigned long irq_mask = 1 << irq;
         int cpu;
   
- -      real_mask = cpus_addr(mask)[0] & voyager_extended_vic_processors;
+ +      real_mask = cpus_addr(*mask)[0] & voyager_extended_vic_processors;
   
- -      if (cpus_addr(mask)[0] == 0)
+ +      if (cpus_addr(*mask)[0] == 0)
                 /* can't have no CPUs to accept the interrupt -- extremely
                  * bad things will happen */
                 return;
@@@ -1745,14 -1746,13 +1744,14 @@@ static void __init voyager_smp_prepare_
   
   static void __cpuinit voyager_smp_prepare_boot_cpu(void)
   {
-       init_gdt(smp_processor_id());
-       switch_to_new_gdt();
+       int cpu = smp_processor_id();
+       switch_to_new_gdt(cpu);
   
-       cpu_online_map = cpumask_of_cpu(smp_processor_id());
-       cpu_callout_map = cpumask_of_cpu(smp_processor_id());
-       cpu_callin_map = CPU_MASK_NONE;
-       cpu_present_map = cpumask_of_cpu(smp_processor_id());
+       cpu_set(cpu, cpu_online_map);
+       cpu_set(cpu, cpu_callout_map);
+       cpu_set(cpu, cpu_possible_map);
+       cpu_set(cpu, cpu_present_map);
+ +
   }
   
   static int __cpuinit voyager_cpu_up(unsigned int cpu)
@@@ -1779,12 -1779,11 +1778,11 @@@ static void __init voyager_smp_cpus_don
   void __init smp_setup_processor_id(void)
   {
         current_thread_info()->cpu = hard_smp_processor_id();
-       x86_write_percpu(cpu_number, hard_smp_processor_id());
   }
   
- -static void voyager_send_call_func(cpumask_t callmask)
+ +static void voyager_send_call_func(const struct cpumask *callmask)
   {
- -      __u32 mask = cpus_addr(callmask)[0] & ~(1 << smp_processor_id());
+ +      __u32 mask = cpus_addr(*callmask)[0] & ~(1 << smp_processor_id());
         send_CPI(mask, VIC_CALL_FUNCTION_CPI);
   }
   
diff --combined arch/x86/mm/fault.c

index c76ef1d,65709a6..976b5a7
--- 1/arch/x86/mm/fault.c
--- 2/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@@ -26,6 -26,7 +26,7 @@@
   #include <linux/kprobes.h>
   #include <linux/uaccess.h>
   #include <linux/kdebug.h>
+ #include <linux/magic.h>
   
   #include <asm/system.h>
   #include <asm/desc.h>
@@@ -91,8 -92,8 +92,8 @@@ static inline int notify_page_fault(str
    *
    * Opcode checker based on code by Richard Brunner
    */
- static int is_prefetch(struct pt_regs *regs, unsigned long addr,
-                      unsigned long error_code)
+ static int is_prefetch(struct pt_regs *regs, unsigned long error_code,
+                       unsigned long addr)
   {
         unsigned char *instr;
         int scan_more = 1;
@@@ -409,15 -410,15 +410,15 @@@ static void show_fault_oops(struct pt_r
   }
   
   #ifdef CONFIG_X86_64
- static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
-                                unsigned long error_code)
+ static noinline void pgtable_bad(struct pt_regs *regs,
+                        unsigned long error_code, unsigned long address)
   {
         unsigned long flags = oops_begin();
         int sig = SIGKILL;
-       struct task_struct *tsk;
+       struct task_struct *tsk = current;
   
         printk(KERN_ALERT "%s: Corrupted page table at address %lx\n",
-              current->comm, address);
+              tsk->comm, address);
         dump_pagetable(address);
         tsk = current;
         tsk->thread.cr2 = address;
@@@ -429,6 -430,196 +430,196 @@@
   }
   #endif
   
+ static noinline void no_context(struct pt_regs *regs,
+                       unsigned long error_code, unsigned long address)
+ {
+       struct task_struct *tsk = current;
+       unsigned long *stackend;
+ 
+ #ifdef CONFIG_X86_64
+       unsigned long flags;
+       int sig;
+ #endif
+ 
+       /* Are we prepared to handle this kernel fault?  */
+       if (fixup_exception(regs))
+               return;
+ 
+       /*
+        * X86_32
+        * Valid to do another page fault here, because if this fault
+        * had been triggered by is_prefetch fixup_exception would have
+        * handled it.
+        *
+        * X86_64
+        * Hall of shame of CPU/BIOS bugs.
+        */
+       if (is_prefetch(regs, error_code, address))
+               return;
+ 
+       if (is_errata93(regs, address))
+               return;
+ 
+       /*
+        * Oops. The kernel tried to access some bad page. We'll have to
+        * terminate things with extreme prejudice.
+        */
+ #ifdef CONFIG_X86_32
+       bust_spinlocks(1);
+ #else
+       flags = oops_begin();
+ #endif
+ 
+       show_fault_oops(regs, error_code, address);
+ 
+       stackend = end_of_stack(tsk);
+       if (*stackend != STACK_END_MAGIC)
+               printk(KERN_ALERT "Thread overran stack, or stack corrupted\n");
+ 
+       tsk->thread.cr2 = address;
+       tsk->thread.trap_no = 14;
+       tsk->thread.error_code = error_code;
+ 
+ #ifdef CONFIG_X86_32
+       die("Oops", regs, error_code);
+       bust_spinlocks(0);
+       do_exit(SIGKILL);
+ #else
+       sig = SIGKILL;
+       if (__die("Oops", regs, error_code))
+               sig = 0;
+       /* Executive summary in case the body of the oops scrolled away */
+       printk(KERN_EMERG "CR2: %016lx\n", address);
+       oops_end(flags, regs, sig);
+ #endif
+ }
+ 
+ static void __bad_area_nosemaphore(struct pt_regs *regs,
+                       unsigned long error_code, unsigned long address,
+                       int si_code)
+ {
+       struct task_struct *tsk = current;
+ 
+       /* User mode accesses just cause a SIGSEGV */
+       if (error_code & PF_USER) {
+               /*
+                * It's possible to have interrupts off here.
+                */
+               local_irq_enable();
+ 
+               /*
+                * Valid to do another page fault here because this one came
+                * from user space.
+                */
+               if (is_prefetch(regs, error_code, address))
+                       return;
+ 
+               if (is_errata100(regs, address))
+                       return;
+ 
+               if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
+                   printk_ratelimit()) {
+                       printk(
+                       "%s%s[%d]: segfault at %lx ip %p sp %p error %lx",
+                       task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
+                       tsk->comm, task_pid_nr(tsk), address,
+                       (void *) regs->ip, (void *) regs->sp, error_code);
+                       print_vma_addr(" in ", regs->ip);
+                       printk("\n");
+               }
+ 
+               tsk->thread.cr2 = address;
+               /* Kernel addresses are always protection faults */
+               tsk->thread.error_code = error_code | (address >= TASK_SIZE);
+               tsk->thread.trap_no = 14;
+               force_sig_info_fault(SIGSEGV, si_code, address, tsk);
+               return;
+       }
+ 
+       if (is_f00f_bug(regs, address))
+               return;
+ 
+       no_context(regs, error_code, address);
+ }
+ 
+ static noinline void bad_area_nosemaphore(struct pt_regs *regs,
+                       unsigned long error_code, unsigned long address)
+ {
+       __bad_area_nosemaphore(regs, error_code, address, SEGV_MAPERR);
+ }
+ 
+ static void __bad_area(struct pt_regs *regs,
+                       unsigned long error_code, unsigned long address,
+                       int si_code)
+ {
+       struct mm_struct *mm = current->mm;
+ 
+       /*
+        * Something tried to access memory that isn't in our memory map..
+        * Fix it, but check if it's kernel or user first..
+        */
+       up_read(&mm->mmap_sem);
+ 
+       __bad_area_nosemaphore(regs, error_code, address, si_code);
+ }
+ 
+ static noinline void bad_area(struct pt_regs *regs,
+                       unsigned long error_code, unsigned long address)
+ {
+       __bad_area(regs, error_code, address, SEGV_MAPERR);
+ }
+ 
+ static noinline void bad_area_access_error(struct pt_regs *regs,
+                       unsigned long error_code, unsigned long address)
+ {
+       __bad_area(regs, error_code, address, SEGV_ACCERR);
+ }
+ 
+ /* TODO: fixup for "mm-invoke-oom-killer-from-page-fault.patch" */
+ static void out_of_memory(struct pt_regs *regs,
+                       unsigned long error_code, unsigned long address)
+ {
+       /*
+        * We ran out of memory, call the OOM killer, and return the userspace
+        * (which will retry the fault, or kill us if we got oom-killed).
+        */
+       up_read(&current->mm->mmap_sem);
+       pagefault_out_of_memory();
+ }
+ 
+ static void do_sigbus(struct pt_regs *regs,
+                       unsigned long error_code, unsigned long address)
+ {
+       struct task_struct *tsk = current;
+       struct mm_struct *mm = tsk->mm;
+ 
+       up_read(&mm->mmap_sem);
+ 
+       /* Kernel mode? Handle exceptions or die */
+       if (!(error_code & PF_USER))
+               no_context(regs, error_code, address);
+ #ifdef CONFIG_X86_32
+       /* User space => ok to do another page fault */
+       if (is_prefetch(regs, error_code, address))
+               return;
+ #endif
+       tsk->thread.cr2 = address;
+       tsk->thread.error_code = error_code;
+       tsk->thread.trap_no = 14;
+       force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
+ }
+ 
+ static noinline void mm_fault_error(struct pt_regs *regs,
+               unsigned long error_code, unsigned long address, unsigned int fault)
+ {
+       if (fault & VM_FAULT_OOM)
+               out_of_memory(regs, error_code, address);
+       else if (fault & VM_FAULT_SIGBUS)
+               do_sigbus(regs, error_code, address);
+       else
+               BUG();
+ }
+ 
   static int spurious_fault_check(unsigned long error_code, pte_t *pte)
   {
         if ((error_code & PF_WRITE) && !pte_write(*pte))
@@@ -448,8 -639,8 +639,8 @@@
    * There are no security implications to leaving a stale TLB when
    * increasing the permissions on a page.
    */
- static int spurious_fault(unsigned long address,
-                         unsigned long error_code)
+ static noinline int spurious_fault(unsigned long error_code,
+                               unsigned long address)
   {
         pgd_t *pgd;
         pud_t *pud;
@@@ -494,7 -685,7 +685,7 @@@
    *
    * This assumes no large pages in there.
    */
- static int vmalloc_fault(unsigned long address)
+ static noinline int vmalloc_fault(unsigned long address)
   {
   #ifdef CONFIG_X86_32
         unsigned long pgd_paddr;
@@@ -573,6 -764,25 +764,25 @@@
   
   int show_unhandled_signals = 1;
   
+ static inline int access_error(unsigned long error_code, int write,
+                               struct vm_area_struct *vma)
+ {
+       if (write) {
+               /* write, present and write, not present */
+               if (unlikely(!(vma->vm_flags & VM_WRITE)))
+                       return 1;
+       } else if (unlikely(error_code & PF_PROT)) {
+               /* read, present */
+               return 1;
+       } else {
+               /* read, not present */
+               if (unlikely(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))))
+                       return 1;
+       }
+ 
+       return 0;
+ }
+ 
   /*
    * This routine handles page faults.  It determines the address,
    * and the problem, and then passes it off to one of the appropriate
@@@ -583,16 -793,12 +793,12 @@@ asmlinkag
   #endif
   void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
   {
+       unsigned long address;
         struct task_struct *tsk;
         struct mm_struct *mm;
         struct vm_area_struct *vma;
-       unsigned long address;
-       int write, si_code;
+       int write;
         int fault;
- #ifdef CONFIG_X86_64
-       unsigned long flags;
-       int sig;
- #endif
   
         tsk = current;
         mm = tsk->mm;
@@@ -601,8 -807,8 +807,7 @@@
         /* get the address */
         address = read_cr2();
   
-       si_code = SEGV_MAPERR;
- 
+       if (unlikely(notify_page_fault(regs)))
- -              return;
         if (unlikely(kmmio_fault(regs, address)))
                 return;
   
@@@ -629,23 -835,17 +834,23 @@@
                         return;
   
                 /* Can handle a stale RO->RW TLB */
-               if (spurious_fault(address, error_code))
+               if (spurious_fault(error_code, address))
                         return;
   
+ +              /* kprobes don't want to hook the spurious faults. */
+ +              if (notify_page_fault(regs))
+ +                      return;
                 /*
                  * Don't take the mm semaphore here. If we fixup a prefetch
                  * fault we could otherwise deadlock.
                  */
-               goto bad_area_nosemaphore;
+               bad_area_nosemaphore(regs, error_code, address);
+               return;
         }
   
- 
+ +      /* kprobes don't want to hook the spurious faults. */
+ +      if (notify_page_fault(regs))
+ +              return;
         /*
          * It's safe to allow irq's after cr2 has been saved and the
          * vmalloc fault has been handled.
@@@ -661,15 -861,17 +866,17 @@@
   
   #ifdef CONFIG_X86_64
         if (unlikely(error_code & PF_RSVD))
-               pgtable_bad(address, regs, error_code);
+               pgtable_bad(regs, error_code, address);
   #endif
   
         /*
          * If we're in an interrupt, have no user context or are running in an
          * atomic region then we must not take the fault.
          */
-       if (unlikely(in_atomic() || !mm))
-               goto bad_area_nosemaphore;
+       if (unlikely(in_atomic() || !mm)) {
+               bad_area_nosemaphore(regs, error_code, address);
+               return;
+       }
   
         /*
          * When running in the kernel we expect faults to occur only to
@@@ -687,20 -889,26 +894,26 @@@
          * source.  If this is invalid we can skip the address space check,
          * thus avoiding the deadlock.
          */
-       if (!down_read_trylock(&mm->mmap_sem)) {
+       if (unlikely(!down_read_trylock(&mm->mmap_sem))) {
                 if ((error_code & PF_USER) == 0 &&
-                   !search_exception_tables(regs->ip))
-                       goto bad_area_nosemaphore;
+                   !search_exception_tables(regs->ip)) {
+                       bad_area_nosemaphore(regs, error_code, address);
+                       return;
+               }
                 down_read(&mm->mmap_sem);
         }
   
         vma = find_vma(mm, address);
-       if (!vma)
-               goto bad_area;
-       if (vma->vm_start <= address)
+       if (unlikely(!vma)) {
+               bad_area(regs, error_code, address);
+               return;
+       }
+       if (likely(vma->vm_start <= address))
                 goto good_area;
-       if (!(vma->vm_flags & VM_GROWSDOWN))
-               goto bad_area;
+       if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) {
+               bad_area(regs, error_code, address);
+               return;
+       }
         if (error_code & PF_USER) {
                 /*
                  * Accessing the stack below %sp is always a bug.
@@@ -708,31 -916,25 +921,25 @@@
                  * and pusha to work.  ("enter $65535,$31" pushes
                  * 32 pointers and then decrements %sp by 65535.)
                  */
-               if (address + 65536 + 32 * sizeof(unsigned long) < regs->sp)
-                       goto bad_area;
+               if (unlikely(address + 65536 + 32 * sizeof(unsigned long) < regs->sp)) {
+                       bad_area(regs, error_code, address);
+                       return;
+               }
         }
-       if (expand_stack(vma, address))
-               goto bad_area;
- /*
-  * Ok, we have a good vm_area for this memory access, so
-  * we can handle it..
-  */
+       if (unlikely(expand_stack(vma, address))) {
+               bad_area(regs, error_code, address);
+               return;
+       }
+ 
+       /*
+        * Ok, we have a good vm_area for this memory access, so
+        * we can handle it..
+        */
   good_area:
-       si_code = SEGV_ACCERR;
-       write = 0;
-       switch (error_code & (PF_PROT|PF_WRITE)) {
-       default:        /* 3: write, present */
-               /* fall through */
-       case PF_WRITE:          /* write, not present */
-               if (!(vma->vm_flags & VM_WRITE))
-                       goto bad_area;
-               write++;
-               break;
-       case PF_PROT:           /* read, present */
-               goto bad_area;
-       case 0:                 /* read, not present */
-               if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
-                       goto bad_area;
+       write = error_code & PF_WRITE;
+       if (unlikely(access_error(error_code, write, vma))) {
+               bad_area_access_error(regs, error_code, address);
+               return;
         }
   
         /*
@@@ -742,11 -944,8 +949,8 @@@
          */
         fault = handle_mm_fault(mm, vma, address, write);
         if (unlikely(fault & VM_FAULT_ERROR)) {
-               if (fault & VM_FAULT_OOM)
-                       goto out_of_memory;
-               else if (fault & VM_FAULT_SIGBUS)
-                       goto do_sigbus;
-               BUG();
+               mm_fault_error(regs, error_code, address, fault);
+               return;
         }
         if (fault & VM_FAULT_MAJOR)
                 tsk->maj_flt++;
@@@ -764,128 -963,6 +968,6 @@@
         }
   #endif
         up_read(&mm->mmap_sem);
-       return;
- 
- /*
-  * Something tried to access memory that isn't in our memory map..
-  * Fix it, but check if it's kernel or user first..
-  */
- bad_area:
-       up_read(&mm->mmap_sem);
- 
- bad_area_nosemaphore:
-       /* User mode accesses just cause a SIGSEGV */
-       if (error_code & PF_USER) {
-               /*
-                * It's possible to have interrupts off here.
-                */
-               local_irq_enable();
- 
-               /*
-                * Valid to do another page fault here because this one came
-                * from user space.
-                */
-               if (is_prefetch(regs, address, error_code))
-                       return;
- 
-               if (is_errata100(regs, address))
-                       return;
- 
-               if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
-                   printk_ratelimit()) {
-                       printk(
-                       "%s%s[%d]: segfault at %lx ip %p sp %p error %lx",
-                       task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
-                       tsk->comm, task_pid_nr(tsk), address,
-                       (void *) regs->ip, (void *) regs->sp, error_code);
-                       print_vma_addr(" in ", regs->ip);
-                       printk("\n");
-               }
- 
-               tsk->thread.cr2 = address;
-               /* Kernel addresses are always protection faults */
-               tsk->thread.error_code = error_code | (address >= TASK_SIZE);
-               tsk->thread.trap_no = 14;
-               force_sig_info_fault(SIGSEGV, si_code, address, tsk);
-               return;
-       }
- 
-       if (is_f00f_bug(regs, address))
-               return;
- 
- no_context:
-       /* Are we prepared to handle this kernel fault?  */
-       if (fixup_exception(regs))
-               return;
- 
-       /*
-        * X86_32
-        * Valid to do another page fault here, because if this fault
-        * had been triggered by is_prefetch fixup_exception would have
-        * handled it.
-        *
-        * X86_64
-        * Hall of shame of CPU/BIOS bugs.
-        */
-       if (is_prefetch(regs, address, error_code))
-               return;
- 
-       if (is_errata93(regs, address))
-               return;
- 
- /*
-  * Oops. The kernel tried to access some bad page. We'll have to
-  * terminate things with extreme prejudice.
-  */
- #ifdef CONFIG_X86_32
-       bust_spinlocks(1);
- #else
-       flags = oops_begin();
- #endif
- 
-       show_fault_oops(regs, error_code, address);
- 
-       tsk->thread.cr2 = address;
-       tsk->thread.trap_no = 14;
-       tsk->thread.error_code = error_code;
- 
- #ifdef CONFIG_X86_32
-       die("Oops", regs, error_code);
-       bust_spinlocks(0);
-       do_exit(SIGKILL);
- #else
-       sig = SIGKILL;
-       if (__die("Oops", regs, error_code))
-               sig = 0;
-       /* Executive summary in case the body of the oops scrolled away */
-       printk(KERN_EMERG "CR2: %016lx\n", address);
-       oops_end(flags, regs, sig);
- #endif
- 
- out_of_memory:
-       /*
-        * We ran out of memory, call the OOM killer, and return the userspace
-        * (which will retry the fault, or kill us if we got oom-killed).
-        */
-       up_read(&mm->mmap_sem);
-       pagefault_out_of_memory();
-       return;
- 
- do_sigbus:
-       up_read(&mm->mmap_sem);
- 
-       /* Kernel mode? Handle exceptions or die */
-       if (!(error_code & PF_USER))
-               goto no_context;
- #ifdef CONFIG_X86_32
-       /* User space => ok to do another page fault */
-       if (is_prefetch(regs, address, error_code))
-               return;
- #endif
-       tsk->thread.cr2 = address;
-       tsk->thread.error_code = error_code;
-       tsk->thread.trap_no = 14;
-       force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
   }
   
   DEFINE_SPINLOCK(pgd_lock);
diff --combined arch/x86/xen/multicalls.h

index fa3e107,e786fa7..9e565da
--- 1/arch/x86/xen/multicalls.h
--- 2/arch/x86/xen/multicalls.h
+++ b/arch/x86/xen/multicalls.h
@@@ -19,10 -19,8 +19,10 @@@ DECLARE_PER_CPU(unsigned long, xen_mc_i
      paired with xen_mc_issue() */
   static inline void xen_mc_batch(void)
   {
+ +      unsigned long flags;
         /* need to disable interrupts until this entry is complete */
- -      local_irq_save(__get_cpu_var(xen_mc_irq_flags));
+ +      local_irq_save(flags);
+ +      __get_cpu_var(xen_mc_irq_flags) = flags;
   }
   
   static inline struct multicall_space xen_mc_entry(size_t args)
@@@ -41,7 -39,7 +41,7 @@@ static inline void xen_mc_issue(unsigne
                 xen_mc_flush();
   
         /* restore flags saved in xen_mc_batch */
-       local_irq_restore(x86_read_percpu(xen_mc_irq_flags));
+       local_irq_restore(percpu_read(xen_mc_irq_flags));
   }
   
   /* Set up a callback to be called when the current batch is flushed */
diff --combined drivers/misc/Kconfig

index c64e679,396d935..1c48408
--- 1/drivers/misc/Kconfig
--- 2/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@@ -162,7 -162,7 +162,7 @@@ config ENCLOSURE_SERVICE
   config SGI_XP
         tristate "Support communication between SGI SSIs"
         depends on NET
-       depends on (IA64_GENERIC || IA64_SGI_SN2 || IA64_SGI_UV || X86_64) && SMP
+       depends on (IA64_GENERIC || IA64_SGI_SN2 || IA64_SGI_UV || X86_UV) && SMP
         select IA64_UNCACHED_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2
         select GENERIC_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2
         select SGI_GRU if (IA64_GENERIC || IA64_SGI_UV || X86_64) && SMP
@@@ -189,7 -189,7 +189,7 @@@ config HP_IL
   
   config SGI_GRU
         tristate "SGI GRU driver"
-       depends on (X86_64 || IA64_SGI_UV || IA64_GENERIC) && SMP
+       depends on (X86_UV || IA64_SGI_UV || IA64_GENERIC) && SMP
         default n
         select MMU_NOTIFIER
         ---help---
@@@ -217,7 -217,6 +217,7 @@@ config DELL_LAPTO
         depends on EXPERIMENTAL
         depends on BACKLIGHT_CLASS_DEVICE
         depends on RFKILL
+ +      depends on POWER_SUPPLY
         default n
         ---help---
         This driver adds support for rfkill and backlight control to Dell
diff --combined drivers/net/sfc/efx.c

index ab0e09b,101c00a..847e9bb
--- 1/drivers/net/sfc/efx.c
--- 2/drivers/net/sfc/efx.c
+++ b/drivers/net/sfc/efx.c
@@@ -676,8 -676,9 +676,8 @@@ static int efx_init_port(struct efx_ni
         rc = efx->phy_op->init(efx);
         if (rc)
                 return rc;
- -      efx->phy_op->reconfigure(efx);
- -
         mutex_lock(&efx->mac_lock);
+ +      efx->phy_op->reconfigure(efx);
         rc = falcon_switch_mac(efx);
         mutex_unlock(&efx->mac_lock);
         if (rc)
@@@ -685,7 -686,7 +685,7 @@@
         efx->mac_op->reconfigure(efx);
   
         efx->port_initialized = true;
- -      efx->stats_enabled = true;
+ +      efx_stats_enable(efx);
         return 0;
   
   fail:
@@@ -734,7 -735,6 +734,7 @@@ static void efx_fini_port(struct efx_ni
         if (!efx->port_initialized)
                 return;
   
+ +      efx_stats_disable(efx);
         efx->phy_op->fini(efx);
         efx->port_initialized = false;
   
@@@ -854,20 -854,27 +854,27 @@@ static void efx_fini_io(struct efx_nic 
    * interrupts across them. */
   static int efx_wanted_rx_queues(void)
   {
-       cpumask_t core_mask;
+       cpumask_var_t core_mask;
         int count;
         int cpu;
   
-       cpus_clear(core_mask);
+       if (!alloc_cpumask_var(&core_mask, GFP_KERNEL)) {
+               printk(KERN_WARNING
+                      "efx.c: allocation failure, irq balancing hobbled\n");
+               return 1;
+       }
+ 
+       cpumask_clear(core_mask);
         count = 0;
         for_each_online_cpu(cpu) {
-               if (!cpu_isset(cpu, core_mask)) {
+               if (!cpumask_test_cpu(cpu, core_mask)) {
                         ++count;
-                       cpus_or(core_mask, core_mask,
-                               topology_core_siblings(cpu));
+                       cpumask_or(core_mask, core_mask,
+                                  topology_core_cpumask(cpu));
                 }
         }
   
+       free_cpumask_var(core_mask);
         return count;
   }
   
@@@ -1361,20 -1368,6 +1368,20 @@@ static int efx_net_stop(struct net_devi
         return 0;
   }
   
+ +void efx_stats_disable(struct efx_nic *efx)
+ +{
+ +      spin_lock(&efx->stats_lock);
+ +      ++efx->stats_disable_count;
+ +      spin_unlock(&efx->stats_lock);
+ +}
+ +
+ +void efx_stats_enable(struct efx_nic *efx)
+ +{
+ +      spin_lock(&efx->stats_lock);
+ +      --efx->stats_disable_count;
+ +      spin_unlock(&efx->stats_lock);
+ +}
+ +
   /* Context: process, dev_base_lock or RTNL held, non-blocking. */
   static struct net_device_stats *efx_net_stats(struct net_device *net_dev)
   {
@@@ -1383,12 -1376,12 +1390,12 @@@
         struct net_device_stats *stats = &net_dev->stats;
   
         /* Update stats if possible, but do not wait if another thread
- -       * is updating them (or resetting the NIC); slightly stale
- -       * stats are acceptable.
+ +       * is updating them or if MAC stats fetches are temporarily
+ +       * disabled; slightly stale stats are acceptable.
          */
         if (!spin_trylock(&efx->stats_lock))
                 return stats;
- -      if (efx->stats_enabled) {
+ +      if (!efx->stats_disable_count) {
                 efx->mac_op->update_stats(efx);
                 falcon_update_nic_stats(efx);
         }
@@@ -1636,12 -1629,16 +1643,12 @@@ static void efx_unregister_netdev(struc
   
   /* Tears down the entire software state and most of the hardware state
    * before reset.  */
- -void efx_reset_down(struct efx_nic *efx, struct ethtool_cmd *ecmd)
+ +void efx_reset_down(struct efx_nic *efx, enum reset_type method,
+ +                  struct ethtool_cmd *ecmd)
   {
         EFX_ASSERT_RESET_SERIALISED(efx);
   
- -      /* The net_dev->get_stats handler is quite slow, and will fail
- -       * if a fetch is pending over reset. Serialise against it. */
- -      spin_lock(&efx->stats_lock);
- -      efx->stats_enabled = false;
- -      spin_unlock(&efx->stats_lock);
- -
+ +      efx_stats_disable(efx);
         efx_stop_all(efx);
         mutex_lock(&efx->mac_lock);
         mutex_lock(&efx->spi_lock);
@@@ -1649,8 -1646,6 +1656,8 @@@
         efx->phy_op->get_settings(efx, ecmd);
   
         efx_fini_channels(efx);
+ +      if (efx->port_initialized && method != RESET_TYPE_INVISIBLE)
+ +              efx->phy_op->fini(efx);
   }
   
   /* This function will always ensure that the locks acquired in
@@@ -1658,8 -1653,7 +1665,8 @@@
    * that we were unable to reinitialise the hardware, and the
    * driver should be disabled. If ok is false, then the rx and tx
    * engines are not restarted, pending a RESET_DISABLE. */
- -int efx_reset_up(struct efx_nic *efx, struct ethtool_cmd *ecmd, bool ok)
+ +int efx_reset_up(struct efx_nic *efx, enum reset_type method,
+ +               struct ethtool_cmd *ecmd, bool ok)
   {
         int rc;
   
@@@ -1671,15 -1665,6 +1678,15 @@@
                 ok = false;
         }
   
+ +      if (efx->port_initialized && method != RESET_TYPE_INVISIBLE) {
+ +              if (ok) {
+ +                      rc = efx->phy_op->init(efx);
+ +                      if (rc)
+ +                              ok = false;
+ +              } else
+ +                      efx->port_initialized = false;
+ +      }
+ +
         if (ok) {
                 efx_init_channels(efx);
   
@@@ -1692,7 -1677,7 +1699,7 @@@
   
         if (ok) {
                 efx_start_all(efx);
- -              efx->stats_enabled = true;
+ +              efx_stats_enable(efx);
         }
         return rc;
   }
@@@ -1724,7 -1709,7 +1731,7 @@@ static int efx_reset(struct efx_nic *ef
   
         EFX_INFO(efx, "resetting (%d)\n", method);
   
- -      efx_reset_down(efx, &ecmd);
+ +      efx_reset_down(efx, method, &ecmd);
   
         rc = falcon_reset_hw(efx, method);
         if (rc) {
@@@ -1743,10 -1728,10 +1750,10 @@@
   
         /* Leave device stopped if necessary */
         if (method == RESET_TYPE_DISABLE) {
- -              efx_reset_up(efx, &ecmd, false);
+ +              efx_reset_up(efx, method, &ecmd, false);
                 rc = -EIO;
         } else {
- -              rc = efx_reset_up(efx, &ecmd, true);
+ +              rc = efx_reset_up(efx, method, &ecmd, true);
         }
   
   out_disable:
@@@ -1898,7 -1883,6 +1905,7 @@@ static int efx_init_struct(struct efx_n
         efx->rx_checksum_enabled = true;
         spin_lock_init(&efx->netif_stop_lock);
         spin_lock_init(&efx->stats_lock);
+ +      efx->stats_disable_count = 1;
         mutex_init(&efx->mac_lock);
         efx->mac_op = &efx_dummy_mac_operations;
         efx->phy_op = &efx_dummy_phy_operations;
diff --combined include/linux/sched.h

index 2127e95,2225c20..28b3f50
--- 1/include/linux/sched.h
--- 2/include/linux/sched.h
+++ b/include/linux/sched.h
@@@ -443,7 -443,6 +443,7 @@@ struct pacct_struct 
    * @utime:            time spent in user mode, in &cputime_t units
    * @stime:            time spent in kernel mode, in &cputime_t units
    * @sum_exec_runtime: total time spent on the CPU, in nanoseconds
+ + * @lock:             lock for fields in this struct
    *
    * This structure groups together three kinds of CPU time that are
    * tracked for threads and thread groups.  Most things considering
@@@ -1161,10 -1160,9 +1161,9 @@@ struct task_struct 
         pid_t pid;
         pid_t tgid;
   
- #ifdef CONFIG_CC_STACKPROTECTOR
         /* Canary value for the -fstack-protector gcc feature */
         unsigned long stack_canary;
- #endif
+ 
         /* 
          * pointers to (original) parent process, youngest child, younger sibling,
          * older sibling, respectively.  (p->father can be replaced with 
@@@ -2070,6 -2068,19 +2069,19 @@@ static inline int object_is_on_stack(vo
   
   extern void thread_info_cache_init(void);
   
+ #ifdef CONFIG_DEBUG_STACK_USAGE
+ static inline unsigned long stack_not_used(struct task_struct *p)
+ {
+       unsigned long *n = end_of_stack(p);
+ 
+       do {    /* Skip over canary */
+               n++;
+       } while (!*n);
+ 
+       return (unsigned long)n - (unsigned long)end_of_stack(p);
+ }
+ #endif
+ 
   /* set thread flags in other task's structures
    * - see asm/thread_info.h for TIF_xxxx flags available
    */
diff --combined kernel/fork.c

index 6d5dbb7,99309df..c078438
--- 1/kernel/fork.c
--- 2/kernel/fork.c
+++ b/kernel/fork.c
@@@ -61,6 -61,7 +61,7 @@@
   #include <linux/proc_fs.h>
   #include <linux/blkdev.h>
   #include <trace/sched.h>
+ #include <linux/magic.h>
   
   #include <asm/pgtable.h>
   #include <asm/pgalloc.h>
@@@ -212,6 -213,8 +213,8 @@@ static struct task_struct *dup_task_str
   {
         struct task_struct *tsk;
         struct thread_info *ti;
+       unsigned long *stackend;
+ 
         int err;
   
         prepare_to_copy(orig);
@@@ -237,6 -240,8 +240,8 @@@
                 goto out;
   
         setup_thread_stack(tsk, orig);
+       stackend = end_of_stack(tsk);
+       *stackend = STACK_END_MAGIC;    /* for overflow detection */
   
   #ifdef CONFIG_CC_STACKPROTECTOR
         tsk->stack_canary = get_random_int();
@@@ -1005,7 -1010,6 +1010,7 @@@ static struct task_struct *copy_process
          * triggers too late. This doesn't hurt, the check is only there
          * to stop root fork bombs.
          */
+ +      retval = -EAGAIN;
         if (nr_threads >= max_threads)
                 goto bad_fork_cleanup_count;
   
diff --combined kernel/irq/chip.c

index 7de11bd,c248eba..122fef4
--- 1/kernel/irq/chip.c
--- 2/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@@ -46,7 -46,10 +46,10 @@@ void dynamic_irq_init(unsigned int irq
         desc->irq_count = 0;
         desc->irqs_unhandled = 0;
   #ifdef CONFIG_SMP
-       cpumask_setall(&desc->affinity);
+       cpumask_setall(desc->affinity);
+ #ifdef CONFIG_GENERIC_PENDING_IRQ
+       cpumask_clear(desc->pending_mask);
+ #endif
   #endif
         spin_unlock_irqrestore(&desc->lock, flags);
   }
@@@ -383,7 -386,6 +386,7 @@@ handle_level_irq(unsigned int irq, stru
   out_unlock:
         spin_unlock(&desc->lock);
   }
+ +EXPORT_SYMBOL_GPL(handle_level_irq);
   
   /**
    *    handle_fasteoi_irq - irq handler for transparent controllers
@@@ -594,7 -596,6 +597,7 @@@ __set_irq_handler(unsigned int irq, irq
         }
         spin_unlock_irqrestore(&desc->lock, flags);
   }
+ +EXPORT_SYMBOL_GPL(__set_irq_handler);
   
   void
   set_irq_chip_and_handler(unsigned int irq, struct irq_chip *chip,
diff --combined kernel/irq/numa_migrate.c

index acd8835,666260e..7f9b804
--- 1/kernel/irq/numa_migrate.c
--- 2/kernel/irq/numa_migrate.c
+++ b/kernel/irq/numa_migrate.c
@@@ -38,15 -38,22 +38,22 @@@ static void free_kstat_irqs(struct irq_
         old_desc->kstat_irqs = NULL;
   }
   
- static void init_copy_one_irq_desc(int irq, struct irq_desc *old_desc,
+ static bool init_copy_one_irq_desc(int irq, struct irq_desc *old_desc,
                  struct irq_desc *desc, int cpu)
   {
         memcpy(desc, old_desc, sizeof(struct irq_desc));
+       if (!init_alloc_desc_masks(desc, cpu, false)) {
+               printk(KERN_ERR "irq %d: can not get new irq_desc cpumask "
+                               "for migration.\n", irq);
+               return false;
+       }
         spin_lock_init(&desc->lock);
         desc->cpu = cpu;
         lockdep_set_class(&desc->lock, &irq_desc_lock_class);
         init_copy_kstat_irqs(old_desc, desc, cpu, nr_cpu_ids);
+       init_copy_desc_masks(old_desc, desc);
         arch_init_copy_chip_data(old_desc, desc, cpu);
+       return true;
   }
   
   static void free_one_irq_desc(struct irq_desc *old_desc, struct irq_desc *desc)
@@@ -71,28 -78,29 +78,34 @@@ static struct irq_desc *__real_move_irq
         desc = irq_desc_ptrs[irq];
   
         if (desc && old_desc != desc)
- -                      goto out_unlock;
+ +              goto out_unlock;
   
         node = cpu_to_node(cpu);
         desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node);
         if (!desc) {
-               printk(KERN_ERR "irq %d: can not get new irq_desc for migration.\n", irq);
+               printk(KERN_ERR "irq %d: can not get new irq_desc "
+                               "for migration.\n", irq);
+               /* still use old one */
+               desc = old_desc;
+               goto out_unlock;
+       }
+       if (!init_copy_one_irq_desc(irq, old_desc, desc, cpu)) {
                 /* still use old one */
+               kfree(desc);
                 desc = old_desc;
                 goto out_unlock;
         }
-       init_copy_one_irq_desc(irq, old_desc, desc, cpu);
   
         irq_desc_ptrs[irq] = desc;
+ +      spin_unlock_irqrestore(&sparse_irq_lock, flags);
   
         /* free the old one */
         free_one_irq_desc(old_desc, desc);
+ +      spin_unlock(&old_desc->lock);
         kfree(old_desc);
+ +      spin_lock(&desc->lock);
+ +
+ +      return desc;
   
   out_unlock:
         spin_unlock_irqrestore(&sparse_irq_lock, flags);
diff --combined kernel/sched.c

index 8ee437a,c71d7d5..fc17fd9
--- 1/kernel/sched.c
--- 2/kernel/sched.c
+++ b/kernel/sched.c
@@@ -2266,16 -2266,6 +2266,16 @@@ static int try_to_wake_up(struct task_s
         if (!sched_feat(SYNC_WAKEUPS))
                 sync = 0;
   
+ +      if (!sync) {
+ +              if (current->se.avg_overlap < sysctl_sched_migration_cost &&
+ +                        p->se.avg_overlap < sysctl_sched_migration_cost)
+ +                      sync = 1;
+ +      } else {
+ +              if (current->se.avg_overlap >= sysctl_sched_migration_cost ||
+ +                        p->se.avg_overlap >= sysctl_sched_migration_cost)
+ +                      sync = 0;
+ +      }
+ +
   #ifdef CONFIG_SMP
         if (sched_feat(LB_WAKEUP_UPDATE)) {
                 struct sched_domain *sd;
@@@ -4697,8 -4687,8 +4697,8 @@@ EXPORT_SYMBOL(default_wake_function)
    * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
    * zero in this (rare) case, and we handle it by continuing to scan the queue.
    */
- -static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
- -                           int nr_exclusive, int sync, void *key)
+ +void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
+ +                      int nr_exclusive, int sync, void *key)
   {
         wait_queue_t *curr, *next;
   
@@@ -5949,12 -5939,7 +5949,7 @@@ void sched_show_task(struct task_struc
                 printk(KERN_CONT " %016lx ", thread_saved_pc(p));
   #endif
   #ifdef CONFIG_DEBUG_STACK_USAGE
-       {
-               unsigned long *n = end_of_stack(p);
-               while (!*n)
-                       n++;
-               free = (unsigned long)n - (unsigned long)end_of_stack(p);
-       }
+       free = stack_not_used(p);
   #endif
         printk(KERN_CONT "%5lu %5d %6d\n", free,
                 task_pid_nr(p), task_pid_nr(p->real_parent));
author	Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
	Wed, 11 Feb 2009 19:52:22 +0000 (11:52 -0800)
committer	Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
	Wed, 11 Feb 2009 19:52:22 +0000 (11:52 -0800)
		1	2
arch/arm/kernel/irq.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/blackfin/kernel/irqchip.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/sparc/kernel/irq_64.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/ia32/ia32entry.S	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/include/asm/page.h	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/include/asm/pgtable.h	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/include/asm/pgtable_64.h	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/acpi/sleep.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/apic.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/cpu/intel_cacheinfo.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/entry_64.S	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/io_apic.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/irqinit_32.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/mach-voyager/setup.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/mach-voyager/voyager_smp.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/mm/fault.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/xen/multicalls.h	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/misc/Kconfig	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/net/sfc/efx.c	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/sched.h	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/fork.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/irq/chip.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/irq/numa_migrate.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/sched.c	patch \|	diff1 \|	diff2 \|	blob \| history