Merge branch 'x86-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git...
[pandora-kernel.git] / arch / x86 / kernel / smpboot.c
index 8b3bfc4..083e99d 100644 (file)
@@ -62,7 +62,7 @@
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 #include <asm/mtrr.h>
-#include <asm/vmi.h>
+#include <asm/mwait.h>
 #include <asm/apic.h>
 #include <asm/setup.h>
 #include <asm/uv/uv.h>
@@ -299,23 +299,16 @@ notrace static void __cpuinit start_secondary(void *unused)
         * fragile that we want to limit the things done here to the
         * most necessary things.
         */
+       cpu_init();
+       preempt_disable();
+       smp_callin();
 
 #ifdef CONFIG_X86_32
-       /*
-        * Switch away from the trampoline page-table
-        *
-        * Do this before cpu_init() because it needs to access per-cpu
-        * data which may not be mapped in the trampoline page-table.
-        */
+       /* switch away from the initial page table */
        load_cr3(swapper_pg_dir);
        __flush_tlb_all();
 #endif
 
-       vmi_bringup();
-       cpu_init();
-       preempt_disable();
-       smp_callin();
-
        /* otherwise gcc will move up smp_processor_id before the cpu_init */
        barrier();
        /*
@@ -324,9 +317,9 @@ notrace static void __cpuinit start_secondary(void *unused)
        check_tsc_sync_target();
 
        if (nmi_watchdog == NMI_IO_APIC) {
-               legacy_pic->chip->mask(0);
+               legacy_pic->mask(0);
                enable_NMI_through_LVT0();
-               legacy_pic->chip->unmask(0);
+               legacy_pic->unmask(0);
        }
 
        /* This must be done before setting cpu_online_mask */
@@ -397,6 +390,19 @@ void __cpuinit smp_store_cpu_info(int id)
                identify_secondary_cpu(c);
 }
 
+static void __cpuinit link_thread_siblings(int cpu1, int cpu2)
+{
+       struct cpuinfo_x86 *c1 = &cpu_data(cpu1);
+       struct cpuinfo_x86 *c2 = &cpu_data(cpu2);
+
+       cpumask_set_cpu(cpu1, cpu_sibling_mask(cpu2));
+       cpumask_set_cpu(cpu2, cpu_sibling_mask(cpu1));
+       cpumask_set_cpu(cpu1, cpu_core_mask(cpu2));
+       cpumask_set_cpu(cpu2, cpu_core_mask(cpu1));
+       cpumask_set_cpu(cpu1, c2->llc_shared_map);
+       cpumask_set_cpu(cpu2, c1->llc_shared_map);
+}
+
 
 void __cpuinit set_cpu_sibling_map(int cpu)
 {
@@ -409,14 +415,13 @@ void __cpuinit set_cpu_sibling_map(int cpu)
                for_each_cpu(i, cpu_sibling_setup_mask) {
                        struct cpuinfo_x86 *o = &cpu_data(i);
 
-                       if (c->phys_proc_id == o->phys_proc_id &&
-                           c->cpu_core_id == o->cpu_core_id) {
-                               cpumask_set_cpu(i, cpu_sibling_mask(cpu));
-                               cpumask_set_cpu(cpu, cpu_sibling_mask(i));
-                               cpumask_set_cpu(i, cpu_core_mask(cpu));
-                               cpumask_set_cpu(cpu, cpu_core_mask(i));
-                               cpumask_set_cpu(i, c->llc_shared_map);
-                               cpumask_set_cpu(cpu, o->llc_shared_map);
+                       if (cpu_has(c, X86_FEATURE_TOPOEXT)) {
+                               if (c->phys_proc_id == o->phys_proc_id &&
+                                   c->compute_unit_id == o->compute_unit_id)
+                                       link_thread_siblings(cpu, i);
+                       } else if (c->phys_proc_id == o->phys_proc_id &&
+                                  c->cpu_core_id == o->cpu_core_id) {
+                               link_thread_siblings(cpu, i);
                        }
                }
        } else {
@@ -742,7 +747,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
                .done   = COMPLETION_INITIALIZER_ONSTACK(c_idle.done),
        };
 
-       INIT_WORK_ON_STACK(&c_idle.work, do_fork_idle);
+       INIT_WORK_ONSTACK(&c_idle.work, do_fork_idle);
 
        alternatives_smp_switch(1);
 
@@ -774,7 +779,6 @@ do_rest:
 #ifdef CONFIG_X86_32
        /* Stack for startup_32 can be just as for start_secondary onwards */
        irq_ctx_init(cpu);
-       initial_page_table = __pa(&trampoline_pg_dir);
 #else
        clear_tsk_thread_flag(c_idle.idle, TIF_FORK);
        initial_gs = per_cpu_offset(cpu);
@@ -923,7 +927,6 @@ int __cpuinit native_cpu_up(unsigned int cpu)
        per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
 
        err = do_boot_cpu(apicid, cpu);
-
        if (err) {
                pr_debug("do_boot_cpu failed %d\n", err);
                return -EIO;
@@ -1109,8 +1112,6 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
        }
        set_cpu_sibling_map(0);
 
-       enable_IR_x2apic();
-       default_setup_apic_routing();
 
        if (smp_sanity_check(max_cpus) < 0) {
                printk(KERN_INFO "SMP disabled\n");
@@ -1118,6 +1119,8 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
                goto out;
        }
 
+       default_setup_apic_routing();
+
        preempt_disable();
        if (read_apic_id() != boot_cpu_physical_apicid) {
                panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
@@ -1370,7 +1373,6 @@ void play_dead_common(void)
 {
        idle_task_exit();
        reset_lazy_tlbstate();
-       irq_ctx_exit(raw_smp_processor_id());
        c1e_remove_cpu(raw_smp_processor_id());
 
        mb();
@@ -1383,11 +1385,88 @@ void play_dead_common(void)
        local_irq_disable();
 }
 
+/*
+ * We need to flush the caches before going to sleep, lest we have
+ * dirty data in our caches when we come back up.
+ */
+static inline void mwait_play_dead(void)
+{
+       unsigned int eax, ebx, ecx, edx;
+       unsigned int highest_cstate = 0;
+       unsigned int highest_subcstate = 0;
+       int i;
+       void *mwait_ptr;
+
+       if (!cpu_has(&current_cpu_data, X86_FEATURE_MWAIT))
+               return;
+       if (!cpu_has(&current_cpu_data, X86_FEATURE_CLFLSH))
+               return;
+       if (current_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
+               return;
+
+       eax = CPUID_MWAIT_LEAF;
+       ecx = 0;
+       native_cpuid(&eax, &ebx, &ecx, &edx);
+
+       /*
+        * eax will be 0 if EDX enumeration is not valid.
+        * Initialized below to cstate, sub_cstate value when EDX is valid.
+        */
+       if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED)) {
+               eax = 0;
+       } else {
+               edx >>= MWAIT_SUBSTATE_SIZE;
+               for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) {
+                       if (edx & MWAIT_SUBSTATE_MASK) {
+                               highest_cstate = i;
+                               highest_subcstate = edx & MWAIT_SUBSTATE_MASK;
+                       }
+               }
+               eax = (highest_cstate << MWAIT_SUBSTATE_SIZE) |
+                       (highest_subcstate - 1);
+       }
+
+       /*
+        * This should be a memory location in a cache line which is
+        * unlikely to be touched by other processors.  The actual
+        * content is immaterial as it is not actually modified in any way.
+        */
+       mwait_ptr = &current_thread_info()->flags;
+
+       wbinvd();
+
+       while (1) {
+               /*
+                * The CLFLUSH is a workaround for erratum AAI65 for
+                * the Xeon 7400 series.  It's not clear it is actually
+                * needed, but it should be harmless in either case.
+                * The WBINVD is insufficient due to the spurious-wakeup
+                * case where we return around the loop.
+                */
+               clflush(mwait_ptr);
+               __monitor(mwait_ptr, 0, 0);
+               mb();
+               __mwait(eax, 0);
+       }
+}
+
+static inline void hlt_play_dead(void)
+{
+       if (current_cpu_data.x86 >= 4)
+               wbinvd();
+
+       while (1) {
+               native_halt();
+       }
+}
+
 void native_play_dead(void)
 {
        play_dead_common();
        tboot_shutdown(TB_SHUTDOWN_WFS);
-       wbinvd_halt();
+
+       mwait_play_dead();      /* Only returns on failure */
+       hlt_play_dead();
 }
 
 #else /* ... !CONFIG_HOTPLUG_CPU */