[PATCH] x86-64: Remove apic_write_around from smpboot.c
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
index e773a79..d4b6328 100644
--- a/arch/x86_64/kernel/smpboot.c
+++ b/arch/x86_64/kernel/smpboot.c
 /* Number of siblings per CPU package */
 int smp_num_siblings = 1;
 /* Package ID of each logical CPU */
-u8 phys_proc_id[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
-u8 cpu_core_id[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
+u8 phys_proc_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };
+u8 cpu_core_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };
 EXPORT_SYMBOL(phys_proc_id);
 EXPORT_SYMBOL(cpu_core_id);
 
 /* Bitmask of currently online CPUs */
-cpumask_t cpu_online_map;
+cpumask_t cpu_online_map __read_mostly;
 
 EXPORT_SYMBOL(cpu_online_map);
 
@@ -88,8 +88,8 @@ struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
 /* Set when the idlers are all forked */
 int smp_threads_ready;
 
-cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
-cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
+cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;
+cpumask_t cpu_core_map[NR_CPUS] __read_mostly;
 EXPORT_SYMBOL(cpu_core_map);
 
 /*
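
The __read_mostly annotations replace per-array __cacheline_aligned padding: instead of giving each array its own cache line, rarely-written data is grouped into one linker section away from frequently-written variables. A minimal standalone sketch of the mechanism, assuming the usual section-attribute definition (the macro and variable names here are illustrative, not quoted from <linux/cache.h>):

/* Illustrative stand-in for __read_mostly: put rarely-written data in a
 * dedicated section so it does not share cache lines with hot, frequently
 * written data.  The section name is an assumption. */
#define demo_read_mostly __attribute__((__section__(".data.read_mostly")))

static int boot_cpu_count demo_read_mostly = 1;	/* written once during boot */
static long hot_counter;				/* updated constantly; stays in .data */
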
@@ -112,24 +112,6 @@ struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ;
 #define get_idle_for_cpu(x)     (idle_thread_array[(x)])
 #define set_idle_for_cpu(x,p)   (idle_thread_array[(x)] = (p))
 
-/*
- * cpu_possible_map should be static, it cannot change as cpu's
- * are onlined, or offlined. The reason is per-cpu data-structures
- * are allocated by some modules at init time, and dont expect to
- * do this dynamically on cpu arrival/departure.
- * cpu_present_map on the other hand can change dynamically.
- * In case when cpu_hotplug is not compiled, then we resort to current
- * behaviour, which is cpu_possible == cpu_present.
- * If cpu-hotplug is supported, then we need to preallocate for all
- * those NR_CPUS, hence cpu_possible_map represents entire NR_CPUS range.
- * - Ashok Raj
- */
-#ifdef CONFIG_HOTPLUG_CPU
-#define fixup_cpu_possible_map(x)      cpu_set((x), cpu_possible_map)
-#else
-#define fixup_cpu_possible_map(x)
-#endif
-
 /*
  * Currently trivial. Write the real->protected mode
  * bootstrap into the page concerned. The caller
@@ -229,9 +211,6 @@ static __cpuinit void sync_master(void *arg)
 {
        unsigned long flags, i;
 
-       if (smp_processor_id() != 0)
-               return;
-
        go[MASTER] = 0;
 
        local_irq_save(flags);
@@ -280,7 +259,7 @@ get_delta(long *rt, long *master)
        return tcenter - best_tm;
 }
 
-static __cpuinit void sync_tsc(void)
+static __cpuinit void sync_tsc(unsigned int master)
 {
        int i, done = 0;
        long delta, adj, adjust_latency = 0;
@@ -294,9 +273,17 @@ static __cpuinit void sync_tsc(void)
        } t[NUM_ROUNDS] __cpuinitdata;
 #endif
 
+       printk(KERN_INFO "CPU %d: Syncing TSC to CPU %u.\n",
+               smp_processor_id(), master);
+
        go[MASTER] = 1;
 
-       smp_call_function(sync_master, NULL, 1, 0);
+       /* It is dangerous to broadcast an IPI while cpus are coming up,
+        * as they may not be ready to accept it.  Since we only need to
+        * reach the boot cpu, direct the IPI there instead of
+        * broadcasting, and avoid the race.
+        */
+       smp_call_function_single(master, sync_master, NULL, 1, 0);
 
        while (go[MASTER])      /* wait for master to be ready */
                no_cpu_relax();
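
smp_call_function_single() directs the IPI at one CPU instead of broadcasting to every other online CPU, which is what made the old call racy while secondaries were still coming up. A hedged usage sketch of the era's five-argument form (the probe function and caller below are hypothetical):

#include <linux/kernel.h>
#include <linux/smp.h>

static void probe_one_cpu(void *info)
{
	printk(KERN_INFO "probe ran on CPU %d\n", smp_processor_id());
}

static void probe_boot_cpu(void)
{
	/* arguments: target cpu, function, info, nonatomic, wait */
	smp_call_function_single(0, probe_one_cpu, NULL, 1, 0);
}
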
@@ -340,16 +327,14 @@ static __cpuinit void sync_tsc(void)
        printk(KERN_INFO
               "CPU %d: synchronized TSC with CPU %u (last diff %ld cycles, "
               "maxerr %lu cycles)\n",
-              smp_processor_id(), boot_cpu_id, delta, rt);
+              smp_processor_id(), master, delta, rt);
 }
 
 static void __cpuinit tsc_sync_wait(void)
 {
        if (notscsync || !cpu_has_tsc)
                return;
-       printk(KERN_INFO "CPU %d: Syncing TSC to CPU %u.\n", smp_processor_id(),
-                       boot_cpu_id);
-       sync_tsc();
+       sync_tsc(0);
 }
 
 static __init int notscsync_setup(char *s)
@@ -507,6 +492,14 @@ void __cpuinit start_secondary(void)
         */
        set_cpu_sibling_map(smp_processor_id());
 
+       /*
+        * Wait for TSC sync here so nothing gets scheduled before the
+        * clocks agree.  We still process interrupts, which could see
+        * an inconsistent time in that window, unfortunately.
+        * Do this here because TSC sync has global unprotected state.
+        */
+       tsc_sync_wait();
+
        /*
         * We need to hold call_lock, so there is no inconsistency
         * between the time smp_call_function() determines number of
@@ -524,13 +517,6 @@ void __cpuinit start_secondary(void)
        per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
        unlock_ipi_call_lock();
 
-       mb();
-
-       /* Wait for TSC sync to not schedule things before.
-          We still process interrupts, which could see an inconsistent
-          time in that window unfortunately. */
-       tsc_sync_wait();
-
        cpu_idle();
 }
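
Taken together, the two start_secondary() hunks above move tsc_sync_wait() ahead of the point where the CPU is marked online; once online, other CPUs may IPI into it while the sync code's go[]/tsc state is global and unprotected. An illustrative outline of the resulting order (not the literal function body; the exact placement of the online-map update is an assumption based on the call_lock comment):

void start_secondary_outline(void)
{
	set_cpu_sibling_map(smp_processor_id());

	tsc_sync_wait();	/* sync while nobody else can IPI us yet */

	lock_ipi_call_lock();
	/* assumption: this is where the CPU joins cpu_online_map */
	cpu_set(smp_processor_id(), cpu_online_map);
	per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
	unlock_ipi_call_lock();

	cpu_idle();
}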
 
@@ -554,8 +540,8 @@ static void inquire_remote_apic(int apicid)
                 */
                apic_wait_icr_idle();
 
-               apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
-               apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]);
+               apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
+               apic_write(APIC_ICR, APIC_DM_REMRD | regs[i]);
 
                timeout = 0;
                do {
@@ -588,12 +574,12 @@ static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int sta
        /*
         * Turn INIT on target chip
         */
-       apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
+       apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
 
        /*
         * Send IPI
         */
-       apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
+       apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
                                | APIC_DM_INIT);
 
        Dprintk("Waiting for send to finish...\n");
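
The apic_write_around() -> apic_write() conversions above and in the remaining hunks are expected to be a pure cleanup: on x86-64 the wrapper is believed to be a plain alias (the read-around/atomic-write workaround only matters for certain 32-bit APICs), so no behaviour should change. The presumed definition being dropped, stated as an assumption rather than a quote of the header:

/* Presumed x86-64 definition of the removed wrapper; if it were anything
 * more than this alias, the conversion would not be a no-op. */
#define apic_write_around(reg, val)	apic_write((reg), (val))
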
@@ -609,10 +595,10 @@ static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int sta
        Dprintk("Deasserting INIT.\n");
 
        /* Target chip */
-       apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
+       apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
 
        /* Send IPI */
-       apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
+       apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
 
        Dprintk("Waiting for send to finish...\n");
        timeout = 0;
@@ -654,12 +640,11 @@ static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int sta
                 */
 
                /* Target chip */
-               apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
+               apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
 
                /* Boot on the stack */
                /* Kick the second */
-               apic_write_around(APIC_ICR, APIC_DM_STARTUP
-                                       | (start_rip >> 12));
+               apic_write(APIC_ICR, APIC_DM_STARTUP | (start_rip >> 12));
 
                /*
                 * Give the other CPU some time to accept the IPI.
@@ -773,8 +758,9 @@ do_rest:
        initial_code = start_secondary;
        clear_ti_thread_flag(c_idle.idle->thread_info, TIF_FORK);
 
-       printk(KERN_INFO "Booting processor %d/%d rip %lx rsp %lx\n", cpu, apicid,
-              start_rip, init_rsp);
+       printk(KERN_INFO "Booting processor %d/%d APIC 0x%x\n", cpu,
+               cpus_weight(cpu_present_map),
+               apicid);
 
        /*
         * This grunge runs the startup process for
@@ -907,22 +893,26 @@ static __init void disable_smp(void)
        cpu_set(0, cpu_core_map[0]);
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
 /*
- * Handle user cpus=... parameter.
+ * cpu_possible_map should be static; it cannot change as CPUs
+ * are onlined or offlined.  The reason is that per-cpu data
+ * structures are allocated by some modules at init time, and they
+ * don't expect to do this dynamically on cpu arrival/departure.
+ * cpu_present_map, on the other hand, can change dynamically.
+ * When cpu-hotplug is not compiled in, we keep the current
+ * behaviour, which is cpu_possible == cpu_present.
+ * If cpu-hotplug is supported, then we need to preallocate for all
+ * of NR_CPUS, hence cpu_possible_map represents the entire NR_CPUS range.
+ * - Ashok Raj
  */
-static __init void enforce_max_cpus(unsigned max_cpus)
+static void prefill_possible_map(void)
 {
-       int i, k;
-       k = 0;
-       for (i = 0; i < NR_CPUS; i++) {
-               if (!cpu_possible(i))
-                       continue;
-               if (++k > max_cpus) {
-                       cpu_clear(i, cpu_possible_map);
-                       cpu_clear(i, cpu_present_map);
-               }
-       }
+       int i;
+       for (i = 0; i < NR_CPUS; i++)
+               cpu_set(i, cpu_possible_map);
 }
+#endif
 
 /*
  * Various sanity checks.
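
The comment above is why prefill_possible_map() must run before any per-CPU allocations: allocators size their storage from cpu_possible_map once, at init time. A hypothetical module illustrating the dependency (all names below are made up):

#include <linux/init.h>
#include <linux/errno.h>
#include <linux/percpu.h>

static int *demo_counters;	/* one slot per *possible* CPU */

static int __init demo_counters_init(void)
{
	/* Sized from cpu_possible_map at this point; a CPU that only
	 * became "possible" afterwards would have no slot. */
	demo_counters = alloc_percpu(int);
	return demo_counters ? 0 : -ENOMEM;
}
__initcall(demo_counters_init);
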
@@ -987,25 +977,13 @@ static int __init smp_sanity_check(unsigned max_cpus)
  */
 void __init smp_prepare_cpus(unsigned int max_cpus)
 {
-       int i;
-
        nmi_watchdog_default();
        current_cpu_data = boot_cpu_data;
        current_thread_info()->cpu = 0;  /* needed? */
 
-       enforce_max_cpus(max_cpus);
-
-       /*
-        * Fill in cpu_present_mask
-        */
-       for (i = 0; i < NR_CPUS; i++) {
-               int apicid = cpu_present_to_apicid(i);
-               if (physid_isset(apicid, phys_cpu_present_map)) {
-                       cpu_set(i, cpu_present_map);
-                       cpu_set(i, cpu_possible_map);
-               }
-               fixup_cpu_possible_map(i);
-       }
+#ifdef CONFIG_HOTPLUG_CPU
+       prefill_possible_map();
+#endif
 
        if (smp_sanity_check(max_cpus) < 0) {
                printk(KERN_INFO "SMP disabled\n");
@@ -1189,8 +1167,7 @@ void __cpu_die(unsigned int cpu)
                        printk ("CPU %d is now offline\n", cpu);
                        return;
                }
-               current->state = TASK_UNINTERRUPTIBLE;
-               schedule_timeout(HZ/10);
+               msleep(100);
        }
        printk(KERN_ERR "CPU %u didn't die...\n", cpu);
 }
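
msleep() replaces the open-coded TASK_UNINTERRUPTIBLE/schedule_timeout() pair; roughly, it performs the same steps in a loop so the sleep lasts at least the requested time. A simplified sketch (not the exact kernel source):

#include <linux/jiffies.h>
#include <linux/sched.h>

static void msleep_sketch(unsigned int msecs)
{
	unsigned long timeout = msecs_to_jiffies(msecs) + 1;

	while (timeout) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		timeout = schedule_timeout(timeout);
	}
}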