/* smp.c: Sparc64 SMP support.
*
- * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
+ * Copyright (C) 1997, 2007 David S. Miller (davem@davemloft.net)
*/
#include <linux/module.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/cpudata.h>
+#include <asm/hvtramp.h>
+#include <asm/io.h>
#include <asm/irq.h>
#include <asm/irq_regs.h>
#include <asm/tlb.h>
#include <asm/sections.h>
#include <asm/prom.h>
+#include <asm/mdesc.h>
+#include <asm/ldc.h>
extern void calibrate_delay(void);
-/* Please don't make this stuff initdata!!! --DaveM */
-unsigned char boot_cpu_id;
+int sparc64_multi_core __read_mostly;
+cpumask_t cpu_possible_map __read_mostly = CPU_MASK_NONE;
cpumask_t cpu_online_map __read_mostly = CPU_MASK_NONE;
-cpumask_t phys_cpu_present_map __read_mostly = CPU_MASK_NONE;
cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly =
{ [0 ... NR_CPUS-1] = CPU_MASK_NONE };
+cpumask_t cpu_core_map[NR_CPUS] __read_mostly =
+ { [0 ... NR_CPUS-1] = CPU_MASK_NONE };
+
+EXPORT_SYMBOL(cpu_possible_map);
+EXPORT_SYMBOL(cpu_online_map);
+EXPORT_SYMBOL(cpu_sibling_map);
+EXPORT_SYMBOL(cpu_core_map);
+
static cpumask_t smp_commenced_mask;
static cpumask_t cpu_callout_map;
for_each_online_cpu(i)
seq_printf(m,
- "Cpu%dBogo\t: %lu.%02lu\n"
"Cpu%dClkTck\t: %016lx\n",
- i, cpu_data(i).udelay_val / (500000/HZ),
- (cpu_data(i).udelay_val / (5000/HZ)) % 100,
i, cpu_data(i).clock_tick);
}
-void __init smp_store_cpu_info(int id)
-{
- struct device_node *dp;
- int def;
-
- cpu_data(id).udelay_val = loops_per_jiffy;
-
- cpu_find_by_mid(id, &dp);
- cpu_data(id).clock_tick =
- of_getintprop_default(dp, "clock-frequency", 0);
-
- def = ((tlb_type == hypervisor) ? (8 * 1024) : (16 * 1024));
- cpu_data(id).dcache_size =
- of_getintprop_default(dp, "dcache-size", def);
-
- def = 32;
- cpu_data(id).dcache_line_size =
- of_getintprop_default(dp, "dcache-line-size", def);
-
- def = 16 * 1024;
- cpu_data(id).icache_size =
- of_getintprop_default(dp, "icache-size", def);
-
- def = 32;
- cpu_data(id).icache_line_size =
- of_getintprop_default(dp, "icache-line-size", def);
-
- def = ((tlb_type == hypervisor) ?
- (3 * 1024 * 1024) :
- (4 * 1024 * 1024));
- cpu_data(id).ecache_size =
- of_getintprop_default(dp, "ecache-size", def);
-
- def = 64;
- cpu_data(id).ecache_line_size =
- of_getintprop_default(dp, "ecache-line-size", def);
-
- printk("CPU[%d]: Caches "
- "D[sz(%d):line_sz(%d)] "
- "I[sz(%d):line_sz(%d)] "
- "E[sz(%d):line_sz(%d)]\n",
- id,
- cpu_data(id).dcache_size, cpu_data(id).dcache_line_size,
- cpu_data(id).icache_size, cpu_data(id).icache_line_size,
- cpu_data(id).ecache_size, cpu_data(id).ecache_line_size);
-}
-
extern void setup_sparc64_timer(void);
static volatile unsigned long callin_flag = 0;
-void __init smp_callin(void)
+void __devinit smp_callin(void)
{
int cpuid = hard_smp_processor_id();
+ struct trap_per_cpu *tb = &trap_block[cpuid];
__local_per_cpu_offset = __per_cpu_offset(cpuid);
local_irq_enable();
- calibrate_delay();
- smp_store_cpu_info(cpuid);
callin_flag = 1;
__asm__ __volatile__("membar #Sync\n\t"
"flush %%g6" : : : "memory");
atomic_inc(&init_mm.mm_count);
current->active_mm = &init_mm;
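+ /* The hvtramp descriptor was only needed to bring this cpu
+ * into the kernel; release it now that we are up.
+ */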
+ if (tb->hdesc) {
+ kfree(tb->hdesc);
+ tb->hdesc = NULL;
+ }
+
while (!cpu_isset(cpuid, smp_commenced_mask))
rmb();
spin_unlock_irqrestore(&itc_sync_lock, flags);
}
+#if defined(CONFIG_SUN_LDOMS) && defined(CONFIG_HOTPLUG_CPU)
+/* XXX Put this in some common place. XXX */
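+/* Convert an address inside the kernel image (linked at KERNBASE)
+ * into the real address it was actually loaded at (kern_base).
+ */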
+static unsigned long kimage_addr_to_ra(void *p)
+{
+ unsigned long val = (unsigned long) p;
+
+ return kern_base + (val - KERNBASE);
+}
+
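+/* Boot a cpu via the sun4v hypervisor: build an hvtramp_descr
+ * describing the kernel's locked TLB mappings, the per-cpu fault
+ * status area and the initial thread register, then ask the
+ * hypervisor to start the cpu at the hvtramp trampoline.
+ */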
+static void ldom_startcpu_cpuid(unsigned int cpu, unsigned long thread_reg)
+{
+ extern unsigned long sparc64_ttable_tl0;
+ extern unsigned long kern_locked_tte_data;
+ extern int bigkernel;
+ struct hvtramp_descr *hdesc;
+ unsigned long trampoline_ra;
+ struct trap_per_cpu *tb;
+ u64 tte_vaddr, tte_data;
+ unsigned long hv_err;
+
+ hdesc = kzalloc(sizeof(*hdesc), GFP_KERNEL);
+ if (!hdesc) {
+ printk(KERN_ERR "ldom_startcpu_cpuid: Cannot allocate "
+ "hvtramp_descr.\n");
+ return;
+ }
+
+ hdesc->cpu = cpu;
+ hdesc->num_mappings = (bigkernel ? 2 : 1);
+
+ tb = &trap_block[cpu];
+ tb->hdesc = hdesc;
+
+ hdesc->fault_info_va = (unsigned long) &tb->fault_info;
+ hdesc->fault_info_pa = kimage_addr_to_ra(&tb->fault_info);
+
+ hdesc->thread_reg = thread_reg;
+
+ tte_vaddr = (unsigned long) KERNBASE;
+ tte_data = kern_locked_tte_data;
+
+ hdesc->maps[0].vaddr = tte_vaddr;
+ hdesc->maps[0].tte = tte_data;
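+ /* The kernel image is covered by 4MB locked TTEs starting at
+ * KERNBASE; a "big" kernel needs a second mapping 4MB higher.
+ */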
+ if (bigkernel) {
+ tte_vaddr += 0x400000;
+ tte_data += 0x400000;
+ hdesc->maps[1].vaddr = tte_vaddr;
+ hdesc->maps[1].tte = tte_data;
+ }
+
+ trampoline_ra = kimage_addr_to_ra(hv_cpu_startup);
+
+ hv_err = sun4v_cpu_start(cpu, trampoline_ra,
+ kimage_addr_to_ra(&sparc64_ttable_tl0),
+ __pa(hdesc));
+ if (hv_err)
+ printk(KERN_ERR "ldom_startcpu_cpuid: sun4v_cpu_start() "
+ "gives error %lu\n", hv_err);
+}
+#endif
+
extern void sun4v_init_mondo_queues(int use_bootmem, int cpu, int alloc, int load);
extern unsigned long sparc64_cpu_startup;
/* Alloc the mondo queues, cpu will load them. */
sun4v_init_mondo_queues(0, cpu, 1, 0);
- prom_startcpu_cpuid(cpu, entry, cookie);
+#if defined(CONFIG_SUN_LDOMS) && defined(CONFIG_HOTPLUG_CPU)
+ if (ldom_domaining_enabled)
+ ldom_startcpu_cpuid(cpu,
+ (unsigned long) cpu_new_thread);
+ else
+#endif
+ prom_startcpu_cpuid(cpu, entry, cookie);
} else {
- struct device_node *dp;
+ struct device_node *dp = of_find_node_by_cpuid(cpu);
- cpu_find_by_mid(cpu, &dp);
prom_startcpu(dp->node, entry, cookie);
}
- for (timeout = 0; timeout < 5000000; timeout++) {
+ for (timeout = 0; timeout < 50000; timeout++) {
if (callin_flag)
break;
udelay(100);
static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask)
{
u64 pstate, ver;
- int nack_busy_id, is_jbus;
+ int nack_busy_id, is_jbus, need_more;
if (cpus_empty(mask))
return;
__asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));
retry:
+ need_more = 0;
__asm__ __volatile__("wrpr %0, %1, %%pstate\n\t"
: : "r" (pstate), "i" (PSTATE_IE));
: /* no outputs */
: "r" (target), "i" (ASI_INTR_W));
nack_busy_id++;
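+ /* The interrupt dispatch status register can only
+ * track 32 outstanding sends; note that more cpus
+ * remain and stop issuing for this pass.
+ */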
+ if (nack_busy_id == 32) {
+ need_more = 1;
+ break;
+ }
}
}
if (dispatch_stat == 0UL) {
__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
: : "r" (pstate));
+ if (unlikely(need_more)) {
+ int i, cnt = 0;
+ for_each_cpu_mask(i, mask) {
+ cpu_clear(i, mask);
+ cnt++;
+ if (cnt == 32)
+ break;
+ }
+ goto retry;
+ }
return;
}
if (!--stuck)
if ((dispatch_stat & check_mask) == 0)
cpu_clear(i, mask);
this_busy_nack += 2;
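+ /* Only 32 dispatches were issued, at two status
+ * bits each, so stop scanning after 64 bits.
+ */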
+ if (this_busy_nack == 64)
+ break;
}
goto retry;
unsigned long flags, status;
int cnt, retries, this_cpu, prev_sent, i;
+ if (cpus_empty(mask))
+ return;
+
/* We have to do this whole thing with interrupts fully disabled.
* Otherwise if we send an xcall from interrupt context it will
* corrupt both our mondo block and cpu list state.
preempt_enable();
}
-void __init smp_tick_init(void)
-{
- boot_cpu_id = hard_smp_processor_id();
-}
-
/* /proc/profile writes can call this, don't __init it please. */
int setup_profiling_timer(unsigned int multiplier)
{
return -EINVAL;
}
-static void __init smp_tune_scheduling(void)
+void __init smp_prepare_cpus(unsigned int max_cpus)
{
- struct device_node *dp;
- int instance;
- unsigned int def, smallest = ~0U;
-
- def = ((tlb_type == hypervisor) ?
- (3 * 1024 * 1024) :
- (4 * 1024 * 1024));
-
- instance = 0;
- while (!cpu_find_by_instance(instance, &dp, NULL)) {
- unsigned int val;
-
- val = of_getintprop_default(dp, "ecache-size", def);
- if (val < smallest)
- smallest = val;
-
- instance++;
- }
-
- /* Any value less than 256K is nonsense. */
- if (smallest < (256U * 1024U))
- smallest = 256 * 1024;
-
- max_cache_size = smallest;
+}
- if (smallest < 1U * 1024U * 1024U)
- printk(KERN_INFO "Using max_cache_size of %uKB\n",
- smallest / 1024U);
- else
- printk(KERN_INFO "Using max_cache_size of %uMB\n",
- smallest / 1024U / 1024U);
+void __devinit smp_prepare_boot_cpu(void)
+{
}
-/* Constrain the number of cpus to max_cpus. */
-void __init smp_prepare_cpus(unsigned int max_cpus)
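+/* Group cpus sharing a core into cpu_core_map (by core_id) and cpus
+ * sharing a physical processor into cpu_sibling_map (by proc_id); on
+ * sun4v these IDs come from the machine description.  A cpu with no
+ * such information is mapped only to itself.
+ */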
+void __devinit smp_fill_in_sib_core_maps(void)
{
- int i;
+ unsigned int i;
- if (num_possible_cpus() > max_cpus) {
- int instance, mid;
+ for_each_possible_cpu(i) {
+ unsigned int j;
- instance = 0;
- while (!cpu_find_by_instance(instance, NULL, &mid)) {
- if (mid != boot_cpu_id) {
- cpu_clear(mid, phys_cpu_present_map);
- cpu_clear(mid, cpu_present_map);
- if (num_possible_cpus() <= max_cpus)
- break;
- }
- instance++;
+ if (cpu_data(i).core_id == 0) {
+ cpu_set(i, cpu_core_map[i]);
+ continue;
+ }
+
+ for_each_possible_cpu(j) {
+ if (cpu_data(i).core_id ==
+ cpu_data(j).core_id)
+ cpu_set(j, cpu_core_map[i]);
}
}
for_each_possible_cpu(i) {
- if (tlb_type == hypervisor) {
- int j;
+ unsigned int j;
- /* XXX get this mapping from machine description */
- for_each_possible_cpu(j) {
- if ((j >> 2) == (i >> 2))
- cpu_set(j, cpu_sibling_map[i]);
- }
- } else {
+ if (cpu_data(i).proc_id == -1) {
cpu_set(i, cpu_sibling_map[i]);
+ continue;
}
- }
-
- smp_store_cpu_info(boot_cpu_id);
- smp_tune_scheduling();
-}
-
-/* Set this up early so that things like the scheduler can init
- * properly. We use the same cpu mask for both the present and
- * possible cpu map.
- */
-void __init smp_setup_cpu_possible_map(void)
-{
- int instance, mid;
- instance = 0;
- while (!cpu_find_by_instance(instance, NULL, &mid)) {
- if (mid < NR_CPUS) {
- cpu_set(mid, phys_cpu_present_map);
- cpu_set(mid, cpu_present_map);
+ for_each_possible_cpu(j) {
+ if (cpu_data(i).proc_id ==
+ cpu_data(j).proc_id)
+ cpu_set(j, cpu_sibling_map[i]);
}
- instance++;
}
}
-void __devinit smp_prepare_boot_cpu(void)
-{
-}
-
int __cpuinit __cpu_up(unsigned int cpu)
{
int ret = smp_boot_one_cpu(cpu);
return ret;
}
-void __init smp_cpus_done(unsigned int max_cpus)
+#ifdef CONFIG_HOTPLUG_CPU
+int __cpu_disable(void)
{
- unsigned long bogosum = 0;
- int i;
+ printk(KERN_ERR "SMP: __cpu_disable() on cpu %d\n",
+ smp_processor_id());
+ return -ENODEV;
+}
- for_each_online_cpu(i)
- bogosum += cpu_data(i).udelay_val;
- printk("Total of %ld processors activated "
- "(%lu.%02lu BogoMIPS).\n",
- (long) num_online_cpus(),
- bogosum/(500000/HZ),
- (bogosum/(5000/HZ))%100);
+void __cpu_die(unsigned int cpu)
+{
+ printk(KERN_ERR "SMP: __cpu_die(%u)\n", cpu);
+}
+#endif
+
+void __init smp_cpus_done(unsigned int max_cpus)
+{
}
void smp_send_reschedule(int cpu)
EXPORT_SYMBOL(__per_cpu_base);
EXPORT_SYMBOL(__per_cpu_shift);
-void __init setup_per_cpu_areas(void)
+void __init real_setup_per_cpu_areas(void)
{
unsigned long goal, size, i;
char *ptr;