/* smp.c: Sparc64 SMP support.
*
- * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
+ * Copyright (C) 1997, 2007 David S. Miller (davem@davemloft.net)
*/
#include <linux/module.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/cpudata.h>
+#include <asm/hvtramp.h>
+#include <asm/io.h>
#include <asm/irq.h>
#include <asm/irq_regs.h>
#include <asm/tlb.h>
#include <asm/sections.h>
#include <asm/prom.h>
+#include <asm/mdesc.h>
+#include <asm/ldc.h>
extern void calibrate_delay(void);
-/* Please don't make this stuff initdata!!! --DaveM */
-unsigned char boot_cpu_id;
+int sparc64_multi_core __read_mostly;
+cpumask_t cpu_possible_map __read_mostly = CPU_MASK_NONE;
cpumask_t cpu_online_map __read_mostly = CPU_MASK_NONE;
-cpumask_t phys_cpu_present_map __read_mostly = CPU_MASK_NONE;
cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly =
{ [0 ... NR_CPUS-1] = CPU_MASK_NONE };
+cpumask_t cpu_core_map[NR_CPUS] __read_mostly =
+ { [0 ... NR_CPUS-1] = CPU_MASK_NONE };
+
+EXPORT_SYMBOL(cpu_possible_map);
+EXPORT_SYMBOL(cpu_online_map);
+EXPORT_SYMBOL(cpu_sibling_map);
+EXPORT_SYMBOL(cpu_core_map);
+
static cpumask_t smp_commenced_mask;
static cpumask_t cpu_callout_map;
for_each_online_cpu(i)
seq_printf(m,
- "Cpu%dBogo\t: %lu.%02lu\n"
"Cpu%dClkTck\t: %016lx\n",
- i, cpu_data(i).udelay_val / (500000/HZ),
- (cpu_data(i).udelay_val / (5000/HZ)) % 100,
i, cpu_data(i).clock_tick);
}
-void __init smp_store_cpu_info(int id)
-{
- struct device_node *dp;
- int def;
-
- cpu_data(id).udelay_val = loops_per_jiffy;
-
- cpu_find_by_mid(id, &dp);
- cpu_data(id).clock_tick =
- of_getintprop_default(dp, "clock-frequency", 0);
-
- def = ((tlb_type == hypervisor) ? (8 * 1024) : (16 * 1024));
- cpu_data(id).dcache_size =
- of_getintprop_default(dp, "dcache-size", def);
-
- def = 32;
- cpu_data(id).dcache_line_size =
- of_getintprop_default(dp, "dcache-line-size", def);
-
- def = 16 * 1024;
- cpu_data(id).icache_size =
- of_getintprop_default(dp, "icache-size", def);
-
- def = 32;
- cpu_data(id).icache_line_size =
- of_getintprop_default(dp, "icache-line-size", def);
-
- def = ((tlb_type == hypervisor) ?
- (3 * 1024 * 1024) :
- (4 * 1024 * 1024));
- cpu_data(id).ecache_size =
- of_getintprop_default(dp, "ecache-size", def);
-
- def = 64;
- cpu_data(id).ecache_line_size =
- of_getintprop_default(dp, "ecache-line-size", def);
-
- printk("CPU[%d]: Caches "
- "D[sz(%d):line_sz(%d)] "
- "I[sz(%d):line_sz(%d)] "
- "E[sz(%d):line_sz(%d)]\n",
- id,
- cpu_data(id).dcache_size, cpu_data(id).dcache_line_size,
- cpu_data(id).icache_size, cpu_data(id).icache_line_size,
- cpu_data(id).ecache_size, cpu_data(id).ecache_line_size);
-}
-
extern void setup_sparc64_timer(void);
static volatile unsigned long callin_flag = 0;
-void __init smp_callin(void)
+void __devinit smp_callin(void)
{
int cpuid = hard_smp_processor_id();
+ struct trap_per_cpu *tb = &trap_block[cpuid];
__local_per_cpu_offset = __per_cpu_offset(cpuid);
local_irq_enable();
- calibrate_delay();
- smp_store_cpu_info(cpuid);
callin_flag = 1;
__asm__ __volatile__("membar #Sync\n\t"
"flush %%g6" : : : "memory");
atomic_inc(&init_mm.mm_count);
current->active_mm = &init_mm;
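+ /* The hvtramp descriptor was only needed to bring this cpu
+ * into the kernel; release it now that we are up.
+ */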
+ if (tb->hdesc) {
+ kfree(tb->hdesc);
+ tb->hdesc = NULL;
+ }
+
while (!cpu_isset(cpuid, smp_commenced_mask))
rmb();
spin_unlock_irqrestore(&itc_sync_lock, flags);
}
+#if defined(CONFIG_SUN_LDOMS) && defined(CONFIG_HOTPLUG_CPU)
+/* XXX Put this in some common place. XXX */
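+/* Convert an address inside the kernel image (linked at KERNBASE)
+ * into the real address it was actually loaded at (kern_base).
+ */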
+static unsigned long kimage_addr_to_ra(void *p)
+{
+ unsigned long val = (unsigned long) p;
+
+ return kern_base + (val - KERNBASE);
+}
+
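+/* Boot a cpu via the sun4v hypervisor: build an hvtramp_descr
+ * describing the kernel's locked TLB mappings, the per-cpu fault
+ * status area and the initial thread register, then ask the
+ * hypervisor to start the cpu at the hvtramp trampoline.
+ */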
+static void ldom_startcpu_cpuid(unsigned int cpu, unsigned long thread_reg)
+{
+ extern unsigned long sparc64_ttable_tl0;
+ extern unsigned long kern_locked_tte_data;
+ extern int bigkernel;
+ struct hvtramp_descr *hdesc;
+ unsigned long trampoline_ra;
+ struct trap_per_cpu *tb;
+ u64 tte_vaddr, tte_data;
+ unsigned long hv_err;
+
+ hdesc = kzalloc(sizeof(*hdesc), GFP_KERNEL);
+ if (!hdesc) {
+ printk(KERN_ERR "ldom_startcpu_cpuid: Cannot allocate "
+ "hvtramp_descr.\n");
+ return;
+ }
+
+ hdesc->cpu = cpu;
+ hdesc->num_mappings = (bigkernel ? 2 : 1);
+
+ tb = &trap_block[cpu];
+ tb->hdesc = hdesc;
+
+ hdesc->fault_info_va = (unsigned long) &tb->fault_info;
+ hdesc->fault_info_pa = kimage_addr_to_ra(&tb->fault_info);
+
+ hdesc->thread_reg = thread_reg;
+
+ tte_vaddr = (unsigned long) KERNBASE;
+ tte_data = kern_locked_tte_data;
+
+ hdesc->maps[0].vaddr = tte_vaddr;
+ hdesc->maps[0].tte = tte_data;
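+ /* The kernel image is covered by 4MB locked TTEs starting at
+ * KERNBASE; a "big" kernel needs a second mapping 4MB higher.
+ */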
+ if (bigkernel) {
+ tte_vaddr += 0x400000;
+ tte_data += 0x400000;
+ hdesc->maps[1].vaddr = tte_vaddr;
+ hdesc->maps[1].tte = tte_data;
+ }
+
+ trampoline_ra = kimage_addr_to_ra(hv_cpu_startup);
+
+ hv_err = sun4v_cpu_start(cpu, trampoline_ra,
+ kimage_addr_to_ra(&sparc64_ttable_tl0),
+ __pa(hdesc));
+ if (hv_err)
+ printk(KERN_ERR "ldom_startcpu_cpuid: sun4v_cpu_start() "
+ "gives error %lu\n", hv_err);
+}
+#endif
+
extern void sun4v_init_mondo_queues(int use_bootmem, int cpu, int alloc, int load);
extern unsigned long sparc64_cpu_startup;
/* Alloc the mondo queues, cpu will load them. */
sun4v_init_mondo_queues(0, cpu, 1, 0);
- prom_startcpu_cpuid(cpu, entry, cookie);
+#if defined(CONFIG_SUN_LDOMS) && defined(CONFIG_HOTPLUG_CPU)
+ if (ldom_domaining_enabled)
+ ldom_startcpu_cpuid(cpu,
+ (unsigned long) cpu_new_thread);
+ else
+#endif
+ prom_startcpu_cpuid(cpu, entry, cookie);
} else {
- struct device_node *dp;
+ struct device_node *dp = of_find_node_by_cpuid(cpu);
- cpu_find_by_mid(cpu, &dp);
prom_startcpu(dp->node, entry, cookie);
}
- for (timeout = 0; timeout < 5000000; timeout++) {
+ for (timeout = 0; timeout < 50000; timeout++) {
if (callin_flag)
break;
udelay(100);
static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask)
{
u64 pstate, ver;
- int nack_busy_id, is_jbus;
+ int nack_busy_id, is_jbus, need_more;
if (cpus_empty(mask))
return;
__asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));
retry:
+ need_more = 0;
__asm__ __volatile__("wrpr %0, %1, %%pstate\n\t"
: : "r" (pstate), "i" (PSTATE_IE));
: /* no outputs */
: "r" (target), "i" (ASI_INTR_W));
nack_busy_id++;
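+ /* The interrupt dispatch status register can only
+ * track 32 outstanding sends; note that more cpus
+ * remain and stop issuing for this pass.
+ */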
+ if (nack_busy_id == 32) {
+ need_more = 1;
+ break;
+ }
}
}
if (dispatch_stat == 0UL) {
__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
: : "r" (pstate));
+ if (unlikely(need_more)) {
+ int i, cnt = 0;
+ for_each_cpu_mask(i, mask) {
+ cpu_clear(i, mask);
+ cnt++;
+ if (cnt == 32)
+ break;
+ }
+ goto retry;
+ }
return;
}
if (!--stuck)
if ((dispatch_stat & check_mask) == 0)
cpu_clear(i, mask);
this_busy_nack += 2;
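+ /* Only 32 dispatches were issued, at two status
+ * bits each, so stop scanning after 64 bits.
+ */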
+ if (this_busy_nack == 64)
+ break;
}
goto retry;
unsigned long flags, status;
int cnt, retries, this_cpu, prev_sent, i;
+ if (cpus_empty(mask))
+ return;
+
/* We have to do this whole thing with interrupts fully disabled.
* Otherwise if we send an xcall from interrupt context it will
* corrupt both our mondo block and cpu list state.
preempt_enable();
}
-void __init smp_tick_init(void)
-{
- boot_cpu_id = hard_smp_processor_id();
-}
-
/* /proc/profile writes can call this, don't __init it please. */
int setup_profiling_timer(unsigned int multiplier)
{
return -EINVAL;
}
-static void __init smp_tune_scheduling(void)
+void __init smp_prepare_cpus(unsigned int max_cpus)
{
- struct device_node *dp;
- int instance;
- unsigned int def, smallest = ~0U;
-
- def = ((tlb_type == hypervisor) ?
- (3 * 1024 * 1024) :
- (4 * 1024 * 1024));
-
- instance = 0;
- while (!cpu_find_by_instance(instance, &dp, NULL)) {
- unsigned int val;
-
- val = of_getintprop_default(dp, "ecache-size", def);
- if (val < smallest)
- smallest = val;
-
- instance++;
- }
-
- /* Any value less than 256K is nonsense. */
- if (smallest < (256U * 1024U))
- smallest = 256 * 1024;
-
- max_cache_size = smallest;
+}
- if (smallest < 1U * 1024U * 1024U)
- printk(KERN_INFO "Using max_cache_size of %uKB\n",
- smallest / 1024U);
- else
- printk(KERN_INFO "Using max_cache_size of %uMB\n",
- smallest / 1024U / 1024U);
+void __devinit smp_prepare_boot_cpu(void)
+{
}
-/* Constrain the number of cpus to max_cpus. */
-void __init smp_prepare_cpus(unsigned int max_cpus)
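+/* Group cpus sharing a core into cpu_core_map (by core_id) and cpus
+ * sharing a physical processor into cpu_sibling_map (by proc_id); on
+ * sun4v these IDs come from the machine description.  A cpu with no
+ * such information is mapped only to itself.
+ */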
+void __devinit smp_fill_in_sib_core_maps(void)
{
- int i;
+ unsigned int i;
- if (num_possible_cpus() > max_cpus) {
- int instance, mid;
+ for_each_possible_cpu(i) {
+ unsigned int j;
- instance = 0;
- while (!cpu_find_by_instance(instance, NULL, &mid)) {
- if (mid != boot_cpu_id) {
- cpu_clear(mid, phys_cpu_present_map);
- cpu_clear(mid, cpu_present_map);
- if (num_possible_cpus() <= max_cpus)
- break;
- }
- instance++;
+ if (cpu_data(i).core_id == 0) {
+ cpu_set(i, cpu_core_map[i]);
+ continue;
+ }
+
+ for_each_possible_cpu(j) {
+ if (cpu_data(i).core_id ==
+ cpu_data(j).core_id)
+ cpu_set(j, cpu_core_map[i]);
}
}
for_each_possible_cpu(i) {
- if (tlb_type == hypervisor) {
- int j;
+ unsigned int j;
- /* XXX get this mapping from machine description */
- for_each_possible_cpu(j) {
- if ((j >> 2) == (i >> 2))
- cpu_set(j, cpu_sibling_map[i]);
- }
- } else {
+ if (cpu_data(i).proc_id == -1) {
cpu_set(i, cpu_sibling_map[i]);
+ continue;
}
- }
-
- smp_store_cpu_info(boot_cpu_id);
- smp_tune_scheduling();
-}
-
-/* Set this up early so that things like the scheduler can init
- * properly. We use the same cpu mask for both the present and
- * possible cpu map.
- */
-void __init smp_setup_cpu_possible_map(void)
-{
- int instance, mid;
- instance = 0;
- while (!cpu_find_by_instance(instance, NULL, &mid)) {
- if (mid < NR_CPUS) {
- cpu_set(mid, phys_cpu_present_map);
- cpu_set(mid, cpu_present_map);
+ for_each_possible_cpu(j) {
+ if (cpu_data(i).proc_id ==
+ cpu_data(j).proc_id)
+ cpu_set(j, cpu_sibling_map[i]);
}
- instance++;
}
}
-void __devinit smp_prepare_boot_cpu(void)
-{
-}
-
int __cpuinit __cpu_up(unsigned int cpu)
{
int ret = smp_boot_one_cpu(cpu);
return ret;
}
-void __init smp_cpus_done(unsigned int max_cpus)
+#ifdef CONFIG_HOTPLUG_CPU
+int __cpu_disable(void)
{
- unsigned long bogosum = 0;
- int i;
+ printk(KERN_ERR "SMP: __cpu_disable() on cpu %d\n",
+ smp_processor_id());
+ return -ENODEV;
+}
- for_each_online_cpu(i)
- bogosum += cpu_data(i).udelay_val;
- printk("Total of %ld processors activated "
- "(%lu.%02lu BogoMIPS).\n",
- (long) num_online_cpus(),
- bogosum/(500000/HZ),
- (bogosum/(5000/HZ))%100);
+void __cpu_die(unsigned int cpu)
+{
+ printk(KERN_ERR "SMP: __cpu_die(%u)\n", cpu);
+}
+#endif
+
+void __init smp_cpus_done(unsigned int max_cpus)
+{
}
void smp_send_reschedule(int cpu)
EXPORT_SYMBOL(__per_cpu_base);
EXPORT_SYMBOL(__per_cpu_shift);
-void __init setup_per_cpu_areas(void)
+void __init real_setup_per_cpu_areas(void)
{
unsigned long goal, size, i;
char *ptr;