x86/smp: Don't ever patch back to UP if we unplug cpus
[pandora-kernel.git] / arch / x86 / xen / smp.c
1 /*
2  * Xen SMP support
3  *
4  * This file implements the Xen versions of smp_ops.  SMP under Xen is
5  * very straightforward.  Bringing a CPU up is simply a matter of
6  * loading its initial context and setting it running.
7  *
8  * IPIs are handled through the Xen event mechanism.
9  *
10  * Because virtual CPUs can be scheduled onto any real CPU, there's no
11  * useful topology information for the kernel to make use of.  As a
12  * result, all CPUs are treated as if they're single-core and
13  * single-threaded.
14  */
15 #include <linux/sched.h>
16 #include <linux/err.h>
17 #include <linux/slab.h>
18 #include <linux/smp.h>
19
20 #include <asm/paravirt.h>
21 #include <asm/desc.h>
22 #include <asm/pgtable.h>
23 #include <asm/cpu.h>
24
25 #include <xen/interface/xen.h>
26 #include <xen/interface/vcpu.h>
27
28 #include <asm/xen/interface.h>
29 #include <asm/xen/hypercall.h>
30
31 #include <xen/xen.h>
32 #include <xen/page.h>
33 #include <xen/events.h>
34
35 #include <xen/hvc-console.h>
36 #include "xen-ops.h"
37 #include "mmu.h"
38
39 cpumask_var_t xen_cpu_initialized_map;
40
41 static DEFINE_PER_CPU(int, xen_resched_irq);
42 static DEFINE_PER_CPU(int, xen_callfunc_irq);
43 static DEFINE_PER_CPU(int, xen_callfuncsingle_irq);
44 static DEFINE_PER_CPU(int, xen_debug_irq) = -1;
45
46 static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id);
47 static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id);
48
49 /*
50  * Reschedule call back.
51  */
52 static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id)
53 {
54         inc_irq_stat(irq_resched_count);
55         scheduler_ipi();
56
57         return IRQ_HANDLED;
58 }
59
60 static void __cpuinit cpu_bringup(void)
61 {
62         int cpu = smp_processor_id();
63
64         cpu_init();
65         touch_softlockup_watchdog();
66         preempt_disable();
67
68         xen_enable_sysenter();
69         xen_enable_syscall();
70
71         cpu = smp_processor_id();
72         smp_store_cpu_info(cpu);
73         cpu_data(cpu).x86_max_cores = 1;
74         set_cpu_sibling_map(cpu);
75
76         xen_setup_cpu_clockevents();
77
78         set_cpu_online(cpu, true);
79         percpu_write(cpu_state, CPU_ONLINE);
80         wmb();
81
82         /* We can take interrupts now: we're officially "up". */
83         local_irq_enable();
84
85         wmb();                  /* make sure everything is out */
86 }
87
88 static void __cpuinit cpu_bringup_and_idle(void)
89 {
90         cpu_bringup();
91         cpu_idle();
92 }
93
94 static int xen_smp_intr_init(unsigned int cpu)
95 {
96         int rc;
97         const char *resched_name, *callfunc_name, *debug_name;
98
99         resched_name = kasprintf(GFP_KERNEL, "resched%d", cpu);
100         rc = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR,
101                                     cpu,
102                                     xen_reschedule_interrupt,
103                                     IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
104                                     resched_name,
105                                     NULL);
106         if (rc < 0)
107                 goto fail;
108         per_cpu(xen_resched_irq, cpu) = rc;
109
110         callfunc_name = kasprintf(GFP_KERNEL, "callfunc%d", cpu);
111         rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_VECTOR,
112                                     cpu,
113                                     xen_call_function_interrupt,
114                                     IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
115                                     callfunc_name,
116                                     NULL);
117         if (rc < 0)
118                 goto fail;
119         per_cpu(xen_callfunc_irq, cpu) = rc;
120
121         debug_name = kasprintf(GFP_KERNEL, "debug%d", cpu);
122         rc = bind_virq_to_irqhandler(VIRQ_DEBUG, cpu, xen_debug_interrupt,
123                                      IRQF_DISABLED | IRQF_PERCPU | IRQF_NOBALANCING,
124                                      debug_name, NULL);
125         if (rc < 0)
126                 goto fail;
127         per_cpu(xen_debug_irq, cpu) = rc;
128
129         callfunc_name = kasprintf(GFP_KERNEL, "callfuncsingle%d", cpu);
130         rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_SINGLE_VECTOR,
131                                     cpu,
132                                     xen_call_function_single_interrupt,
133                                     IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
134                                     callfunc_name,
135                                     NULL);
136         if (rc < 0)
137                 goto fail;
138         per_cpu(xen_callfuncsingle_irq, cpu) = rc;
139
140         return 0;
141
142  fail:
143         if (per_cpu(xen_resched_irq, cpu) >= 0)
144                 unbind_from_irqhandler(per_cpu(xen_resched_irq, cpu), NULL);
145         if (per_cpu(xen_callfunc_irq, cpu) >= 0)
146                 unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu), NULL);
147         if (per_cpu(xen_debug_irq, cpu) >= 0)
148                 unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu), NULL);
149         if (per_cpu(xen_callfuncsingle_irq, cpu) >= 0)
150                 unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu),
151                                        NULL);
152
153         return rc;
154 }
155
156 static void __init xen_fill_possible_map(void)
157 {
158         int i, rc;
159
160         if (xen_initial_domain())
161                 return;
162
163         for (i = 0; i < nr_cpu_ids; i++) {
164                 rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
165                 if (rc >= 0) {
166                         num_processors++;
167                         set_cpu_possible(i, true);
168                 }
169         }
170 }
171
172 static void __init xen_filter_cpu_maps(void)
173 {
174         int i, rc;
175         unsigned int subtract = 0;
176
177         if (!xen_initial_domain())
178                 return;
179
180         num_processors = 0;
181         disabled_cpus = 0;
182         for (i = 0; i < nr_cpu_ids; i++) {
183                 rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
184                 if (rc >= 0) {
185                         num_processors++;
186                         set_cpu_possible(i, true);
187                 } else {
188                         set_cpu_possible(i, false);
189                         set_cpu_present(i, false);
190                         subtract++;
191                 }
192         }
193 #ifdef CONFIG_HOTPLUG_CPU
194         /* This is akin to using 'nr_cpus' on the Linux command line.
195          * Which is OK as when we use 'dom0_max_vcpus=X' we can only
196          * have up to X, while nr_cpu_ids is greater than X. This
197          * normally is not a problem, except when CPU hotplugging
198          * is involved and then there might be more than X CPUs
199          * in the guest - which will not work as there is no
200          * hypercall to expand the max number of VCPUs an already
201          * running guest has. So cap it up to X. */
202         if (subtract)
203                 nr_cpu_ids = nr_cpu_ids - subtract;
204 #endif
205
206 }
207
208 static void __init xen_smp_prepare_boot_cpu(void)
209 {
210         BUG_ON(smp_processor_id() != 0);
211         native_smp_prepare_boot_cpu();
212
213         /* We've switched to the "real" per-cpu gdt, so make sure the
214            old memory can be recycled */
215         make_lowmem_page_readwrite(xen_initial_gdt);
216
217         xen_filter_cpu_maps();
218         xen_setup_vcpu_info_placement();
219 }
220
221 static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
222 {
223         unsigned cpu;
224         unsigned int i;
225
226         if (skip_ioapic_setup) {
227                 char *m = (max_cpus == 0) ?
228                         "The nosmp parameter is incompatible with Xen; " \
229                         "use Xen dom0_max_vcpus=1 parameter" :
230                         "The noapic parameter is incompatible with Xen";
231
232                 xen_raw_printk(m);
233                 panic(m);
234         }
235         xen_init_lock_cpu(0);
236
237         smp_store_cpu_info(0);
238         cpu_data(0).x86_max_cores = 1;
239
240         for_each_possible_cpu(i) {
241                 zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
242                 zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
243                 zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL);
244         }
245         set_cpu_sibling_map(0);
246
247         if (xen_smp_intr_init(0))
248                 BUG();
249
250         if (!alloc_cpumask_var(&xen_cpu_initialized_map, GFP_KERNEL))
251                 panic("could not allocate xen_cpu_initialized_map\n");
252
253         cpumask_copy(xen_cpu_initialized_map, cpumask_of(0));
254
255         /* Restrict the possible_map according to max_cpus. */
256         while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) {
257                 for (cpu = nr_cpu_ids - 1; !cpu_possible(cpu); cpu--)
258                         continue;
259                 set_cpu_possible(cpu, false);
260         }
261
262         for_each_possible_cpu (cpu) {
263                 struct task_struct *idle;
264
265                 if (cpu == 0)
266                         continue;
267
268                 idle = fork_idle(cpu);
269                 if (IS_ERR(idle))
270                         panic("failed fork for CPU %d", cpu);
271
272                 set_cpu_present(cpu, true);
273         }
274 }
275
276 static int __cpuinit
277 cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
278 {
279         struct vcpu_guest_context *ctxt;
280         struct desc_struct *gdt;
281         unsigned long gdt_mfn;
282
283         if (cpumask_test_and_set_cpu(cpu, xen_cpu_initialized_map))
284                 return 0;
285
286         ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
287         if (ctxt == NULL)
288                 return -ENOMEM;
289
290         gdt = get_cpu_gdt_table(cpu);
291
292         ctxt->flags = VGCF_IN_KERNEL;
293         ctxt->user_regs.ds = __USER_DS;
294         ctxt->user_regs.es = __USER_DS;
295         ctxt->user_regs.ss = __KERNEL_DS;
296 #ifdef CONFIG_X86_32
297         ctxt->user_regs.fs = __KERNEL_PERCPU;
298         ctxt->user_regs.gs = __KERNEL_STACK_CANARY;
299 #else
300         ctxt->gs_base_kernel = per_cpu_offset(cpu);
301 #endif
302         ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
303         ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
304
305         memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
306
307         xen_copy_trap_info(ctxt->trap_ctxt);
308
309         ctxt->ldt_ents = 0;
310
311         BUG_ON((unsigned long)gdt & ~PAGE_MASK);
312
313         gdt_mfn = arbitrary_virt_to_mfn(gdt);
314         make_lowmem_page_readonly(gdt);
315         make_lowmem_page_readonly(mfn_to_virt(gdt_mfn));
316
317         ctxt->gdt_frames[0] = gdt_mfn;
318         ctxt->gdt_ents      = GDT_ENTRIES;
319
320         ctxt->user_regs.cs = __KERNEL_CS;
321         ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
322
323         ctxt->kernel_ss = __KERNEL_DS;
324         ctxt->kernel_sp = idle->thread.sp0;
325
326 #ifdef CONFIG_X86_32
327         ctxt->event_callback_cs     = __KERNEL_CS;
328         ctxt->failsafe_callback_cs  = __KERNEL_CS;
329 #endif
330         ctxt->event_callback_eip    = (unsigned long)xen_hypervisor_callback;
331         ctxt->failsafe_callback_eip = (unsigned long)xen_failsafe_callback;
332
333         per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
334         ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
335
336         if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, ctxt))
337                 BUG();
338
339         kfree(ctxt);
340         return 0;
341 }
342
343 static int __cpuinit xen_cpu_up(unsigned int cpu)
344 {
345         struct task_struct *idle = idle_task(cpu);
346         int rc;
347
348         per_cpu(current_task, cpu) = idle;
349 #ifdef CONFIG_X86_32
350         irq_ctx_init(cpu);
351 #else
352         clear_tsk_thread_flag(idle, TIF_FORK);
353         per_cpu(kernel_stack, cpu) =
354                 (unsigned long)task_stack_page(idle) -
355                 KERNEL_STACK_OFFSET + THREAD_SIZE;
356 #endif
357         xen_setup_runstate_info(cpu);
358         xen_setup_timer(cpu);
359         xen_init_lock_cpu(cpu);
360
361         per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
362
363         /* make sure interrupts start blocked */
364         per_cpu(xen_vcpu, cpu)->evtchn_upcall_mask = 1;
365
366         rc = cpu_initialize_context(cpu, idle);
367         if (rc)
368                 return rc;
369
370         if (num_online_cpus() == 1)
371                 /* Just in case we booted with a single CPU. */
372                 alternatives_enable_smp();
373
374         rc = xen_smp_intr_init(cpu);
375         if (rc)
376                 return rc;
377
378         rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL);
379         BUG_ON(rc);
380
381         while(per_cpu(cpu_state, cpu) != CPU_ONLINE) {
382                 HYPERVISOR_sched_op(SCHEDOP_yield, NULL);
383                 barrier();
384         }
385
386         return 0;
387 }
388
/* smp_cpus_done hook: intentionally does nothing. */
static void xen_smp_cpus_done(unsigned int max_cpus)
{
}
392
393 #ifdef CONFIG_HOTPLUG_CPU
394 static int xen_cpu_disable(void)
395 {
396         unsigned int cpu = smp_processor_id();
397         if (cpu == 0)
398                 return -EBUSY;
399
400         cpu_disable_common();
401
402         load_cr3(swapper_pg_dir);
403         return 0;
404 }
405
406 static void xen_cpu_die(unsigned int cpu)
407 {
408         while (HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL)) {
409                 current->state = TASK_UNINTERRUPTIBLE;
410                 schedule_timeout(HZ/10);
411         }
412         unbind_from_irqhandler(per_cpu(xen_resched_irq, cpu), NULL);
413         unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu), NULL);
414         unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu), NULL);
415         unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu), NULL);
416         xen_uninit_lock_cpu(cpu);
417         xen_teardown_timer(cpu);
418 }
419
420 static void __cpuinit xen_play_dead(void) /* used only with HOTPLUG_CPU */
421 {
422         play_dead_common();
423         HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL);
424         cpu_bringup();
425 }
426
427 #else /* !CONFIG_HOTPLUG_CPU */
428 static int xen_cpu_disable(void)
429 {
430         return -ENOSYS;
431 }
432
/* Must never be called when CPU hotplug is not configured. */
static void xen_cpu_die(unsigned int cpu)
{
	BUG();
}
437
/* Must never be called when CPU hotplug is not configured. */
static void xen_play_dead(void)
{
	BUG();
}
442
443 #endif
444 static void stop_self(void *v)
445 {
446         int cpu = smp_processor_id();
447
448         /* make sure we're not pinning something down */
449         load_cr3(swapper_pg_dir);
450         /* should set up a minimal gdt */
451
452         set_cpu_online(cpu, false);
453
454         HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL);
455         BUG();
456 }
457
458 static void xen_stop_other_cpus(int wait)
459 {
460         smp_call_function(stop_self, NULL, wait);
461 }
462
463 static void xen_smp_send_reschedule(int cpu)
464 {
465         xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR);
466 }
467
468 static void xen_send_IPI_mask(const struct cpumask *mask,
469                               enum ipi_vector vector)
470 {
471         unsigned cpu;
472
473         for_each_cpu_and(cpu, mask, cpu_online_mask)
474                 xen_send_IPI_one(cpu, vector);
475 }
476
477 static void xen_smp_send_call_function_ipi(const struct cpumask *mask)
478 {
479         int cpu;
480
481         xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);
482
483         /* Make sure other vcpus get a chance to run if they need to. */
484         for_each_cpu(cpu, mask) {
485                 if (xen_vcpu_stolen(cpu)) {
486                         HYPERVISOR_sched_op(SCHEDOP_yield, NULL);
487                         break;
488                 }
489         }
490 }
491
492 static void xen_smp_send_call_function_single_ipi(int cpu)
493 {
494         xen_send_IPI_mask(cpumask_of(cpu),
495                           XEN_CALL_FUNCTION_SINGLE_VECTOR);
496 }
497
498 static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id)
499 {
500         irq_enter();
501         generic_smp_call_function_interrupt();
502         inc_irq_stat(irq_call_count);
503         irq_exit();
504
505         return IRQ_HANDLED;
506 }
507
508 static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id)
509 {
510         irq_enter();
511         generic_smp_call_function_single_interrupt();
512         inc_irq_stat(irq_call_count);
513         irq_exit();
514
515         return IRQ_HANDLED;
516 }
517
518 static const struct smp_ops xen_smp_ops __initconst = {
519         .smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu,
520         .smp_prepare_cpus = xen_smp_prepare_cpus,
521         .smp_cpus_done = xen_smp_cpus_done,
522
523         .cpu_up = xen_cpu_up,
524         .cpu_die = xen_cpu_die,
525         .cpu_disable = xen_cpu_disable,
526         .play_dead = xen_play_dead,
527
528         .stop_other_cpus = xen_stop_other_cpus,
529         .smp_send_reschedule = xen_smp_send_reschedule,
530
531         .send_call_func_ipi = xen_smp_send_call_function_ipi,
532         .send_call_func_single_ipi = xen_smp_send_call_function_single_ipi,
533 };
534
535 void __init xen_smp_init(void)
536 {
537         smp_ops = xen_smp_ops;
538         xen_fill_possible_map();
539         xen_init_spinlocks();
540 }
541
542 static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus)
543 {
544         native_smp_prepare_cpus(max_cpus);
545         WARN_ON(xen_smp_intr_init(0));
546
547         xen_init_lock_cpu(0);
548 }
549
550 static int __cpuinit xen_hvm_cpu_up(unsigned int cpu)
551 {
552         int rc;
553         rc = native_cpu_up(cpu);
554         WARN_ON (xen_smp_intr_init(cpu));
555         return rc;
556 }
557
558 static void xen_hvm_cpu_die(unsigned int cpu)
559 {
560         unbind_from_irqhandler(per_cpu(xen_resched_irq, cpu), NULL);
561         unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu), NULL);
562         unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu), NULL);
563         unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu), NULL);
564         xen_uninit_lock_cpu(cpu);
565         xen_teardown_timer(cpu);
566         native_cpu_die(cpu);
567 }
568
569 void __init xen_hvm_smp_init(void)
570 {
571         if (!xen_have_vector_callback)
572                 return;
573         smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus;
574         smp_ops.smp_send_reschedule = xen_smp_send_reschedule;
575         smp_ops.cpu_up = xen_hvm_cpu_up;
576         smp_ops.cpu_die = xen_hvm_cpu_die;
577         smp_ops.send_call_func_ipi = xen_smp_send_call_function_ipi;
578         smp_ops.send_call_func_single_ipi = xen_smp_send_call_function_single_ipi;
579 }