xen: support sysenter/sysexit if hypervisor does
author Jeremy Fitzhardinge <jeremy@goop.org>
Mon, 17 Mar 2008 23:37:17 +0000 (16:37 -0700)
committer Ingo Molnar <mingo@elte.hu>
Thu, 24 Apr 2008 21:57:31 +0000 (23:57 +0200)
64-bit Xen supports sysenter for 32-bit guests, so support its
use.  (sysenter is faster than int $0x80 in 32-on-64.)

sysexit is still not supported, so we fake it up using iret.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
arch/x86/kernel/entry_32.S
arch/x86/xen/enlighten.c
arch/x86/xen/setup.c
arch/x86/xen/smp.c
arch/x86/xen/xen-asm.S
arch/x86/xen/xen-ops.h

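diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 568c6cc..5d80d53 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S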
@@ -1017,6 +1017,13 @@ ENTRY(kernel_thread_helper)
 ENDPROC(kernel_thread_helper)
 
 #ifdef CONFIG_XEN
+/* Xen doesn't set %esp to be precisely what the normal sysenter
+   entrypoint expects, so fix it up before using the normal path. */
+ENTRY(xen_sysenter_target)
+       RING0_INT_FRAME
+       addl $5*4, %esp         /* remove xen-provided frame */
+       jmp sysenter_past_esp
+
 ENTRY(xen_hypervisor_callback)
        CFI_STARTPROC
        pushl $0
@@ -1036,8 +1043,17 @@ ENTRY(xen_hypervisor_callback)
        jae  1f
 
        call xen_iret_crit_fixup
+       jmp  2f
+
+1:     cmpl $xen_sysexit_start_crit,%eax
+       jb   2f
+       cmpl $xen_sysexit_end_crit,%eax
+       jae  2f
+
+       jmp xen_sysexit_crit_fixup
 
-1:     mov %esp, %eax
+ENTRY(xen_do_upcall)
+2:     mov %esp, %eax
        call xen_evtchn_do_upcall
        jmp  ret_from_intr
        CFI_ENDPROC
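diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 36f36e6..9436845 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c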
@@ -155,7 +155,6 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
        if (*ax == 1)
                maskedx = ~((1 << X86_FEATURE_APIC) |  /* disable APIC */
                            (1 << X86_FEATURE_ACPI) |  /* disable ACPI */
-                           (1 << X86_FEATURE_SEP)  |  /* disable SEP */
                            (1 << X86_FEATURE_ACC));   /* thermal monitoring */
 
        asm(XEN_EMULATE_PREFIX "cpuid"
@@ -994,7 +993,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
        .read_pmc = native_read_pmc,
 
        .iret = xen_iret,
-       .irq_enable_syscall_ret = NULL,  /* never called */
+       .irq_enable_syscall_ret = xen_sysexit,
 
        .load_tr_desc = paravirt_nop,
        .set_ldt = xen_set_ldt,
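diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 2341492..82517e4 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c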
@@ -16,6 +16,7 @@
 #include <asm/xen/hypervisor.h>
 #include <asm/xen/hypercall.h>
 
+#include <xen/interface/callback.h>
 #include <xen/interface/physdev.h>
 #include <xen/features.h>
 
@@ -68,6 +69,24 @@ static void __init fiddle_vdso(void)
        *mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
 }
 
+void xen_enable_sysenter(void)
+{
+       int cpu = smp_processor_id();
+       extern void xen_sysenter_target(void);
+       /* Mask events on entry, even though they get enabled immediately */
+       static struct callback_register sysenter = {
+               .type = CALLBACKTYPE_sysenter,
+               .address = { __KERNEL_CS, (unsigned long)xen_sysenter_target },
+               .flags = CALLBACKF_mask_events,
+       };
+
+       if (!boot_cpu_has(X86_FEATURE_SEP) ||
+           HYPERVISOR_callback_op(CALLBACKOP_register, &sysenter) != 0) {
+               clear_cpu_cap(&cpu_data(cpu), X86_FEATURE_SEP);
+               clear_cpu_cap(&boot_cpu_data, X86_FEATURE_SEP);
+       }
+}
+
 void __init xen_arch_setup(void)
 {
        struct physdev_set_iopl set_iopl;
@@ -82,6 +101,8 @@ void __init xen_arch_setup(void)
        HYPERVISOR_set_callbacks(__KERNEL_CS, (unsigned long)xen_hypervisor_callback,
                                 __KERNEL_CS, (unsigned long)xen_failsafe_callback);
 
+       xen_enable_sysenter();
+
        set_iopl.iopl = 1;
        rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
        if (rc != 0)
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index e340ff9..d61e4f8 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -72,6 +72,7 @@ static __cpuinit void cpu_bringup_and_idle(void)
        int cpu = smp_processor_id();
 
        cpu_init();
+       xen_enable_sysenter();
 
        preempt_disable();
        per_cpu(cpu_state, cpu) = CPU_ONLINE;
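diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S
index 99223cc..1ac0808 100644
--- a/arch/x86/xen/xen-asm.S
+++ b/arch/x86/xen/xen-asm.S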
@@ -280,6 +280,62 @@ ENTRY(xen_iret_crit_fixup)
 2:     ret
 
 
+ENTRY(xen_sysexit)
+       /* Store vcpu_info pointer for easy access.  Do it this
+          way to avoid having to reload %fs */
+#ifdef CONFIG_SMP
+       GET_THREAD_INFO(%eax)
+       movl TI_cpu(%eax),%eax
+       movl __per_cpu_offset(,%eax,4),%eax
+       mov per_cpu__xen_vcpu(%eax),%eax
+#else
+       movl per_cpu__xen_vcpu, %eax
+#endif
+
+       /* We can't actually use sysexit in a pv guest,
+          so fake it up with iret */
+       pushl $__USER_DS                /* user stack segment */
+       pushl %ecx                      /* user esp */
+       pushl PT_EFLAGS+2*4(%esp)       /* user eflags */
+       pushl $__USER_CS                /* user code segment */
+       pushl %edx                      /* user eip */
+
+xen_sysexit_start_crit:
+       /* Unmask events... */
+       movb $0, XEN_vcpu_info_mask(%eax)
+       /* ...and test for pending.
+          There's a preempt window here, but it doesn't
+          matter because we're within the critical section. */
+       testb $0xff, XEN_vcpu_info_pending(%eax)
+
+       /* If there's something pending, mask events again so we
+          can directly inject it back into the kernel. */
+       jnz   1f
+
+       movl PT_EAX+5*4(%esp),%eax
+2:     iret
+1:     movb $1, XEN_vcpu_info_mask(%eax)
+xen_sysexit_end_crit:
+       addl $5*4, %esp         /* remove iret frame */
+       /* no need to re-save regs, but need to restore kernel %fs */
+       mov $__KERNEL_PERCPU, %eax
+       mov %eax, %fs
+       jmp xen_do_upcall
+.section __ex_table,"a"
+       .align 4
+       .long 2b,iret_exc
+.previous
+
+       .globl xen_sysexit_start_crit, xen_sysexit_end_crit
+/*
+       sysexit fixup is easy, since the old frame is still sitting there
+       on the stack.  We just need to remove the new recursive
+       interrupt and return.
+ */
+ENTRY(xen_sysexit_crit_fixup)
+       addl $PT_OLDESP+5*4, %esp               /* remove frame+iret */
+       jmp xen_do_upcall
+
 /*
        Force an event check by making a hypercall,
        but preserve regs before making the call.
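diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 956a491..01d4ff2 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h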
@@ -19,6 +19,7 @@ extern struct shared_info *HYPERVISOR_shared_info;
 char * __init xen_memory_setup(void);
 void __init xen_arch_setup(void);
 void __init xen_init_IRQ(void);
+void xen_enable_sysenter(void);
 
 void xen_setup_timer(int cpu);
 void xen_setup_cpu_clockevents(void);
@@ -64,4 +65,6 @@ DECL_ASM(unsigned long, xen_save_fl_direct, void);
 DECL_ASM(void, xen_restore_fl_direct, unsigned long);
 
 void xen_iret(void);
+void xen_sysexit(void);
+
 #endif /* XEN_OPS_H */