KVM: x86: SYSENTER emulation is broken
[pandora-kernel.git] / arch / x86 / kvm / emulate.c
index aa71c95..bdad489 100644 (file)
@@ -539,12 +539,14 @@ static inline int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst,
        case 4:
                ctxt->_eip = (u32)dst;
                break;
+#ifdef CONFIG_X86_64
        case 8:
                if ((cs_l && is_noncanonical_address(dst)) ||
-                   (!cs_l && (dst & ~(u32)-1)))
+                   (!cs_l && (dst >> 32) != 0))
                        return emulate_gp(ctxt, 0);
                ctxt->_eip = dst;
                break;
+#endif
        default:
                WARN(1, "unsupported eip assignment size\n");
        }
@@ -1234,7 +1236,8 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 
 /* Does not support long mode */
 static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
-                                    u16 selector, int seg, u8 cpl)
+                                    u16 selector, int seg, u8 cpl,
+                                    struct desc_struct *desc)
 {
        struct desc_struct seg_desc;
        u8 dpl, rpl;
@@ -1342,6 +1345,8 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
        }
 load:
        ctxt->ops->set_segment(ctxt, selector, &seg_desc, 0, seg);
+       if (desc)
+               *desc = seg_desc;
        return X86EMUL_CONTINUE;
 exception:
        emulate_exception(ctxt, err_vec, err_code, true);
@@ -1352,7 +1357,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
                                   u16 selector, int seg)
 {
        u8 cpl = ctxt->ops->cpl(ctxt);
-       return __load_segment_descriptor(ctxt, selector, seg, cpl);
+       return __load_segment_descriptor(ctxt, selector, seg, cpl, NULL);
 }
 
 static void write_register_operand(struct operand *op)
@@ -1694,17 +1699,31 @@ static int em_iret(struct x86_emulate_ctxt *ctxt)
 static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
 {
        int rc;
-       unsigned short sel;
+       unsigned short sel, old_sel;
+       struct desc_struct old_desc, new_desc;
+       const struct x86_emulate_ops *ops = ctxt->ops;
+       u8 cpl = ctxt->ops->cpl(ctxt);
+
+       /* Assignment of RIP may only fail in 64-bit mode */
+       if (ctxt->mode == X86EMUL_MODE_PROT64)
+               ops->get_segment(ctxt, &old_sel, &old_desc, NULL,
+                                VCPU_SREG_CS);
 
        memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
 
-       rc = load_segment_descriptor(ctxt, sel, VCPU_SREG_CS);
+       rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl,
+                                      &new_desc);
        if (rc != X86EMUL_CONTINUE)
                return rc;
 
-       ctxt->_eip = 0;
-       memcpy(&ctxt->_eip, ctxt->src.valptr, ctxt->op_bytes);
-       return X86EMUL_CONTINUE;
+       rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l);
+       if (rc != X86EMUL_CONTINUE) {
+               WARN_ON(ctxt->mode != X86EMUL_MODE_PROT64);
+               /* assigning eip failed; restore the old cs */
+               ops->set_segment(ctxt, old_sel, &old_desc, 0, VCPU_SREG_CS);
+               return rc;
+       }
+       return rc;
 }
 
 static int em_grp1a(struct x86_emulate_ctxt *ctxt)
@@ -1856,21 +1875,34 @@ static int em_ret(struct x86_emulate_ctxt *ctxt)
 static int em_ret_far(struct x86_emulate_ctxt *ctxt)
 {
        int rc;
-       unsigned long cs;
+       unsigned long eip, cs;
+       u16 old_cs;
        int cpl = ctxt->ops->cpl(ctxt);
+       struct desc_struct old_desc, new_desc;
+       const struct x86_emulate_ops *ops = ctxt->ops;
 
-       rc = emulate_pop(ctxt, &ctxt->_eip, ctxt->op_bytes);
+       if (ctxt->mode == X86EMUL_MODE_PROT64)
+               ops->get_segment(ctxt, &old_cs, &old_desc, NULL,
+                                VCPU_SREG_CS);
+
+       rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
        if (rc != X86EMUL_CONTINUE)
                return rc;
-       if (ctxt->op_bytes == 4)
-               ctxt->_eip = (u32)ctxt->_eip;
        rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
        if (rc != X86EMUL_CONTINUE)
                return rc;
        /* Outer-privilege level return is not implemented */
        if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl)
                return X86EMUL_UNHANDLEABLE;
-       rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS);
+       rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, 0,
+                                      &new_desc);
+       if (rc != X86EMUL_CONTINUE)
+               return rc;
+       rc = assign_eip_far(ctxt, eip, new_desc.l);
+       if (rc != X86EMUL_CONTINUE) {
+               WARN_ON(ctxt->mode != X86EMUL_MODE_PROT64);
+               ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
+       }
        return rc;
 }
 
@@ -1920,6 +1952,17 @@ setup_syscalls_segments(struct x86_emulate_ctxt *ctxt,
        ss->p = 1;
 }
 
+static bool vendor_intel(struct x86_emulate_ctxt *ctxt)
+{
+       u32 eax, ebx, ecx, edx;
+
+       eax = ecx = 0;
+       return ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx)
+               && ebx == X86EMUL_CPUID_VENDOR_GenuineIntel_ebx
+               && ecx == X86EMUL_CPUID_VENDOR_GenuineIntel_ecx
+               && edx == X86EMUL_CPUID_VENDOR_GenuineIntel_edx;
+}
+
 static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt)
 {
        struct x86_emulate_ops *ops = ctxt->ops;
@@ -2036,6 +2079,14 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt)
        if (ctxt->mode == X86EMUL_MODE_REAL)
                return emulate_gp(ctxt, 0);
 
+       /*
+        * Not recognized on AMD in compat mode (but is recognized in legacy
+        * mode).
+        */
+       if ((ctxt->mode != X86EMUL_MODE_PROT64) && (efer & EFER_LMA)
+           && !vendor_intel(ctxt))
+               return emulate_ud(ctxt);
+
        /* XXX sysenter/sysexit have not been tested in 64bit mode.
        * Therefore, we inject an #UD.
        */
@@ -2045,23 +2096,13 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt)
        setup_syscalls_segments(ctxt, &cs, &ss);
 
        ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
-       switch (ctxt->mode) {
-       case X86EMUL_MODE_PROT32:
-               if ((msr_data & 0xfffc) == 0x0)
-                       return emulate_gp(ctxt, 0);
-               break;
-       case X86EMUL_MODE_PROT64:
-               if (msr_data == 0x0)
-                       return emulate_gp(ctxt, 0);
-               break;
-       }
+       if ((msr_data & 0xfffc) == 0x0)
+               return emulate_gp(ctxt, 0);
 
        ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
-       cs_sel = (u16)msr_data;
-       cs_sel &= ~SELECTOR_RPL_MASK;
+       cs_sel = (u16)msr_data & ~SELECTOR_RPL_MASK;
        ss_sel = cs_sel + 8;
-       ss_sel &= ~SELECTOR_RPL_MASK;
-       if (ctxt->mode == X86EMUL_MODE_PROT64 || (efer & EFER_LMA)) {
+       if (efer & EFER_LMA) {
                cs.d = 0;
                cs.l = 1;
        }
@@ -2070,10 +2111,11 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt)
        ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
 
        ops->get_msr(ctxt, MSR_IA32_SYSENTER_EIP, &msr_data);
-       ctxt->_eip = msr_data;
+       ctxt->_eip = (efer & EFER_LMA) ? msr_data : (u32)msr_data;
 
        ops->get_msr(ctxt, MSR_IA32_SYSENTER_ESP, &msr_data);
-       ctxt->regs[VCPU_REGS_RSP] = msr_data;
+       ctxt->regs[VCPU_REGS_RSP] = (efer & EFER_LMA) ? msr_data :
+                                                       (u32)msr_data;
 
        return X86EMUL_CONTINUE;
 }
@@ -2248,19 +2290,24 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
         * Now load segment descriptors. If fault happenes at this stage
         * it is handled in a context of new task
         */
-       ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl);
+       ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl,
+                                       NULL);
        if (ret != X86EMUL_CONTINUE)
                return ret;
-       ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl);
+       ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
+                                       NULL);
        if (ret != X86EMUL_CONTINUE)
                return ret;
-       ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl);
+       ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
+                                       NULL);
        if (ret != X86EMUL_CONTINUE)
                return ret;
-       ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl);
+       ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
+                                       NULL);
        if (ret != X86EMUL_CONTINUE)
                return ret;
-       ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl);
+       ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
+                                       NULL);
        if (ret != X86EMUL_CONTINUE)
                return ret;
 
@@ -2373,25 +2420,32 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
         * Now load segment descriptors. If fault happenes at this stage
         * it is handled in a context of new task
         */
-       ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR, cpl);
+       ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR,
+                                       cpl, NULL);
        if (ret != X86EMUL_CONTINUE)
                return ret;
-       ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl);
+       ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
+                                       NULL);
        if (ret != X86EMUL_CONTINUE)
                return ret;
-       ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl);
+       ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
+                                       NULL);
        if (ret != X86EMUL_CONTINUE)
                return ret;
-       ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl);
+       ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
+                                       NULL);
        if (ret != X86EMUL_CONTINUE)
                return ret;
-       ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl);
+       ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
+                                       NULL);
        if (ret != X86EMUL_CONTINUE)
                return ret;
-       ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl);
+       ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl,
+                                       NULL);
        if (ret != X86EMUL_CONTINUE)
                return ret;
-       ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl);
+       ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl,
+                                       NULL);
        if (ret != X86EMUL_CONTINUE)
                return ret;
 
@@ -2605,24 +2659,39 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt)
        u16 sel, old_cs;
        ulong old_eip;
        int rc;
+       struct desc_struct old_desc, new_desc;
+       const struct x86_emulate_ops *ops = ctxt->ops;
+       int cpl = ctxt->ops->cpl(ctxt);
 
-       old_cs = get_segment_selector(ctxt, VCPU_SREG_CS);
        old_eip = ctxt->_eip;
+       ops->get_segment(ctxt, &old_cs, &old_desc, NULL, VCPU_SREG_CS);
 
        memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
-       if (load_segment_descriptor(ctxt, sel, VCPU_SREG_CS))
+       rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl,
+                                      &new_desc);
+       if (rc != X86EMUL_CONTINUE)
                return X86EMUL_CONTINUE;
 
-       ctxt->_eip = 0;
-       memcpy(&ctxt->_eip, ctxt->src.valptr, ctxt->op_bytes);
+       rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l);
+       if (rc != X86EMUL_CONTINUE)
+               goto fail;
 
        ctxt->src.val = old_cs;
        rc = em_push(ctxt);
        if (rc != X86EMUL_CONTINUE)
-               return rc;
+               goto fail;
 
        ctxt->src.val = old_eip;
-       return em_push(ctxt);
+       rc = em_push(ctxt);
+       /* If we failed, we tainted the memory, but the very least we should
+          restore cs */
+       if (rc != X86EMUL_CONTINUE)
+               goto fail;
+       return rc;
+fail:
+       ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
+       return rc;
+
 }
 
 static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)