KVM: x86: SYSENTER emulation is broken
[pandora-kernel.git] / arch/x86/kvm/emulate.c
index f1e3be1..bdad489 100644
@@ -456,11 +456,6 @@ register_address_increment(struct x86_emulate_ctxt *ctxt, unsigned long *reg, in
                *reg = (*reg & ~ad_mask(ctxt)) | ((*reg + inc) & ad_mask(ctxt));
 }
 
-static inline void jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
-{
-       register_address_increment(ctxt, &ctxt->_eip, rel);
-}
-
 static u32 desc_limit_scaled(struct desc_struct *desc)
 {
        u32 limit = get_desc_limit(desc);
@@ -534,6 +529,40 @@ static int emulate_nm(struct x86_emulate_ctxt *ctxt)
        return emulate_exception(ctxt, NM_VECTOR, 0, false);
 }
 
+static inline int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst,
+                              int cs_l)
+{
+       switch (ctxt->op_bytes) {
+       case 2:
+               ctxt->_eip = (u16)dst;
+               break;
+       case 4:
+               ctxt->_eip = (u32)dst;
+               break;
+#ifdef CONFIG_X86_64
+       case 8:
+               if ((cs_l && is_noncanonical_address(dst)) ||
+                   (!cs_l && (dst >> 32) != 0))
+                       return emulate_gp(ctxt, 0);
+               ctxt->_eip = dst;
+               break;
+#endif
+       default:
+               WARN(1, "unsupported eip assignment size\n");
+       }
+       return X86EMUL_CONTINUE;
+}
+
+static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
+{
+       return assign_eip_far(ctxt, dst, ctxt->mode == X86EMUL_MODE_PROT64);
+}
+
+static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
+{
+       return assign_eip_near(ctxt, ctxt->_eip + rel);
+}
+
 static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg)
 {
        u16 selector;
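
The three helpers above are the heart of the fix: branch targets now funnel through assign_eip_near()/assign_eip_far(), which truncate to the operand size and, for a 64-bit code segment, raise #GP on a non-canonical target instead of letting it reach RIP and break the next VM entry. A minimal stand-alone sketch of the canonicality rule, assuming 48-bit virtual addresses (the kernel derives the width from the CPU):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* An address is canonical on a 48-bit-VA x86-64 CPU when bits 63:48 all
 * equal bit 47, i.e. the value survives sign-extension from bit 47. */
static bool is_canonical_48(uint64_t va)
{
        return (uint64_t)((int64_t)(va << 16) >> 16) == va;
}

int main(void)
{
        printf("%d\n", is_canonical_48(0x00007fffffffffffULL)); /* 1: top of the lower half  */
        printf("%d\n", is_canonical_48(0x0000800000000000ULL)); /* 0: first non-canonical VA */
        printf("%d\n", is_canonical_48(0xffff800000000000ULL)); /* 1: start of the upper half */
        return 0;
}
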
@@ -1206,11 +1235,12 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 }
 
 /* Does not support long mode */
-static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
-                                  u16 selector, int seg)
+static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
+                                    u16 selector, int seg, u8 cpl,
+                                    struct desc_struct *desc)
 {
        struct desc_struct seg_desc;
-       u8 dpl, rpl, cpl;
+       u8 dpl, rpl;
        unsigned err_vec = GP_VECTOR;
        u32 err_code = 0;
        bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */
@@ -1259,7 +1289,6 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 
        rpl = selector & 3;
        dpl = seg_desc.dpl;
-       cpl = ctxt->ops->cpl(ctxt);
 
        switch (seg) {
        case VCPU_SREG_SS:
@@ -1316,12 +1345,21 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
        }
 load:
        ctxt->ops->set_segment(ctxt, selector, &seg_desc, 0, seg);
+       if (desc)
+               *desc = seg_desc;
        return X86EMUL_CONTINUE;
 exception:
        emulate_exception(ctxt, err_vec, err_code, true);
        return X86EMUL_PROPAGATE_FAULT;
 }
 
+static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
+                                  u16 selector, int seg)
+{
+       u8 cpl = ctxt->ops->cpl(ctxt);
+       return __load_segment_descriptor(ctxt, selector, seg, cpl, NULL);
+}
+
 static void write_register_operand(struct operand *op)
 {
        /* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
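
__load_segment_descriptor() now takes the privilege level to check against as an explicit argument and can hand the loaded descriptor back to its caller; the load_segment_descriptor() wrapper keeps the old behaviour by passing the current CPL and NULL. The explicit cpl matters for the task-switch paths further down, where the checks must use the RPL of the CS selector from the incoming TSS. As a reminder of the rule enforced for plain data segments, a hedged stand-alone sketch (the kernel's switch statement handles CS, SS, TR and LDT with their own rules):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Non-conforming data segment (DS/ES/FS/GS) load rule: the descriptor's
 * DPL must be numerically >= both the selector's RPL and the privilege
 * level the access is performed at. */
static bool data_seg_load_ok(uint8_t cpl, uint8_t rpl, uint8_t dpl)
{
        uint8_t requestor = cpl > rpl ? cpl : rpl;
        return dpl >= requestor;
}

int main(void)
{
        /* CPL 3 loading a DPL 3 data segment is fine; a DPL 0 one is refused. */
        printf("%d %d\n", data_seg_load_ok(3, 3, 3), data_seg_load_ok(3, 3, 0));
        return 0;
}
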
@@ -1661,17 +1699,31 @@ static int em_iret(struct x86_emulate_ctxt *ctxt)
 static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
 {
        int rc;
-       unsigned short sel;
+       unsigned short sel, old_sel;
+       struct desc_struct old_desc, new_desc;
+       const struct x86_emulate_ops *ops = ctxt->ops;
+       u8 cpl = ctxt->ops->cpl(ctxt);
+
+       /* Assignment of RIP may only fail in 64-bit mode */
+       if (ctxt->mode == X86EMUL_MODE_PROT64)
+               ops->get_segment(ctxt, &old_sel, &old_desc, NULL,
+                                VCPU_SREG_CS);
 
        memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
 
-       rc = load_segment_descriptor(ctxt, sel, VCPU_SREG_CS);
+       rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl,
+                                      &new_desc);
        if (rc != X86EMUL_CONTINUE)
                return rc;
 
-       ctxt->_eip = 0;
-       memcpy(&ctxt->_eip, ctxt->src.valptr, ctxt->op_bytes);
-       return X86EMUL_CONTINUE;
+       rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l);
+       if (rc != X86EMUL_CONTINUE) {
+               WARN_ON(ctxt->mode != X86EMUL_MODE_PROT64);
+               /* assigning eip failed; restore the old cs */
+               ops->set_segment(ctxt, old_sel, &old_desc, 0, VCPU_SREG_CS);
+               return rc;
+       }
+       return rc;
 }
 
 static int em_grp1a(struct x86_emulate_ctxt *ctxt)
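
em_jmp_far() saves the old CS selector and descriptor before loading the new one, so a target rejected by assign_eip_far(), which can only happen in 64-bit mode, can be rolled back and the guest sees a clean #GP rather than a half-committed far jump. The selector is still taken from the immediate with memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2); a stand-alone illustration of that far-pointer layout, with invented values and assuming a little-endian host:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
        /* A 48-bit far pointer immediate: the offset (op_bytes wide) comes
         * first in memory, the 16-bit selector follows. */
        unsigned char ptr48[6] = { 0x78, 0x56, 0x34, 0x12,   /* offset 0x12345678 */
                                   0x1b, 0x00 };             /* selector 0x001b   */
        uint32_t off;
        uint16_t sel;

        memcpy(&off, ptr48, 4);
        memcpy(&sel, ptr48 + 4, 2);   /* same trick as the emulator's valptr + op_bytes */
        printf("sel=%#x off=%#x\n", sel, off);
        return 0;
}
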
@@ -1770,13 +1822,15 @@ static int em_grp45(struct x86_emulate_ctxt *ctxt)
        case 2: /* call near abs */ {
                long int old_eip;
                old_eip = ctxt->_eip;
-               ctxt->_eip = ctxt->src.val;
+               rc = assign_eip_near(ctxt, ctxt->src.val);
+               if (rc != X86EMUL_CONTINUE)
+                       break;
                ctxt->src.val = old_eip;
                rc = em_push(ctxt);
                break;
        }
        case 4: /* jmp abs */
-               ctxt->_eip = ctxt->src.val;
+               rc = assign_eip_near(ctxt, ctxt->src.val);
                break;
        case 5: /* jmp far */
                rc = em_jmp_far(ctxt);
@@ -1808,26 +1862,47 @@ static int em_grp9(struct x86_emulate_ctxt *ctxt)
 
 static int em_ret(struct x86_emulate_ctxt *ctxt)
 {
-       ctxt->dst.type = OP_REG;
-       ctxt->dst.addr.reg = &ctxt->_eip;
-       ctxt->dst.bytes = ctxt->op_bytes;
-       return em_pop(ctxt);
+       int rc;
+       unsigned long eip;
+
+       rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
+       if (rc != X86EMUL_CONTINUE)
+               return rc;
+
+       return assign_eip_near(ctxt, eip);
 }
 
 static int em_ret_far(struct x86_emulate_ctxt *ctxt)
 {
        int rc;
-       unsigned long cs;
+       unsigned long eip, cs;
+       u16 old_cs;
+       int cpl = ctxt->ops->cpl(ctxt);
+       struct desc_struct old_desc, new_desc;
+       const struct x86_emulate_ops *ops = ctxt->ops;
+
+       if (ctxt->mode == X86EMUL_MODE_PROT64)
+               ops->get_segment(ctxt, &old_cs, &old_desc, NULL,
+                                VCPU_SREG_CS);
 
-       rc = emulate_pop(ctxt, &ctxt->_eip, ctxt->op_bytes);
+       rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
        if (rc != X86EMUL_CONTINUE)
                return rc;
-       if (ctxt->op_bytes == 4)
-               ctxt->_eip = (u32)ctxt->_eip;
        rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
        if (rc != X86EMUL_CONTINUE)
                return rc;
-       rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS);
+       /* Outer-privilege level return is not implemented */
+       if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl)
+               return X86EMUL_UNHANDLEABLE;
+       rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, 0,
+                                      &new_desc);
+       if (rc != X86EMUL_CONTINUE)
+               return rc;
+       rc = assign_eip_far(ctxt, eip, new_desc.l);
+       if (rc != X86EMUL_CONTINUE) {
+               WARN_ON(ctxt->mode != X86EMUL_MODE_PROT64);
+               ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
+       }
        return rc;
 }
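
em_ret() pops into a local and goes through assign_eip_near() instead of the generic writeback path, so the popped offset gets the operand-size truncation and, in 64-bit mode, the canonical check. em_ret_far() does the same for its offset, returns X86EMUL_UNHANDLEABLE for outer-privilege-level returns rather than emulating them wrongly, and restores the saved CS if assigning the new RIP fails. A made-up example of the stack frame a 32-bit RETF consumes:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        /* [ESP] holds the return offset, [ESP+4] the CS selector; with a
         * 32-bit operand size each pop is 4 bytes and the selector's upper
         * 16 bits are ignored.  Values are invented. */
        uint32_t frame[2] = { 0x08048000, 0x0023 };
        uint32_t eip = frame[0];
        uint16_t cs  = (uint16_t)frame[1];

        printf("return to %04x:%08x, RPL=%u\n", cs, eip, (unsigned)(cs & 3));
        return 0;
}
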
 
@@ -1877,6 +1952,62 @@ setup_syscalls_segments(struct x86_emulate_ctxt *ctxt,
        ss->p = 1;
 }
 
+static bool vendor_intel(struct x86_emulate_ctxt *ctxt)
+{
+       u32 eax, ebx, ecx, edx;
+
+       eax = ecx = 0;
+       return ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx)
+               && ebx == X86EMUL_CPUID_VENDOR_GenuineIntel_ebx
+               && ecx == X86EMUL_CPUID_VENDOR_GenuineIntel_ecx
+               && edx == X86EMUL_CPUID_VENDOR_GenuineIntel_edx;
+}
+
+static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt)
+{
+       struct x86_emulate_ops *ops = ctxt->ops;
+       u32 eax, ebx, ecx, edx;
+
+       /*
+        * syscall should always be enabled in longmode - so only become
+        * vendor specific (cpuid) if other modes are active...
+        */
+       if (ctxt->mode == X86EMUL_MODE_PROT64)
+               return true;
+
+       eax = 0x00000000;
+       ecx = 0x00000000;
+       if (ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx)) {
+               /*
+                * Intel ("GenuineIntel")
+                * remark: Intel CPUs only support "syscall" in 64bit
+                * longmode. Also an 64bit guest with a
+                * 32bit compat-app running will #UD !! While this
+                * behaviour can be fixed (by emulating) into AMD
+                * response - CPUs of AMD can't behave like Intel.
+                */
+               if (ebx == X86EMUL_CPUID_VENDOR_GenuineIntel_ebx &&
+                   ecx == X86EMUL_CPUID_VENDOR_GenuineIntel_ecx &&
+                   edx == X86EMUL_CPUID_VENDOR_GenuineIntel_edx)
+                       return false;
+
+               /* AMD ("AuthenticAMD") */
+               if (ebx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx &&
+                   ecx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx &&
+                   edx == X86EMUL_CPUID_VENDOR_AuthenticAMD_edx)
+                       return true;
+
+               /* AMD ("AMDisbetter!") */
+               if (ebx == X86EMUL_CPUID_VENDOR_AMDisbetterI_ebx &&
+                   ecx == X86EMUL_CPUID_VENDOR_AMDisbetterI_ecx &&
+                   edx == X86EMUL_CPUID_VENDOR_AMDisbetterI_edx)
+                       return true;
+       }
+
+       /* default: (not Intel, not AMD), apply Intel's stricter rules... */
+       return false;
+}
+
 static int em_syscall(struct x86_emulate_ctxt *ctxt)
 {
        struct x86_emulate_ops *ops = ctxt->ops;
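
vendor_intel() and em_syscall_is_enabled() compare the guest's CPUID(0) vendor registers because SYSCALL outside long mode is vendor specific: Intel CPUs raise #UD in legacy and compat mode, AMD CPUs accept it in legacy mode. A stand-alone reminder of how the 12-byte vendor string maps onto EBX, EDX and ECX, which is what the X86EMUL_CPUID_VENDOR_* constants encode (little-endian host assumed, as on x86):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
        const char vendor[12 + 1] = "GenuineIntel";
        uint32_t ebx, edx, ecx;

        memcpy(&ebx, vendor + 0, 4);   /* "Genu" -> 0x756e6547 */
        memcpy(&edx, vendor + 4, 4);   /* "ineI" -> 0x49656e69 */
        memcpy(&ecx, vendor + 8, 4);   /* "ntel" -> 0x6c65746e */
        printf("ebx=%#010x edx=%#010x ecx=%#010x\n", ebx, edx, ecx);
        return 0;
}
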
@@ -1890,9 +2021,15 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt)
            ctxt->mode == X86EMUL_MODE_VM86)
                return emulate_ud(ctxt);
 
+       if (!(em_syscall_is_enabled(ctxt)))
+               return emulate_ud(ctxt);
+
        ops->get_msr(ctxt, MSR_EFER, &efer);
        setup_syscalls_segments(ctxt, &cs, &ss);
 
+       if (!(efer & EFER_SCE))
+               return emulate_ud(ctxt);
+
        ops->get_msr(ctxt, MSR_STAR, &msr_data);
        msr_data >>= 32;
        cs_sel = (u16)(msr_data & 0xfffc);
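
em_syscall() now refuses to emulate unless the vendor check passes and EFER.SCE is set, matching real hardware, before deriving the target selectors from MSR_STAR. Architecturally, bits 47:32 of STAR hold the SYSCALL CS selector and SS is that value plus 8; a toy illustration with an invented MSR value:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t star = 0x0023001000000000ULL;            /* invented MSR_STAR value      */
        uint16_t cs = (uint16_t)((star >> 32) & 0xfffc);  /* bits 47:32, RPL bits cleared */
        uint16_t ss = cs + 8;                             /* SS descriptor follows CS     */

        printf("SYSCALL target CS=%#x SS=%#x\n", cs, ss);
        return 0;
}
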
@@ -1942,6 +2079,14 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt)
        if (ctxt->mode == X86EMUL_MODE_REAL)
                return emulate_gp(ctxt, 0);
 
+       /*
+        * Not recognized on AMD in compat mode (but is recognized in legacy
+        * mode).
+        */
+       if ((ctxt->mode != X86EMUL_MODE_PROT64) && (efer & EFER_LMA)
+           && !vendor_intel(ctxt))
+               return emulate_ud(ctxt);
+
        /* XXX sysenter/sysexit have not been tested in 64bit mode.
        * Therefore, we inject an #UD.
        */
@@ -1951,23 +2096,13 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt)
        setup_syscalls_segments(ctxt, &cs, &ss);
 
        ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
-       switch (ctxt->mode) {
-       case X86EMUL_MODE_PROT32:
-               if ((msr_data & 0xfffc) == 0x0)
-                       return emulate_gp(ctxt, 0);
-               break;
-       case X86EMUL_MODE_PROT64:
-               if (msr_data == 0x0)
-                       return emulate_gp(ctxt, 0);
-               break;
-       }
+       if ((msr_data & 0xfffc) == 0x0)
+               return emulate_gp(ctxt, 0);
 
        ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
-       cs_sel = (u16)msr_data;
-       cs_sel &= ~SELECTOR_RPL_MASK;
+       cs_sel = (u16)msr_data & ~SELECTOR_RPL_MASK;
        ss_sel = cs_sel + 8;
-       ss_sel &= ~SELECTOR_RPL_MASK;
-       if (ctxt->mode == X86EMUL_MODE_PROT64 || (efer & EFER_LMA)) {
+       if (efer & EFER_LMA) {
                cs.d = 0;
                cs.l = 1;
        }
@@ -1976,10 +2111,11 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt)
        ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
 
        ops->get_msr(ctxt, MSR_IA32_SYSENTER_EIP, &msr_data);
-       ctxt->_eip = msr_data;
+       ctxt->_eip = (efer & EFER_LMA) ? msr_data : (u32)msr_data;
 
        ops->get_msr(ctxt, MSR_IA32_SYSENTER_ESP, &msr_data);
-       ctxt->regs[VCPU_REGS_RSP] = msr_data;
+       ctxt->regs[VCPU_REGS_RSP] = (efer & EFER_LMA) ? msr_data :
+                                                       (u32)msr_data;
 
        return X86EMUL_CONTINUE;
 }
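
With the SYSENTER fix, a null IA32_SYSENTER_CS yields #GP in every mode, CS.L is set only when EFER.LMA is on, and the entry-point and stack-pointer MSRs are truncated to 32 bits outside long mode, so a stale 64-bit MSR value can no longer send a 32-bit guest to a bogus RIP. A small sketch of that truncation rule with invented values:

#include <stdint.h>
#include <stdio.h>

/* Outside long mode (EFER.LMA clear) only the low 32 bits of the
 * SYSENTER_EIP/ESP MSRs are used; in long mode the full 64-bit value
 * is used. */
static uint64_t sysenter_target(uint64_t msr_val, int lma)
{
        return lma ? msr_val : (uint32_t)msr_val;
}

int main(void)
{
        uint64_t msr_eip = 0xffffffff80001000ULL;   /* invented MSR contents */

        printf("legacy: %#llx\n", (unsigned long long)sysenter_target(msr_eip, 0));
        printf("long:   %#llx\n", (unsigned long long)sysenter_target(msr_eip, 1));
        return 0;
}
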
@@ -1988,7 +2124,7 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
 {
        struct x86_emulate_ops *ops = ctxt->ops;
        struct desc_struct cs, ss;
-       u64 msr_data;
+       u64 msr_data, rcx, rdx;
        int usermode;
        u16 cs_sel = 0, ss_sel = 0;
 
@@ -2004,6 +2140,9 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
        else
                usermode = X86EMUL_MODE_PROT32;
 
+       rcx = ctxt->regs[VCPU_REGS_RCX];
+       rdx = ctxt->regs[VCPU_REGS_RDX];
+
        cs.dpl = 3;
        ss.dpl = 3;
        ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
@@ -2021,6 +2160,9 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
                ss_sel = cs_sel + 8;
                cs.d = 0;
                cs.l = 1;
+               if (is_noncanonical_address(rcx) ||
+                   is_noncanonical_address(rdx))
+                       return emulate_gp(ctxt, 0);
                break;
        }
        cs_sel |= SELECTOR_RPL_MASK;
@@ -2029,8 +2171,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
        ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
        ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
 
-       ctxt->_eip = ctxt->regs[VCPU_REGS_RDX];
-       ctxt->regs[VCPU_REGS_RSP] = ctxt->regs[VCPU_REGS_RCX];
+       ctxt->_eip = rdx;
+       ctxt->regs[VCPU_REGS_RSP] = rcx;
 
        return X86EMUL_CONTINUE;
 }
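
em_sysexit() snapshots RCX and RDX before touching any state and, for a 64-bit exit, rejects non-canonical values with #GP, closing the same VM-entry hole as the SYSENTER change. The user-mode selectors come from IA32_SYSENTER_CS at fixed offsets; a stand-alone sketch of that arithmetic with an invented MSR value:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t sysenter_cs = 0x0010;   /* invented IA32_SYSENTER_CS            */
        int long_exit = 1;               /* 1: return to 64-bit user code, 0: 32-bit */

        /* 32-bit exit: CS = MSR+16, SS = MSR+24; 64-bit exit: CS = MSR+32,
         * SS = MSR+40.  RPL is forced to 3 in both cases. */
        uint16_t cs_sel = (uint16_t)(sysenter_cs + (long_exit ? 32 : 16)) | 3;
        uint16_t ss_sel = (uint16_t)(sysenter_cs + (long_exit ? 40 : 24)) | 3;

        printf("SYSEXIT CS=%#x SS=%#x\n", cs_sel, ss_sel);
        return 0;
}
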
@@ -2119,6 +2261,7 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
                                 struct tss_segment_16 *tss)
 {
        int ret;
+       u8 cpl;
 
        ctxt->_eip = tss->ip;
        ctxt->eflags = tss->flag | 2;
@@ -2141,23 +2284,30 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
        set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS);
        set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);
 
+       cpl = tss->cs & 3;
+
        /*
         * Now load segment descriptors. If a fault happens at this stage
         * it is handled in the context of the new task
         */
-       ret = load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR);
+       ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl,
+                                       NULL);
        if (ret != X86EMUL_CONTINUE)
                return ret;
-       ret = load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES);
+       ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
+                                       NULL);
        if (ret != X86EMUL_CONTINUE)
                return ret;
-       ret = load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS);
+       ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
+                                       NULL);
        if (ret != X86EMUL_CONTINUE)
                return ret;
-       ret = load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS);
+       ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
+                                       NULL);
        if (ret != X86EMUL_CONTINUE)
                return ret;
-       ret = load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS);
+       ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
+                                       NULL);
        if (ret != X86EMUL_CONTINUE)
                return ret;
 
@@ -2236,6 +2386,7 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
                                 struct tss_segment_32 *tss)
 {
        int ret;
+       u8 cpl;
 
        if (ctxt->ops->set_cr(ctxt, 3, tss->cr3))
                return emulate_gp(ctxt, 0);
@@ -2252,7 +2403,8 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
 
        /*
         * SDM says that segment selectors are loaded before segment
-        * descriptors
+        * descriptors.  This is important because CPL checks will
+        * use CS.RPL.
         */
        set_segment_selector(ctxt, tss->ldt_selector, VCPU_SREG_LDTR);
        set_segment_selector(ctxt, tss->es, VCPU_SREG_ES);
@@ -2262,29 +2414,38 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
        set_segment_selector(ctxt, tss->fs, VCPU_SREG_FS);
        set_segment_selector(ctxt, tss->gs, VCPU_SREG_GS);
 
+       cpl = tss->cs & 3;
+
        /*
         * Now load segment descriptors. If a fault happens at this stage
         * it is handled in the context of the new task
         */
-       ret = load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR);
+       ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR,
+                                       cpl, NULL);
        if (ret != X86EMUL_CONTINUE)
                return ret;
-       ret = load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES);
+       ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
+                                       NULL);
        if (ret != X86EMUL_CONTINUE)
                return ret;
-       ret = load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS);
+       ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
+                                       NULL);
        if (ret != X86EMUL_CONTINUE)
                return ret;
-       ret = load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS);
+       ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
+                                       NULL);
        if (ret != X86EMUL_CONTINUE)
                return ret;
-       ret = load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS);
+       ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
+                                       NULL);
        if (ret != X86EMUL_CONTINUE)
                return ret;
-       ret = load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS);
+       ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl,
+                                       NULL);
        if (ret != X86EMUL_CONTINUE)
                return ret;
-       ret = load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS);
+       ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl,
+                                       NULL);
        if (ret != X86EMUL_CONTINUE)
                return ret;
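
Both TSS loaders now compute cpl = tss->cs & 3 and pass it to __load_segment_descriptor(), because after a task switch the descriptor checks must be made against the privilege level the new task will run at, which is the RPL of the CS selector stored in the incoming TSS, not the CPL of the task being left. A trivial stand-alone illustration:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint16_t tss_cs = 0x0023;       /* made-up user code selector from the new TSS   */
        unsigned cpl = tss_cs & 3;      /* same expression the loaders use: tss->cs & 3  */

        printf("incoming CPL = %u\n", cpl);
        return 0;
}
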
 
@@ -2481,39 +2642,67 @@ static int em_das(struct x86_emulate_ctxt *ctxt)
        return X86EMUL_CONTINUE;
 }
 
+static int em_call(struct x86_emulate_ctxt *ctxt)
+{
+       int rc;
+       long rel = ctxt->src.val;
+
+       ctxt->src.val = (unsigned long)ctxt->_eip;
+       rc = jmp_rel(ctxt, rel);
+       if (rc != X86EMUL_CONTINUE)
+               return rc;
+       return em_push(ctxt);
+}
+
 static int em_call_far(struct x86_emulate_ctxt *ctxt)
 {
        u16 sel, old_cs;
        ulong old_eip;
        int rc;
+       struct desc_struct old_desc, new_desc;
+       const struct x86_emulate_ops *ops = ctxt->ops;
+       int cpl = ctxt->ops->cpl(ctxt);
 
-       old_cs = get_segment_selector(ctxt, VCPU_SREG_CS);
        old_eip = ctxt->_eip;
+       ops->get_segment(ctxt, &old_cs, &old_desc, NULL, VCPU_SREG_CS);
 
        memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
-       if (load_segment_descriptor(ctxt, sel, VCPU_SREG_CS))
+       rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl,
+                                      &new_desc);
+       if (rc != X86EMUL_CONTINUE)
                return X86EMUL_CONTINUE;
 
-       ctxt->_eip = 0;
-       memcpy(&ctxt->_eip, ctxt->src.valptr, ctxt->op_bytes);
+       rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l);
+       if (rc != X86EMUL_CONTINUE)
+               goto fail;
 
        ctxt->src.val = old_cs;
        rc = em_push(ctxt);
        if (rc != X86EMUL_CONTINUE)
-               return rc;
+               goto fail;
 
        ctxt->src.val = old_eip;
-       return em_push(ctxt);
+       rc = em_push(ctxt);
+       /* If we failed, we tainted the memory, but the very least we should
+          restore cs */
+       if (rc != X86EMUL_CONTINUE)
+               goto fail;
+       return rc;
+fail:
+       ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
+       return rc;
+
 }
 
 static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
 {
        int rc;
+       unsigned long eip;
 
-       ctxt->dst.type = OP_REG;
-       ctxt->dst.addr.reg = &ctxt->_eip;
-       ctxt->dst.bytes = ctxt->op_bytes;
-       rc = emulate_pop(ctxt, &ctxt->dst.val, ctxt->op_bytes);
+       rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
+       if (rc != X86EMUL_CONTINUE)
+               return rc;
+       rc = assign_eip_near(ctxt, eip);
        if (rc != X86EMUL_CONTINUE)
                return rc;
        register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RSP], ctxt->src.val);
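
em_call(), the new handler for opcode 0xE8 wired into the opcode table below, checks the branch target via jmp_rel() before pushing the return address, and em_call_far()/em_ret_near_imm() follow the same pattern of validating RIP before committing stack or segment state, restoring CS on failure. A toy model of that ordering, reusing the 48-bit canonical test as a stand-in for assign_eip_near():

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool target_ok(uint64_t rip)                 /* stand-in for assign_eip_near() */
{
        return (uint64_t)((int64_t)(rip << 16) >> 16) == rip;
}

/* Validate the destination first; only push the return address once the
 * branch has been accepted, so a rejected call leaves the stack untouched. */
static int emulate_call_rel(uint64_t *rip, uint64_t stack[], int *sp, int64_t rel)
{
        uint64_t ret = *rip;                        /* next-instruction address      */
        uint64_t dst = *rip + rel;

        if (!target_ok(dst))
                return -1;                          /* would be #GP; nothing pushed  */
        *rip = dst;
        stack[(*sp)++] = ret;                       /* toy upward-growing stack      */
        return 0;
}

int main(void)
{
        uint64_t stack[8], rip = 0x401000;
        int sp = 0;

        printf("rc=%d rip=%#llx pushed=%d\n",
               emulate_call_rel(&rip, stack, &sp, 0x20),
               (unsigned long long)rip, sp);
        return 0;
}
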
@@ -2759,20 +2948,24 @@ static int em_lmsw(struct x86_emulate_ctxt *ctxt)
 
 static int em_loop(struct x86_emulate_ctxt *ctxt)
 {
+       int rc = X86EMUL_CONTINUE;
+
        register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RCX], -1);
        if ((address_mask(ctxt, ctxt->regs[VCPU_REGS_RCX]) != 0) &&
            (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags)))
-               jmp_rel(ctxt, ctxt->src.val);
+               rc = jmp_rel(ctxt, ctxt->src.val);
 
-       return X86EMUL_CONTINUE;
+       return rc;
 }
 
 static int em_jcxz(struct x86_emulate_ctxt *ctxt)
 {
+       int rc = X86EMUL_CONTINUE;
+
        if (address_mask(ctxt, ctxt->regs[VCPU_REGS_RCX]) == 0)
-               jmp_rel(ctxt, ctxt->src.val);
+               rc = jmp_rel(ctxt, ctxt->src.val);
 
-       return X86EMUL_CONTINUE;
+       return rc;
 }
 
 static int em_cli(struct x86_emulate_ctxt *ctxt)
@@ -3216,7 +3409,7 @@ static struct opcode opcode_table[256] = {
        D2bvIP(SrcImmUByte | DstAcc, in,  check_perm_in),
        D2bvIP(SrcAcc | DstImmUByte, out, check_perm_out),
        /* 0xE8 - 0xEF */
-       D(SrcImm | Stack), D(SrcImm | ImplicitOps),
+       I(SrcImm | Stack, em_call), D(SrcImm | ImplicitOps),
        I(SrcImmFAddr | No64, em_jmp_far), D(SrcImmByte | ImplicitOps),
        D2bvIP(SrcDX | DstAcc, in,  check_perm_in),
        D2bvIP(SrcAcc | DstDX, out, check_perm_out),
@@ -3865,7 +4058,7 @@ special_insn:
                break;
        case 0x70 ... 0x7f: /* jcc (short) */
                if (test_cc(ctxt->b, ctxt->eflags))
-                       jmp_rel(ctxt, ctxt->src.val);
+                       rc = jmp_rel(ctxt, ctxt->src.val);
                break;
        case 0x8d: /* lea r16/r32, m */
                ctxt->dst.val = ctxt->src.addr.mem.ea;
@@ -3911,16 +4104,9 @@ special_insn:
        case 0xe6: /* outb */
        case 0xe7: /* out */
                goto do_io_out;
-       case 0xe8: /* call (near) */ {
-               long int rel = ctxt->src.val;
-               ctxt->src.val = (unsigned long) ctxt->_eip;
-               jmp_rel(ctxt, rel);
-               rc = em_push(ctxt);
-               break;
-       }
        case 0xe9: /* jmp rel */
        case 0xeb: /* jmp rel short */
-               jmp_rel(ctxt, ctxt->src.val);
+               rc = jmp_rel(ctxt, ctxt->src.val);
                ctxt->dst.type = OP_NONE; /* Disable writeback. */
                break;
        case 0xec: /* in al,dx */
@@ -4086,7 +4272,7 @@ twobyte_insn:
                break;
        case 0x80 ... 0x8f: /* jnz rel, etc*/
                if (test_cc(ctxt->b, ctxt->eflags))
-                       jmp_rel(ctxt, ctxt->src.val);
+                       rc = jmp_rel(ctxt, ctxt->src.val);
                break;
        case 0x90 ... 0x9f:     /* setcc r/m8 */
                ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags);