kvm: nVMX: Allow L1 to intercept software exceptions (#BP and #OF)
[pandora-kernel.git] / arch / x86 / kvm / vmx.c
index a0d6bd9..0fb33a0 100644 (file)
@@ -39,6 +39,7 @@
 #include <asm/mce.h>
 #include <asm/i387.h>
 #include <asm/xcr.h>
+#include <asm/perf_event.h>
 
 #include "trace.h"
 
@@ -118,7 +119,7 @@ module_param(ple_gap, int, S_IRUGO);
 static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW;
 module_param(ple_window, int, S_IRUGO);
 
-#define NR_AUTOLOAD_MSRS 1
+#define NR_AUTOLOAD_MSRS 8
 #define VMCS02_POOL_SIZE 1
 
 struct vmcs {
@@ -389,6 +390,7 @@ struct vcpu_vmx {
                u16           fs_sel, gs_sel, ldt_sel;
                int           gs_ldt_reload_needed;
                int           fs_reload_needed;
+               unsigned long vmcs_host_cr4;    /* May not match real cr4 */
        } host_state;
        struct {
                int vm86_active;
@@ -622,6 +624,7 @@ static unsigned long *vmx_msr_bitmap_legacy;
 static unsigned long *vmx_msr_bitmap_longmode;
 
 static bool cpu_has_load_ia32_efer;
+static bool cpu_has_load_perf_global_ctrl;
 
 static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS);
 static DEFINE_SPINLOCK(vmx_vpid_lock);
@@ -873,10 +876,10 @@ static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12,
        return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS;
 }
 
-static inline bool is_exception(u32 intr_info)
+static inline bool is_nmi(u32 intr_info)
 {
        return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
-               == (INTR_TYPE_HARD_EXCEPTION | INTR_INFO_VALID_MASK);
+               == (INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK);
 }
 
 static void nested_vmx_vmexit(struct kvm_vcpu *vcpu);
@@ -1168,7 +1171,7 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
        u32 eb;
 
        eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) |
-            (1u << NM_VECTOR) | (1u << DB_VECTOR);
+            (1u << NM_VECTOR) | (1u << DB_VECTOR) | (1u << AC_VECTOR);
        if ((vcpu->guest_debug &
             (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) ==
            (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP))
@@ -1191,15 +1194,34 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
        vmcs_write32(EXCEPTION_BITMAP, eb);
 }
 
+static void clear_atomic_switch_msr_special(unsigned long entry,
+               unsigned long exit)
+{
+       vmcs_clear_bits(VM_ENTRY_CONTROLS, entry);
+       vmcs_clear_bits(VM_EXIT_CONTROLS, exit);
+}
+
 static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
 {
        unsigned i;
        struct msr_autoload *m = &vmx->msr_autoload;
 
-       if (msr == MSR_EFER && cpu_has_load_ia32_efer) {
-               vmcs_clear_bits(VM_ENTRY_CONTROLS, VM_ENTRY_LOAD_IA32_EFER);
-               vmcs_clear_bits(VM_EXIT_CONTROLS, VM_EXIT_LOAD_IA32_EFER);
-               return;
+       switch (msr) {
+       case MSR_EFER:
+               if (cpu_has_load_ia32_efer) {
+                       clear_atomic_switch_msr_special(VM_ENTRY_LOAD_IA32_EFER,
+                                       VM_EXIT_LOAD_IA32_EFER);
+                       return;
+               }
+               break;
+       case MSR_CORE_PERF_GLOBAL_CTRL:
+               if (cpu_has_load_perf_global_ctrl) {
+                       clear_atomic_switch_msr_special(
+                                       VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL,
+                                       VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL);
+                       return;
+               }
+               break;
        }
 
        for (i = 0; i < m->nr; ++i)
@@ -1215,25 +1237,55 @@ static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
        vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr);
 }
 
+static void add_atomic_switch_msr_special(unsigned long entry,
+               unsigned long exit, unsigned long guest_val_vmcs,
+               unsigned long host_val_vmcs, u64 guest_val, u64 host_val)
+{
+       vmcs_write64(guest_val_vmcs, guest_val);
+       vmcs_write64(host_val_vmcs, host_val);
+       vmcs_set_bits(VM_ENTRY_CONTROLS, entry);
+       vmcs_set_bits(VM_EXIT_CONTROLS, exit);
+}
+
 static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
                                  u64 guest_val, u64 host_val)
 {
        unsigned i;
        struct msr_autoload *m = &vmx->msr_autoload;
 
-       if (msr == MSR_EFER && cpu_has_load_ia32_efer) {
-               vmcs_write64(GUEST_IA32_EFER, guest_val);
-               vmcs_write64(HOST_IA32_EFER, host_val);
-               vmcs_set_bits(VM_ENTRY_CONTROLS, VM_ENTRY_LOAD_IA32_EFER);
-               vmcs_set_bits(VM_EXIT_CONTROLS, VM_EXIT_LOAD_IA32_EFER);
-               return;
+       switch (msr) {
+       case MSR_EFER:
+               if (cpu_has_load_ia32_efer) {
+                       add_atomic_switch_msr_special(VM_ENTRY_LOAD_IA32_EFER,
+                                       VM_EXIT_LOAD_IA32_EFER,
+                                       GUEST_IA32_EFER,
+                                       HOST_IA32_EFER,
+                                       guest_val, host_val);
+                       return;
+               }
+               break;
+       case MSR_CORE_PERF_GLOBAL_CTRL:
+               if (cpu_has_load_perf_global_ctrl) {
+                       add_atomic_switch_msr_special(
+                                       VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL,
+                                       VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL,
+                                       GUEST_IA32_PERF_GLOBAL_CTRL,
+                                       HOST_IA32_PERF_GLOBAL_CTRL,
+                                       guest_val, host_val);
+                       return;
+               }
+               break;
        }
 
        for (i = 0; i < m->nr; ++i)
                if (m->guest[i].index == msr)
                        break;
 
-       if (i == m->nr) {
+       if (i == NR_AUTOLOAD_MSRS) {
+               printk_once(KERN_WARNING"Not enough mst switch entries. "
+                               "Can't add msr %x\n", msr);
+               return;
+       } else if (i == m->nr) {
                ++m->nr;
                vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->nr);
                vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr);
@@ -1405,7 +1457,7 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
 #ifdef CONFIG_X86_64
        wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
 #endif
-       if (current_thread_info()->status & TS_USEDFPU)
+       if (__thread_has_fpu(current))
                clts();
        load_gdt(&__get_cpu_var(host_gdt));
 }
@@ -1626,7 +1678,7 @@ static int nested_pf_handled(struct kvm_vcpu *vcpu)
        struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
 
        /* TODO: also check PFEC_MATCH/MASK, not just EB.PF. */
-       if (!(vmcs12->exception_bitmap & PF_VECTOR))
+       if (!(vmcs12->exception_bitmap & (1u << PF_VECTOR)))
                return 0;
 
        nested_vmx_vmexit(vcpu);
@@ -1905,6 +1957,7 @@ static __init void nested_vmx_setup_ctls_msrs(void)
 #endif
                CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING |
                CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_EXITING |
+               CPU_BASED_RDPMC_EXITING |
                CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
        /*
         * We can allow some features even when not supported by the
@@ -2455,6 +2508,42 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
                && allow_1_setting(MSR_IA32_VMX_EXIT_CTLS,
                                   VM_EXIT_LOAD_IA32_EFER);
 
+       cpu_has_load_perf_global_ctrl =
+               allow_1_setting(MSR_IA32_VMX_ENTRY_CTLS,
+                               VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL)
+               && allow_1_setting(MSR_IA32_VMX_EXIT_CTLS,
+                                  VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL);
+
+       /*
+        * Some cpus support VM_ENTRY_(LOAD|SAVE)_IA32_PERF_GLOBAL_CTRL
+        * but due to arrata below it can't be used. Workaround is to use
+        * msr load mechanism to switch IA32_PERF_GLOBAL_CTRL.
+        *
+        * VM Exit May Incorrectly Clear IA32_PERF_GLOBAL_CTRL [34:32]
+        *
+        * AAK155             (model 26)
+        * AAP115             (model 30)
+        * AAT100             (model 37)
+        * BC86,AAY89,BD102   (model 44)
+        * BA97               (model 46)
+        *
+        */
+       if (cpu_has_load_perf_global_ctrl && boot_cpu_data.x86 == 0x6) {
+               switch (boot_cpu_data.x86_model) {
+               case 26:
+               case 30:
+               case 37:
+               case 44:
+               case 46:
+                       cpu_has_load_perf_global_ctrl = false;
+                       printk_once(KERN_WARNING"kvm: VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL "
+                                       "does not work properly. Using workaround\n");
+                       break;
+               default:
+                       break;
+               }
+       }
+
        return 0;
 }
 
@@ -2943,8 +3032,16 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 
 static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
-       unsigned long hw_cr4 = cr4 | (to_vmx(vcpu)->rmode.vm86_active ?
-                   KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON);
+       /*
+        * Pass through host's Machine Check Enable value to hw_cr4, which
+        * is in force while we are in guest mode.  Do not let guests control
+        * this bit, even if host CR4.MCE == 0.
+        */
+       unsigned long hw_cr4 =
+               (read_cr4() & X86_CR4_MCE) |
+               (cr4 & ~X86_CR4_MCE) |
+               (to_vmx(vcpu)->rmode.vm86_active ?
+                KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON);
 
        if (cr4 & X86_CR4_VMXE) {
                /*
@@ -3541,16 +3638,21 @@ static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only)
  * Note that host-state that does change is set elsewhere. E.g., host-state
  * that is set differently for each CPU is set in vmx_vcpu_load(), not here.
  */
-static void vmx_set_constant_host_state(void)
+static void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
 {
        u32 low32, high32;
        unsigned long tmpl;
        struct desc_ptr dt;
+       unsigned long cr4;
 
        vmcs_writel(HOST_CR0, read_cr0() | X86_CR0_TS);  /* 22.2.3 */
-       vmcs_writel(HOST_CR4, read_cr4());  /* 22.2.3, 22.2.5 */
        vmcs_writel(HOST_CR3, read_cr3());  /* 22.2.3  FIXME: shadow tables */
 
+       /* Save the most likely value for this task's CR4 in the VMCS. */
+       cr4 = read_cr4();
+       vmcs_writel(HOST_CR4, cr4);                     /* 22.2.3, 22.2.5 */
+       vmx->host_state.vmcs_host_cr4 = cr4;
+
        vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS);  /* 22.2.4 */
        vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS);  /* 22.2.4 */
        vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS);  /* 22.2.4 */
@@ -3672,7 +3774,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 
        vmcs_write16(HOST_FS_SELECTOR, 0);            /* 22.2.4 */
        vmcs_write16(HOST_GS_SELECTOR, 0);            /* 22.2.4 */
-       vmx_set_constant_host_state();
+       vmx_set_constant_host_state(vmx);
 #ifdef CONFIG_X86_64
        rdmsrl(MSR_FS_BASE, a);
        vmcs_writel(HOST_FS_BASE, a); /* 22.2.4 */
@@ -3828,7 +3930,9 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
                vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
 
        vmx->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
+       vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
        vmx_set_cr0(&vmx->vcpu, kvm_read_cr0(vcpu)); /* enter rmode */
+       srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
        vmx_set_cr4(&vmx->vcpu, 0);
        vmx_set_efer(&vmx->vcpu, 0);
        vmx_fpu_activate(&vmx->vcpu);
@@ -4121,7 +4225,7 @@ static int handle_exception(struct kvm_vcpu *vcpu)
                return 0;
        }
 
-       if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR)
+       if (is_nmi(intr_info))
                return 1;  /* already handled by vmx_vcpu_run() */
 
        if (is_no_device(intr_info)) {
@@ -4162,6 +4266,9 @@ static int handle_exception(struct kvm_vcpu *vcpu)
 
        ex_no = intr_info & INTR_INFO_VECTOR_MASK;
        switch (ex_no) {
+       case AC_VECTOR:
+               kvm_queue_exception_e(vcpu, AC_VECTOR, error_code);
+               return 1;
        case DB_VECTOR:
                dr6 = vmcs_readl(EXIT_QUALIFICATION);
                if (!(vcpu->guest_debug &
@@ -4454,7 +4561,7 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu)
        u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u)
                | ((u64)(vcpu->arch.regs[VCPU_REGS_RDX] & -1u) << 32);
 
-       if (vmx_set_msr(vcpu, ecx, data) != 0) {
+       if (kvm_set_msr(vcpu, ecx, data) != 0) {
                trace_kvm_msr_write_ex(ecx, data);
                kvm_inject_gp(vcpu, 0);
                return 1;
@@ -4792,6 +4899,12 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
                if (err != EMULATE_DONE)
                        return 0;
 
+               if (vcpu->arch.halt_request) {
+                       vcpu->arch.halt_request = 0;
+                       ret = kvm_emulate_halt(vcpu);
+                       goto out;
+               }
+
                if (signal_pending(current))
                        goto out;
                if (need_resched())
@@ -4886,22 +4999,27 @@ static void nested_free_vmcs02(struct vcpu_vmx *vmx, gpa_t vmptr)
 
 /*
  * Free all VMCSs saved for this vcpu, except the one pointed by
- * vmx->loaded_vmcs. These include the VMCSs in vmcs02_pool (except the one
- * currently used, if running L2), and vmcs01 when running L2.
+ * vmx->loaded_vmcs. We must be running L1, so vmx->loaded_vmcs
+ * must be &vmx->vmcs01.
  */
 static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx)
 {
        struct vmcs02_list *item, *n;
+
+       WARN_ON(vmx->loaded_vmcs != &vmx->vmcs01);
        list_for_each_entry_safe(item, n, &vmx->nested.vmcs02_pool, list) {
-               if (vmx->loaded_vmcs != &item->vmcs02)
-                       free_loaded_vmcs(&item->vmcs02);
+               /*
+                * Something will leak if the above WARN triggers.  Better than
+                * a use-after-free.
+                */
+               if (vmx->loaded_vmcs == &item->vmcs02)
+                       continue;
+
+               free_loaded_vmcs(&item->vmcs02);
                list_del(&item->list);
                kfree(item);
+               vmx->nested.vmcs02_num--;
        }
-       vmx->nested.vmcs02_num = 0;
-
-       if (vmx->loaded_vmcs != &vmx->vmcs01)
-               free_loaded_vmcs(&vmx->vmcs01);
 }
 
 /*
@@ -5454,6 +5572,18 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu)
        return 1;
 }
 
+static int handle_invept(struct kvm_vcpu *vcpu)
+{
+       kvm_queue_exception(vcpu, UD_VECTOR);
+       return 1;
+}
+
+static int handle_invvpid(struct kvm_vcpu *vcpu)
+{
+       kvm_queue_exception(vcpu, UD_VECTOR);
+       return 1;
+}
+
 /*
  * The exit handlers return 1 if the exit was handled fully and guest execution
  * may resume.  Otherwise they set the kvm_run parameter to indicate what needs
@@ -5495,6 +5625,8 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
        [EXIT_REASON_PAUSE_INSTRUCTION]       = handle_pause,
        [EXIT_REASON_MWAIT_INSTRUCTION]       = handle_invalid_op,
        [EXIT_REASON_MONITOR_INSTRUCTION]     = handle_invalid_op,
+       [EXIT_REASON_INVEPT]                  = handle_invept,
+       [EXIT_REASON_INVVPID]                 = handle_invvpid,
 };
 
 static const int kvm_vmx_max_exit_handlers =
@@ -5641,7 +5773,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
 
        switch (exit_reason) {
        case EXIT_REASON_EXCEPTION_NMI:
-               if (!is_exception(intr_info))
+               if (is_nmi(intr_info))
                        return 0;
                else if (is_page_fault(intr_info))
                        return enable_ept;
@@ -5679,6 +5811,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
        case EXIT_REASON_VMPTRST: case EXIT_REASON_VMREAD:
        case EXIT_REASON_VMRESUME: case EXIT_REASON_VMWRITE:
        case EXIT_REASON_VMOFF: case EXIT_REASON_VMON:
+       case EXIT_REASON_INVEPT: case EXIT_REASON_INVVPID:
                /*
                 * VMX instructions trap unconditionally. This allows L1 to
                 * emulate them for its L2 guest, i.e., allows 3-level nesting!
@@ -5808,10 +5941,10 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
            && kvm_vmx_exit_handlers[exit_reason])
                return kvm_vmx_exit_handlers[exit_reason](vcpu);
        else {
-               vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
-               vcpu->run->hw.hardware_exit_reason = exit_reason;
+               WARN_ONCE(1, "vmx: unexpected exit reason 0x%x\n", exit_reason);
+               kvm_queue_exception(vcpu, UD_VECTOR);
+               return 1;
        }
-       return 0;
 }
 
 static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
@@ -5840,8 +5973,7 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
                kvm_machine_check();
 
        /* We need to handle NMIs before interrupts are enabled */
-       if ((exit_intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR &&
-           (exit_intr_info & INTR_INFO_VALID_MASK)) {
+       if (is_nmi(exit_intr_info)) {
                kvm_before_handle_nmi(&vmx->vcpu);
                asm("int $2");
                kvm_after_handle_nmi(&vmx->vcpu);
@@ -5968,6 +6100,24 @@ static void vmx_cancel_injection(struct kvm_vcpu *vcpu)
        vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);
 }
 
+static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
+{
+       int i, nr_msrs;
+       struct perf_guest_switch_msr *msrs;
+
+       msrs = perf_guest_get_msrs(&nr_msrs);
+
+       if (!msrs)
+               return;
+
+       for (i = 0; i < nr_msrs; i++)
+               if (msrs[i].host == msrs[i].guest)
+                       clear_atomic_switch_msr(vmx, msrs[i].msr);
+               else
+                       add_atomic_switch_msr(vmx, msrs[i].msr, msrs[i].guest,
+                                       msrs[i].host);
+}
+
 #ifdef CONFIG_X86_64
 #define R "r"
 #define Q "q"
@@ -5979,6 +6129,7 @@ static void vmx_cancel_injection(struct kvm_vcpu *vcpu)
 static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
+       unsigned long cr4;
 
        if (is_guest_mode(vcpu) && !vmx->nested.nested_run_pending) {
                struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
@@ -6009,6 +6160,12 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
        if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty))
                vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
 
+       cr4 = read_cr4();
+       if (unlikely(cr4 != vmx->host_state.vmcs_host_cr4)) {
+               vmcs_writel(HOST_CR4, cr4);
+               vmx->host_state.vmcs_host_cr4 = cr4;
+       }
+
        /* When single-stepping over STI and MOV SS, we must clear the
         * corresponding interruptibility bits in the guest state. Otherwise
         * vmentry fails as it then expects bit 14 (BS) in pending debug
@@ -6017,6 +6174,8 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
        if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
                vmx_set_interrupt_shadow(vcpu, 0);
 
+       atomic_switch_perf_msrs(vmx);
+
        vmx->__launched = vmx->loaded_vmcs->launched;
        asm(
                /* Store host registers */
@@ -6152,12 +6311,43 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 #undef R
 #undef Q
 
+static void vmx_load_vmcs01(struct kvm_vcpu *vcpu)
+{
+       struct vcpu_vmx *vmx = to_vmx(vcpu);
+       int cpu;
+
+       if (vmx->loaded_vmcs == &vmx->vmcs01)
+               return;
+
+       cpu = get_cpu();
+       vmx->loaded_vmcs = &vmx->vmcs01;
+       vmx_vcpu_put(vcpu);
+       vmx_vcpu_load(vcpu, cpu);
+       vcpu->cpu = cpu;
+       put_cpu();
+}
+
+/*
+ * Ensure that the current vmcs of the logical processor is the
+ * vmcs01 of the vcpu before calling free_nested().
+ */
+static void vmx_free_vcpu_nested(struct kvm_vcpu *vcpu)
+{
+       struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+       vcpu_load(vcpu);
+       vmx_load_vmcs01(vcpu);
+       free_nested(vmx);
+       vcpu_put(vcpu);
+}
+
 static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
 
        free_vpid(vmx);
-       free_nested(vmx);
+       leave_guest_mode(vcpu);
+       vmx_free_vcpu_nested(vcpu);
        free_loaded_vmcs(vmx->loaded_vmcs);
        kfree(vmx->guest_msrs);
        kvm_vcpu_uninit(vcpu);
@@ -6465,7 +6655,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
         * Other fields are different per CPU, and will be set later when
         * vmx_vcpu_load() is called, and when vmx_save_host_state() is called.
         */
-       vmx_set_constant_host_state();
+       vmx_set_constant_host_state(vmx);
 
        /*
         * HOST_RSP is normally set correctly in vmx_vcpu_run() just before
@@ -6904,18 +7094,12 @@ void load_vmcs12_host_state(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 static void nested_vmx_vmexit(struct kvm_vcpu *vcpu)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
-       int cpu;
        struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
 
        leave_guest_mode(vcpu);
        prepare_vmcs12(vcpu, vmcs12);
 
-       cpu = get_cpu();
-       vmx->loaded_vmcs = &vmx->vmcs01;
-       vmx_vcpu_put(vcpu);
-       vmx_vcpu_load(vcpu, cpu);
-       vcpu->cpu = cpu;
-       put_cpu();
+       vmx_load_vmcs01(vcpu);
 
        /* if no vmcs02 cache requested, remove the one we used */
        if (VMCS02_POOL_SIZE == 0)