kvm: nVMX: Allow L1 to intercept software exceptions (#BP and #OF)

[pandora-kernel.git] / arch / x86 / kvm / vmx.c
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c

index a0d6bd9..0fb33a0 100644 (file)
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -39,6 +39,7 @@
  #include <asm/mce.h>
  #include <asm/i387.h>
  #include <asm/xcr.h>
+#include <asm/perf_event.h>
  
  #include "trace.h"
  
@@ -118,7 +119,7 @@ module_param(ple_gap, int, S_IRUGO);
  static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW;
  module_param(ple_window, int, S_IRUGO);
  
-#define NR_AUTOLOAD_MSRS 1
+#define NR_AUTOLOAD_MSRS 8
  #define VMCS02_POOL_SIZE 1
  
  struct vmcs {
@@ -389,6 +390,7 @@ struct vcpu_vmx {
                 u16           fs_sel, gs_sel, ldt_sel;
                 int           gs_ldt_reload_needed;
                 int           fs_reload_needed;
+               unsigned long vmcs_host_cr4;    /* May not match real cr4 */
         } host_state;
         struct {
                 int vm86_active;
@@ -622,6 +624,7 @@ static unsigned long *vmx_msr_bitmap_legacy;
  static unsigned long *vmx_msr_bitmap_longmode;
  
  static bool cpu_has_load_ia32_efer;
+static bool cpu_has_load_perf_global_ctrl;
  
  static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS);
  static DEFINE_SPINLOCK(vmx_vpid_lock);
@@ -873,10 +876,10 @@ static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12,
         return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS;
  }
  
-static inline bool is_exception(u32 intr_info)
+static inline bool is_nmi(u32 intr_info)
  {
         return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
-               == (INTR_TYPE_HARD_EXCEPTION | INTR_INFO_VALID_MASK);
+               == (INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK);
  }
  
  static void nested_vmx_vmexit(struct kvm_vcpu *vcpu);
@@ -1168,7 +1171,7 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
         u32 eb;
  
         eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) |
-            (1u << NM_VECTOR) | (1u << DB_VECTOR);
+            (1u << NM_VECTOR) | (1u << DB_VECTOR) | (1u << AC_VECTOR);
         if ((vcpu->guest_debug &
              (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) ==
             (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP))
@@ -1191,15 +1194,34 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
         vmcs_write32(EXCEPTION_BITMAP, eb);
  }
  
+static void clear_atomic_switch_msr_special(unsigned long entry,
+               unsigned long exit)
+{
+       vmcs_clear_bits(VM_ENTRY_CONTROLS, entry);
+       vmcs_clear_bits(VM_EXIT_CONTROLS, exit);
+}
+
  static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
  {
         unsigned i;
         struct msr_autoload *m = &vmx->msr_autoload;
  
-       if (msr == MSR_EFER && cpu_has_load_ia32_efer) {
-               vmcs_clear_bits(VM_ENTRY_CONTROLS, VM_ENTRY_LOAD_IA32_EFER);
-               vmcs_clear_bits(VM_EXIT_CONTROLS, VM_EXIT_LOAD_IA32_EFER);
-               return;
+       switch (msr) {
+       case MSR_EFER:
+               if (cpu_has_load_ia32_efer) {
+                       clear_atomic_switch_msr_special(VM_ENTRY_LOAD_IA32_EFER,
+                                       VM_EXIT_LOAD_IA32_EFER);
+                       return;
+               }
+               break;
+       case MSR_CORE_PERF_GLOBAL_CTRL:
+               if (cpu_has_load_perf_global_ctrl) {
+                       clear_atomic_switch_msr_special(
+                                       VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL,
+                                       VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL);
+                       return;
+               }
+               break;
         }
  
         for (i = 0; i < m->nr; ++i)
@@ -1215,25 +1237,55 @@ static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
         vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr);
  }
  
+static void add_atomic_switch_msr_special(unsigned long entry,
+               unsigned long exit, unsigned long guest_val_vmcs,
+               unsigned long host_val_vmcs, u64 guest_val, u64 host_val)
+{
+       vmcs_write64(guest_val_vmcs, guest_val);
+       vmcs_write64(host_val_vmcs, host_val);
+       vmcs_set_bits(VM_ENTRY_CONTROLS, entry);
+       vmcs_set_bits(VM_EXIT_CONTROLS, exit);
+}
+
  static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
                                   u64 guest_val, u64 host_val)
  {
         unsigned i;
         struct msr_autoload *m = &vmx->msr_autoload;
  
-       if (msr == MSR_EFER && cpu_has_load_ia32_efer) {
-               vmcs_write64(GUEST_IA32_EFER, guest_val);
-               vmcs_write64(HOST_IA32_EFER, host_val);
-               vmcs_set_bits(VM_ENTRY_CONTROLS, VM_ENTRY_LOAD_IA32_EFER);
-               vmcs_set_bits(VM_EXIT_CONTROLS, VM_EXIT_LOAD_IA32_EFER);
-               return;
+       switch (msr) {
+       case MSR_EFER:
+               if (cpu_has_load_ia32_efer) {
+                       add_atomic_switch_msr_special(VM_ENTRY_LOAD_IA32_EFER,
+                                       VM_EXIT_LOAD_IA32_EFER,
+                                       GUEST_IA32_EFER,
+                                       HOST_IA32_EFER,
+                                       guest_val, host_val);
+                       return;
+               }
+               break;
+       case MSR_CORE_PERF_GLOBAL_CTRL:
+               if (cpu_has_load_perf_global_ctrl) {
+                       add_atomic_switch_msr_special(
+                                       VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL,
+                                       VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL,
+                                       GUEST_IA32_PERF_GLOBAL_CTRL,
+                                       HOST_IA32_PERF_GLOBAL_CTRL,
+                                       guest_val, host_val);
+                       return;
+               }
+               break;
         }
  
         for (i = 0; i < m->nr; ++i)
                 if (m->guest[i].index == msr)
                         break;
  
-       if (i == m->nr) {
+       if (i == NR_AUTOLOAD_MSRS) {
+               printk_once(KERN_WARNING"Not enough mst switch entries. "
+                               "Can't add msr %x\n", msr);
+               return;
+       } else if (i == m->nr) {
                 ++m->nr;
                 vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->nr);
                 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr);
@@ -1405,7 +1457,7 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
  #ifdef CONFIG_X86_64
         wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
  #endif
-       if (current_thread_info()->status & TS_USEDFPU)
+       if (__thread_has_fpu(current))
                 clts();
         load_gdt(&__get_cpu_var(host_gdt));
  }
@@ -1626,7 +1678,7 @@ static int nested_pf_handled(struct kvm_vcpu *vcpu)
         struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
  
         /* TODO: also check PFEC_MATCH/MASK, not just EB.PF. */
-       if (!(vmcs12->exception_bitmap & PF_VECTOR))
+       if (!(vmcs12->exception_bitmap & (1u << PF_VECTOR)))
                 return 0;
  
         nested_vmx_vmexit(vcpu);
@@ -1905,6 +1957,7 @@ static __init void nested_vmx_setup_ctls_msrs(void)
  #endif
                 CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING |
                 CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_EXITING |
+               CPU_BASED_RDPMC_EXITING |
                 CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
         /*
          * We can allow some features even when not supported by the
@@ -2455,6 +2508,42 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
                 && allow_1_setting(MSR_IA32_VMX_EXIT_CTLS,
                                    VM_EXIT_LOAD_IA32_EFER);
  
+       cpu_has_load_perf_global_ctrl =
+               allow_1_setting(MSR_IA32_VMX_ENTRY_CTLS,
+                               VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL)
+               && allow_1_setting(MSR_IA32_VMX_EXIT_CTLS,
+                                  VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL);
+
+       /*
+        * Some cpus support VM_ENTRY_(LOAD|SAVE)_IA32_PERF_GLOBAL_CTRL
+        * but due to errata below it can't be used. Workaround is to use
+        * msr load mechanism to switch IA32_PERF_GLOBAL_CTRL.
+        *
+        * VM Exit May Incorrectly Clear IA32_PERF_GLOBAL_CTRL [34:32]
+        *
+        * AAK155             (model 26)
+        * AAP115             (model 30)
+        * AAT100             (model 37)
+        * BC86,AAY89,BD102   (model 44)
+        * BA97               (model 46)
+        *
+        */
+       if (cpu_has_load_perf_global_ctrl && boot_cpu_data.x86 == 0x6) {
+               switch (boot_cpu_data.x86_model) {
+               case 26:
+               case 30:
+               case 37:
+               case 44:
+               case 46:
+                       cpu_has_load_perf_global_ctrl = false;
+                       printk_once(KERN_WARNING"kvm: VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL "
+                                       "does not work properly. Using workaround\n");
+                       break;
+               default:
+                       break;
+               }
+       }
+
         return 0;
  }
  
@@ -2943,8 +3032,16 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
  
  static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
  {
-       unsigned long hw_cr4 = cr4 | (to_vmx(vcpu)->rmode.vm86_active ?
-                   KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON);
+       /*
+        * Pass through host's Machine Check Enable value to hw_cr4, which
+        * is in force while we are in guest mode.  Do not let guests control
+        * this bit, even if host CR4.MCE == 0.
+        */
+       unsigned long hw_cr4 =
+               (read_cr4() & X86_CR4_MCE) |
+               (cr4 & ~X86_CR4_MCE) |
+               (to_vmx(vcpu)->rmode.vm86_active ?
+                KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON);
  
         if (cr4 & X86_CR4_VMXE) {
                 /*
@@ -3541,16 +3638,21 @@ static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only)
   * Note that host-state that does change is set elsewhere. E.g., host-state
   * that is set differently for each CPU is set in vmx_vcpu_load(), not here.
   */
-static void vmx_set_constant_host_state(void)
+static void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
  {
         u32 low32, high32;
         unsigned long tmpl;
         struct desc_ptr dt;
+       unsigned long cr4;
  
         vmcs_writel(HOST_CR0, read_cr0() | X86_CR0_TS);  /* 22.2.3 */
-       vmcs_writel(HOST_CR4, read_cr4());  /* 22.2.3, 22.2.5 */
         vmcs_writel(HOST_CR3, read_cr3());  /* 22.2.3  FIXME: shadow tables */
  
+       /* Save the most likely value for this task's CR4 in the VMCS. */
+       cr4 = read_cr4();
+       vmcs_writel(HOST_CR4, cr4);                     /* 22.2.3, 22.2.5 */
+       vmx->host_state.vmcs_host_cr4 = cr4;
+
         vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS);  /* 22.2.4 */
         vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS);  /* 22.2.4 */
         vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS);  /* 22.2.4 */
@@ -3672,7 +3774,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
  
         vmcs_write16(HOST_FS_SELECTOR, 0);            /* 22.2.4 */
         vmcs_write16(HOST_GS_SELECTOR, 0);            /* 22.2.4 */
-       vmx_set_constant_host_state();
+       vmx_set_constant_host_state(vmx);
  #ifdef CONFIG_X86_64
         rdmsrl(MSR_FS_BASE, a);
         vmcs_writel(HOST_FS_BASE, a); /* 22.2.4 */
@@ -3828,7 +3930,9 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
                 vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
  
         vmx->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
+       vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
         vmx_set_cr0(&vmx->vcpu, kvm_read_cr0(vcpu)); /* enter rmode */
+       srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
         vmx_set_cr4(&vmx->vcpu, 0);
         vmx_set_efer(&vmx->vcpu, 0);
         vmx_fpu_activate(&vmx->vcpu);
@@ -4121,7 +4225,7 @@ static int handle_exception(struct kvm_vcpu *vcpu)
                 return 0;
         }
  
-       if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR)
+       if (is_nmi(intr_info))
                 return 1;  /* already handled by vmx_vcpu_run() */
  
         if (is_no_device(intr_info)) {
@@ -4162,6 +4266,9 @@ static int handle_exception(struct kvm_vcpu *vcpu)
  
         ex_no = intr_info & INTR_INFO_VECTOR_MASK;
         switch (ex_no) {
+       case AC_VECTOR:
+               kvm_queue_exception_e(vcpu, AC_VECTOR, error_code);
+               return 1;
         case DB_VECTOR:
                 dr6 = vmcs_readl(EXIT_QUALIFICATION);
                 if (!(vcpu->guest_debug &
@@ -4454,7 +4561,7 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu)
         u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u)
                 | ((u64)(vcpu->arch.regs[VCPU_REGS_RDX] & -1u) << 32);
  
-       if (vmx_set_msr(vcpu, ecx, data) != 0) {
+       if (kvm_set_msr(vcpu, ecx, data) != 0) {
                 trace_kvm_msr_write_ex(ecx, data);
                 kvm_inject_gp(vcpu, 0);
                 return 1;
@@ -4792,6 +4899,12 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
                 if (err != EMULATE_DONE)
                         return 0;
  
+               if (vcpu->arch.halt_request) {
+                       vcpu->arch.halt_request = 0;
+                       ret = kvm_emulate_halt(vcpu);
+                       goto out;
+               }
+
                 if (signal_pending(current))
                         goto out;
                 if (need_resched())
@@ -4886,22 +4999,27 @@ static void nested_free_vmcs02(struct vcpu_vmx *vmx, gpa_t vmptr)
  
  /*
   * Free all VMCSs saved for this vcpu, except the one pointed by
- * vmx->loaded_vmcs. These include the VMCSs in vmcs02_pool (except the one
- * currently used, if running L2), and vmcs01 when running L2.
+ * vmx->loaded_vmcs. We must be running L1, so vmx->loaded_vmcs
+ * must be &vmx->vmcs01.
   */
  static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx)
  {
         struct vmcs02_list *item, *n;
+
+       WARN_ON(vmx->loaded_vmcs != &vmx->vmcs01);
         list_for_each_entry_safe(item, n, &vmx->nested.vmcs02_pool, list) {
-               if (vmx->loaded_vmcs != &item->vmcs02)
-                       free_loaded_vmcs(&item->vmcs02);
+               /*
+                * Something will leak if the above WARN triggers.  Better than
+                * a use-after-free.
+                */
+               if (vmx->loaded_vmcs == &item->vmcs02)
+                       continue;
+
+               free_loaded_vmcs(&item->vmcs02);
                 list_del(&item->list);
                 kfree(item);
+               vmx->nested.vmcs02_num--;
         }
-       vmx->nested.vmcs02_num = 0;
-
-       if (vmx->loaded_vmcs != &vmx->vmcs01)
-               free_loaded_vmcs(&vmx->vmcs01);
  }
  
  /*
@@ -5454,6 +5572,18 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu)
         return 1;
  }
  
+static int handle_invept(struct kvm_vcpu *vcpu)
+{
+       kvm_queue_exception(vcpu, UD_VECTOR);
+       return 1;
+}
+
+static int handle_invvpid(struct kvm_vcpu *vcpu)
+{
+       kvm_queue_exception(vcpu, UD_VECTOR);
+       return 1;
+}
+
  /*
   * The exit handlers return 1 if the exit was handled fully and guest execution
   * may resume.  Otherwise they set the kvm_run parameter to indicate what needs
@@ -5495,6 +5625,8 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
         [EXIT_REASON_PAUSE_INSTRUCTION]       = handle_pause,
         [EXIT_REASON_MWAIT_INSTRUCTION]       = handle_invalid_op,
         [EXIT_REASON_MONITOR_INSTRUCTION]     = handle_invalid_op,
+       [EXIT_REASON_INVEPT]                  = handle_invept,
+       [EXIT_REASON_INVVPID]                 = handle_invvpid,
  };
  
  static const int kvm_vmx_max_exit_handlers =
@@ -5641,7 +5773,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
  
         switch (exit_reason) {
         case EXIT_REASON_EXCEPTION_NMI:
-               if (!is_exception(intr_info))
+               if (is_nmi(intr_info))
                         return 0;
                 else if (is_page_fault(intr_info))
                         return enable_ept;
@@ -5679,6 +5811,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
         case EXIT_REASON_VMPTRST: case EXIT_REASON_VMREAD:
         case EXIT_REASON_VMRESUME: case EXIT_REASON_VMWRITE:
         case EXIT_REASON_VMOFF: case EXIT_REASON_VMON:
+       case EXIT_REASON_INVEPT: case EXIT_REASON_INVVPID:
                 /*
                  * VMX instructions trap unconditionally. This allows L1 to
                  * emulate them for its L2 guest, i.e., allows 3-level nesting!
@@ -5808,10 +5941,10 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
             && kvm_vmx_exit_handlers[exit_reason])
                 return kvm_vmx_exit_handlers[exit_reason](vcpu);
         else {
-               vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
-               vcpu->run->hw.hardware_exit_reason = exit_reason;
+               WARN_ONCE(1, "vmx: unexpected exit reason 0x%x\n", exit_reason);
+               kvm_queue_exception(vcpu, UD_VECTOR);
+               return 1;
         }
-       return 0;
  }
  
  static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
@@ -5840,8 +5973,7 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
                 kvm_machine_check();
  
         /* We need to handle NMIs before interrupts are enabled */
-       if ((exit_intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR &&
-           (exit_intr_info & INTR_INFO_VALID_MASK)) {
+       if (is_nmi(exit_intr_info)) {
                 kvm_before_handle_nmi(&vmx->vcpu);
                 asm("int $2");
                 kvm_after_handle_nmi(&vmx->vcpu);
@@ -5968,6 +6100,24 @@ static void vmx_cancel_injection(struct kvm_vcpu *vcpu)
         vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);
  }
  
+static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
+{
+       int i, nr_msrs;
+       struct perf_guest_switch_msr *msrs;
+
+       msrs = perf_guest_get_msrs(&nr_msrs);
+
+       if (!msrs)
+               return;
+
+       for (i = 0; i < nr_msrs; i++)
+               if (msrs[i].host == msrs[i].guest)
+                       clear_atomic_switch_msr(vmx, msrs[i].msr);
+               else
+                       add_atomic_switch_msr(vmx, msrs[i].msr, msrs[i].guest,
+                                       msrs[i].host);
+}
+
  #ifdef CONFIG_X86_64
  #define R "r"
  #define Q "q"
@@ -5979,6 +6129,7 @@ static void vmx_cancel_injection(struct kvm_vcpu *vcpu)
  static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
  {
         struct vcpu_vmx *vmx = to_vmx(vcpu);
+       unsigned long cr4;
  
         if (is_guest_mode(vcpu) && !vmx->nested.nested_run_pending) {
                 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
@@ -6009,6 +6160,12 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
         if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty))
                 vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
  
+       cr4 = read_cr4();
+       if (unlikely(cr4 != vmx->host_state.vmcs_host_cr4)) {
+               vmcs_writel(HOST_CR4, cr4);
+               vmx->host_state.vmcs_host_cr4 = cr4;
+       }
+
         /* When single-stepping over STI and MOV SS, we must clear the
          * corresponding interruptibility bits in the guest state. Otherwise
          * vmentry fails as it then expects bit 14 (BS) in pending debug
@@ -6017,6 +6174,8 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
         if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
                 vmx_set_interrupt_shadow(vcpu, 0);
  
+       atomic_switch_perf_msrs(vmx);
+
         vmx->__launched = vmx->loaded_vmcs->launched;
         asm(
                 /* Store host registers */
@@ -6152,12 +6311,43 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
  #undef R
  #undef Q
  
+static void vmx_load_vmcs01(struct kvm_vcpu *vcpu)
+{
+       struct vcpu_vmx *vmx = to_vmx(vcpu);
+       int cpu;
+
+       if (vmx->loaded_vmcs == &vmx->vmcs01)
+               return;
+
+       cpu = get_cpu();
+       vmx->loaded_vmcs = &vmx->vmcs01;
+       vmx_vcpu_put(vcpu);
+       vmx_vcpu_load(vcpu, cpu);
+       vcpu->cpu = cpu;
+       put_cpu();
+}
+
+/*
+ * Ensure that the current vmcs of the logical processor is the
+ * vmcs01 of the vcpu before calling free_nested().
+ */
+static void vmx_free_vcpu_nested(struct kvm_vcpu *vcpu)
+{
+       struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+       vcpu_load(vcpu);
+       vmx_load_vmcs01(vcpu);
+       free_nested(vmx);
+       vcpu_put(vcpu);
+}
+
  static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
  {
         struct vcpu_vmx *vmx = to_vmx(vcpu);
  
         free_vpid(vmx);
-       free_nested(vmx);
+       leave_guest_mode(vcpu);
+       vmx_free_vcpu_nested(vcpu);
         free_loaded_vmcs(vmx->loaded_vmcs);
         kfree(vmx->guest_msrs);
         kvm_vcpu_uninit(vcpu);
@@ -6465,7 +6655,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
          * Other fields are different per CPU, and will be set later when
          * vmx_vcpu_load() is called, and when vmx_save_host_state() is called.
          */
-       vmx_set_constant_host_state();
+       vmx_set_constant_host_state(vmx);
  
         /*
          * HOST_RSP is normally set correctly in vmx_vcpu_run() just before
@@ -6904,18 +7094,12 @@ void load_vmcs12_host_state(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
  static void nested_vmx_vmexit(struct kvm_vcpu *vcpu)
  {
         struct vcpu_vmx *vmx = to_vmx(vcpu);
-       int cpu;
         struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
  
         leave_guest_mode(vcpu);
         prepare_vmcs12(vcpu, vmcs12);
  
-       cpu = get_cpu();
-       vmx->loaded_vmcs = &vmx->vmcs01;
-       vmx_vcpu_put(vcpu);
-       vmx_vcpu_load(vcpu, cpu);
-       vcpu->cpu = cpu;
-       put_cpu();
+       vmx_load_vmcs01(vcpu);
  
         /* if no vmcs02 cache requested, remove the one we used */
         if (VMCS02_POOL_SIZE == 0)