nEPT: Nested INVEPT
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index a0d6bd9..6c70014 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -39,6 +39,7 @@
 #include <asm/mce.h>
 #include <asm/i387.h>
 #include <asm/xcr.h>
+#include <asm/perf_event.h>
 
 #include "trace.h"
 
@@ -118,7 +119,7 @@ module_param(ple_gap, int, S_IRUGO);
 static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW;
 module_param(ple_window, int, S_IRUGO);
 
-#define NR_AUTOLOAD_MSRS 1
+#define NR_AUTOLOAD_MSRS 8
 #define VMCS02_POOL_SIZE 1
 
 struct vmcs {
@@ -622,6 +623,7 @@ static unsigned long *vmx_msr_bitmap_legacy;
 static unsigned long *vmx_msr_bitmap_longmode;
 
 static bool cpu_has_load_ia32_efer;
+static bool cpu_has_load_perf_global_ctrl;
 
 static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS);
 static DEFINE_SPINLOCK(vmx_vpid_lock);
@@ -1191,15 +1193,34 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
        vmcs_write32(EXCEPTION_BITMAP, eb);
 }
 
+static void clear_atomic_switch_msr_special(unsigned long entry,
+               unsigned long exit)
+{
+       vmcs_clear_bits(VM_ENTRY_CONTROLS, entry);
+       vmcs_clear_bits(VM_EXIT_CONTROLS, exit);
+}
+
 static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
 {
        unsigned i;
        struct msr_autoload *m = &vmx->msr_autoload;
 
-       if (msr == MSR_EFER && cpu_has_load_ia32_efer) {
-               vmcs_clear_bits(VM_ENTRY_CONTROLS, VM_ENTRY_LOAD_IA32_EFER);
-               vmcs_clear_bits(VM_EXIT_CONTROLS, VM_EXIT_LOAD_IA32_EFER);
-               return;
+       switch (msr) {
+       case MSR_EFER:
+               if (cpu_has_load_ia32_efer) {
+                       clear_atomic_switch_msr_special(VM_ENTRY_LOAD_IA32_EFER,
+                                       VM_EXIT_LOAD_IA32_EFER);
+                       return;
+               }
+               break;
+       case MSR_CORE_PERF_GLOBAL_CTRL:
+               if (cpu_has_load_perf_global_ctrl) {
+                       clear_atomic_switch_msr_special(
+                                       VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL,
+                                       VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL);
+                       return;
+               }
+               break;
        }
 
        for (i = 0; i < m->nr; ++i)
@@ -1215,25 +1236,55 @@ static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
        vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr);
 }
 
+static void add_atomic_switch_msr_special(unsigned long entry,
+               unsigned long exit, unsigned long guest_val_vmcs,
+               unsigned long host_val_vmcs, u64 guest_val, u64 host_val)
+{
+       vmcs_write64(guest_val_vmcs, guest_val);
+       vmcs_write64(host_val_vmcs, host_val);
+       vmcs_set_bits(VM_ENTRY_CONTROLS, entry);
+       vmcs_set_bits(VM_EXIT_CONTROLS, exit);
+}
+
 static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
                                  u64 guest_val, u64 host_val)
 {
        unsigned i;
        struct msr_autoload *m = &vmx->msr_autoload;
 
-       if (msr == MSR_EFER && cpu_has_load_ia32_efer) {
-               vmcs_write64(GUEST_IA32_EFER, guest_val);
-               vmcs_write64(HOST_IA32_EFER, host_val);
-               vmcs_set_bits(VM_ENTRY_CONTROLS, VM_ENTRY_LOAD_IA32_EFER);
-               vmcs_set_bits(VM_EXIT_CONTROLS, VM_EXIT_LOAD_IA32_EFER);
-               return;
+       switch (msr) {
+       case MSR_EFER:
+               if (cpu_has_load_ia32_efer) {
+                       add_atomic_switch_msr_special(VM_ENTRY_LOAD_IA32_EFER,
+                                       VM_EXIT_LOAD_IA32_EFER,
+                                       GUEST_IA32_EFER,
+                                       HOST_IA32_EFER,
+                                       guest_val, host_val);
+                       return;
+               }
+               break;
+       case MSR_CORE_PERF_GLOBAL_CTRL:
+               if (cpu_has_load_perf_global_ctrl) {
+                       add_atomic_switch_msr_special(
+                                       VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL,
+                                       VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL,
+                                       GUEST_IA32_PERF_GLOBAL_CTRL,
+                                       HOST_IA32_PERF_GLOBAL_CTRL,
+                                       guest_val, host_val);
+                       return;
+               }
+               break;
        }
 
        for (i = 0; i < m->nr; ++i)
                if (m->guest[i].index == msr)
                        break;
 
-       if (i == m->nr) {
+       if (i == NR_AUTOLOAD_MSRS) {
+               printk_once(KERN_WARNING"Not enough msr switch entries. "
+                               "Can't add msr %x\n", msr);
+               return;
+       } else if (i == m->nr) {
                ++m->nr;
                vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->nr);
                vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr);
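
For MSRs without a dedicated VMCS field, the fallback above is the generic VM-entry/VM-exit MSR-load area. As a reminder of what m->guest[]/m->host[] are, the structures look roughly like this (paraphrased from asm/vmx.h and from struct vcpu_vmx in this file; treat the exact layout as a sketch):

struct vmx_msr_entry {
	u32 index;	/* MSR number, e.g. MSR_CORE_PERF_GLOBAL_CTRL */
	u32 reserved;
	u64 value;	/* loaded by the CPU on VM entry (guest[]) or VM exit (host[]) */
} __aligned(16);

struct msr_autoload {
	unsigned nr;	/* entries in use, mirrored into VM_{ENTRY,EXIT}_MSR_LOAD_COUNT */
	struct vmx_msr_entry guest[NR_AUTOLOAD_MSRS];
	struct vmx_msr_entry host[NR_AUTOLOAD_MSRS];
};	/* embedded in struct vcpu_vmx, reached above as &vmx->msr_autoload */
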
@@ -1405,7 +1456,7 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
 #ifdef CONFIG_X86_64
        wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
 #endif
-       if (current_thread_info()->status & TS_USEDFPU)
+       if (__thread_has_fpu(current))
                clts();
        load_gdt(&__get_cpu_var(host_gdt));
 }
@@ -1626,7 +1677,7 @@ static int nested_pf_handled(struct kvm_vcpu *vcpu)
        struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
 
        /* TODO: also check PFEC_MATCH/MASK, not just EB.PF. */
-       if (!(vmcs12->exception_bitmap & PF_VECTOR))
+       if (!(vmcs12->exception_bitmap & (1u << PF_VECTOR)))
                return 0;
 
        nested_vmx_vmexit(vcpu);
@@ -1905,6 +1956,7 @@ static __init void nested_vmx_setup_ctls_msrs(void)
 #endif
                CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING |
                CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_EXITING |
+               CPU_BASED_RDPMC_EXITING |
                CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
        /*
         * We can allow some features even when not supported by the
@@ -2455,6 +2507,42 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
                && allow_1_setting(MSR_IA32_VMX_EXIT_CTLS,
                                   VM_EXIT_LOAD_IA32_EFER);
 
+       cpu_has_load_perf_global_ctrl =
+               allow_1_setting(MSR_IA32_VMX_ENTRY_CTLS,
+                               VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL)
+               && allow_1_setting(MSR_IA32_VMX_EXIT_CTLS,
+                                  VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL);
+
+       /*
+        * Some cpus support VM_ENTRY_(LOAD|SAVE)_IA32_PERF_GLOBAL_CTRL
+        * but due to the errata below it can't be used. The workaround is to
+        * use the MSR load mechanism to switch IA32_PERF_GLOBAL_CTRL.
+        *
+        * VM Exit May Incorrectly Clear IA32_PERF_GLOBAL_CTRL [34:32]
+        *
+        * AAK155             (model 26)
+        * AAP115             (model 30)
+        * AAT100             (model 37)
+        * BC86,AAY89,BD102   (model 44)
+        * BA97               (model 46)
+        *
+        */
+       if (cpu_has_load_perf_global_ctrl && boot_cpu_data.x86 == 0x6) {
+               switch (boot_cpu_data.x86_model) {
+               case 26:
+               case 30:
+               case 37:
+               case 44:
+               case 46:
+                       cpu_has_load_perf_global_ctrl = false;
+                       printk_once(KERN_WARNING"kvm: VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL "
+                                       "does not work properly. Using workaround\n");
+                       break;
+               default:
+                       break;
+               }
+       }
+
        return 0;
 }
 
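
Both the existing cpu_has_load_ia32_efer check and the new cpu_has_load_perf_global_ctrl check reuse allow_1_setting(), which lives earlier in vmx.c and simply tests the allowed-1 half of the relevant VMX capability MSR; roughly (paraphrased, not part of this patch):

static __init bool allow_1_setting(u32 msr, u32 ctl)
{
	u32 vmx_msr_low, vmx_msr_high;

	/* the high dword of a VMX capability MSR holds the allowed-1 settings */
	rdmsr(msr, vmx_msr_low, vmx_msr_high);
	return vmx_msr_high & ctl;
}
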
@@ -3828,7 +3916,9 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
                vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
 
        vmx->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
+       vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
        vmx_set_cr0(&vmx->vcpu, kvm_read_cr0(vcpu)); /* enter rmode */
+       srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
        vmx_set_cr4(&vmx->vcpu, 0);
        vmx_set_efer(&vmx->vcpu, 0);
        vmx_fpu_activate(&vmx->vcpu);
@@ -4454,7 +4544,7 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu)
        u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u)
                | ((u64)(vcpu->arch.regs[VCPU_REGS_RDX] & -1u) << 32);
 
-       if (vmx_set_msr(vcpu, ecx, data) != 0) {
+       if (kvm_set_msr(vcpu, ecx, data) != 0) {
                trace_kvm_msr_write_ex(ecx, data);
                kvm_inject_gp(vcpu, 0);
                return 1;
@@ -4792,6 +4882,12 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
                if (err != EMULATE_DONE)
                        return 0;
 
+               if (vcpu->arch.halt_request) {
+                       vcpu->arch.halt_request = 0;
+                       ret = kvm_emulate_halt(vcpu);
+                       goto out;
+               }
+
                if (signal_pending(current))
                        goto out;
                if (need_resched())
@@ -5454,6 +5550,12 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu)
        return 1;
 }
 
+static int handle_invept(struct kvm_vcpu *vcpu)
+{
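+       /*
+        * Nested EPT is not exposed to the guest in this tree, so INVEPT is
+        * an undefined opcode from its point of view; reflect that as #UD.
+        */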
+       kvm_queue_exception(vcpu, UD_VECTOR);
+       return 1;
+}
+
 /*
  * The exit handlers return 1 if the exit was handled fully and guest execution
  * may resume.  Otherwise they set the kvm_run parameter to indicate what needs
@@ -5495,6 +5597,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
        [EXIT_REASON_PAUSE_INSTRUCTION]       = handle_pause,
        [EXIT_REASON_MWAIT_INSTRUCTION]       = handle_invalid_op,
        [EXIT_REASON_MONITOR_INSTRUCTION]     = handle_invalid_op,
+       [EXIT_REASON_INVEPT]                  = handle_invept,
 };
 
 static const int kvm_vmx_max_exit_handlers =
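
handle_invept() only takes effect because EXIT_REASON_INVEPT now has a slot in this table; the dispatch in vmx_handle_exit() is unchanged by the patch and looks roughly like this (paraphrased):

	if (exit_reason < kvm_vmx_max_exit_handlers
	    && kvm_vmx_exit_handlers[exit_reason])
		return kvm_vmx_exit_handlers[exit_reason](vcpu);
	else {
		vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
		vcpu->run->hw.hardware_exit_reason = exit_reason;
	}
	return 0;
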
@@ -5679,6 +5782,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
        case EXIT_REASON_VMPTRST: case EXIT_REASON_VMREAD:
        case EXIT_REASON_VMRESUME: case EXIT_REASON_VMWRITE:
        case EXIT_REASON_VMOFF: case EXIT_REASON_VMON:
+       case EXIT_REASON_INVEPT:
                /*
                 * VMX instructions trap unconditionally. This allows L1 to
                 * emulate them for its L2 guest, i.e., allows 3-level nesting!
@@ -5968,6 +6072,24 @@ static void vmx_cancel_injection(struct kvm_vcpu *vcpu)
        vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);
 }
 
+static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
+{
+       int i, nr_msrs;
+       struct perf_guest_switch_msr *msrs;
+
+       msrs = perf_guest_get_msrs(&nr_msrs);
+
+       if (!msrs)
+               return;
+
+       for (i = 0; i < nr_msrs; i++)
+               if (msrs[i].host == msrs[i].guest)
+                       clear_atomic_switch_msr(vmx, msrs[i].msr);
+               else
+                       add_atomic_switch_msr(vmx, msrs[i].msr, msrs[i].guest,
+                                       msrs[i].host);
+}
+
 #ifdef CONFIG_X86_64
 #define R "r"
 #define Q "q"
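
atomic_switch_perf_msrs() above relies on the interface pulled in through the new <asm/perf_event.h> include; the relevant declarations look roughly like this (paraphrased from the companion perf-side patch, so the header is authoritative):

struct perf_guest_switch_msr {
	unsigned msr;		/* MSR index, e.g. MSR_CORE_PERF_GLOBAL_CTRL */
	u64 host, guest;	/* host value restored on VM exit, guest value loaded on VM entry */
};

extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr);
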
@@ -6017,6 +6139,8 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
        if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
                vmx_set_interrupt_shadow(vcpu, 0);
 
+       atomic_switch_perf_msrs(vmx);
+
        vmx->__launched = vmx->loaded_vmcs->launched;
        asm(
                /* Store host registers */
@@ -6157,8 +6281,8 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
        struct vcpu_vmx *vmx = to_vmx(vcpu);
 
        free_vpid(vmx);
-       free_nested(vmx);
        free_loaded_vmcs(vmx->loaded_vmcs);
+       free_nested(vmx);
        kfree(vmx->guest_msrs);
        kvm_vcpu_uninit(vcpu);
        kmem_cache_free(kvm_vcpu_cache, vmx);