Merge branch 'sched-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...

[pandora-kernel.git] / arch / x86 / kvm / vmx.c
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c

index b99bb37..0cac637 100644 (file)
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -91,6 +91,7 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
  }
  
  static int init_rmode(struct kvm *kvm);
+static u64 construct_eptp(unsigned long root_hpa);
  
  static DEFINE_PER_CPU(struct vmcs *, vmxarea);
  static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
@@ -264,6 +265,11 @@ static inline int cpu_has_vmx_vpid(void)
                 SECONDARY_EXEC_ENABLE_VPID);
  }
  
+static inline int cpu_has_virtual_nmis(void)
+{      /* Nonzero when PIN_BASED_VIRTUAL_NMIS is set in vmcs_config.pin_based_exec_ctrl. */
+       return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS; /* masked bit, not normalized to 1 */
+}
+
  static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr)
  {
         int i;
@@ -478,7 +484,7 @@ static void reload_tss(void)
         struct descriptor_table gdt;
         struct desc_struct *descs;
  
-       get_gdt(&gdt);
+       kvm_get_gdt(&gdt);
         descs = (void *)gdt.base;
         descs[GDT_ENTRY_TSS].type = 9; /* available TSS */
         load_TR_desc();
@@ -534,9 +540,9 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
          * Set host fs and gs selectors.  Unfortunately, 22.2.3 does not
          * allow segment selectors with cpl > 0 or ti == 1.
          */
-       vmx->host_state.ldt_sel = read_ldt();
+       vmx->host_state.ldt_sel = kvm_read_ldt();
         vmx->host_state.gs_ldt_reload_needed = vmx->host_state.ldt_sel;
-       vmx->host_state.fs_sel = read_fs();
+       vmx->host_state.fs_sel = kvm_read_fs();
         if (!(vmx->host_state.fs_sel & 7)) {
                 vmcs_write16(HOST_FS_SELECTOR, vmx->host_state.fs_sel);
                 vmx->host_state.fs_reload_needed = 0;
@@ -544,7 +550,7 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
                 vmcs_write16(HOST_FS_SELECTOR, 0);
                 vmx->host_state.fs_reload_needed = 1;
         }
-       vmx->host_state.gs_sel = read_gs();
+       vmx->host_state.gs_sel = kvm_read_gs();
         if (!(vmx->host_state.gs_sel & 7))
                 vmcs_write16(HOST_GS_SELECTOR, vmx->host_state.gs_sel);
         else {
@@ -580,15 +586,15 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
         ++vmx->vcpu.stat.host_state_reload;
         vmx->host_state.loaded = 0;
         if (vmx->host_state.fs_reload_needed)
-               load_fs(vmx->host_state.fs_sel);
+               kvm_load_fs(vmx->host_state.fs_sel);
         if (vmx->host_state.gs_ldt_reload_needed) {
-               load_ldt(vmx->host_state.ldt_sel);
+               kvm_load_ldt(vmx->host_state.ldt_sel);
                 /*
                  * If we have to reload gs, we must take care to
                  * preserve our gs base.
                  */
                 local_irq_save(flags);
-               load_gs(vmx->host_state.gs_sel);
+               kvm_load_gs(vmx->host_state.gs_sel);
  #ifdef CONFIG_X86_64
                 wrmsrl(MSR_GS_BASE, vmcs_readl(HOST_GS_BASE));
  #endif
@@ -648,8 +654,8 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
                  * Linux uses per-cpu TSS and GDT, so set these when switching
                  * processors.
                  */
-               vmcs_writel(HOST_TR_BASE, read_tr_base()); /* 22.2.4 */
-               get_gdt(&dt);
+               vmcs_writel(HOST_TR_BASE, kvm_read_tr_base()); /* 22.2.4 */
+               kvm_get_gdt(&dt);
                 vmcs_writel(HOST_GDTR_BASE, dt.base);   /* 22.2.4 */
  
                 rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp);
@@ -915,6 +921,18 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
                 break;
         case MSR_IA32_TIME_STAMP_COUNTER:
                 guest_write_tsc(data);
+               break;
+       case MSR_P6_PERFCTR0:
+       case MSR_P6_PERFCTR1:
+       case MSR_P6_EVNTSEL0:
+       case MSR_P6_EVNTSEL1:
+               /*
+                * Just discard all writes to the performance counters; this
+                * should keep both older Linux and Windows 64-bit guests
+                * happy.
+                */
+               pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n", msr_index, data);
+
                 break;
         default:
                 vmx_load_host_state(vmx);
@@ -1088,7 +1106,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
         u32 _vmentry_control = 0;
  
         min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
-       opt = 0;
+       opt = PIN_BASED_VIRTUAL_NMIS;
         if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
                                 &_pin_based_exec_control) < 0)
                 return -EIO;
@@ -1405,6 +1423,8 @@ static void exit_lmode(struct kvm_vcpu *vcpu)
  static void vmx_flush_tlb(struct kvm_vcpu *vcpu)
  {
         vpid_sync_vcpu_all(to_vmx(vcpu));
+       if (vm_need_ept())      /* with EPT, also sync the EPT context built from the current MMU root */
+               ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa));
  }
  
  static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
@@ -1436,7 +1456,7 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
         if (!(cr0 & X86_CR0_PG)) {
                 /* From paging/starting to nonpaging */
                 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
-                            vmcs_config.cpu_based_exec_ctrl |
+                            vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) |
                              (CPU_BASED_CR3_LOAD_EXITING |
                               CPU_BASED_CR3_STORE_EXITING));
                 vcpu->arch.cr0 = cr0;
@@ -1446,7 +1466,7 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
         } else if (!is_paging(vcpu)) {
                 /* From nonpaging to paging */
                 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
-                            vmcs_config.cpu_based_exec_ctrl &
+                            vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) &
                              ~(CPU_BASED_CR3_LOAD_EXITING |
                                CPU_BASED_CR3_STORE_EXITING));
                 vcpu->arch.cr0 = cr0;
@@ -1923,8 +1943,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
         vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS);  /* 22.2.4 */
         vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS);  /* 22.2.4 */
         vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS);  /* 22.2.4 */
-       vmcs_write16(HOST_FS_SELECTOR, read_fs());    /* 22.2.4 */
-       vmcs_write16(HOST_GS_SELECTOR, read_gs());    /* 22.2.4 */
+       vmcs_write16(HOST_FS_SELECTOR, kvm_read_fs());    /* 22.2.4 */
+       vmcs_write16(HOST_GS_SELECTOR, kvm_read_gs());    /* 22.2.4 */
         vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS);  /* 22.2.4 */
  #ifdef CONFIG_X86_64
         rdmsrl(MSR_FS_BASE, a);
@@ -1938,7 +1958,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
  
         vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8);  /* 22.2.4 */
  
-       get_idt(&dt);
+       kvm_get_idt(&dt);
         vmcs_writel(HOST_IDTR_BASE, dt.base);   /* 22.2.4 */
  
         asm("mov $.Lkvm_vmx_return, %0" : "=r"(kvm_vmx_return));
@@ -2130,6 +2150,13 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
                         irq | INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK);
  }
  
+static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
+{      /* Write a valid NMI event into VM_ENTRY_INTR_INFO_FIELD and clear the pending flag. */
+       vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
+                       INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);
+       vcpu->arch.nmi_pending = 0;     /* the pending NMI has now been queued for entry */
+}
+
  static void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
  {
         int word_index = __ffs(vcpu->arch.irq_summary);
@@ -2653,6 +2680,19 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
         return 1;
  }
  
+static int handle_nmi_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{      /* VM-exit handler registered for EXIT_REASON_NMI_WINDOW; kvm_run is not used here. */
+       u32 cpu_based_vm_exec_control;
+
+       /* clear CPU_BASED_VIRTUAL_NMI_PENDING, the control bit enable_nmi_window() sets */
+       cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+       cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING;
+       vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+       ++vcpu->stat.nmi_window_exits;
+
+       return 1;       /* exit fully handled; guest execution may resume */
+}
+
  /*
   * The exit handlers return 1 if the exit was handled fully and guest execution
   * may resume.  Otherwise they set the kvm_run parameter to indicate what needs
@@ -2663,6 +2703,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu,
         [EXIT_REASON_EXCEPTION_NMI]           = handle_exception,
         [EXIT_REASON_EXTERNAL_INTERRUPT]      = handle_external_interrupt,
         [EXIT_REASON_TRIPLE_FAULT]            = handle_triple_fault,
+       [EXIT_REASON_NMI_WINDOW]              = handle_nmi_window,
         [EXIT_REASON_IO_INSTRUCTION]          = handle_io,
         [EXIT_REASON_CR_ACCESS]               = handle_cr,
         [EXIT_REASON_DR_ACCESS]               = handle_dr,
@@ -2750,17 +2791,52 @@ static void enable_irq_window(struct kvm_vcpu *vcpu)
         vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
  }
  
+static void enable_nmi_window(struct kvm_vcpu *vcpu)
+{      /* Set CPU_BASED_VIRTUAL_NMI_PENDING in the CPU-based VM-execution controls. */
+       u32 cpu_based_vm_exec_control;
+
+       if (!cpu_has_virtual_nmis())
+               return;         /* nothing to do when virtual NMIs are not available */
+
+       cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+       cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING;
+       vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+}
+
+static int vmx_nmi_enabled(struct kvm_vcpu *vcpu)
+{      /* Returns 1 when none of the NMI, MOV-SS or STI bits is set in the guest interruptibility info, else 0. */
+       u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
+       return !(guest_intr & (GUEST_INTR_STATE_NMI |
+                              GUEST_INTR_STATE_MOV_SS |
+                              GUEST_INTR_STATE_STI));
+}
+
+static int vmx_irq_enabled(struct kvm_vcpu *vcpu)
+{      /* Nonzero only when neither MOV-SS nor STI blocking is set and X86_EFLAGS_IF is set in GUEST_RFLAGS. */
+       u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
+       return (!(guest_intr & (GUEST_INTR_STATE_MOV_SS |
+                              GUEST_INTR_STATE_STI)) &&
+               (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF));
+}
+
+static void enable_intr_window(struct kvm_vcpu *vcpu)
+{      /* Request an NMI window if an NMI is pending; otherwise an IRQ window if an interrupt is. */
+       if (vcpu->arch.nmi_pending)     /* a pending NMI is checked first */
+               enable_nmi_window(vcpu);
+       else if (kvm_cpu_has_interrupt(vcpu))
+               enable_irq_window(vcpu);
+}
+
  static void vmx_intr_assist(struct kvm_vcpu *vcpu)
  {
         struct vcpu_vmx *vmx = to_vmx(vcpu);
-       u32 idtv_info_field, intr_info_field;
-       int has_ext_irq, interrupt_window_open;
+       u32 idtv_info_field, intr_info_field, exit_intr_info_field;
         int vector;
  
         update_tpr_threshold(vcpu);
  
-       has_ext_irq = kvm_cpu_has_interrupt(vcpu);
         intr_info_field = vmcs_read32(VM_ENTRY_INTR_INFO_FIELD);
+       exit_intr_info_field = vmcs_read32(VM_EXIT_INTR_INFO);
         idtv_info_field = vmx->idt_vectoring_info;
         if (intr_info_field & INTR_INFO_VALID_MASK) {
                 if (idtv_info_field & INTR_INFO_VALID_MASK) {
@@ -2768,8 +2844,7 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
                         if (printk_ratelimit())
                                 printk(KERN_ERR "Fault when IDT_Vectoring\n");
                 }
-               if (has_ext_irq)
-                       enable_irq_window(vcpu);
+               enable_intr_window(vcpu);
                 return;
         }
         if (unlikely(idtv_info_field & INTR_INFO_VALID_MASK)) {
@@ -2779,30 +2854,56 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
                         u8 vect = idtv_info_field & VECTORING_INFO_VECTOR_MASK;
  
                         vmx_inject_irq(vcpu, vect);
-                       if (unlikely(has_ext_irq))
-                               enable_irq_window(vcpu);
+                       enable_intr_window(vcpu);
                         return;
                 }
  
                 KVMTRACE_1D(REDELIVER_EVT, vcpu, idtv_info_field, handler);
  
-               vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field);
+               /*
+                * SDM 3: 25.7.1.2
+                * Clear bit "block by NMI" before VM entry if an NMI delivery
+                * faulted.
+                */
+               if ((idtv_info_field & VECTORING_INFO_TYPE_MASK)
+                   == INTR_TYPE_NMI_INTR && cpu_has_virtual_nmis())
+                       vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
+                               vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
+                               ~GUEST_INTR_STATE_NMI);
+
+               vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field
+                               & ~INTR_INFO_RESVD_BITS_MASK);
                 vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
                                 vmcs_read32(VM_EXIT_INSTRUCTION_LEN));
  
                 if (unlikely(idtv_info_field & INTR_INFO_DELIVER_CODE_MASK))
                         vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
                                 vmcs_read32(IDT_VECTORING_ERROR_CODE));
-               if (unlikely(has_ext_irq))
-                       enable_irq_window(vcpu);
+               enable_intr_window(vcpu);
                 return;
         }
-       if (!has_ext_irq)
+       if (cpu_has_virtual_nmis()) {
+               /*
+                * SDM 3: 25.7.1.2
+                * Re-set bit "block by NMI" before VM entry if vmexit caused by
+                * a guest IRET fault.
+                */
+               if ((exit_intr_info_field & INTR_INFO_UNBLOCK_NMI) &&
+                   (exit_intr_info_field & INTR_INFO_VECTOR_MASK) != 8)
+                       vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
+                               vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) |
+                               GUEST_INTR_STATE_NMI);
+               else if (vcpu->arch.nmi_pending) {
+                       if (vmx_nmi_enabled(vcpu))
+                               vmx_inject_nmi(vcpu);
+                       enable_intr_window(vcpu);
+                       return;
+               }
+
+       }
+       if (!kvm_cpu_has_interrupt(vcpu))
                 return;
-       interrupt_window_open =
-               ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
-                (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0);
-       if (interrupt_window_open) {
+       if (vmx_irq_enabled(vcpu)) {
                 vector = kvm_cpu_get_interrupt(vcpu);
                 vmx_inject_irq(vcpu, vector);
                 kvm_timer_intr_post(vcpu, vector);
@@ -2963,7 +3064,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
                 fixup_rmode_irq(vmx);
  
         vcpu->arch.interrupt_window_open =
-               (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0;
+               (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
+                (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)) == 0;
  
         asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
         vmx->launched = 1;
@@ -2971,7 +3073,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
         intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
  
         /* We need to handle NMIs before interrupts are enabled */
-       if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) { /* nmi */
+       if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200 &&
+           (intr_info & INTR_INFO_VALID_MASK)) {
                 KVMTRACE_0D(NMI, vcpu, handler);
                 asm("int $2");
         }