Merge branch 'sched-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
[pandora-kernel.git] / arch / x86 / kvm / vmx.c
index b99bb37..0cac637 100644 (file)
@@ -91,6 +91,7 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
 }
 
 static int init_rmode(struct kvm *kvm);
+static u64 construct_eptp(unsigned long root_hpa);
 
 static DEFINE_PER_CPU(struct vmcs *, vmxarea);
 static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
@@ -264,6 +265,11 @@ static inline int cpu_has_vmx_vpid(void)
                SECONDARY_EXEC_ENABLE_VPID);
 }
 
+static inline int cpu_has_virtual_nmis(void)
+{      /* Nonzero when PIN_BASED_VIRTUAL_NMIS is set in vmcs_config.pin_based_exec_ctrl. */
+       return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS; /* raw masked bit, not normalized to 0/1 */
+}
+
 static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr)
 {
        int i;
@@ -478,7 +484,7 @@ static void reload_tss(void)
        struct descriptor_table gdt;
        struct desc_struct *descs;
 
-       get_gdt(&gdt);
+       kvm_get_gdt(&gdt);
        descs = (void *)gdt.base;
        descs[GDT_ENTRY_TSS].type = 9; /* available TSS */
        load_TR_desc();
@@ -534,9 +540,9 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
         * Set host fs and gs selectors.  Unfortunately, 22.2.3 does not
         * allow segment selectors with cpl > 0 or ti == 1.
         */
-       vmx->host_state.ldt_sel = read_ldt();
+       vmx->host_state.ldt_sel = kvm_read_ldt();
        vmx->host_state.gs_ldt_reload_needed = vmx->host_state.ldt_sel;
-       vmx->host_state.fs_sel = read_fs();
+       vmx->host_state.fs_sel = kvm_read_fs();
        if (!(vmx->host_state.fs_sel & 7)) {
                vmcs_write16(HOST_FS_SELECTOR, vmx->host_state.fs_sel);
                vmx->host_state.fs_reload_needed = 0;
@@ -544,7 +550,7 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
                vmcs_write16(HOST_FS_SELECTOR, 0);
                vmx->host_state.fs_reload_needed = 1;
        }
-       vmx->host_state.gs_sel = read_gs();
+       vmx->host_state.gs_sel = kvm_read_gs();
        if (!(vmx->host_state.gs_sel & 7))
                vmcs_write16(HOST_GS_SELECTOR, vmx->host_state.gs_sel);
        else {
@@ -580,15 +586,15 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
        ++vmx->vcpu.stat.host_state_reload;
        vmx->host_state.loaded = 0;
        if (vmx->host_state.fs_reload_needed)
-               load_fs(vmx->host_state.fs_sel);
+               kvm_load_fs(vmx->host_state.fs_sel);
        if (vmx->host_state.gs_ldt_reload_needed) {
-               load_ldt(vmx->host_state.ldt_sel);
+               kvm_load_ldt(vmx->host_state.ldt_sel);
                /*
                 * If we have to reload gs, we must take care to
                 * preserve our gs base.
                 */
                local_irq_save(flags);
-               load_gs(vmx->host_state.gs_sel);
+               kvm_load_gs(vmx->host_state.gs_sel);
 #ifdef CONFIG_X86_64
                wrmsrl(MSR_GS_BASE, vmcs_readl(HOST_GS_BASE));
 #endif
@@ -648,8 +654,8 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
                 * Linux uses per-cpu TSS and GDT, so set these when switching
                 * processors.
                 */
-               vmcs_writel(HOST_TR_BASE, read_tr_base()); /* 22.2.4 */
-               get_gdt(&dt);
+               vmcs_writel(HOST_TR_BASE, kvm_read_tr_base()); /* 22.2.4 */
+               kvm_get_gdt(&dt);
                vmcs_writel(HOST_GDTR_BASE, dt.base);   /* 22.2.4 */
 
                rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp);
@@ -915,6 +921,18 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
                break;
        case MSR_IA32_TIME_STAMP_COUNTER:
                guest_write_tsc(data);
+               break;
+       case MSR_P6_PERFCTR0:
+       case MSR_P6_PERFCTR1:
+       case MSR_P6_EVNTSEL0:
+       case MSR_P6_EVNTSEL1:
+               /*
+                * Just discard all writes to the performance counters; this
+                * should keep both older Linux and Windows 64-bit guests
+                * happy
+                */
+               pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n", msr_index, data);
+
                break;
        default:
                vmx_load_host_state(vmx);
@@ -1088,7 +1106,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
        u32 _vmentry_control = 0;
 
        min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
-       opt = 0;
+       opt = PIN_BASED_VIRTUAL_NMIS;
        if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
                                &_pin_based_exec_control) < 0)
                return -EIO;
@@ -1405,6 +1423,8 @@ static void exit_lmode(struct kvm_vcpu *vcpu)
 static void vmx_flush_tlb(struct kvm_vcpu *vcpu)
 {      /* Calls vpid_sync_vcpu_all() and, when vm_need_ept() is true, ept_sync_context(). */
        vpid_sync_vcpu_all(to_vmx(vcpu));
+       if (vm_need_ept())
+               ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa)); /* EPTP is built from the vcpu's current MMU root */
 }
 
 static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
@@ -1436,7 +1456,7 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
        if (!(cr0 & X86_CR0_PG)) {
                /* From paging/starting to nonpaging */
                vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
-                            vmcs_config.cpu_based_exec_ctrl |
+                            vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) |
                             (CPU_BASED_CR3_LOAD_EXITING |
                              CPU_BASED_CR3_STORE_EXITING));
                vcpu->arch.cr0 = cr0;
@@ -1446,7 +1466,7 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
        } else if (!is_paging(vcpu)) {
                /* From nonpaging to paging */
                vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
-                            vmcs_config.cpu_based_exec_ctrl &
+                            vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) &
                             ~(CPU_BASED_CR3_LOAD_EXITING |
                               CPU_BASED_CR3_STORE_EXITING));
                vcpu->arch.cr0 = cr0;
@@ -1923,8 +1943,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
        vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS);  /* 22.2.4 */
        vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS);  /* 22.2.4 */
        vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS);  /* 22.2.4 */
-       vmcs_write16(HOST_FS_SELECTOR, read_fs());    /* 22.2.4 */
-       vmcs_write16(HOST_GS_SELECTOR, read_gs());    /* 22.2.4 */
+       vmcs_write16(HOST_FS_SELECTOR, kvm_read_fs());    /* 22.2.4 */
+       vmcs_write16(HOST_GS_SELECTOR, kvm_read_gs());    /* 22.2.4 */
        vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS);  /* 22.2.4 */
 #ifdef CONFIG_X86_64
        rdmsrl(MSR_FS_BASE, a);
@@ -1938,7 +1958,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 
        vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8);  /* 22.2.4 */
 
-       get_idt(&dt);
+       kvm_get_idt(&dt);
        vmcs_writel(HOST_IDTR_BASE, dt.base);   /* 22.2.4 */
 
        asm("mov $.Lkvm_vmx_return, %0" : "=r"(kvm_vmx_return));
@@ -2130,6 +2150,13 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
                        irq | INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK);
 }
 
+static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
+{      /* Write a valid NMI event into VM_ENTRY_INTR_INFO_FIELD and clear vcpu->arch.nmi_pending. */
+       vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
+                       INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);
+       vcpu->arch.nmi_pending = 0; /* the event is now in the entry field; drop the pending flag */
+}
+
 static void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
 {
        int word_index = __ffs(vcpu->arch.irq_summary);
@@ -2653,6 +2680,19 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
        return 1;
 }
 
+static int handle_nmi_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{      /* Exit handler registered for EXIT_REASON_NMI_WINDOW; kvm_run is not used here. */
+       u32 cpu_based_vm_exec_control;
+
+       /* clear CPU_BASED_VIRTUAL_NMI_PENDING (set by enable_nmi_window()) in the exec controls */
+       cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+       cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING;
+       vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+       ++vcpu->stat.nmi_window_exits; /* count this exit in the vcpu statistics */
+
+       return 1; /* exit handled fully; guest execution may resume */
+}
+
 /*
  * The exit handlers return 1 if the exit was handled fully and guest execution
  * may resume.  Otherwise they set the kvm_run parameter to indicate what needs
@@ -2663,6 +2703,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu,
        [EXIT_REASON_EXCEPTION_NMI]           = handle_exception,
        [EXIT_REASON_EXTERNAL_INTERRUPT]      = handle_external_interrupt,
        [EXIT_REASON_TRIPLE_FAULT]            = handle_triple_fault,
+       [EXIT_REASON_NMI_WINDOW]              = handle_nmi_window,
        [EXIT_REASON_IO_INSTRUCTION]          = handle_io,
        [EXIT_REASON_CR_ACCESS]               = handle_cr,
        [EXIT_REASON_DR_ACCESS]               = handle_dr,
@@ -2750,17 +2791,52 @@ static void enable_irq_window(struct kvm_vcpu *vcpu)
        vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
 }
 
+static void enable_nmi_window(struct kvm_vcpu *vcpu)
+{      /* Set CPU_BASED_VIRTUAL_NMI_PENDING in the exec controls; handle_nmi_window() clears it again. */
+       u32 cpu_based_vm_exec_control;
+
+       if (!cpu_has_virtual_nmis())
+               return; /* nothing to do when PIN_BASED_VIRTUAL_NMIS is not configured */
+
+       cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); /* read-modify-write keeps the other control bits */
+       cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING;
+       vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+}
+
+static int vmx_nmi_enabled(struct kvm_vcpu *vcpu)
+{      /* Returns 1 when none of the NMI, MOV_SS or STI bits is set in GUEST_INTERRUPTIBILITY_INFO, else 0. */
+       u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
+       return !(guest_intr & (GUEST_INTR_STATE_NMI |
+                              GUEST_INTR_STATE_MOV_SS |
+                              GUEST_INTR_STATE_STI));
+}
+
+static int vmx_irq_enabled(struct kvm_vcpu *vcpu)
+{      /* Nonzero when neither MOV_SS nor STI is set in GUEST_INTERRUPTIBILITY_INFO and GUEST_RFLAGS has IF set. */
+       u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
+       return (!(guest_intr & (GUEST_INTR_STATE_MOV_SS |
+                              GUEST_INTR_STATE_STI)) &&
+               (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF)); /* unlike vmx_nmi_enabled(), GUEST_INTR_STATE_NMI is not tested */
+}
+
+static void enable_intr_window(struct kvm_vcpu *vcpu)
+{      /* Enable the NMI window or the IRQ window, depending on what is pending for this vcpu. */
+       if (vcpu->arch.nmi_pending)
+               enable_nmi_window(vcpu); /* a pending NMI is checked first and wins over a pending interrupt */
+       else if (kvm_cpu_has_interrupt(vcpu))
+               enable_irq_window(vcpu);
+}
+
 static void vmx_intr_assist(struct kvm_vcpu *vcpu)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
-       u32 idtv_info_field, intr_info_field;
-       int has_ext_irq, interrupt_window_open;
+       u32 idtv_info_field, intr_info_field, exit_intr_info_field;
        int vector;
 
        update_tpr_threshold(vcpu);
 
-       has_ext_irq = kvm_cpu_has_interrupt(vcpu);
        intr_info_field = vmcs_read32(VM_ENTRY_INTR_INFO_FIELD);
+       exit_intr_info_field = vmcs_read32(VM_EXIT_INTR_INFO);
        idtv_info_field = vmx->idt_vectoring_info;
        if (intr_info_field & INTR_INFO_VALID_MASK) {
                if (idtv_info_field & INTR_INFO_VALID_MASK) {
@@ -2768,8 +2844,7 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
                        if (printk_ratelimit())
                                printk(KERN_ERR "Fault when IDT_Vectoring\n");
                }
-               if (has_ext_irq)
-                       enable_irq_window(vcpu);
+               enable_intr_window(vcpu);
                return;
        }
        if (unlikely(idtv_info_field & INTR_INFO_VALID_MASK)) {
@@ -2779,30 +2854,56 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
                        u8 vect = idtv_info_field & VECTORING_INFO_VECTOR_MASK;
 
                        vmx_inject_irq(vcpu, vect);
-                       if (unlikely(has_ext_irq))
-                               enable_irq_window(vcpu);
+                       enable_intr_window(vcpu);
                        return;
                }
 
                KVMTRACE_1D(REDELIVER_EVT, vcpu, idtv_info_field, handler);
 
-               vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field);
+               /*
+                * SDM 3: 25.7.1.2
+                * Clear bit "block by NMI" before VM entry if an NMI delivery
+                * faulted.
+                */
+               if ((idtv_info_field & VECTORING_INFO_TYPE_MASK)
+                   == INTR_TYPE_NMI_INTR && cpu_has_virtual_nmis())
+                       vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
+                               vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
+                               ~GUEST_INTR_STATE_NMI);
+
+               vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field
+                               & ~INTR_INFO_RESVD_BITS_MASK);
                vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
                                vmcs_read32(VM_EXIT_INSTRUCTION_LEN));
 
                if (unlikely(idtv_info_field & INTR_INFO_DELIVER_CODE_MASK))
                        vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
                                vmcs_read32(IDT_VECTORING_ERROR_CODE));
-               if (unlikely(has_ext_irq))
-                       enable_irq_window(vcpu);
+               enable_intr_window(vcpu);
                return;
        }
-       if (!has_ext_irq)
+       if (cpu_has_virtual_nmis()) {
+               /*
+                * SDM 3: 25.7.1.2
+                * Re-set bit "block by NMI" before VM entry if vmexit caused by
+                * a guest IRET fault.
+                */
+               if ((exit_intr_info_field & INTR_INFO_UNBLOCK_NMI) &&
+                   (exit_intr_info_field & INTR_INFO_VECTOR_MASK) != 8)
+                       vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
+                               vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) |
+                               GUEST_INTR_STATE_NMI);
+               else if (vcpu->arch.nmi_pending) {
+                       if (vmx_nmi_enabled(vcpu))
+                               vmx_inject_nmi(vcpu);
+                       enable_intr_window(vcpu);
+                       return;
+               }
+
+       }
+       if (!kvm_cpu_has_interrupt(vcpu))
                return;
-       interrupt_window_open =
-               ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
-                (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0);
-       if (interrupt_window_open) {
+       if (vmx_irq_enabled(vcpu)) {
                vector = kvm_cpu_get_interrupt(vcpu);
                vmx_inject_irq(vcpu, vector);
                kvm_timer_intr_post(vcpu, vector);
@@ -2963,7 +3064,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
                fixup_rmode_irq(vmx);
 
        vcpu->arch.interrupt_window_open =
-               (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0;
+               (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
+                (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)) == 0;
 
        asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
        vmx->launched = 1;
@@ -2971,7 +3073,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
        intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
 
        /* We need to handle NMIs before interrupts are enabled */
-       if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) { /* nmi */
+       if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200 &&
+           (intr_info & INTR_INFO_VALID_MASK)) {
                KVMTRACE_0D(NMI, vcpu, handler);
                asm("int $2");
        }