Merge git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable
[pandora-kernel.git] / arch / x86 / kvm / svm.c
index a80ffaa..71510e0 100644 (file)
@@ -196,23 +196,41 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
        svm->vmcb->control.event_inj_err = error_code;
 }
 
-static bool svm_exception_injected(struct kvm_vcpu *vcpu)
-{
-       return false;
-}
-
 static int is_external_interrupt(u32 info)
 {
        info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID;
        return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR);
 }
 
+static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
+{
+       struct vcpu_svm *svm = to_svm(vcpu);
+       u32 ret = 0;
+
+       if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)
+               ret |= X86_SHADOW_INT_STI | X86_SHADOW_INT_MOV_SS;
+       return ret & mask;
+}
+
+static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
+{
+       struct vcpu_svm *svm = to_svm(vcpu);
+
+       if (mask == 0)
+               svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
+       else
+               svm->vmcb->control.int_state |= SVM_INTERRUPT_SHADOW_MASK;
+
+}
+
 static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
 
        if (!svm->next_rip) {
-               printk(KERN_DEBUG "%s: NOP\n", __func__);
+               if (emulate_instruction(vcpu, vcpu->run, 0, 0, EMULTYPE_SKIP) !=
+                               EMULATE_DONE)
+                       printk(KERN_DEBUG "%s: NOP\n", __func__);
                return;
        }
        if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE)
@@ -220,9 +238,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
                       __func__, kvm_rip_read(vcpu), svm->next_rip);
 
        kvm_rip_write(vcpu, svm->next_rip);
-       svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
-
-       vcpu->arch.interrupt_window_open = (svm->vcpu.arch.hflags & HF_GIF_MASK);
+       svm_set_interrupt_shadow(vcpu, 0);
 }
 
 static int has_svm(void)
@@ -810,6 +826,15 @@ static void svm_get_segment(struct kvm_vcpu *vcpu,
                if (!var->unusable)
                        var->type |= 0x1;
                break;
+       case VCPU_SREG_SS:
+               /* On AMD CPUs sometimes the DB bit in the segment
+                * descriptor is left as 1, although the whole segment has
+                * been made unusable. Clear it here to pass an Intel VMX
+                * entry check when cross vendor migrating.
+                */
+               if (var->unusable)
+                       var->db = 0;
+               break;
        }
 }
 
@@ -940,15 +965,16 @@ static void svm_set_segment(struct kvm_vcpu *vcpu,
 
 }
 
-static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
+static void update_db_intercept(struct kvm_vcpu *vcpu)
 {
-       int old_debug = vcpu->guest_debug;
        struct vcpu_svm *svm = to_svm(vcpu);
 
-       vcpu->guest_debug = dbg->control;
-
        svm->vmcb->control.intercept_exceptions &=
                ~((1 << DB_VECTOR) | (1 << BP_VECTOR));
+
+       if (vcpu->arch.singlestep)
+               svm->vmcb->control.intercept_exceptions |= (1 << DB_VECTOR);
+
        if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
                if (vcpu->guest_debug &
                    (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
@@ -959,6 +985,16 @@ static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
                                1 << BP_VECTOR;
        } else
                vcpu->guest_debug = 0;
+}
+
+static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
+{
+       int old_debug = vcpu->guest_debug;
+       struct vcpu_svm *svm = to_svm(vcpu);
+
+       vcpu->guest_debug = dbg->control;
+
+       update_db_intercept(vcpu);
 
        if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
                svm->vmcb->save.dr7 = dbg->arch.debugreg[7];
@@ -973,13 +1009,6 @@ static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
        return 0;
 }
 
-static int svm_get_irq(struct kvm_vcpu *vcpu)
-{
-       if (!vcpu->arch.interrupt.pending)
-               return -1;
-       return vcpu->arch.interrupt.nr;
-}
-
 static void load_host_msrs(struct kvm_vcpu *vcpu)
 {
 #ifdef CONFIG_X86_64
@@ -1106,8 +1135,7 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
        if (npt_enabled)
                svm_flush_tlb(&svm->vcpu);
        else {
-               if (svm->vcpu.arch.interrupt.pending ||
-                               svm->vcpu.arch.exception.pending)
+               if (kvm_event_needs_reinjection(&svm->vcpu))
                        kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address);
        }
        return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code);
@@ -1116,14 +1144,30 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 static int db_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 {
        if (!(svm->vcpu.guest_debug &
-             (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
+             (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) &&
+               !svm->vcpu.arch.singlestep) {
                kvm_queue_exception(&svm->vcpu, DB_VECTOR);
                return 1;
        }
-       kvm_run->exit_reason = KVM_EXIT_DEBUG;
-       kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip;
-       kvm_run->debug.arch.exception = DB_VECTOR;
-       return 0;
+
+       if (svm->vcpu.arch.singlestep) {
+               svm->vcpu.arch.singlestep = false;
+               if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP))
+                       svm->vmcb->save.rflags &=
+                               ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
+               update_db_intercept(&svm->vcpu);
+       }
+
+       if (svm->vcpu.guest_debug &
+           (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)){
+               kvm_run->exit_reason = KVM_EXIT_DEBUG;
+               kvm_run->debug.arch.pc =
+                       svm->vmcb->save.cs.base + svm->vmcb->save.rip;
+               kvm_run->debug.arch.exception = DB_VECTOR;
+               return 0;
+       }
+
+       return 1;
 }
 
 static int bp_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
@@ -1816,6 +1860,10 @@ static int task_switch_interception(struct vcpu_svm *svm,
        int int_type = svm->vmcb->control.exit_int_info &
                SVM_EXITINTINFO_TYPE_MASK;
        int int_vec = svm->vmcb->control.exit_int_info & SVM_EVTINJ_VEC_MASK;
+       uint32_t type =
+               svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_TYPE_MASK;
+       uint32_t idt_v =
+               svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID;
 
        tss_selector = (u16)svm->vmcb->control.exit_info_1;
 
@@ -1825,20 +1873,32 @@ static int task_switch_interception(struct vcpu_svm *svm,
        else if (svm->vmcb->control.exit_info_2 &
                 (1ULL << SVM_EXITINFOSHIFT_TS_REASON_JMP))
                reason = TASK_SWITCH_JMP;
-       else if (svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID)
+       else if (idt_v)
                reason = TASK_SWITCH_GATE;
        else
                reason = TASK_SWITCH_CALL;
 
+       if (reason == TASK_SWITCH_GATE) {
+               switch (type) {
+               case SVM_EXITINTINFO_TYPE_NMI:
+                       svm->vcpu.arch.nmi_injected = false;
+                       break;
+               case SVM_EXITINTINFO_TYPE_EXEPT:
+                       kvm_clear_exception_queue(&svm->vcpu);
+                       break;
+               case SVM_EXITINTINFO_TYPE_INTR:
+                       kvm_clear_interrupt_queue(&svm->vcpu);
+                       break;
+               default:
+                       break;
+               }
+       }
 
        if (reason != TASK_SWITCH_GATE ||
            int_type == SVM_EXITINTINFO_TYPE_SOFT ||
            (int_type == SVM_EXITINTINFO_TYPE_EXEPT &&
-            (int_vec == OF_VECTOR || int_vec == BP_VECTOR))) {
-               if (emulate_instruction(&svm->vcpu, kvm_run, 0, 0,
-                                       EMULTYPE_SKIP) != EMULATE_DONE)
-                       return 0;
-       }
+            (int_vec == OF_VECTOR || int_vec == BP_VECTOR)))
+               skip_emulated_instruction(&svm->vcpu);
 
        return kvm_task_switch(&svm->vcpu, tss_selector, reason);
 }
@@ -1850,6 +1910,14 @@ static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
        return 1;
 }
 
+static int iret_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+{
+       ++svm->vcpu.stat.nmi_window_exits;
+       svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET);
+       svm->vcpu.arch.hflags |= HF_IRET_MASK;
+       return 1;
+}
+
 static int invlpg_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 {
        if (emulate_instruction(&svm->vcpu, kvm_run, 0, 0, 0) != EMULATE_DONE)
@@ -1867,8 +1935,14 @@ static int emulate_on_interception(struct vcpu_svm *svm,
 
 static int cr8_write_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 {
+       u8 cr8_prev = kvm_get_cr8(&svm->vcpu);
+       /* instruction emulation calls kvm_set_cr8() */
        emulate_instruction(&svm->vcpu, NULL, 0, 0, 0);
-       if (irqchip_in_kernel(svm->vcpu.kvm))
+       if (irqchip_in_kernel(svm->vcpu.kvm)) {
+               svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK;
+               return 1;
+       }
+       if (cr8_prev <= kvm_get_cr8(&svm->vcpu))
                return 1;
        kvm_run->exit_reason = KVM_EXIT_SET_TPR;
        return 0;
@@ -2123,6 +2197,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
        [SVM_EXIT_VINTR]                        = interrupt_window_interception,
        /* [SVM_EXIT_CR0_SEL_WRITE]             = emulate_on_interception, */
        [SVM_EXIT_CPUID]                        = cpuid_interception,
+       [SVM_EXIT_IRET]                         = iret_interception,
        [SVM_EXIT_INVD]                         = emulate_on_interception,
        [SVM_EXIT_HLT]                          = halt_interception,
        [SVM_EXIT_INVLPG]                       = invlpg_interception,
@@ -2193,7 +2268,7 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 
        if (is_external_interrupt(svm->vmcb->control.exit_int_info) &&
            exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR &&
-           exit_code != SVM_EXIT_NPF)
+           exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH)
                printk(KERN_ERR "%s: unexpected exit_ini_info 0x%x "
                       "exit_code 0x%x\n",
                       __func__, svm->vmcb->control.exit_int_info,
@@ -2230,6 +2305,15 @@ static void pre_svm_run(struct vcpu_svm *svm)
                new_asid(svm, svm_data);
 }
 
+static void svm_inject_nmi(struct kvm_vcpu *vcpu)
+{
+       struct vcpu_svm *svm = to_svm(vcpu);
+
+       svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;
+       vcpu->arch.hflags |= HF_NMI_MASK;
+       svm->vmcb->control.intercept |= (1UL << INTERCEPT_IRET);
+       ++vcpu->stat.nmi_injections;
+}
 
 static inline void svm_inject_irq(struct vcpu_svm *svm, int irq)
 {
@@ -2245,40 +2329,40 @@ static inline void svm_inject_irq(struct vcpu_svm *svm, int irq)
                ((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT);
 }
 
-static void svm_queue_irq(struct vcpu_svm *svm, unsigned nr)
+static void svm_queue_irq(struct kvm_vcpu *vcpu, unsigned nr)
 {
+       struct vcpu_svm *svm = to_svm(vcpu);
+
        svm->vmcb->control.event_inj = nr |
                SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR;
 }
 
-static void svm_set_irq(struct kvm_vcpu *vcpu, int irq)
+static void svm_set_irq(struct kvm_vcpu *vcpu)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
 
        nested_svm_intr(svm);
 
-       svm_queue_irq(svm, irq);
+       svm_queue_irq(vcpu, vcpu->arch.interrupt.nr);
 }
 
-static void update_cr8_intercept(struct kvm_vcpu *vcpu)
+static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
-       struct vmcb *vmcb = svm->vmcb;
-       int max_irr, tpr;
 
-       if (!irqchip_in_kernel(vcpu->kvm) || vcpu->arch.apic->vapic_addr)
+       if (irr == -1)
                return;
 
-       vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK;
-
-       max_irr = kvm_lapic_find_highest_irr(vcpu);
-       if (max_irr == -1)
-               return;
-
-       tpr = kvm_lapic_get_cr8(vcpu) << 4;
+       if (tpr >= irr)
+               svm->vmcb->control.intercept_cr_write |= INTERCEPT_CR8_MASK;
+}
 
-       if (tpr >= (max_irr & 0xf0))
-               vmcb->control.intercept_cr_write |= INTERCEPT_CR8_MASK;
+static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
+{
+       struct vcpu_svm *svm = to_svm(vcpu);
+       struct vmcb *vmcb = svm->vmcb;
+       return !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) &&
+               !(svm->vcpu.arch.hflags & HF_NMI_MASK);
 }
 
 static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
@@ -2296,41 +2380,20 @@ static void enable_irq_window(struct kvm_vcpu *vcpu)
        svm_inject_irq(to_svm(vcpu), 0x0);
 }
 
-static void svm_intr_inject(struct kvm_vcpu *vcpu)
-{
-       /* try to reinject previous events if any */
-       if (vcpu->arch.interrupt.pending) {
-               svm_queue_irq(to_svm(vcpu), vcpu->arch.interrupt.nr);
-               return;
-       }
-
-       /* try to inject new event if pending */
-       if (kvm_cpu_has_interrupt(vcpu)) {
-               if (vcpu->arch.interrupt_window_open) {
-                       kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu));
-                       svm_queue_irq(to_svm(vcpu), vcpu->arch.interrupt.nr);
-               }
-       }
-}
-
-static void svm_intr_assist(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static void enable_nmi_window(struct kvm_vcpu *vcpu)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
-       bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
-               kvm_run->request_interrupt_window;
-
-       if (nested_svm_intr(svm))
-               goto out;
-
-       svm->vcpu.arch.interrupt_window_open = svm_interrupt_allowed(vcpu);
 
-       svm_intr_inject(vcpu);
+       if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK))
+           == HF_NMI_MASK)
+               return; /* IRET will cause a vm exit */
 
-       if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
-               enable_irq_window(vcpu);
-
-out:
-       update_cr8_intercept(vcpu);
+       /* Something prevents NMI from being injected. Single step over
+          possible problem (IRET or exception injection or interrupt
+          shadow) */
+       vcpu->arch.singlestep = true;
+       svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
+       update_db_intercept(vcpu);
 }
 
 static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
@@ -2353,7 +2416,7 @@ static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu)
 
        if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR8_MASK)) {
                int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK;
-               kvm_lapic_set_tpr(vcpu, cr8);
+               kvm_set_cr8(vcpu, cr8);
        }
 }
 
@@ -2362,9 +2425,6 @@ static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu)
        struct vcpu_svm *svm = to_svm(vcpu);
        u64 cr8;
 
-       if (!irqchip_in_kernel(vcpu->kvm))
-               return;
-
        cr8 = kvm_get_cr8(vcpu);
        svm->vmcb->control.int_ctl &= ~V_TPR_MASK;
        svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK;
@@ -2376,6 +2436,9 @@ static void svm_complete_interrupts(struct vcpu_svm *svm)
        int type;
        u32 exitintinfo = svm->vmcb->control.exit_int_info;
 
+       if (svm->vcpu.arch.hflags & HF_IRET_MASK)
+               svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK);
+
        svm->vcpu.arch.nmi_injected = false;
        kvm_clear_exception_queue(&svm->vcpu);
        kvm_clear_interrupt_queue(&svm->vcpu);
@@ -2393,7 +2456,7 @@ static void svm_complete_interrupts(struct vcpu_svm *svm)
        case SVM_EXITINTINFO_TYPE_EXEPT:
                /* In case of software exception do not reinject an exception
                   vector, but re-execute and instruction instead */
-               if (vector == BP_VECTOR || vector == OF_VECTOR)
+               if (kvm_exception_is_soft(vector))
                        break;
                if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) {
                        u32 err = svm->vmcb->control.exit_int_info_err;
@@ -2403,7 +2466,7 @@ static void svm_complete_interrupts(struct vcpu_svm *svm)
                        kvm_queue_exception(&svm->vcpu, vector);
                break;
        case SVM_EXITINTINFO_TYPE_INTR:
-               kvm_queue_interrupt(&svm->vcpu, vector);
+               kvm_queue_interrupt(&svm->vcpu, vector, false);
                break;
        default:
                break;
@@ -2605,7 +2668,7 @@ static int get_npt_level(void)
 #endif
 }
 
-static int svm_get_mt_mask_shift(void)
+static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
 {
        return 0;
 }
@@ -2655,18 +2718,21 @@ static struct kvm_x86_ops svm_x86_ops = {
        .run = svm_vcpu_run,
        .handle_exit = handle_exit,
        .skip_emulated_instruction = skip_emulated_instruction,
+       .set_interrupt_shadow = svm_set_interrupt_shadow,
+       .get_interrupt_shadow = svm_get_interrupt_shadow,
        .patch_hypercall = svm_patch_hypercall,
-       .get_irq = svm_get_irq,
        .set_irq = svm_set_irq,
+       .set_nmi = svm_inject_nmi,
        .queue_exception = svm_queue_exception,
-       .exception_injected = svm_exception_injected,
-       .inject_pending_irq = svm_intr_assist,
-       .inject_pending_vectors = svm_intr_assist,
        .interrupt_allowed = svm_interrupt_allowed,
+       .nmi_allowed = svm_nmi_allowed,
+       .enable_nmi_window = enable_nmi_window,
+       .enable_irq_window = enable_irq_window,
+       .update_cr8_intercept = update_cr8_intercept,
 
        .set_tss_addr = svm_set_tss_addr,
        .get_tdp_level = get_npt_level,
-       .get_mt_mask_shift = svm_get_mt_mask_shift,
+       .get_mt_mask = svm_get_mt_mask,
 };
 
 static int __init svm_init(void)