X-Git-Url: https://git.openpandora.org/cgi-bin/gitweb.cgi?p=pandora-kernel.git;a=blobdiff_plain;f=arch%2Fx86%2Fkvm%2Fx86.c;h=ce57cd899a62f71d6b93e93d2588c12090a27c9d;hp=bc96ac9ed912bb1926b199006f858ed7ed27b74d;hb=34c238a1d1832d7b1f655641f52782e86396b30a;hpb=d2ddd1c48364e4161052d6089f06b2cf3c50496b diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index bc96ac9ed912..ce57cd899a62 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -56,6 +56,7 @@ #include #include #include +#include #define MAX_IO_MSRS 256 #define CR0_RESERVED_BITS \ @@ -283,6 +284,8 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu, u32 prev_nr; int class1, class2; + kvm_make_request(KVM_REQ_EVENT, vcpu); + if (!vcpu->arch.exception.pending) { queue: vcpu->arch.exception.pending = true; @@ -328,16 +331,28 @@ void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr) } EXPORT_SYMBOL_GPL(kvm_requeue_exception); -void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr, - u32 error_code) +void kvm_inject_page_fault(struct kvm_vcpu *vcpu) { + unsigned error_code = vcpu->arch.fault.error_code; + ++vcpu->stat.pf_guest; - vcpu->arch.cr2 = addr; + vcpu->arch.cr2 = vcpu->arch.fault.address; kvm_queue_exception_e(vcpu, PF_VECTOR, error_code); } +void kvm_propagate_fault(struct kvm_vcpu *vcpu) +{ + if (mmu_is_nested(vcpu) && !vcpu->arch.fault.nested) + vcpu->arch.nested_mmu.inject_page_fault(vcpu); + else + vcpu->arch.mmu.inject_page_fault(vcpu); + + vcpu->arch.fault.nested = false; +} + void kvm_inject_nmi(struct kvm_vcpu *vcpu) { + kvm_make_request(KVM_REQ_EVENT, vcpu); vcpu->arch.nmi_pending = 1; } EXPORT_SYMBOL_GPL(kvm_inject_nmi); @@ -367,19 +382,50 @@ bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl) } EXPORT_SYMBOL_GPL(kvm_require_cpl); +/* + * This function will be used to read from the physical memory of the currently + * running guest. The difference to kvm_read_guest_page is that this function + * can read from guest physical or from the guest's guest physical memory. + */ +int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, + gfn_t ngfn, void *data, int offset, int len, + u32 access) +{ + gfn_t real_gfn; + gpa_t ngpa; + + ngpa = gfn_to_gpa(ngfn); + real_gfn = mmu->translate_gpa(vcpu, ngpa, access); + if (real_gfn == UNMAPPED_GVA) + return -EFAULT; + + real_gfn = gpa_to_gfn(real_gfn); + + return kvm_read_guest_page(vcpu->kvm, real_gfn, data, offset, len); +} +EXPORT_SYMBOL_GPL(kvm_read_guest_page_mmu); + +int kvm_read_nested_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, + void *data, int offset, int len, u32 access) +{ + return kvm_read_guest_page_mmu(vcpu, vcpu->arch.walk_mmu, gfn, + data, offset, len, access); +} + /* * Load the pae pdptrs. Return true is they are all valid. */ -int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3) +int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3) { gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT; unsigned offset = ((cr3 & (PAGE_SIZE-1)) >> 5) << 2; int i; int ret; - u64 pdpte[ARRAY_SIZE(vcpu->arch.pdptrs)]; + u64 pdpte[ARRAY_SIZE(mmu->pdptrs)]; - ret = kvm_read_guest_page(vcpu->kvm, pdpt_gfn, pdpte, - offset * sizeof(u64), sizeof(pdpte)); + ret = kvm_read_guest_page_mmu(vcpu, mmu, pdpt_gfn, pdpte, + offset * sizeof(u64), sizeof(pdpte), + PFERR_USER_MASK|PFERR_WRITE_MASK); if (ret < 0) { ret = 0; goto out; @@ -393,7 +439,7 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3) } ret = 1; - memcpy(vcpu->arch.pdptrs, pdpte, sizeof(vcpu->arch.pdptrs)); + memcpy(mmu->pdptrs, pdpte, sizeof(mmu->pdptrs)); __set_bit(VCPU_EXREG_PDPTR, (unsigned long *)&vcpu->arch.regs_avail); __set_bit(VCPU_EXREG_PDPTR, @@ -406,8 +452,10 @@ EXPORT_SYMBOL_GPL(load_pdptrs); static bool pdptrs_changed(struct kvm_vcpu *vcpu) { - u64 pdpte[ARRAY_SIZE(vcpu->arch.pdptrs)]; + u64 pdpte[ARRAY_SIZE(vcpu->arch.walk_mmu->pdptrs)]; bool changed = true; + int offset; + gfn_t gfn; int r; if (is_long_mode(vcpu) || !is_pae(vcpu)) @@ -417,10 +465,13 @@ static bool pdptrs_changed(struct kvm_vcpu *vcpu) (unsigned long *)&vcpu->arch.regs_avail)) return true; - r = kvm_read_guest(vcpu->kvm, vcpu->arch.cr3 & ~31u, pdpte, sizeof(pdpte)); + gfn = (vcpu->arch.cr3 & ~31u) >> PAGE_SHIFT; + offset = (vcpu->arch.cr3 & ~31u) & (PAGE_SIZE - 1); + r = kvm_read_nested_guest_page(vcpu, gfn, pdpte, offset, sizeof(pdpte), + PFERR_USER_MASK | PFERR_WRITE_MASK); if (r < 0) goto out; - changed = memcmp(pdpte, vcpu->arch.pdptrs, sizeof(pdpte)) != 0; + changed = memcmp(pdpte, vcpu->arch.walk_mmu->pdptrs, sizeof(pdpte)) != 0; out: return changed; @@ -459,7 +510,8 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) return 1; } else #endif - if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.cr3)) + if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.walk_mmu, + vcpu->arch.cr3)) return 1; } @@ -548,7 +600,7 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) return 1; } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE) && ((cr4 ^ old_cr4) & pdptr_bits) - && !load_pdptrs(vcpu, vcpu->arch.cr3)) + && !load_pdptrs(vcpu, vcpu->arch.walk_mmu, vcpu->arch.cr3)) return 1; if (cr4 & X86_CR4_VMXE) @@ -581,7 +633,8 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) if (is_pae(vcpu)) { if (cr3 & CR3_PAE_RESERVED_BITS) return 1; - if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) + if (is_paging(vcpu) && + !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)) return 1; } /* @@ -738,7 +791,7 @@ static u32 msrs_to_save[] = { #ifdef CONFIG_X86_64 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, #endif - MSR_IA32_TSC, MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA + MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA }; static unsigned num_msrs_to_save; @@ -839,7 +892,7 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) /* * The guest calculates current wall clock time by adding - * system time (updated by kvm_write_guest_time below) to the + * system time (updated by kvm_guest_time_update below) to the * wall clock specified here. guest system time equals host * system time for us, thus we must fill in host boot time here. */ @@ -867,31 +920,35 @@ static uint32_t div_frac(uint32_t dividend, uint32_t divisor) return quotient; } -static void kvm_set_time_scale(uint32_t tsc_khz, struct pvclock_vcpu_time_info *hv_clock) +static void kvm_get_time_scale(uint32_t scaled_khz, uint32_t base_khz, + s8 *pshift, u32 *pmultiplier) { - uint64_t nsecs = 1000000000LL; + uint64_t scaled64; int32_t shift = 0; uint64_t tps64; uint32_t tps32; - tps64 = tsc_khz * 1000LL; - while (tps64 > nsecs*2) { + tps64 = base_khz * 1000LL; + scaled64 = scaled_khz * 1000LL; + while (tps64 > scaled64*2 || tps64 & 0xffffffff00000000UL) { tps64 >>= 1; shift--; } tps32 = (uint32_t)tps64; - while (tps32 <= (uint32_t)nsecs) { - tps32 <<= 1; + while (tps32 <= scaled64 || scaled64 & 0xffffffff00000000UL) { + if (scaled64 & 0xffffffff00000000UL || tps32 & 0x80000000) + scaled64 >>= 1; + else + tps32 <<= 1; shift++; } - hv_clock->tsc_shift = shift; - hv_clock->tsc_to_system_mul = div_frac(nsecs, tps32); + *pshift = shift; + *pmultiplier = div_frac(scaled64, tps32); - pr_debug("%s: tsc_khz %u, tsc_shift %d, tsc_mul %u\n", - __func__, tsc_khz, hv_clock->tsc_shift, - hv_clock->tsc_to_system_mul); + pr_debug("%s: base_khz %u => %u, shift %d, mul %u\n", + __func__, base_khz, scaled_khz, shift, *pmultiplier); } static inline u64 get_kernel_ns(void) @@ -917,11 +974,15 @@ static inline int kvm_tsc_changes_freq(void) static inline u64 nsec_to_cycles(u64 nsec) { + u64 ret; + WARN_ON(preemptible()); if (kvm_tsc_changes_freq()) printk_once(KERN_WARNING "kvm: unreliable cycle conversion on adjustable rate TSC\n"); - return (nsec * __get_cpu_var(cpu_tsc_khz)) / USEC_PER_SEC; + ret = nsec * __get_cpu_var(cpu_tsc_khz); + do_div(ret, USEC_PER_SEC); + return ret; } void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data) @@ -971,7 +1032,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data) } EXPORT_SYMBOL_GPL(kvm_write_tsc); -static int kvm_write_guest_time(struct kvm_vcpu *v) +static int kvm_guest_time_update(struct kvm_vcpu *v) { unsigned long flags; struct kvm_vcpu_arch *vcpu = &v->arch; @@ -991,7 +1052,7 @@ static int kvm_write_guest_time(struct kvm_vcpu *v) local_irq_restore(flags); if (unlikely(this_tsc_khz == 0)) { - kvm_make_request(KVM_REQ_KVMCLOCK_UPDATE, v); + kvm_make_request(KVM_REQ_CLOCK_UPDATE, v); return 1; } @@ -1027,7 +1088,9 @@ static int kvm_write_guest_time(struct kvm_vcpu *v) } if (unlikely(vcpu->hw_tsc_khz != this_tsc_khz)) { - kvm_set_time_scale(this_tsc_khz, &vcpu->hv_clock); + kvm_get_time_scale(NSEC_PER_SEC / 1000, this_tsc_khz, + &vcpu->hv_clock.tsc_shift, + &vcpu->hv_clock.tsc_to_system_mul); vcpu->hw_tsc_khz = this_tsc_khz; } @@ -1038,6 +1101,7 @@ static int kvm_write_guest_time(struct kvm_vcpu *v) vcpu->hv_clock.tsc_timestamp = tsc_timestamp; vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset; vcpu->last_kernel_ns = kernel_ns; + vcpu->last_guest_tsc = tsc_timestamp; vcpu->hv_clock.flags = 0; /* @@ -1064,7 +1128,7 @@ static int kvm_request_guest_time_update(struct kvm_vcpu *v) if (!vcpu->time_page) return 0; - kvm_make_request(KVM_REQ_KVMCLOCK_UPDATE, v); + kvm_make_request(KVM_REQ_CLOCK_UPDATE, v); return 1; } @@ -1444,6 +1508,16 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) pr_unimpl(vcpu, "unimplemented perfctr wrmsr: " "0x%x data 0x%llx\n", msr, data); break; + case MSR_K7_CLK_CTL: + /* + * Ignore all writes to this no longer documented MSR. + * Writes are only relevant for old K7 processors, + * all pre-dating SVM, but a recommended workaround from + * AMD for these chips. It is possible to speicify the + * affected processor models on the command line, hence + * the need to ignore the workaround. + */ + break; case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15: if (kvm_hv_msr_partition_wide(msr)) { int r; @@ -1636,6 +1710,20 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) case 0xcd: /* fsb frequency */ data = 3; break; + /* + * MSR_EBC_FREQUENCY_ID + * Conservative value valid for even the basic CPU models. + * Models 0,1: 000 in bits 23:21 indicating a bus speed of + * 100MHz, model 2 000 in bits 18:16 indicating 100MHz, + * and 266MHz for model 3, or 4. Set Core Clock + * Frequency to System Bus Frequency Ratio to 1 (bits + * 31:24) even though these are only valid for CPU + * models > 2, however guests may end up dividing or + * multiplying by zero otherwise. + */ + case MSR_EBC_FREQUENCY_ID: + data = 1 << 24; + break; case MSR_IA32_APICBASE: data = kvm_get_apic_base(vcpu); break; @@ -1669,6 +1757,18 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) case MSR_IA32_MCG_STATUS: case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1: return get_msr_mce(vcpu, msr, pdata); + case MSR_K7_CLK_CTL: + /* + * Provide expected ramp-up count for K7. All other + * are set to zero, indicating minimum divisors for + * every field. + * + * This prevents guest kernels on AMD host with CPU + * type 6, model 8 and higher from exploding due to + * the rdmsr failing. + */ + data = 0x20000000; + break; case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15: if (kvm_hv_msr_partition_wide(msr)) { int r; @@ -2113,7 +2213,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, 0 /* Reserved, AES */ | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX); /* cpuid 0x80000001.ecx */ const u32 kvm_supported_word6_x86_features = - F(LAHF_LM) | F(CMP_LEGACY) | F(SVM) | 0 /* ExtApicSpace */ | + F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ | F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) | F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(SSE5) | 0 /* SKINIT */ | 0 /* WDT */; @@ -2322,6 +2422,7 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, return -ENXIO; kvm_queue_interrupt(vcpu, irq->irq, false); + kvm_make_request(KVM_REQ_EVENT, vcpu); return 0; } @@ -2475,6 +2576,8 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR) vcpu->arch.sipi_vector = events->sipi_vector; + kvm_make_request(KVM_REQ_EVENT, vcpu); + return 0; } @@ -2914,18 +3017,18 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) r = 0; switch (chip->chip_id) { case KVM_IRQCHIP_PIC_MASTER: - raw_spin_lock(&pic_irqchip(kvm)->lock); + spin_lock(&pic_irqchip(kvm)->lock); memcpy(&pic_irqchip(kvm)->pics[0], &chip->chip.pic, sizeof(struct kvm_pic_state)); - raw_spin_unlock(&pic_irqchip(kvm)->lock); + spin_unlock(&pic_irqchip(kvm)->lock); break; case KVM_IRQCHIP_PIC_SLAVE: - raw_spin_lock(&pic_irqchip(kvm)->lock); + spin_lock(&pic_irqchip(kvm)->lock); memcpy(&pic_irqchip(kvm)->pics[1], &chip->chip.pic, sizeof(struct kvm_pic_state)); - raw_spin_unlock(&pic_irqchip(kvm)->lock); + spin_unlock(&pic_irqchip(kvm)->lock); break; case KVM_IRQCHIP_IOAPIC: r = kvm_set_ioapic(kvm, &chip->chip.ioapic); @@ -3406,30 +3509,51 @@ void kvm_get_segment(struct kvm_vcpu *vcpu, kvm_x86_ops->get_segment(vcpu, var, seg); } +static gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access) +{ + return gpa; +} + +static gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access) +{ + gpa_t t_gpa; + u32 error; + + BUG_ON(!mmu_is_nested(vcpu)); + + /* NPT walks are always user-walks */ + access |= PFERR_USER_MASK; + t_gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gpa, access, &error); + if (t_gpa == UNMAPPED_GVA) + vcpu->arch.fault.nested = true; + + return t_gpa; +} + gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) { u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; - return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, access, error); + return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, error); } gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) { u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; access |= PFERR_FETCH_MASK; - return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, access, error); + return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, error); } gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) { u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; access |= PFERR_WRITE_MASK; - return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, access, error); + return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, error); } /* uses this to access any guest's mapped memory without checking CPL */ gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) { - return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, 0, error); + return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, 0, error); } static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes, @@ -3440,7 +3564,8 @@ static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes, int r = X86EMUL_CONTINUE; while (bytes) { - gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr, access, error); + gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, access, + error); unsigned offset = addr & (PAGE_SIZE-1); unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset); int ret; @@ -3495,8 +3620,9 @@ static int kvm_write_guest_virt_system(gva_t addr, void *val, int r = X86EMUL_CONTINUE; while (bytes) { - gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr, - PFERR_WRITE_MASK, error); + gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, + PFERR_WRITE_MASK, + error); unsigned offset = addr & (PAGE_SIZE-1); unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset); int ret; @@ -3738,7 +3864,7 @@ static int emulator_pio_in_emulated(int size, unsigned short port, void *val, if (vcpu->arch.pio.count) goto data_avail; - trace_kvm_pio(1, port, size, 1); + trace_kvm_pio(0, port, size, 1); vcpu->arch.pio.port = port; vcpu->arch.pio.in = 1; @@ -3766,7 +3892,7 @@ static int emulator_pio_out_emulated(int size, unsigned short port, const void *val, unsigned int count, struct kvm_vcpu *vcpu) { - trace_kvm_pio(0, port, size, 1); + trace_kvm_pio(1, port, size, 1); vcpu->arch.pio.port = port; vcpu->arch.pio.in = 0; @@ -4039,7 +4165,7 @@ static void inject_emulated_exception(struct kvm_vcpu *vcpu) { struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; if (ctxt->exception == PF_VECTOR) - kvm_inject_page_fault(vcpu, ctxt->cr2, ctxt->error_code); + kvm_propagate_fault(vcpu); else if (ctxt->error_code_valid) kvm_queue_exception_e(vcpu, ctxt->exception, ctxt->error_code); else @@ -4068,6 +4194,35 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu) memcpy(c->regs, vcpu->arch.regs, sizeof c->regs); } +int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq) +{ + struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode; + int ret; + + init_emulate_ctxt(vcpu); + + vcpu->arch.emulate_ctxt.decode.op_bytes = 2; + vcpu->arch.emulate_ctxt.decode.ad_bytes = 2; + vcpu->arch.emulate_ctxt.decode.eip = vcpu->arch.emulate_ctxt.eip; + ret = emulate_int_real(&vcpu->arch.emulate_ctxt, &emulate_ops, irq); + + if (ret != X86EMUL_CONTINUE) + return EMULATE_FAIL; + + vcpu->arch.emulate_ctxt.eip = c->eip; + memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); + kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip); + kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); + + if (irq == NMI_VECTOR) + vcpu->arch.nmi_pending = false; + else + vcpu->arch.interrupt.pending = false; + + return EMULATE_DONE; +} +EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt); + static int handle_emulation_failure(struct kvm_vcpu *vcpu) { ++vcpu->stat.insn_emulation_fail; @@ -4130,6 +4285,9 @@ int emulate_instruction(struct kvm_vcpu *vcpu, vcpu->arch.emulate_ctxt.perm_ok = false; r = x86_decode_insn(&vcpu->arch.emulate_ctxt); + if (r == X86EMUL_PROPAGATE_FAULT) + goto done; + trace_kvm_emulate_insn_start(vcpu); /* Only allow emulation of specific instructions on #UD @@ -4188,6 +4346,7 @@ restart: return handle_emulation_failure(vcpu); } +done: if (vcpu->arch.emulate_ctxt.exception >= 0) { inject_emulated_exception(vcpu); r = EMULATE_DONE; @@ -4206,6 +4365,7 @@ restart: toggle_interruptibility(vcpu, vcpu->arch.emulate_ctxt.interruptibility); kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); + kvm_make_request(KVM_REQ_EVENT, vcpu); memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip); @@ -4881,8 +5041,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) kvm_mmu_unload(vcpu); if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu)) __kvm_migrate_timers(vcpu); - if (kvm_check_request(KVM_REQ_KVMCLOCK_UPDATE, vcpu)) { - r = kvm_write_guest_time(vcpu); + if (kvm_check_request(KVM_REQ_CLOCK_UPDATE, vcpu)) { + r = kvm_guest_time_update(vcpu); if (unlikely(r)) goto out; } @@ -4910,6 +5070,21 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (unlikely(r)) goto out; + if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { + inject_pending_event(vcpu); + + /* enable NMI/IRQ window open exits if needed */ + if (vcpu->arch.nmi_pending) + kvm_x86_ops->enable_nmi_window(vcpu); + else if (kvm_cpu_has_interrupt(vcpu) || req_int_win) + kvm_x86_ops->enable_irq_window(vcpu); + + if (kvm_lapic_enabled(vcpu)) { + update_cr8_intercept(vcpu); + kvm_lapic_sync_to_vapic(vcpu); + } + } + preempt_disable(); kvm_x86_ops->prepare_guest_switch(vcpu); @@ -4928,23 +5103,11 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) smp_wmb(); local_irq_enable(); preempt_enable(); + kvm_x86_ops->cancel_injection(vcpu); r = 1; goto out; } - inject_pending_event(vcpu); - - /* enable NMI/IRQ window open exits if needed */ - if (vcpu->arch.nmi_pending) - kvm_x86_ops->enable_nmi_window(vcpu); - else if (kvm_cpu_has_interrupt(vcpu) || req_int_win) - kvm_x86_ops->enable_irq_window(vcpu); - - if (kvm_lapic_enabled(vcpu)) { - update_cr8_intercept(vcpu); - kvm_lapic_sync_to_vapic(vcpu); - } - srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); kvm_guest_enter(); @@ -5182,6 +5345,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) vcpu->arch.exception.pending = false; + kvm_make_request(KVM_REQ_EVENT, vcpu); + return 0; } @@ -5245,6 +5410,7 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { vcpu->arch.mp_state = mp_state->mp_state; + kvm_make_request(KVM_REQ_EVENT, vcpu); return 0; } @@ -5266,6 +5432,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, memcpy(vcpu->arch.regs, c->regs, sizeof c->regs); kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip); kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); + kvm_make_request(KVM_REQ_EVENT, vcpu); return EMULATE_DONE; } EXPORT_SYMBOL_GPL(kvm_task_switch); @@ -5301,7 +5468,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4; kvm_x86_ops->set_cr4(vcpu, sregs->cr4); if (!is_long_mode(vcpu) && is_pae(vcpu)) { - load_pdptrs(vcpu, vcpu->arch.cr3); + load_pdptrs(vcpu, vcpu->arch.walk_mmu, vcpu->arch.cr3); mmu_reset_needed = 1; } @@ -5336,6 +5503,8 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, !is_protmode(vcpu)) vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; + kvm_make_request(KVM_REQ_EVENT, vcpu); + return 0; } @@ -5568,6 +5737,8 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu) vcpu->arch.dr6 = DR6_FIXED_1; vcpu->arch.dr7 = DR7_FIXED_1; + kvm_make_request(KVM_REQ_EVENT, vcpu); + return kvm_x86_ops->vcpu_reset(vcpu); } @@ -5616,7 +5787,10 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) kvm = vcpu->kvm; vcpu->arch.emulate_ctxt.ops = &emulate_ops; + vcpu->arch.walk_mmu = &vcpu->arch.mmu; vcpu->arch.mmu.root_hpa = INVALID_PAGE; + vcpu->arch.mmu.translate_gpa = translate_gpa; + vcpu->arch.nested_mmu.translate_gpa = translate_nested_gpa; if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_bsp(vcpu)) vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; else @@ -5875,6 +6049,7 @@ void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) kvm_is_linear_rip(vcpu, vcpu->arch.singlestep_rip)) rflags |= X86_EFLAGS_TF; kvm_x86_ops->set_rflags(vcpu, rflags); + kvm_make_request(KVM_REQ_EVENT, vcpu); } EXPORT_SYMBOL_GPL(kvm_set_rflags);