KVM: nVMX: fix shadow on EPT
authorGleb Natapov <gleb@redhat.com>
Wed, 9 Oct 2013 16:13:19 +0000 (19:13 +0300)
committerPaolo Bonzini <pbonzini@redhat.com>
Thu, 10 Oct 2013 09:39:57 +0000 (11:39 +0200)
72f857950f6f19 broke shadow on EPT. This patch reverts it and fixes PAE
on nEPT (which reverted commit fixed) in other way.

Shadow on EPT is now broken because while L1 builds shadow page table
for L2 (which is PAE while L2 is in real mode) it never loads L2's
GUEST_PDPTR[0-3].  They do not need to be loaded because without nested
virtualization HW does this during guest entry if EPT is disabled,
but in our case L0 emulates L2's vmentry while EPT is enables, so we
cannot rely on vmcs12->guest_pdptr[0-3] to contain up-to-date values
and need to re-read PDPTEs from L2 memory. This is what kvm_set_cr3()
is doing, but by clearing cache bits during L2 vmentry we drop values
that kvm_set_cr3() read from memory.

So why the same code does not work for PAE on nEPT? kvm_set_cr3()
reads pdptes into vcpu->arch.walk_mmu->pdptrs[]. walk_mmu points to
vcpu->arch.nested_mmu while nested guest is running, but ept_load_pdptrs()
uses vcpu->arch.mmu which contain incorrect values. Fix that by using
walk_mmu in ept_(load|save)_pdptrs.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Tested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
arch/x86/kvm/vmx.c

index 3b8e745..2b2fce1 100644 (file)
@@ -3255,25 +3255,29 @@ static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
 
 static void ept_load_pdptrs(struct kvm_vcpu *vcpu)
 {
+       struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
+
        if (!test_bit(VCPU_EXREG_PDPTR,
                      (unsigned long *)&vcpu->arch.regs_dirty))
                return;
 
        if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
-               vmcs_write64(GUEST_PDPTR0, vcpu->arch.mmu.pdptrs[0]);
-               vmcs_write64(GUEST_PDPTR1, vcpu->arch.mmu.pdptrs[1]);
-               vmcs_write64(GUEST_PDPTR2, vcpu->arch.mmu.pdptrs[2]);
-               vmcs_write64(GUEST_PDPTR3, vcpu->arch.mmu.pdptrs[3]);
+               vmcs_write64(GUEST_PDPTR0, mmu->pdptrs[0]);
+               vmcs_write64(GUEST_PDPTR1, mmu->pdptrs[1]);
+               vmcs_write64(GUEST_PDPTR2, mmu->pdptrs[2]);
+               vmcs_write64(GUEST_PDPTR3, mmu->pdptrs[3]);
        }
 }
 
 static void ept_save_pdptrs(struct kvm_vcpu *vcpu)
 {
+       struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
+
        if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
-               vcpu->arch.mmu.pdptrs[0] = vmcs_read64(GUEST_PDPTR0);
-               vcpu->arch.mmu.pdptrs[1] = vmcs_read64(GUEST_PDPTR1);
-               vcpu->arch.mmu.pdptrs[2] = vmcs_read64(GUEST_PDPTR2);
-               vcpu->arch.mmu.pdptrs[3] = vmcs_read64(GUEST_PDPTR3);
+               mmu->pdptrs[0] = vmcs_read64(GUEST_PDPTR0);
+               mmu->pdptrs[1] = vmcs_read64(GUEST_PDPTR1);
+               mmu->pdptrs[2] = vmcs_read64(GUEST_PDPTR2);
+               mmu->pdptrs[3] = vmcs_read64(GUEST_PDPTR3);
        }
 
        __set_bit(VCPU_EXREG_PDPTR,
@@ -7777,10 +7781,6 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
                vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1);
                vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2);
                vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
-               __clear_bit(VCPU_EXREG_PDPTR,
-                               (unsigned long *)&vcpu->arch.regs_avail);
-               __clear_bit(VCPU_EXREG_PDPTR,
-                               (unsigned long *)&vcpu->arch.regs_dirty);
        }
 
        kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->guest_rsp);