MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");
-static int __read_mostly bypass_guest_pf = 1;
-module_param(bypass_guest_pf, bool, S_IRUGO);
-
static int __read_mostly enable_vpid = 1;
module_param_named(vpid, enable_vpid, bool, 0444);
: "=qm"(error) : "a"(&phys_addr), "m"(phys_addr)
: "cc", "memory");
if (error)
- printk(KERN_ERR "kvm: vmptrld %p/%llx fail\n",
+ printk(KERN_ERR "kvm: vmptrld %p/%llx failed\n",
vmcs, phys_addr);
}
eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */
if (vcpu->fpu_active)
eb &= ~(1u << NM_VECTOR);
+
+ /* When we are running a nested L2 guest and L1 specified for it a
+ * certain exception bitmap, we must trap the same exceptions and pass
+ * them to L1. When running L2, we will only handle the exceptions
+ * specified above if L1 did not want them.
+ */
+ if (is_guest_mode(vcpu))
+ eb |= get_vmcs12(vcpu)->exception_bitmap;
+
vmcs_write32(EXCEPTION_BITMAP, eb);
}
vmcs_writel(GUEST_CR0, cr0);
update_exception_bitmap(vcpu);
vcpu->arch.cr0_guest_owned_bits = X86_CR0_TS;
+ if (is_guest_mode(vcpu))
+ vcpu->arch.cr0_guest_owned_bits &=
+ ~get_vmcs12(vcpu)->cr0_guest_host_mask;
vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);
}
static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu)
{
+ /* Note that there is no vcpu->fpu_active = 0 here. The caller must
+ * set this *before* calling this function.
+ */
vmx_decache_cr0_guest_bits(vcpu);
vmcs_set_bits(GUEST_CR0, X86_CR0_TS | X86_CR0_MP);
update_exception_bitmap(vcpu);
vcpu->arch.cr0_guest_owned_bits = 0;
vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);
- vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0);
+ if (is_guest_mode(vcpu)) {
+ /*
+ * L1's specified read shadow might not contain the TS bit,
+ * so now that we turned on shadowing of this bit, we need to
+ * set this bit of the shadow. Like in nested_vmx_run we need
+ * nested_read_cr0(vmcs12), but vmcs12->guest_cr0 is not yet
+ * up-to-date here because we just decached cr0.TS (and we'll
+ * only update vmcs12->guest_cr0 on nested exit).
+ */
+ struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+ vmcs12->guest_cr0 = (vmcs12->guest_cr0 & ~X86_CR0_TS) |
+ (vcpu->arch.cr0 & X86_CR0_TS);
+ vmcs_writel(CR0_READ_SHADOW, nested_read_cr0(vmcs12));
+ } else
+ vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0);
}
static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
{
vmcs_write64(TSC_OFFSET, offset);
+ if (is_guest_mode(vcpu))
+ /*
+ * We're here if L1 chose not to trap the TSC MSR. Since
+ * prepare_vmcs12() does not copy tsc_offset, we need to also
+ * set the vmcs12 field here.
+ */
+ get_vmcs12(vcpu)->tsc_offset = offset -
+ to_vmx(vcpu)->nested.vmcs01_tsc_offset;
}
static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment)
{
u64 offset = vmcs_read64(TSC_OFFSET);
vmcs_write64(TSC_OFFSET, offset + adjustment);
+ if (is_guest_mode(vcpu)) {
+ /* Even when running L2, the adjustment needs to apply to L1 */
+ to_vmx(vcpu)->nested.vmcs01_tsc_offset += adjustment;
+ }
}
static u64 vmx_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
return exec_control;
}
+static void ept_set_mmio_spte_mask(void)
+{
+ /*
+ * EPT Misconfigurations can be generated if the value of bits 2:0
+ * of an EPT paging-structure entry is 110b (write/execute).
+ * Also, magic bits (0xffull << 49) is set to quickly identify mmio
+ * spte.
+ */
+ kvm_mmu_set_mmio_spte_mask(0xffull << 49 | 0x6ull);
+}
+
/*
* Sets up the vmcs for emulated real mode.
*/
static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
{
+#ifdef CONFIG_X86_64
unsigned long a;
+#endif
int i;
/* I/O */
vmcs_write32(PLE_WINDOW, ple_window);
}
- vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, !!bypass_guest_pf);
- vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, !!bypass_guest_pf);
+ vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0);
+ vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, 0);
vmcs_write32(CR3_TARGET_COUNT, 0); /* 22.2.1 */
vmcs_write16(HOST_FS_SELECTOR, 0); /* 22.2.4 */
static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
{
u64 sptes[4];
- int nr_sptes, i;
+ int nr_sptes, i, ret;
gpa_t gpa;
gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
+ ret = handle_mmio_page_fault_common(vcpu, gpa, true);
+ if (likely(ret == 1))
+ return x86_emulate_instruction(vcpu, gpa, 0, NULL, 0) ==
+ EMULATE_DONE;
+ if (unlikely(!ret))
+ return 1;
+
+ /* It is the real ept misconfig */
printk(KERN_ERR "EPT: Misconfiguration.\n");
printk(KERN_ERR "EPT: GPA: 0x%llx\n", gpa);
if (vmx->nested.nested_run_pending)
kvm_make_request(KVM_REQ_EVENT, vcpu);
- if (exit_reason == EXIT_REASON_VMLAUNCH ||
- exit_reason == EXIT_REASON_VMRESUME)
+ if (!is_guest_mode(vcpu) && (exit_reason == EXIT_REASON_VMLAUNCH ||
+ exit_reason == EXIT_REASON_VMRESUME))
vmx->nested.nested_run_pending = 1;
else
vmx->nested.nested_run_pending = 0;
static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
{
+ if (func == 1 && nested)
+ entry->ecx |= bit(X86_FEATURE_VMX);
}
/*
vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
if (enable_ept) {
- bypass_guest_pf = 0;
kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull,
VMX_EPT_EXECUTABLE_MASK);
+ ept_set_mmio_spte_mask();
kvm_enable_tdp();
} else
kvm_disable_tdp();
- if (bypass_guest_pf)
- kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull);
-
return 0;
out3: