}
static int init_rmode(struct kvm *kvm);
+static u64 construct_eptp(unsigned long root_hpa);
static DEFINE_PER_CPU(struct vmcs *, vmxarea);
static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
SECONDARY_EXEC_ENABLE_VPID);
}
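+/* True if the "virtual NMIs" pin-based VM-execution control was enabled. */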
+static inline int cpu_has_virtual_nmis(void)
+{
+ return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS;
+}
+
static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr)
{
int i;
struct descriptor_table gdt;
struct desc_struct *descs;
- get_gdt(&gdt);
+ kvm_get_gdt(&gdt);
descs = (void *)gdt.base;
descs[GDT_ENTRY_TSS].type = 9; /* available TSS */
load_TR_desc();
* Set host fs and gs selectors. Unfortunately, 22.2.3 does not
* allow segment selectors with cpl > 0 or ti == 1.
*/
- vmx->host_state.ldt_sel = read_ldt();
+ vmx->host_state.ldt_sel = kvm_read_ldt();
vmx->host_state.gs_ldt_reload_needed = vmx->host_state.ldt_sel;
- vmx->host_state.fs_sel = read_fs();
+ vmx->host_state.fs_sel = kvm_read_fs();
if (!(vmx->host_state.fs_sel & 7)) {
vmcs_write16(HOST_FS_SELECTOR, vmx->host_state.fs_sel);
vmx->host_state.fs_reload_needed = 0;
vmcs_write16(HOST_FS_SELECTOR, 0);
vmx->host_state.fs_reload_needed = 1;
}
- vmx->host_state.gs_sel = read_gs();
+ vmx->host_state.gs_sel = kvm_read_gs();
if (!(vmx->host_state.gs_sel & 7))
vmcs_write16(HOST_GS_SELECTOR, vmx->host_state.gs_sel);
else {
++vmx->vcpu.stat.host_state_reload;
vmx->host_state.loaded = 0;
if (vmx->host_state.fs_reload_needed)
- load_fs(vmx->host_state.fs_sel);
+ kvm_load_fs(vmx->host_state.fs_sel);
if (vmx->host_state.gs_ldt_reload_needed) {
- load_ldt(vmx->host_state.ldt_sel);
+ kvm_load_ldt(vmx->host_state.ldt_sel);
/*
* If we have to reload gs, we must take care to
* preserve our gs base.
*/
local_irq_save(flags);
- load_gs(vmx->host_state.gs_sel);
+ kvm_load_gs(vmx->host_state.gs_sel);
#ifdef CONFIG_X86_64
wrmsrl(MSR_GS_BASE, vmcs_readl(HOST_GS_BASE));
#endif
* Linux uses per-cpu TSS and GDT, so set these when switching
* processors.
*/
- vmcs_writel(HOST_TR_BASE, read_tr_base()); /* 22.2.4 */
- get_gdt(&dt);
+ vmcs_writel(HOST_TR_BASE, kvm_read_tr_base()); /* 22.2.4 */
+ kvm_get_gdt(&dt);
vmcs_writel(HOST_GDTR_BASE, dt.base); /* 22.2.4 */
rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp);
break;
case MSR_IA32_TIME_STAMP_COUNTER:
guest_write_tsc(data);
+ break;
+ case MSR_P6_PERFCTR0:
+ case MSR_P6_PERFCTR1:
+ case MSR_P6_EVNTSEL0:
+ case MSR_P6_EVNTSEL1:
+ /*
+ * Just discard all writes to the performance counters; this
+ * should keep both older Linux and Windows 64-bit guests
+ * happy.
+ */
+ pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n",
+ msr_index, data);
+
break;
default:
vmx_load_host_state(vmx);
u32 _vmentry_control = 0;
min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
- opt = 0;
+ opt = PIN_BASED_VIRTUAL_NMIS;
if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
&_pin_based_exec_control) < 0)
return -EIO;
static void vmx_flush_tlb(struct kvm_vcpu *vcpu)
{
vpid_sync_vcpu_all(to_vmx(vcpu));
+ if (vm_need_ept())
+ ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa));
}
static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
if (!(cr0 & X86_CR0_PG)) {
/* From paging/starting to nonpaging */
vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
- vmcs_config.cpu_based_exec_ctrl |
+ vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) |
(CPU_BASED_CR3_LOAD_EXITING |
CPU_BASED_CR3_STORE_EXITING));
vcpu->arch.cr0 = cr0;
} else if (!is_paging(vcpu)) {
/* From nonpaging to paging */
vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
- vmcs_config.cpu_based_exec_ctrl &
+ vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) &
~(CPU_BASED_CR3_LOAD_EXITING |
CPU_BASED_CR3_STORE_EXITING));
vcpu->arch.cr0 = cr0;
vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */
vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS); /* 22.2.4 */
vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS); /* 22.2.4 */
- vmcs_write16(HOST_FS_SELECTOR, read_fs()); /* 22.2.4 */
- vmcs_write16(HOST_GS_SELECTOR, read_gs()); /* 22.2.4 */
+ vmcs_write16(HOST_FS_SELECTOR, kvm_read_fs()); /* 22.2.4 */
+ vmcs_write16(HOST_GS_SELECTOR, kvm_read_gs()); /* 22.2.4 */
vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS); /* 22.2.4 */
#ifdef CONFIG_X86_64
rdmsrl(MSR_FS_BASE, a);
vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */
- get_idt(&dt);
+ kvm_get_idt(&dt);
vmcs_writel(HOST_IDTR_BASE, dt.base); /* 22.2.4 */
asm("mov $.Lkvm_vmx_return, %0" : "=r"(kvm_vmx_return));
irq | INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK);
}
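+/* Inject an NMI at the next VM entry and clear the pending-NMI flag. */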
+static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
+{
+ vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
+ INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);
+ vcpu->arch.nmi_pending = 0;
+}
+
static void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
{
int word_index = __ffs(vcpu->arch.irq_summary);
return 1;
}
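+/*
+ * An NMI-window VM exit: the guest can now take an NMI, so stop asking
+ * for these exits and let the injection path deliver the pending NMI.
+ */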
+static int handle_nmi_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+ u32 cpu_based_vm_exec_control;
+
+ /* Stop requesting NMI-window exits */
+ cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+ cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING;
+ vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+ ++vcpu->stat.nmi_window_exits;
+
+ return 1;
+}
+
/*
* The exit handlers return 1 if the exit was handled fully and guest execution
* may resume. Otherwise they set the kvm_run parameter to indicate what needs
[EXIT_REASON_EXCEPTION_NMI] = handle_exception,
[EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt,
[EXIT_REASON_TRIPLE_FAULT] = handle_triple_fault,
+ [EXIT_REASON_NMI_WINDOW] = handle_nmi_window,
[EXIT_REASON_IO_INSTRUCTION] = handle_io,
[EXIT_REASON_CR_ACCESS] = handle_cr,
[EXIT_REASON_DR_ACCESS] = handle_dr,
vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
}
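+/* Request a VM exit as soon as the guest is able to take an NMI. */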
+static void enable_nmi_window(struct kvm_vcpu *vcpu)
+{
+ u32 cpu_based_vm_exec_control;
+
+ if (!cpu_has_virtual_nmis())
+ return;
+
+ cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+ cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING;
+ vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+}
+
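+/* Whether the guest can currently accept an NMI injection. */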
+static int vmx_nmi_enabled(struct kvm_vcpu *vcpu)
+{
+ u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
+ return !(guest_intr & (GUEST_INTR_STATE_NMI |
+ GUEST_INTR_STATE_MOV_SS |
+ GUEST_INTR_STATE_STI));
+}
+
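+/* Whether the guest can currently accept an external interrupt. */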
+static int vmx_irq_enabled(struct kvm_vcpu *vcpu)
+{
+ u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
+ return (!(guest_intr & (GUEST_INTR_STATE_MOV_SS |
+ GUEST_INTR_STATE_STI)) &&
+ (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF));
+}
+
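+/* Ask for a VM exit when an injection window for the pending event opens. */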
+static void enable_intr_window(struct kvm_vcpu *vcpu)
+{
+ if (vcpu->arch.nmi_pending)
+ enable_nmi_window(vcpu);
+ else if (kvm_cpu_has_interrupt(vcpu))
+ enable_irq_window(vcpu);
+}
+
static void vmx_intr_assist(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- u32 idtv_info_field, intr_info_field;
- int has_ext_irq, interrupt_window_open;
+ u32 idtv_info_field, intr_info_field, exit_intr_info_field;
int vector;
update_tpr_threshold(vcpu);
- has_ext_irq = kvm_cpu_has_interrupt(vcpu);
intr_info_field = vmcs_read32(VM_ENTRY_INTR_INFO_FIELD);
+ exit_intr_info_field = vmcs_read32(VM_EXIT_INTR_INFO);
idtv_info_field = vmx->idt_vectoring_info;
if (intr_info_field & INTR_INFO_VALID_MASK) {
if (idtv_info_field & INTR_INFO_VALID_MASK) {
if (printk_ratelimit())
printk(KERN_ERR "Fault when IDT_Vectoring\n");
}
- if (has_ext_irq)
- enable_irq_window(vcpu);
+ enable_intr_window(vcpu);
return;
}
if (unlikely(idtv_info_field & INTR_INFO_VALID_MASK)) {
u8 vect = idtv_info_field & VECTORING_INFO_VECTOR_MASK;
vmx_inject_irq(vcpu, vect);
- if (unlikely(has_ext_irq))
- enable_irq_window(vcpu);
+ enable_intr_window(vcpu);
return;
}
KVMTRACE_1D(REDELIVER_EVT, vcpu, idtv_info_field, handler);
- vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field);
+ /*
+ * SDM 3: 25.7.1.2
+ * Clear bit "block by NMI" before VM entry if an NMI delivery
+ * faulted.
+ */
+ if ((idtv_info_field & VECTORING_INFO_TYPE_MASK)
+ == INTR_TYPE_NMI_INTR && cpu_has_virtual_nmis())
+ vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
+ vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
+ ~GUEST_INTR_STATE_NMI);
+
+ vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field
+ & ~INTR_INFO_RESVD_BITS_MASK);
vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
vmcs_read32(VM_EXIT_INSTRUCTION_LEN));
if (unlikely(idtv_info_field & INTR_INFO_DELIVER_CODE_MASK))
vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
vmcs_read32(IDT_VECTORING_ERROR_CODE));
- if (unlikely(has_ext_irq))
- enable_irq_window(vcpu);
+ enable_intr_window(vcpu);
return;
}
- if (!has_ext_irq)
+ if (cpu_has_virtual_nmis()) {
+ /*
+ * SDM 3: 25.7.1.2
+ * Re-set bit "block by NMI" before VM entry if the VM exit was
+ * caused by a fault during a guest IRET (vector 8, the double
+ * fault, is excluded).
+ */
+ if ((exit_intr_info_field & INTR_INFO_UNBLOCK_NMI) &&
+ (exit_intr_info_field & INTR_INFO_VECTOR_MASK) != 8)
+ vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
+ vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) |
+ GUEST_INTR_STATE_NMI);
+ else if (vcpu->arch.nmi_pending) {
+ if (vmx_nmi_enabled(vcpu))
+ vmx_inject_nmi(vcpu);
+ enable_intr_window(vcpu);
+ return;
+ }
+
+ }
+ if (!kvm_cpu_has_interrupt(vcpu))
return;
- interrupt_window_open =
- ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
- (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0);
- if (interrupt_window_open) {
+ if (vmx_irq_enabled(vcpu)) {
vector = kvm_cpu_get_interrupt(vcpu);
vmx_inject_irq(vcpu, vector);
kvm_timer_intr_post(vcpu, vector);
fixup_rmode_irq(vmx);
vcpu->arch.interrupt_window_open =
- (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0;
+ (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
+ (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)) == 0;
asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
vmx->launched = 1;
intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
/* We need to handle NMIs before interrupts are enabled */
- if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) { /* nmi */
+ if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200 &&
+ (intr_info & INTR_INFO_VALID_MASK)) {
KVMTRACE_0D(NMI, vcpu, handler);
asm("int $2");
}