#include <asm/i387.h>
#include <asm/xcr.h>
#include <asm/perf_event.h>
+#include <asm/kexec.h>
#include "trace.h"
struct kvm_segment *var, int seg);
static void vmx_get_segment(struct kvm_vcpu *vcpu,
struct kvm_segment *var, int seg);
+static bool guest_state_valid(struct kvm_vcpu *vcpu);
+static u32 vmx_segment_access_rights(struct kvm_segment *var);
static DEFINE_PER_CPU(struct vmcs *, vmxarea);
static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
return vmx_capability.ept & VMX_EPT_AD_BIT;
}
-static inline bool cpu_has_vmx_invept_individual_addr(void)
-{
- return vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT;
-}
-
static inline bool cpu_has_vmx_invept_context(void)
{
return vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT;
vmcs, phys_addr);
}
+#ifdef CONFIG_KEXEC
+/*
+ * This bitmap is used to indicate whether the vmclear
+ * operation is enabled on all cpus. All disabled by
+ * default.
+ */
+static cpumask_t crash_vmclear_enabled_bitmap = CPU_MASK_NONE;
+
+static inline void crash_enable_local_vmclear(int cpu)
+{
+ cpumask_set_cpu(cpu, &crash_vmclear_enabled_bitmap);
+}
+
+static inline void crash_disable_local_vmclear(int cpu)
+{
+ cpumask_clear_cpu(cpu, &crash_vmclear_enabled_bitmap);
+}
+
+static inline int crash_local_vmclear_enabled(int cpu)
+{
+ return cpumask_test_cpu(cpu, &crash_vmclear_enabled_bitmap);
+}
+
+static void crash_vmclear_local_loaded_vmcss(void)
+{
+ int cpu = raw_smp_processor_id();
+ struct loaded_vmcs *v;
+
+ if (!crash_local_vmclear_enabled(cpu))
+ return;
+
+ list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu),
+ loaded_vmcss_on_cpu_link)
+ vmcs_clear(v->vmcs);
+}
+#else
+static inline void crash_enable_local_vmclear(int cpu) { }
+static inline void crash_disable_local_vmclear(int cpu) { }
+#endif /* CONFIG_KEXEC */
+
static void __loaded_vmcs_clear(void *arg)
{
struct loaded_vmcs *loaded_vmcs = arg;
return; /* vcpu migration can race with cpu offline */
if (per_cpu(current_vmcs, cpu) == loaded_vmcs->vmcs)
per_cpu(current_vmcs, cpu) = NULL;
+ crash_disable_local_vmclear(cpu);
list_del(&loaded_vmcs->loaded_vmcss_on_cpu_link);
+
+ /*
+ * we should ensure updating loaded_vmcs->loaded_vmcss_on_cpu_link
+ * is before setting loaded_vmcs->vcpu to -1 which is done in
+ * loaded_vmcs_init. Otherwise, other cpu can see vcpu = -1 fist
+ * then adds the vmcs into percpu list before it is deleted.
+ */
+ smp_wmb();
+
loaded_vmcs_init(loaded_vmcs);
+ crash_enable_local_vmclear(cpu);
}
static void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs)
{
- if (loaded_vmcs->cpu != -1)
- smp_call_function_single(
- loaded_vmcs->cpu, __loaded_vmcs_clear, loaded_vmcs, 1);
+ int cpu = loaded_vmcs->cpu;
+
+ if (cpu != -1)
+ smp_call_function_single(cpu,
+ __loaded_vmcs_clear, loaded_vmcs, 1);
}
static inline void vpid_sync_vcpu_single(struct vcpu_vmx *vmx)
}
}
-static inline void ept_sync_individual_addr(u64 eptp, gpa_t gpa)
-{
- if (enable_ept) {
- if (cpu_has_vmx_invept_individual_addr())
- __invept(VMX_EPT_EXTENT_INDIVIDUAL_ADDR,
- eptp, gpa);
- else
- ept_sync_context(eptp);
- }
-}
-
static __always_inline unsigned long vmcs_readl(unsigned long field)
{
unsigned long value;
kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
local_irq_disable();
+ crash_disable_local_vmclear(cpu);
+
+ /*
+ * Read loaded_vmcs->cpu should be before fetching
+ * loaded_vmcs->loaded_vmcss_on_cpu_link.
+ * See the comments in __loaded_vmcs_clear().
+ */
+ smp_rmb();
+
list_add(&vmx->loaded_vmcs->loaded_vmcss_on_cpu_link,
&per_cpu(loaded_vmcss_on_cpu, cpu));
+ crash_enable_local_vmclear(cpu);
local_irq_enable();
/*
static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
{
__set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail);
- __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
to_vmx(vcpu)->rflags = rflags;
if (to_vmx(vcpu)->rmode.vm86_active) {
to_vmx(vcpu)->rmode.save_rflags = rflags;
* Like guest_read_tsc, but always returns L1's notion of the timestamp
* counter, even if a nested guest (L2) is currently running.
*/
-u64 vmx_read_l1_tsc(struct kvm_vcpu *vcpu)
+u64 vmx_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
{
- u64 host_tsc, tsc_offset;
+ u64 tsc_offset;
- rdtscll(host_tsc);
tsc_offset = is_guest_mode(vcpu) ?
to_vmx(vcpu)->nested.vmcs01_tsc_offset :
vmcs_read64(TSC_OFFSET);
WARN(1, "user requested TSC rate below hardware speed\n");
}
+static u64 vmx_read_tsc_offset(struct kvm_vcpu *vcpu)
+{
+ return vmcs_read64(TSC_OFFSET);
+}
+
/*
* writes 'offset' into guest's timestamp counter offset register
*/
* Returns 0 on success, non-0 otherwise.
* Assumes vcpu_load() was already called.
*/
-static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
+static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct shared_msr_entry *msr;
int ret = 0;
+ u32 msr_index = msr_info->index;
+ u64 data = msr_info->data;
switch (msr_index) {
case MSR_EFER:
- ret = kvm_set_msr_common(vcpu, msr_index, data);
+ ret = kvm_set_msr_common(vcpu, msr_info);
break;
#ifdef CONFIG_X86_64
case MSR_FS_BASE:
vmcs_writel(GUEST_SYSENTER_ESP, data);
break;
case MSR_IA32_TSC:
- kvm_write_tsc(vcpu, data);
+ kvm_write_tsc(vcpu, msr_info);
break;
case MSR_IA32_CR_PAT:
if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
vcpu->arch.pat = data;
break;
}
- ret = kvm_set_msr_common(vcpu, msr_index, data);
+ ret = kvm_set_msr_common(vcpu, msr_info);
+ break;
+ case MSR_IA32_TSC_ADJUST:
+ ret = kvm_set_msr_common(vcpu, msr_info);
break;
case MSR_TSC_AUX:
if (!vmx->rdtscp_enabled)
}
break;
}
- ret = kvm_set_msr_common(vcpu, msr_index, data);
+ ret = kvm_set_msr_common(vcpu, msr_info);
}
return ret;
return -EBUSY;
INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
+
+ /*
+ * Now we can enable the vmclear operation in kdump
+ * since the loaded_vmcss_on_cpu list on this cpu
+ * has been initialized.
+ *
+ * Though the cpu is not in VMX operation now, there
+ * is no problem to enable the vmclear operation
+ * for the loaded_vmcss_on_cpu list is empty!
+ */
+ crash_enable_local_vmclear(cpu);
+
rdmsrl(MSR_IA32_FEATURE_CONTROL, old);
test_bits = FEATURE_CONTROL_LOCKED;
free_kvm_area();
}
-static void fix_pmode_dataseg(struct kvm_vcpu *vcpu, int seg, struct kvm_segment *save)
+static void fix_pmode_dataseg(struct kvm_vcpu *vcpu, int seg,
+ struct kvm_segment *save)
{
- const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
- struct kvm_segment tmp = *save;
-
- if (!(vmcs_readl(sf->base) == tmp.base && tmp.s)) {
- tmp.base = vmcs_readl(sf->base);
- tmp.selector = vmcs_read16(sf->selector);
- tmp.s = 1;
+ if (!emulate_invalid_guest_state) {
+ /*
+ * CS and SS RPL should be equal during guest entry according
+ * to VMX spec, but in reality it is not always so. Since vcpu
+ * is in the middle of the transition from real mode to
+ * protected mode it is safe to assume that RPL 0 is a good
+ * default value.
+ */
+ if (seg == VCPU_SREG_CS || seg == VCPU_SREG_SS)
+ save->selector &= ~SELECTOR_RPL_MASK;
+ save->dpl = save->selector & SELECTOR_RPL_MASK;
+ save->s = 1;
}
- vmx_set_segment(vcpu, &tmp, seg);
+ vmx_set_segment(vcpu, save, seg);
}
static void enter_pmode(struct kvm_vcpu *vcpu)
unsigned long flags;
struct vcpu_vmx *vmx = to_vmx(vcpu);
+ /*
+ * Update real mode segment cache. It may be not up-to-date if sement
+ * register was written while vcpu was in a guest mode.
+ */
+ vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES);
+ vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS);
+ vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS);
+ vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS);
+ vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_SS], VCPU_SREG_SS);
+ vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_CS], VCPU_SREG_CS);
+
vmx->emulation_required = 1;
vmx->rmode.vm86_active = 0;
update_exception_bitmap(vcpu);
- if (emulate_invalid_guest_state)
- return;
-
+ fix_pmode_dataseg(vcpu, VCPU_SREG_CS, &vmx->rmode.segs[VCPU_SREG_CS]);
+ fix_pmode_dataseg(vcpu, VCPU_SREG_SS, &vmx->rmode.segs[VCPU_SREG_SS]);
fix_pmode_dataseg(vcpu, VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]);
fix_pmode_dataseg(vcpu, VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]);
fix_pmode_dataseg(vcpu, VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]);
fix_pmode_dataseg(vcpu, VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]);
- vmx_segment_cache_clear(vmx);
-
- vmcs_write16(GUEST_SS_SELECTOR, 0);
- vmcs_write32(GUEST_SS_AR_BYTES, 0x93);
-
- vmcs_write16(GUEST_CS_SELECTOR,
- vmcs_read16(GUEST_CS_SELECTOR) & ~SELECTOR_RPL_MASK);
- vmcs_write32(GUEST_CS_AR_BYTES, 0x9b);
+ /* CPL is always 0 when CPU enters protected mode */
+ __set_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
+ vmx->cpl = 0;
}
static gva_t rmode_tss_base(struct kvm *kvm)
static void fix_rmode_seg(int seg, struct kvm_segment *save)
{
const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
-
- vmcs_write16(sf->selector, save->base >> 4);
- vmcs_write32(sf->base, save->base & 0xffff0);
- vmcs_write32(sf->limit, 0xffff);
- vmcs_write32(sf->ar_bytes, 0xf3);
- if (save->base & 0xf)
- printk_once(KERN_WARNING "kvm: segment base is not paragraph"
- " aligned when entering protected mode (seg=%d)",
- seg);
+ struct kvm_segment var = *save;
+
+ var.dpl = 0x3;
+ if (seg == VCPU_SREG_CS)
+ var.type = 0x3;
+
+ if (!emulate_invalid_guest_state) {
+ var.selector = var.base >> 4;
+ var.base = var.base & 0xffff0;
+ var.limit = 0xffff;
+ var.g = 0;
+ var.db = 0;
+ var.present = 1;
+ var.s = 1;
+ var.l = 0;
+ var.unusable = 0;
+ var.type = 0x3;
+ var.avl = 0;
+ if (save->base & 0xf)
+ printk_once(KERN_WARNING "kvm: segment base is not "
+ "paragraph aligned when entering "
+ "protected mode (seg=%d)", seg);
+ }
+
+ vmcs_write16(sf->selector, var.selector);
+ vmcs_write32(sf->base, var.base);
+ vmcs_write32(sf->limit, var.limit);
+ vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(&var));
}
static void enter_rmode(struct kvm_vcpu *vcpu)
{
unsigned long flags;
struct vcpu_vmx *vmx = to_vmx(vcpu);
- struct kvm_segment var;
if (enable_unrestricted_guest)
return;
vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS);
vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS);
vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS);
+ vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_SS], VCPU_SREG_SS);
+ vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_CS], VCPU_SREG_CS);
vmx->emulation_required = 1;
vmx->rmode.vm86_active = 1;
-
/*
* Very old userspace does not call KVM_SET_TSS_ADDR before entering
* vcpu. Call it here with phys address pointing 16M below 4G.
vmcs_writel(GUEST_CR4, vmcs_readl(GUEST_CR4) | X86_CR4_VME);
update_exception_bitmap(vcpu);
- if (emulate_invalid_guest_state)
- goto continue_rmode;
-
- vmx_get_segment(vcpu, &var, VCPU_SREG_SS);
- vmx_set_segment(vcpu, &var, VCPU_SREG_SS);
-
- vmx_get_segment(vcpu, &var, VCPU_SREG_CS);
- vmx_set_segment(vcpu, &var, VCPU_SREG_CS);
+ fix_rmode_seg(VCPU_SREG_SS, &vmx->rmode.segs[VCPU_SREG_SS]);
+ fix_rmode_seg(VCPU_SREG_CS, &vmx->rmode.segs[VCPU_SREG_CS]);
+ fix_rmode_seg(VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]);
+ fix_rmode_seg(VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]);
+ fix_rmode_seg(VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]);
+ fix_rmode_seg(VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]);
- vmx_get_segment(vcpu, &var, VCPU_SREG_ES);
- vmx_set_segment(vcpu, &var, VCPU_SREG_ES);
-
- vmx_get_segment(vcpu, &var, VCPU_SREG_DS);
- vmx_set_segment(vcpu, &var, VCPU_SREG_DS);
-
- vmx_get_segment(vcpu, &var, VCPU_SREG_GS);
- vmx_set_segment(vcpu, &var, VCPU_SREG_GS);
-
- vmx_get_segment(vcpu, &var, VCPU_SREG_FS);
- vmx_set_segment(vcpu, &var, VCPU_SREG_FS);
-
-continue_rmode:
kvm_mmu_reset_context(vcpu);
}
vmcs_writel(CR0_READ_SHADOW, cr0);
vmcs_writel(GUEST_CR0, hw_cr0);
vcpu->arch.cr0 = cr0;
- __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
}
static u64 construct_eptp(unsigned long root_hpa)
struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 ar;
- if (vmx->rmode.vm86_active
- && (seg == VCPU_SREG_TR || seg == VCPU_SREG_ES
- || seg == VCPU_SREG_DS || seg == VCPU_SREG_FS
- || seg == VCPU_SREG_GS)) {
+ if (vmx->rmode.vm86_active && seg != VCPU_SREG_LDTR) {
*var = vmx->rmode.segs[seg];
if (seg == VCPU_SREG_TR
|| var->selector == vmx_read_guest_seg_selector(vmx, seg))
return vmx_read_guest_seg_base(to_vmx(vcpu), seg);
}
-static int __vmx_get_cpl(struct kvm_vcpu *vcpu)
+static int vmx_get_cpl(struct kvm_vcpu *vcpu)
{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
if (!is_protmode(vcpu))
return 0;
&& (kvm_get_rflags(vcpu) & X86_EFLAGS_VM)) /* if virtual 8086 */
return 3;
- return vmx_read_guest_seg_selector(to_vmx(vcpu), VCPU_SREG_CS) & 3;
-}
-
-static int vmx_get_cpl(struct kvm_vcpu *vcpu)
-{
- struct vcpu_vmx *vmx = to_vmx(vcpu);
-
- /*
- * If we enter real mode with cs.sel & 3 != 0, the normal CPL calculations
- * fail; use the cache instead.
- */
- if (unlikely(vmx->emulation_required && emulate_invalid_guest_state)) {
- return vmx->cpl;
- }
-
if (!test_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail)) {
__set_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
- vmx->cpl = __vmx_get_cpl(vcpu);
+ vmx->cpl = vmx_read_guest_seg_selector(vmx, VCPU_SREG_CS) & 3;
}
return vmx->cpl;
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
- u32 ar;
vmx_segment_cache_clear(vmx);
+ if (seg == VCPU_SREG_CS)
+ __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
- if (vmx->rmode.vm86_active && seg == VCPU_SREG_TR) {
- vmcs_write16(sf->selector, var->selector);
- vmx->rmode.segs[VCPU_SREG_TR] = *var;
- return;
+ if (vmx->rmode.vm86_active && seg != VCPU_SREG_LDTR) {
+ vmx->rmode.segs[seg] = *var;
+ if (seg == VCPU_SREG_TR)
+ vmcs_write16(sf->selector, var->selector);
+ else if (var->s)
+ fix_rmode_seg(seg, &vmx->rmode.segs[seg]);
+ goto out;
}
+
vmcs_writel(sf->base, var->base);
vmcs_write32(sf->limit, var->limit);
vmcs_write16(sf->selector, var->selector);
- if (vmx->rmode.vm86_active && var->s) {
- vmx->rmode.segs[seg] = *var;
- /*
- * Hack real-mode segments into vm86 compatibility.
- */
- if (var->base == 0xffff0000 && var->selector == 0xf000)
- vmcs_writel(sf->base, 0xf0000);
- ar = 0xf3;
- } else
- ar = vmx_segment_access_rights(var);
/*
* Fix the "Accessed" bit in AR field of segment registers for older
* kvm hack.
*/
if (enable_unrestricted_guest && (seg != VCPU_SREG_LDTR))
- ar |= 0x1; /* Accessed */
+ var->type |= 0x1; /* Accessed */
- vmcs_write32(sf->ar_bytes, ar);
- __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
+ vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(var));
- /*
- * Fix segments for real mode guest in hosts that don't have
- * "unrestricted_mode" or it was disabled.
- * This is done to allow migration of the guests from hosts with
- * unrestricted guest like Westmere to older host that don't have
- * unrestricted guest like Nehelem.
- */
- if (!enable_unrestricted_guest && vmx->rmode.vm86_active) {
- switch (seg) {
- case VCPU_SREG_CS:
- vmcs_write32(GUEST_CS_AR_BYTES, 0xf3);
- vmcs_write32(GUEST_CS_LIMIT, 0xffff);
- if (vmcs_readl(GUEST_CS_BASE) == 0xffff0000)
- vmcs_writel(GUEST_CS_BASE, 0xf0000);
- vmcs_write16(GUEST_CS_SELECTOR,
- vmcs_readl(GUEST_CS_BASE) >> 4);
- break;
- case VCPU_SREG_ES:
- case VCPU_SREG_DS:
- case VCPU_SREG_GS:
- case VCPU_SREG_FS:
- fix_rmode_seg(seg, &vmx->rmode.segs[seg]);
- break;
- case VCPU_SREG_SS:
- vmcs_write16(GUEST_SS_SELECTOR,
- vmcs_readl(GUEST_SS_BASE) >> 4);
- vmcs_write32(GUEST_SS_LIMIT, 0xffff);
- vmcs_write32(GUEST_SS_AR_BYTES, 0xf3);
- break;
- }
- }
+out:
+ if (!vmx->emulation_required)
+ vmx->emulation_required = !guest_state_valid(vcpu);
}
static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
u32 ar;
vmx_get_segment(vcpu, &var, seg);
+ var.dpl = 0x3;
+ if (seg == VCPU_SREG_CS)
+ var.type = 0x3;
ar = vmx_segment_access_rights(&var);
if (var.base != (var.selector << 4))
return false;
- if (var.limit < 0xffff)
+ if (var.limit != 0xffff)
return false;
- if (((ar | (3 << AR_DPL_SHIFT)) & ~(AR_G_MASK | AR_DB_MASK)) != 0xf3)
+ if (ar != 0xf3)
return false;
return true;
vmcs_write16(sf->selector, 0);
vmcs_writel(sf->base, 0);
vmcs_write32(sf->limit, 0xffff);
- if (enable_unrestricted_guest) {
- ar = 0x93;
- if (seg == VCPU_SREG_CS)
- ar |= 0x08; /* code segment */
- } else
- ar = 0xf3;
+ ar = 0x93;
+ if (seg == VCPU_SREG_CS)
+ ar |= 0x08; /* code segment */
vmcs_write32(sf->ar_bytes, ar);
}
kvm_userspace_mem.flags = 0;
kvm_userspace_mem.guest_phys_addr = 0xfee00000ULL;
kvm_userspace_mem.memory_size = PAGE_SIZE;
- r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, 0);
+ r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, false);
if (r)
goto out;
kvm_userspace_mem.guest_phys_addr =
kvm->arch.ept_identity_map_addr;
kvm_userspace_mem.memory_size = PAGE_SIZE;
- r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, 0);
+ r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, false);
if (r)
goto out;
vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL);
set_cr4_guest_host_mask(vmx);
- kvm_write_tsc(&vmx->vcpu, 0);
-
return 0;
}
u64 msr;
int ret;
- vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP));
-
vmx->rmode.vm86_active = 0;
vmx->soft_vnmi_blocked = 0;
msr |= MSR_IA32_APICBASE_BSP;
kvm_set_apic_base(&vmx->vcpu, msr);
- ret = fx_init(&vmx->vcpu);
- if (ret != 0)
- goto out;
-
vmx_segment_cache_clear(vmx);
seg_setup(VCPU_SREG_CS);
- /*
- * GUEST_CS_BASE should really be 0xffff0000, but VT vm86 mode
- * insists on having GUEST_CS_BASE == GUEST_CS_SELECTOR << 4. Sigh.
- */
- if (kvm_vcpu_is_bsp(&vmx->vcpu)) {
+ if (kvm_vcpu_is_bsp(&vmx->vcpu))
vmcs_write16(GUEST_CS_SELECTOR, 0xf000);
- vmcs_writel(GUEST_CS_BASE, 0x000f0000);
- } else {
+ else {
vmcs_write16(GUEST_CS_SELECTOR, vmx->vcpu.arch.sipi_vector << 8);
vmcs_writel(GUEST_CS_BASE, vmx->vcpu.arch.sipi_vector << 12);
}
kvm_rip_write(vcpu, 0xfff0);
else
kvm_rip_write(vcpu, 0);
- kvm_register_write(vcpu, VCPU_REGS_RSP, 0);
vmcs_writel(GUEST_GDTR_BASE, 0);
vmcs_write32(GUEST_GDTR_LIMIT, 0xffff);
/* HACK: Don't enable emulation on guest boot/reset */
vmx->emulation_required = 0;
-out:
return ret;
}
.flags = 0,
};
- ret = kvm_set_memory_region(kvm, &tss_mem, 0);
+ ret = kvm_set_memory_region(kvm, &tss_mem, false);
if (ret)
return ret;
kvm->arch.tss_addr = addr;
return 0;
}
-static int handle_rmode_exception(struct kvm_vcpu *vcpu,
- int vec, u32 err_code)
+static bool rmode_exception(struct kvm_vcpu *vcpu, int vec)
{
- /*
- * Instruction with address size override prefix opcode 0x67
- * Cause the #SS fault with 0 error code in VM86 mode.
- */
- if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0)
- if (emulate_instruction(vcpu, 0) == EMULATE_DONE)
- return 1;
- /*
- * Forward all other exceptions that are valid in real mode.
- * FIXME: Breaks guest debugging in real mode, needs to be fixed with
- * the required debugging infrastructure rework.
- */
switch (vec) {
- case DB_VECTOR:
- if (vcpu->guest_debug &
- (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
- return 0;
- kvm_queue_exception(vcpu, vec);
- return 1;
case BP_VECTOR:
/*
* Update instruction length as we may reinject the exception
to_vmx(vcpu)->vcpu.arch.event_exit_inst_len =
vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
- return 0;
+ return false;
+ /* fall through */
+ case DB_VECTOR:
+ if (vcpu->guest_debug &
+ (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
+ return false;
/* fall through */
case DE_VECTOR:
case OF_VECTOR:
case SS_VECTOR:
case GP_VECTOR:
case MF_VECTOR:
- kvm_queue_exception(vcpu, vec);
- return 1;
+ return true;
+ break;
}
- return 0;
+ return false;
+}
+
+static int handle_rmode_exception(struct kvm_vcpu *vcpu,
+ int vec, u32 err_code)
+{
+ /*
+ * Instruction with address size override prefix opcode 0x67
+ * Cause the #SS fault with 0 error code in VM86 mode.
+ */
+ if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) {
+ if (emulate_instruction(vcpu, 0) == EMULATE_DONE) {
+ if (vcpu->arch.halt_request) {
+ vcpu->arch.halt_request = 0;
+ return kvm_emulate_halt(vcpu);
+ }
+ return 1;
+ }
+ return 0;
+ }
+
+ /*
+ * Forward all other exceptions that are valid in real mode.
+ * FIXME: Breaks guest debugging in real mode, needs to be fixed with
+ * the required debugging infrastructure rework.
+ */
+ kvm_queue_exception(vcpu, vec);
+ return 1;
}
/*
if (is_machine_check(intr_info))
return handle_machine_check(vcpu);
- if ((vect_info & VECTORING_INFO_VALID_MASK) &&
- !is_page_fault(intr_info)) {
- vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
- vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_SIMUL_EX;
- vcpu->run->internal.ndata = 2;
- vcpu->run->internal.data[0] = vect_info;
- vcpu->run->internal.data[1] = intr_info;
- return 0;
- }
-
if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR)
return 1; /* already handled by vmx_vcpu_run() */
error_code = 0;
if (intr_info & INTR_INFO_DELIVER_CODE_MASK)
error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
+
+ /*
+ * The #PF with PFEC.RSVD = 1 indicates the guest is accessing
+ * MMIO, it is better to report an internal error.
+ * See the comments in vmx_handle_exit.
+ */
+ if ((vect_info & VECTORING_INFO_VALID_MASK) &&
+ !(is_page_fault(intr_info) && !(error_code & PFERR_RSVD_MASK))) {
+ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_SIMUL_EX;
+ vcpu->run->internal.ndata = 2;
+ vcpu->run->internal.data[0] = vect_info;
+ vcpu->run->internal.data[1] = intr_info;
+ return 0;
+ }
+
if (is_page_fault(intr_info)) {
/* EPT won't cause page fault directly */
BUG_ON(enable_ept);
return kvm_mmu_page_fault(vcpu, cr2, error_code, NULL, 0);
}
- if (vmx->rmode.vm86_active &&
- handle_rmode_exception(vcpu, intr_info & INTR_INFO_VECTOR_MASK,
- error_code)) {
- if (vcpu->arch.halt_request) {
- vcpu->arch.halt_request = 0;
- return kvm_emulate_halt(vcpu);
- }
- return 1;
- }
-
ex_no = intr_info & INTR_INFO_VECTOR_MASK;
+
+ if (vmx->rmode.vm86_active && rmode_exception(vcpu, ex_no))
+ return handle_rmode_exception(vcpu, ex_no, error_code);
+
switch (ex_no) {
case DB_VECTOR:
dr6 = vmcs_readl(EXIT_QUALIFICATION);
static int handle_wrmsr(struct kvm_vcpu *vcpu)
{
+ struct msr_data msr;
u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX];
u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u)
| ((u64)(vcpu->arch.regs[VCPU_REGS_RDX] & -1u) << 32);
- if (vmx_set_msr(vcpu, ecx, data) != 0) {
+ msr.data = data;
+ msr.index = ecx;
+ msr.host_initiated = false;
+ if (vmx_set_msr(vcpu, &msr) != 0) {
trace_kvm_msr_write_ex(ecx, data);
kvm_inject_gp(vcpu, 0);
return 1;
exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
- if (exit_qualification & (1 << 6)) {
- printk(KERN_ERR "EPT: GPA exceeds GAW!\n");
- return -EINVAL;
- }
-
gla_validity = (exit_qualification >> 7) & 0x3;
if (gla_validity != 0x3 && gla_validity != 0x1 && gla_validity != 0) {
printk(KERN_ERR "EPT: Handling EPT violation failed!\n");
return 0;
}
+ /*
+ * Note:
+ * Do not try to fix EXIT_REASON_EPT_MISCONFIG if it caused by
+ * delivery event since it indicates guest is accessing MMIO.
+ * The vm-exit can be triggered again after return to guest that
+ * will cause infinite loop.
+ */
if ((vectoring_info & VECTORING_INFO_VALID_MASK) &&
(exit_reason != EXIT_REASON_EXCEPTION_NMI &&
exit_reason != EXIT_REASON_EPT_VIOLATION &&
- exit_reason != EXIT_REASON_TASK_SWITCH))
- printk(KERN_WARNING "%s: unexpected, valid vectoring info "
- "(0x%x) and exit reason is 0x%x\n",
- __func__, vectoring_info, exit_reason);
+ exit_reason != EXIT_REASON_TASK_SWITCH)) {
+ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV;
+ vcpu->run->internal.ndata = 2;
+ vcpu->run->internal.data[0] = vectoring_info;
+ vcpu->run->internal.data[1] = exit_reason;
+ return 0;
+ }
if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked &&
!(is_guest_mode(vcpu) && nested_cpu_has_virtual_nmis(
.has_wbinvd_exit = cpu_has_vmx_wbinvd_exit,
.set_tsc_khz = vmx_set_tsc_khz,
+ .read_tsc_offset = vmx_read_tsc_offset,
.write_tsc_offset = vmx_write_tsc_offset,
.adjust_tsc_offset = vmx_adjust_tsc_offset,
.compute_tsc_offset = vmx_compute_tsc_offset,
if (r)
goto out3;
+#ifdef CONFIG_KEXEC
+ rcu_assign_pointer(crash_vmclear_loaded_vmcss,
+ crash_vmclear_local_loaded_vmcss);
+#endif
+
vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
free_page((unsigned long)vmx_io_bitmap_b);
free_page((unsigned long)vmx_io_bitmap_a);
+#ifdef CONFIG_KEXEC
+ rcu_assign_pointer(crash_vmclear_loaded_vmcss, NULL);
+ synchronize_rcu();
+#endif
+
kvm_exit();
}