KVM: x86: fix OOPS after invalid KVM_SET_DEBUGREGS
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c38efd7..4ae334a 100644
@@ -92,6 +92,9 @@ EXPORT_SYMBOL_GPL(kvm_x86_ops);
 int ignore_msrs = 0;
 module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR);
 
+unsigned int min_timer_period_us = 500;
+module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);
+
 bool kvm_has_tsc_control;
 EXPORT_SYMBOL_GPL(kvm_has_tsc_control);
 u32  kvm_max_guest_tsc_khz;
@@ -551,8 +554,6 @@ int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
        if (index != XCR_XFEATURE_ENABLED_MASK)
                return 1;
        xcr0 = xcr;
-       if (kvm_x86_ops->get_cpl(vcpu) != 0)
-               return 1;
        if (!(xcr0 & XSTATE_FP))
                return 1;
        if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE))
@@ -566,7 +567,8 @@ int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
 
 int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
 {
-       if (__kvm_set_xcr(vcpu, index, xcr)) {
+       if (kvm_x86_ops->get_cpl(vcpu) != 0 ||
+           __kvm_set_xcr(vcpu, index, xcr)) {
                kvm_inject_gp(vcpu, 0);
                return 1;
        }
@@ -578,6 +580,9 @@ static bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu)
 {
        struct kvm_cpuid_entry2 *best;
 
+       if (!static_cpu_has(X86_FEATURE_XSAVE))
+               return 0;
+
        best = kvm_find_cpuid_entry(vcpu, 1, 0);
        return best && (best->ecx & bit(X86_FEATURE_XSAVE));
 }
@@ -602,7 +607,6 @@ static void update_cpuid(struct kvm_vcpu *vcpu)
 {
        struct kvm_cpuid_entry2 *best;
        struct kvm_lapic *apic = vcpu->arch.apic;
-       u32 timer_mode_mask;
 
        best = kvm_find_cpuid_entry(vcpu, 1, 0);
        if (!best)
@@ -615,15 +619,12 @@ static void update_cpuid(struct kvm_vcpu *vcpu)
                        best->ecx |= bit(X86_FEATURE_OSXSAVE);
        }
 
-       if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
-               best->function == 0x1) {
-               best->ecx |= bit(X86_FEATURE_TSC_DEADLINE_TIMER);
-               timer_mode_mask = 3 << 17;
-       } else
-               timer_mode_mask = 1 << 17;
-
-       if (apic)
-               apic->lapic_timer.timer_mode_mask = timer_mode_mask;
+       if (apic) {
+               if (best->ecx & bit(X86_FEATURE_TSC_DEADLINE_TIMER))
+                       apic->lapic_timer.timer_mode_mask = 3 << 17;
+               else
+                       apic->lapic_timer.timer_mode_mask = 1 << 17;
+       }
 }
 
 int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
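
update_cpuid() now derives the LAPIC timer-mode mask from the guest's own CPUID bits instead of forcing the TSC-deadline bit on Intel hosts: bits 18:17 of the LVT Timer register select the timer mode, so a guest that advertises TSC-deadline support gets a two-bit mask (3 << 17), otherwise only the periodic bit (1 << 17) is writable. A small decoder for that field, using the architectural encoding (00 one-shot, 01 periodic, 10 TSC-deadline), is sketched below purely for illustration.

    #include <stdint.h>

    enum lapic_timer_mode { ONE_SHOT = 0, PERIODIC = 1, TSC_DEADLINE = 2 };

    /* Bits 18:17 of the LVT Timer register encode the timer mode. */
    static enum lapic_timer_mode lvtt_timer_mode(uint32_t lvt_timer)
    {
        return (enum lapic_timer_mode)((lvt_timer >> 17) & 0x3);
    }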
@@ -833,7 +834,8 @@ static u32 msrs_to_save[] = {
 #ifdef CONFIG_X86_64
        MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
 #endif
-       MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA
+       MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
+       MSR_TSC_AUX,
 };
 
 static unsigned num_msrs_to_save;
@@ -892,7 +894,6 @@ void kvm_enable_efer_bits(u64 mask)
 }
 EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
 
-
 /*
  * Writes msr value into the appropriate "register".
  * Returns 0 on success, non-0 otherwise.
@@ -900,8 +901,34 @@ EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
  */
 int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
 {
+       switch (msr_index) {
+       case MSR_FS_BASE:
+       case MSR_GS_BASE:
+       case MSR_KERNEL_GS_BASE:
+       case MSR_CSTAR:
+       case MSR_LSTAR:
+               if (is_noncanonical_address(data))
+                       return 1;
+               break;
+       case MSR_IA32_SYSENTER_EIP:
+       case MSR_IA32_SYSENTER_ESP:
+               /*
+                * IA32_SYSENTER_ESP and IA32_SYSENTER_EIP cause #GP if
+                * non-canonical address is written on Intel but not on
+                * AMD (which ignores the top 32-bits, because it does
+                * not implement 64-bit SYSENTER).
+                *
+                * 64-bit code should hence be able to write a non-canonical
+                * value on AMD.  Making the address canonical ensures that
+                * vmentry does not fail on Intel after writing a non-canonical
+                * value, and that something deterministic happens if the guest
+                * invokes 64-bit SYSENTER.
+                */
+               data = get_canonical(data);
+       }
        return kvm_x86_ops->set_msr(vcpu, msr_index, data);
 }
+EXPORT_SYMBOL_GPL(kvm_set_msr);
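
The new switch rejects non-canonical values for MSRs that hold linear addresses and silently canonicalizes the SYSENTER MSRs, so a buggy or malicious guest can no longer queue up a value that makes a later VM entry fail. In kernels of this vintage, get_canonical()/is_noncanonical_address() are simple sign-extension helpers assuming 48-bit virtual addresses; the standalone sketch below shows that idea (the helper names match the patch, main() and the sample value are illustrative).

    #include <inttypes.h>
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Canonical addresses have bits 63:48 equal to bit 47:
     * sign-extend from bit 47 to canonicalize. */
    static uint64_t get_canonical(uint64_t la)
    {
        return (uint64_t)(((int64_t)la << 16) >> 16);
    }

    static bool is_noncanonical_address(uint64_t la)
    {
        return get_canonical(la) != la;
    }

    int main(void)
    {
        uint64_t la = 0x0000800000000000ULL;    /* just past the canonical hole */

        printf("%#" PRIx64 " -> canonical %#" PRIx64 " (noncanonical: %d)\n",
               la, get_canonical(la), is_noncanonical_address(la));
        return 0;
    }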
 
 /*
  * Adapt set_msr() to msr_io()'s calling convention
@@ -1106,7 +1133,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
 {
        unsigned long flags;
        struct kvm_vcpu_arch *vcpu = &v->arch;
-       void *shared_kaddr;
        unsigned long this_tsc_khz;
        s64 kernel_ns, max_kernel_ns;
        u64 tsc_timestamp;
@@ -1142,7 +1168,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
 
        local_irq_restore(flags);
 
-       if (!vcpu->time_page)
+       if (!vcpu->pv_time_enabled)
                return 0;
 
        /*
@@ -1200,14 +1226,9 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
         */
        vcpu->hv_clock.version += 2;
 
-       shared_kaddr = kmap_atomic(vcpu->time_page, KM_USER0);
-
-       memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock,
-              sizeof(vcpu->hv_clock));
-
-       kunmap_atomic(shared_kaddr, KM_USER0);
-
-       mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT);
+       kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
+                               &vcpu->hv_clock,
+                               sizeof(vcpu->hv_clock));
        return 0;
 }
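
The kvmclock update now goes through kvm_write_guest_cached() on a gfn_to_hva_cache instead of keeping the guest's time page permanently pinned and kmapped. The guest-side read protocol defined by the pvclock ABI is unaffected: the version field stays even between updates (hence the += 2 above), and readers must retry whenever they observe an odd version or the version changes across the read. A simplified reader loop is sketched below; memory barriers are omitted and the field names are loosely borrowed from pvclock_vcpu_time_info, so treat it as illustration only.

    #include <stdint.h>

    struct pv_time_info {
        uint32_t version;       /* odd while the producer is mid-update */
        uint64_t tsc_timestamp;
        uint64_t system_time;
    };

    /* Guest-side read: retry until a stable, even version is observed.
     * A real implementation also needs read barriers around the loads. */
    static uint64_t read_system_time(volatile struct pv_time_info *ti)
    {
        uint32_t version;
        uint64_t t;

        do {
            version = ti->version;
            t = ti->system_time;
        } while ((version & 1) || version != ti->version);

        return t;
    }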
 
@@ -1487,7 +1508,8 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
                return 0;
        }
 
-       if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa))
+       if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa,
+                                       sizeof(u32)))
                return 1;
 
        vcpu->arch.apf.send_user_only = !(data & KVM_ASYNC_PF_SEND_ALWAYS);
@@ -1497,10 +1519,7 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
 
 static void kvmclock_reset(struct kvm_vcpu *vcpu)
 {
-       if (vcpu->arch.time_page) {
-               kvm_release_page_dirty(vcpu->arch.time_page);
-               vcpu->arch.time_page = NULL;
-       }
+       vcpu->arch.pv_time_enabled = false;
 }
 
 static void accumulate_steal_time(struct kvm_vcpu *vcpu)
@@ -1592,6 +1611,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
                break;
        case MSR_KVM_SYSTEM_TIME_NEW:
        case MSR_KVM_SYSTEM_TIME: {
+               u64 gpa_offset;
                kvmclock_reset(vcpu);
 
                vcpu->arch.time = data;
@@ -1601,16 +1621,14 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
                if (!(data & 1))
                        break;
 
-               /* ...but clean it before doing the actual write */
-               vcpu->arch.time_offset = data & ~(PAGE_MASK | 1);
-
-               vcpu->arch.time_page =
-                               gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT);
+               gpa_offset = data & ~(PAGE_MASK | 1);
 
-               if (is_error_page(vcpu->arch.time_page)) {
-                       kvm_release_page_clean(vcpu->arch.time_page);
-                       vcpu->arch.time_page = NULL;
-               }
+               if (kvm_gfn_to_hva_cache_init(vcpu->kvm,
+                    &vcpu->arch.pv_time, data & ~1ULL,
+                    sizeof(struct pvclock_vcpu_time_info)))
+                       vcpu->arch.pv_time_enabled = false;
+               else
+                       vcpu->arch.pv_time_enabled = true;
                break;
        }
        case MSR_KVM_ASYNC_PF_EN:
@@ -1626,7 +1644,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
                        return 1;
 
                if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.st.stime,
-                                                       data & KVM_STEAL_VALID_BITS))
+                                               data & KVM_STEAL_VALID_BITS,
+                                               sizeof(struct kvm_steal_time)))
                        return 1;
 
                vcpu->arch.st.msr_val = data;
@@ -1867,6 +1886,8 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
        case MSR_IA32_LASTINTFROMIP:
        case MSR_IA32_LASTINTTOIP:
        case MSR_K8_SYSCFG:
+       case MSR_K8_TSEG_ADDR:
+       case MSR_K8_TSEG_MASK:
        case MSR_K7_HWCR:
        case MSR_VM_HSAVE_PA:
        case MSR_P6_PERFCTR0:
@@ -2135,6 +2156,9 @@ int kvm_dev_ioctl_check_extension(long ext)
        case KVM_CAP_TSC_CONTROL:
                r = kvm_has_tsc_control;
                break;
+       case KVM_CAP_TSC_DEADLINE_TIMER:
+               r = boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER);
+               break;
        default:
                r = 0;
                break;
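
KVM_CAP_TSC_DEADLINE_TIMER is only reported when the host CPU actually has the feature, so userspace should probe it with KVM_CHECK_EXTENSION before exposing the corresponding CPUID bit to a guest. A minimal probe, assuming <linux/kvm.h> and a host with /dev/kvm, could look like this:

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    int main(void)
    {
        int kvm = open("/dev/kvm", O_RDWR);

        if (kvm < 0) {
            perror("/dev/kvm");
            return 1;
        }
        /* Returns 1 if the TSC-deadline timer can be exposed to guests. */
        printf("KVM_CAP_TSC_DEADLINE_TIMER: %d\n",
               ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_TSC_DEADLINE_TIMER));
        return 0;
    }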
@@ -2928,6 +2952,11 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
        if (dbgregs->flags)
                return -EINVAL;
 
+       if (dbgregs->dr6 & ~0xffffffffull)
+               return -EINVAL;
+       if (dbgregs->dr7 & ~0xffffffffull)
+               return -EINVAL;
+
        memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
        vcpu->arch.dr6 = dbgregs->dr6;
        vcpu->arch.dr7 = dbgregs->dr7;
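
This is the hunk the subject line refers to: DR6 and DR7 are architecturally 32-bit values zero-extended to 64 bits, and accepting arbitrary upper bits from userspace meant they could later be loaded into the hardware debug registers and fault in the host. With the check in place, an invalid KVM_SET_DEBUGREGS simply fails with EINVAL. A stripped-down verification sketch (error handling mostly omitted, requires /dev/kvm and <linux/kvm.h>) might look like:

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    int main(void)
    {
        struct kvm_debugregs dbg;
        int kvm, vm, vcpu;

        kvm = open("/dev/kvm", O_RDWR);
        vm = ioctl(kvm, KVM_CREATE_VM, 0);
        vcpu = ioctl(vm, KVM_CREATE_VCPU, 0);

        memset(&dbg, 0, sizeof(dbg));
        dbg.dr6 = 0xffffffff00000000ULL;    /* reserved bits 63:32 set */

        if (ioctl(vcpu, KVM_SET_DEBUGREGS, &dbg) < 0)
            perror("KVM_SET_DEBUGREGS");    /* expected: Invalid argument */
        return 0;
    }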
@@ -3147,8 +3176,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
                r = -EFAULT;
                if (copy_from_user(&va, argp, sizeof va))
                        goto out;
-               r = 0;
-               kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr);
+               r = kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr);
                break;
        }
        case KVM_X86_SETUP_MCE: {
@@ -3412,10 +3440,11 @@ static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps)
 static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
 {
        int r = 0;
-
+       int i;
        mutex_lock(&kvm->arch.vpit->pit_state.lock);
        memcpy(&kvm->arch.vpit->pit_state, ps, sizeof(struct kvm_pit_state));
-       kvm_pit_load_count(kvm, 0, ps->channels[0].count, 0);
+       for (i = 0; i < 3; i++)
+               kvm_pit_load_count(kvm, i, ps->channels[i].count, 0);
        mutex_unlock(&kvm->arch.vpit->pit_state.lock);
        return r;
 }
@@ -3436,6 +3465,7 @@ static int kvm_vm_ioctl_get_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
 static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
 {
        int r = 0, start = 0;
+       int i;
        u32 prev_legacy, cur_legacy;
        mutex_lock(&kvm->arch.vpit->pit_state.lock);
        prev_legacy = kvm->arch.vpit->pit_state.flags & KVM_PIT_FLAGS_HPET_LEGACY;
@@ -3445,7 +3475,9 @@ static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
        memcpy(&kvm->arch.vpit->pit_state.channels, &ps->channels,
               sizeof(kvm->arch.vpit->pit_state.channels));
        kvm->arch.vpit->pit_state.flags = ps->flags;
-       kvm_pit_load_count(kvm, 0, kvm->arch.vpit->pit_state.channels[0].count, start);
+       for (i = 0; i < 3; i++)
+               kvm_pit_load_count(kvm, i, kvm->arch.vpit->pit_state.channels[i].count,
+                                  start && i == 0);
        mutex_unlock(&kvm->arch.vpit->pit_state.lock);
        return r;
 }
@@ -3580,6 +3612,9 @@ long kvm_arch_vm_ioctl(struct file *filp,
                r = -EEXIST;
                if (kvm->arch.vpic)
                        goto create_irqchip_unlock;
+               r = -EINVAL;
+               if (atomic_read(&kvm->online_vcpus))
+                       goto create_irqchip_unlock;
                r = -ENOMEM;
                vpic = kvm_create_pic(kvm);
                if (vpic) {
@@ -3838,6 +3873,20 @@ static void kvm_init_msr_list(void)
        for (i = j = KVM_SAVE_MSRS_BEGIN; i < ARRAY_SIZE(msrs_to_save); i++) {
                if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
                        continue;
+
+               /*
+                * Even MSRs that are valid in the host may not be exposed
+                * to the guests in some cases.
+                */
+               switch (msrs_to_save[i]) {
+               case MSR_TSC_AUX:
+                       if (!kvm_x86_ops->rdtscp_supported())
+                               continue;
+                       break;
+               default:
+                       break;
+               }
+
                if (j < i)
                        msrs_to_save[j] = msrs_to_save[i];
                j++;
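
MSR_TSC_AUX is added to msrs_to_save, but kvm_init_msr_list() now filters it back out when the backend does not support RDTSCP, since saving and restoring an MSR the guest can never see is pointless. For reference, the MSR can be inspected from host userspace through the msr driver (needs the msr module and root); the index 0xc0000103 is the architectural MSR_TSC_AUX, the rest of the snippet is illustrative:

    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <unistd.h>

    #define MSR_TSC_AUX 0xc0000103u

    int main(void)
    {
        uint64_t val;
        int fd = open("/dev/cpu/0/msr", O_RDONLY);

        /* The msr driver reads the MSR whose index equals the file offset. */
        if (fd < 0 || pread(fd, &val, sizeof(val), MSR_TSC_AUX) != sizeof(val)) {
            perror("rdmsr");
            return 1;
        }
        printf("MSR_TSC_AUX (cpu0) = %#llx\n", (unsigned long long)val);
        return 0;
    }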
@@ -4656,6 +4705,28 @@ static int emulator_intercept(struct x86_emulate_ctxt *ctxt,
        return kvm_x86_ops->check_intercept(emul_to_vcpu(ctxt), info, stage);
 }
 
+static bool emulator_get_cpuid(struct x86_emulate_ctxt *ctxt,
+                              u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
+{
+       struct kvm_cpuid_entry2 *cpuid = NULL;
+
+       if (eax && ecx)
+               cpuid = kvm_find_cpuid_entry(emul_to_vcpu(ctxt),
+                                           *eax, *ecx);
+
+       if (cpuid) {
+               *eax = cpuid->eax;
+               *ecx = cpuid->ecx;
+               if (ebx)
+                       *ebx = cpuid->ebx;
+               if (edx)
+                       *edx = cpuid->edx;
+               return true;
+       }
+
+       return false;
+}
+
 static struct x86_emulate_ops emulate_ops = {
        .read_std            = kvm_read_guest_virt_system,
        .write_std           = kvm_write_guest_virt_system,
@@ -4686,6 +4757,7 @@ static struct x86_emulate_ops emulate_ops = {
        .get_fpu             = emulator_get_fpu,
        .put_fpu             = emulator_put_fpu,
        .intercept           = emulator_intercept,
+       .get_cpuid           = emulator_get_cpuid,
 };
 
 static void cache_all_regs(struct kvm_vcpu *vcpu)
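
emulator_get_cpuid() lets the instruction emulator answer CPUID queries from the vCPU's cached CPUID table (kvm_find_cpuid_entry) rather than from whatever the host CPU would report, so emulated guests see the same feature bits as hardware-assisted ones. For comparison, a native userspace query of leaf 1 looks like the sketch below; the inline-asm wrapper is illustrative and not part of the patch:

    #include <stdint.h>
    #include <stdio.h>

    static void cpuid_count(uint32_t leaf, uint32_t subleaf,
                            uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d)
    {
        asm volatile("cpuid"
                     : "=a" (*a), "=b" (*b), "=c" (*c), "=d" (*d)
                     : "a" (leaf), "c" (subleaf));
    }

    int main(void)
    {
        uint32_t a, b, c, d;

        cpuid_count(1, 0, &a, &b, &c, &d);
        printf("CPUID.01H: ecx=%08x edx=%08x\n", c, d);
        return 0;
    }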
@@ -4800,7 +4872,7 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu)
 
        ++vcpu->stat.insn_emulation_fail;
        trace_kvm_emulate_insn_failed(vcpu);
-       if (!is_guest_mode(vcpu)) {
+       if (!is_guest_mode(vcpu) && kvm_x86_ops->get_cpl(vcpu) == 0) {
                vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
                vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
                vcpu->run->internal.ndata = 0;
@@ -5190,9 +5262,10 @@ int kvm_arch_init(void *opaque)
                goto out;
 
        kvm_set_mmio_spte_mask();
-       kvm_init_msr_list();
 
        kvm_x86_ops = ops;
+       kvm_init_msr_list();
+
        kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
                        PT_DIRTY_MASK, PT64_NX_MASK, 0);
 
@@ -5518,33 +5591,6 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu)
                        !kvm_event_needs_reinjection(vcpu);
 }
 
-static void vapic_enter(struct kvm_vcpu *vcpu)
-{
-       struct kvm_lapic *apic = vcpu->arch.apic;
-       struct page *page;
-
-       if (!apic || !apic->vapic_addr)
-               return;
-
-       page = gfn_to_page(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
-
-       vcpu->arch.apic->vapic_page = page;
-}
-
-static void vapic_exit(struct kvm_vcpu *vcpu)
-{
-       struct kvm_lapic *apic = vcpu->arch.apic;
-       int idx;
-
-       if (!apic || !apic->vapic_addr)
-               return;
-
-       idx = srcu_read_lock(&vcpu->kvm->srcu);
-       kvm_release_page_dirty(apic->vapic_page);
-       mark_page_dirty(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
-       srcu_read_unlock(&vcpu->kvm->srcu, idx);
-}
-
 static void update_cr8_intercept(struct kvm_vcpu *vcpu)
 {
        int max_irr, tpr;
@@ -5593,12 +5639,10 @@ static void inject_pending_event(struct kvm_vcpu *vcpu)
        }
 
        /* try to inject new event if pending */
-       if (vcpu->arch.nmi_pending) {
-               if (kvm_x86_ops->nmi_allowed(vcpu)) {
-                       --vcpu->arch.nmi_pending;
-                       vcpu->arch.nmi_injected = true;
-                       kvm_x86_ops->set_nmi(vcpu);
-               }
+       if (vcpu->arch.nmi_pending && kvm_x86_ops->nmi_allowed(vcpu)) {
+               --vcpu->arch.nmi_pending;
+               vcpu->arch.nmi_injected = true;
+               kvm_x86_ops->set_nmi(vcpu);
        } else if (kvm_cpu_has_interrupt(vcpu)) {
                if (kvm_x86_ops->interrupt_allowed(vcpu)) {
                        kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
@@ -5701,7 +5745,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
                /* enable NMI/IRQ window open exits if needed */
                if (vcpu->arch.nmi_pending)
                        kvm_x86_ops->enable_nmi_window(vcpu);
-               else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
+               if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
                        kvm_x86_ops->enable_irq_window(vcpu);
 
                if (kvm_lapic_enabled(vcpu)) {
@@ -5715,8 +5759,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
        kvm_x86_ops->prepare_guest_switch(vcpu);
        if (vcpu->fpu_active)
                kvm_load_guest_fpu(vcpu);
-       kvm_load_guest_xcr0(vcpu);
-
        vcpu->mode = IN_GUEST_MODE;
 
        /* We should set ->mode before check ->requests,
@@ -5737,6 +5779,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
                goto out;
        }
 
+       kvm_load_guest_xcr0(vcpu);
+
        srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
 
        kvm_guest_enter();
@@ -5766,6 +5810,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 
        vcpu->mode = OUTSIDE_GUEST_MODE;
        smp_wmb();
+
+       kvm_put_guest_xcr0(vcpu);
+
        local_irq_enable();
 
        ++vcpu->stat.exits;
@@ -5817,7 +5864,6 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
        }
 
        vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
-       vapic_enter(vcpu);
 
        r = 1;
        while (r > 0) {
@@ -5874,8 +5920,6 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 
        srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
 
-       vapic_exit(vcpu);
-
        return r;
 }
 
@@ -6124,6 +6168,9 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
        int pending_vec, max_bits, idx;
        struct desc_ptr dt;
 
+       if (!guest_cpuid_has_xsave(vcpu) && (sregs->cr4 & X86_CR4_OSXSAVE))
+               return -EINVAL;
+
        dt.size = sregs->idt.limit;
        dt.address = sregs->idt.base;
        kvm_x86_ops->set_idt(vcpu, &dt);
@@ -6339,7 +6386,6 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
         * and assume host would use all available bits.
         * Guest xcr0 would be loaded later.
         */
-       kvm_put_guest_xcr0(vcpu);
        vcpu->guest_fpu_loaded = 1;
        unlazy_fpu(current);
        fpu_restore_checking(&vcpu->arch.guest_fpu);
@@ -6348,8 +6394,6 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
 
 void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
 {
-       kvm_put_guest_xcr0(vcpu);
-
        if (!vcpu->guest_fpu_loaded)
                return;
 
@@ -6464,6 +6508,11 @@ void kvm_arch_check_processor_compat(void *rtn)
        kvm_x86_ops->check_processor_compatibility(rtn);
 }
 
+bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu)
+{
+       return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL);
+}
+
 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 {
        struct page *page;
@@ -6513,6 +6562,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
        if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL))
                goto fail_free_mce_banks;
 
+       vcpu->arch.pv_time_enabled = false;
        kvm_async_pf_hash_reset(vcpu);
 
        return 0;