KVM: Disable irq while unregistering user notifier
[pandora-kernel.git] arch/x86/kvm/x86.c
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e82a53a..4408aee 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -92,6 +92,9 @@ EXPORT_SYMBOL_GPL(kvm_x86_ops);
 int ignore_msrs = 0;
 module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR);
 
+unsigned int min_timer_period_us = 500;
+module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);
+
 bool kvm_has_tsc_control;
 EXPORT_SYMBOL_GPL(kvm_has_tsc_control);
 u32  kvm_max_guest_tsc_khz;
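The new min_timer_period_us parameter bounds how short a period the guest may program into its local APIC timer, protecting the host from a self-inflicted interrupt storm. The consumer lives in arch/x86/kvm/lapic.c; the helper below is a hypothetical sketch of such a clamp (the function name and message are illustrative, not code from this tree):

/* Hypothetical consumer (the real user is in arch/x86/kvm/lapic.c):
 * clamp a guest-programmed APIC timer period to the module parameter. */
extern unsigned int min_timer_period_us;

static u64 clamp_apic_timer_period(u64 period_ns)
{
        u64 min_ns = (u64)min_timer_period_us * NSEC_PER_USEC;

        if (period_ns && period_ns < min_ns) {
                pr_info_ratelimited("kvm: requested timer period %llu ns too short, clamping to %llu ns\n",
                                    period_ns, min_ns);
                period_ns = min_ns;
        }
        return period_ns;
}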
@@ -169,7 +172,18 @@ static void kvm_on_user_return(struct user_return_notifier *urn)
        struct kvm_shared_msrs *locals
                = container_of(urn, struct kvm_shared_msrs, urn);
        struct kvm_shared_msr_values *values;
+       unsigned long flags;
 
+       /*
+        * Disabling irqs at this point since the following code could be
+        * interrupted and executed through kvm_arch_hardware_disable()
+        */
+       local_irq_save(flags);
+       if (locals->registered) {
+               locals->registered = false;
+               user_return_notifier_unregister(urn);
+       }
+       local_irq_restore(flags);
        for (slot = 0; slot < shared_msrs_global.nr; ++slot) {
                values = &locals->values[slot];
                if (values->host != values->curr) {
@@ -177,8 +191,6 @@ static void kvm_on_user_return(struct user_return_notifier *urn)
                        values->curr = values->host;
                }
        }
-       locals->registered = false;
-       user_return_notifier_unregister(urn);
 }
 
 static void shared_msr_update(unsigned slot, u32 msr)
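For reference, the user-return notifier interface (include/linux/user-return-notifier.h) is tiny; the sketch below restates its shape together with the interleaving the new irq-save section rules out. kvm_arch_hardware_disable() can run from interrupt context on this CPU and call back into kvm_on_user_return(), so with interrupts enabled the notifier could be unregistered twice.

/* Shape of the API (see include/linux/user-return-notifier.h). */
struct user_return_notifier {
        void (*on_user_return)(struct user_return_notifier *urn);
        struct hlist_node link;
};

void user_return_notifier_register(struct user_return_notifier *urn);
void user_return_notifier_unregister(struct user_return_notifier *urn);

/*
 * Interleaving prevented by the local_irq_save() section (hedged sketch
 * of the pre-patch behaviour, where the unregister ran last):
 *
 *   kvm_on_user_return()                 irq -> kvm_arch_hardware_disable()
 *     restores shared MSRs                 sees locals->registered == true
 *     <interrupted here>                   calls kvm_on_user_return()
 *                                          user_return_notifier_unregister()
 *     user_return_notifier_unregister()    second unregister, list corruption
 */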
@@ -551,8 +563,6 @@ int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
        if (index != XCR_XFEATURE_ENABLED_MASK)
                return 1;
        xcr0 = xcr;
-       if (kvm_x86_ops->get_cpl(vcpu) != 0)
-               return 1;
        if (!(xcr0 & XSTATE_FP))
                return 1;
        if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE))
@@ -566,7 +576,8 @@ int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
 
 int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
 {
-       if (__kvm_set_xcr(vcpu, index, xcr)) {
+       if (kvm_x86_ops->get_cpl(vcpu) != 0 ||
+           __kvm_set_xcr(vcpu, index, xcr)) {
                kvm_inject_gp(vcpu, 0);
                return 1;
        }
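Moving the CPL test out of __kvm_set_xcr() matters because that helper is also reached from host-initiated paths such as the KVM_SET_XCRS ioctl, where no guest instruction is executing and the CPL is irrelevant; only the guest-triggered XSETBV path should take the privilege check and the resulting #GP. A hypothetical host-side caller, just to illustrate the split (not code from this tree):

/* Hypothetical host-initiated setter: bypasses the CPL gate on purpose,
 * since the value comes from the VMM, not from a guest XSETBV. */
static int set_guest_xcr0_from_userspace(struct kvm_vcpu *vcpu, u64 value)
{
        return __kvm_set_xcr(vcpu, XCR_XFEATURE_ENABLED_MASK, value);
}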
@@ -832,7 +843,8 @@ static u32 msrs_to_save[] = {
 #ifdef CONFIG_X86_64
        MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
 #endif
-       MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA
+       MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
+       MSR_TSC_AUX,
 };
 
 static unsigned num_msrs_to_save;
@@ -891,7 +903,6 @@ void kvm_enable_efer_bits(u64 mask)
 }
 EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
 
-
 /*
  * Writes msr value into the appropriate "register".
  * Returns 0 on success, non-0 otherwise.
@@ -899,8 +910,34 @@ EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
  */
 int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
 {
+       switch (msr_index) {
+       case MSR_FS_BASE:
+       case MSR_GS_BASE:
+       case MSR_KERNEL_GS_BASE:
+       case MSR_CSTAR:
+       case MSR_LSTAR:
+               if (is_noncanonical_address(data))
+                       return 1;
+               break;
+       case MSR_IA32_SYSENTER_EIP:
+       case MSR_IA32_SYSENTER_ESP:
+               /*
+                * IA32_SYSENTER_ESP and IA32_SYSENTER_EIP cause #GP if
+                * non-canonical address is written on Intel but not on
+                * AMD (which ignores the top 32-bits, because it does
+                * not implement 64-bit SYSENTER).
+                *
+                * 64-bit code should hence be able to write a non-canonical
+                * value on AMD.  Making the address canonical ensures that
+                * vmentry does not fail on Intel after writing a non-canonical
+                * value, and that something deterministic happens if the guest
+                * invokes 64-bit SYSENTER.
+                */
+               data = get_canonical(data);
+       }
        return kvm_x86_ops->set_msr(vcpu, msr_index, data);
 }
+EXPORT_SYMBOL_GPL(kvm_set_msr);
 
 /*
  * Adapt set_msr() to msr_io()'s calling convention
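get_canonical() and is_noncanonical_address() are small helpers (kept in arch/x86/kvm/x86.h in mainline); assuming 48-bit virtual addresses, they plausibly look like this:

/* Likely shape of the helpers used above; 48-bit virtual addresses are
 * assumed, so bit 47 is sign-extended into bits 63:48. */
static inline u64 get_canonical(u64 la)
{
        return ((int64_t)la << 16) >> 16;
}

static inline bool is_noncanonical_address(u64 la)
{
#ifdef CONFIG_X86_64
        return get_canonical(la) != la;
#else
        return false;
#endif
}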
@@ -1858,6 +1895,8 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
        case MSR_IA32_LASTINTFROMIP:
        case MSR_IA32_LASTINTTOIP:
        case MSR_K8_SYSCFG:
+       case MSR_K8_TSEG_ADDR:
+       case MSR_K8_TSEG_MASK:
        case MSR_K7_HWCR:
        case MSR_VM_HSAVE_PA:
        case MSR_P6_PERFCTR0:
@@ -2922,6 +2961,11 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
        if (dbgregs->flags)
                return -EINVAL;
 
+       if (dbgregs->dr6 & ~0xffffffffull)
+               return -EINVAL;
+       if (dbgregs->dr7 & ~0xffffffffull)
+               return -EINVAL;
+
        memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
        vcpu->arch.dr6 = dbgregs->dr6;
        vcpu->arch.dr7 = dbgregs->dr7;
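In 64-bit mode the upper 32 bits of DR6 and DR7 are reserved and loading them non-zero faults, so the ioctl now rejects such values instead of letting a later vcpu entry trip over them. A hypothetical userspace snippet showing the effect (the vcpu_fd setup is assumed):

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Hypothetical usage: vcpu_fd is an already-created KVM vcpu fd. */
int set_debugregs_example(int vcpu_fd)
{
        struct kvm_debugregs dbg = {
                .dr6 = 0xffff0ff0,      /* architectural reset value: accepted */
                .dr7 = 0x00000400,      /* architectural reset value: accepted */
        };

        if (ioctl(vcpu_fd, KVM_SET_DEBUGREGS, &dbg) < 0)
                return -1;

        dbg.dr6 = 1ULL << 32;           /* reserved upper bit set */
        /* with this change the ioctl fails with EINVAL instead of the
         * reserved value reaching the hardware debug registers later */
        return ioctl(vcpu_fd, KVM_SET_DEBUGREGS, &dbg);
}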
@@ -3141,8 +3185,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
                r = -EFAULT;
                if (copy_from_user(&va, argp, sizeof va))
                        goto out;
-               r = 0;
-               kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr);
+               r = kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr);
                break;
        }
        case KVM_X86_SETUP_MCE: {
@@ -3406,10 +3449,11 @@ static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps)
 static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
 {
        int r = 0;
-
+       int i;
        mutex_lock(&kvm->arch.vpit->pit_state.lock);
        memcpy(&kvm->arch.vpit->pit_state, ps, sizeof(struct kvm_pit_state));
-       kvm_pit_load_count(kvm, 0, ps->channels[0].count, 0);
+       for (i = 0; i < 3; i++)
+               kvm_pit_load_count(kvm, i, ps->channels[i].count, 0);
        mutex_unlock(&kvm->arch.vpit->pit_state.lock);
        return r;
 }
@@ -3430,6 +3474,7 @@ static int kvm_vm_ioctl_get_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
 static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
 {
        int r = 0, start = 0;
+       int i;
        u32 prev_legacy, cur_legacy;
        mutex_lock(&kvm->arch.vpit->pit_state.lock);
        prev_legacy = kvm->arch.vpit->pit_state.flags & KVM_PIT_FLAGS_HPET_LEGACY;
@@ -3439,7 +3484,9 @@ static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
        memcpy(&kvm->arch.vpit->pit_state.channels, &ps->channels,
               sizeof(kvm->arch.vpit->pit_state.channels));
        kvm->arch.vpit->pit_state.flags = ps->flags;
-       kvm_pit_load_count(kvm, 0, kvm->arch.vpit->pit_state.channels[0].count, start);
+       for (i = 0; i < 3; i++)
+               kvm_pit_load_count(kvm, i, kvm->arch.vpit->pit_state.channels[i].count,
+                                  start && i == 0);
        mutex_unlock(&kvm->arch.vpit->pit_state.lock);
        return r;
 }
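Both PIT ioctls now reload the count for all three i8254 channels rather than only channel 0; the HPET-legacy start flag is still applied to channel 0 alone, which is what the "start && i == 0" argument expresses. For reference, the assumed prototype of the helper (declared in arch/x86/kvm/i8254.h; details may differ in this tree):

/* Assumed prototype; the last argument requests HPET-legacy-mode start
 * behaviour, which is only meaningful for channel 0. */
void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val,
                        int hpet_legacy_start);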
@@ -3835,6 +3882,20 @@ static void kvm_init_msr_list(void)
        for (i = j = KVM_SAVE_MSRS_BEGIN; i < ARRAY_SIZE(msrs_to_save); i++) {
                if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
                        continue;
+
+               /*
+                * Even MSRs that are valid in the host may not be exposed
+                * to the guests in some cases.
+                */
+               switch (msrs_to_save[i]) {
+               case MSR_TSC_AUX:
+                       if (!kvm_x86_ops->rdtscp_supported())
+                               continue;
+                       break;
+               default:
+                       break;
+               }
+
                if (j < i)
                        msrs_to_save[j] = msrs_to_save[i];
                j++;
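The loop keeps msrs_to_save dense: entries the host cannot rdmsr, or that this vcpu implementation cannot expose (MSR_TSC_AUX without RDTSCP support), are dropped and the survivors slide down so the saved list stays contiguous. The same compaction pattern in isolation, as a small self-contained sketch:

#include <stdbool.h>
#include <stdint.h>

/* Standalone illustration of the compaction pattern used above: entries
 * failing the predicate are skipped, survivors slide down in order, and
 * the new count is returned (what num_msrs_to_save receives). */
static unsigned filter_in_place(uint32_t *list, unsigned n, bool (*keep)(uint32_t))
{
        unsigned i, j;

        for (i = j = 0; i < n; i++) {
                if (!keep(list[i]))
                        continue;
                if (j < i)
                        list[j] = list[i];
                j++;
        }
        return j;
}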
@@ -4820,7 +4881,7 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu)
 
        ++vcpu->stat.insn_emulation_fail;
        trace_kvm_emulate_insn_failed(vcpu);
-       if (!is_guest_mode(vcpu)) {
+       if (!is_guest_mode(vcpu) && kvm_x86_ops->get_cpl(vcpu) == 0) {
                vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
                vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
                vcpu->run->internal.ndata = 0;
@@ -5210,9 +5271,10 @@ int kvm_arch_init(void *opaque)
                goto out;
 
        kvm_set_mmio_spte_mask();
-       kvm_init_msr_list();
 
        kvm_x86_ops = ops;
+       kvm_init_msr_list();
+
        kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
                        PT_DIRTY_MASK, PT64_NX_MASK, 0);
 
@@ -5538,33 +5600,6 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu)
                        !kvm_event_needs_reinjection(vcpu);
 }
 
-static void vapic_enter(struct kvm_vcpu *vcpu)
-{
-       struct kvm_lapic *apic = vcpu->arch.apic;
-       struct page *page;
-
-       if (!apic || !apic->vapic_addr)
-               return;
-
-       page = gfn_to_page(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
-
-       vcpu->arch.apic->vapic_page = page;
-}
-
-static void vapic_exit(struct kvm_vcpu *vcpu)
-{
-       struct kvm_lapic *apic = vcpu->arch.apic;
-       int idx;
-
-       if (!apic || !apic->vapic_addr)
-               return;
-
-       idx = srcu_read_lock(&vcpu->kvm->srcu);
-       kvm_release_page_dirty(apic->vapic_page);
-       mark_page_dirty(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
-       srcu_read_unlock(&vcpu->kvm->srcu, idx);
-}
-
 static void update_cr8_intercept(struct kvm_vcpu *vcpu)
 {
        int max_irr, tpr;
@@ -5613,12 +5648,10 @@ static void inject_pending_event(struct kvm_vcpu *vcpu)
        }
 
        /* try to inject new event if pending */
-       if (vcpu->arch.nmi_pending) {
-               if (kvm_x86_ops->nmi_allowed(vcpu)) {
-                       --vcpu->arch.nmi_pending;
-                       vcpu->arch.nmi_injected = true;
-                       kvm_x86_ops->set_nmi(vcpu);
-               }
+       if (vcpu->arch.nmi_pending && kvm_x86_ops->nmi_allowed(vcpu)) {
+               --vcpu->arch.nmi_pending;
+               vcpu->arch.nmi_injected = true;
+               kvm_x86_ops->set_nmi(vcpu);
        } else if (kvm_cpu_has_interrupt(vcpu)) {
                if (kvm_x86_ops->interrupt_allowed(vcpu)) {
                        kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
@@ -5721,7 +5754,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
                /* enable NMI/IRQ window open exits if needed */
                if (vcpu->arch.nmi_pending)
                        kvm_x86_ops->enable_nmi_window(vcpu);
-               else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
+               if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
                        kvm_x86_ops->enable_irq_window(vcpu);
 
                if (kvm_lapic_enabled(vcpu)) {
@@ -5735,8 +5768,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
        kvm_x86_ops->prepare_guest_switch(vcpu);
        if (vcpu->fpu_active)
                kvm_load_guest_fpu(vcpu);
-       kvm_load_guest_xcr0(vcpu);
-
        vcpu->mode = IN_GUEST_MODE;
 
        /* We should set ->mode before check ->requests,
@@ -5757,6 +5788,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
                goto out;
        }
 
+       kvm_load_guest_xcr0(vcpu);
+
        srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
 
        kvm_guest_enter();
@@ -5786,6 +5819,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 
        vcpu->mode = OUTSIDE_GUEST_MODE;
        smp_wmb();
+
+       kvm_put_guest_xcr0(vcpu);
+
        local_irq_enable();
 
        ++vcpu->stat.exits;
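The guest's XCR0 is now installed only inside the interrupts-off window around VM entry and restored before local_irq_enable(), so host interrupt handlers that touch extended state never run with the guest's mask loaded. A hedged sketch of what the two helpers defined elsewhere in this file roughly do (names and details may differ in this tree):

/* Hedged sketch of the repositioned helpers; the real versions live
 * earlier in x86.c. */
static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu)
{
        if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) &&
            !vcpu->guest_xcr0_loaded) {
                /* switch XCR0 to the guest's mask just before VM entry */
                xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
                vcpu->guest_xcr0_loaded = 1;
        }
}

static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
{
        if (vcpu->guest_xcr0_loaded) {
                /* restore the host's mask before interrupts are re-enabled */
                xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0);
                vcpu->guest_xcr0_loaded = 0;
        }
}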
@@ -5837,7 +5873,6 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
        }
 
        vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
-       vapic_enter(vcpu);
 
        r = 1;
        while (r > 0) {
@@ -5894,8 +5929,6 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 
        srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
 
-       vapic_exit(vcpu);
-
        return r;
 }
 
@@ -6362,7 +6395,6 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
         * and assume host would use all available bits.
         * Guest xcr0 would be loaded later.
         */
-       kvm_put_guest_xcr0(vcpu);
        vcpu->guest_fpu_loaded = 1;
        unlazy_fpu(current);
        fpu_restore_checking(&vcpu->arch.guest_fpu);
@@ -6371,8 +6403,6 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
 
 void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
 {
-       kvm_put_guest_xcr0(vcpu);
-
        if (!vcpu->guest_fpu_loaded)
                return;
 
@@ -6385,11 +6415,13 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
 
 void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
 {
+       void *wbinvd_dirty_mask = vcpu->arch.wbinvd_dirty_mask;
+
        kvmclock_reset(vcpu);
 
-       free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
        fx_free(vcpu);
        kvm_x86_ops->vcpu_free(vcpu);
+       free_cpumask_var(wbinvd_dirty_mask);
 }
 
 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,