Merge branch 'kvm-updates/2.6.37' of git://git.kernel.org/pub/scm/virt/kvm/kvm
[pandora-kernel.git] / arch / x86 / kvm / x86.c
index ce57cd8..2288ad8 100644 (file)
@@ -6,7 +6,7 @@
  * Copyright (C) 2006 Qumranet, Inc.
  * Copyright (C) 2008 Qumranet, Inc.
  * Copyright IBM Corporation, 2008
- * Copyright 2010 Red Hat, Inc. and/or its affilates.
+ * Copyright 2010 Red Hat, Inc. and/or its affiliates.
  *
  * Authors:
  *   Avi Kivity   <avi@qumranet.com>
@@ -73,7 +73,7 @@
 #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
 
 #define KVM_MAX_MCE_BANKS 32
-#define KVM_MCE_CAP_SUPPORTED MCG_CTL_P
+#define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P)
 
 /* EFER defaults:
  * - enable syscall per default because its emulated by KVM
@@ -930,14 +930,14 @@ static void kvm_get_time_scale(uint32_t scaled_khz, uint32_t base_khz,
 
        tps64 = base_khz * 1000LL;
        scaled64 = scaled_khz * 1000LL;
-       while (tps64 > scaled64*2 || tps64 & 0xffffffff00000000UL) {
+       while (tps64 > scaled64*2 || tps64 & 0xffffffff00000000ULL) {
                tps64 >>= 1;
                shift--;
        }
 
        tps32 = (uint32_t)tps64;
-       while (tps32 <= scaled64 || scaled64 & 0xffffffff00000000UL) {
-               if (scaled64 & 0xffffffff00000000UL || tps32 & 0x80000000)
+       while (tps32 <= scaled64 || scaled64 & 0xffffffff00000000ULL) {
+               if (scaled64 & 0xffffffff00000000ULL || tps32 & 0x80000000)
                        scaled64 >>= 1;
                else
                        tps32 <<= 1;
@@ -962,6 +962,7 @@ static inline u64 get_kernel_ns(void)
 }
 
 static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
+unsigned long max_tsc_khz;     /* initialised by kvm_timer_init() */
 
 static inline int kvm_tsc_changes_freq(void)
 {
@@ -985,6 +986,24 @@ static inline u64 nsec_to_cycles(u64 nsec)
        return ret;
 }
 
+static void kvm_arch_set_tsc_khz(struct kvm *kvm, u32 this_tsc_khz)
+{
+       /* Compute a mult/shift pair that converts nanoseconds into TSC cycles */
+       kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000,
+                          &kvm->arch.virtual_tsc_shift,
+                          &kvm->arch.virtual_tsc_mult);
+       kvm->arch.virtual_tsc_khz = this_tsc_khz;
+}
+
+static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
+{
+       u64 tsc = pvclock_scale_delta(kernel_ns-vcpu->arch.last_tsc_nsec,
+                                     vcpu->kvm->arch.virtual_tsc_mult,
+                                     vcpu->kvm->arch.virtual_tsc_shift);
+       tsc += vcpu->arch.last_tsc_write; /* elapsed cycles + last written TSC */
+       return tsc;
+}
+
 void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
 {
        struct kvm *kvm = vcpu->kvm;
@@ -1029,6 +1048,8 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
 
        /* Reset of TSC must disable overshoot protection below */
        vcpu->arch.hv_clock.tsc_timestamp = 0;
+       vcpu->arch.last_tsc_write = data;
+       vcpu->arch.last_tsc_nsec = ns;
 }
 EXPORT_SYMBOL_GPL(kvm_write_tsc);
 
@@ -1041,21 +1062,41 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
        s64 kernel_ns, max_kernel_ns;
        u64 tsc_timestamp;
 
-       if ((!vcpu->time_page))
-               return 0;
-
        /* Keep irq disabled to prevent changes to the clock */
        local_irq_save(flags);
        kvm_get_msr(v, MSR_IA32_TSC, &tsc_timestamp);
        kernel_ns = get_kernel_ns();
        this_tsc_khz = __get_cpu_var(cpu_tsc_khz);
-       local_irq_restore(flags);
 
        if (unlikely(this_tsc_khz == 0)) {
+               local_irq_restore(flags);
                kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
                return 1;
        }
 
+       /*
+        * We may have to catch up the TSC to match elapsed wall clock
+        * time for two reasons, even if kvmclock is used.
+        *   1) CPU could have been running below the maximum TSC rate
+        *   2) Broken TSC compensation resets the base at each VCPU
+        *      entry to avoid unknown leaps of TSC even when running
+        *      again on the same CPU.  This may cause apparent elapsed
+        *      time to disappear, and the guest to stand still or run
+        *      very slowly.
+        */
+       if (vcpu->tsc_catchup) {
+               u64 tsc = compute_guest_tsc(v, kernel_ns);
+               if (tsc > tsc_timestamp) {
+                       kvm_x86_ops->adjust_tsc_offset(v, tsc - tsc_timestamp);
+                       tsc_timestamp = tsc;
+               }
+       }
+
+       local_irq_restore(flags);
+
+       if (!vcpu->time_page)
+               return 0;
+
        /*
         * Time as measured by the TSC may go backwards when resetting the base
         * tsc_timestamp.  The reason for this is that the TSC resolution is
@@ -1122,16 +1163,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
        return 0;
 }
 
-static int kvm_request_guest_time_update(struct kvm_vcpu *v)
-{
-       struct kvm_vcpu_arch *vcpu = &v->arch;
-
-       if (!vcpu->time_page)
-               return 0;
-       kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
-       return 1;
-}
-
 static bool msr_mtrr_valid(unsigned msr)
 {
        switch (msr) {
@@ -1455,6 +1486,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
                }
 
                vcpu->arch.time = data;
+               kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
 
                /* we verify if the enable bit is set... */
                if (!(data & 1))
@@ -1470,8 +1502,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
                        kvm_release_page_clean(vcpu->arch.time_page);
                        vcpu->arch.time_page = NULL;
                }
-
-               kvm_request_guest_time_update(vcpu);
                break;
        }
        case MSR_IA32_MCG_CTL:
@@ -2028,9 +2058,13 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
                                native_read_tsc() - vcpu->arch.last_host_tsc;
                if (tsc_delta < 0)
                        mark_tsc_unstable("KVM discovered backwards TSC");
-               if (check_tsc_unstable())
+               if (check_tsc_unstable()) {
                        kvm_x86_ops->adjust_tsc_offset(vcpu, -tsc_delta);
-               kvm_migrate_timers(vcpu);
+                       vcpu->arch.tsc_catchup = 1;
+                       kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
+               }
+               if (vcpu->cpu != cpu)
+                       kvm_migrate_timers(vcpu);
                vcpu->cpu = cpu;
        }
 }
@@ -2210,13 +2244,14 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
                0 /* Reserved */ | F(CX16) | 0 /* xTPR Update, PDCM */ |
                0 /* Reserved, DCA */ | F(XMM4_1) |
                F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
-               0 /* Reserved, AES */ | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX);
+               0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) |
+               F(F16C);
        /* cpuid 0x80000001.ecx */
        const u32 kvm_supported_word6_x86_features =
                F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ |
                F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |
-               F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(SSE5) |
-               0 /* SKINIT */ | 0 /* WDT */;
+               F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(XOP) |
+               0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM);
 
        /* all calls to cpuid_count() should be made on the same cpu */
        get_cpu();
@@ -3435,8 +3470,10 @@ long kvm_arch_vm_ioctl(struct file *filp,
                        goto out;
 
                r = 0;
+               local_irq_disable();
                now_ns = get_kernel_ns();
                delta = user_ns.clock - now_ns;
+               local_irq_enable();
                kvm->arch.kvmclock_offset = delta;
                break;
        }
@@ -3444,8 +3481,10 @@ long kvm_arch_vm_ioctl(struct file *filp,
                struct kvm_clock_data user_ns;
                u64 now_ns;
 
+               local_irq_disable();
                now_ns = get_kernel_ns();
                user_ns.clock = kvm->arch.kvmclock_offset + now_ns;
+               local_irq_enable();
                user_ns.flags = 0;
 
                r = -EFAULT;
@@ -4461,8 +4500,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
                kvm_for_each_vcpu(i, vcpu, kvm) {
                        if (vcpu->cpu != freq->cpu)
                                continue;
-                       if (!kvm_request_guest_time_update(vcpu))
-                               continue;
+                       kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
                        if (vcpu->cpu != smp_processor_id())
                                send_ipi = 1;
                }
@@ -4517,11 +4555,24 @@ static void kvm_timer_init(void)
 {
        int cpu;
 
+       /* Default; replaced below by cpufreq's max_freq on non-constant TSC */
+       max_tsc_khz = tsc_khz;
        register_hotcpu_notifier(&kvmclock_cpu_notifier_block);
        if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
+#ifdef CONFIG_CPU_FREQ
+               struct cpufreq_policy policy;
+               memset(&policy, 0, sizeof(policy));
+               /* get_cpu() disables preemption; balance it with put_cpu() */
+               cpu = get_cpu();
+               cpufreq_get_policy(&policy, cpu);
+               if (policy.cpuinfo.max_freq)
+                       max_tsc_khz = policy.cpuinfo.max_freq;
+               put_cpu();
+#endif
                cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
                                          CPUFREQ_TRANSITION_NOTIFIER);
        }
+       pr_debug("kvm: max_tsc_khz = %lu\n", max_tsc_khz);
        for_each_online_cpu(cpu)
                smp_call_function_single(cpu, tsc_khz_changed, NULL, 1);
 }
@@ -5752,7 +5799,7 @@ int kvm_arch_hardware_enable(void *garbage)
        list_for_each_entry(kvm, &vm_list, vm_list)
                kvm_for_each_vcpu(i, vcpu, kvm)
                        if (vcpu->cpu == smp_processor_id())
-                               kvm_request_guest_time_update(vcpu);
+                               kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
        return kvm_x86_ops->hardware_enable(garbage);
 }
 
@@ -5803,6 +5850,9 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
        }
        vcpu->arch.pio_data = page_address(page);
 
+       if (!kvm->arch.virtual_tsc_khz)
+               kvm_arch_set_tsc_khz(kvm, max_tsc_khz);
+
        r = kvm_mmu_create(vcpu);
        if (r < 0)
                goto fail_free_pio_data;