KVM: VMX: Check cpl before emulating debug register access
[pandora-kernel.git] / arch / x86 / kvm / x86.c
index 5916191..9947224 100644 (file)
@@ -197,6 +197,19 @@ static void __queue_exception(struct kvm_vcpu *vcpu)
                                     vcpu->arch.exception.error_code);
 }
 
+/*
+ * Checks if cpl <= required_cpl; if true, return true.  Otherwise queue
+ * a #GP and return false.
+ */
+bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl)
+{
+       if (kvm_x86_ops->get_cpl(vcpu) <= required_cpl)
+               return true;
+       kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
+       return false;
+}
+EXPORT_SYMBOL_GPL(kvm_require_cpl);
+
 /*
  * Load the pae pdptrs.  Return true is they are all valid.
  */
@@ -318,6 +331,9 @@ EXPORT_SYMBOL_GPL(kvm_lmsw);
 
 void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
+       unsigned long old_cr4 = vcpu->arch.cr4;
+       unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE;
+
        if (cr4 & CR4_RESERVED_BITS) {
                printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n");
                kvm_inject_gp(vcpu, 0);
@@ -331,7 +347,8 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
                        kvm_inject_gp(vcpu, 0);
                        return;
                }
-       } else if (is_paging(vcpu) && !is_pae(vcpu) && (cr4 & X86_CR4_PAE)
+       } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE)
+                  && ((cr4 ^ old_cr4) & pdptr_bits)
                   && !load_pdptrs(vcpu, vcpu->arch.cr3)) {
                printk(KERN_DEBUG "set_cr4: #GP, pdptrs reserved bits\n");
                kvm_inject_gp(vcpu, 0);
@@ -752,6 +769,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
        case MSR_IA32_MC0_MISC+8:
        case MSR_IA32_MC0_MISC+12:
        case MSR_IA32_MC0_MISC+16:
+       case MSR_IA32_MC0_MISC+20:
        case MSR_IA32_UCODE_REV:
        case MSR_IA32_EBL_CR_POWERON:
                data = 0;
@@ -883,6 +901,7 @@ int kvm_dev_ioctl_check_extension(long ext)
        case KVM_CAP_PIT:
        case KVM_CAP_NOP_IO_DELAY:
        case KVM_CAP_MP_STATE:
+       case KVM_CAP_SYNC_MMU:
                r = 1;
                break;
        case KVM_CAP_COALESCED_MMIO:
@@ -981,9 +1000,9 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 
 static int is_efer_nx(void)
 {
-       u64 efer;
+       unsigned long long efer = 0;
 
-       rdmsrl(MSR_EFER, efer);
+       rdmsrl_safe(MSR_EFER, &efer);
        return efer & EFER_NX;
 }
 
@@ -1217,6 +1236,8 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
 
        if (cpuid->nent < 1)
                goto out;
+       if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
+               cpuid->nent = KVM_MAX_CPUID_ENTRIES;
        r = -ENOMEM;
        cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent);
        if (!cpuid_entries)
@@ -1302,28 +1323,33 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
        struct kvm_vcpu *vcpu = filp->private_data;
        void __user *argp = (void __user *)arg;
        int r;
+       struct kvm_lapic_state *lapic = NULL;
 
        switch (ioctl) {
        case KVM_GET_LAPIC: {
-               struct kvm_lapic_state lapic;
+               lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);
 
-               memset(&lapic, 0, sizeof lapic);
-               r = kvm_vcpu_ioctl_get_lapic(vcpu, &lapic);
+               r = -ENOMEM;
+               if (!lapic)
+                       goto out;
+               r = kvm_vcpu_ioctl_get_lapic(vcpu, lapic);
                if (r)
                        goto out;
                r = -EFAULT;
-               if (copy_to_user(argp, &lapic, sizeof lapic))
+               if (copy_to_user(argp, lapic, sizeof(struct kvm_lapic_state)))
                        goto out;
                r = 0;
                break;
        }
        case KVM_SET_LAPIC: {
-               struct kvm_lapic_state lapic;
-
+               lapic = kmalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);
+               r = -ENOMEM;
+               if (!lapic)
+                       goto out;
                r = -EFAULT;
-               if (copy_from_user(&lapic, argp, sizeof lapic))
+               if (copy_from_user(lapic, argp, sizeof(struct kvm_lapic_state)))
                        goto out;
-               r = kvm_vcpu_ioctl_set_lapic(vcpu, &lapic);;
+               r = kvm_vcpu_ioctl_set_lapic(vcpu, lapic);
                if (r)
                        goto out;
                r = 0;
@@ -1421,6 +1447,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
                r = -EINVAL;
        }
 out:
+       if (lapic)
+               kfree(lapic);
        return r;
 }
 
@@ -1441,10 +1469,12 @@ static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
                return -EINVAL;
 
        down_write(&kvm->slots_lock);
+       spin_lock(&kvm->mmu_lock);
 
        kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
        kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages;
 
+       spin_unlock(&kvm->mmu_lock);
        up_write(&kvm->slots_lock);
        return 0;
 }
@@ -1495,6 +1525,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
                goto out;
 
        down_write(&kvm->slots_lock);
+       spin_lock(&kvm->mmu_lock);
 
        p = &kvm->arch.aliases[alias->slot];
        p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT;
@@ -1506,6 +1537,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
                        break;
        kvm->arch.naliases = n;
 
+       spin_unlock(&kvm->mmu_lock);
        kvm_mmu_zap_all(kvm);
 
        up_write(&kvm->slots_lock);
@@ -1609,7 +1641,9 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 
        /* If nothing is dirty, don't bother messing with page tables. */
        if (is_dirty) {
+               spin_lock(&kvm->mmu_lock);
                kvm_mmu_slot_remove_write_access(kvm, log->slot);
+               spin_unlock(&kvm->mmu_lock);
                kvm_flush_remote_tlbs(kvm);
                memslot = &kvm->memslots[log->slot];
                n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
@@ -1627,6 +1661,15 @@ long kvm_arch_vm_ioctl(struct file *filp,
        struct kvm *kvm = filp->private_data;
        void __user *argp = (void __user *)arg;
        int r = -EINVAL;
+       /*
+        * This union makes it completely explicit to gcc-3.x
+        * that these two variables' stack usage should be
+        * combined, not added together.
+        */
+       union {
+               struct kvm_pit_state ps;
+               struct kvm_memory_alias alias;
+       } u;
 
        switch (ioctl) {
        case KVM_SET_TSS_ADDR:
@@ -1658,17 +1701,14 @@ long kvm_arch_vm_ioctl(struct file *filp,
        case KVM_GET_NR_MMU_PAGES:
                r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);
                break;
-       case KVM_SET_MEMORY_ALIAS: {
-               struct kvm_memory_alias alias;
-
+       case KVM_SET_MEMORY_ALIAS:
                r = -EFAULT;
-               if (copy_from_user(&alias, argp, sizeof alias))
+               if (copy_from_user(&u.alias, argp, sizeof(struct kvm_memory_alias)))
                        goto out;
-               r = kvm_vm_ioctl_set_memory_alias(kvm, &alias);
+               r = kvm_vm_ioctl_set_memory_alias(kvm, &u.alias);
                if (r)
                        goto out;
                break;
-       }
        case KVM_CREATE_IRQCHIP:
                r = -ENOMEM;
                kvm->arch.vpic = kvm_create_pic(kvm);
@@ -1710,65 +1750,77 @@ long kvm_arch_vm_ioctl(struct file *filp,
        }
        case KVM_GET_IRQCHIP: {
                /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
-               struct kvm_irqchip chip;
+               struct kvm_irqchip *chip = kmalloc(sizeof(*chip), GFP_KERNEL);
 
-               r = -EFAULT;
-               if (copy_from_user(&chip, argp, sizeof chip))
+               r = -ENOMEM;
+               if (!chip)
                        goto out;
+               r = -EFAULT;
+               if (copy_from_user(chip, argp, sizeof *chip))
+                       goto get_irqchip_out;
                r = -ENXIO;
                if (!irqchip_in_kernel(kvm))
-                       goto out;
-               r = kvm_vm_ioctl_get_irqchip(kvm, &chip);
+                       goto get_irqchip_out;
+               r = kvm_vm_ioctl_get_irqchip(kvm, chip);
                if (r)
-                       goto out;
+                       goto get_irqchip_out;
                r = -EFAULT;
-               if (copy_to_user(argp, &chip, sizeof chip))
-                       goto out;
+               if (copy_to_user(argp, chip, sizeof *chip))
+                       goto get_irqchip_out;
                r = 0;
+       get_irqchip_out:
+               kfree(chip);
+               if (r)
+                       goto out;
                break;
        }
        case KVM_SET_IRQCHIP: {
                /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
-               struct kvm_irqchip chip;
+               struct kvm_irqchip *chip = kmalloc(sizeof(*chip), GFP_KERNEL);
 
-               r = -EFAULT;
-               if (copy_from_user(&chip, argp, sizeof chip))
+               r = -ENOMEM;
+               if (!chip)
                        goto out;
+               r = -EFAULT;
+               if (copy_from_user(chip, argp, sizeof *chip))
+                       goto set_irqchip_out;
                r = -ENXIO;
                if (!irqchip_in_kernel(kvm))
-                       goto out;
-               r = kvm_vm_ioctl_set_irqchip(kvm, &chip);
+                       goto set_irqchip_out;
+               r = kvm_vm_ioctl_set_irqchip(kvm, chip);
                if (r)
-                       goto out;
+                       goto set_irqchip_out;
                r = 0;
+       set_irqchip_out:
+               kfree(chip);
+               if (r)
+                       goto out;
                break;
        }
        case KVM_GET_PIT: {
-               struct kvm_pit_state ps;
                r = -EFAULT;
-               if (copy_from_user(&ps, argp, sizeof ps))
+               if (copy_from_user(&u.ps, argp, sizeof(struct kvm_pit_state)))
                        goto out;
                r = -ENXIO;
                if (!kvm->arch.vpit)
                        goto out;
-               r = kvm_vm_ioctl_get_pit(kvm, &ps);
+               r = kvm_vm_ioctl_get_pit(kvm, &u.ps);
                if (r)
                        goto out;
                r = -EFAULT;
-               if (copy_to_user(argp, &ps, sizeof ps))
+               if (copy_to_user(argp, &u.ps, sizeof(struct kvm_pit_state)))
                        goto out;
                r = 0;
                break;
        }
        case KVM_SET_PIT: {
-               struct kvm_pit_state ps;
                r = -EFAULT;
-               if (copy_from_user(&ps, argp, sizeof ps))
+               if (copy_from_user(&u.ps, argp, sizeof u.ps))
                        goto out;
                r = -ENXIO;
                if (!kvm->arch.vpit)
                        goto out;
-               r = kvm_vm_ioctl_set_pit(kvm, &ps);
+               r = kvm_vm_ioctl_set_pit(kvm, &u.ps);
                if (r)
                        goto out;
                r = 0;
@@ -2534,6 +2586,11 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
                a3 &= 0xFFFFFFFF;
        }
 
+       if (kvm_x86_ops->get_cpl(vcpu) != 0) {
+               ret = -KVM_EPERM;
+               goto out;
+       }
+
        switch (nr) {
        case KVM_HC_VAPIC_POLL_IRQ:
                ret = 0;
@@ -2545,6 +2602,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
                ret = -KVM_ENOSYS;
                break;
        }
+out:
        vcpu->arch.regs[VCPU_REGS_RAX] = ret;
        kvm_x86_ops->decache_regs(vcpu);
        ++vcpu->stat.hypercalls;
@@ -2810,10 +2868,6 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
        down_read(&vcpu->kvm->slots_lock);
        vapic_enter(vcpu);
 
-preempted:
-       if (vcpu->guest_debug.enabled)
-               kvm_x86_ops->guest_debug_pre(vcpu);
-
 again:
        if (vcpu->requests)
                if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
@@ -2867,6 +2921,9 @@ again:
                goto out;
        }
 
+       if (vcpu->guest_debug.enabled)
+               kvm_x86_ops->guest_debug_pre(vcpu);
+
        vcpu->guest_mode = 1;
        /*
         * Make sure that guest_mode assignment won't happen after
@@ -2941,7 +2998,7 @@ out:
        if (r > 0) {
                kvm_resched(vcpu);
                down_read(&vcpu->kvm->slots_lock);
-               goto preempted;
+               goto again;
        }
 
        post_kvm_run_save(vcpu, kvm_run);
@@ -3291,11 +3348,33 @@ static int load_segment_descriptor_to_kvm_desct(struct kvm_vcpu *vcpu,
        return 0;
 }
 
+int kvm_load_realmode_segment(struct kvm_vcpu *vcpu, u16 selector, int seg)
+{
+       struct kvm_segment segvar = {
+               .base = selector << 4,
+               .limit = 0xffff,
+               .selector = selector,
+               .type = 3,
+               .present = 1,
+               .dpl = 3,
+               .db = 0,
+               .s = 1,
+               .l = 0,
+               .g = 0,
+               .avl = 0,
+               .unusable = 0,
+       };
+       kvm_x86_ops->set_segment(vcpu, &segvar, seg);
+       return 0;
+}
+
 int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
                                int type_bits, int seg)
 {
        struct kvm_segment kvm_seg;
 
+       if (!(vcpu->arch.cr0 & X86_CR0_PE))
+               return kvm_load_realmode_segment(vcpu, selector, seg);
        if (load_segment_descriptor_to_kvm_desct(vcpu, selector, &kvm_seg))
                return 1;
        kvm_seg.type |= type_bits;
@@ -3972,16 +4051,23 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
         */
        if (!user_alloc) {
                if (npages && !old.rmap) {
+                       unsigned long userspace_addr;
+
                        down_write(&current->mm->mmap_sem);
-                       memslot->userspace_addr = do_mmap(NULL, 0,
-                                                    npages * PAGE_SIZE,
-                                                    PROT_READ | PROT_WRITE,
-                                                    MAP_SHARED | MAP_ANONYMOUS,
-                                                    0);
+                       userspace_addr = do_mmap(NULL, 0,
+                                                npages * PAGE_SIZE,
+                                                PROT_READ | PROT_WRITE,
+                                                MAP_PRIVATE | MAP_ANONYMOUS,
+                                                0);
                        up_write(&current->mm->mmap_sem);
 
-                       if (IS_ERR((void *)memslot->userspace_addr))
-                               return PTR_ERR((void *)memslot->userspace_addr);
+                       if (IS_ERR((void *)userspace_addr))
+                               return PTR_ERR((void *)userspace_addr);
+
+                       /* set userspace_addr atomically for kvm_hva_to_rmapp */
+                       spin_lock(&kvm->mmu_lock);
+                       memslot->userspace_addr = userspace_addr;
+                       spin_unlock(&kvm->mmu_lock);
                } else {
                        if (!old.user_alloc && old.rmap) {
                                int ret;
@@ -3998,12 +4084,14 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
                }
        }
 
+       spin_lock(&kvm->mmu_lock);
        if (!kvm->arch.n_requested_mmu_pages) {
                unsigned int nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm);
                kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
        }
 
        kvm_mmu_slot_remove_write_access(kvm, mem->slot);
+       spin_unlock(&kvm->mmu_lock);
        kvm_flush_remote_tlbs(kvm);
 
        return 0;
@@ -4012,6 +4100,7 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
 void kvm_arch_flush_shadow(struct kvm *kvm)
 {
        kvm_mmu_zap_all(kvm);
+       kvm_reload_remote_mmus(kvm);
 }
 
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)