X-Git-Url: https://git.openpandora.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=drivers%2Fkvm%2Fkvm_main.c;h=cf163bddfbd3d6d50212e969e9c843bd76a8a85f;hb=5fb76f9be1a050a25e21a44ab2003c9d36a72a77;hp=353e58527d15d6ddfbb68c392ba78054224ea19a;hpb=df3d80f5a5c74168be42788364d13cf6c83c7b9c;p=pandora-kernel.git diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 353e58527d15..cf163bddfbd3 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -16,8 +16,8 @@ */ #include "kvm.h" +#include "x86.h" #include "x86_emulate.h" -#include "segment_descriptor.h" #include "irq.h" #include @@ -39,6 +39,9 @@ #include #include #include +#include +#include +#include #include #include @@ -87,8 +90,6 @@ static struct kvm_stats_debugfs_item { static struct dentry *debugfs_dir; -#define MAX_IO_MSRS 256 - #define CR0_RESERVED_BITS \ (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \ @@ -102,50 +103,9 @@ static struct dentry *debugfs_dir; #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) #define EFER_RESERVED_BITS 0xfffffffffffff2fe -#ifdef CONFIG_X86_64 -// LDT or TSS descriptor in the GDT. 16 bytes. -struct segment_descriptor_64 { - struct segment_descriptor s; - u32 base_higher; - u32 pad_zero; -}; - -#endif - static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, unsigned long arg); -unsigned long segment_base(u16 selector) -{ - struct descriptor_table gdt; - struct segment_descriptor *d; - unsigned long table_base; - typedef unsigned long ul; - unsigned long v; - - if (selector == 0) - return 0; - - asm ("sgdt %0" : "=m"(gdt)); - table_base = gdt.base; - - if (selector & 4) { /* from ldt */ - u16 ldt_selector; - - asm ("sldt %0" : "=g"(ldt_selector)); - table_base = segment_base(ldt_selector); - } - d = (struct segment_descriptor *)(table_base + (selector & ~7)); - v = d->base_low | ((ul)d->base_mid << 16) | ((ul)d->base_high << 24); -#ifdef CONFIG_X86_64 - if (d->system == 0 - && (d->type == 2 || d->type == 9 || d->type == 11)) - v |= ((ul)((struct segment_descriptor_64 *)d)->base_higher) << 32; -#endif - return v; -} -EXPORT_SYMBOL_GPL(segment_base); - static inline int valid_vcpu(int n) { return likely(n >= 0 && n < KVM_MAX_VCPUS); @@ -176,21 +136,21 @@ EXPORT_SYMBOL_GPL(kvm_put_guest_fpu); /* * Switches to specified vcpu, until a matching vcpu_put() */ -static void vcpu_load(struct kvm_vcpu *vcpu) +void vcpu_load(struct kvm_vcpu *vcpu) { int cpu; mutex_lock(&vcpu->mutex); cpu = get_cpu(); preempt_notifier_register(&vcpu->preempt_notifier); - kvm_x86_ops->vcpu_load(vcpu, cpu); + kvm_arch_vcpu_load(vcpu, cpu); put_cpu(); } -static void vcpu_put(struct kvm_vcpu *vcpu) +void vcpu_put(struct kvm_vcpu *vcpu) { preempt_disable(); - kvm_x86_ops->vcpu_put(vcpu); + kvm_arch_vcpu_put(vcpu); preempt_notifier_unregister(&vcpu->preempt_notifier); preempt_enable(); mutex_unlock(&vcpu->mutex); @@ -198,46 +158,26 @@ static void vcpu_put(struct kvm_vcpu *vcpu) static void ack_flush(void *_completed) { - atomic_t *completed = _completed; - - atomic_inc(completed); } void kvm_flush_remote_tlbs(struct kvm *kvm) { - int i, cpu, needed; + int i, cpu; cpumask_t cpus; struct kvm_vcpu *vcpu; - atomic_t completed; - atomic_set(&completed, 0); cpus_clear(cpus); - needed = 0; for (i = 0; i < KVM_MAX_VCPUS; ++i) { vcpu = kvm->vcpus[i]; if (!vcpu) continue; - if (test_and_set_bit(KVM_TLB_FLUSH, &vcpu->requests)) + if (test_and_set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests)) continue; cpu = vcpu->cpu; if (cpu != -1 && cpu != raw_smp_processor_id()) - if (!cpu_isset(cpu, cpus)) { - cpu_set(cpu, cpus); - ++needed; - } - } - - /* - * We really want smp_call_function_mask() here. But that's not - * available, so ipi all cpus in parallel and wait for them - * to complete. - */ - for (cpu = first_cpu(cpus); cpu != NR_CPUS; cpu = next_cpu(cpu, cpus)) - smp_call_function_single(cpu, ack_flush, &completed, 1, 0); - while (atomic_read(&completed) != needed) { - cpu_relax(); - barrier(); + cpu_set(cpu, cpus); } + smp_call_function_mask(cpus, ack_flush, NULL, 1); } int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) @@ -274,23 +214,29 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) if (r < 0) goto fail_free_pio_data; + if (irqchip_in_kernel(kvm)) { + r = kvm_create_lapic(vcpu); + if (r < 0) + goto fail_mmu_destroy; + } + return 0; +fail_mmu_destroy: + kvm_mmu_destroy(vcpu); fail_free_pio_data: free_page((unsigned long)vcpu->pio_data); fail_free_run: free_page((unsigned long)vcpu->run); fail: - return -ENOMEM; + return r; } EXPORT_SYMBOL_GPL(kvm_vcpu_init); void kvm_vcpu_uninit(struct kvm_vcpu *vcpu) { + kvm_free_lapic(vcpu); kvm_mmu_destroy(vcpu); - if (vcpu->apic) - hrtimer_cancel(&vcpu->apic->timer.dev); - kvm_free_apic(vcpu->apic); free_page((unsigned long)vcpu->pio_data); free_page((unsigned long)vcpu->run); } @@ -319,22 +265,15 @@ static struct kvm *kvm_create_vm(void) static void kvm_free_physmem_slot(struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { - int i; - - if (!dont || free->phys_mem != dont->phys_mem) - if (free->phys_mem) { - for (i = 0; i < free->npages; ++i) - if (free->phys_mem[i]) - __free_page(free->phys_mem[i]); - vfree(free->phys_mem); - } + if (!dont || free->rmap != dont->rmap) + vfree(free->rmap); if (!dont || free->dirty_bitmap != dont->dirty_bitmap) vfree(free->dirty_bitmap); - free->phys_mem = NULL; free->npages = 0; free->dirty_bitmap = NULL; + free->rmap = NULL; } static void kvm_free_physmem(struct kvm *kvm) @@ -351,7 +290,7 @@ static void free_pio_guest_pages(struct kvm_vcpu *vcpu) for (i = 0; i < ARRAY_SIZE(vcpu->pio.guest_pages); ++i) if (vcpu->pio.guest_pages[i]) { - __free_page(vcpu->pio.guest_pages[i]); + kvm_release_page(vcpu->pio.guest_pages[i]); vcpu->pio.guest_pages[i] = NULL; } } @@ -417,22 +356,16 @@ static int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3) gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT; unsigned offset = ((cr3 & (PAGE_SIZE-1)) >> 5) << 2; int i; - u64 *pdpt; int ret; - struct page *page; u64 pdpte[ARRAY_SIZE(vcpu->pdptrs)]; mutex_lock(&vcpu->kvm->lock); - page = gfn_to_page(vcpu->kvm, pdpt_gfn); - if (!page) { + ret = kvm_read_guest_page(vcpu->kvm, pdpt_gfn, pdpte, + offset * sizeof(u64), sizeof(pdpte)); + if (ret < 0) { ret = 0; goto out; } - - pdpt = kmap_atomic(page, KM_USER0); - memcpy(pdpte, pdpt+offset, sizeof(pdpte)); - kunmap_atomic(pdpt, KM_USER0); - for (i = 0; i < ARRAY_SIZE(pdpte); ++i) { if ((pdpte[i] & 1) && (pdpte[i] & 0xfffffff0000001e6ull)) { ret = 0; @@ -573,14 +506,11 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) inject_gp(vcpu); return; } - } else { - if (cr3 & CR3_NONPAE_RESERVED_BITS) { - printk(KERN_DEBUG - "set_cr3: #GP, reserved bits\n"); - inject_gp(vcpu); - return; - } } + /* + * We don't check reserved bits in nonpae mode, because + * this isn't enforced, and VMware depends on this. + */ } mutex_lock(&vcpu->kvm->lock); @@ -671,8 +601,9 @@ EXPORT_SYMBOL_GPL(fx_init); * * Discontiguous memory is allowed, mostly for framebuffers. */ -static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, - struct kvm_memory_region *mem) +int kvm_set_memory_region(struct kvm *kvm, + struct kvm_userspace_memory_region *mem, + int user_alloc) { int r; gfn_t base_gfn; @@ -687,7 +618,7 @@ static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, goto out; if (mem->guest_phys_addr & (PAGE_SIZE - 1)) goto out; - if (mem->slot >= KVM_MEMORY_SLOTS) + if (mem->slot >= KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS) goto out; if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr) goto out; @@ -724,10 +655,6 @@ static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, goto out_unlock; } - /* Deallocate if slot is being removed */ - if (!npages) - new.phys_mem = NULL; - /* Free page dirty bitmap if unneeded */ if (!(new.flags & KVM_MEM_LOG_DIRTY_PAGES)) new.dirty_bitmap = NULL; @@ -735,19 +662,41 @@ static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, r = -ENOMEM; /* Allocate if a slot is being created */ - if (npages && !new.phys_mem) { - new.phys_mem = vmalloc(npages * sizeof(struct page *)); + if (npages && !new.rmap) { + new.rmap = vmalloc(npages * sizeof(struct page *)); - if (!new.phys_mem) + if (!new.rmap) goto out_unlock; - memset(new.phys_mem, 0, npages * sizeof(struct page *)); - for (i = 0; i < npages; ++i) { - new.phys_mem[i] = alloc_page(GFP_HIGHUSER - | __GFP_ZERO); - if (!new.phys_mem[i]) + memset(new.rmap, 0, npages * sizeof(*new.rmap)); + + new.user_alloc = user_alloc; + if (user_alloc) + new.userspace_addr = mem->userspace_addr; + else { + down_write(¤t->mm->mmap_sem); + new.userspace_addr = do_mmap(NULL, 0, + npages * PAGE_SIZE, + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, + 0); + up_write(¤t->mm->mmap_sem); + + if (IS_ERR((void *)new.userspace_addr)) goto out_unlock; - set_page_private(new.phys_mem[i],0); + } + } else { + if (!old.user_alloc && old.rmap) { + int ret; + + down_write(¤t->mm->mmap_sem); + ret = do_munmap(current->mm, old.userspace_addr, + old.npages * PAGE_SIZE); + up_write(¤t->mm->mmap_sem); + if (ret < 0) + printk(KERN_WARNING + "kvm_vm_ioctl_set_memory_region: " + "failed to munmap memory\n"); } } @@ -764,6 +713,24 @@ static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, if (mem->slot >= kvm->nmemslots) kvm->nmemslots = mem->slot + 1; + if (!kvm->n_requested_mmu_pages) { + unsigned int n_pages; + + if (npages) { + n_pages = npages * KVM_PERMILLE_MMU_PAGES / 1000; + kvm_mmu_change_mmu_pages(kvm, kvm->n_alloc_mmu_pages + + n_pages); + } else { + unsigned int nr_mmu_pages; + + n_pages = old.npages * KVM_PERMILLE_MMU_PAGES / 1000; + nr_mmu_pages = kvm->n_alloc_mmu_pages - n_pages; + nr_mmu_pages = max(nr_mmu_pages, + (unsigned int) KVM_MIN_ALLOC_MMU_PAGES); + kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages); + } + } + *memslot = new; kvm_mmu_slot_remove_write_access(kvm, mem->slot); @@ -779,6 +746,18 @@ out_unlock: kvm_free_physmem_slot(&new, &old); out: return r; + +} +EXPORT_SYMBOL_GPL(kvm_set_memory_region); + +int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, + struct + kvm_userspace_memory_region *mem, + int user_alloc) +{ + if (mem->slot >= KVM_MEMORY_SLOTS) + return -EINVAL; + return kvm_set_memory_region(kvm, mem, user_alloc); } /* @@ -826,112 +805,13 @@ out: return r; } -/* - * Set a new alias region. Aliases map a portion of physical memory into - * another portion. This is useful for memory windows, for example the PC - * VGA region. - */ -static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm, - struct kvm_memory_alias *alias) +int is_error_page(struct page *page) { - int r, n; - struct kvm_mem_alias *p; - - r = -EINVAL; - /* General sanity checks */ - if (alias->memory_size & (PAGE_SIZE - 1)) - goto out; - if (alias->guest_phys_addr & (PAGE_SIZE - 1)) - goto out; - if (alias->slot >= KVM_ALIAS_SLOTS) - goto out; - if (alias->guest_phys_addr + alias->memory_size - < alias->guest_phys_addr) - goto out; - if (alias->target_phys_addr + alias->memory_size - < alias->target_phys_addr) - goto out; - - mutex_lock(&kvm->lock); - - p = &kvm->aliases[alias->slot]; - p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT; - p->npages = alias->memory_size >> PAGE_SHIFT; - p->target_gfn = alias->target_phys_addr >> PAGE_SHIFT; - - for (n = KVM_ALIAS_SLOTS; n > 0; --n) - if (kvm->aliases[n - 1].npages) - break; - kvm->naliases = n; - - kvm_mmu_zap_all(kvm); - - mutex_unlock(&kvm->lock); - - return 0; - -out: - return r; + return page == bad_page; } +EXPORT_SYMBOL_GPL(is_error_page); -static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) -{ - int r; - - r = 0; - switch (chip->chip_id) { - case KVM_IRQCHIP_PIC_MASTER: - memcpy (&chip->chip.pic, - &pic_irqchip(kvm)->pics[0], - sizeof(struct kvm_pic_state)); - break; - case KVM_IRQCHIP_PIC_SLAVE: - memcpy (&chip->chip.pic, - &pic_irqchip(kvm)->pics[1], - sizeof(struct kvm_pic_state)); - break; - case KVM_IRQCHIP_IOAPIC: - memcpy (&chip->chip.ioapic, - ioapic_irqchip(kvm), - sizeof(struct kvm_ioapic_state)); - break; - default: - r = -EINVAL; - break; - } - return r; -} - -static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) -{ - int r; - - r = 0; - switch (chip->chip_id) { - case KVM_IRQCHIP_PIC_MASTER: - memcpy (&pic_irqchip(kvm)->pics[0], - &chip->chip.pic, - sizeof(struct kvm_pic_state)); - break; - case KVM_IRQCHIP_PIC_SLAVE: - memcpy (&pic_irqchip(kvm)->pics[1], - &chip->chip.pic, - sizeof(struct kvm_pic_state)); - break; - case KVM_IRQCHIP_IOAPIC: - memcpy (ioapic_irqchip(kvm), - &chip->chip.ioapic, - sizeof(struct kvm_ioapic_state)); - break; - default: - r = -EINVAL; - break; - } - kvm_pic_update_irq(pic_irqchip(kvm)); - return r; -} - -static gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) +gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) { int i; struct kvm_mem_alias *alias; @@ -965,18 +845,190 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) return __gfn_to_memslot(kvm, gfn); } +int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) +{ + int i; + + gfn = unalias_gfn(kvm, gfn); + for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { + struct kvm_memory_slot *memslot = &kvm->memslots[i]; + + if (gfn >= memslot->base_gfn + && gfn < memslot->base_gfn + memslot->npages) + return 1; + } + return 0; +} +EXPORT_SYMBOL_GPL(kvm_is_visible_gfn); + struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) { struct kvm_memory_slot *slot; + struct page *page[1]; + int npages; + + might_sleep(); gfn = unalias_gfn(kvm, gfn); slot = __gfn_to_memslot(kvm, gfn); - if (!slot) - return NULL; - return slot->phys_mem[gfn - slot->base_gfn]; + if (!slot) { + get_page(bad_page); + return bad_page; + } + + down_read(¤t->mm->mmap_sem); + npages = get_user_pages(current, current->mm, + slot->userspace_addr + + (gfn - slot->base_gfn) * PAGE_SIZE, 1, + 1, 1, page, NULL); + up_read(¤t->mm->mmap_sem); + if (npages != 1) { + get_page(bad_page); + return bad_page; + } + + return page[0]; } EXPORT_SYMBOL_GPL(gfn_to_page); +void kvm_release_page(struct page *page) +{ + if (!PageReserved(page)) + SetPageDirty(page); + put_page(page); +} +EXPORT_SYMBOL_GPL(kvm_release_page); + +static int next_segment(unsigned long len, int offset) +{ + if (len > PAGE_SIZE - offset) + return PAGE_SIZE - offset; + else + return len; +} + +int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, + int len) +{ + void *page_virt; + struct page *page; + + page = gfn_to_page(kvm, gfn); + if (is_error_page(page)) { + kvm_release_page(page); + return -EFAULT; + } + page_virt = kmap_atomic(page, KM_USER0); + + memcpy(data, page_virt + offset, len); + + kunmap_atomic(page_virt, KM_USER0); + kvm_release_page(page); + return 0; +} +EXPORT_SYMBOL_GPL(kvm_read_guest_page); + +int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len) +{ + gfn_t gfn = gpa >> PAGE_SHIFT; + int seg; + int offset = offset_in_page(gpa); + int ret; + + while ((seg = next_segment(len, offset)) != 0) { + ret = kvm_read_guest_page(kvm, gfn, data, offset, seg); + if (ret < 0) + return ret; + offset = 0; + len -= seg; + data += seg; + ++gfn; + } + return 0; +} +EXPORT_SYMBOL_GPL(kvm_read_guest); + +int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data, + int offset, int len) +{ + void *page_virt; + struct page *page; + + page = gfn_to_page(kvm, gfn); + if (is_error_page(page)) { + kvm_release_page(page); + return -EFAULT; + } + page_virt = kmap_atomic(page, KM_USER0); + + memcpy(page_virt + offset, data, len); + + kunmap_atomic(page_virt, KM_USER0); + mark_page_dirty(kvm, gfn); + kvm_release_page(page); + return 0; +} +EXPORT_SYMBOL_GPL(kvm_write_guest_page); + +int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data, + unsigned long len) +{ + gfn_t gfn = gpa >> PAGE_SHIFT; + int seg; + int offset = offset_in_page(gpa); + int ret; + + while ((seg = next_segment(len, offset)) != 0) { + ret = kvm_write_guest_page(kvm, gfn, data, offset, seg); + if (ret < 0) + return ret; + offset = 0; + len -= seg; + data += seg; + ++gfn; + } + return 0; +} + +int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len) +{ + void *page_virt; + struct page *page; + + page = gfn_to_page(kvm, gfn); + if (is_error_page(page)) { + kvm_release_page(page); + return -EFAULT; + } + page_virt = kmap_atomic(page, KM_USER0); + + memset(page_virt + offset, 0, len); + + kunmap_atomic(page_virt, KM_USER0); + kvm_release_page(page); + return 0; +} +EXPORT_SYMBOL_GPL(kvm_clear_guest_page); + +int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len) +{ + gfn_t gfn = gpa >> PAGE_SHIFT; + int seg; + int offset = offset_in_page(gpa); + int ret; + + while ((seg = next_segment(len, offset)) != 0) { + ret = kvm_clear_guest_page(kvm, gfn, offset, seg); + if (ret < 0) + return ret; + offset = 0; + len -= seg; + ++gfn; + } + return 0; +} +EXPORT_SYMBOL_GPL(kvm_clear_guest); + /* WARNING: Does not work on aliased pages. */ void mark_page_dirty(struct kvm *kvm, gfn_t gfn) { @@ -1003,21 +1055,13 @@ int emulator_read_std(unsigned long addr, gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, addr); unsigned offset = addr & (PAGE_SIZE-1); unsigned tocopy = min(bytes, (unsigned)PAGE_SIZE - offset); - unsigned long pfn; - struct page *page; - void *page_virt; + int ret; if (gpa == UNMAPPED_GVA) return X86EMUL_PROPAGATE_FAULT; - pfn = gpa >> PAGE_SHIFT; - page = gfn_to_page(vcpu->kvm, pfn); - if (!page) + ret = kvm_read_guest(vcpu->kvm, gpa, data, tocopy); + if (ret < 0) return X86EMUL_UNHANDLEABLE; - page_virt = kmap_atomic(page, KM_USER0); - - memcpy(data, page_virt + offset, tocopy); - - kunmap_atomic(page_virt, KM_USER0); bytes -= tocopy; data += tocopy; @@ -1110,19 +1154,12 @@ static int emulator_read_emulated(unsigned long addr, static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, const void *val, int bytes) { - struct page *page; - void *virt; + int ret; - if (((gpa + bytes - 1) >> PAGE_SHIFT) != (gpa >> PAGE_SHIFT)) - return 0; - page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); - if (!page) + ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes); + if (ret < 0) return 0; - mark_page_dirty(vcpu->kvm, gpa >> PAGE_SHIFT); - virt = kmap_atomic(page, KM_USER0); kvm_mmu_pte_write(vcpu, gpa, val, bytes); - memcpy(virt + offset_in_page(gpa), val, bytes); - kunmap_atomic(virt, KM_USER0); return 1; } @@ -1208,12 +1245,11 @@ int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address) int emulate_clts(struct kvm_vcpu *vcpu) { - vcpu->cr0 &= ~X86_CR0_TS; - kvm_x86_ops->set_cr0(vcpu, vcpu->cr0); + kvm_x86_ops->set_cr0(vcpu, vcpu->cr0 & ~X86_CR0_TS); return X86EMUL_CONTINUE; } -int emulator_get_dr(struct x86_emulate_ctxt* ctxt, int dr, unsigned long *dest) +int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest) { struct kvm_vcpu *vcpu = ctxt->vcpu; @@ -1271,43 +1307,61 @@ struct x86_emulate_ops emulate_ops = { int emulate_instruction(struct kvm_vcpu *vcpu, struct kvm_run *run, unsigned long cr2, - u16 error_code) + u16 error_code, + int no_decode) { - struct x86_emulate_ctxt emulate_ctxt; int r; - int cs_db, cs_l; vcpu->mmio_fault_cr2 = cr2; kvm_x86_ops->cache_regs(vcpu); - kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); - - emulate_ctxt.vcpu = vcpu; - emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu); - emulate_ctxt.cr2 = cr2; - emulate_ctxt.mode = (emulate_ctxt.eflags & X86_EFLAGS_VM) - ? X86EMUL_MODE_REAL : cs_l - ? X86EMUL_MODE_PROT64 : cs_db - ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; - - if (emulate_ctxt.mode == X86EMUL_MODE_PROT64) { - emulate_ctxt.cs_base = 0; - emulate_ctxt.ds_base = 0; - emulate_ctxt.es_base = 0; - emulate_ctxt.ss_base = 0; - } else { - emulate_ctxt.cs_base = get_segment_base(vcpu, VCPU_SREG_CS); - emulate_ctxt.ds_base = get_segment_base(vcpu, VCPU_SREG_DS); - emulate_ctxt.es_base = get_segment_base(vcpu, VCPU_SREG_ES); - emulate_ctxt.ss_base = get_segment_base(vcpu, VCPU_SREG_SS); + vcpu->mmio_is_write = 0; + vcpu->pio.string = 0; + + if (!no_decode) { + int cs_db, cs_l; + kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); + + vcpu->emulate_ctxt.vcpu = vcpu; + vcpu->emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu); + vcpu->emulate_ctxt.cr2 = cr2; + vcpu->emulate_ctxt.mode = + (vcpu->emulate_ctxt.eflags & X86_EFLAGS_VM) + ? X86EMUL_MODE_REAL : cs_l + ? X86EMUL_MODE_PROT64 : cs_db + ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; + + if (vcpu->emulate_ctxt.mode == X86EMUL_MODE_PROT64) { + vcpu->emulate_ctxt.cs_base = 0; + vcpu->emulate_ctxt.ds_base = 0; + vcpu->emulate_ctxt.es_base = 0; + vcpu->emulate_ctxt.ss_base = 0; + } else { + vcpu->emulate_ctxt.cs_base = + get_segment_base(vcpu, VCPU_SREG_CS); + vcpu->emulate_ctxt.ds_base = + get_segment_base(vcpu, VCPU_SREG_DS); + vcpu->emulate_ctxt.es_base = + get_segment_base(vcpu, VCPU_SREG_ES); + vcpu->emulate_ctxt.ss_base = + get_segment_base(vcpu, VCPU_SREG_SS); + } + + vcpu->emulate_ctxt.gs_base = + get_segment_base(vcpu, VCPU_SREG_GS); + vcpu->emulate_ctxt.fs_base = + get_segment_base(vcpu, VCPU_SREG_FS); + + r = x86_decode_insn(&vcpu->emulate_ctxt, &emulate_ops); + if (r) { + if (kvm_mmu_unprotect_page_virt(vcpu, cr2)) + return EMULATE_DONE; + return EMULATE_FAIL; + } } - emulate_ctxt.gs_base = get_segment_base(vcpu, VCPU_SREG_GS); - emulate_ctxt.fs_base = get_segment_base(vcpu, VCPU_SREG_FS); + r = x86_emulate_insn(&vcpu->emulate_ctxt, &emulate_ops); - vcpu->mmio_is_write = 0; - vcpu->pio.string = 0; - r = x86_emulate_memop(&emulate_ctxt, &emulate_ops); if (vcpu->pio.string) return EMULATE_DO_MMIO; @@ -1330,7 +1384,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu, } kvm_x86_ops->decache_regs(vcpu); - kvm_x86_ops->set_rflags(vcpu, emulate_ctxt.eflags); + kvm_x86_ops->set_rflags(vcpu, vcpu->emulate_ctxt.eflags); if (vcpu->mmio_is_write) { vcpu->mmio_needed = 0; @@ -1383,51 +1437,61 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_emulate_halt); -int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run) +int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) { - unsigned long nr, a0, a1, a2, a3, a4, a5, ret; + unsigned long nr, a0, a1, a2, a3, ret; kvm_x86_ops->cache_regs(vcpu); - ret = -KVM_EINVAL; -#ifdef CONFIG_X86_64 - if (is_long_mode(vcpu)) { - nr = vcpu->regs[VCPU_REGS_RAX]; - a0 = vcpu->regs[VCPU_REGS_RDI]; - a1 = vcpu->regs[VCPU_REGS_RSI]; - a2 = vcpu->regs[VCPU_REGS_RDX]; - a3 = vcpu->regs[VCPU_REGS_RCX]; - a4 = vcpu->regs[VCPU_REGS_R8]; - a5 = vcpu->regs[VCPU_REGS_R9]; - } else -#endif - { - nr = vcpu->regs[VCPU_REGS_RBX] & -1u; - a0 = vcpu->regs[VCPU_REGS_RAX] & -1u; - a1 = vcpu->regs[VCPU_REGS_RCX] & -1u; - a2 = vcpu->regs[VCPU_REGS_RDX] & -1u; - a3 = vcpu->regs[VCPU_REGS_RSI] & -1u; - a4 = vcpu->regs[VCPU_REGS_RDI] & -1u; - a5 = vcpu->regs[VCPU_REGS_RBP] & -1u; + + nr = vcpu->regs[VCPU_REGS_RAX]; + a0 = vcpu->regs[VCPU_REGS_RBX]; + a1 = vcpu->regs[VCPU_REGS_RCX]; + a2 = vcpu->regs[VCPU_REGS_RDX]; + a3 = vcpu->regs[VCPU_REGS_RSI]; + + if (!is_long_mode(vcpu)) { + nr &= 0xFFFFFFFF; + a0 &= 0xFFFFFFFF; + a1 &= 0xFFFFFFFF; + a2 &= 0xFFFFFFFF; + a3 &= 0xFFFFFFFF; } + switch (nr) { default: - run->hypercall.nr = nr; - run->hypercall.args[0] = a0; - run->hypercall.args[1] = a1; - run->hypercall.args[2] = a2; - run->hypercall.args[3] = a3; - run->hypercall.args[4] = a4; - run->hypercall.args[5] = a5; - run->hypercall.ret = ret; - run->hypercall.longmode = is_long_mode(vcpu); - kvm_x86_ops->decache_regs(vcpu); - return 0; + ret = -KVM_ENOSYS; + break; } vcpu->regs[VCPU_REGS_RAX] = ret; kvm_x86_ops->decache_regs(vcpu); - return 1; + return 0; +} +EXPORT_SYMBOL_GPL(kvm_emulate_hypercall); + +int kvm_fix_hypercall(struct kvm_vcpu *vcpu) +{ + char instruction[3]; + int ret = 0; + + mutex_lock(&vcpu->kvm->lock); + + /* + * Blow out the MMU to ensure that no other VCPU has an active mapping + * to ensure that the updated hypercall appears atomically across all + * VCPUs. + */ + kvm_mmu_zap_all(vcpu->kvm); + + kvm_x86_ops->cache_regs(vcpu); + kvm_x86_ops->patch_hypercall(vcpu, instruction); + if (emulator_write_emulated(vcpu->rip, instruction, 3, vcpu) + != X86EMUL_CONTINUE) + ret = -EFAULT; + + mutex_unlock(&vcpu->kvm->lock); + + return ret; } -EXPORT_SYMBOL_GPL(kvm_hypercall); static u64 mk_cr_64(u64 curr_cr, u32 new_val) { @@ -1495,75 +1559,6 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val, } } -/* - * Register the para guest with the host: - */ -static int vcpu_register_para(struct kvm_vcpu *vcpu, gpa_t para_state_gpa) -{ - struct kvm_vcpu_para_state *para_state; - hpa_t para_state_hpa, hypercall_hpa; - struct page *para_state_page; - unsigned char *hypercall; - gpa_t hypercall_gpa; - - printk(KERN_DEBUG "kvm: guest trying to enter paravirtual mode\n"); - printk(KERN_DEBUG ".... para_state_gpa: %08Lx\n", para_state_gpa); - - /* - * Needs to be page aligned: - */ - if (para_state_gpa != PAGE_ALIGN(para_state_gpa)) - goto err_gp; - - para_state_hpa = gpa_to_hpa(vcpu, para_state_gpa); - printk(KERN_DEBUG ".... para_state_hpa: %08Lx\n", para_state_hpa); - if (is_error_hpa(para_state_hpa)) - goto err_gp; - - mark_page_dirty(vcpu->kvm, para_state_gpa >> PAGE_SHIFT); - para_state_page = pfn_to_page(para_state_hpa >> PAGE_SHIFT); - para_state = kmap(para_state_page); - - printk(KERN_DEBUG ".... guest version: %d\n", para_state->guest_version); - printk(KERN_DEBUG ".... size: %d\n", para_state->size); - - para_state->host_version = KVM_PARA_API_VERSION; - /* - * We cannot support guests that try to register themselves - * with a newer API version than the host supports: - */ - if (para_state->guest_version > KVM_PARA_API_VERSION) { - para_state->ret = -KVM_EINVAL; - goto err_kunmap_skip; - } - - hypercall_gpa = para_state->hypercall_gpa; - hypercall_hpa = gpa_to_hpa(vcpu, hypercall_gpa); - printk(KERN_DEBUG ".... hypercall_hpa: %08Lx\n", hypercall_hpa); - if (is_error_hpa(hypercall_hpa)) { - para_state->ret = -KVM_EINVAL; - goto err_kunmap_skip; - } - - printk(KERN_DEBUG "kvm: para guest successfully registered.\n"); - vcpu->para_state_page = para_state_page; - vcpu->para_state_gpa = para_state_gpa; - vcpu->hypercall_gpa = hypercall_gpa; - - mark_page_dirty(vcpu->kvm, hypercall_gpa >> PAGE_SHIFT); - hypercall = kmap_atomic(pfn_to_page(hypercall_hpa >> PAGE_SHIFT), - KM_USER1) + (hypercall_hpa & ~PAGE_MASK); - kvm_x86_ops->patch_hypercall(vcpu, hypercall); - kunmap_atomic(hypercall, KM_USER1); - - para_state->ret = 0; -err_kunmap_skip: - kunmap(para_state_page); - return 0; -err_gp: - return 1; -} - int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) { u64 data; @@ -1677,12 +1672,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) case MSR_IA32_MISC_ENABLE: vcpu->ia32_misc_enable_msr = data; break; - /* - * This is the 'probe whether the host is KVM' logic: - */ - case MSR_KVM_API_MAGIC: - return vcpu_register_para(vcpu, data); - default: pr_unimpl(vcpu, "unhandled wrmsr: 0x%x\n", msr); return 1; @@ -1853,7 +1842,7 @@ static void pio_string_write(struct kvm_io_device *pio_dev, mutex_unlock(&vcpu->kvm->lock); } -int kvm_emulate_pio (struct kvm_vcpu *vcpu, struct kvm_run *run, int in, +int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, int size, unsigned port) { struct kvm_io_device *pio_dev; @@ -1944,8 +1933,6 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, for (i = 0; i < nr_pages; ++i) { mutex_lock(&vcpu->kvm->lock); page = gva_to_page(vcpu, address + i * PAGE_SIZE); - if (page) - get_page(page); vcpu->pio.guest_pages[i] = page; mutex_unlock(&vcpu->kvm->lock); if (!page) { @@ -2008,10 +1995,12 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) int r; if (unlikely(vcpu->mp_state == VCPU_MP_STATE_SIPI_RECEIVED)) { - printk("vcpu %d received sipi with vector # %x\n", + pr_debug("vcpu %d received sipi with vector # %x\n", vcpu->vcpu_id, vcpu->sipi_vector); kvm_lapic_reset(vcpu); - kvm_x86_ops->vcpu_reset(vcpu); + r = kvm_x86_ops->vcpu_reset(vcpu); + if (r) + return r; vcpu->mp_state = VCPU_MP_STATE_RUNNABLE; } @@ -2024,6 +2013,8 @@ again: if (unlikely(r)) goto out; + kvm_inject_pending_timer_irqs(vcpu); + preempt_disable(); kvm_x86_ops->prepare_guest_switch(vcpu); @@ -2046,9 +2037,10 @@ again: kvm_x86_ops->inject_pending_vectors(vcpu, kvm_run); vcpu->guest_mode = 1; + kvm_guest_enter(); if (vcpu->requests) - if (test_and_clear_bit(KVM_TLB_FLUSH, &vcpu->requests)) + if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests)) kvm_x86_ops->tlb_flush(vcpu); kvm_x86_ops->run(vcpu, kvm_run); @@ -2058,6 +2050,16 @@ again: ++vcpu->stat.exits; + /* + * We must have an instruction between local_irq_enable() and + * kvm_guest_exit(), so the timer interrupt isn't delayed by + * the interrupt shadow. The stat.exits increment will do nicely. + * But we need to prevent reordering, hence this barrier(): + */ + barrier(); + + kvm_guest_exit(); + preempt_enable(); /* @@ -2120,13 +2122,13 @@ static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) if (r) goto out; } - +#if CONFIG_HAS_IOMEM if (vcpu->mmio_needed) { memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); vcpu->mmio_read_completed = 1; vcpu->mmio_needed = 0; r = emulate_instruction(vcpu, kvm_run, - vcpu->mmio_fault_cr2, 0); + vcpu->mmio_fault_cr2, 0, 1); if (r == EMULATE_DO_MMIO) { /* * Read-modify-write. Back to userspace. @@ -2135,7 +2137,7 @@ static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) goto out; } } - +#endif if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) { kvm_x86_ops->cache_regs(vcpu); vcpu->regs[VCPU_REGS_RAX] = kvm_run->hypercall.ret; @@ -2271,7 +2273,8 @@ static int kvm_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, sizeof sregs->interrupt_bitmap); pending_vec = kvm_x86_ops->get_irq(vcpu); if (pending_vec >= 0) - set_bit(pending_vec, (unsigned long *)sregs->interrupt_bitmap); + set_bit(pending_vec, + (unsigned long *)sregs->interrupt_bitmap); } else memcpy(sregs->interrupt_bitmap, vcpu->irq_pending, sizeof sregs->interrupt_bitmap); @@ -2344,7 +2347,8 @@ static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, /* Only pending external irq is handled here */ if (pending_vec < max_bits) { kvm_x86_ops->set_irq(vcpu, pending_vec); - printk("Set back pending irq %d\n", pending_vec); + pr_debug("Set back pending irq %d\n", + pending_vec); } } @@ -2373,123 +2377,6 @@ void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) } EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits); -/* - * List of msr numbers which we expose to userspace through KVM_GET_MSRS - * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST. - * - * This list is modified at module load time to reflect the - * capabilities of the host cpu. - */ -static u32 msrs_to_save[] = { - MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, - MSR_K6_STAR, -#ifdef CONFIG_X86_64 - MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, -#endif - MSR_IA32_TIME_STAMP_COUNTER, -}; - -static unsigned num_msrs_to_save; - -static u32 emulated_msrs[] = { - MSR_IA32_MISC_ENABLE, -}; - -static __init void kvm_init_msr_list(void) -{ - u32 dummy[2]; - unsigned i, j; - - for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) { - if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0) - continue; - if (j < i) - msrs_to_save[j] = msrs_to_save[i]; - j++; - } - num_msrs_to_save = j; -} - -/* - * Adapt set_msr() to msr_io()'s calling convention - */ -static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data) -{ - return kvm_set_msr(vcpu, index, *data); -} - -/* - * Read or write a bunch of msrs. All parameters are kernel addresses. - * - * @return number of msrs set successfully. - */ -static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs, - struct kvm_msr_entry *entries, - int (*do_msr)(struct kvm_vcpu *vcpu, - unsigned index, u64 *data)) -{ - int i; - - vcpu_load(vcpu); - - for (i = 0; i < msrs->nmsrs; ++i) - if (do_msr(vcpu, entries[i].index, &entries[i].data)) - break; - - vcpu_put(vcpu); - - return i; -} - -/* - * Read or write a bunch of msrs. Parameters are user addresses. - * - * @return number of msrs set successfully. - */ -static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs, - int (*do_msr)(struct kvm_vcpu *vcpu, - unsigned index, u64 *data), - int writeback) -{ - struct kvm_msrs msrs; - struct kvm_msr_entry *entries; - int r, n; - unsigned size; - - r = -EFAULT; - if (copy_from_user(&msrs, user_msrs, sizeof msrs)) - goto out; - - r = -E2BIG; - if (msrs.nmsrs >= MAX_IO_MSRS) - goto out; - - r = -ENOMEM; - size = sizeof(struct kvm_msr_entry) * msrs.nmsrs; - entries = vmalloc(size); - if (!entries) - goto out; - - r = -EFAULT; - if (copy_from_user(entries, user_msrs->entries, size)) - goto out_free; - - r = n = __msr_io(vcpu, &msrs, entries, do_msr); - if (r < 0) - goto out_free; - - r = -EFAULT; - if (writeback && copy_to_user(user_msrs->entries, entries, size)) - goto out_free; - - r = n; - -out_free: - vfree(entries); -out: - return r; -} - /* * Translate a guest virtual address to a guest physical address. */ @@ -2628,7 +2515,9 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n) BUG_ON((unsigned long)&vcpu->host_fx_image & 0xF); vcpu_load(vcpu); - r = kvm_mmu_setup(vcpu); + r = kvm_x86_ops->vcpu_reset(vcpu); + if (r == 0) + r = kvm_mmu_setup(vcpu); vcpu_put(vcpu); if (r < 0) goto free_vcpu; @@ -2663,48 +2552,6 @@ free_vcpu: return r; } -static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu) -{ - u64 efer; - int i; - struct kvm_cpuid_entry *e, *entry; - - rdmsrl(MSR_EFER, efer); - entry = NULL; - for (i = 0; i < vcpu->cpuid_nent; ++i) { - e = &vcpu->cpuid_entries[i]; - if (e->function == 0x80000001) { - entry = e; - break; - } - } - if (entry && (entry->edx & (1 << 20)) && !(efer & EFER_NX)) { - entry->edx &= ~(1 << 20); - printk(KERN_INFO "kvm: guest NX capability removed\n"); - } -} - -static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, - struct kvm_cpuid *cpuid, - struct kvm_cpuid_entry __user *entries) -{ - int r; - - r = -E2BIG; - if (cpuid->nent > KVM_MAX_CPUID_ENTRIES) - goto out; - r = -EFAULT; - if (copy_from_user(&vcpu->cpuid_entries, entries, - cpuid->nent * sizeof(struct kvm_cpuid_entry))) - goto out; - vcpu->cpuid_nent = cpuid->nent; - cpuid_fix_nx_cap(vcpu); - return 0; - -out: - return r; -} - static int kvm_vcpu_ioctl_set_sigmask(struct kvm_vcpu *vcpu, sigset_t *sigset) { if (sigset) { @@ -2777,33 +2624,12 @@ static int kvm_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) return 0; } -static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, - struct kvm_lapic_state *s) -{ - vcpu_load(vcpu); - memcpy(s->regs, vcpu->apic->regs, sizeof *s); - vcpu_put(vcpu); - - return 0; -} - -static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu, - struct kvm_lapic_state *s) -{ - vcpu_load(vcpu); - memcpy(vcpu->apic->regs, s->regs, sizeof *s); - kvm_apic_post_state_restore(vcpu); - vcpu_put(vcpu); - - return 0; -} - static long kvm_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { struct kvm_vcpu *vcpu = filp->private_data; void __user *argp = (void __user *)arg; - int r = -EINVAL; + int r; switch (ioctl) { case KVM_RUN: @@ -2901,24 +2727,6 @@ static long kvm_vcpu_ioctl(struct file *filp, r = 0; break; } - case KVM_GET_MSRS: - r = msr_io(vcpu, argp, kvm_get_msr, 1); - break; - case KVM_SET_MSRS: - r = msr_io(vcpu, argp, do_set_msr, 0); - break; - case KVM_SET_CPUID: { - struct kvm_cpuid __user *cpuid_arg = argp; - struct kvm_cpuid cpuid; - - r = -EFAULT; - if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) - goto out; - r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries); - if (r) - goto out; - break; - } case KVM_SET_SIGNAL_MASK: { struct kvm_signal_mask __user *sigmask_arg = argp; struct kvm_signal_mask kvm_sigmask; @@ -2967,33 +2775,8 @@ static long kvm_vcpu_ioctl(struct file *filp, r = 0; break; } - case KVM_GET_LAPIC: { - struct kvm_lapic_state lapic; - - memset(&lapic, 0, sizeof lapic); - r = kvm_vcpu_ioctl_get_lapic(vcpu, &lapic); - if (r) - goto out; - r = -EFAULT; - if (copy_to_user(argp, &lapic, sizeof lapic)) - goto out; - r = 0; - break; - } - case KVM_SET_LAPIC: { - struct kvm_lapic_state lapic; - - r = -EFAULT; - if (copy_from_user(&lapic, argp, sizeof lapic)) - goto out; - r = kvm_vcpu_ioctl_set_lapic(vcpu, &lapic);; - if (r) - goto out; - r = 0; - break; - } default: - ; + r = kvm_arch_vcpu_ioctl(filp, ioctl, arg); } out: return r; @@ -3004,7 +2787,7 @@ static long kvm_vm_ioctl(struct file *filp, { struct kvm *kvm = filp->private_data; void __user *argp = (void __user *)arg; - int r = -EINVAL; + int r; switch (ioctl) { case KVM_CREATE_VCPU: @@ -3012,13 +2795,15 @@ static long kvm_vm_ioctl(struct file *filp, if (r < 0) goto out; break; - case KVM_SET_MEMORY_REGION: { - struct kvm_memory_region kvm_mem; + case KVM_SET_USER_MEMORY_REGION: { + struct kvm_userspace_memory_region kvm_userspace_mem; r = -EFAULT; - if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem)) + if (copy_from_user(&kvm_userspace_mem, argp, + sizeof kvm_userspace_mem)) goto out; - r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_mem); + + r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, 1); if (r) goto out; break; @@ -3034,88 +2819,8 @@ static long kvm_vm_ioctl(struct file *filp, goto out; break; } - case KVM_SET_MEMORY_ALIAS: { - struct kvm_memory_alias alias; - - r = -EFAULT; - if (copy_from_user(&alias, argp, sizeof alias)) - goto out; - r = kvm_vm_ioctl_set_memory_alias(kvm, &alias); - if (r) - goto out; - break; - } - case KVM_CREATE_IRQCHIP: - r = -ENOMEM; - kvm->vpic = kvm_create_pic(kvm); - if (kvm->vpic) { - r = kvm_ioapic_init(kvm); - if (r) { - kfree(kvm->vpic); - kvm->vpic = NULL; - goto out; - } - } - else - goto out; - break; - case KVM_IRQ_LINE: { - struct kvm_irq_level irq_event; - - r = -EFAULT; - if (copy_from_user(&irq_event, argp, sizeof irq_event)) - goto out; - if (irqchip_in_kernel(kvm)) { - mutex_lock(&kvm->lock); - if (irq_event.irq < 16) - kvm_pic_set_irq(pic_irqchip(kvm), - irq_event.irq, - irq_event.level); - kvm_ioapic_set_irq(kvm->vioapic, - irq_event.irq, - irq_event.level); - mutex_unlock(&kvm->lock); - r = 0; - } - break; - } - case KVM_GET_IRQCHIP: { - /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ - struct kvm_irqchip chip; - - r = -EFAULT; - if (copy_from_user(&chip, argp, sizeof chip)) - goto out; - r = -ENXIO; - if (!irqchip_in_kernel(kvm)) - goto out; - r = kvm_vm_ioctl_get_irqchip(kvm, &chip); - if (r) - goto out; - r = -EFAULT; - if (copy_to_user(argp, &chip, sizeof chip)) - goto out; - r = 0; - break; - } - case KVM_SET_IRQCHIP: { - /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ - struct kvm_irqchip chip; - - r = -EFAULT; - if (copy_from_user(&chip, argp, sizeof chip)) - goto out; - r = -ENXIO; - if (!irqchip_in_kernel(kvm)) - goto out; - r = kvm_vm_ioctl_set_irqchip(kvm, &chip); - if (r) - goto out; - r = 0; - break; - } default: - ; + r = kvm_arch_vm_ioctl(filp, ioctl, arg); } out: return r; @@ -3130,10 +2835,13 @@ static struct page *kvm_vm_nopage(struct vm_area_struct *vma, struct page *page; pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; + if (!kvm_is_visible_gfn(kvm, pgoff)) + return NOPAGE_SIGBUS; page = gfn_to_page(kvm, pgoff); - if (!page) + if (is_error_page(page)) { + kvm_release_page(page); return NOPAGE_SIGBUS; - get_page(page); + } if (type != NULL) *type = VM_FAULT_MINOR; @@ -3197,39 +2905,15 @@ static long kvm_dev_ioctl(struct file *filp, goto out; r = kvm_dev_ioctl_create_vm(); break; - case KVM_GET_MSR_INDEX_LIST: { - struct kvm_msr_list __user *user_msr_list = argp; - struct kvm_msr_list msr_list; - unsigned n; - - r = -EFAULT; - if (copy_from_user(&msr_list, user_msr_list, sizeof msr_list)) - goto out; - n = msr_list.nmsrs; - msr_list.nmsrs = num_msrs_to_save + ARRAY_SIZE(emulated_msrs); - if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list)) - goto out; - r = -E2BIG; - if (n < num_msrs_to_save) - goto out; - r = -EFAULT; - if (copy_to_user(user_msr_list->indices, &msrs_to_save, - num_msrs_to_save * sizeof(u32))) - goto out; - if (copy_to_user(user_msr_list->indices - + num_msrs_to_save * sizeof(u32), - &emulated_msrs, - ARRAY_SIZE(emulated_msrs) * sizeof(u32))) - goto out; - r = 0; - break; - } case KVM_CHECK_EXTENSION: { int ext = (long)argp; switch (ext) { case KVM_CAP_IRQCHIP: case KVM_CAP_HLT: + case KVM_CAP_MMU_SHADOW_CACHE_CONTROL: + case KVM_CAP_USER_MEMORY: + case KVM_CAP_SET_TSS_ADDR: r = 1; break; default: @@ -3245,7 +2929,7 @@ static long kvm_dev_ioctl(struct file *filp, r = 2 * PAGE_SIZE; break; default: - ; + return kvm_arch_dev_ioctl(filp, ioctl, arg); } out: return r; @@ -3347,7 +3031,7 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, } static int kvm_reboot(struct notifier_block *notifier, unsigned long val, - void *v) + void *v) { if (val == SYS_RESTART) { /* @@ -3461,7 +3145,7 @@ static int kvm_resume(struct sys_device *dev) } static struct sysdev_class kvm_sysdev_class = { - set_kset_name("kvm"), + .name = "kvm", .suspend = kvm_suspend, .resume = kvm_resume, }; @@ -3471,7 +3155,7 @@ static struct sys_device kvm_sysdev = { .cls = &kvm_sysdev_class, }; -hpa_t bad_page_address; +struct page *bad_page; static inline struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn) @@ -3554,14 +3238,16 @@ int kvm_init_x86(struct kvm_x86_ops *ops, unsigned int vcpu_size, r = misc_register(&kvm_dev); if (r) { - printk (KERN_ERR "kvm: misc device register failed\n"); + printk(KERN_ERR "kvm: misc device register failed\n"); goto out_free; } kvm_preempt_ops.sched_in = kvm_sched_in; kvm_preempt_ops.sched_out = kvm_sched_out; - return r; + kvm_mmu_set_nonpresent_ptes(0ull, 0ull); + + return 0; out_free: kmem_cache_destroy(kvm_vcpu_cache); @@ -3580,6 +3266,7 @@ out: kvm_x86_ops = NULL; return r; } +EXPORT_SYMBOL_GPL(kvm_init_x86); void kvm_exit_x86(void) { @@ -3593,10 +3280,10 @@ void kvm_exit_x86(void) kvm_x86_ops->hardware_unsetup(); kvm_x86_ops = NULL; } +EXPORT_SYMBOL_GPL(kvm_exit_x86); static __init int kvm_init(void) { - static struct page *bad_page; int r; r = kvm_mmu_module_init(); @@ -3605,16 +3292,15 @@ static __init int kvm_init(void) kvm_init_debug(); - kvm_init_msr_list(); + kvm_arch_init(); - if ((bad_page = alloc_page(GFP_KERNEL)) == NULL) { + bad_page = alloc_page(GFP_KERNEL | __GFP_ZERO); + + if (bad_page == NULL) { r = -ENOMEM; goto out; } - bad_page_address = page_to_pfn(bad_page) << PAGE_SHIFT; - memset(__va(bad_page_address), 0, PAGE_SIZE); - return 0; out: @@ -3627,12 +3313,9 @@ out4: static __exit void kvm_exit(void) { kvm_exit_debug(); - __free_page(pfn_to_page(bad_page_address >> PAGE_SHIFT)); + __free_page(bad_page); kvm_mmu_module_exit(); } module_init(kvm_init) module_exit(kvm_exit) - -EXPORT_SYMBOL_GPL(kvm_init_x86); -EXPORT_SYMBOL_GPL(kvm_exit_x86);