Merge branch 'devel' of master.kernel.org:/home/rmk/linux-2.6-arm
[pandora-kernel.git] / drivers / kvm / kvm_main.c
index 5603000..bcbe683 100644 (file)
  */
 
 #include "kvm.h"
+#include "x86_emulate.h"
+#include "segment_descriptor.h"
 
 #include <linux/kvm.h>
 #include <linux/module.h>
 #include <linux/errno.h>
-#include <linux/magic.h>
-#include <asm/processor.h>
 #include <linux/percpu.h>
 #include <linux/gfp.h>
-#include <asm/msr.h>
 #include <linux/mm.h>
 #include <linux/miscdevice.h>
 #include <linux/vmalloc.h>
-#include <asm/uaccess.h>
 #include <linux/reboot.h>
-#include <asm/io.h>
 #include <linux/debugfs.h>
 #include <linux/highmem.h>
 #include <linux/file.h>
-#include <asm/desc.h>
 #include <linux/sysdev.h>
 #include <linux/cpu.h>
-#include <linux/file.h>
-#include <linux/fs.h>
-#include <linux/mount.h>
 #include <linux/sched.h>
 #include <linux/cpumask.h>
 #include <linux/smp.h>
+#include <linux/anon_inodes.h>
 
-#include "x86_emulate.h"
-#include "segment_descriptor.h"
+#include <asm/processor.h>
+#include <asm/msr.h>
+#include <asm/io.h>
+#include <asm/uaccess.h>
+#include <asm/desc.h>
 
 MODULE_AUTHOR("Qumranet");
 MODULE_LICENSE("GPL");
@@ -53,8 +50,12 @@ MODULE_LICENSE("GPL");
 static DEFINE_SPINLOCK(kvm_lock);
 static LIST_HEAD(vm_list);
 
+static cpumask_t cpus_hardware_enabled;
+
 struct kvm_arch_ops *kvm_arch_ops;
 
+static void hardware_disable(void *ignored);
+
 #define STAT_OFFSET(x) offsetof(struct kvm_vcpu, stat.x)
 
 static struct kvm_stats_debugfs_item {
@@ -81,8 +82,6 @@ static struct kvm_stats_debugfs_item {
 
 static struct dentry *debugfs_dir;
 
-struct vfsmount *kvmfs_mnt;
-
 #define MAX_IO_MSRS 256
 
 #define CR0_RESEVED_BITS 0xffffffff1ffaffc0ULL
@@ -104,55 +103,6 @@ struct segment_descriptor_64 {
 static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
                           unsigned long arg);
 
-static struct inode *kvmfs_inode(struct file_operations *fops)
-{
-       int error = -ENOMEM;
-       struct inode *inode = new_inode(kvmfs_mnt->mnt_sb);
-
-       if (!inode)
-               goto eexit_1;
-
-       inode->i_fop = fops;
-
-       /*
-        * Mark the inode dirty from the very beginning,
-        * that way it will never be moved to the dirty
-        * list because mark_inode_dirty() will think
-        * that it already _is_ on the dirty list.
-        */
-       inode->i_state = I_DIRTY;
-       inode->i_mode = S_IRUSR | S_IWUSR;
-       inode->i_uid = current->fsuid;
-       inode->i_gid = current->fsgid;
-       inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
-       return inode;
-
-eexit_1:
-       return ERR_PTR(error);
-}
-
-static struct file *kvmfs_file(struct inode *inode, void *private_data)
-{
-       struct file *file = get_empty_filp();
-
-       if (!file)
-               return ERR_PTR(-ENFILE);
-
-       file->f_path.mnt = mntget(kvmfs_mnt);
-       file->f_path.dentry = d_alloc_anon(inode);
-       if (!file->f_path.dentry)
-               return ERR_PTR(-ENOMEM);
-       file->f_mapping = inode->i_mapping;
-
-       file->f_pos = 0;
-       file->f_flags = O_RDWR;
-       file->f_op = inode->i_fop;
-       file->f_mode = FMODE_READ | FMODE_WRITE;
-       file->f_version = 0;
-       file->private_data = private_data;
-       return file;
-}
-
 unsigned long segment_base(u16 selector)
 {
        struct descriptor_table gdt;
@@ -288,23 +238,6 @@ static void vcpu_load(struct kvm_vcpu *vcpu)
        kvm_arch_ops->vcpu_load(vcpu);
 }
 
-/*
- * Switches to specified vcpu, until a matching vcpu_put(). Will return NULL
- * if the slot is not populated.
- */
-static struct kvm_vcpu *vcpu_load_slot(struct kvm *kvm, int slot)
-{
-       struct kvm_vcpu *vcpu = &kvm->vcpus[slot];
-
-       mutex_lock(&vcpu->mutex);
-       if (!vcpu->vmcs) {
-               mutex_unlock(&vcpu->mutex);
-               return NULL;
-       }
-       kvm_arch_ops->vcpu_load(vcpu);
-       return vcpu;
-}
-
 static void vcpu_put(struct kvm_vcpu *vcpu)
 {
        kvm_arch_ops->vcpu_put(vcpu);
@@ -713,13 +646,6 @@ void fx_init(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(fx_init);
 
-static void do_remove_write_access(struct kvm_vcpu *vcpu, int slot)
-{
-       spin_lock(&vcpu->kvm->lock);
-       kvm_mmu_slot_remove_write_access(vcpu, slot);
-       spin_unlock(&vcpu->kvm->lock);
-}
-
 /*
  * Allocate some memory and give it an address in the guest physical address
  * space.
@@ -842,19 +768,10 @@ raced:
        *memslot = new;
        ++kvm->memory_config_version;
 
-       spin_unlock(&kvm->lock);
+       kvm_mmu_slot_remove_write_access(kvm, mem->slot);
+       kvm_flush_remote_tlbs(kvm);
 
-       for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-               struct kvm_vcpu *vcpu;
-
-               vcpu = vcpu_load_slot(kvm, i);
-               if (!vcpu)
-                       continue;
-               if (new.flags & KVM_MEM_LOG_DIRTY_PAGES)
-                       do_remove_write_access(vcpu, mem->slot);
-               kvm_mmu_reset_context(vcpu);
-               vcpu_put(vcpu);
-       }
+       spin_unlock(&kvm->lock);
 
        kvm_free_physmem_slot(&old, &new);
        return 0;
@@ -876,7 +793,6 @@ static int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
        struct kvm_memory_slot *memslot;
        int r, i;
        int n;
-       int cleared;
        unsigned long any = 0;
 
        spin_lock(&kvm->lock);
@@ -905,23 +821,11 @@ static int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
        if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n))
                goto out;
 
-       if (any) {
-               cleared = 0;
-               for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-                       struct kvm_vcpu *vcpu;
-
-                       vcpu = vcpu_load_slot(kvm, i);
-                       if (!vcpu)
-                               continue;
-                       if (!cleared) {
-                               do_remove_write_access(vcpu, log->slot);
-                               memset(memslot->dirty_bitmap, 0, n);
-                               cleared = 1;
-                       }
-                       kvm_arch_ops->tlb_flush(vcpu);
-                       vcpu_put(vcpu);
-               }
-       }
+       spin_lock(&kvm->lock);
+       kvm_mmu_slot_remove_write_access(kvm, log->slot);
+       kvm_flush_remote_tlbs(kvm);
+       memset(memslot->dirty_bitmap, 0, n);
+       spin_unlock(&kvm->lock);
 
        r = 0;
 
@@ -970,13 +874,9 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
                        break;
        kvm->naliases = n;
 
-       spin_unlock(&kvm->lock);
+       kvm_mmu_zap_all(kvm);
 
-       vcpu_load(&kvm->vcpus[0]);
-       spin_lock(&kvm->lock);
-       kvm_mmu_zap_all(&kvm->vcpus[0]);
        spin_unlock(&kvm->lock);
-       vcpu_put(&kvm->vcpus[0]);
 
        return 0;
 
@@ -1617,7 +1517,7 @@ EXPORT_SYMBOL_GPL(kvm_get_msr_common);
  * Returns 0 on success, non-0 otherwise.
  * Assumes vcpu_load() was already called.
  */
-static int get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
+int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
 {
        return kvm_arch_ops->get_msr(vcpu, msr_index, pdata);
 }
@@ -1695,7 +1595,7 @@ EXPORT_SYMBOL_GPL(kvm_set_msr_common);
  * Returns 0 on success, non-0 otherwise.
  * Assumes vcpu_load() was already called.
  */
-static int set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
+int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
 {
        return kvm_arch_ops->set_msr(vcpu, msr_index, data);
 }
@@ -2233,7 +2133,7 @@ static __init void kvm_init_msr_list(void)
  */
 static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
 {
-       return set_msr(vcpu, index, *data);
+       return kvm_set_msr(vcpu, index, *data);
 }
 
 /*
@@ -2413,34 +2313,12 @@ static int create_vcpu_fd(struct kvm_vcpu *vcpu)
        struct inode *inode;
        struct file *file;
 
+       r = anon_inode_getfd(&fd, &inode, &file,
+                            "kvm-vcpu", &kvm_vcpu_fops, vcpu);
+       if (r)
+               return r;
        atomic_inc(&vcpu->kvm->filp->f_count);
-       inode = kvmfs_inode(&kvm_vcpu_fops);
-       if (IS_ERR(inode)) {
-               r = PTR_ERR(inode);
-               goto out1;
-       }
-
-       file = kvmfs_file(inode, vcpu);
-       if (IS_ERR(file)) {
-               r = PTR_ERR(file);
-               goto out2;
-       }
-
-       r = get_unused_fd();
-       if (r < 0)
-               goto out3;
-       fd = r;
-       fd_install(fd, file);
-
        return fd;
-
-out3:
-       fput(file);
-out2:
-       iput(inode);
-out1:
-       fput(vcpu->kvm->filp);
-       return r;
 }
 
 /*
@@ -2739,7 +2617,7 @@ static long kvm_vcpu_ioctl(struct file *filp,
                break;
        }
        case KVM_GET_MSRS:
-               r = msr_io(vcpu, argp, get_msr, 1);
+               r = msr_io(vcpu, argp, kvm_get_msr, 1);
                break;
        case KVM_SET_MSRS:
                r = msr_io(vcpu, argp, do_set_msr, 0);
@@ -2905,41 +2783,18 @@ static int kvm_dev_ioctl_create_vm(void)
        struct file *file;
        struct kvm *kvm;
 
-       inode = kvmfs_inode(&kvm_vm_fops);
-       if (IS_ERR(inode)) {
-               r = PTR_ERR(inode);
-               goto out1;
-       }
-
        kvm = kvm_create_vm();
-       if (IS_ERR(kvm)) {
-               r = PTR_ERR(kvm);
-               goto out2;
+       if (IS_ERR(kvm))
+               return PTR_ERR(kvm);
+       r = anon_inode_getfd(&fd, &inode, &file, "kvm-vm", &kvm_vm_fops, kvm);
+       if (r) {
+               kvm_destroy_vm(kvm);
+               return r;
        }
 
-       file = kvmfs_file(inode, kvm);
-       if (IS_ERR(file)) {
-               r = PTR_ERR(file);
-               goto out3;
-       }
        kvm->filp = file;
 
-       r = get_unused_fd();
-       if (r < 0)
-               goto out4;
-       fd = r;
-       fd_install(fd, file);
-
        return fd;
-
-out4:
-       fput(file);
-out3:
-       kvm_destroy_vm(kvm);
-out2:
-       iput(inode);
-out1:
-       return r;
 }
 
 static long kvm_dev_ioctl(struct file *filp,
@@ -3029,7 +2884,7 @@ static int kvm_reboot(struct notifier_block *notifier, unsigned long val,
                 * in vmx root mode.
                 */
                printk(KERN_INFO "kvm: exiting hardware virtualization\n");
-               on_each_cpu(kvm_arch_ops->hardware_disable, NULL, 0, 1);
+               on_each_cpu(hardware_disable, NULL, 0, 1);
        }
        return NOTIFY_OK;
 }
@@ -3072,28 +2927,46 @@ static void decache_vcpus_on_cpu(int cpu)
        spin_unlock(&kvm_lock);
 }
 
+static void hardware_enable(void *junk)
+{
+       int cpu = raw_smp_processor_id();
+
+       if (cpu_isset(cpu, cpus_hardware_enabled))
+               return;
+       cpu_set(cpu, cpus_hardware_enabled);
+       kvm_arch_ops->hardware_enable(NULL);
+}
+
+static void hardware_disable(void *junk)
+{
+       int cpu = raw_smp_processor_id();
+
+       if (!cpu_isset(cpu, cpus_hardware_enabled))
+               return;
+       cpu_clear(cpu, cpus_hardware_enabled);
+       decache_vcpus_on_cpu(cpu);
+       kvm_arch_ops->hardware_disable(NULL);
+}
+
 static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
                           void *v)
 {
        int cpu = (long)v;
 
        switch (val) {
-       case CPU_DOWN_PREPARE:
-       case CPU_DOWN_PREPARE_FROZEN:
+       case CPU_DYING:
+       case CPU_DYING_FROZEN:
        case CPU_UP_CANCELED:
        case CPU_UP_CANCELED_FROZEN:
                printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n",
                       cpu);
-               decache_vcpus_on_cpu(cpu);
-               smp_call_function_single(cpu, kvm_arch_ops->hardware_disable,
-                                        NULL, 0, 1);
+               smp_call_function_single(cpu, hardware_disable, NULL, 0, 1);
                break;
        case CPU_ONLINE:
        case CPU_ONLINE_FROZEN:
                printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n",
                       cpu);
-               smp_call_function_single(cpu, kvm_arch_ops->hardware_enable,
-                                        NULL, 0, 1);
+               smp_call_function_single(cpu, hardware_enable, NULL, 0, 1);
                break;
        }
        return NOTIFY_OK;
@@ -3187,14 +3060,13 @@ static void kvm_exit_debug(void)
 
 static int kvm_suspend(struct sys_device *dev, pm_message_t state)
 {
-       decache_vcpus_on_cpu(raw_smp_processor_id());
-       on_each_cpu(kvm_arch_ops->hardware_disable, NULL, 0, 1);
+       hardware_disable(NULL);
        return 0;
 }
 
 static int kvm_resume(struct sys_device *dev)
 {
-       on_each_cpu(kvm_arch_ops->hardware_enable, NULL, 0, 1);
+       hardware_enable(NULL);
        return 0;
 }
 
@@ -3211,18 +3083,6 @@ static struct sys_device kvm_sysdev = {
 
 hpa_t bad_page_address;
 
-static int kvmfs_get_sb(struct file_system_type *fs_type, int flags,
-                       const char *dev_name, void *data, struct vfsmount *mnt)
-{
-       return get_sb_pseudo(fs_type, "kvm:", NULL, KVMFS_SUPER_MAGIC, mnt);
-}
-
-static struct file_system_type kvm_fs_type = {
-       .name           = "kvmfs",
-       .get_sb         = kvmfs_get_sb,
-       .kill_sb        = kill_anon_super,
-};
-
 int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module)
 {
        int r;
@@ -3247,7 +3107,7 @@ int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module)
        if (r < 0)
                goto out;
 
-       on_each_cpu(kvm_arch_ops->hardware_enable, NULL, 0, 1);
+       on_each_cpu(hardware_enable, NULL, 0, 1);
        r = register_cpu_notifier(&kvm_cpu_notifier);
        if (r)
                goto out_free_1;
@@ -3279,7 +3139,7 @@ out_free_2:
        unregister_reboot_notifier(&kvm_reboot_notifier);
        unregister_cpu_notifier(&kvm_cpu_notifier);
 out_free_1:
-       on_each_cpu(kvm_arch_ops->hardware_disable, NULL, 0, 1);
+       on_each_cpu(hardware_disable, NULL, 0, 1);
        kvm_arch_ops->hardware_unsetup();
 out:
        kvm_arch_ops = NULL;
@@ -3293,7 +3153,7 @@ void kvm_exit_arch(void)
        sysdev_class_unregister(&kvm_sysdev_class);
        unregister_reboot_notifier(&kvm_reboot_notifier);
        unregister_cpu_notifier(&kvm_cpu_notifier);
-       on_each_cpu(kvm_arch_ops->hardware_disable, NULL, 0, 1);
+       on_each_cpu(hardware_disable, NULL, 0, 1);
        kvm_arch_ops->hardware_unsetup();
        kvm_arch_ops = NULL;
 }
@@ -3307,14 +3167,6 @@ static __init int kvm_init(void)
        if (r)
                goto out4;
 
-       r = register_filesystem(&kvm_fs_type);
-       if (r)
-               goto out3;
-
-       kvmfs_mnt = kern_mount(&kvm_fs_type);
-       r = PTR_ERR(kvmfs_mnt);
-       if (IS_ERR(kvmfs_mnt))
-               goto out2;
        kvm_init_debug();
 
        kvm_init_msr_list();
@@ -3331,10 +3183,6 @@ static __init int kvm_init(void)
 
 out:
        kvm_exit_debug();
-       mntput(kvmfs_mnt);
-out2:
-       unregister_filesystem(&kvm_fs_type);
-out3:
        kvm_mmu_module_exit();
 out4:
        return r;
@@ -3344,8 +3192,6 @@ static __exit void kvm_exit(void)
 {
        kvm_exit_debug();
        __free_page(pfn_to_page(bad_page_address >> PAGE_SHIFT));
-       mntput(kvmfs_mnt);
-       unregister_filesystem(&kvm_fs_type);
        kvm_mmu_module_exit();
 }