b84cb6707f787fbde5fe5fcba2131e8ce173263d
[pandora-kernel.git] / drivers / kvm / x86.c
1 /*
2  * Kernel-based Virtual Machine driver for Linux
3  *
4  * derived from drivers/kvm/kvm_main.c
5  *
6  * Copyright (C) 2006 Qumranet, Inc.
7  *
8  * Authors:
9  *   Avi Kivity   <avi@qumranet.com>
10  *   Yaniv Kamay  <yaniv@qumranet.com>
11  *
12  * This work is licensed under the terms of the GNU GPL, version 2.  See
13  * the COPYING file in the top-level directory.
14  *
15  */
16
17 #include "kvm.h"
18 #include "x86.h"
19 #include "irq.h"
20
21 #include <linux/kvm.h>
22 #include <linux/fs.h>
23 #include <linux/vmalloc.h>
24
25 #include <asm/uaccess.h>
26
/* Exclusive upper bound on the entry count msr_io() accepts per request. */
#define MAX_IO_MSRS 256
28
29 /*
30  * List of msr numbers which we expose to userspace through KVM_GET_MSRS
31  * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
32  *
33  * This list is modified at module load time to reflect the
34  * capabilities of the host cpu.
35  */
36 static u32 msrs_to_save[] = {
37         MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
38         MSR_K6_STAR,
39 #ifdef CONFIG_X86_64
40         MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
41 #endif
42         MSR_IA32_TIME_STAMP_COUNTER,
43 };
44
/* Number of valid leading entries in msrs_to_save[]; set by kvm_init_msr_list(). */
static unsigned num_msrs_to_save;
46
47 static u32 emulated_msrs[] = {
48         MSR_IA32_MISC_ENABLE,
49 };
50
51 /*
52  * Adapt set_msr() to msr_io()'s calling convention
53  */
54 static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
55 {
56         return kvm_set_msr(vcpu, index, *data);
57 }
58
59 /*
60  * Read or write a bunch of msrs. All parameters are kernel addresses.
61  *
62  * @return number of msrs set successfully.
63  */
64 static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
65                     struct kvm_msr_entry *entries,
66                     int (*do_msr)(struct kvm_vcpu *vcpu,
67                                   unsigned index, u64 *data))
68 {
69         int i;
70
71         vcpu_load(vcpu);
72
73         for (i = 0; i < msrs->nmsrs; ++i)
74                 if (do_msr(vcpu, entries[i].index, &entries[i].data))
75                         break;
76
77         vcpu_put(vcpu);
78
79         return i;
80 }
81
82 /*
83  * Read or write a bunch of msrs. Parameters are user addresses.
84  *
85  * @return number of msrs set successfully.
86  */
87 static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
88                   int (*do_msr)(struct kvm_vcpu *vcpu,
89                                 unsigned index, u64 *data),
90                   int writeback)
91 {
92         struct kvm_msrs msrs;
93         struct kvm_msr_entry *entries;
94         int r, n;
95         unsigned size;
96
97         r = -EFAULT;
98         if (copy_from_user(&msrs, user_msrs, sizeof msrs))
99                 goto out;
100
101         r = -E2BIG;
102         if (msrs.nmsrs >= MAX_IO_MSRS)
103                 goto out;
104
105         r = -ENOMEM;
106         size = sizeof(struct kvm_msr_entry) * msrs.nmsrs;
107         entries = vmalloc(size);
108         if (!entries)
109                 goto out;
110
111         r = -EFAULT;
112         if (copy_from_user(entries, user_msrs->entries, size))
113                 goto out_free;
114
115         r = n = __msr_io(vcpu, &msrs, entries, do_msr);
116         if (r < 0)
117                 goto out_free;
118
119         r = -EFAULT;
120         if (writeback && copy_to_user(user_msrs->entries, entries, size))
121                 goto out_free;
122
123         r = n;
124
125 out_free:
126         vfree(entries);
127 out:
128         return r;
129 }
130
131 long kvm_arch_dev_ioctl(struct file *filp,
132                         unsigned int ioctl, unsigned long arg)
133 {
134         void __user *argp = (void __user *)arg;
135         long r;
136
137         switch (ioctl) {
138         case KVM_GET_MSR_INDEX_LIST: {
139                 struct kvm_msr_list __user *user_msr_list = argp;
140                 struct kvm_msr_list msr_list;
141                 unsigned n;
142
143                 r = -EFAULT;
144                 if (copy_from_user(&msr_list, user_msr_list, sizeof msr_list))
145                         goto out;
146                 n = msr_list.nmsrs;
147                 msr_list.nmsrs = num_msrs_to_save + ARRAY_SIZE(emulated_msrs);
148                 if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list))
149                         goto out;
150                 r = -E2BIG;
151                 if (n < num_msrs_to_save)
152                         goto out;
153                 r = -EFAULT;
154                 if (copy_to_user(user_msr_list->indices, &msrs_to_save,
155                                  num_msrs_to_save * sizeof(u32)))
156                         goto out;
157                 if (copy_to_user(user_msr_list->indices
158                                  + num_msrs_to_save * sizeof(u32),
159                                  &emulated_msrs,
160                                  ARRAY_SIZE(emulated_msrs) * sizeof(u32)))
161                         goto out;
162                 r = 0;
163                 break;
164         }
165         default:
166                 r = -EINVAL;
167         }
168 out:
169         return r;
170 }
171
172 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
173 {
174         kvm_x86_ops->vcpu_load(vcpu, cpu);
175 }
176
177 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
178 {
179         kvm_x86_ops->vcpu_put(vcpu);
180 }
181
182 static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
183 {
184         u64 efer;
185         int i;
186         struct kvm_cpuid_entry *e, *entry;
187
188         rdmsrl(MSR_EFER, efer);
189         entry = NULL;
190         for (i = 0; i < vcpu->cpuid_nent; ++i) {
191                 e = &vcpu->cpuid_entries[i];
192                 if (e->function == 0x80000001) {
193                         entry = e;
194                         break;
195                 }
196         }
197         if (entry && (entry->edx & (1 << 20)) && !(efer & EFER_NX)) {
198                 entry->edx &= ~(1 << 20);
199                 printk(KERN_INFO "kvm: guest NX capability removed\n");
200         }
201 }
202
203 static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
204                                     struct kvm_cpuid *cpuid,
205                                     struct kvm_cpuid_entry __user *entries)
206 {
207         int r;
208
209         r = -E2BIG;
210         if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
211                 goto out;
212         r = -EFAULT;
213         if (copy_from_user(&vcpu->cpuid_entries, entries,
214                            cpuid->nent * sizeof(struct kvm_cpuid_entry)))
215                 goto out;
216         vcpu->cpuid_nent = cpuid->nent;
217         cpuid_fix_nx_cap(vcpu);
218         return 0;
219
220 out:
221         return r;
222 }
223
224 static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
225                                     struct kvm_lapic_state *s)
226 {
227         vcpu_load(vcpu);
228         memcpy(s->regs, vcpu->apic->regs, sizeof *s);
229         vcpu_put(vcpu);
230
231         return 0;
232 }
233
234 static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
235                                     struct kvm_lapic_state *s)
236 {
237         vcpu_load(vcpu);
238         memcpy(vcpu->apic->regs, s->regs, sizeof *s);
239         kvm_apic_post_state_restore(vcpu);
240         vcpu_put(vcpu);
241
242         return 0;
243 }
244
245 long kvm_arch_vcpu_ioctl(struct file *filp,
246                          unsigned int ioctl, unsigned long arg)
247 {
248         struct kvm_vcpu *vcpu = filp->private_data;
249         void __user *argp = (void __user *)arg;
250         int r;
251
252         switch (ioctl) {
253         case KVM_GET_LAPIC: {
254                 struct kvm_lapic_state lapic;
255
256                 memset(&lapic, 0, sizeof lapic);
257                 r = kvm_vcpu_ioctl_get_lapic(vcpu, &lapic);
258                 if (r)
259                         goto out;
260                 r = -EFAULT;
261                 if (copy_to_user(argp, &lapic, sizeof lapic))
262                         goto out;
263                 r = 0;
264                 break;
265         }
266         case KVM_SET_LAPIC: {
267                 struct kvm_lapic_state lapic;
268
269                 r = -EFAULT;
270                 if (copy_from_user(&lapic, argp, sizeof lapic))
271                         goto out;
272                 r = kvm_vcpu_ioctl_set_lapic(vcpu, &lapic);;
273                 if (r)
274                         goto out;
275                 r = 0;
276                 break;
277         }
278         case KVM_SET_CPUID: {
279                 struct kvm_cpuid __user *cpuid_arg = argp;
280                 struct kvm_cpuid cpuid;
281
282                 r = -EFAULT;
283                 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
284                         goto out;
285                 r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries);
286                 if (r)
287                         goto out;
288                 break;
289         }
290         case KVM_GET_MSRS:
291                 r = msr_io(vcpu, argp, kvm_get_msr, 1);
292                 break;
293         case KVM_SET_MSRS:
294                 r = msr_io(vcpu, argp, do_set_msr, 0);
295                 break;
296         default:
297                 r = -EINVAL;
298         }
299 out:
300         return r;
301 }
302
303 static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
304 {
305         int ret;
306
307         if (addr > (unsigned int)(-3 * PAGE_SIZE))
308                 return -1;
309         ret = kvm_x86_ops->set_tss_addr(kvm, addr);
310         return ret;
311 }
312
313 static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
314                                           u32 kvm_nr_mmu_pages)
315 {
316         if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
317                 return -EINVAL;
318
319         mutex_lock(&kvm->lock);
320
321         kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
322         kvm->n_requested_mmu_pages = kvm_nr_mmu_pages;
323
324         mutex_unlock(&kvm->lock);
325         return 0;
326 }
327
328 static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
329 {
330         return kvm->n_alloc_mmu_pages;
331 }
332
333 /*
334  * Set a new alias region.  Aliases map a portion of physical memory into
335  * another portion.  This is useful for memory windows, for example the PC
336  * VGA region.
337  */
338 static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
339                                          struct kvm_memory_alias *alias)
340 {
341         int r, n;
342         struct kvm_mem_alias *p;
343
344         r = -EINVAL;
345         /* General sanity checks */
346         if (alias->memory_size & (PAGE_SIZE - 1))
347                 goto out;
348         if (alias->guest_phys_addr & (PAGE_SIZE - 1))
349                 goto out;
350         if (alias->slot >= KVM_ALIAS_SLOTS)
351                 goto out;
352         if (alias->guest_phys_addr + alias->memory_size
353             < alias->guest_phys_addr)
354                 goto out;
355         if (alias->target_phys_addr + alias->memory_size
356             < alias->target_phys_addr)
357                 goto out;
358
359         mutex_lock(&kvm->lock);
360
361         p = &kvm->aliases[alias->slot];
362         p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT;
363         p->npages = alias->memory_size >> PAGE_SHIFT;
364         p->target_gfn = alias->target_phys_addr >> PAGE_SHIFT;
365
366         for (n = KVM_ALIAS_SLOTS; n > 0; --n)
367                 if (kvm->aliases[n - 1].npages)
368                         break;
369         kvm->naliases = n;
370
371         kvm_mmu_zap_all(kvm);
372
373         mutex_unlock(&kvm->lock);
374
375         return 0;
376
377 out:
378         return r;
379 }
380
381 static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
382 {
383         int r;
384
385         r = 0;
386         switch (chip->chip_id) {
387         case KVM_IRQCHIP_PIC_MASTER:
388                 memcpy(&chip->chip.pic,
389                         &pic_irqchip(kvm)->pics[0],
390                         sizeof(struct kvm_pic_state));
391                 break;
392         case KVM_IRQCHIP_PIC_SLAVE:
393                 memcpy(&chip->chip.pic,
394                         &pic_irqchip(kvm)->pics[1],
395                         sizeof(struct kvm_pic_state));
396                 break;
397         case KVM_IRQCHIP_IOAPIC:
398                 memcpy(&chip->chip.ioapic,
399                         ioapic_irqchip(kvm),
400                         sizeof(struct kvm_ioapic_state));
401                 break;
402         default:
403                 r = -EINVAL;
404                 break;
405         }
406         return r;
407 }
408
409 static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
410 {
411         int r;
412
413         r = 0;
414         switch (chip->chip_id) {
415         case KVM_IRQCHIP_PIC_MASTER:
416                 memcpy(&pic_irqchip(kvm)->pics[0],
417                         &chip->chip.pic,
418                         sizeof(struct kvm_pic_state));
419                 break;
420         case KVM_IRQCHIP_PIC_SLAVE:
421                 memcpy(&pic_irqchip(kvm)->pics[1],
422                         &chip->chip.pic,
423                         sizeof(struct kvm_pic_state));
424                 break;
425         case KVM_IRQCHIP_IOAPIC:
426                 memcpy(ioapic_irqchip(kvm),
427                         &chip->chip.ioapic,
428                         sizeof(struct kvm_ioapic_state));
429                 break;
430         default:
431                 r = -EINVAL;
432                 break;
433         }
434         kvm_pic_update_irq(pic_irqchip(kvm));
435         return r;
436 }
437
438 long kvm_arch_vm_ioctl(struct file *filp,
439                        unsigned int ioctl, unsigned long arg)
440 {
441         struct kvm *kvm = filp->private_data;
442         void __user *argp = (void __user *)arg;
443         int r = -EINVAL;
444
445         switch (ioctl) {
446         case KVM_SET_TSS_ADDR:
447                 r = kvm_vm_ioctl_set_tss_addr(kvm, arg);
448                 if (r < 0)
449                         goto out;
450                 break;
451         case KVM_SET_MEMORY_REGION: {
452                 struct kvm_memory_region kvm_mem;
453                 struct kvm_userspace_memory_region kvm_userspace_mem;
454
455                 r = -EFAULT;
456                 if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem))
457                         goto out;
458                 kvm_userspace_mem.slot = kvm_mem.slot;
459                 kvm_userspace_mem.flags = kvm_mem.flags;
460                 kvm_userspace_mem.guest_phys_addr = kvm_mem.guest_phys_addr;
461                 kvm_userspace_mem.memory_size = kvm_mem.memory_size;
462                 r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, 0);
463                 if (r)
464                         goto out;
465                 break;
466         }
467         case KVM_SET_NR_MMU_PAGES:
468                 r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg);
469                 if (r)
470                         goto out;
471                 break;
472         case KVM_GET_NR_MMU_PAGES:
473                 r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);
474                 break;
475         case KVM_SET_MEMORY_ALIAS: {
476                 struct kvm_memory_alias alias;
477
478                 r = -EFAULT;
479                 if (copy_from_user(&alias, argp, sizeof alias))
480                         goto out;
481                 r = kvm_vm_ioctl_set_memory_alias(kvm, &alias);
482                 if (r)
483                         goto out;
484                 break;
485         }
486         case KVM_CREATE_IRQCHIP:
487                 r = -ENOMEM;
488                 kvm->vpic = kvm_create_pic(kvm);
489                 if (kvm->vpic) {
490                         r = kvm_ioapic_init(kvm);
491                         if (r) {
492                                 kfree(kvm->vpic);
493                                 kvm->vpic = NULL;
494                                 goto out;
495                         }
496                 } else
497                         goto out;
498                 break;
499         case KVM_IRQ_LINE: {
500                 struct kvm_irq_level irq_event;
501
502                 r = -EFAULT;
503                 if (copy_from_user(&irq_event, argp, sizeof irq_event))
504                         goto out;
505                 if (irqchip_in_kernel(kvm)) {
506                         mutex_lock(&kvm->lock);
507                         if (irq_event.irq < 16)
508                                 kvm_pic_set_irq(pic_irqchip(kvm),
509                                         irq_event.irq,
510                                         irq_event.level);
511                         kvm_ioapic_set_irq(kvm->vioapic,
512                                         irq_event.irq,
513                                         irq_event.level);
514                         mutex_unlock(&kvm->lock);
515                         r = 0;
516                 }
517                 break;
518         }
519         case KVM_GET_IRQCHIP: {
520                 /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
521                 struct kvm_irqchip chip;
522
523                 r = -EFAULT;
524                 if (copy_from_user(&chip, argp, sizeof chip))
525                         goto out;
526                 r = -ENXIO;
527                 if (!irqchip_in_kernel(kvm))
528                         goto out;
529                 r = kvm_vm_ioctl_get_irqchip(kvm, &chip);
530                 if (r)
531                         goto out;
532                 r = -EFAULT;
533                 if (copy_to_user(argp, &chip, sizeof chip))
534                         goto out;
535                 r = 0;
536                 break;
537         }
538         case KVM_SET_IRQCHIP: {
539                 /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
540                 struct kvm_irqchip chip;
541
542                 r = -EFAULT;
543                 if (copy_from_user(&chip, argp, sizeof chip))
544                         goto out;
545                 r = -ENXIO;
546                 if (!irqchip_in_kernel(kvm))
547                         goto out;
548                 r = kvm_vm_ioctl_set_irqchip(kvm, &chip);
549                 if (r)
550                         goto out;
551                 r = 0;
552                 break;
553         }
554         default:
555                 ;
556         }
557 out:
558         return r;
559 }
560
561 static __init void kvm_init_msr_list(void)
562 {
563         u32 dummy[2];
564         unsigned i, j;
565
566         for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) {
567                 if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
568                         continue;
569                 if (j < i)
570                         msrs_to_save[j] = msrs_to_save[i];
571                 j++;
572         }
573         num_msrs_to_save = j;
574 }
575
576 __init void kvm_arch_init(void)
577 {
578         kvm_init_msr_list();
579 }