KVM: SVM: Move msrpm offset calculation to separate function
arch/x86/kvm/svm.c
1 /*
2  * Kernel-based Virtual Machine driver for Linux
3  *
4  * AMD SVM support
5  *
6  * Copyright (C) 2006 Qumranet, Inc.
7  *
8  * Authors:
9  *   Yaniv Kamay  <yaniv@qumranet.com>
10  *   Avi Kivity   <avi@qumranet.com>
11  *
12  * This work is licensed under the terms of the GNU GPL, version 2.  See
13  * the COPYING file in the top-level directory.
14  *
15  */
16 #include <linux/kvm_host.h>
17
18 #include "irq.h"
19 #include "mmu.h"
20 #include "kvm_cache_regs.h"
21 #include "x86.h"
22
23 #include <linux/module.h>
24 #include <linux/kernel.h>
25 #include <linux/vmalloc.h>
26 #include <linux/highmem.h>
27 #include <linux/sched.h>
28 #include <linux/ftrace_event.h>
29 #include <linux/slab.h>
30
31 #include <asm/desc.h>
32
33 #include <asm/virtext.h>
34 #include "trace.h"
35
36 #define __ex(x) __kvm_handle_fault_on_reboot(x)
37
38 MODULE_AUTHOR("Qumranet");
39 MODULE_LICENSE("GPL");
40
41 #define IOPM_ALLOC_ORDER 2
42 #define MSRPM_ALLOC_ORDER 1
43
44 #define SEG_TYPE_LDT 2
45 #define SEG_TYPE_BUSY_TSS16 3
46
47 #define SVM_FEATURE_NPT  (1 << 0)
48 #define SVM_FEATURE_LBRV (1 << 1)
49 #define SVM_FEATURE_SVML (1 << 2)
50 #define SVM_FEATURE_NRIP (1 << 3)
51 #define SVM_FEATURE_PAUSE_FILTER (1 << 10)
52
53 #define NESTED_EXIT_HOST        0       /* Exit handled on host level */
54 #define NESTED_EXIT_DONE        1       /* Exit caused nested vmexit  */
55 #define NESTED_EXIT_CONTINUE    2       /* Further checks needed      */
56
57 #define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
58
59 static const u32 host_save_user_msrs[] = {
60 #ifdef CONFIG_X86_64
61         MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE,
62         MSR_FS_BASE,
63 #endif
64         MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
65 };
66
67 #define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)
68
69 struct kvm_vcpu;
70
71 struct nested_state {
72         struct vmcb *hsave;
73         u64 hsave_msr;
74         u64 vm_cr_msr;
75         u64 vmcb;
76
77         /* These are the merged vectors */
78         u32 *msrpm;
79
80         /* gpa pointers to the real vectors */
81         u64 vmcb_msrpm;
82
83         /* A VMEXIT is required but not yet emulated */
84         bool exit_required;
85
86         /* cache for intercepts of the guest */
87         u16 intercept_cr_read;
88         u16 intercept_cr_write;
89         u16 intercept_dr_read;
90         u16 intercept_dr_write;
91         u32 intercept_exceptions;
92         u64 intercept;
93
94 };
95
96 struct vcpu_svm {
97         struct kvm_vcpu vcpu;
98         struct vmcb *vmcb;
99         unsigned long vmcb_pa;
100         struct svm_cpu_data *svm_data;
101         uint64_t asid_generation;
102         uint64_t sysenter_esp;
103         uint64_t sysenter_eip;
104
105         u64 next_rip;
106
107         u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
108         u64 host_gs_base;
109
110         u32 *msrpm;
111
112         struct nested_state nested;
113
114         bool nmi_singlestep;
115
116         unsigned int3_injected;
117         unsigned long int3_rip;
118 };
119
120 #define MSR_INVALID                     0xffffffffU
121
122 /* enable NPT for AMD64 and X86 with PAE */
123 #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
124 static bool npt_enabled = true;
125 #else
126 static bool npt_enabled;
127 #endif
128 static int npt = 1;
129
130 module_param(npt, int, S_IRUGO);
131
132 static int nested = 1;
133 module_param(nested, int, S_IRUGO);
134
135 static void svm_flush_tlb(struct kvm_vcpu *vcpu);
136 static void svm_complete_interrupts(struct vcpu_svm *svm);
137
138 static int nested_svm_exit_handled(struct vcpu_svm *svm);
139 static int nested_svm_intercept(struct vcpu_svm *svm);
140 static int nested_svm_vmexit(struct vcpu_svm *svm);
141 static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
142                                       bool has_error_code, u32 error_code);
143
144 static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
145 {
146         return container_of(vcpu, struct vcpu_svm, vcpu);
147 }
148
149 static inline bool is_nested(struct vcpu_svm *svm)
150 {
151         return svm->nested.vmcb;
152 }
153
154 static inline void enable_gif(struct vcpu_svm *svm)
155 {
156         svm->vcpu.arch.hflags |= HF_GIF_MASK;
157 }
158
159 static inline void disable_gif(struct vcpu_svm *svm)
160 {
161         svm->vcpu.arch.hflags &= ~HF_GIF_MASK;
162 }
163
164 static inline bool gif_set(struct vcpu_svm *svm)
165 {
166         return !!(svm->vcpu.arch.hflags & HF_GIF_MASK);
167 }
168
169 static unsigned long iopm_base;
170
171 struct kvm_ldttss_desc {
172         u16 limit0;
173         u16 base0;
174         unsigned base1:8, type:5, dpl:2, p:1;
175         unsigned limit1:4, zero0:3, g:1, base2:8;
176         u32 base3;
177         u32 zero1;
178 } __attribute__((packed));
179
180 struct svm_cpu_data {
181         int cpu;
182
183         u64 asid_generation;
184         u32 max_asid;
185         u32 next_asid;
186         struct kvm_ldttss_desc *tss_desc;
187
188         struct page *save_area;
189 };
190
191 static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
192 static uint32_t svm_features;
193
194 struct svm_init_data {
195         int cpu;
196         int r;
197 };
198
199 static u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000};
200
201 #define NUM_MSR_MAPS ARRAY_SIZE(msrpm_ranges)
202 #define MSRS_RANGE_SIZE 2048
203 #define MSRS_IN_RANGE (MSRS_RANGE_SIZE * 8 / 2)
204
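/*
 * The MSR permission map covers three ranges of 8192 MSRs each
 * (based at 0x00000000, 0xc0000000 and 0xc0010000), with two intercept
 * bits per MSR, so each range occupies MSRS_RANGE_SIZE (2048) bytes.
 * svm_msrpm_offset() translates an MSR number into the u32 offset of
 * the word holding its intercept bits, or MSR_INVALID if the MSR is
 * not covered by the map.  For example, MSR 0xc0000080 (EFER) maps to
 * byte offset 0x80/4 + 2048 = 2080, i.e. u32 offset 520.
 */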
205 static u32 svm_msrpm_offset(u32 msr)
206 {
207         u32 offset;
208         int i;
209
210         for (i = 0; i < NUM_MSR_MAPS; i++) {
211                 if (msr < msrpm_ranges[i] ||
212                     msr >= msrpm_ranges[i] + MSRS_IN_RANGE)
213                         continue;
214
215                 offset  = (msr - msrpm_ranges[i]) / 4; /* 4 msrs per u8 */
216                 offset += (i * MSRS_RANGE_SIZE);       /* add range offset */
217
218                 /* Now we have the u8 offset - but need the u32 offset */
219                 return offset / 4;
220         }
221
222         /* MSR not in any range */
223         return MSR_INVALID;
224 }
225
226 #define MAX_INST_SIZE 15
227
228 static inline u32 svm_has(u32 feat)
229 {
230         return svm_features & feat;
231 }
232
233 static inline void clgi(void)
234 {
235         asm volatile (__ex(SVM_CLGI));
236 }
237
238 static inline void stgi(void)
239 {
240         asm volatile (__ex(SVM_STGI));
241 }
242
243 static inline void invlpga(unsigned long addr, u32 asid)
244 {
245         asm volatile (__ex(SVM_INVLPGA) : : "a"(addr), "c"(asid));
246 }
247
248 static inline void force_new_asid(struct kvm_vcpu *vcpu)
249 {
250         to_svm(vcpu)->asid_generation--;
251 }
252
253 static inline void flush_guest_tlb(struct kvm_vcpu *vcpu)
254 {
255         force_new_asid(vcpu);
256 }
257
258 static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
259 {
260         if (!npt_enabled && !(efer & EFER_LMA))
261                 efer &= ~EFER_LME;
262
263         to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME;
264         vcpu->arch.efer = efer;
265 }
266
267 static int is_external_interrupt(u32 info)
268 {
269         info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID;
270         return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR);
271 }
272
273 static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
274 {
275         struct vcpu_svm *svm = to_svm(vcpu);
276         u32 ret = 0;
277
278         if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)
279                 ret |= KVM_X86_SHADOW_INT_STI | KVM_X86_SHADOW_INT_MOV_SS;
280         return ret & mask;
281 }
282
283 static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
284 {
285         struct vcpu_svm *svm = to_svm(vcpu);
286
287         if (mask == 0)
288                 svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
289         else
290                 svm->vmcb->control.int_state |= SVM_INTERRUPT_SHADOW_MASK;
291
292 }
293
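/*
 * Advance the guest RIP past the intercepted instruction.  The intercept
 * handlers store the address of the next instruction in svm->next_rip;
 * if that is not set, fall back to the instruction emulator with
 * EMULTYPE_SKIP.  Skipping the instruction also clears the interrupt
 * shadow.
 */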
294 static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
295 {
296         struct vcpu_svm *svm = to_svm(vcpu);
297
298         if (!svm->next_rip) {
299                 if (emulate_instruction(vcpu, 0, 0, EMULTYPE_SKIP) !=
300                                 EMULATE_DONE)
301                         printk(KERN_DEBUG "%s: NOP\n", __func__);
302                 return;
303         }
304         if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE)
305                 printk(KERN_ERR "%s: ip 0x%lx next 0x%llx\n",
306                        __func__, kvm_rip_read(vcpu), svm->next_rip);
307
308         kvm_rip_write(vcpu, svm->next_rip);
309         svm_set_interrupt_shadow(vcpu, 0);
310 }
311
312 static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
313                                 bool has_error_code, u32 error_code)
314 {
315         struct vcpu_svm *svm = to_svm(vcpu);
316
317         /*
318          * If we are within a nested VM we'd better #VMEXIT and let the guest
319          * handle the exception
320          */
321         if (nested_svm_check_exception(svm, nr, has_error_code, error_code))
322                 return;
323
324         if (nr == BP_VECTOR && !svm_has(SVM_FEATURE_NRIP)) {
325                 unsigned long rip, old_rip = kvm_rip_read(&svm->vcpu);
326
327                 /*
328                  * For guest debugging where we have to reinject #BP if some
329                  * INT3 is guest-owned:
330                  * Emulate nRIP by moving RIP forward. Will fail if injection
331                  * raises a fault that is not intercepted. Still better than
332                  * failing in all cases.
333                  */
334                 skip_emulated_instruction(&svm->vcpu);
335                 rip = kvm_rip_read(&svm->vcpu);
336                 svm->int3_rip = rip + svm->vmcb->save.cs.base;
337                 svm->int3_injected = rip - old_rip;
338         }
339
340         svm->vmcb->control.event_inj = nr
341                 | SVM_EVTINJ_VALID
342                 | (has_error_code ? SVM_EVTINJ_VALID_ERR : 0)
343                 | SVM_EVTINJ_TYPE_EXEPT;
344         svm->vmcb->control.event_inj_err = error_code;
345 }
346
347 static int has_svm(void)
348 {
349         const char *msg;
350
351         if (!cpu_has_svm(&msg)) {
352                 printk(KERN_INFO "has_svm: %s\n", msg);
353                 return 0;
354         }
355
356         return 1;
357 }
358
359 static void svm_hardware_disable(void *garbage)
360 {
361         cpu_svm_disable();
362 }
363
364 static int svm_hardware_enable(void *garbage)
365 {
366
367         struct svm_cpu_data *sd;
368         uint64_t efer;
369         struct desc_ptr gdt_descr;
370         struct desc_struct *gdt;
371         int me = raw_smp_processor_id();
372
373         rdmsrl(MSR_EFER, efer);
374         if (efer & EFER_SVME)
375                 return -EBUSY;
376
377         if (!has_svm()) {
378                 printk(KERN_ERR "svm_hardware_enable: err EOPNOTSUPP on %d\n",
379                        me);
380                 return -EINVAL;
381         }
382         sd = per_cpu(svm_data, me);
383
384         if (!sd) {
385                 printk(KERN_ERR "svm_hardware_enable: svm_data is NULL on %d\n",
386                        me);
387                 return -EINVAL;
388         }
389
390         sd->asid_generation = 1;
391         sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
392         sd->next_asid = sd->max_asid + 1;
393
394         native_store_gdt(&gdt_descr);
395         gdt = (struct desc_struct *)gdt_descr.address;
396         sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
397
398         wrmsrl(MSR_EFER, efer | EFER_SVME);
399
400         wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(sd->save_area) << PAGE_SHIFT);
401
402         return 0;
403 }
404
405 static void svm_cpu_uninit(int cpu)
406 {
407         struct svm_cpu_data *sd = per_cpu(svm_data, raw_smp_processor_id());
408
409         if (!sd)
410                 return;
411
412         per_cpu(svm_data, raw_smp_processor_id()) = NULL;
413         __free_page(sd->save_area);
414         kfree(sd);
415 }
416
417 static int svm_cpu_init(int cpu)
418 {
419         struct svm_cpu_data *sd;
420         int r;
421
422         sd = kzalloc(sizeof(struct svm_cpu_data), GFP_KERNEL);
423         if (!sd)
424                 return -ENOMEM;
425         sd->cpu = cpu;
426         sd->save_area = alloc_page(GFP_KERNEL);
427         r = -ENOMEM;
428         if (!sd->save_area)
429                 goto err_1;
430
431         per_cpu(svm_data, cpu) = sd;
432
433         return 0;
434
435 err_1:
436         kfree(sd);
437         return r;
438
439 }
440
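/*
 * Each MSR owns two consecutive bits in the permission map: the even bit
 * intercepts reads, the odd bit intercepts writes.  The low four bits of
 * the MSR number select the bit pair inside the u32 returned by
 * svm_msrpm_offset().  Passing read/write as 1 clears the corresponding
 * bit and lets the guest access the MSR without a #VMEXIT.
 */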
441 static void set_msr_interception(u32 *msrpm, unsigned msr,
442                                  int read, int write)
443 {
444         u8 bit_read, bit_write;
445         unsigned long tmp;
446         u32 offset;
447
448         offset    = svm_msrpm_offset(msr);
449         BUG_ON(offset == MSR_INVALID);
450
451         bit_read  = 2 * (msr & 0x0f);
452         bit_write = 2 * (msr & 0x0f) + 1;
453         tmp       = msrpm[offset];
454
455         read  ? clear_bit(bit_read,  &tmp) : set_bit(bit_read,  &tmp);
456         write ? clear_bit(bit_write, &tmp) : set_bit(bit_write, &tmp);
457
458         msrpm[offset] = tmp;
459 }
460
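/*
 * Initialize a vcpu's MSR permission map: start with every access
 * intercepted (all bits set) and then punch holes for the MSRs the
 * guest may read and write directly.
 */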
461 static void svm_vcpu_init_msrpm(u32 *msrpm)
462 {
463         memset(msrpm, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER));
464
465 #ifdef CONFIG_X86_64
466         set_msr_interception(msrpm, MSR_GS_BASE, 1, 1);
467         set_msr_interception(msrpm, MSR_FS_BASE, 1, 1);
468         set_msr_interception(msrpm, MSR_KERNEL_GS_BASE, 1, 1);
469         set_msr_interception(msrpm, MSR_LSTAR, 1, 1);
470         set_msr_interception(msrpm, MSR_CSTAR, 1, 1);
471         set_msr_interception(msrpm, MSR_SYSCALL_MASK, 1, 1);
472 #endif
473         set_msr_interception(msrpm, MSR_K6_STAR, 1, 1);
474         set_msr_interception(msrpm, MSR_IA32_SYSENTER_CS, 1, 1);
475 }
476
477 static void svm_enable_lbrv(struct vcpu_svm *svm)
478 {
479         u32 *msrpm = svm->msrpm;
480
481         svm->vmcb->control.lbr_ctl = 1;
482         set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
483         set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
484         set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
485         set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
486 }
487
488 static void svm_disable_lbrv(struct vcpu_svm *svm)
489 {
490         u32 *msrpm = svm->msrpm;
491
492         svm->vmcb->control.lbr_ctl = 0;
493         set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 0, 0);
494         set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 0, 0);
495         set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 0, 0);
496         set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
497 }
498
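/*
 * Module-load time setup: allocate the I/O permission map with all ports
 * intercepted, enable the optional EFER bits the hardware supports,
 * create the per-cpu SVM data, read the SVM feature flags via CPUID and
 * decide whether nested paging will be used.
 */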
499 static __init int svm_hardware_setup(void)
500 {
501         int cpu;
502         struct page *iopm_pages;
503         void *iopm_va;
504         int r;
505
506         iopm_pages = alloc_pages(GFP_KERNEL, IOPM_ALLOC_ORDER);
507
508         if (!iopm_pages)
509                 return -ENOMEM;
510
511         iopm_va = page_address(iopm_pages);
512         memset(iopm_va, 0xff, PAGE_SIZE * (1 << IOPM_ALLOC_ORDER));
513         iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT;
514
515         if (boot_cpu_has(X86_FEATURE_NX))
516                 kvm_enable_efer_bits(EFER_NX);
517
518         if (boot_cpu_has(X86_FEATURE_FXSR_OPT))
519                 kvm_enable_efer_bits(EFER_FFXSR);
520
521         if (nested) {
522                 printk(KERN_INFO "kvm: Nested Virtualization enabled\n");
523                 kvm_enable_efer_bits(EFER_SVME);
524         }
525
526         for_each_possible_cpu(cpu) {
527                 r = svm_cpu_init(cpu);
528                 if (r)
529                         goto err;
530         }
531
532         svm_features = cpuid_edx(SVM_CPUID_FUNC);
533
534         if (!svm_has(SVM_FEATURE_NPT))
535                 npt_enabled = false;
536
537         if (npt_enabled && !npt) {
538                 printk(KERN_INFO "kvm: Nested Paging disabled\n");
539                 npt_enabled = false;
540         }
541
542         if (npt_enabled) {
543                 printk(KERN_INFO "kvm: Nested Paging enabled\n");
544                 kvm_enable_tdp();
545         } else
546                 kvm_disable_tdp();
547
548         return 0;
549
550 err:
551         __free_pages(iopm_pages, IOPM_ALLOC_ORDER);
552         iopm_base = 0;
553         return r;
554 }
555
556 static __exit void svm_hardware_unsetup(void)
557 {
558         int cpu;
559
560         for_each_possible_cpu(cpu)
561                 svm_cpu_uninit(cpu);
562
563         __free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER);
564         iopm_base = 0;
565 }
566
567 static void init_seg(struct vmcb_seg *seg)
568 {
569         seg->selector = 0;
570         seg->attrib = SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK |
571                       SVM_SELECTOR_WRITE_MASK; /* Read/Write Data Segment */
572         seg->limit = 0xffff;
573         seg->base = 0;
574 }
575
576 static void init_sys_seg(struct vmcb_seg *seg, uint32_t type)
577 {
578         seg->selector = 0;
579         seg->attrib = SVM_SELECTOR_P_MASK | type;
580         seg->limit = 0xffff;
581         seg->base = 0;
582 }
583
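/*
 * Set up the VMCB for the architectural reset state: intercepts for
 * CR/DR accesses, the #PF/#UD/#MC exceptions and the privileged
 * instructions, real-mode segment state with CS selector 0xf000 and
 * RIP 0xfff0, and, when nested paging is enabled, relaxed CR3, #PF and
 * INVLPG intercepts together with the nested_ctl bit.
 */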
584 static void init_vmcb(struct vcpu_svm *svm)
585 {
586         struct vmcb_control_area *control = &svm->vmcb->control;
587         struct vmcb_save_area *save = &svm->vmcb->save;
588
589         svm->vcpu.fpu_active = 1;
590
591         control->intercept_cr_read =    INTERCEPT_CR0_MASK |
592                                         INTERCEPT_CR3_MASK |
593                                         INTERCEPT_CR4_MASK;
594
595         control->intercept_cr_write =   INTERCEPT_CR0_MASK |
596                                         INTERCEPT_CR3_MASK |
597                                         INTERCEPT_CR4_MASK |
598                                         INTERCEPT_CR8_MASK;
599
600         control->intercept_dr_read =    INTERCEPT_DR0_MASK |
601                                         INTERCEPT_DR1_MASK |
602                                         INTERCEPT_DR2_MASK |
603                                         INTERCEPT_DR3_MASK |
604                                         INTERCEPT_DR4_MASK |
605                                         INTERCEPT_DR5_MASK |
606                                         INTERCEPT_DR6_MASK |
607                                         INTERCEPT_DR7_MASK;
608
609         control->intercept_dr_write =   INTERCEPT_DR0_MASK |
610                                         INTERCEPT_DR1_MASK |
611                                         INTERCEPT_DR2_MASK |
612                                         INTERCEPT_DR3_MASK |
613                                         INTERCEPT_DR4_MASK |
614                                         INTERCEPT_DR5_MASK |
615                                         INTERCEPT_DR6_MASK |
616                                         INTERCEPT_DR7_MASK;
617
618         control->intercept_exceptions = (1 << PF_VECTOR) |
619                                         (1 << UD_VECTOR) |
620                                         (1 << MC_VECTOR);
621
622
623         control->intercept =    (1ULL << INTERCEPT_INTR) |
624                                 (1ULL << INTERCEPT_NMI) |
625                                 (1ULL << INTERCEPT_SMI) |
626                                 (1ULL << INTERCEPT_SELECTIVE_CR0) |
627                                 (1ULL << INTERCEPT_CPUID) |
628                                 (1ULL << INTERCEPT_INVD) |
629                                 (1ULL << INTERCEPT_HLT) |
630                                 (1ULL << INTERCEPT_INVLPG) |
631                                 (1ULL << INTERCEPT_INVLPGA) |
632                                 (1ULL << INTERCEPT_IOIO_PROT) |
633                                 (1ULL << INTERCEPT_MSR_PROT) |
634                                 (1ULL << INTERCEPT_TASK_SWITCH) |
635                                 (1ULL << INTERCEPT_SHUTDOWN) |
636                                 (1ULL << INTERCEPT_VMRUN) |
637                                 (1ULL << INTERCEPT_VMMCALL) |
638                                 (1ULL << INTERCEPT_VMLOAD) |
639                                 (1ULL << INTERCEPT_VMSAVE) |
640                                 (1ULL << INTERCEPT_STGI) |
641                                 (1ULL << INTERCEPT_CLGI) |
642                                 (1ULL << INTERCEPT_SKINIT) |
643                                 (1ULL << INTERCEPT_WBINVD) |
644                                 (1ULL << INTERCEPT_MONITOR) |
645                                 (1ULL << INTERCEPT_MWAIT);
646
647         control->iopm_base_pa = iopm_base;
648         control->msrpm_base_pa = __pa(svm->msrpm);
649         control->tsc_offset = 0;
650         control->int_ctl = V_INTR_MASKING_MASK;
651
652         init_seg(&save->es);
653         init_seg(&save->ss);
654         init_seg(&save->ds);
655         init_seg(&save->fs);
656         init_seg(&save->gs);
657
658         save->cs.selector = 0xf000;
659         /* Executable/Readable Code Segment */
660         save->cs.attrib = SVM_SELECTOR_READ_MASK | SVM_SELECTOR_P_MASK |
661                 SVM_SELECTOR_S_MASK | SVM_SELECTOR_CODE_MASK;
662         save->cs.limit = 0xffff;
663         /*
664          * cs.base should really be 0xffff0000, but vmx can't handle that, so
665          * be consistent with it.
666          *
667          * Replace when we have real mode working for vmx.
668          */
669         save->cs.base = 0xf0000;
670
671         save->gdtr.limit = 0xffff;
672         save->idtr.limit = 0xffff;
673
674         init_sys_seg(&save->ldtr, SEG_TYPE_LDT);
675         init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);
676
677         save->efer = EFER_SVME;
678         save->dr6 = 0xffff0ff0;
679         save->dr7 = 0x400;
680         save->rflags = 2;
681         save->rip = 0x0000fff0;
682         svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip;
683
684         /*
685          * This is the guest-visible cr0 value.
686          * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0.
687          */
688         svm->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
689         kvm_set_cr0(&svm->vcpu, svm->vcpu.arch.cr0);
690
691         save->cr4 = X86_CR4_PAE;
692         /* rdx = ?? */
693
694         if (npt_enabled) {
695                 /* Setup VMCB for Nested Paging */
696                 control->nested_ctl = 1;
697                 control->intercept &= ~((1ULL << INTERCEPT_TASK_SWITCH) |
698                                         (1ULL << INTERCEPT_INVLPG));
699                 control->intercept_exceptions &= ~(1 << PF_VECTOR);
700                 control->intercept_cr_read &= ~INTERCEPT_CR3_MASK;
701                 control->intercept_cr_write &= ~INTERCEPT_CR3_MASK;
702                 save->g_pat = 0x0007040600070406ULL;
703                 save->cr3 = 0;
704                 save->cr4 = 0;
705         }
706         force_new_asid(&svm->vcpu);
707
708         svm->nested.vmcb = 0;
709         svm->vcpu.arch.hflags = 0;
710
711         if (svm_has(SVM_FEATURE_PAUSE_FILTER)) {
712                 control->pause_filter_count = 3000;
713                 control->intercept |= (1ULL << INTERCEPT_PAUSE);
714         }
715
716         enable_gif(svm);
717 }
718
719 static int svm_vcpu_reset(struct kvm_vcpu *vcpu)
720 {
721         struct vcpu_svm *svm = to_svm(vcpu);
722
723         init_vmcb(svm);
724
725         if (!kvm_vcpu_is_bsp(vcpu)) {
726                 kvm_rip_write(vcpu, 0);
727                 svm->vmcb->save.cs.base = svm->vcpu.arch.sipi_vector << 12;
728                 svm->vmcb->save.cs.selector = svm->vcpu.arch.sipi_vector << 8;
729         }
730         vcpu->arch.regs_avail = ~0;
731         vcpu->arch.regs_dirty = ~0;
732
733         return 0;
734 }
735
736 static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
737 {
738         struct vcpu_svm *svm;
739         struct page *page;
740         struct page *msrpm_pages;
741         struct page *hsave_page;
742         struct page *nested_msrpm_pages;
743         int err;
744
745         svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
746         if (!svm) {
747                 err = -ENOMEM;
748                 goto out;
749         }
750
751         err = kvm_vcpu_init(&svm->vcpu, kvm, id);
752         if (err)
753                 goto free_svm;
754
755         err = -ENOMEM;
756         page = alloc_page(GFP_KERNEL);
757         if (!page)
758                 goto uninit;
759
760         msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
761         if (!msrpm_pages)
762                 goto free_page1;
763
764         nested_msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
765         if (!nested_msrpm_pages)
766                 goto free_page2;
767
768         hsave_page = alloc_page(GFP_KERNEL);
769         if (!hsave_page)
770                 goto free_page3;
771
772         svm->nested.hsave = page_address(hsave_page);
773
774         svm->msrpm = page_address(msrpm_pages);
775         svm_vcpu_init_msrpm(svm->msrpm);
776
777         svm->nested.msrpm = page_address(nested_msrpm_pages);
778
779         svm->vmcb = page_address(page);
780         clear_page(svm->vmcb);
781         svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
782         svm->asid_generation = 0;
783         init_vmcb(svm);
784
785         fx_init(&svm->vcpu);
786         svm->vcpu.arch.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
787         if (kvm_vcpu_is_bsp(&svm->vcpu))
788                 svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
789
790         return &svm->vcpu;
791
792 free_page3:
793         __free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER);
794 free_page2:
795         __free_pages(msrpm_pages, MSRPM_ALLOC_ORDER);
796 free_page1:
797         __free_page(page);
798 uninit:
799         kvm_vcpu_uninit(&svm->vcpu);
800 free_svm:
801         kmem_cache_free(kvm_vcpu_cache, svm);
802 out:
803         return ERR_PTR(err);
804 }
805
806 static void svm_free_vcpu(struct kvm_vcpu *vcpu)
807 {
808         struct vcpu_svm *svm = to_svm(vcpu);
809
810         __free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT));
811         __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
812         __free_page(virt_to_page(svm->nested.hsave));
813         __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
814         kvm_vcpu_uninit(vcpu);
815         kmem_cache_free(kvm_vcpu_cache, svm);
816 }
817
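/*
 * Called when the vcpu is scheduled in.  If it moved to a different
 * physical CPU, compensate an unstable TSC by adjusting tsc_offset so
 * the guest still sees a monotonic TSC, and force a new ASID on the new
 * CPU.  Finally save the host MSRs that will be restored in
 * svm_vcpu_put().
 */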
818 static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
819 {
820         struct vcpu_svm *svm = to_svm(vcpu);
821         int i;
822
823         if (unlikely(cpu != vcpu->cpu)) {
824                 u64 delta;
825
826                 if (check_tsc_unstable()) {
827                         /*
828                          * Make sure that the guest sees a monotonically
829                          * increasing TSC.
830                          */
831                         delta = vcpu->arch.host_tsc - native_read_tsc();
832                         svm->vmcb->control.tsc_offset += delta;
833                         if (is_nested(svm))
834                                 svm->nested.hsave->control.tsc_offset += delta;
835                 }
836                 vcpu->cpu = cpu;
837                 kvm_migrate_timers(vcpu);
838                 svm->asid_generation = 0;
839         }
840
841         for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
842                 rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
843 }
844
845 static void svm_vcpu_put(struct kvm_vcpu *vcpu)
846 {
847         struct vcpu_svm *svm = to_svm(vcpu);
848         int i;
849
850         ++vcpu->stat.host_state_reload;
851         for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
852                 wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
853
854         vcpu->arch.host_tsc = native_read_tsc();
855 }
856
857 static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
858 {
859         return to_svm(vcpu)->vmcb->save.rflags;
860 }
861
862 static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
863 {
864         to_svm(vcpu)->vmcb->save.rflags = rflags;
865 }
866
867 static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
868 {
869         switch (reg) {
870         case VCPU_EXREG_PDPTR:
871                 BUG_ON(!npt_enabled);
872                 load_pdptrs(vcpu, vcpu->arch.cr3);
873                 break;
874         default:
875                 BUG();
876         }
877 }
878
879 static void svm_set_vintr(struct vcpu_svm *svm)
880 {
881         svm->vmcb->control.intercept |= 1ULL << INTERCEPT_VINTR;
882 }
883
884 static void svm_clear_vintr(struct vcpu_svm *svm)
885 {
886         svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_VINTR);
887 }
888
889 static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg)
890 {
891         struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save;
892
893         switch (seg) {
894         case VCPU_SREG_CS: return &save->cs;
895         case VCPU_SREG_DS: return &save->ds;
896         case VCPU_SREG_ES: return &save->es;
897         case VCPU_SREG_FS: return &save->fs;
898         case VCPU_SREG_GS: return &save->gs;
899         case VCPU_SREG_SS: return &save->ss;
900         case VCPU_SREG_TR: return &save->tr;
901         case VCPU_SREG_LDTR: return &save->ldtr;
902         }
903         BUG();
904         return NULL;
905 }
906
907 static u64 svm_get_segment_base(struct kvm_vcpu *vcpu, int seg)
908 {
909         struct vmcb_seg *s = svm_seg(vcpu, seg);
910
911         return s->base;
912 }
913
914 static void svm_get_segment(struct kvm_vcpu *vcpu,
915                             struct kvm_segment *var, int seg)
916 {
917         struct vmcb_seg *s = svm_seg(vcpu, seg);
918
919         var->base = s->base;
920         var->limit = s->limit;
921         var->selector = s->selector;
922         var->type = s->attrib & SVM_SELECTOR_TYPE_MASK;
923         var->s = (s->attrib >> SVM_SELECTOR_S_SHIFT) & 1;
924         var->dpl = (s->attrib >> SVM_SELECTOR_DPL_SHIFT) & 3;
925         var->present = (s->attrib >> SVM_SELECTOR_P_SHIFT) & 1;
926         var->avl = (s->attrib >> SVM_SELECTOR_AVL_SHIFT) & 1;
927         var->l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1;
928         var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1;
929         var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1;
930
931         /*
932          * AMD's VMCB does not have an explicit unusable field, so emulate it
933          * for cross vendor migration purposes by treating not-present segments as unusable
934          */
935         var->unusable = !var->present || (var->type == 0);
936
937         switch (seg) {
938         case VCPU_SREG_CS:
939                 /*
940                  * SVM always stores 0 for the 'G' bit in the CS selector in
941                  * the VMCB on a VMEXIT. This hurts cross-vendor migration:
942                  * Intel's VMENTRY has a check on the 'G' bit.
943                  */
944                 var->g = s->limit > 0xfffff;
945                 break;
946         case VCPU_SREG_TR:
947                 /*
948                  * Work around a bug where the busy flag in the tr selector
949                  * isn't exposed
950                  */
951                 var->type |= 0x2;
952                 break;
953         case VCPU_SREG_DS:
954         case VCPU_SREG_ES:
955         case VCPU_SREG_FS:
956         case VCPU_SREG_GS:
957                 /*
958                  * The accessed bit must always be set in the segment
959                  * descriptor cache, although it can be cleared in the
960                  * descriptor, the cached bit always remains at 1. Since
961                  * Intel has a check on this, set it here to support
962                  * cross-vendor migration.
963                  */
964                 if (!var->unusable)
965                         var->type |= 0x1;
966                 break;
967         case VCPU_SREG_SS:
968                 /*
969                  * On AMD CPUs sometimes the DB bit in the segment
970                  * descriptor is left as 1, although the whole segment has
971                  * been made unusable. Clear it here to pass an Intel VMX
972                  * entry check when cross vendor migrating.
973                  */
974                 if (var->unusable)
975                         var->db = 0;
976                 break;
977         }
978 }
979
980 static int svm_get_cpl(struct kvm_vcpu *vcpu)
981 {
982         struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save;
983
984         return save->cpl;
985 }
986
987 static void svm_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
988 {
989         struct vcpu_svm *svm = to_svm(vcpu);
990
991         dt->size = svm->vmcb->save.idtr.limit;
992         dt->address = svm->vmcb->save.idtr.base;
993 }
994
995 static void svm_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
996 {
997         struct vcpu_svm *svm = to_svm(vcpu);
998
999         svm->vmcb->save.idtr.limit = dt->size;
1000         svm->vmcb->save.idtr.base = dt->address ;
1001 }
1002
1003 static void svm_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
1004 {
1005         struct vcpu_svm *svm = to_svm(vcpu);
1006
1007         dt->size = svm->vmcb->save.gdtr.limit;
1008         dt->address = svm->vmcb->save.gdtr.base;
1009 }
1010
1011 static void svm_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
1012 {
1013         struct vcpu_svm *svm = to_svm(vcpu);
1014
1015         svm->vmcb->save.gdtr.limit = dt->size;
1016         svm->vmcb->save.gdtr.base = dt->address;
1017 }
1018
1019 static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
1020 {
1021 }
1022
1023 static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
1024 {
1025 }
1026
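/*
 * With the selective CR0 intercept the full CR0 read/write intercepts
 * are only needed while the CR0 value the guest sees differs from the
 * one in the VMCB (for example while the FPU is deactivated).  Drop the
 * intercepts when both values match again, and keep the nested (L1)
 * intercept state consistent at the same time.
 */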
1027 static void update_cr0_intercept(struct vcpu_svm *svm)
1028 {
1029         struct vmcb *vmcb = svm->vmcb;
1030         ulong gcr0 = svm->vcpu.arch.cr0;
1031         u64 *hcr0 = &svm->vmcb->save.cr0;
1032
1033         if (!svm->vcpu.fpu_active)
1034                 *hcr0 |= SVM_CR0_SELECTIVE_MASK;
1035         else
1036                 *hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK)
1037                         | (gcr0 & SVM_CR0_SELECTIVE_MASK);
1038
1039
1040         if (gcr0 == *hcr0 && svm->vcpu.fpu_active) {
1041                 vmcb->control.intercept_cr_read &= ~INTERCEPT_CR0_MASK;
1042                 vmcb->control.intercept_cr_write &= ~INTERCEPT_CR0_MASK;
1043                 if (is_nested(svm)) {
1044                         struct vmcb *hsave = svm->nested.hsave;
1045
1046                         hsave->control.intercept_cr_read  &= ~INTERCEPT_CR0_MASK;
1047                         hsave->control.intercept_cr_write &= ~INTERCEPT_CR0_MASK;
1048                         vmcb->control.intercept_cr_read  |= svm->nested.intercept_cr_read;
1049                         vmcb->control.intercept_cr_write |= svm->nested.intercept_cr_write;
1050                 }
1051         } else {
1052                 svm->vmcb->control.intercept_cr_read |= INTERCEPT_CR0_MASK;
1053                 svm->vmcb->control.intercept_cr_write |= INTERCEPT_CR0_MASK;
1054                 if (is_nested(svm)) {
1055                         struct vmcb *hsave = svm->nested.hsave;
1056
1057                         hsave->control.intercept_cr_read |= INTERCEPT_CR0_MASK;
1058                         hsave->control.intercept_cr_write |= INTERCEPT_CR0_MASK;
1059                 }
1060         }
1061 }
1062
1063 static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
1064 {
1065         struct vcpu_svm *svm = to_svm(vcpu);
1066
1067         if (is_nested(svm)) {
1068                 /*
1069                  * We are here because we run in nested mode, the host kvm
1070                  * intercepts cr0 writes but the l1 hypervisor does not.
1071                  * But the L1 hypervisor may intercept selective cr0 writes.
1072                  * This needs to be checked here.
1073                  */
1074                 unsigned long old, new;
1075
1076                 /* Remove bits that would trigger a real cr0 write intercept */
1077                 old = vcpu->arch.cr0 & SVM_CR0_SELECTIVE_MASK;
1078                 new = cr0 & SVM_CR0_SELECTIVE_MASK;
1079
1080                 if (old == new) {
1081                         /* cr0 write with ts and mp unchanged */
1082                         svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE;
1083                         if (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE)
1084                                 return;
1085                 }
1086         }
1087
1088 #ifdef CONFIG_X86_64
1089         if (vcpu->arch.efer & EFER_LME) {
1090                 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
1091                         vcpu->arch.efer |= EFER_LMA;
1092                         svm->vmcb->save.efer |= EFER_LMA | EFER_LME;
1093                 }
1094
1095                 if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) {
1096                         vcpu->arch.efer &= ~EFER_LMA;
1097                         svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME);
1098                 }
1099         }
1100 #endif
1101         vcpu->arch.cr0 = cr0;
1102
1103         if (!npt_enabled)
1104                 cr0 |= X86_CR0_PG | X86_CR0_WP;
1105
1106         if (!vcpu->fpu_active)
1107                 cr0 |= X86_CR0_TS;
1108         /*
1109          * re-enable caching here because the QEMU bios
1110          * does not do it - this results in some delay at
1111          * reboot
1112          */
1113         cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
1114         svm->vmcb->save.cr0 = cr0;
1115         update_cr0_intercept(svm);
1116 }
1117
1118 static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
1119 {
1120         unsigned long host_cr4_mce = read_cr4() & X86_CR4_MCE;
1121         unsigned long old_cr4 = to_svm(vcpu)->vmcb->save.cr4;
1122
1123         if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE))
1124                 force_new_asid(vcpu);
1125
1126         vcpu->arch.cr4 = cr4;
1127         if (!npt_enabled)
1128                 cr4 |= X86_CR4_PAE;
1129         cr4 |= host_cr4_mce;
1130         to_svm(vcpu)->vmcb->save.cr4 = cr4;
1131 }
1132
1133 static void svm_set_segment(struct kvm_vcpu *vcpu,
1134                             struct kvm_segment *var, int seg)
1135 {
1136         struct vcpu_svm *svm = to_svm(vcpu);
1137         struct vmcb_seg *s = svm_seg(vcpu, seg);
1138
1139         s->base = var->base;
1140         s->limit = var->limit;
1141         s->selector = var->selector;
1142         if (var->unusable)
1143                 s->attrib = 0;
1144         else {
1145                 s->attrib = (var->type & SVM_SELECTOR_TYPE_MASK);
1146                 s->attrib |= (var->s & 1) << SVM_SELECTOR_S_SHIFT;
1147                 s->attrib |= (var->dpl & 3) << SVM_SELECTOR_DPL_SHIFT;
1148                 s->attrib |= (var->present & 1) << SVM_SELECTOR_P_SHIFT;
1149                 s->attrib |= (var->avl & 1) << SVM_SELECTOR_AVL_SHIFT;
1150                 s->attrib |= (var->l & 1) << SVM_SELECTOR_L_SHIFT;
1151                 s->attrib |= (var->db & 1) << SVM_SELECTOR_DB_SHIFT;
1152                 s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT;
1153         }
1154         if (seg == VCPU_SREG_CS)
1155                 svm->vmcb->save.cpl
1156                         = (svm->vmcb->save.cs.attrib
1157                            >> SVM_SELECTOR_DPL_SHIFT) & 3;
1158
1159 }
1160
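/*
 * Recompute the #DB and #BP exception intercepts from the current debug
 * state: NMI single-stepping and guest-debug single-step/hardware
 * breakpoints need #DB, software breakpoints need #BP.
 */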
1161 static void update_db_intercept(struct kvm_vcpu *vcpu)
1162 {
1163         struct vcpu_svm *svm = to_svm(vcpu);
1164
1165         svm->vmcb->control.intercept_exceptions &=
1166                 ~((1 << DB_VECTOR) | (1 << BP_VECTOR));
1167
1168         if (svm->nmi_singlestep)
1169                 svm->vmcb->control.intercept_exceptions |= (1 << DB_VECTOR);
1170
1171         if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
1172                 if (vcpu->guest_debug &
1173                     (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
1174                         svm->vmcb->control.intercept_exceptions |=
1175                                 1 << DB_VECTOR;
1176                 if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
1177                         svm->vmcb->control.intercept_exceptions |=
1178                                 1 << BP_VECTOR;
1179         } else
1180                 vcpu->guest_debug = 0;
1181 }
1182
1183 static void svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
1184 {
1185         struct vcpu_svm *svm = to_svm(vcpu);
1186
1187         if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
1188                 svm->vmcb->save.dr7 = dbg->arch.debugreg[7];
1189         else
1190                 svm->vmcb->save.dr7 = vcpu->arch.dr7;
1191
1192         update_db_intercept(vcpu);
1193 }
1194
1195 static void load_host_msrs(struct kvm_vcpu *vcpu)
1196 {
1197 #ifdef CONFIG_X86_64
1198         wrmsrl(MSR_GS_BASE, to_svm(vcpu)->host_gs_base);
1199 #endif
1200 }
1201
1202 static void save_host_msrs(struct kvm_vcpu *vcpu)
1203 {
1204 #ifdef CONFIG_X86_64
1205         rdmsrl(MSR_GS_BASE, to_svm(vcpu)->host_gs_base);
1206 #endif
1207 }
1208
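/*
 * Assign the vcpu a fresh ASID from the per-cpu counter.  When the
 * counter is exhausted, start a new generation, reset it to 1 and ask
 * the hardware to flush all ASIDs so entries from the previous
 * generation cannot leak into the new one.
 */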
1209 static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd)
1210 {
1211         if (sd->next_asid > sd->max_asid) {
1212                 ++sd->asid_generation;
1213                 sd->next_asid = 1;
1214                 svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID;
1215         }
1216
1217         svm->asid_generation = sd->asid_generation;
1218         svm->vmcb->control.asid = sd->next_asid++;
1219 }
1220
1221 static int svm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *dest)
1222 {
1223         struct vcpu_svm *svm = to_svm(vcpu);
1224
1225         switch (dr) {
1226         case 0 ... 3:
1227                 *dest = vcpu->arch.db[dr];
1228                 break;
1229         case 4:
1230                 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
1231                         return EMULATE_FAIL; /* will re-inject UD */
1232                 /* fall through */
1233         case 6:
1234                 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
1235                         *dest = vcpu->arch.dr6;
1236                 else
1237                         *dest = svm->vmcb->save.dr6;
1238                 break;
1239         case 5:
1240                 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
1241                         return EMULATE_FAIL; /* will re-inject UD */
1242                 /* fall through */
1243         case 7:
1244                 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
1245                         *dest = vcpu->arch.dr7;
1246                 else
1247                         *dest = svm->vmcb->save.dr7;
1248                 break;
1249         }
1250
1251         return EMULATE_DONE;
1252 }
1253
1254 static int svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value)
1255 {
1256         struct vcpu_svm *svm = to_svm(vcpu);
1257
1258         switch (dr) {
1259         case 0 ... 3:
1260                 vcpu->arch.db[dr] = value;
1261                 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
1262                         vcpu->arch.eff_db[dr] = value;
1263                 break;
1264         case 4:
1265                 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
1266                         return EMULATE_FAIL; /* will re-inject UD */
1267                 /* fall through */
1268         case 6:
1269                 vcpu->arch.dr6 = (value & DR6_VOLATILE) | DR6_FIXED_1;
1270                 break;
1271         case 5:
1272                 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
1273                         return EMULATE_FAIL; /* will re-inject UD */
1274                 /* fall through */
1275         case 7:
1276                 vcpu->arch.dr7 = (value & DR7_VOLATILE) | DR7_FIXED_1;
1277                 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
1278                         svm->vmcb->save.dr7 = vcpu->arch.dr7;
1279                         vcpu->arch.switch_db_regs = (value & DR7_BP_EN_MASK);
1280                 }
1281                 break;
1282         }
1283
1284         return EMULATE_DONE;
1285 }
1286
1287 static int pf_interception(struct vcpu_svm *svm)
1288 {
1289         u64 fault_address;
1290         u32 error_code;
1291
1292         fault_address  = svm->vmcb->control.exit_info_2;
1293         error_code = svm->vmcb->control.exit_info_1;
1294
1295         trace_kvm_page_fault(fault_address, error_code);
1296         if (!npt_enabled && kvm_event_needs_reinjection(&svm->vcpu))
1297                 kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address);
1298         return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code);
1299 }
1300
1301 static int db_interception(struct vcpu_svm *svm)
1302 {
1303         struct kvm_run *kvm_run = svm->vcpu.run;
1304
1305         if (!(svm->vcpu.guest_debug &
1306               (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) &&
1307                 !svm->nmi_singlestep) {
1308                 kvm_queue_exception(&svm->vcpu, DB_VECTOR);
1309                 return 1;
1310         }
1311
1312         if (svm->nmi_singlestep) {
1313                 svm->nmi_singlestep = false;
1314                 if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP))
1315                         svm->vmcb->save.rflags &=
1316                                 ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
1317                 update_db_intercept(&svm->vcpu);
1318         }
1319
1320         if (svm->vcpu.guest_debug &
1321             (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) {
1322                 kvm_run->exit_reason = KVM_EXIT_DEBUG;
1323                 kvm_run->debug.arch.pc =
1324                         svm->vmcb->save.cs.base + svm->vmcb->save.rip;
1325                 kvm_run->debug.arch.exception = DB_VECTOR;
1326                 return 0;
1327         }
1328
1329         return 1;
1330 }
1331
1332 static int bp_interception(struct vcpu_svm *svm)
1333 {
1334         struct kvm_run *kvm_run = svm->vcpu.run;
1335
1336         kvm_run->exit_reason = KVM_EXIT_DEBUG;
1337         kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip;
1338         kvm_run->debug.arch.exception = BP_VECTOR;
1339         return 0;
1340 }
1341
1342 static int ud_interception(struct vcpu_svm *svm)
1343 {
1344         int er;
1345
1346         er = emulate_instruction(&svm->vcpu, 0, 0, EMULTYPE_TRAP_UD);
1347         if (er != EMULATE_DONE)
1348                 kvm_queue_exception(&svm->vcpu, UD_VECTOR);
1349         return 1;
1350 }
1351
1352 static void svm_fpu_activate(struct kvm_vcpu *vcpu)
1353 {
1354         struct vcpu_svm *svm = to_svm(vcpu);
1355         u32 excp;
1356
1357         if (is_nested(svm)) {
1358                 u32 h_excp, n_excp;
1359
1360                 h_excp  = svm->nested.hsave->control.intercept_exceptions;
1361                 n_excp  = svm->nested.intercept_exceptions;
1362                 h_excp &= ~(1 << NM_VECTOR);
1363                 excp    = h_excp | n_excp;
1364         } else {
1365                 excp  = svm->vmcb->control.intercept_exceptions;
1366                 excp &= ~(1 << NM_VECTOR);
1367         }
1368
1369         svm->vmcb->control.intercept_exceptions = excp;
1370
1371         svm->vcpu.fpu_active = 1;
1372         update_cr0_intercept(svm);
1373 }
1374
1375 static int nm_interception(struct vcpu_svm *svm)
1376 {
1377         svm_fpu_activate(&svm->vcpu);
1378         return 1;
1379 }
1380
1381 static int mc_interception(struct vcpu_svm *svm)
1382 {
1383         /*
1384          * On an #MC intercept the MCE handler is not called automatically in
1385          * the host. So do it by hand here.
1386          */
1387         asm volatile (
1388                 "int $0x12\n");
1389         /* not sure if we ever come back to this point */
1390
1391         return 1;
1392 }
1393
1394 static int shutdown_interception(struct vcpu_svm *svm)
1395 {
1396         struct kvm_run *kvm_run = svm->vcpu.run;
1397
1398         /*
1399          * VMCB is undefined after a SHUTDOWN intercept
1400          * so reinitialize it.
1401          */
1402         clear_page(svm->vmcb);
1403         init_vmcb(svm);
1404
1405         kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
1406         return 0;
1407 }
1408
1409 static int io_interception(struct vcpu_svm *svm)
1410 {
1411         u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */
1412         int size, in, string;
1413         unsigned port;
1414
1415         ++svm->vcpu.stat.io_exits;
1416
1417         svm->next_rip = svm->vmcb->control.exit_info_2;
1418
1419         string = (io_info & SVM_IOIO_STR_MASK) != 0;
1420
1421         if (string) {
1422                 if (emulate_instruction(&svm->vcpu,
1423                                         0, 0, 0) == EMULATE_DO_MMIO)
1424                         return 0;
1425                 return 1;
1426         }
1427
1428         in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
1429         port = io_info >> 16;
1430         size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
1431
1432         skip_emulated_instruction(&svm->vcpu);
1433         return kvm_emulate_pio(&svm->vcpu, in, size, port);
1434 }
1435
1436 static int nmi_interception(struct vcpu_svm *svm)
1437 {
1438         return 1;
1439 }
1440
1441 static int intr_interception(struct vcpu_svm *svm)
1442 {
1443         ++svm->vcpu.stat.irq_exits;
1444         return 1;
1445 }
1446
1447 static int nop_on_interception(struct vcpu_svm *svm)
1448 {
1449         return 1;
1450 }
1451
1452 static int halt_interception(struct vcpu_svm *svm)
1453 {
1454         svm->next_rip = kvm_rip_read(&svm->vcpu) + 1;
1455         skip_emulated_instruction(&svm->vcpu);
1456         return kvm_emulate_halt(&svm->vcpu);
1457 }
1458
1459 static int vmmcall_interception(struct vcpu_svm *svm)
1460 {
1461         svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
1462         skip_emulated_instruction(&svm->vcpu);
1463         kvm_emulate_hypercall(&svm->vcpu);
1464         return 1;
1465 }
1466
1467 static int nested_svm_check_permissions(struct vcpu_svm *svm)
1468 {
1469         if (!(svm->vcpu.arch.efer & EFER_SVME)
1470             || !is_paging(&svm->vcpu)) {
1471                 kvm_queue_exception(&svm->vcpu, UD_VECTOR);
1472                 return 1;
1473         }
1474
1475         if (svm->vmcb->save.cpl) {
1476                 kvm_inject_gp(&svm->vcpu, 0);
1477                 return 1;
1478         }
1479
1480        return 0;
1481 }
1482
1483 static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
1484                                       bool has_error_code, u32 error_code)
1485 {
1486         int vmexit;
1487
1488         if (!is_nested(svm))
1489                 return 0;
1490
1491         svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
1492         svm->vmcb->control.exit_code_hi = 0;
1493         svm->vmcb->control.exit_info_1 = error_code;
1494         svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
1495
1496         vmexit = nested_svm_intercept(svm);
1497         if (vmexit == NESTED_EXIT_DONE)
1498                 svm->nested.exit_required = true;
1499
1500         return vmexit;
1501 }
1502
1503 /* This function returns true if it is safe to enable the irq window */
1504 static inline bool nested_svm_intr(struct vcpu_svm *svm)
1505 {
1506         if (!is_nested(svm))
1507                 return true;
1508
1509         if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
1510                 return true;
1511
1512         if (!(svm->vcpu.arch.hflags & HF_HIF_MASK))
1513                 return false;
1514
1515         svm->vmcb->control.exit_code   = SVM_EXIT_INTR;
1516         svm->vmcb->control.exit_info_1 = 0;
1517         svm->vmcb->control.exit_info_2 = 0;
1518
1519         if (svm->nested.intercept & 1ULL) {
1520                 /*
1521                  * The #vmexit can't be emulated here directly because this
1522                  * code path runs with irqs and preemption disabled. A
1523                  * #vmexit emulation might sleep. Only signal request for
1524                  * the #vmexit here.
1525                  */
1526                 svm->nested.exit_required = true;
1527                 trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
1528                 return false;
1529         }
1530
1531         return true;
1532 }
1533
1534 /* This function returns true if it is safe to enable the nmi window */
1535 static inline bool nested_svm_nmi(struct vcpu_svm *svm)
1536 {
1537         if (!is_nested(svm))
1538                 return true;
1539
1540         if (!(svm->nested.intercept & (1ULL << INTERCEPT_NMI)))
1541                 return true;
1542
1543         svm->vmcb->control.exit_code = SVM_EXIT_NMI;
1544         svm->nested.exit_required = true;
1545
1546         return false;
1547 }
1548
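/*
 * Map a guest-physical page (such as the nested VMCB or the L1 MSR
 * permission map) into the host.  May sleep, so it must not be called
 * from atomic context; on failure a #GP is injected into the guest and
 * NULL is returned.  The caller releases the page with
 * nested_svm_unmap().
 */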
1549 static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, struct page **_page)
1550 {
1551         struct page *page;
1552
1553         might_sleep();
1554
1555         page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT);
1556         if (is_error_page(page))
1557                 goto error;
1558
1559         *_page = page;
1560
1561         return kmap(page);
1562
1563 error:
1564         kvm_release_page_clean(page);
1565         kvm_inject_gp(&svm->vcpu, 0);
1566
1567         return NULL;
1568 }
1569
1570 static void nested_svm_unmap(struct page *page)
1571 {
1572         kunmap(page);
1573         kvm_release_page_dirty(page);
1574 }
1575
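/*
 * Decide whether an MSR intercept taken while running the L2 guest has
 * to be reflected to the L1 hypervisor.  Bit 0 of exit_info_1 tells a
 * read (0) and a write (1) apart; the corresponding permission bit is
 * looked up in L1's own MSR permission map at nested.vmcb_msrpm.
 */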
1576 static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
1577 {
1578         u32 param = svm->vmcb->control.exit_info_1 & 1;
1579         u32 msr = svm->vcpu.arch.regs[VCPU_REGS_RCX];
1580         u32 t0, t1;
1581         int ret = NESTED_EXIT_DONE; /* used if the msrpm read below fails */
1582         u8 val;
1583
1584         if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT)))
1585                 return NESTED_EXIT_HOST;
1586
1587         switch (msr) {
1588         case 0 ... 0x1fff:
1589                 t0 = (msr * 2) % 8;
1590                 t1 = msr / 8;
1591                 break;
1592         case 0xc0000000 ... 0xc0001fff:
1593                 t0 = (8192 + msr - 0xc0000000) * 2;
1594                 t1 = (t0 / 8);
1595                 t0 %= 8;
1596                 break;
1597         case 0xc0010000 ... 0xc0011fff:
1598                 t0 = (16384 + msr - 0xc0010000) * 2;
1599                 t1 = (t0 / 8);
1600                 t0 %= 8;
1601                 break;
1602         default:
1603                 ret = NESTED_EXIT_DONE;
1604                 goto out;
1605         }
1606
1607         if (!kvm_read_guest(svm->vcpu.kvm, svm->nested.vmcb_msrpm + t1, &val, 1))
1608                 ret = val & ((1 << param) << t0) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
1609
1610 out:
1611         return ret;
1612 }
1613
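/*
 * Exits the host always handles itself, regardless of L1's intercept
 * settings: physical INTR and NMI, nested page faults while nested
 * paging is in use and guest #PF while shadow paging is in use.  A #NM
 * is handled on the host first (to activate the FPU) before the normal
 * nested intercept check continues.
 */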
1614 static int nested_svm_exit_special(struct vcpu_svm *svm)
1615 {
1616         u32 exit_code = svm->vmcb->control.exit_code;
1617
1618         switch (exit_code) {
1619         case SVM_EXIT_INTR:
1620         case SVM_EXIT_NMI:
1621                 return NESTED_EXIT_HOST;
1622         case SVM_EXIT_NPF:
1623                 /* For now we are always handling NPFs when using them */
1624                 if (npt_enabled)
1625                         return NESTED_EXIT_HOST;
1626                 break;
1627         case SVM_EXIT_EXCP_BASE + PF_VECTOR:
1628                 /* When we're shadowing, trap PFs */
1629                 if (!npt_enabled)
1630                         return NESTED_EXIT_HOST;
1631                 break;
1632         case SVM_EXIT_EXCP_BASE + NM_VECTOR:
1633                 nm_interception(svm);
1634                 break;
1635         default:
1636                 break;
1637         }
1638
1639         return NESTED_EXIT_CONTINUE;
1640 }
1641
1642 /*
1643  * Returns NESTED_EXIT_DONE if the #vmexit is intercepted by the L1 guest
1644  */
1645 static int nested_svm_intercept(struct vcpu_svm *svm)
1646 {
1647         u32 exit_code = svm->vmcb->control.exit_code;
1648         int vmexit = NESTED_EXIT_HOST;
1649
1650         switch (exit_code) {
1651         case SVM_EXIT_MSR:
1652                 vmexit = nested_svm_exit_handled_msr(svm);
1653                 break;
1654         case SVM_EXIT_READ_CR0 ... SVM_EXIT_READ_CR8: {
1655                 u32 cr_bits = 1 << (exit_code - SVM_EXIT_READ_CR0);
1656                 if (svm->nested.intercept_cr_read & cr_bits)
1657                         vmexit = NESTED_EXIT_DONE;
1658                 break;
1659         }
1660         case SVM_EXIT_WRITE_CR0 ... SVM_EXIT_WRITE_CR8: {
1661                 u32 cr_bits = 1 << (exit_code - SVM_EXIT_WRITE_CR0);
1662                 if (svm->nested.intercept_cr_write & cr_bits)
1663                         vmexit = NESTED_EXIT_DONE;
1664                 break;
1665         }
1666         case SVM_EXIT_READ_DR0 ... SVM_EXIT_READ_DR7: {
1667                 u32 dr_bits = 1 << (exit_code - SVM_EXIT_READ_DR0);
1668                 if (svm->nested.intercept_dr_read & dr_bits)
1669                         vmexit = NESTED_EXIT_DONE;
1670                 break;
1671         }
1672         case SVM_EXIT_WRITE_DR0 ... SVM_EXIT_WRITE_DR7: {
1673                 u32 dr_bits = 1 << (exit_code - SVM_EXIT_WRITE_DR0);
1674                 if (svm->nested.intercept_dr_write & dr_bits)
1675                         vmexit = NESTED_EXIT_DONE;
1676                 break;
1677         }
1678         case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
1679                 u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
1680                 if (svm->nested.intercept_exceptions & excp_bits)
1681                         vmexit = NESTED_EXIT_DONE;
1682                 break;
1683         }
1684         default: {
1685                 u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR);
1686                 if (svm->nested.intercept & exit_bits)
1687                         vmexit = NESTED_EXIT_DONE;
1688         }
1689         }
1690
1691         return vmexit;
1692 }
1693
1694 static int nested_svm_exit_handled(struct vcpu_svm *svm)
1695 {
1696         int vmexit;
1697
1698         vmexit = nested_svm_intercept(svm);
1699
1700         if (vmexit == NESTED_EXIT_DONE)
1701                 nested_svm_vmexit(svm);
1702
1703         return vmexit;
1704 }
1705
1706 static inline void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *from_vmcb)
1707 {
1708         struct vmcb_control_area *dst  = &dst_vmcb->control;
1709         struct vmcb_control_area *from = &from_vmcb->control;
1710
1711         dst->intercept_cr_read    = from->intercept_cr_read;
1712         dst->intercept_cr_write   = from->intercept_cr_write;
1713         dst->intercept_dr_read    = from->intercept_dr_read;
1714         dst->intercept_dr_write   = from->intercept_dr_write;
1715         dst->intercept_exceptions = from->intercept_exceptions;
1716         dst->intercept            = from->intercept;
1717         dst->iopm_base_pa         = from->iopm_base_pa;
1718         dst->msrpm_base_pa        = from->msrpm_base_pa;
1719         dst->tsc_offset           = from->tsc_offset;
1720         dst->asid                 = from->asid;
1721         dst->tlb_ctl              = from->tlb_ctl;
1722         dst->int_ctl              = from->int_ctl;
1723         dst->int_vector           = from->int_vector;
1724         dst->int_state            = from->int_state;
1725         dst->exit_code            = from->exit_code;
1726         dst->exit_code_hi         = from->exit_code_hi;
1727         dst->exit_info_1          = from->exit_info_1;
1728         dst->exit_info_2          = from->exit_info_2;
1729         dst->exit_int_info        = from->exit_int_info;
1730         dst->exit_int_info_err    = from->exit_int_info_err;
1731         dst->nested_ctl           = from->nested_ctl;
1732         dst->event_inj            = from->event_inj;
1733         dst->event_inj_err        = from->event_inj_err;
1734         dst->nested_cr3           = from->nested_cr3;
1735         dst->lbr_ctl              = from->lbr_ctl;
1736 }
1737
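     /*
      * Emulate a #VMEXIT into the L1 hypervisor: copy the current VMCB
      * state and exit information into the guest's nested VMCB, clear GIF,
      * restore the host state saved in hsave at VMRUN emulation time and
      * reset the MMU.  A still-pending event injection is transferred into
      * exit_int_info so that it is not lost across the emulated exit.
      */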
1738 static int nested_svm_vmexit(struct vcpu_svm *svm)
1739 {
1740         struct vmcb *nested_vmcb;
1741         struct vmcb *hsave = svm->nested.hsave;
1742         struct vmcb *vmcb = svm->vmcb;
1743         struct page *page;
1744
1745         trace_kvm_nested_vmexit_inject(vmcb->control.exit_code,
1746                                        vmcb->control.exit_info_1,
1747                                        vmcb->control.exit_info_2,
1748                                        vmcb->control.exit_int_info,
1749                                        vmcb->control.exit_int_info_err);
1750
1751         nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, &page);
1752         if (!nested_vmcb)
1753                 return 1;
1754
1755         /* Exit nested SVM mode */
1756         svm->nested.vmcb = 0;
1757
1758         /* Give the current vmcb to the guest */
1759         disable_gif(svm);
1760
1761         nested_vmcb->save.es     = vmcb->save.es;
1762         nested_vmcb->save.cs     = vmcb->save.cs;
1763         nested_vmcb->save.ss     = vmcb->save.ss;
1764         nested_vmcb->save.ds     = vmcb->save.ds;
1765         nested_vmcb->save.gdtr   = vmcb->save.gdtr;
1766         nested_vmcb->save.idtr   = vmcb->save.idtr;
1767         nested_vmcb->save.cr0    = kvm_read_cr0(&svm->vcpu);
1768         if (npt_enabled)
1769                 nested_vmcb->save.cr3    = vmcb->save.cr3;
1770         else
1771                 nested_vmcb->save.cr3    = svm->vcpu.arch.cr3;
1772         nested_vmcb->save.cr2    = vmcb->save.cr2;
1773         nested_vmcb->save.cr4    = svm->vcpu.arch.cr4;
1774         nested_vmcb->save.rflags = vmcb->save.rflags;
1775         nested_vmcb->save.rip    = vmcb->save.rip;
1776         nested_vmcb->save.rsp    = vmcb->save.rsp;
1777         nested_vmcb->save.rax    = vmcb->save.rax;
1778         nested_vmcb->save.dr7    = vmcb->save.dr7;
1779         nested_vmcb->save.dr6    = vmcb->save.dr6;
1780         nested_vmcb->save.cpl    = vmcb->save.cpl;
1781
1782         nested_vmcb->control.int_ctl           = vmcb->control.int_ctl;
1783         nested_vmcb->control.int_vector        = vmcb->control.int_vector;
1784         nested_vmcb->control.int_state         = vmcb->control.int_state;
1785         nested_vmcb->control.exit_code         = vmcb->control.exit_code;
1786         nested_vmcb->control.exit_code_hi      = vmcb->control.exit_code_hi;
1787         nested_vmcb->control.exit_info_1       = vmcb->control.exit_info_1;
1788         nested_vmcb->control.exit_info_2       = vmcb->control.exit_info_2;
1789         nested_vmcb->control.exit_int_info     = vmcb->control.exit_int_info;
1790         nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err;
1791
1792         /*
1793          * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have
1794          * to make sure that we do not lose injected events. So check event_inj
1795          * here and copy it to exit_int_info if it is valid.
1796          * exit_int_info and event_inj can't both be valid because the case
1797          * below only happens on a VMRUN instruction intercept which has
1798          * no valid exit_int_info set.
1799          */
1800         if (vmcb->control.event_inj & SVM_EVTINJ_VALID) {
1801                 struct vmcb_control_area *nc = &nested_vmcb->control;
1802
1803                 nc->exit_int_info     = vmcb->control.event_inj;
1804                 nc->exit_int_info_err = vmcb->control.event_inj_err;
1805         }
1806
1807         nested_vmcb->control.tlb_ctl           = 0;
1808         nested_vmcb->control.event_inj         = 0;
1809         nested_vmcb->control.event_inj_err     = 0;
1810
1811         /* We always set V_INTR_MASKING and remember the old value in hflags */
1812         if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
1813                 nested_vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
1814
1815         /* Restore the original control entries */
1816         copy_vmcb_control_area(vmcb, hsave);
1817
1818         kvm_clear_exception_queue(&svm->vcpu);
1819         kvm_clear_interrupt_queue(&svm->vcpu);
1820
1821         /* Restore selected save entries */
1822         svm->vmcb->save.es = hsave->save.es;
1823         svm->vmcb->save.cs = hsave->save.cs;
1824         svm->vmcb->save.ss = hsave->save.ss;
1825         svm->vmcb->save.ds = hsave->save.ds;
1826         svm->vmcb->save.gdtr = hsave->save.gdtr;
1827         svm->vmcb->save.idtr = hsave->save.idtr;
1828         svm->vmcb->save.rflags = hsave->save.rflags;
1829         svm_set_efer(&svm->vcpu, hsave->save.efer);
1830         svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE);
1831         svm_set_cr4(&svm->vcpu, hsave->save.cr4);
1832         if (npt_enabled) {
1833                 svm->vmcb->save.cr3 = hsave->save.cr3;
1834                 svm->vcpu.arch.cr3 = hsave->save.cr3;
1835         } else {
1836                 kvm_set_cr3(&svm->vcpu, hsave->save.cr3);
1837         }
1838         kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, hsave->save.rax);
1839         kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, hsave->save.rsp);
1840         kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, hsave->save.rip);
1841         svm->vmcb->save.dr7 = 0;
1842         svm->vmcb->save.cpl = 0;
1843         svm->vmcb->control.exit_int_info = 0;
1844
1845         nested_svm_unmap(page);
1846
1847         kvm_mmu_reset_context(&svm->vcpu);
1848         kvm_mmu_load(&svm->vcpu);
1849
1850         return 0;
1851 }
1852
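     /*
      * Merge our MSR permission bitmap with the one provided by the L1
      * hypervisor.  The two bitmaps are ORed so that an MSR access of the
      * nested guest still exits whenever either KVM or the L1 hypervisor
      * wants to intercept it.
      */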
1853 static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
1854 {
1855         u32 *nested_msrpm;
1856         struct page *page;
1857         int i;
1858
1859         nested_msrpm = nested_svm_map(svm, svm->nested.vmcb_msrpm, &page);
1860         if (!nested_msrpm)
1861                 return false;
1862
1863         for (i = 0; i < PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER) / 4; i++)
1864                 svm->nested.msrpm[i] = svm->msrpm[i] | nested_msrpm[i];
1865
1866         svm->vmcb->control.msrpm_base_pa = __pa(svm->nested.msrpm);
1867
1868         nested_svm_unmap(page);
1869
1870         return true;
1871 }
1872
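     /*
      * Emulate VMRUN: save the current guest state into hsave, load the
      * nested VMCB pointed to by RAX, cache its intercept bits, OR them
      * into our own intercepts and finally set GIF to start running the
      * nested guest.  Returns false if the nested VMCB cannot be mapped.
      */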
1873 static bool nested_svm_vmrun(struct vcpu_svm *svm)
1874 {
1875         struct vmcb *nested_vmcb;
1876         struct vmcb *hsave = svm->nested.hsave;
1877         struct vmcb *vmcb = svm->vmcb;
1878         struct page *page;
1879         u64 vmcb_gpa;
1880
1881         vmcb_gpa = svm->vmcb->save.rax;
1882
1883         nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
1884         if (!nested_vmcb)
1885                 return false;
1886
1887         trace_kvm_nested_vmrun(svm->vmcb->save.rip - 3, vmcb_gpa,
1888                                nested_vmcb->save.rip,
1889                                nested_vmcb->control.int_ctl,
1890                                nested_vmcb->control.event_inj,
1891                                nested_vmcb->control.nested_ctl);
1892
1893         trace_kvm_nested_intercepts(nested_vmcb->control.intercept_cr_read,
1894                                     nested_vmcb->control.intercept_cr_write,
1895                                     nested_vmcb->control.intercept_exceptions,
1896                                     nested_vmcb->control.intercept);
1897
1898         /* Clear internal status */
1899         kvm_clear_exception_queue(&svm->vcpu);
1900         kvm_clear_interrupt_queue(&svm->vcpu);
1901
1902         /*
1903          * Save the old vmcb so that we do not have to pick out what to save;
1904          * we can simply restore everything when a #VMEXIT occurs
1905          */
1906         hsave->save.es     = vmcb->save.es;
1907         hsave->save.cs     = vmcb->save.cs;
1908         hsave->save.ss     = vmcb->save.ss;
1909         hsave->save.ds     = vmcb->save.ds;
1910         hsave->save.gdtr   = vmcb->save.gdtr;
1911         hsave->save.idtr   = vmcb->save.idtr;
1912         hsave->save.efer   = svm->vcpu.arch.efer;
1913         hsave->save.cr0    = kvm_read_cr0(&svm->vcpu);
1914         hsave->save.cr4    = svm->vcpu.arch.cr4;
1915         hsave->save.rflags = vmcb->save.rflags;
1916         hsave->save.rip    = svm->next_rip;
1917         hsave->save.rsp    = vmcb->save.rsp;
1918         hsave->save.rax    = vmcb->save.rax;
1919         if (npt_enabled)
1920                 hsave->save.cr3    = vmcb->save.cr3;
1921         else
1922                 hsave->save.cr3    = svm->vcpu.arch.cr3;
1923
1924         copy_vmcb_control_area(hsave, vmcb);
1925
1926         if (svm->vmcb->save.rflags & X86_EFLAGS_IF)
1927                 svm->vcpu.arch.hflags |= HF_HIF_MASK;
1928         else
1929                 svm->vcpu.arch.hflags &= ~HF_HIF_MASK;
1930
1931         /* Load the nested guest state */
1932         svm->vmcb->save.es = nested_vmcb->save.es;
1933         svm->vmcb->save.cs = nested_vmcb->save.cs;
1934         svm->vmcb->save.ss = nested_vmcb->save.ss;
1935         svm->vmcb->save.ds = nested_vmcb->save.ds;
1936         svm->vmcb->save.gdtr = nested_vmcb->save.gdtr;
1937         svm->vmcb->save.idtr = nested_vmcb->save.idtr;
1938         svm->vmcb->save.rflags = nested_vmcb->save.rflags;
1939         svm_set_efer(&svm->vcpu, nested_vmcb->save.efer);
1940         svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0);
1941         svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4);
1942         if (npt_enabled) {
1943                 svm->vmcb->save.cr3 = nested_vmcb->save.cr3;
1944                 svm->vcpu.arch.cr3 = nested_vmcb->save.cr3;
1945         } else
1946                 kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3);
1947
1948         /* Guest paging mode is active - reset mmu */
1949         kvm_mmu_reset_context(&svm->vcpu);
1950
1951         svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2;
1952         kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax);
1953         kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, nested_vmcb->save.rsp);
1954         kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, nested_vmcb->save.rip);
1955
1956         /* In case we don't even reach vcpu_run, the fields are not updated */
1957         svm->vmcb->save.rax = nested_vmcb->save.rax;
1958         svm->vmcb->save.rsp = nested_vmcb->save.rsp;
1959         svm->vmcb->save.rip = nested_vmcb->save.rip;
1960         svm->vmcb->save.dr7 = nested_vmcb->save.dr7;
1961         svm->vmcb->save.dr6 = nested_vmcb->save.dr6;
1962         svm->vmcb->save.cpl = nested_vmcb->save.cpl;
1963
1964         svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa;
1965
1966         /* cache intercepts */
1967         svm->nested.intercept_cr_read    = nested_vmcb->control.intercept_cr_read;
1968         svm->nested.intercept_cr_write   = nested_vmcb->control.intercept_cr_write;
1969         svm->nested.intercept_dr_read    = nested_vmcb->control.intercept_dr_read;
1970         svm->nested.intercept_dr_write   = nested_vmcb->control.intercept_dr_write;
1971         svm->nested.intercept_exceptions = nested_vmcb->control.intercept_exceptions;
1972         svm->nested.intercept            = nested_vmcb->control.intercept;
1973
1974         force_new_asid(&svm->vcpu);
1975         svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
1976         if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
1977                 svm->vcpu.arch.hflags |= HF_VINTR_MASK;
1978         else
1979                 svm->vcpu.arch.hflags &= ~HF_VINTR_MASK;
1980
1981         if (svm->vcpu.arch.hflags & HF_VINTR_MASK) {
1982                 /* We only want the cr8 intercept bits of the guest */
1983                 svm->vmcb->control.intercept_cr_read &= ~INTERCEPT_CR8_MASK;
1984                 svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK;
1985         }
1986
1987         /*
1988          * We don't want a nested guest to be more powerful than the guest, so
1989          * all intercepts are ORed
1990          */
1991         svm->vmcb->control.intercept_cr_read |=
1992                 nested_vmcb->control.intercept_cr_read;
1993         svm->vmcb->control.intercept_cr_write |=
1994                 nested_vmcb->control.intercept_cr_write;
1995         svm->vmcb->control.intercept_dr_read |=
1996                 nested_vmcb->control.intercept_dr_read;
1997         svm->vmcb->control.intercept_dr_write |=
1998                 nested_vmcb->control.intercept_dr_write;
1999         svm->vmcb->control.intercept_exceptions |=
2000                 nested_vmcb->control.intercept_exceptions;
2001
2002         svm->vmcb->control.intercept |= nested_vmcb->control.intercept;
2003
2004         svm->vmcb->control.lbr_ctl = nested_vmcb->control.lbr_ctl;
2005         svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
2006         svm->vmcb->control.int_state = nested_vmcb->control.int_state;
2007         svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset;
2008         svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
2009         svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;
2010
2011         nested_svm_unmap(page);
2012
2013         /* svm->nested.vmcb is our indicator whether nested SVM is activated */
2014         svm->nested.vmcb = vmcb_gpa;
2015
2016         enable_gif(svm);
2017
2018         return true;
2019 }
2020
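     /*
      * VMLOAD and VMSAVE transfer the processor state that VMRUN does not
      * switch automatically: FS, GS, TR and LDTR including their hidden
      * parts, KERNEL_GS_BASE, the STAR/LSTAR/CSTAR/SFMASK syscall MSRs and
      * the SYSENTER MSRs.  Both emulations use this helper; only the copy
      * direction differs.
      */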
2021 static void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
2022 {
2023         to_vmcb->save.fs = from_vmcb->save.fs;
2024         to_vmcb->save.gs = from_vmcb->save.gs;
2025         to_vmcb->save.tr = from_vmcb->save.tr;
2026         to_vmcb->save.ldtr = from_vmcb->save.ldtr;
2027         to_vmcb->save.kernel_gs_base = from_vmcb->save.kernel_gs_base;
2028         to_vmcb->save.star = from_vmcb->save.star;
2029         to_vmcb->save.lstar = from_vmcb->save.lstar;
2030         to_vmcb->save.cstar = from_vmcb->save.cstar;
2031         to_vmcb->save.sfmask = from_vmcb->save.sfmask;
2032         to_vmcb->save.sysenter_cs = from_vmcb->save.sysenter_cs;
2033         to_vmcb->save.sysenter_esp = from_vmcb->save.sysenter_esp;
2034         to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip;
2035 }
2036
2037 static int vmload_interception(struct vcpu_svm *svm)
2038 {
2039         struct vmcb *nested_vmcb;
2040         struct page *page;
2041
2042         if (nested_svm_check_permissions(svm))
2043                 return 1;
2044
2045         svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
2046         skip_emulated_instruction(&svm->vcpu);
2047
2048         nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
2049         if (!nested_vmcb)
2050                 return 1;
2051
2052         nested_svm_vmloadsave(nested_vmcb, svm->vmcb);
2053         nested_svm_unmap(page);
2054
2055         return 1;
2056 }
2057
2058 static int vmsave_interception(struct vcpu_svm *svm)
2059 {
2060         struct vmcb *nested_vmcb;
2061         struct page *page;
2062
2063         if (nested_svm_check_permissions(svm))
2064                 return 1;
2065
2066         svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
2067         skip_emulated_instruction(&svm->vcpu);
2068
2069         nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
2070         if (!nested_vmcb)
2071                 return 1;
2072
2073         nested_svm_vmloadsave(svm->vmcb, nested_vmcb);
2074         nested_svm_unmap(page);
2075
2076         return 1;
2077 }
2078
2079 static int vmrun_interception(struct vcpu_svm *svm)
2080 {
2081         if (nested_svm_check_permissions(svm))
2082                 return 1;
2083
2084         svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
2085         skip_emulated_instruction(&svm->vcpu);
2086
2087         if (!nested_svm_vmrun(svm))
2088                 return 1;
2089
2090         if (!nested_svm_vmrun_msrpm(svm))
2091                 goto failed;
2092
2093         return 1;
2094
2095 failed:
2096
2097         svm->vmcb->control.exit_code    = SVM_EXIT_ERR;
2098         svm->vmcb->control.exit_code_hi = 0;
2099         svm->vmcb->control.exit_info_1  = 0;
2100         svm->vmcb->control.exit_info_2  = 0;
2101
2102         nested_svm_vmexit(svm);
2103
2104         return 1;
2105 }
2106
2107 static int stgi_interception(struct vcpu_svm *svm)
2108 {
2109         if (nested_svm_check_permissions(svm))
2110                 return 1;
2111
2112         svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
2113         skip_emulated_instruction(&svm->vcpu);
2114
2115         enable_gif(svm);
2116
2117         return 1;
2118 }
2119
2120 static int clgi_interception(struct vcpu_svm *svm)
2121 {
2122         if (nested_svm_check_permissions(svm))
2123                 return 1;
2124
2125         svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
2126         skip_emulated_instruction(&svm->vcpu);
2127
2128         disable_gif(svm);
2129
2130         /* After a CLGI no interrupts should reach the guest */
2131         svm_clear_vintr(svm);
2132         svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
2133
2134         return 1;
2135 }
2136
2137 static int invlpga_interception(struct vcpu_svm *svm)
2138 {
2139         struct kvm_vcpu *vcpu = &svm->vcpu;
2140
2141         trace_kvm_invlpga(svm->vmcb->save.rip, vcpu->arch.regs[VCPU_REGS_RCX],
2142                           vcpu->arch.regs[VCPU_REGS_RAX]);
2143
2144         /* Let's treat INVLPGA the same as INVLPG (can be optimized!) */
2145         kvm_mmu_invlpg(vcpu, vcpu->arch.regs[VCPU_REGS_RAX]);
2146
2147         svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
2148         skip_emulated_instruction(&svm->vcpu);
2149         return 1;
2150 }
2151
2152 static int skinit_interception(struct vcpu_svm *svm)
2153 {
2154         trace_kvm_skinit(svm->vmcb->save.rip, svm->vcpu.arch.regs[VCPU_REGS_RAX]);
2155
2156         kvm_queue_exception(&svm->vcpu, UD_VECTOR);
2157         return 1;
2158 }
2159
2160 static int invalid_op_interception(struct vcpu_svm *svm)
2161 {
2162         kvm_queue_exception(&svm->vcpu, UD_VECTOR);
2163         return 1;
2164 }
2165
2166 static int task_switch_interception(struct vcpu_svm *svm)
2167 {
2168         u16 tss_selector;
2169         int reason;
2170         int int_type = svm->vmcb->control.exit_int_info &
2171                 SVM_EXITINTINFO_TYPE_MASK;
2172         int int_vec = svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VEC_MASK;
2173         uint32_t type =
2174                 svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_TYPE_MASK;
2175         uint32_t idt_v =
2176                 svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID;
2177
2178         tss_selector = (u16)svm->vmcb->control.exit_info_1;
2179
2180         if (svm->vmcb->control.exit_info_2 &
2181             (1ULL << SVM_EXITINFOSHIFT_TS_REASON_IRET))
2182                 reason = TASK_SWITCH_IRET;
2183         else if (svm->vmcb->control.exit_info_2 &
2184                  (1ULL << SVM_EXITINFOSHIFT_TS_REASON_JMP))
2185                 reason = TASK_SWITCH_JMP;
2186         else if (idt_v)
2187                 reason = TASK_SWITCH_GATE;
2188         else
2189                 reason = TASK_SWITCH_CALL;
2190
2191         if (reason == TASK_SWITCH_GATE) {
2192                 switch (type) {
2193                 case SVM_EXITINTINFO_TYPE_NMI:
2194                         svm->vcpu.arch.nmi_injected = false;
2195                         break;
2196                 case SVM_EXITINTINFO_TYPE_EXEPT:
2197                         kvm_clear_exception_queue(&svm->vcpu);
2198                         break;
2199                 case SVM_EXITINTINFO_TYPE_INTR:
2200                         kvm_clear_interrupt_queue(&svm->vcpu);
2201                         break;
2202                 default:
2203                         break;
2204                 }
2205         }
2206
2207         if (reason != TASK_SWITCH_GATE ||
2208             int_type == SVM_EXITINTINFO_TYPE_SOFT ||
2209             (int_type == SVM_EXITINTINFO_TYPE_EXEPT &&
2210              (int_vec == OF_VECTOR || int_vec == BP_VECTOR)))
2211                 skip_emulated_instruction(&svm->vcpu);
2212
2213         return kvm_task_switch(&svm->vcpu, tss_selector, reason);
2214 }
2215
2216 static int cpuid_interception(struct vcpu_svm *svm)
2217 {
2218         svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
2219         kvm_emulate_cpuid(&svm->vcpu);
2220         return 1;
2221 }
2222
2223 static int iret_interception(struct vcpu_svm *svm)
2224 {
2225         ++svm->vcpu.stat.nmi_window_exits;
2226         svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET);
2227         svm->vcpu.arch.hflags |= HF_IRET_MASK;
2228         return 1;
2229 }
2230
2231 static int invlpg_interception(struct vcpu_svm *svm)
2232 {
2233         if (emulate_instruction(&svm->vcpu, 0, 0, 0) != EMULATE_DONE)
2234                 pr_unimpl(&svm->vcpu, "%s: failed\n", __func__);
2235         return 1;
2236 }
2237
2238 static int emulate_on_interception(struct vcpu_svm *svm)
2239 {
2240         if (emulate_instruction(&svm->vcpu, 0, 0, 0) != EMULATE_DONE)
2241                 pr_unimpl(&svm->vcpu, "%s: failed\n", __func__);
2242         return 1;
2243 }
2244
2245 static int cr8_write_interception(struct vcpu_svm *svm)
2246 {
2247         struct kvm_run *kvm_run = svm->vcpu.run;
2248
2249         u8 cr8_prev = kvm_get_cr8(&svm->vcpu);
2250         /* instruction emulation calls kvm_set_cr8() */
2251         emulate_instruction(&svm->vcpu, 0, 0, 0);
2252         if (irqchip_in_kernel(svm->vcpu.kvm)) {
2253                 svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK;
2254                 return 1;
2255         }
2256         if (cr8_prev <= kvm_get_cr8(&svm->vcpu))
2257                 return 1;
2258         kvm_run->exit_reason = KVM_EXIT_SET_TPR;
2259         return 0;
2260 }
2261
2262 static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
2263 {
2264         struct vcpu_svm *svm = to_svm(vcpu);
2265
2266         switch (ecx) {
2267         case MSR_IA32_TSC: {
2268                 u64 tsc_offset;
2269
2270                 if (is_nested(svm))
2271                         tsc_offset = svm->nested.hsave->control.tsc_offset;
2272                 else
2273                         tsc_offset = svm->vmcb->control.tsc_offset;
2274
2275                 *data = tsc_offset + native_read_tsc();
2276                 break;
2277         }
2278         case MSR_K6_STAR:
2279                 *data = svm->vmcb->save.star;
2280                 break;
2281 #ifdef CONFIG_X86_64
2282         case MSR_LSTAR:
2283                 *data = svm->vmcb->save.lstar;
2284                 break;
2285         case MSR_CSTAR:
2286                 *data = svm->vmcb->save.cstar;
2287                 break;
2288         case MSR_KERNEL_GS_BASE:
2289                 *data = svm->vmcb->save.kernel_gs_base;
2290                 break;
2291         case MSR_SYSCALL_MASK:
2292                 *data = svm->vmcb->save.sfmask;
2293                 break;
2294 #endif
2295         case MSR_IA32_SYSENTER_CS:
2296                 *data = svm->vmcb->save.sysenter_cs;
2297                 break;
2298         case MSR_IA32_SYSENTER_EIP:
2299                 *data = svm->sysenter_eip;
2300                 break;
2301         case MSR_IA32_SYSENTER_ESP:
2302                 *data = svm->sysenter_esp;
2303                 break;
2304         /*
2305          * Nobody will change the following 5 values in the VMCB so we can
2306          * safely return them on rdmsr. They will always be 0 until LBRV is
2307          * implemented.
2308          */
2309         case MSR_IA32_DEBUGCTLMSR:
2310                 *data = svm->vmcb->save.dbgctl;
2311                 break;
2312         case MSR_IA32_LASTBRANCHFROMIP:
2313                 *data = svm->vmcb->save.br_from;
2314                 break;
2315         case MSR_IA32_LASTBRANCHTOIP:
2316                 *data = svm->vmcb->save.br_to;
2317                 break;
2318         case MSR_IA32_LASTINTFROMIP:
2319                 *data = svm->vmcb->save.last_excp_from;
2320                 break;
2321         case MSR_IA32_LASTINTTOIP:
2322                 *data = svm->vmcb->save.last_excp_to;
2323                 break;
2324         case MSR_VM_HSAVE_PA:
2325                 *data = svm->nested.hsave_msr;
2326                 break;
2327         case MSR_VM_CR:
2328                 *data = svm->nested.vm_cr_msr;
2329                 break;
2330         case MSR_IA32_UCODE_REV:
2331                 *data = 0x01000065;
2332                 break;
2333         default:
2334                 return kvm_get_msr_common(vcpu, ecx, data);
2335         }
2336         return 0;
2337 }
2338
2339 static int rdmsr_interception(struct vcpu_svm *svm)
2340 {
2341         u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
2342         u64 data;
2343
2344         if (svm_get_msr(&svm->vcpu, ecx, &data)) {
2345                 trace_kvm_msr_read_ex(ecx);
2346                 kvm_inject_gp(&svm->vcpu, 0);
2347         } else {
2348                 trace_kvm_msr_read(ecx, data);
2349
2350                 svm->vcpu.arch.regs[VCPU_REGS_RAX] = data & 0xffffffff;
2351                 svm->vcpu.arch.regs[VCPU_REGS_RDX] = data >> 32;
2352                 svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
2353                 skip_emulated_instruction(&svm->vcpu);
2354         }
2355         return 1;
2356 }
2357
2358 static int svm_set_vm_cr(struct kvm_vcpu *vcpu, u64 data)
2359 {
2360         struct vcpu_svm *svm = to_svm(vcpu);
2361         int svm_dis, chg_mask;
2362
2363         if (data & ~SVM_VM_CR_VALID_MASK)
2364                 return 1;
2365
2366         chg_mask = SVM_VM_CR_VALID_MASK;
2367
2368         if (svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK)
2369                 chg_mask &= ~(SVM_VM_CR_SVM_LOCK_MASK | SVM_VM_CR_SVM_DIS_MASK);
2370
2371         svm->nested.vm_cr_msr &= ~chg_mask;
2372         svm->nested.vm_cr_msr |= (data & chg_mask);
2373
2374         svm_dis = svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK;
2375
2376         /* check for svm_disable while efer.svme is set */
2377         if (svm_dis && (vcpu->arch.efer & EFER_SVME))
2378                 return 1;
2379
2380         return 0;
2381 }
2382
2383 static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
2384 {
2385         struct vcpu_svm *svm = to_svm(vcpu);
2386
2387         switch (ecx) {
2388         case MSR_IA32_TSC: {
2389                 u64 tsc_offset = data - native_read_tsc();
2390                 u64 g_tsc_offset = 0;
2391
2392                 if (is_nested(svm)) {
2393                         g_tsc_offset = svm->vmcb->control.tsc_offset -
2394                                        svm->nested.hsave->control.tsc_offset;
2395                         svm->nested.hsave->control.tsc_offset = tsc_offset;
2396                 }
2397
2398                 svm->vmcb->control.tsc_offset = tsc_offset + g_tsc_offset;
2399
2400                 break;
2401         }
2402         case MSR_K6_STAR:
2403                 svm->vmcb->save.star = data;
2404                 break;
2405 #ifdef CONFIG_X86_64
2406         case MSR_LSTAR:
2407                 svm->vmcb->save.lstar = data;
2408                 break;
2409         case MSR_CSTAR:
2410                 svm->vmcb->save.cstar = data;
2411                 break;
2412         case MSR_KERNEL_GS_BASE:
2413                 svm->vmcb->save.kernel_gs_base = data;
2414                 break;
2415         case MSR_SYSCALL_MASK:
2416                 svm->vmcb->save.sfmask = data;
2417                 break;
2418 #endif
2419         case MSR_IA32_SYSENTER_CS:
2420                 svm->vmcb->save.sysenter_cs = data;
2421                 break;
2422         case MSR_IA32_SYSENTER_EIP:
2423                 svm->sysenter_eip = data;
2424                 svm->vmcb->save.sysenter_eip = data;
2425                 break;
2426         case MSR_IA32_SYSENTER_ESP:
2427                 svm->sysenter_esp = data;
2428                 svm->vmcb->save.sysenter_esp = data;
2429                 break;
2430         case MSR_IA32_DEBUGCTLMSR:
2431                 if (!svm_has(SVM_FEATURE_LBRV)) {
2432                         pr_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTL 0x%llx, nop\n",
2433                                         __func__, data);
2434                         break;
2435                 }
2436                 if (data & DEBUGCTL_RESERVED_BITS)
2437                         return 1;
2438
2439                 svm->vmcb->save.dbgctl = data;
2440                 if (data & (1ULL<<0))
2441                         svm_enable_lbrv(svm);
2442                 else
2443                         svm_disable_lbrv(svm);
2444                 break;
2445         case MSR_VM_HSAVE_PA:
2446                 svm->nested.hsave_msr = data;
2447                 break;
2448         case MSR_VM_CR:
2449                 return svm_set_vm_cr(vcpu, data);
2450         case MSR_VM_IGNNE:
2451                 pr_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
2452                 break;
2453         default:
2454                 return kvm_set_msr_common(vcpu, ecx, data);
2455         }
2456         return 0;
2457 }
2458
2459 static int wrmsr_interception(struct vcpu_svm *svm)
2460 {
2461         u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
2462         u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u)
2463                 | ((u64)(svm->vcpu.arch.regs[VCPU_REGS_RDX] & -1u) << 32);
2464
2465
2466         svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
2467         if (svm_set_msr(&svm->vcpu, ecx, data)) {
2468                 trace_kvm_msr_write_ex(ecx, data);
2469                 kvm_inject_gp(&svm->vcpu, 0);
2470         } else {
2471                 trace_kvm_msr_write(ecx, data);
2472                 skip_emulated_instruction(&svm->vcpu);
2473         }
2474         return 1;
2475 }
2476
2477 static int msr_interception(struct vcpu_svm *svm)
2478 {
2479         if (svm->vmcb->control.exit_info_1)
2480                 return wrmsr_interception(svm);
2481         else
2482                 return rdmsr_interception(svm);
2483 }
2484
2485 static int interrupt_window_interception(struct vcpu_svm *svm)
2486 {
2487         struct kvm_run *kvm_run = svm->vcpu.run;
2488
2489         svm_clear_vintr(svm);
2490         svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
2491         /*
2492          * If user space is waiting to inject interrupts, exit as soon as
2493          * possible
2494          */
2495         if (!irqchip_in_kernel(svm->vcpu.kvm) &&
2496             kvm_run->request_interrupt_window &&
2497             !kvm_cpu_has_interrupt(&svm->vcpu)) {
2498                 ++svm->vcpu.stat.irq_window_exits;
2499                 kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
2500                 return 0;
2501         }
2502
2503         return 1;
2504 }
2505
2506 static int pause_interception(struct vcpu_svm *svm)
2507 {
2508         kvm_vcpu_on_spin(&(svm->vcpu));
2509         return 1;
2510 }
2511
2512 static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = {
2513         [SVM_EXIT_READ_CR0]                     = emulate_on_interception,
2514         [SVM_EXIT_READ_CR3]                     = emulate_on_interception,
2515         [SVM_EXIT_READ_CR4]                     = emulate_on_interception,
2516         [SVM_EXIT_READ_CR8]                     = emulate_on_interception,
2517         [SVM_EXIT_CR0_SEL_WRITE]                = emulate_on_interception,
2518         [SVM_EXIT_WRITE_CR0]                    = emulate_on_interception,
2519         [SVM_EXIT_WRITE_CR3]                    = emulate_on_interception,
2520         [SVM_EXIT_WRITE_CR4]                    = emulate_on_interception,
2521         [SVM_EXIT_WRITE_CR8]                    = cr8_write_interception,
2522         [SVM_EXIT_READ_DR0]                     = emulate_on_interception,
2523         [SVM_EXIT_READ_DR1]                     = emulate_on_interception,
2524         [SVM_EXIT_READ_DR2]                     = emulate_on_interception,
2525         [SVM_EXIT_READ_DR3]                     = emulate_on_interception,
2526         [SVM_EXIT_READ_DR4]                     = emulate_on_interception,
2527         [SVM_EXIT_READ_DR5]                     = emulate_on_interception,
2528         [SVM_EXIT_READ_DR6]                     = emulate_on_interception,
2529         [SVM_EXIT_READ_DR7]                     = emulate_on_interception,
2530         [SVM_EXIT_WRITE_DR0]                    = emulate_on_interception,
2531         [SVM_EXIT_WRITE_DR1]                    = emulate_on_interception,
2532         [SVM_EXIT_WRITE_DR2]                    = emulate_on_interception,
2533         [SVM_EXIT_WRITE_DR3]                    = emulate_on_interception,
2534         [SVM_EXIT_WRITE_DR4]                    = emulate_on_interception,
2535         [SVM_EXIT_WRITE_DR5]                    = emulate_on_interception,
2536         [SVM_EXIT_WRITE_DR6]                    = emulate_on_interception,
2537         [SVM_EXIT_WRITE_DR7]                    = emulate_on_interception,
2538         [SVM_EXIT_EXCP_BASE + DB_VECTOR]        = db_interception,
2539         [SVM_EXIT_EXCP_BASE + BP_VECTOR]        = bp_interception,
2540         [SVM_EXIT_EXCP_BASE + UD_VECTOR]        = ud_interception,
2541         [SVM_EXIT_EXCP_BASE + PF_VECTOR]        = pf_interception,
2542         [SVM_EXIT_EXCP_BASE + NM_VECTOR]        = nm_interception,
2543         [SVM_EXIT_EXCP_BASE + MC_VECTOR]        = mc_interception,
2544         [SVM_EXIT_INTR]                         = intr_interception,
2545         [SVM_EXIT_NMI]                          = nmi_interception,
2546         [SVM_EXIT_SMI]                          = nop_on_interception,
2547         [SVM_EXIT_INIT]                         = nop_on_interception,
2548         [SVM_EXIT_VINTR]                        = interrupt_window_interception,
2549         [SVM_EXIT_CPUID]                        = cpuid_interception,
2550         [SVM_EXIT_IRET]                         = iret_interception,
2551         [SVM_EXIT_INVD]                         = emulate_on_interception,
2552         [SVM_EXIT_PAUSE]                        = pause_interception,
2553         [SVM_EXIT_HLT]                          = halt_interception,
2554         [SVM_EXIT_INVLPG]                       = invlpg_interception,
2555         [SVM_EXIT_INVLPGA]                      = invlpga_interception,
2556         [SVM_EXIT_IOIO]                         = io_interception,
2557         [SVM_EXIT_MSR]                          = msr_interception,
2558         [SVM_EXIT_TASK_SWITCH]                  = task_switch_interception,
2559         [SVM_EXIT_SHUTDOWN]                     = shutdown_interception,
2560         [SVM_EXIT_VMRUN]                        = vmrun_interception,
2561         [SVM_EXIT_VMMCALL]                      = vmmcall_interception,
2562         [SVM_EXIT_VMLOAD]                       = vmload_interception,
2563         [SVM_EXIT_VMSAVE]                       = vmsave_interception,
2564         [SVM_EXIT_STGI]                         = stgi_interception,
2565         [SVM_EXIT_CLGI]                         = clgi_interception,
2566         [SVM_EXIT_SKINIT]                       = skinit_interception,
2567         [SVM_EXIT_WBINVD]                       = emulate_on_interception,
2568         [SVM_EXIT_MONITOR]                      = invalid_op_interception,
2569         [SVM_EXIT_MWAIT]                        = invalid_op_interception,
2570         [SVM_EXIT_NPF]                          = pf_interception,
2571 };
2572
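     /*
      * Top level exit handler.  While the vcpu is in guest mode the exit is
      * first offered to the nested #vmexit logic; only exits that are not
      * claimed by the L1 hypervisor are dispatched via svm_exit_handlers[].
      */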
2573 static int handle_exit(struct kvm_vcpu *vcpu)
2574 {
2575         struct vcpu_svm *svm = to_svm(vcpu);
2576         struct kvm_run *kvm_run = vcpu->run;
2577         u32 exit_code = svm->vmcb->control.exit_code;
2578
2579         trace_kvm_exit(exit_code, svm->vmcb->save.rip);
2580
2581         if (unlikely(svm->nested.exit_required)) {
2582                 nested_svm_vmexit(svm);
2583                 svm->nested.exit_required = false;
2584
2585                 return 1;
2586         }
2587
2588         if (is_nested(svm)) {
2589                 int vmexit;
2590
2591                 trace_kvm_nested_vmexit(svm->vmcb->save.rip, exit_code,
2592                                         svm->vmcb->control.exit_info_1,
2593                                         svm->vmcb->control.exit_info_2,
2594                                         svm->vmcb->control.exit_int_info,
2595                                         svm->vmcb->control.exit_int_info_err);
2596
2597                 vmexit = nested_svm_exit_special(svm);
2598
2599                 if (vmexit == NESTED_EXIT_CONTINUE)
2600                         vmexit = nested_svm_exit_handled(svm);
2601
2602                 if (vmexit == NESTED_EXIT_DONE)
2603                         return 1;
2604         }
2605
2606         svm_complete_interrupts(svm);
2607
2608         if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR0_MASK))
2609                 vcpu->arch.cr0 = svm->vmcb->save.cr0;
2610         if (npt_enabled)
2611                 vcpu->arch.cr3 = svm->vmcb->save.cr3;
2612
2613         if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) {
2614                 kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
2615                 kvm_run->fail_entry.hardware_entry_failure_reason
2616                         = svm->vmcb->control.exit_code;
2617                 return 0;
2618         }
2619
2620         if (is_external_interrupt(svm->vmcb->control.exit_int_info) &&
2621             exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR &&
2622             exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH)
2623                 printk(KERN_ERR "%s: unexpected exit_int_info 0x%x "
2624                        "exit_code 0x%x\n",
2625                        __func__, svm->vmcb->control.exit_int_info,
2626                        exit_code);
2627
2628         if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
2629             || !svm_exit_handlers[exit_code]) {
2630                 kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
2631                 kvm_run->hw.hardware_exit_reason = exit_code;
2632                 return 0;
2633         }
2634
2635         return svm_exit_handlers[exit_code](svm);
2636 }
2637
2638 static void reload_tss(struct kvm_vcpu *vcpu)
2639 {
2640         int cpu = raw_smp_processor_id();
2641
2642         struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
2643         sd->tss_desc->type = 9; /* available 32/64-bit TSS */
2644         load_TR_desc();
2645 }
2646
2647 static void pre_svm_run(struct vcpu_svm *svm)
2648 {
2649         int cpu = raw_smp_processor_id();
2650
2651         struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
2652
2653         svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
2654         /* FIXME: handle wraparound of asid_generation */
2655         if (svm->asid_generation != sd->asid_generation)
2656                 new_asid(svm, sd);
2657 }
2658
2659 static void svm_inject_nmi(struct kvm_vcpu *vcpu)
2660 {
2661         struct vcpu_svm *svm = to_svm(vcpu);
2662
2663         svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;
2664         vcpu->arch.hflags |= HF_NMI_MASK;
2665         svm->vmcb->control.intercept |= (1UL << INTERCEPT_IRET);
2666         ++vcpu->stat.nmi_injections;
2667 }
2668
2669 static inline void svm_inject_irq(struct vcpu_svm *svm, int irq)
2670 {
2671         struct vmcb_control_area *control;
2672
2673         trace_kvm_inj_virq(irq);
2674
2675         ++svm->vcpu.stat.irq_injections;
2676         control = &svm->vmcb->control;
2677         control->int_vector = irq;
2678         control->int_ctl &= ~V_INTR_PRIO_MASK;
2679         control->int_ctl |= V_IRQ_MASK |
2680                 ((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT);
2681 }
2682
2683 static void svm_set_irq(struct kvm_vcpu *vcpu)
2684 {
2685         struct vcpu_svm *svm = to_svm(vcpu);
2686
2687         BUG_ON(!(gif_set(svm)));
2688
2689         svm->vmcb->control.event_inj = vcpu->arch.interrupt.nr |
2690                 SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR;
2691 }
2692
2693 static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
2694 {
2695         struct vcpu_svm *svm = to_svm(vcpu);
2696
2697         if (is_nested(svm) && (vcpu->arch.hflags & HF_VINTR_MASK))
2698                 return;
2699
2700         if (irr == -1)
2701                 return;
2702
2703         if (tpr >= irr)
2704                 svm->vmcb->control.intercept_cr_write |= INTERCEPT_CR8_MASK;
2705 }
2706
2707 static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
2708 {
2709         struct vcpu_svm *svm = to_svm(vcpu);
2710         struct vmcb *vmcb = svm->vmcb;
2711         return !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) &&
2712                 !(svm->vcpu.arch.hflags & HF_NMI_MASK);
2713 }
2714
2715 static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu)
2716 {
2717         struct vcpu_svm *svm = to_svm(vcpu);
2718
2719         return !!(svm->vcpu.arch.hflags & HF_NMI_MASK);
2720 }
2721
2722 static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
2723 {
2724         struct vcpu_svm *svm = to_svm(vcpu);
2725
2726         if (masked) {
2727                 svm->vcpu.arch.hflags |= HF_NMI_MASK;
2728                 svm->vmcb->control.intercept |= (1UL << INTERCEPT_IRET);
2729         } else {
2730                 svm->vcpu.arch.hflags &= ~HF_NMI_MASK;
2731                 svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET);
2732         }
2733 }
2734
2735 static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
2736 {
2737         struct vcpu_svm *svm = to_svm(vcpu);
2738         struct vmcb *vmcb = svm->vmcb;
2739         int ret;
2740
2741         if (!gif_set(svm) ||
2742              (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK))
2743                 return 0;
2744
2745         ret = !!(vmcb->save.rflags & X86_EFLAGS_IF);
2746
2747         if (is_nested(svm))
2748                 return ret && !(svm->vcpu.arch.hflags & HF_VINTR_MASK);
2749
2750         return ret;
2751 }
2752
2753 static void enable_irq_window(struct kvm_vcpu *vcpu)
2754 {
2755         struct vcpu_svm *svm = to_svm(vcpu);
2756
2757         /*
2758          * In case GIF=0 we can't rely on the CPU to tell us when GIF becomes
2759          * 1, because that's a separate STGI/VMRUN intercept.  The next time we
2760          * get that intercept, this function will be called again and we will
2761          * then set up the vintr intercept.
2762          */
2763         if (gif_set(svm) && nested_svm_intr(svm)) {
2764                 svm_set_vintr(svm);
2765                 svm_inject_irq(svm, 0x0);
2766         }
2767 }
2768
2769 static void enable_nmi_window(struct kvm_vcpu *vcpu)
2770 {
2771         struct vcpu_svm *svm = to_svm(vcpu);
2772
2773         if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK))
2774             == HF_NMI_MASK)
2775                 return; /* IRET will cause a vm exit */
2776
2777         /*
2778          * Something prevents NMI from being injected. Single step over the
2779          * problem (IRET, exception injection or interrupt shadow)
2780          */
2781         if (gif_set(svm) && nested_svm_nmi(svm)) {
2782                 svm->nmi_singlestep = true;
2783                 svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
2784                 update_db_intercept(vcpu);
2785         }
2786 }
2787
2788 static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
2789 {
2790         return 0;
2791 }
2792
2793 static void svm_flush_tlb(struct kvm_vcpu *vcpu)
2794 {
2795         force_new_asid(vcpu);
2796 }
2797
2798 static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu)
2799 {
2800 }
2801
2802 static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu)
2803 {
2804         struct vcpu_svm *svm = to_svm(vcpu);
2805
2806         if (is_nested(svm) && (vcpu->arch.hflags & HF_VINTR_MASK))
2807                 return;
2808
2809         if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR8_MASK)) {
2810                 int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK;
2811                 kvm_set_cr8(vcpu, cr8);
2812         }
2813 }
2814
2815 static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu)
2816 {
2817         struct vcpu_svm *svm = to_svm(vcpu);
2818         u64 cr8;
2819
2820         if (is_nested(svm) && (vcpu->arch.hflags & HF_VINTR_MASK))
2821                 return;
2822
2823         cr8 = kvm_get_cr8(vcpu);
2824         svm->vmcb->control.int_ctl &= ~V_TPR_MASK;
2825         svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK;
2826 }
2827
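     /*
      * Transfer event information latched in exit_int_info back into KVM's
      * queues so that an interrupt, NMI or exception that was pending at
      * #vmexit time gets re-injected on the next entry.  Software
      * exceptions are not re-queued; if we emulated an INT3 before, RIP is
      * rewound instead so that the instruction is re-executed.
      */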
2828 static void svm_complete_interrupts(struct vcpu_svm *svm)
2829 {
2830         u8 vector;
2831         int type;
2832         u32 exitintinfo = svm->vmcb->control.exit_int_info;
2833         unsigned int3_injected = svm->int3_injected;
2834
2835         svm->int3_injected = 0;
2836
2837         if (svm->vcpu.arch.hflags & HF_IRET_MASK)
2838                 svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK);
2839
2840         svm->vcpu.arch.nmi_injected = false;
2841         kvm_clear_exception_queue(&svm->vcpu);
2842         kvm_clear_interrupt_queue(&svm->vcpu);
2843
2844         if (!(exitintinfo & SVM_EXITINTINFO_VALID))
2845                 return;
2846
2847         vector = exitintinfo & SVM_EXITINTINFO_VEC_MASK;
2848         type = exitintinfo & SVM_EXITINTINFO_TYPE_MASK;
2849
2850         switch (type) {
2851         case SVM_EXITINTINFO_TYPE_NMI:
2852                 svm->vcpu.arch.nmi_injected = true;
2853                 break;
2854         case SVM_EXITINTINFO_TYPE_EXEPT:
2855                 if (is_nested(svm))
2856                         break;
2857                 /*
2858                  * In case of software exceptions, do not reinject the vector,
2859                  * but re-execute the instruction instead. Rewind RIP first
2860                  * if we emulated INT3 before.
2861                  */
2862                 if (kvm_exception_is_soft(vector)) {
2863                         if (vector == BP_VECTOR && int3_injected &&
2864                             kvm_is_linear_rip(&svm->vcpu, svm->int3_rip))
2865                                 kvm_rip_write(&svm->vcpu,
2866                                               kvm_rip_read(&svm->vcpu) -
2867                                               int3_injected);
2868                         break;
2869                 }
2870                 if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) {
2871                         u32 err = svm->vmcb->control.exit_int_info_err;
2872                         kvm_queue_exception_e(&svm->vcpu, vector, err);
2873
2874                 } else
2875                         kvm_queue_exception(&svm->vcpu, vector);
2876                 break;
2877         case SVM_EXITINTINFO_TYPE_INTR:
2878                 kvm_queue_interrupt(&svm->vcpu, vector, false);
2879                 break;
2880         default:
2881                 break;
2882         }
2883 }
2884
2885 #ifdef CONFIG_X86_64
2886 #define R "r"
2887 #else
2888 #define R "e"
2889 #endif
2890
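     /*
      * Enter the guest.  GIF is cleared and interrupts are re-enabled
      * around the VMLOAD/VMRUN/VMSAVE sequence; the general purpose
      * registers that the hardware does not switch are saved and restored
      * by hand in the inline assembly below, and the exit state (CR2, RAX,
      * RSP, RIP) is copied back into the vcpu before GIF is set again.
      */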
2891 static void svm_vcpu_run(struct kvm_vcpu *vcpu)
2892 {
2893         struct vcpu_svm *svm = to_svm(vcpu);
2894         u16 fs_selector;
2895         u16 gs_selector;
2896         u16 ldt_selector;
2897
2898         /*
2899          * A vmexit emulation is required before the vcpu can be executed
2900          * again.
2901          */
2902         if (unlikely(svm->nested.exit_required))
2903                 return;
2904
2905         svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
2906         svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
2907         svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];
2908
2909         pre_svm_run(svm);
2910
2911         sync_lapic_to_cr8(vcpu);
2912
2913         save_host_msrs(vcpu);
2914         fs_selector = kvm_read_fs();
2915         gs_selector = kvm_read_gs();
2916         ldt_selector = kvm_read_ldt();
2917         svm->vmcb->save.cr2 = vcpu->arch.cr2;
2918         /* required for live migration with NPT */
2919         if (npt_enabled)
2920                 svm->vmcb->save.cr3 = vcpu->arch.cr3;
2921
2922         clgi();
2923
2924         local_irq_enable();
2925
2926         asm volatile (
2927                 "push %%"R"bp; \n\t"
2928                 "mov %c[rbx](%[svm]), %%"R"bx \n\t"
2929                 "mov %c[rcx](%[svm]), %%"R"cx \n\t"
2930                 "mov %c[rdx](%[svm]), %%"R"dx \n\t"
2931                 "mov %c[rsi](%[svm]), %%"R"si \n\t"
2932                 "mov %c[rdi](%[svm]), %%"R"di \n\t"
2933                 "mov %c[rbp](%[svm]), %%"R"bp \n\t"
2934 #ifdef CONFIG_X86_64
2935                 "mov %c[r8](%[svm]),  %%r8  \n\t"
2936                 "mov %c[r9](%[svm]),  %%r9  \n\t"
2937                 "mov %c[r10](%[svm]), %%r10 \n\t"
2938                 "mov %c[r11](%[svm]), %%r11 \n\t"
2939                 "mov %c[r12](%[svm]), %%r12 \n\t"
2940                 "mov %c[r13](%[svm]), %%r13 \n\t"
2941                 "mov %c[r14](%[svm]), %%r14 \n\t"
2942                 "mov %c[r15](%[svm]), %%r15 \n\t"
2943 #endif
2944
2945                 /* Enter guest mode */
2946                 "push %%"R"ax \n\t"
2947                 "mov %c[vmcb](%[svm]), %%"R"ax \n\t"
2948                 __ex(SVM_VMLOAD) "\n\t"
2949                 __ex(SVM_VMRUN) "\n\t"
2950                 __ex(SVM_VMSAVE) "\n\t"
2951                 "pop %%"R"ax \n\t"
2952
2953                 /* Save guest registers, load host registers */
2954                 "mov %%"R"bx, %c[rbx](%[svm]) \n\t"
2955                 "mov %%"R"cx, %c[rcx](%[svm]) \n\t"
2956                 "mov %%"R"dx, %c[rdx](%[svm]) \n\t"
2957                 "mov %%"R"si, %c[rsi](%[svm]) \n\t"
2958                 "mov %%"R"di, %c[rdi](%[svm]) \n\t"
2959                 "mov %%"R"bp, %c[rbp](%[svm]) \n\t"
2960 #ifdef CONFIG_X86_64
2961                 "mov %%r8,  %c[r8](%[svm]) \n\t"
2962                 "mov %%r9,  %c[r9](%[svm]) \n\t"
2963                 "mov %%r10, %c[r10](%[svm]) \n\t"
2964                 "mov %%r11, %c[r11](%[svm]) \n\t"
2965                 "mov %%r12, %c[r12](%[svm]) \n\t"
2966                 "mov %%r13, %c[r13](%[svm]) \n\t"
2967                 "mov %%r14, %c[r14](%[svm]) \n\t"
2968                 "mov %%r15, %c[r15](%[svm]) \n\t"
2969 #endif
2970                 "pop %%"R"bp"
2971                 :
2972                 : [svm]"a"(svm),
2973                   [vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)),
2974                   [rbx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RBX])),
2975                   [rcx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RCX])),
2976                   [rdx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RDX])),
2977                   [rsi]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RSI])),
2978                   [rdi]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RDI])),
2979                   [rbp]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RBP]))
2980 #ifdef CONFIG_X86_64
2981                   , [r8]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R8])),
2982                   [r9]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R9])),
2983                   [r10]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R10])),
2984                   [r11]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R11])),
2985                   [r12]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R12])),
2986                   [r13]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R13])),
2987                   [r14]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R14])),
2988                   [r15]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R15]))
2989 #endif
2990                 : "cc", "memory"
2991                 , R"bx", R"cx", R"dx", R"si", R"di"
2992 #ifdef CONFIG_X86_64
2993                 , "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15"
2994 #endif
2995                 );
2996
2997         vcpu->arch.cr2 = svm->vmcb->save.cr2;
2998         vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax;
2999         vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
3000         vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip;
3001
3002         kvm_load_fs(fs_selector);
3003         kvm_load_gs(gs_selector);
3004         kvm_load_ldt(ldt_selector);
3005         load_host_msrs(vcpu);
3006
3007         reload_tss(vcpu);
3008
3009         local_irq_disable();
3010
3011         stgi();
3012
3013         sync_cr8_to_lapic(vcpu);
3014
3015         svm->next_rip = 0;
3016
3017         if (npt_enabled) {
3018                 vcpu->arch.regs_avail &= ~(1 << VCPU_EXREG_PDPTR);
3019                 vcpu->arch.regs_dirty &= ~(1 << VCPU_EXREG_PDPTR);
3020         }
3021 }
3022
3023 #undef R
3024
3025 static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
3026 {
3027         struct vcpu_svm *svm = to_svm(vcpu);
3028
3029         if (npt_enabled) {
3030                 svm->vmcb->control.nested_cr3 = root;
3031                 force_new_asid(vcpu);
3032                 return;
3033         }
3034
3035         svm->vmcb->save.cr3 = root;
3036         force_new_asid(vcpu);
3037 }
3038
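/*
 * The BIOS can lock out SVM via the SVM_VM_CR_SVM_DISABLE bit in the
 * VM_CR MSR; report that here (wired up as ->disabled_by_bios below).
 */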
3039 static int is_disabled(void)
3040 {
3041         u64 vm_cr;
3042
3043         rdmsrl(MSR_VM_CR, vm_cr);
3044         if (vm_cr & (1 << SVM_VM_CR_SVM_DISABLE))
3045                 return 1;
3046
3047         return 0;
3048 }
3049
3050 static void
3051 svm_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
3052 {
3053         /*
3054          * Patch in the VMMCALL instruction (opcode 0f 01 d9):
3055          */
3056         hypercall[0] = 0x0f;
3057         hypercall[1] = 0x01;
3058         hypercall[2] = 0xd9;
3059 }
3060
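/*
 * There are no per-cpu compatibility constraints beyond SVM support
 * itself, so always report success.
 */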
3061 static void svm_check_processor_compat(void *rtn)
3062 {
3063         *(int *)rtn = 0;
3064 }
3065
3066 static bool svm_cpu_has_accelerated_tpr(void)
3067 {
3068         return false;
3069 }
3070
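/*
 * Nested page tables follow the host paging mode: 4-level paging on
 * 64-bit hosts, 3-level PAE paging on 32-bit hosts.
 */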
3071 static int get_npt_level(void)
3072 {
3073 #ifdef CONFIG_X86_64
3074         return PT64_ROOT_LEVEL;
3075 #else
3076         return PT32E_ROOT_LEVEL;
3077 #endif
3078 }
3079
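/*
 * No memory-type bits need to be folded into the PTEs on SVM, so the
 * mask is simply zero (VMX uses this hook to pick the EPT memory type).
 */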
3080 static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
3081 {
3082         return 0;
3083 }
3084
3085 static void svm_cpuid_update(struct kvm_vcpu *vcpu)
3086 {
3087 }
3088
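/*
 * Human-readable names for the SVM exit codes, used when decoding the
 * exit tracepoints; the table is terminated by the { -1, NULL } entry.
 */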
3089 static const struct trace_print_flags svm_exit_reasons_str[] = {
3090         { SVM_EXIT_READ_CR0,                    "read_cr0" },
3091         { SVM_EXIT_READ_CR3,                    "read_cr3" },
3092         { SVM_EXIT_READ_CR4,                    "read_cr4" },
3093         { SVM_EXIT_READ_CR8,                    "read_cr8" },
3094         { SVM_EXIT_WRITE_CR0,                   "write_cr0" },
3095         { SVM_EXIT_WRITE_CR3,                   "write_cr3" },
3096         { SVM_EXIT_WRITE_CR4,                   "write_cr4" },
3097         { SVM_EXIT_WRITE_CR8,                   "write_cr8" },
3098         { SVM_EXIT_READ_DR0,                    "read_dr0" },
3099         { SVM_EXIT_READ_DR1,                    "read_dr1" },
3100         { SVM_EXIT_READ_DR2,                    "read_dr2" },
3101         { SVM_EXIT_READ_DR3,                    "read_dr3" },
3102         { SVM_EXIT_WRITE_DR0,                   "write_dr0" },
3103         { SVM_EXIT_WRITE_DR1,                   "write_dr1" },
3104         { SVM_EXIT_WRITE_DR2,                   "write_dr2" },
3105         { SVM_EXIT_WRITE_DR3,                   "write_dr3" },
3106         { SVM_EXIT_WRITE_DR5,                   "write_dr5" },
3107         { SVM_EXIT_WRITE_DR7,                   "write_dr7" },
3108         { SVM_EXIT_EXCP_BASE + DB_VECTOR,       "DB excp" },
3109         { SVM_EXIT_EXCP_BASE + BP_VECTOR,       "BP excp" },
3110         { SVM_EXIT_EXCP_BASE + UD_VECTOR,       "UD excp" },
3111         { SVM_EXIT_EXCP_BASE + PF_VECTOR,       "PF excp" },
3112         { SVM_EXIT_EXCP_BASE + NM_VECTOR,       "NM excp" },
3113         { SVM_EXIT_EXCP_BASE + MC_VECTOR,       "MC excp" },
3114         { SVM_EXIT_INTR,                        "interrupt" },
3115         { SVM_EXIT_NMI,                         "nmi" },
3116         { SVM_EXIT_SMI,                         "smi" },
3117         { SVM_EXIT_INIT,                        "init" },
3118         { SVM_EXIT_VINTR,                       "vintr" },
3119         { SVM_EXIT_CPUID,                       "cpuid" },
3120         { SVM_EXIT_INVD,                        "invd" },
3121         { SVM_EXIT_HLT,                         "hlt" },
3122         { SVM_EXIT_INVLPG,                      "invlpg" },
3123         { SVM_EXIT_INVLPGA,                     "invlpga" },
3124         { SVM_EXIT_IOIO,                        "io" },
3125         { SVM_EXIT_MSR,                         "msr" },
3126         { SVM_EXIT_TASK_SWITCH,                 "task_switch" },
3127         { SVM_EXIT_SHUTDOWN,                    "shutdown" },
3128         { SVM_EXIT_VMRUN,                       "vmrun" },
3129         { SVM_EXIT_VMMCALL,                     "hypercall" },
3130         { SVM_EXIT_VMLOAD,                      "vmload" },
3131         { SVM_EXIT_VMSAVE,                      "vmsave" },
3132         { SVM_EXIT_STGI,                        "stgi" },
3133         { SVM_EXIT_CLGI,                        "clgi" },
3134         { SVM_EXIT_SKINIT,                      "skinit" },
3135         { SVM_EXIT_WBINVD,                      "wbinvd" },
3136         { SVM_EXIT_MONITOR,                     "monitor" },
3137         { SVM_EXIT_MWAIT,                       "mwait" },
3138         { SVM_EXIT_NPF,                         "npf" },
3139         { -1, NULL }
3140 };
3141
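/*
 * Largest page size the MMU may use for guest mappings: PT_PDPE_LEVEL
 * allows huge pages up to the 1GB (PDPE) level.
 */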
3142 static int svm_get_lpage_level(void)
3143 {
3144         return PT_PDPE_LEVEL;
3145 }
3146
3147 static bool svm_rdtscp_supported(void)
3148 {
3149         return false;
3150 }
3151
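/*
 * Lazy FPU handling: intercept #NM so the guest's next FPU use traps to
 * KVM.  While a nested guest runs, the same bit is set in the saved L1
 * state (hsave) so the intercept survives the nested #VMEXIT.  Finally
 * let update_cr0_intercept() refresh the CR0 intercepts accordingly.
 */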
3152 static void svm_fpu_deactivate(struct kvm_vcpu *vcpu)
3153 {
3154         struct vcpu_svm *svm = to_svm(vcpu);
3155
3156         svm->vmcb->control.intercept_exceptions |= 1 << NM_VECTOR;
3157         if (is_nested(svm))
3158                 svm->nested.hsave->control.intercept_exceptions |= 1 << NM_VECTOR;
3159         update_cr0_intercept(svm);
3160 }
3161
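/*
 * The glue between the generic KVM x86 code and this AMD SVM backend:
 * each entry points at the SVM implementation of the corresponding
 * kvm_x86_ops callback.  The table is handed to kvm_init() below.
 */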
3162 static struct kvm_x86_ops svm_x86_ops = {
3163         .cpu_has_kvm_support = has_svm,
3164         .disabled_by_bios = is_disabled,
3165         .hardware_setup = svm_hardware_setup,
3166         .hardware_unsetup = svm_hardware_unsetup,
3167         .check_processor_compatibility = svm_check_processor_compat,
3168         .hardware_enable = svm_hardware_enable,
3169         .hardware_disable = svm_hardware_disable,
3170         .cpu_has_accelerated_tpr = svm_cpu_has_accelerated_tpr,
3171
3172         .vcpu_create = svm_create_vcpu,
3173         .vcpu_free = svm_free_vcpu,
3174         .vcpu_reset = svm_vcpu_reset,
3175
3176         .prepare_guest_switch = svm_prepare_guest_switch,
3177         .vcpu_load = svm_vcpu_load,
3178         .vcpu_put = svm_vcpu_put,
3179
3180         .set_guest_debug = svm_guest_debug,
3181         .get_msr = svm_get_msr,
3182         .set_msr = svm_set_msr,
3183         .get_segment_base = svm_get_segment_base,
3184         .get_segment = svm_get_segment,
3185         .set_segment = svm_set_segment,
3186         .get_cpl = svm_get_cpl,
3187         .get_cs_db_l_bits = kvm_get_cs_db_l_bits,
3188         .decache_cr0_guest_bits = svm_decache_cr0_guest_bits,
3189         .decache_cr4_guest_bits = svm_decache_cr4_guest_bits,
3190         .set_cr0 = svm_set_cr0,
3191         .set_cr3 = svm_set_cr3,
3192         .set_cr4 = svm_set_cr4,
3193         .set_efer = svm_set_efer,
3194         .get_idt = svm_get_idt,
3195         .set_idt = svm_set_idt,
3196         .get_gdt = svm_get_gdt,
3197         .set_gdt = svm_set_gdt,
3198         .get_dr = svm_get_dr,
3199         .set_dr = svm_set_dr,
3200         .cache_reg = svm_cache_reg,
3201         .get_rflags = svm_get_rflags,
3202         .set_rflags = svm_set_rflags,
3203         .fpu_activate = svm_fpu_activate,
3204         .fpu_deactivate = svm_fpu_deactivate,
3205
3206         .tlb_flush = svm_flush_tlb,
3207
3208         .run = svm_vcpu_run,
3209         .handle_exit = handle_exit,
3210         .skip_emulated_instruction = skip_emulated_instruction,
3211         .set_interrupt_shadow = svm_set_interrupt_shadow,
3212         .get_interrupt_shadow = svm_get_interrupt_shadow,
3213         .patch_hypercall = svm_patch_hypercall,
3214         .set_irq = svm_set_irq,
3215         .set_nmi = svm_inject_nmi,
3216         .queue_exception = svm_queue_exception,
3217         .interrupt_allowed = svm_interrupt_allowed,
3218         .nmi_allowed = svm_nmi_allowed,
3219         .get_nmi_mask = svm_get_nmi_mask,
3220         .set_nmi_mask = svm_set_nmi_mask,
3221         .enable_nmi_window = enable_nmi_window,
3222         .enable_irq_window = enable_irq_window,
3223         .update_cr8_intercept = update_cr8_intercept,
3224
3225         .set_tss_addr = svm_set_tss_addr,
3226         .get_tdp_level = get_npt_level,
3227         .get_mt_mask = svm_get_mt_mask,
3228
3229         .exit_reasons_str = svm_exit_reasons_str,
3230         .get_lpage_level = svm_get_lpage_level,
3231
3232         .cpuid_update = svm_cpuid_update,
3233
3234         .rdtscp_supported = svm_rdtscp_supported,
3235 };
3236
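/*
 * Module entry point: register this backend with the KVM core.  The
 * vcpu allocation size is sizeof(struct vcpu_svm) so the generic code
 * allocates the SVM-specific state together with each vcpu.
 */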
3237 static int __init svm_init(void)
3238 {
3239         return kvm_init(&svm_x86_ops, sizeof(struct vcpu_svm),
3240                               THIS_MODULE);
3241 }
3242
3243 static void __exit svm_exit(void)
3244 {
3245         kvm_exit();
3246 }
3247
3248 module_init(svm_init)
3249 module_exit(svm_exit)