KVM: SVM: Add intercept checks for remaining twobyte instructions
[pandora-kernel.git] / arch / x86 / kvm / svm.c
1 /*
2  * Kernel-based Virtual Machine driver for Linux
3  *
4  * AMD SVM support
5  *
6  * Copyright (C) 2006 Qumranet, Inc.
7  * Copyright 2010 Red Hat, Inc. and/or its affiliates.
8  *
9  * Authors:
10  *   Yaniv Kamay  <yaniv@qumranet.com>
11  *   Avi Kivity   <avi@qumranet.com>
12  *
13  * This work is licensed under the terms of the GNU GPL, version 2.  See
14  * the COPYING file in the top-level directory.
15  *
16  */
17 #include <linux/kvm_host.h>
18
19 #include "irq.h"
20 #include "mmu.h"
21 #include "kvm_cache_regs.h"
22 #include "x86.h"
23
24 #include <linux/module.h>
25 #include <linux/kernel.h>
26 #include <linux/vmalloc.h>
27 #include <linux/highmem.h>
28 #include <linux/sched.h>
29 #include <linux/ftrace_event.h>
30 #include <linux/slab.h>
31
32 #include <asm/tlbflush.h>
33 #include <asm/desc.h>
34 #include <asm/kvm_para.h>
35
36 #include <asm/virtext.h>
37 #include "trace.h"
38
39 #define __ex(x) __kvm_handle_fault_on_reboot(x)
40
41 MODULE_AUTHOR("Qumranet");
42 MODULE_LICENSE("GPL");
43
44 #define IOPM_ALLOC_ORDER 2
45 #define MSRPM_ALLOC_ORDER 1
46
47 #define SEG_TYPE_LDT 2
48 #define SEG_TYPE_BUSY_TSS16 3
49
50 #define SVM_FEATURE_NPT            (1 <<  0)
51 #define SVM_FEATURE_LBRV           (1 <<  1)
52 #define SVM_FEATURE_SVML           (1 <<  2)
53 #define SVM_FEATURE_NRIP           (1 <<  3)
54 #define SVM_FEATURE_TSC_RATE       (1 <<  4)
55 #define SVM_FEATURE_VMCB_CLEAN     (1 <<  5)
56 #define SVM_FEATURE_FLUSH_ASID     (1 <<  6)
57 #define SVM_FEATURE_DECODE_ASSIST  (1 <<  7)
58 #define SVM_FEATURE_PAUSE_FILTER   (1 << 10)
59
60 #define NESTED_EXIT_HOST        0       /* Exit handled on host level */
61 #define NESTED_EXIT_DONE        1       /* Exit caused nested vmexit  */
62 #define NESTED_EXIT_CONTINUE    2       /* Further checks needed      */
63
64 #define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
65
66 static bool erratum_383_found __read_mostly;
67
68 static const u32 host_save_user_msrs[] = {
69 #ifdef CONFIG_X86_64
70         MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE,
71         MSR_FS_BASE,
72 #endif
73         MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
74 };
75
76 #define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)
77
78 struct kvm_vcpu;
79
/*
 * Nested-virtualization bookkeeping, embedded in struct vcpu_svm as the
 * 'nested' member.
 */
struct nested_state {
        /*
         * VMCB that get_host_vmcb() hands out while the vcpu is in guest
         * mode; its intercepts are OR-ed into the active VMCB by
         * recalc_intercepts().
         */
        struct vmcb *hsave;
        u64 hsave_msr;
        u64 vm_cr_msr;
        /* Reset to 0 by init_vmcb() */
        u64 vmcb;

        /* These are the merged vectors */
        u32 *msrpm;

        /* gpa pointers to the real vectors */
        u64 vmcb_msrpm;
        u64 vmcb_iopm;

        /* A VMEXIT is required but not yet emulated */
        bool exit_required;

        /*
         * If we vmexit during an instruction emulation we need this to restore
         * the l1 guest rip after the emulation
         */
        unsigned long vmexit_rip;
        unsigned long vmexit_rsp;
        unsigned long vmexit_rax;

        /*
         * cache for intercepts of the guest; recalc_intercepts() ORs these
         * with the hsave intercepts to build the active VMCB intercepts
         */
        u32 intercept_cr;
        u32 intercept_dr;
        u32 intercept_exceptions;
        u64 intercept;

        /* Nested Paging related state */
        u64 nested_cr3;
};
113
114 #define MSRPM_OFFSETS   16
115 static u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
116
/*
 * SVM-specific vcpu state. The generic struct kvm_vcpu is embedded so that
 * to_svm() can recover this structure with container_of().
 */
struct vcpu_svm {
        struct kvm_vcpu vcpu;
        struct vmcb *vmcb;
        unsigned long vmcb_pa;
        struct svm_cpu_data *svm_data;
        /* Reset to 0 by init_vmcb() */
        uint64_t asid_generation;
        uint64_t sysenter_esp;
        uint64_t sysenter_eip;

        /*
         * RIP that skip_emulated_instruction() writes back; refreshed from
         * vmcb->control.next_rip whenever that field is non-zero.
         */
        u64 next_rip;

        u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
        struct {
                u16 fs;
                u16 gs;
                u16 ldt;
                u64 gs_base;
        } host;

        /*
         * MSR permission map; init_vmcb() stores its physical address in
         * control->msrpm_base_pa.
         */
        u32 *msrpm;

        ulong nmi_iret_rip;

        struct nested_state nested;

        bool nmi_singlestep;

        /*
         * Filled in by svm_queue_exception() for #BP when the CPU lacks NRIPS:
         * int3_injected is the number of bytes RIP was advanced, int3_rip the
         * advanced RIP plus the CS base.
         */
        unsigned int3_injected;
        unsigned long int3_rip;
        u32 apf_reason;
};
148
149 #define MSR_INVALID                     0xffffffffU
150
/*
 * MSRs whose permission-map bits may be changed by set_msr_interception()
 * (checked through valid_msr_intercept()). The list is terminated by an
 * entry whose index is MSR_INVALID.
 */
static struct svm_direct_access_msrs {
        u32 index;   /* Index of the MSR */
        /*
         * True if svm_vcpu_init_msrpm() clears the read and write bits for
         * this MSR right after filling the map with 0xff, i.e. the MSR is
         * (presumably) directly accessible by the guest from the start.
         */
        bool always;
} direct_access_msrs[] = {
        { .index = MSR_STAR,                            .always = true  },
        { .index = MSR_IA32_SYSENTER_CS,                .always = true  },
#ifdef CONFIG_X86_64
        { .index = MSR_GS_BASE,                         .always = true  },
        { .index = MSR_FS_BASE,                         .always = true  },
        { .index = MSR_KERNEL_GS_BASE,                  .always = true  },
        { .index = MSR_LSTAR,                           .always = true  },
        { .index = MSR_CSTAR,                           .always = true  },
        { .index = MSR_SYSCALL_MASK,                    .always = true  },
#endif
        /* Toggled at run time by svm_enable_lbrv()/svm_disable_lbrv() */
        { .index = MSR_IA32_LASTBRANCHFROMIP,           .always = false },
        { .index = MSR_IA32_LASTBRANCHTOIP,             .always = false },
        { .index = MSR_IA32_LASTINTFROMIP,              .always = false },
        { .index = MSR_IA32_LASTINTTOIP,                .always = false },
        /* Terminator */
        { .index = MSR_INVALID,                         .always = false },
};
171
172 /* enable NPT for AMD64 and X86 with PAE */
173 #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
174 static bool npt_enabled = true;
175 #else
176 static bool npt_enabled;
177 #endif
178 static int npt = 1;
179
180 module_param(npt, int, S_IRUGO);
181
182 static int nested = 1;
183 module_param(nested, int, S_IRUGO);
184
185 static void svm_flush_tlb(struct kvm_vcpu *vcpu);
186 static void svm_complete_interrupts(struct vcpu_svm *svm);
187
188 static int nested_svm_exit_handled(struct vcpu_svm *svm);
189 static int nested_svm_intercept(struct vcpu_svm *svm);
190 static int nested_svm_vmexit(struct vcpu_svm *svm);
191 static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
192                                       bool has_error_code, u32 error_code);
193
/*
 * Bit positions inside vmcb->control.clean. mark_dirty() clears a single
 * bit, mark_all_clean() sets every bit below VMCB_DIRTY_MAX except those in
 * VMCB_ALWAYS_DIRTY_MASK.
 */
enum {
        VMCB_INTERCEPTS, /* Intercept vectors, TSC offset,
                            pause filter count */
        VMCB_PERM_MAP,   /* IOPM Base and MSRPM Base */
        VMCB_ASID,       /* ASID */
        VMCB_INTR,       /* int_ctl, int_vector */
        VMCB_NPT,        /* npt_en, nCR3, gPAT */
        VMCB_CR,         /* CR0, CR3, CR4, EFER */
        VMCB_DR,         /* DR6, DR7 */
        VMCB_DT,         /* GDT, IDT */
        VMCB_SEG,        /* CS, DS, SS, ES, CPL */
        VMCB_CR2,        /* CR2 only */
        VMCB_LBR,        /* DBGCTL, BR_FROM, BR_TO, LAST_EX_FROM, LAST_EX_TO */
        VMCB_DIRTY_MAX,  /* Number of clean bits; not a bit itself */
};
209
210 /* TPR and CR2 are always written before VMRUN */
211 #define VMCB_ALWAYS_DIRTY_MASK  ((1U << VMCB_INTR) | (1U << VMCB_CR2))
212
213 static inline void mark_all_dirty(struct vmcb *vmcb)
214 {
215         vmcb->control.clean = 0;
216 }
217
218 static inline void mark_all_clean(struct vmcb *vmcb)
219 {
220         vmcb->control.clean = ((1 << VMCB_DIRTY_MAX) - 1)
221                                & ~VMCB_ALWAYS_DIRTY_MASK;
222 }
223
224 static inline void mark_dirty(struct vmcb *vmcb, int bit)
225 {
226         vmcb->control.clean &= ~(1 << bit);
227 }
228
229 static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
230 {
231         return container_of(vcpu, struct vcpu_svm, vcpu);
232 }
233
234 static void recalc_intercepts(struct vcpu_svm *svm)
235 {
236         struct vmcb_control_area *c, *h;
237         struct nested_state *g;
238
239         mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
240
241         if (!is_guest_mode(&svm->vcpu))
242                 return;
243
244         c = &svm->vmcb->control;
245         h = &svm->nested.hsave->control;
246         g = &svm->nested;
247
248         c->intercept_cr = h->intercept_cr | g->intercept_cr;
249         c->intercept_dr = h->intercept_dr | g->intercept_dr;
250         c->intercept_exceptions = h->intercept_exceptions | g->intercept_exceptions;
251         c->intercept = h->intercept | g->intercept;
252 }
253
254 static inline struct vmcb *get_host_vmcb(struct vcpu_svm *svm)
255 {
256         if (is_guest_mode(&svm->vcpu))
257                 return svm->nested.hsave;
258         else
259                 return svm->vmcb;
260 }
261
262 static inline void set_cr_intercept(struct vcpu_svm *svm, int bit)
263 {
264         struct vmcb *vmcb = get_host_vmcb(svm);
265
266         vmcb->control.intercept_cr |= (1U << bit);
267
268         recalc_intercepts(svm);
269 }
270
271 static inline void clr_cr_intercept(struct vcpu_svm *svm, int bit)
272 {
273         struct vmcb *vmcb = get_host_vmcb(svm);
274
275         vmcb->control.intercept_cr &= ~(1U << bit);
276
277         recalc_intercepts(svm);
278 }
279
280 static inline bool is_cr_intercept(struct vcpu_svm *svm, int bit)
281 {
282         struct vmcb *vmcb = get_host_vmcb(svm);
283
284         return vmcb->control.intercept_cr & (1U << bit);
285 }
286
287 static inline void set_dr_intercept(struct vcpu_svm *svm, int bit)
288 {
289         struct vmcb *vmcb = get_host_vmcb(svm);
290
291         vmcb->control.intercept_dr |= (1U << bit);
292
293         recalc_intercepts(svm);
294 }
295
296 static inline void clr_dr_intercept(struct vcpu_svm *svm, int bit)
297 {
298         struct vmcb *vmcb = get_host_vmcb(svm);
299
300         vmcb->control.intercept_dr &= ~(1U << bit);
301
302         recalc_intercepts(svm);
303 }
304
305 static inline void set_exception_intercept(struct vcpu_svm *svm, int bit)
306 {
307         struct vmcb *vmcb = get_host_vmcb(svm);
308
309         vmcb->control.intercept_exceptions |= (1U << bit);
310
311         recalc_intercepts(svm);
312 }
313
314 static inline void clr_exception_intercept(struct vcpu_svm *svm, int bit)
315 {
316         struct vmcb *vmcb = get_host_vmcb(svm);
317
318         vmcb->control.intercept_exceptions &= ~(1U << bit);
319
320         recalc_intercepts(svm);
321 }
322
323 static inline void set_intercept(struct vcpu_svm *svm, int bit)
324 {
325         struct vmcb *vmcb = get_host_vmcb(svm);
326
327         vmcb->control.intercept |= (1ULL << bit);
328
329         recalc_intercepts(svm);
330 }
331
332 static inline void clr_intercept(struct vcpu_svm *svm, int bit)
333 {
334         struct vmcb *vmcb = get_host_vmcb(svm);
335
336         vmcb->control.intercept &= ~(1ULL << bit);
337
338         recalc_intercepts(svm);
339 }
340
341 static inline void enable_gif(struct vcpu_svm *svm)
342 {
343         svm->vcpu.arch.hflags |= HF_GIF_MASK;
344 }
345
346 static inline void disable_gif(struct vcpu_svm *svm)
347 {
348         svm->vcpu.arch.hflags &= ~HF_GIF_MASK;
349 }
350
351 static inline bool gif_set(struct vcpu_svm *svm)
352 {
353         return !!(svm->vcpu.arch.hflags & HF_GIF_MASK);
354 }
355
356 static unsigned long iopm_base;
357
/*
 * Packed LDT/TSS descriptor layout. svm_hardware_enable() points
 * svm_cpu_data.tss_desc at the GDT_ENTRY_TSS slot of the host GDT using this
 * type.
 */
struct kvm_ldttss_desc {
        u16 limit0;
        u16 base0;
        unsigned base1:8, type:5, dpl:2, p:1;
        unsigned limit1:4, zero0:3, g:1, base2:8;
        u32 base3;
        u32 zero1;
} __attribute__((packed));
366
/*
 * Per-CPU SVM state, allocated by svm_cpu_init() and published through the
 * svm_data per-cpu pointer.
 */
struct svm_cpu_data {
        /* CPU number this instance was allocated for */
        int cpu;

        /* ASID bookkeeping, (re)initialised in svm_hardware_enable() */
        u64 asid_generation;
        u32 max_asid;
        u32 next_asid;
        /* Points at the TSS descriptor inside this CPU's GDT */
        struct kvm_ldttss_desc *tss_desc;

        /* Page whose address is written to MSR_VM_HSAVE_PA on enable */
        struct page *save_area;
};
377
378 static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
379
/*
 * NOTE(review): no user of this structure is visible in this part of the
 * file; presumably it carries a CPU number and a result code -- confirm.
 */
struct svm_init_data {
        int cpu;
        int r;
};
384
385 static u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000};
386
387 #define NUM_MSR_MAPS ARRAY_SIZE(msrpm_ranges)
388 #define MSRS_RANGE_SIZE 2048
389 #define MSRS_IN_RANGE (MSRS_RANGE_SIZE * 8 / 2)
390
391 static u32 svm_msrpm_offset(u32 msr)
392 {
393         u32 offset;
394         int i;
395
396         for (i = 0; i < NUM_MSR_MAPS; i++) {
397                 if (msr < msrpm_ranges[i] ||
398                     msr >= msrpm_ranges[i] + MSRS_IN_RANGE)
399                         continue;
400
401                 offset  = (msr - msrpm_ranges[i]) / 4; /* 4 msrs per u8 */
402                 offset += (i * MSRS_RANGE_SIZE);       /* add range offset */
403
404                 /* Now we have the u8 offset - but need the u32 offset */
405                 return offset / 4;
406         }
407
408         /* MSR not in any range */
409         return MSR_INVALID;
410 }
411
412 #define MAX_INST_SIZE 15
413
/*
 * Emit the SVM_CLGI instruction sequence, wrapped in __ex()
 * (__kvm_handle_fault_on_reboot).
 */
static inline void clgi(void)
{
        asm volatile (__ex(SVM_CLGI));
}
418
/*
 * Emit the SVM_STGI instruction sequence, wrapped in __ex()
 * (__kvm_handle_fault_on_reboot).
 */
static inline void stgi(void)
{
        asm volatile (__ex(SVM_STGI));
}
423
/*
 * Emit the SVM_INVLPGA instruction sequence, wrapped in __ex().
 * @addr is passed via the "a" register constraint, @asid via "c".
 */
static inline void invlpga(unsigned long addr, u32 asid)
{
        asm volatile (__ex(SVM_INVLPGA) : : "a"(addr), "c"(asid));
}
428
429 static int get_npt_level(void)
430 {
431 #ifdef CONFIG_X86_64
432         return PT64_ROOT_LEVEL;
433 #else
434         return PT32E_ROOT_LEVEL;
435 #endif
436 }
437
438 static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
439 {
440         vcpu->arch.efer = efer;
441         if (!npt_enabled && !(efer & EFER_LMA))
442                 efer &= ~EFER_LME;
443
444         to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME;
445         mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR);
446 }
447
448 static int is_external_interrupt(u32 info)
449 {
450         info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID;
451         return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR);
452 }
453
454 static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
455 {
456         struct vcpu_svm *svm = to_svm(vcpu);
457         u32 ret = 0;
458
459         if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)
460                 ret |= KVM_X86_SHADOW_INT_STI | KVM_X86_SHADOW_INT_MOV_SS;
461         return ret & mask;
462 }
463
464 static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
465 {
466         struct vcpu_svm *svm = to_svm(vcpu);
467
468         if (mask == 0)
469                 svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
470         else
471                 svm->vmcb->control.int_state |= SVM_INTERRUPT_SHADOW_MASK;
472
473 }
474
475 static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
476 {
477         struct vcpu_svm *svm = to_svm(vcpu);
478
479         if (svm->vmcb->control.next_rip != 0)
480                 svm->next_rip = svm->vmcb->control.next_rip;
481
482         if (!svm->next_rip) {
483                 if (emulate_instruction(vcpu, EMULTYPE_SKIP) !=
484                                 EMULATE_DONE)
485                         printk(KERN_DEBUG "%s: NOP\n", __func__);
486                 return;
487         }
488         if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE)
489                 printk(KERN_ERR "%s: ip 0x%lx next 0x%llx\n",
490                        __func__, kvm_rip_read(vcpu), svm->next_rip);
491
492         kvm_rip_write(vcpu, svm->next_rip);
493         svm_set_interrupt_shadow(vcpu, 0);
494 }
495
496 static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
497                                 bool has_error_code, u32 error_code,
498                                 bool reinject)
499 {
500         struct vcpu_svm *svm = to_svm(vcpu);
501
502         /*
503          * If we are within a nested VM we'd better #VMEXIT and let the guest
504          * handle the exception
505          */
506         if (!reinject &&
507             nested_svm_check_exception(svm, nr, has_error_code, error_code))
508                 return;
509
510         if (nr == BP_VECTOR && !static_cpu_has(X86_FEATURE_NRIPS)) {
511                 unsigned long rip, old_rip = kvm_rip_read(&svm->vcpu);
512
513                 /*
514                  * For guest debugging where we have to reinject #BP if some
515                  * INT3 is guest-owned:
516                  * Emulate nRIP by moving RIP forward. Will fail if injection
517                  * raises a fault that is not intercepted. Still better than
518                  * failing in all cases.
519                  */
520                 skip_emulated_instruction(&svm->vcpu);
521                 rip = kvm_rip_read(&svm->vcpu);
522                 svm->int3_rip = rip + svm->vmcb->save.cs.base;
523                 svm->int3_injected = rip - old_rip;
524         }
525
526         svm->vmcb->control.event_inj = nr
527                 | SVM_EVTINJ_VALID
528                 | (has_error_code ? SVM_EVTINJ_VALID_ERR : 0)
529                 | SVM_EVTINJ_TYPE_EXEPT;
530         svm->vmcb->control.event_inj_err = error_code;
531 }
532
533 static void svm_init_erratum_383(void)
534 {
535         u32 low, high;
536         int err;
537         u64 val;
538
539         if (!cpu_has_amd_erratum(amd_erratum_383))
540                 return;
541
542         /* Use _safe variants to not break nested virtualization */
543         val = native_read_msr_safe(MSR_AMD64_DC_CFG, &err);
544         if (err)
545                 return;
546
547         val |= (1ULL << 47);
548
549         low  = lower_32_bits(val);
550         high = upper_32_bits(val);
551
552         native_write_msr_safe(MSR_AMD64_DC_CFG, low, high);
553
554         erratum_383_found = true;
555 }
556
557 static int has_svm(void)
558 {
559         const char *msg;
560
561         if (!cpu_has_svm(&msg)) {
562                 printk(KERN_INFO "has_svm: %s\n", msg);
563                 return 0;
564         }
565
566         return 1;
567 }
568
/*
 * Disable SVM on the calling CPU by delegating to cpu_svm_disable().
 * @garbage is unused.
 */
static void svm_hardware_disable(void *garbage)
{
        cpu_svm_disable();
}
573
574 static int svm_hardware_enable(void *garbage)
575 {
576
577         struct svm_cpu_data *sd;
578         uint64_t efer;
579         struct desc_ptr gdt_descr;
580         struct desc_struct *gdt;
581         int me = raw_smp_processor_id();
582
583         rdmsrl(MSR_EFER, efer);
584         if (efer & EFER_SVME)
585                 return -EBUSY;
586
587         if (!has_svm()) {
588                 printk(KERN_ERR "svm_hardware_enable: err EOPNOTSUPP on %d\n",
589                        me);
590                 return -EINVAL;
591         }
592         sd = per_cpu(svm_data, me);
593
594         if (!sd) {
595                 printk(KERN_ERR "svm_hardware_enable: svm_data is NULL on %d\n",
596                        me);
597                 return -EINVAL;
598         }
599
600         sd->asid_generation = 1;
601         sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
602         sd->next_asid = sd->max_asid + 1;
603
604         native_store_gdt(&gdt_descr);
605         gdt = (struct desc_struct *)gdt_descr.address;
606         sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
607
608         wrmsrl(MSR_EFER, efer | EFER_SVME);
609
610         wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(sd->save_area) << PAGE_SHIFT);
611
612         svm_init_erratum_383();
613
614         return 0;
615 }
616
617 static void svm_cpu_uninit(int cpu)
618 {
619         struct svm_cpu_data *sd = per_cpu(svm_data, raw_smp_processor_id());
620
621         if (!sd)
622                 return;
623
624         per_cpu(svm_data, raw_smp_processor_id()) = NULL;
625         __free_page(sd->save_area);
626         kfree(sd);
627 }
628
629 static int svm_cpu_init(int cpu)
630 {
631         struct svm_cpu_data *sd;
632         int r;
633
634         sd = kzalloc(sizeof(struct svm_cpu_data), GFP_KERNEL);
635         if (!sd)
636                 return -ENOMEM;
637         sd->cpu = cpu;
638         sd->save_area = alloc_page(GFP_KERNEL);
639         r = -ENOMEM;
640         if (!sd->save_area)
641                 goto err_1;
642
643         per_cpu(svm_data, cpu) = sd;
644
645         return 0;
646
647 err_1:
648         kfree(sd);
649         return r;
650
651 }
652
653 static bool valid_msr_intercept(u32 index)
654 {
655         int i;
656
657         for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++)
658                 if (direct_access_msrs[i].index == index)
659                         return true;
660
661         return false;
662 }
663
664 static void set_msr_interception(u32 *msrpm, unsigned msr,
665                                  int read, int write)
666 {
667         u8 bit_read, bit_write;
668         unsigned long tmp;
669         u32 offset;
670
671         /*
672          * If this warning triggers extend the direct_access_msrs list at the
673          * beginning of the file
674          */
675         WARN_ON(!valid_msr_intercept(msr));
676
677         offset    = svm_msrpm_offset(msr);
678         bit_read  = 2 * (msr & 0x0f);
679         bit_write = 2 * (msr & 0x0f) + 1;
680         tmp       = msrpm[offset];
681
682         BUG_ON(offset == MSR_INVALID);
683
684         read  ? clear_bit(bit_read,  &tmp) : set_bit(bit_read,  &tmp);
685         write ? clear_bit(bit_write, &tmp) : set_bit(bit_write, &tmp);
686
687         msrpm[offset] = tmp;
688 }
689
690 static void svm_vcpu_init_msrpm(u32 *msrpm)
691 {
692         int i;
693
694         memset(msrpm, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER));
695
696         for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) {
697                 if (!direct_access_msrs[i].always)
698                         continue;
699
700                 set_msr_interception(msrpm, direct_access_msrs[i].index, 1, 1);
701         }
702 }
703
704 static void add_msr_offset(u32 offset)
705 {
706         int i;
707
708         for (i = 0; i < MSRPM_OFFSETS; ++i) {
709
710                 /* Offset already in list? */
711                 if (msrpm_offsets[i] == offset)
712                         return;
713
714                 /* Slot used by another offset? */
715                 if (msrpm_offsets[i] != MSR_INVALID)
716                         continue;
717
718                 /* Add offset to list */
719                 msrpm_offsets[i] = offset;
720
721                 return;
722         }
723
724         /*
725          * If this BUG triggers the msrpm_offsets table has an overflow. Just
726          * increase MSRPM_OFFSETS in this case.
727          */
728         BUG();
729 }
730
731 static void init_msrpm_offsets(void)
732 {
733         int i;
734
735         memset(msrpm_offsets, 0xff, sizeof(msrpm_offsets));
736
737         for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) {
738                 u32 offset;
739
740                 offset = svm_msrpm_offset(direct_access_msrs[i].index);
741                 BUG_ON(offset == MSR_INVALID);
742
743                 add_msr_offset(offset);
744         }
745 }
746
747 static void svm_enable_lbrv(struct vcpu_svm *svm)
748 {
749         u32 *msrpm = svm->msrpm;
750
751         svm->vmcb->control.lbr_ctl = 1;
752         set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
753         set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
754         set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
755         set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
756 }
757
758 static void svm_disable_lbrv(struct vcpu_svm *svm)
759 {
760         u32 *msrpm = svm->msrpm;
761
762         svm->vmcb->control.lbr_ctl = 0;
763         set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 0, 0);
764         set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 0, 0);
765         set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 0, 0);
766         set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
767 }
768
769 static __init int svm_hardware_setup(void)
770 {
771         int cpu;
772         struct page *iopm_pages;
773         void *iopm_va;
774         int r;
775
776         iopm_pages = alloc_pages(GFP_KERNEL, IOPM_ALLOC_ORDER);
777
778         if (!iopm_pages)
779                 return -ENOMEM;
780
781         iopm_va = page_address(iopm_pages);
782         memset(iopm_va, 0xff, PAGE_SIZE * (1 << IOPM_ALLOC_ORDER));
783         iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT;
784
785         init_msrpm_offsets();
786
787         if (boot_cpu_has(X86_FEATURE_NX))
788                 kvm_enable_efer_bits(EFER_NX);
789
790         if (boot_cpu_has(X86_FEATURE_FXSR_OPT))
791                 kvm_enable_efer_bits(EFER_FFXSR);
792
793         if (nested) {
794                 printk(KERN_INFO "kvm: Nested Virtualization enabled\n");
795                 kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE);
796         }
797
798         for_each_possible_cpu(cpu) {
799                 r = svm_cpu_init(cpu);
800                 if (r)
801                         goto err;
802         }
803
804         if (!boot_cpu_has(X86_FEATURE_NPT))
805                 npt_enabled = false;
806
807         if (npt_enabled && !npt) {
808                 printk(KERN_INFO "kvm: Nested Paging disabled\n");
809                 npt_enabled = false;
810         }
811
812         if (npt_enabled) {
813                 printk(KERN_INFO "kvm: Nested Paging enabled\n");
814                 kvm_enable_tdp();
815         } else
816                 kvm_disable_tdp();
817
818         return 0;
819
820 err:
821         __free_pages(iopm_pages, IOPM_ALLOC_ORDER);
822         iopm_base = 0;
823         return r;
824 }
825
826 static __exit void svm_hardware_unsetup(void)
827 {
828         int cpu;
829
830         for_each_possible_cpu(cpu)
831                 svm_cpu_uninit(cpu);
832
833         __free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER);
834         iopm_base = 0;
835 }
836
837 static void init_seg(struct vmcb_seg *seg)
838 {
839         seg->selector = 0;
840         seg->attrib = SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK |
841                       SVM_SELECTOR_WRITE_MASK; /* Read/Write Data Segment */
842         seg->limit = 0xffff;
843         seg->base = 0;
844 }
845
846 static void init_sys_seg(struct vmcb_seg *seg, uint32_t type)
847 {
848         seg->selector = 0;
849         seg->attrib = SVM_SELECTOR_P_MASK | type;
850         seg->limit = 0xffff;
851         seg->base = 0;
852 }
853
854 static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
855 {
856         struct vcpu_svm *svm = to_svm(vcpu);
857         u64 g_tsc_offset = 0;
858
859         if (is_guest_mode(vcpu)) {
860                 g_tsc_offset = svm->vmcb->control.tsc_offset -
861                                svm->nested.hsave->control.tsc_offset;
862                 svm->nested.hsave->control.tsc_offset = offset;
863         }
864
865         svm->vmcb->control.tsc_offset = offset + g_tsc_offset;
866
867         mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
868 }
869
870 static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment)
871 {
872         struct vcpu_svm *svm = to_svm(vcpu);
873
874         svm->vmcb->control.tsc_offset += adjustment;
875         if (is_guest_mode(vcpu))
876                 svm->nested.hsave->control.tsc_offset += adjustment;
877         mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
878 }
879
880 static void init_vmcb(struct vcpu_svm *svm)
881 {
882         struct vmcb_control_area *control = &svm->vmcb->control;
883         struct vmcb_save_area *save = &svm->vmcb->save;
884
885         svm->vcpu.fpu_active = 1;
886         svm->vcpu.arch.hflags = 0;
887
888         set_cr_intercept(svm, INTERCEPT_CR0_READ);
889         set_cr_intercept(svm, INTERCEPT_CR3_READ);
890         set_cr_intercept(svm, INTERCEPT_CR4_READ);
891         set_cr_intercept(svm, INTERCEPT_CR0_WRITE);
892         set_cr_intercept(svm, INTERCEPT_CR3_WRITE);
893         set_cr_intercept(svm, INTERCEPT_CR4_WRITE);
894         set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
895
896         set_dr_intercept(svm, INTERCEPT_DR0_READ);
897         set_dr_intercept(svm, INTERCEPT_DR1_READ);
898         set_dr_intercept(svm, INTERCEPT_DR2_READ);
899         set_dr_intercept(svm, INTERCEPT_DR3_READ);
900         set_dr_intercept(svm, INTERCEPT_DR4_READ);
901         set_dr_intercept(svm, INTERCEPT_DR5_READ);
902         set_dr_intercept(svm, INTERCEPT_DR6_READ);
903         set_dr_intercept(svm, INTERCEPT_DR7_READ);
904
905         set_dr_intercept(svm, INTERCEPT_DR0_WRITE);
906         set_dr_intercept(svm, INTERCEPT_DR1_WRITE);
907         set_dr_intercept(svm, INTERCEPT_DR2_WRITE);
908         set_dr_intercept(svm, INTERCEPT_DR3_WRITE);
909         set_dr_intercept(svm, INTERCEPT_DR4_WRITE);
910         set_dr_intercept(svm, INTERCEPT_DR5_WRITE);
911         set_dr_intercept(svm, INTERCEPT_DR6_WRITE);
912         set_dr_intercept(svm, INTERCEPT_DR7_WRITE);
913
914         set_exception_intercept(svm, PF_VECTOR);
915         set_exception_intercept(svm, UD_VECTOR);
916         set_exception_intercept(svm, MC_VECTOR);
917
918         set_intercept(svm, INTERCEPT_INTR);
919         set_intercept(svm, INTERCEPT_NMI);
920         set_intercept(svm, INTERCEPT_SMI);
921         set_intercept(svm, INTERCEPT_SELECTIVE_CR0);
922         set_intercept(svm, INTERCEPT_CPUID);
923         set_intercept(svm, INTERCEPT_INVD);
924         set_intercept(svm, INTERCEPT_HLT);
925         set_intercept(svm, INTERCEPT_INVLPG);
926         set_intercept(svm, INTERCEPT_INVLPGA);
927         set_intercept(svm, INTERCEPT_IOIO_PROT);
928         set_intercept(svm, INTERCEPT_MSR_PROT);
929         set_intercept(svm, INTERCEPT_TASK_SWITCH);
930         set_intercept(svm, INTERCEPT_SHUTDOWN);
931         set_intercept(svm, INTERCEPT_VMRUN);
932         set_intercept(svm, INTERCEPT_VMMCALL);
933         set_intercept(svm, INTERCEPT_VMLOAD);
934         set_intercept(svm, INTERCEPT_VMSAVE);
935         set_intercept(svm, INTERCEPT_STGI);
936         set_intercept(svm, INTERCEPT_CLGI);
937         set_intercept(svm, INTERCEPT_SKINIT);
938         set_intercept(svm, INTERCEPT_WBINVD);
939         set_intercept(svm, INTERCEPT_MONITOR);
940         set_intercept(svm, INTERCEPT_MWAIT);
941         set_intercept(svm, INTERCEPT_XSETBV);
942
943         control->iopm_base_pa = iopm_base;
944         control->msrpm_base_pa = __pa(svm->msrpm);
945         control->int_ctl = V_INTR_MASKING_MASK;
946
947         init_seg(&save->es);
948         init_seg(&save->ss);
949         init_seg(&save->ds);
950         init_seg(&save->fs);
951         init_seg(&save->gs);
952
953         save->cs.selector = 0xf000;
954         /* Executable/Readable Code Segment */
955         save->cs.attrib = SVM_SELECTOR_READ_MASK | SVM_SELECTOR_P_MASK |
956                 SVM_SELECTOR_S_MASK | SVM_SELECTOR_CODE_MASK;
957         save->cs.limit = 0xffff;
958         /*
959          * cs.base should really be 0xffff0000, but vmx can't handle that, so
960          * be consistent with it.
961          *
962          * Replace when we have real mode working for vmx.
963          */
964         save->cs.base = 0xf0000;
965
966         save->gdtr.limit = 0xffff;
967         save->idtr.limit = 0xffff;
968
969         init_sys_seg(&save->ldtr, SEG_TYPE_LDT);
970         init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);
971
972         svm_set_efer(&svm->vcpu, 0);
973         save->dr6 = 0xffff0ff0;
974         save->dr7 = 0x400;
975         kvm_set_rflags(&svm->vcpu, 2);
976         save->rip = 0x0000fff0;
977         svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip;
978
979         /*
980          * This is the guest-visible cr0 value.
981          * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0.
982          */
983         svm->vcpu.arch.cr0 = 0;
984         (void)kvm_set_cr0(&svm->vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET);
985
986         save->cr4 = X86_CR4_PAE;
987         /* rdx = ?? */
988
989         if (npt_enabled) {
990                 /* Setup VMCB for Nested Paging */
991                 control->nested_ctl = 1;
992                 clr_intercept(svm, INTERCEPT_TASK_SWITCH);
993                 clr_intercept(svm, INTERCEPT_INVLPG);
994                 clr_exception_intercept(svm, PF_VECTOR);
995                 clr_cr_intercept(svm, INTERCEPT_CR3_READ);
996                 clr_cr_intercept(svm, INTERCEPT_CR3_WRITE);
997                 save->g_pat = 0x0007040600070406ULL;
998                 save->cr3 = 0;
999                 save->cr4 = 0;
1000         }
1001         svm->asid_generation = 0;
1002
1003         svm->nested.vmcb = 0;
1004         svm->vcpu.arch.hflags = 0;
1005
1006         if (boot_cpu_has(X86_FEATURE_PAUSEFILTER)) {
1007                 control->pause_filter_count = 3000;
1008                 set_intercept(svm, INTERCEPT_PAUSE);
1009         }
1010
1011         mark_all_dirty(svm->vmcb);
1012
1013         enable_gif(svm);
1014 }
1015
1016 static int svm_vcpu_reset(struct kvm_vcpu *vcpu)
1017 {
1018         struct vcpu_svm *svm = to_svm(vcpu);
1019
1020         init_vmcb(svm);
1021
1022         if (!kvm_vcpu_is_bsp(vcpu)) {
1023                 kvm_rip_write(vcpu, 0);
1024                 svm->vmcb->save.cs.base = svm->vcpu.arch.sipi_vector << 12;
1025                 svm->vmcb->save.cs.selector = svm->vcpu.arch.sipi_vector << 8;
1026         }
1027         vcpu->arch.regs_avail = ~0;
1028         vcpu->arch.regs_dirty = ~0;
1029
1030         return 0;
1031 }
1032
1033 static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
1034 {
1035         struct vcpu_svm *svm;
1036         struct page *page;
1037         struct page *msrpm_pages;
1038         struct page *hsave_page;
1039         struct page *nested_msrpm_pages;
1040         int err;
1041
1042         svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
1043         if (!svm) {
1044                 err = -ENOMEM;
1045                 goto out;
1046         }
1047
1048         err = kvm_vcpu_init(&svm->vcpu, kvm, id);
1049         if (err)
1050                 goto free_svm;
1051
1052         err = -ENOMEM;
1053         page = alloc_page(GFP_KERNEL);
1054         if (!page)
1055                 goto uninit;
1056
1057         msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
1058         if (!msrpm_pages)
1059                 goto free_page1;
1060
1061         nested_msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
1062         if (!nested_msrpm_pages)
1063                 goto free_page2;
1064
1065         hsave_page = alloc_page(GFP_KERNEL);
1066         if (!hsave_page)
1067                 goto free_page3;
1068
1069         svm->nested.hsave = page_address(hsave_page);
1070
1071         svm->msrpm = page_address(msrpm_pages);
1072         svm_vcpu_init_msrpm(svm->msrpm);
1073
1074         svm->nested.msrpm = page_address(nested_msrpm_pages);
1075         svm_vcpu_init_msrpm(svm->nested.msrpm);
1076
1077         svm->vmcb = page_address(page);
1078         clear_page(svm->vmcb);
1079         svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
1080         svm->asid_generation = 0;
1081         init_vmcb(svm);
1082         kvm_write_tsc(&svm->vcpu, 0);
1083
1084         err = fx_init(&svm->vcpu);
1085         if (err)
1086                 goto free_page4;
1087
1088         svm->vcpu.arch.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
1089         if (kvm_vcpu_is_bsp(&svm->vcpu))
1090                 svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
1091
1092         return &svm->vcpu;
1093
1094 free_page4:
1095         __free_page(hsave_page);
1096 free_page3:
1097         __free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER);
1098 free_page2:
1099         __free_pages(msrpm_pages, MSRPM_ALLOC_ORDER);
1100 free_page1:
1101         __free_page(page);
1102 uninit:
1103         kvm_vcpu_uninit(&svm->vcpu);
1104 free_svm:
1105         kmem_cache_free(kvm_vcpu_cache, svm);
1106 out:
1107         return ERR_PTR(err);
1108 }
1109
1110 static void svm_free_vcpu(struct kvm_vcpu *vcpu)
1111 {
1112         struct vcpu_svm *svm = to_svm(vcpu);
1113
1114         __free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT));
1115         __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
1116         __free_page(virt_to_page(svm->nested.hsave));
1117         __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
1118         kvm_vcpu_uninit(vcpu);
1119         kmem_cache_free(kvm_vcpu_cache, svm);
1120 }
1121
1122 static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1123 {
1124         struct vcpu_svm *svm = to_svm(vcpu);
1125         int i;
1126
1127         if (unlikely(cpu != vcpu->cpu)) {
1128                 svm->asid_generation = 0;
1129                 mark_all_dirty(svm->vmcb);
1130         }
1131
1132 #ifdef CONFIG_X86_64
1133         rdmsrl(MSR_GS_BASE, to_svm(vcpu)->host.gs_base);
1134 #endif
1135         savesegment(fs, svm->host.fs);
1136         savesegment(gs, svm->host.gs);
1137         svm->host.ldt = kvm_read_ldt();
1138
1139         for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
1140                 rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
1141 }
1142
1143 static void svm_vcpu_put(struct kvm_vcpu *vcpu)
1144 {
1145         struct vcpu_svm *svm = to_svm(vcpu);
1146         int i;
1147
1148         ++vcpu->stat.host_state_reload;
1149         kvm_load_ldt(svm->host.ldt);
1150 #ifdef CONFIG_X86_64
1151         loadsegment(fs, svm->host.fs);
1152         wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gs);
1153         load_gs_index(svm->host.gs);
1154 #else
1155 #ifdef CONFIG_X86_32_LAZY_GS
1156         loadsegment(gs, svm->host.gs);
1157 #endif
1158 #endif
1159         for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
1160                 wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
1161 }
1162
1163 static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
1164 {
1165         return to_svm(vcpu)->vmcb->save.rflags;
1166 }
1167
1168 static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
1169 {
1170         to_svm(vcpu)->vmcb->save.rflags = rflags;
1171 }
1172
1173 static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
1174 {
1175         switch (reg) {
1176         case VCPU_EXREG_PDPTR:
1177                 BUG_ON(!npt_enabled);
1178                 load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
1179                 break;
1180         default:
1181                 BUG();
1182         }
1183 }
1184
1185 static void svm_set_vintr(struct vcpu_svm *svm)
1186 {
1187         set_intercept(svm, INTERCEPT_VINTR);
1188 }
1189
1190 static void svm_clear_vintr(struct vcpu_svm *svm)
1191 {
1192         clr_intercept(svm, INTERCEPT_VINTR);
1193 }
1194
1195 static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg)
1196 {
1197         struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save;
1198
1199         switch (seg) {
1200         case VCPU_SREG_CS: return &save->cs;
1201         case VCPU_SREG_DS: return &save->ds;
1202         case VCPU_SREG_ES: return &save->es;
1203         case VCPU_SREG_FS: return &save->fs;
1204         case VCPU_SREG_GS: return &save->gs;
1205         case VCPU_SREG_SS: return &save->ss;
1206         case VCPU_SREG_TR: return &save->tr;
1207         case VCPU_SREG_LDTR: return &save->ldtr;
1208         }
1209         BUG();
1210         return NULL;
1211 }
1212
1213 static u64 svm_get_segment_base(struct kvm_vcpu *vcpu, int seg)
1214 {
1215         struct vmcb_seg *s = svm_seg(vcpu, seg);
1216
1217         return s->base;
1218 }
1219
1220 static void svm_get_segment(struct kvm_vcpu *vcpu,
1221                             struct kvm_segment *var, int seg)
1222 {
1223         struct vmcb_seg *s = svm_seg(vcpu, seg);
1224
1225         var->base = s->base;
1226         var->limit = s->limit;
1227         var->selector = s->selector;
1228         var->type = s->attrib & SVM_SELECTOR_TYPE_MASK;
1229         var->s = (s->attrib >> SVM_SELECTOR_S_SHIFT) & 1;
1230         var->dpl = (s->attrib >> SVM_SELECTOR_DPL_SHIFT) & 3;
1231         var->present = (s->attrib >> SVM_SELECTOR_P_SHIFT) & 1;
1232         var->avl = (s->attrib >> SVM_SELECTOR_AVL_SHIFT) & 1;
1233         var->l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1;
1234         var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1;
1235         var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1;
1236
1237         /*
1238          * AMD's VMCB does not have an explicit unusable field, so emulate it
1239          * for cross vendor migration purposes by "not present"
1240          */
1241         var->unusable = !var->present || (var->type == 0);
1242
1243         switch (seg) {
1244         case VCPU_SREG_CS:
1245                 /*
1246                  * SVM always stores 0 for the 'G' bit in the CS selector in
1247                  * the VMCB on a VMEXIT. This hurts cross-vendor migration:
1248                  * Intel's VMENTRY has a check on the 'G' bit.
1249                  */
1250                 var->g = s->limit > 0xfffff;
1251                 break;
1252         case VCPU_SREG_TR:
1253                 /*
1254                  * Work around a bug where the busy flag in the tr selector
1255                  * isn't exposed
1256                  */
1257                 var->type |= 0x2;
1258                 break;
1259         case VCPU_SREG_DS:
1260         case VCPU_SREG_ES:
1261         case VCPU_SREG_FS:
1262         case VCPU_SREG_GS:
1263                 /*
1264                  * The accessed bit must always be set in the segment
1265                  * descriptor cache, although it can be cleared in the
1266                  * descriptor, the cached bit always remains at 1. Since
1267                  * Intel has a check on this, set it here to support
1268                  * cross-vendor migration.
1269                  */
1270                 if (!var->unusable)
1271                         var->type |= 0x1;
1272                 break;
1273         case VCPU_SREG_SS:
1274                 /*
1275                  * On AMD CPUs sometimes the DB bit in the segment
1276                  * descriptor is left as 1, although the whole segment has
1277                  * been made unusable. Clear it here to pass an Intel VMX
1278                  * entry check when cross vendor migrating.
1279                  */
1280                 if (var->unusable)
1281                         var->db = 0;
1282                 break;
1283         }
1284 }
1285
1286 static int svm_get_cpl(struct kvm_vcpu *vcpu)
1287 {
1288         struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save;
1289
1290         return save->cpl;
1291 }
1292
1293 static void svm_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
1294 {
1295         struct vcpu_svm *svm = to_svm(vcpu);
1296
1297         dt->size = svm->vmcb->save.idtr.limit;
1298         dt->address = svm->vmcb->save.idtr.base;
1299 }
1300
1301 static void svm_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
1302 {
1303         struct vcpu_svm *svm = to_svm(vcpu);
1304
1305         svm->vmcb->save.idtr.limit = dt->size;
1306         svm->vmcb->save.idtr.base = dt->address ;
1307         mark_dirty(svm->vmcb, VMCB_DT);
1308 }
1309
1310 static void svm_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
1311 {
1312         struct vcpu_svm *svm = to_svm(vcpu);
1313
1314         dt->size = svm->vmcb->save.gdtr.limit;
1315         dt->address = svm->vmcb->save.gdtr.base;
1316 }
1317
1318 static void svm_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
1319 {
1320         struct vcpu_svm *svm = to_svm(vcpu);
1321
1322         svm->vmcb->save.gdtr.limit = dt->size;
1323         svm->vmcb->save.gdtr.base = dt->address ;
1324         mark_dirty(svm->vmcb, VMCB_DT);
1325 }
1326
/* Intentionally empty: this callback performs no work in this file. */
static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
{
}
1330
/* Intentionally empty: this callback performs no work in this file. */
static void svm_decache_cr3(struct kvm_vcpu *vcpu)
{
}
1334
/* Intentionally empty: this callback performs no work in this file. */
static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
{
}
1338
1339 static void update_cr0_intercept(struct vcpu_svm *svm)
1340 {
1341         ulong gcr0 = svm->vcpu.arch.cr0;
1342         u64 *hcr0 = &svm->vmcb->save.cr0;
1343
1344         if (!svm->vcpu.fpu_active)
1345                 *hcr0 |= SVM_CR0_SELECTIVE_MASK;
1346         else
1347                 *hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK)
1348                         | (gcr0 & SVM_CR0_SELECTIVE_MASK);
1349
1350         mark_dirty(svm->vmcb, VMCB_CR);
1351
1352         if (gcr0 == *hcr0 && svm->vcpu.fpu_active) {
1353                 clr_cr_intercept(svm, INTERCEPT_CR0_READ);
1354                 clr_cr_intercept(svm, INTERCEPT_CR0_WRITE);
1355         } else {
1356                 set_cr_intercept(svm, INTERCEPT_CR0_READ);
1357                 set_cr_intercept(svm, INTERCEPT_CR0_WRITE);
1358         }
1359 }
1360
1361 static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
1362 {
1363         struct vcpu_svm *svm = to_svm(vcpu);
1364
1365         if (is_guest_mode(vcpu)) {
1366                 /*
1367                  * We are here because we run in nested mode, the host kvm
1368                  * intercepts cr0 writes but the l1 hypervisor does not.
1369                  * But the L1 hypervisor may intercept selective cr0 writes.
1370                  * This needs to be checked here.
1371                  */
1372                 unsigned long old, new;
1373
1374                 /* Remove bits that would trigger a real cr0 write intercept */
1375                 old = vcpu->arch.cr0 & SVM_CR0_SELECTIVE_MASK;
1376                 new = cr0 & SVM_CR0_SELECTIVE_MASK;
1377
1378                 if (old == new) {
1379                         /* cr0 write with ts and mp unchanged */
1380                         svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE;
1381                         if (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE) {
1382                                 svm->nested.vmexit_rip = kvm_rip_read(vcpu);
1383                                 svm->nested.vmexit_rsp = kvm_register_read(vcpu, VCPU_REGS_RSP);
1384                                 svm->nested.vmexit_rax = kvm_register_read(vcpu, VCPU_REGS_RAX);
1385                                 return;
1386                         }
1387                 }
1388         }
1389
1390 #ifdef CONFIG_X86_64
1391         if (vcpu->arch.efer & EFER_LME) {
1392                 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
1393                         vcpu->arch.efer |= EFER_LMA;
1394                         svm->vmcb->save.efer |= EFER_LMA | EFER_LME;
1395                 }
1396
1397                 if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) {
1398                         vcpu->arch.efer &= ~EFER_LMA;
1399                         svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME);
1400                 }
1401         }
1402 #endif
1403         vcpu->arch.cr0 = cr0;
1404
1405         if (!npt_enabled)
1406                 cr0 |= X86_CR0_PG | X86_CR0_WP;
1407
1408         if (!vcpu->fpu_active)
1409                 cr0 |= X86_CR0_TS;
1410         /*
1411          * re-enable caching here because the QEMU bios
1412          * does not do it - this results in some delay at
1413          * reboot
1414          */
1415         cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
1416         svm->vmcb->save.cr0 = cr0;
1417         mark_dirty(svm->vmcb, VMCB_CR);
1418         update_cr0_intercept(svm);
1419 }
1420
1421 static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
1422 {
1423         unsigned long host_cr4_mce = read_cr4() & X86_CR4_MCE;
1424         unsigned long old_cr4 = to_svm(vcpu)->vmcb->save.cr4;
1425
1426         if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE))
1427                 svm_flush_tlb(vcpu);
1428
1429         vcpu->arch.cr4 = cr4;
1430         if (!npt_enabled)
1431                 cr4 |= X86_CR4_PAE;
1432         cr4 |= host_cr4_mce;
1433         to_svm(vcpu)->vmcb->save.cr4 = cr4;
1434         mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR);
1435 }
1436
1437 static void svm_set_segment(struct kvm_vcpu *vcpu,
1438                             struct kvm_segment *var, int seg)
1439 {
1440         struct vcpu_svm *svm = to_svm(vcpu);
1441         struct vmcb_seg *s = svm_seg(vcpu, seg);
1442
1443         s->base = var->base;
1444         s->limit = var->limit;
1445         s->selector = var->selector;
1446         if (var->unusable)
1447                 s->attrib = 0;
1448         else {
1449                 s->attrib = (var->type & SVM_SELECTOR_TYPE_MASK);
1450                 s->attrib |= (var->s & 1) << SVM_SELECTOR_S_SHIFT;
1451                 s->attrib |= (var->dpl & 3) << SVM_SELECTOR_DPL_SHIFT;
1452                 s->attrib |= (var->present & 1) << SVM_SELECTOR_P_SHIFT;
1453                 s->attrib |= (var->avl & 1) << SVM_SELECTOR_AVL_SHIFT;
1454                 s->attrib |= (var->l & 1) << SVM_SELECTOR_L_SHIFT;
1455                 s->attrib |= (var->db & 1) << SVM_SELECTOR_DB_SHIFT;
1456                 s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT;
1457         }
1458         if (seg == VCPU_SREG_CS)
1459                 svm->vmcb->save.cpl
1460                         = (svm->vmcb->save.cs.attrib
1461                            >> SVM_SELECTOR_DPL_SHIFT) & 3;
1462
1463         mark_dirty(svm->vmcb, VMCB_SEG);
1464 }
1465
1466 static void update_db_intercept(struct kvm_vcpu *vcpu)
1467 {
1468         struct vcpu_svm *svm = to_svm(vcpu);
1469
1470         clr_exception_intercept(svm, DB_VECTOR);
1471         clr_exception_intercept(svm, BP_VECTOR);
1472
1473         if (svm->nmi_singlestep)
1474                 set_exception_intercept(svm, DB_VECTOR);
1475
1476         if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
1477                 if (vcpu->guest_debug &
1478                     (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
1479                         set_exception_intercept(svm, DB_VECTOR);
1480                 if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
1481                         set_exception_intercept(svm, BP_VECTOR);
1482         } else
1483                 vcpu->guest_debug = 0;
1484 }
1485
1486 static void svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
1487 {
1488         struct vcpu_svm *svm = to_svm(vcpu);
1489
1490         if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
1491                 svm->vmcb->save.dr7 = dbg->arch.debugreg[7];
1492         else
1493                 svm->vmcb->save.dr7 = vcpu->arch.dr7;
1494
1495         mark_dirty(svm->vmcb, VMCB_DR);
1496
1497         update_db_intercept(vcpu);
1498 }
1499
1500 static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd)
1501 {
1502         if (sd->next_asid > sd->max_asid) {
1503                 ++sd->asid_generation;
1504                 sd->next_asid = 1;
1505                 svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID;
1506         }
1507
1508         svm->asid_generation = sd->asid_generation;
1509         svm->vmcb->control.asid = sd->next_asid++;
1510
1511         mark_dirty(svm->vmcb, VMCB_ASID);
1512 }
1513
1514 static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value)
1515 {
1516         struct vcpu_svm *svm = to_svm(vcpu);
1517
1518         svm->vmcb->save.dr7 = value;
1519         mark_dirty(svm->vmcb, VMCB_DR);
1520 }
1521
1522 static int pf_interception(struct vcpu_svm *svm)
1523 {
1524         u64 fault_address = svm->vmcb->control.exit_info_2;
1525         u32 error_code;
1526         int r = 1;
1527
1528         switch (svm->apf_reason) {
1529         default:
1530                 error_code = svm->vmcb->control.exit_info_1;
1531
1532                 trace_kvm_page_fault(fault_address, error_code);
1533                 if (!npt_enabled && kvm_event_needs_reinjection(&svm->vcpu))
1534                         kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address);
1535                 r = kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code,
1536                         svm->vmcb->control.insn_bytes,
1537                         svm->vmcb->control.insn_len);
1538                 break;
1539         case KVM_PV_REASON_PAGE_NOT_PRESENT:
1540                 svm->apf_reason = 0;
1541                 local_irq_disable();
1542                 kvm_async_pf_task_wait(fault_address);
1543                 local_irq_enable();
1544                 break;
1545         case KVM_PV_REASON_PAGE_READY:
1546                 svm->apf_reason = 0;
1547                 local_irq_disable();
1548                 kvm_async_pf_task_wake(fault_address);
1549                 local_irq_enable();
1550                 break;
1551         }
1552         return r;
1553 }
1554
1555 static int db_interception(struct vcpu_svm *svm)
1556 {
1557         struct kvm_run *kvm_run = svm->vcpu.run;
1558
1559         if (!(svm->vcpu.guest_debug &
1560               (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) &&
1561                 !svm->nmi_singlestep) {
1562                 kvm_queue_exception(&svm->vcpu, DB_VECTOR);
1563                 return 1;
1564         }
1565
1566         if (svm->nmi_singlestep) {
1567                 svm->nmi_singlestep = false;
1568                 if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP))
1569                         svm->vmcb->save.rflags &=
1570                                 ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
1571                 update_db_intercept(&svm->vcpu);
1572         }
1573
1574         if (svm->vcpu.guest_debug &
1575             (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) {
1576                 kvm_run->exit_reason = KVM_EXIT_DEBUG;
1577                 kvm_run->debug.arch.pc =
1578                         svm->vmcb->save.cs.base + svm->vmcb->save.rip;
1579                 kvm_run->debug.arch.exception = DB_VECTOR;
1580                 return 0;
1581         }
1582
1583         return 1;
1584 }
1585
1586 static int bp_interception(struct vcpu_svm *svm)
1587 {
1588         struct kvm_run *kvm_run = svm->vcpu.run;
1589
1590         kvm_run->exit_reason = KVM_EXIT_DEBUG;
1591         kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip;
1592         kvm_run->debug.arch.exception = BP_VECTOR;
1593         return 0;
1594 }
1595
1596 static int ud_interception(struct vcpu_svm *svm)
1597 {
1598         int er;
1599
1600         er = emulate_instruction(&svm->vcpu, EMULTYPE_TRAP_UD);
1601         if (er != EMULATE_DONE)
1602                 kvm_queue_exception(&svm->vcpu, UD_VECTOR);
1603         return 1;
1604 }
1605
1606 static void svm_fpu_activate(struct kvm_vcpu *vcpu)
1607 {
1608         struct vcpu_svm *svm = to_svm(vcpu);
1609
1610         clr_exception_intercept(svm, NM_VECTOR);
1611
1612         svm->vcpu.fpu_active = 1;
1613         update_cr0_intercept(svm);
1614 }
1615
1616 static int nm_interception(struct vcpu_svm *svm)
1617 {
1618         svm_fpu_activate(&svm->vcpu);
1619         return 1;
1620 }
1621
1622 static bool is_erratum_383(void)
1623 {
1624         int err, i;
1625         u64 value;
1626
1627         if (!erratum_383_found)
1628                 return false;
1629
1630         value = native_read_msr_safe(MSR_IA32_MC0_STATUS, &err);
1631         if (err)
1632                 return false;
1633
1634         /* Bit 62 may or may not be set for this mce */
1635         value &= ~(1ULL << 62);
1636
1637         if (value != 0xb600000000010015ULL)
1638                 return false;
1639
1640         /* Clear MCi_STATUS registers */
1641         for (i = 0; i < 6; ++i)
1642                 native_write_msr_safe(MSR_IA32_MCx_STATUS(i), 0, 0);
1643
1644         value = native_read_msr_safe(MSR_IA32_MCG_STATUS, &err);
1645         if (!err) {
1646                 u32 low, high;
1647
1648                 value &= ~(1ULL << 2);
1649                 low    = lower_32_bits(value);
1650                 high   = upper_32_bits(value);
1651
1652                 native_write_msr_safe(MSR_IA32_MCG_STATUS, low, high);
1653         }
1654
1655         /* Flush tlb to evict multi-match entries */
1656         __flush_tlb_all();
1657
1658         return true;
1659 }
1660
1661 static void svm_handle_mce(struct vcpu_svm *svm)
1662 {
1663         if (is_erratum_383()) {
1664                 /*
1665                  * Erratum 383 triggered. Guest state is corrupt so kill the
1666                  * guest.
1667                  */
1668                 pr_err("KVM: Guest triggered AMD Erratum 383\n");
1669
1670                 kvm_make_request(KVM_REQ_TRIPLE_FAULT, &svm->vcpu);
1671
1672                 return;
1673         }
1674
1675         /*
1676          * On an #MC intercept the MCE handler is not called automatically in
1677          * the host. So do it by hand here.
1678          */
1679         asm volatile (
1680                 "int $0x12\n");
1681         /* not sure if we ever come back to this point */
1682
1683         return;
1684 }
1685
/* #MC intercept handler: does no work of its own and returns 1. */
static int mc_interception(struct vcpu_svm *svm)
{
        return 1;
}
1690
1691 static int shutdown_interception(struct vcpu_svm *svm)
1692 {
1693         struct kvm_run *kvm_run = svm->vcpu.run;
1694
1695         /*
1696          * VMCB is undefined after a SHUTDOWN intercept
1697          * so reinitialize it.
1698          */
1699         clear_page(svm->vmcb);
1700         init_vmcb(svm);
1701
1702         kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
1703         return 0;
1704 }
1705
1706 static int io_interception(struct vcpu_svm *svm)
1707 {
1708         struct kvm_vcpu *vcpu = &svm->vcpu;
1709         u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */
1710         int size, in, string;
1711         unsigned port;
1712
1713         ++svm->vcpu.stat.io_exits;
1714         string = (io_info & SVM_IOIO_STR_MASK) != 0;
1715         in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
1716         if (string || in)
1717                 return emulate_instruction(vcpu, 0) == EMULATE_DONE;
1718
1719         port = io_info >> 16;
1720         size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
1721         svm->next_rip = svm->vmcb->control.exit_info_2;
1722         skip_emulated_instruction(&svm->vcpu);
1723
1724         return kvm_fast_pio_out(vcpu, size, port);
1725 }
1726
/* NMI intercept handler: does no work of its own and returns 1. */
static int nmi_interception(struct vcpu_svm *svm)
{
        return 1;
}
1731
1732 static int intr_interception(struct vcpu_svm *svm)
1733 {
1734         ++svm->vcpu.stat.irq_exits;
1735         return 1;
1736 }
1737
/* Generic "do nothing" intercept handler; returns 1. */
static int nop_on_interception(struct vcpu_svm *svm)
{
        return 1;
}
1742
1743 static int halt_interception(struct vcpu_svm *svm)
1744 {
1745         svm->next_rip = kvm_rip_read(&svm->vcpu) + 1;
1746         skip_emulated_instruction(&svm->vcpu);
1747         return kvm_emulate_halt(&svm->vcpu);
1748 }
1749
1750 static int vmmcall_interception(struct vcpu_svm *svm)
1751 {
1752         svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
1753         skip_emulated_instruction(&svm->vcpu);
1754         kvm_emulate_hypercall(&svm->vcpu);
1755         return 1;
1756 }
1757
1758 static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu)
1759 {
1760         struct vcpu_svm *svm = to_svm(vcpu);
1761
1762         return svm->nested.nested_cr3;
1763 }
1764
1765 static void nested_svm_set_tdp_cr3(struct kvm_vcpu *vcpu,
1766                                    unsigned long root)
1767 {
1768         struct vcpu_svm *svm = to_svm(vcpu);
1769
1770         svm->vmcb->control.nested_cr3 = root;
1771         mark_dirty(svm->vmcb, VMCB_NPT);
1772         svm_flush_tlb(vcpu);
1773 }
1774
1775 static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
1776                                        struct x86_exception *fault)
1777 {
1778         struct vcpu_svm *svm = to_svm(vcpu);
1779
1780         svm->vmcb->control.exit_code = SVM_EXIT_NPF;
1781         svm->vmcb->control.exit_code_hi = 0;
1782         svm->vmcb->control.exit_info_1 = fault->error_code;
1783         svm->vmcb->control.exit_info_2 = fault->address;
1784
1785         nested_svm_vmexit(svm);
1786 }
1787
1788 static int nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
1789 {
1790         int r;
1791
1792         r = kvm_init_shadow_mmu(vcpu, &vcpu->arch.mmu);
1793
1794         vcpu->arch.mmu.set_cr3           = nested_svm_set_tdp_cr3;
1795         vcpu->arch.mmu.get_cr3           = nested_svm_get_tdp_cr3;
1796         vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit;
1797         vcpu->arch.mmu.shadow_root_level = get_npt_level();
1798         vcpu->arch.walk_mmu              = &vcpu->arch.nested_mmu;
1799
1800         return r;
1801 }
1802
1803 static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
1804 {
1805         vcpu->arch.walk_mmu = &vcpu->arch.mmu;
1806 }
1807
1808 static int nested_svm_check_permissions(struct vcpu_svm *svm)
1809 {
1810         if (!(svm->vcpu.arch.efer & EFER_SVME)
1811             || !is_paging(&svm->vcpu)) {
1812                 kvm_queue_exception(&svm->vcpu, UD_VECTOR);
1813                 return 1;
1814         }
1815
1816         if (svm->vmcb->save.cpl) {
1817                 kvm_inject_gp(&svm->vcpu, 0);
1818                 return 1;
1819         }
1820
1821        return 0;
1822 }
1823
1824 static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
1825                                       bool has_error_code, u32 error_code)
1826 {
1827         int vmexit;
1828
1829         if (!is_guest_mode(&svm->vcpu))
1830                 return 0;
1831
1832         svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
1833         svm->vmcb->control.exit_code_hi = 0;
1834         svm->vmcb->control.exit_info_1 = error_code;
1835         svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
1836
1837         vmexit = nested_svm_intercept(svm);
1838         if (vmexit == NESTED_EXIT_DONE)
1839                 svm->nested.exit_required = true;
1840
1841         return vmexit;
1842 }
1843
1844 /* This function returns true if it is save to enable the irq window */
1845 static inline bool nested_svm_intr(struct vcpu_svm *svm)
1846 {
1847         if (!is_guest_mode(&svm->vcpu))
1848                 return true;
1849
1850         if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
1851                 return true;
1852
1853         if (!(svm->vcpu.arch.hflags & HF_HIF_MASK))
1854                 return false;
1855
1856         /*
1857          * if vmexit was already requested (by intercepted exception
1858          * for instance) do not overwrite it with "external interrupt"
1859          * vmexit.
1860          */
1861         if (svm->nested.exit_required)
1862                 return false;
1863
1864         svm->vmcb->control.exit_code   = SVM_EXIT_INTR;
1865         svm->vmcb->control.exit_info_1 = 0;
1866         svm->vmcb->control.exit_info_2 = 0;
1867
1868         if (svm->nested.intercept & 1ULL) {
1869                 /*
1870                  * The #vmexit can't be emulated here directly because this
1871                  * code path runs with irqs and preemtion disabled. A
1872                  * #vmexit emulation might sleep. Only signal request for
1873                  * the #vmexit here.
1874                  */
1875                 svm->nested.exit_required = true;
1876                 trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
1877                 return false;
1878         }
1879
1880         return true;
1881 }
1882
1883 /* This function returns true if it is save to enable the nmi window */
1884 static inline bool nested_svm_nmi(struct vcpu_svm *svm)
1885 {
1886         if (!is_guest_mode(&svm->vcpu))
1887                 return true;
1888
1889         if (!(svm->nested.intercept & (1ULL << INTERCEPT_NMI)))
1890                 return true;
1891
1892         svm->vmcb->control.exit_code = SVM_EXIT_NMI;
1893         svm->nested.exit_required = true;
1894
1895         return false;
1896 }
1897
1898 static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, struct page **_page)
1899 {
1900         struct page *page;
1901
1902         might_sleep();
1903
1904         page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT);
1905         if (is_error_page(page))
1906                 goto error;
1907
1908         *_page = page;
1909
1910         return kmap(page);
1911
1912 error:
1913         kvm_release_page_clean(page);
1914         kvm_inject_gp(&svm->vcpu, 0);
1915
1916         return NULL;
1917 }
1918
/*
 * Counterpart of nested_svm_map(): drop the kernel mapping of @page and
 * then release the page reference, marking the page dirty.
 */
static void nested_svm_unmap(struct page *page)
{
	kunmap(page);
	kvm_release_page_dirty(page);
}
1924
1925 static int nested_svm_intercept_ioio(struct vcpu_svm *svm)
1926 {
1927         unsigned port;
1928         u8 val, bit;
1929         u64 gpa;
1930
1931         if (!(svm->nested.intercept & (1ULL << INTERCEPT_IOIO_PROT)))
1932                 return NESTED_EXIT_HOST;
1933
1934         port = svm->vmcb->control.exit_info_1 >> 16;
1935         gpa  = svm->nested.vmcb_iopm + (port / 8);
1936         bit  = port % 8;
1937         val  = 0;
1938
1939         if (kvm_read_guest(svm->vcpu.kvm, gpa, &val, 1))
1940                 val &= (1 << bit);
1941
1942         return val ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
1943 }
1944
1945 static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
1946 {
1947         u32 offset, msr, value;
1948         int write, mask;
1949
1950         if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT)))
1951                 return NESTED_EXIT_HOST;
1952
1953         msr    = svm->vcpu.arch.regs[VCPU_REGS_RCX];
1954         offset = svm_msrpm_offset(msr);
1955         write  = svm->vmcb->control.exit_info_1 & 1;
1956         mask   = 1 << ((2 * (msr & 0xf)) + write);
1957
1958         if (offset == MSR_INVALID)
1959                 return NESTED_EXIT_DONE;
1960
1961         /* Offset is in 32 bit units but need in 8 bit units */
1962         offset *= 4;
1963
1964         if (kvm_read_guest(svm->vcpu.kvm, svm->nested.vmcb_msrpm + offset, &value, 4))
1965                 return NESTED_EXIT_DONE;
1966
1967         return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
1968 }
1969
1970 static int nested_svm_exit_special(struct vcpu_svm *svm)
1971 {
1972         u32 exit_code = svm->vmcb->control.exit_code;
1973
1974         switch (exit_code) {
1975         case SVM_EXIT_INTR:
1976         case SVM_EXIT_NMI:
1977         case SVM_EXIT_EXCP_BASE + MC_VECTOR:
1978                 return NESTED_EXIT_HOST;
1979         case SVM_EXIT_NPF:
1980                 /* For now we are always handling NPFs when using them */
1981                 if (npt_enabled)
1982                         return NESTED_EXIT_HOST;
1983                 break;
1984         case SVM_EXIT_EXCP_BASE + PF_VECTOR:
1985                 /* When we're shadowing, trap PFs, but not async PF */
1986                 if (!npt_enabled && svm->apf_reason == 0)
1987                         return NESTED_EXIT_HOST;
1988                 break;
1989         case SVM_EXIT_EXCP_BASE + NM_VECTOR:
1990                 nm_interception(svm);
1991                 break;
1992         default:
1993                 break;
1994         }
1995
1996         return NESTED_EXIT_CONTINUE;
1997 }
1998
1999 /*
2000  * If this function returns true, this #vmexit was already handled
2001  */
2002 static int nested_svm_intercept(struct vcpu_svm *svm)
2003 {
2004         u32 exit_code = svm->vmcb->control.exit_code;
2005         int vmexit = NESTED_EXIT_HOST;
2006
2007         switch (exit_code) {
2008         case SVM_EXIT_MSR:
2009                 vmexit = nested_svm_exit_handled_msr(svm);
2010                 break;
2011         case SVM_EXIT_IOIO:
2012                 vmexit = nested_svm_intercept_ioio(svm);
2013                 break;
2014         case SVM_EXIT_READ_CR0 ... SVM_EXIT_WRITE_CR8: {
2015                 u32 bit = 1U << (exit_code - SVM_EXIT_READ_CR0);
2016                 if (svm->nested.intercept_cr & bit)
2017                         vmexit = NESTED_EXIT_DONE;
2018                 break;
2019         }
2020         case SVM_EXIT_READ_DR0 ... SVM_EXIT_WRITE_DR7: {
2021                 u32 bit = 1U << (exit_code - SVM_EXIT_READ_DR0);
2022                 if (svm->nested.intercept_dr & bit)
2023                         vmexit = NESTED_EXIT_DONE;
2024                 break;
2025         }
2026         case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
2027                 u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
2028                 if (svm->nested.intercept_exceptions & excp_bits)
2029                         vmexit = NESTED_EXIT_DONE;
2030                 /* async page fault always cause vmexit */
2031                 else if ((exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) &&
2032                          svm->apf_reason != 0)
2033                         vmexit = NESTED_EXIT_DONE;
2034                 break;
2035         }
2036         case SVM_EXIT_ERR: {
2037                 vmexit = NESTED_EXIT_DONE;
2038                 break;
2039         }
2040         default: {
2041                 u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR);
2042                 if (svm->nested.intercept & exit_bits)
2043                         vmexit = NESTED_EXIT_DONE;
2044         }
2045         }
2046
2047         return vmexit;
2048 }
2049
2050 static int nested_svm_exit_handled(struct vcpu_svm *svm)
2051 {
2052         int vmexit;
2053
2054         vmexit = nested_svm_intercept(svm);
2055
2056         if (vmexit == NESTED_EXIT_DONE)
2057                 nested_svm_vmexit(svm);
2058
2059         return vmexit;
2060 }
2061
2062 static inline void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *from_vmcb)
2063 {
2064         struct vmcb_control_area *dst  = &dst_vmcb->control;
2065         struct vmcb_control_area *from = &from_vmcb->control;
2066
2067         dst->intercept_cr         = from->intercept_cr;
2068         dst->intercept_dr         = from->intercept_dr;
2069         dst->intercept_exceptions = from->intercept_exceptions;
2070         dst->intercept            = from->intercept;
2071         dst->iopm_base_pa         = from->iopm_base_pa;
2072         dst->msrpm_base_pa        = from->msrpm_base_pa;
2073         dst->tsc_offset           = from->tsc_offset;
2074         dst->asid                 = from->asid;
2075         dst->tlb_ctl              = from->tlb_ctl;
2076         dst->int_ctl              = from->int_ctl;
2077         dst->int_vector           = from->int_vector;
2078         dst->int_state            = from->int_state;
2079         dst->exit_code            = from->exit_code;
2080         dst->exit_code_hi         = from->exit_code_hi;
2081         dst->exit_info_1          = from->exit_info_1;
2082         dst->exit_info_2          = from->exit_info_2;
2083         dst->exit_int_info        = from->exit_int_info;
2084         dst->exit_int_info_err    = from->exit_int_info_err;
2085         dst->nested_ctl           = from->nested_ctl;
2086         dst->event_inj            = from->event_inj;
2087         dst->event_inj_err        = from->event_inj_err;
2088         dst->nested_cr3           = from->nested_cr3;
2089         dst->lbr_ctl              = from->lbr_ctl;
2090 }
2091
2092 static int nested_svm_vmexit(struct vcpu_svm *svm)
2093 {
2094         struct vmcb *nested_vmcb;
2095         struct vmcb *hsave = svm->nested.hsave;
2096         struct vmcb *vmcb = svm->vmcb;
2097         struct page *page;
2098
2099         trace_kvm_nested_vmexit_inject(vmcb->control.exit_code,
2100                                        vmcb->control.exit_info_1,
2101                                        vmcb->control.exit_info_2,
2102                                        vmcb->control.exit_int_info,
2103                                        vmcb->control.exit_int_info_err);
2104
2105         nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, &page);
2106         if (!nested_vmcb)
2107                 return 1;
2108
2109         /* Exit Guest-Mode */
2110         leave_guest_mode(&svm->vcpu);
2111         svm->nested.vmcb = 0;
2112
2113         /* Give the current vmcb to the guest */
2114         disable_gif(svm);
2115
2116         nested_vmcb->save.es     = vmcb->save.es;
2117         nested_vmcb->save.cs     = vmcb->save.cs;
2118         nested_vmcb->save.ss     = vmcb->save.ss;
2119         nested_vmcb->save.ds     = vmcb->save.ds;
2120         nested_vmcb->save.gdtr   = vmcb->save.gdtr;
2121         nested_vmcb->save.idtr   = vmcb->save.idtr;
2122         nested_vmcb->save.efer   = svm->vcpu.arch.efer;
2123         nested_vmcb->save.cr0    = kvm_read_cr0(&svm->vcpu);
2124         nested_vmcb->save.cr3    = kvm_read_cr3(&svm->vcpu);
2125         nested_vmcb->save.cr2    = vmcb->save.cr2;
2126         nested_vmcb->save.cr4    = svm->vcpu.arch.cr4;
2127         nested_vmcb->save.rflags = kvm_get_rflags(&svm->vcpu);
2128         nested_vmcb->save.rip    = vmcb->save.rip;
2129         nested_vmcb->save.rsp    = vmcb->save.rsp;
2130         nested_vmcb->save.rax    = vmcb->save.rax;
2131         nested_vmcb->save.dr7    = vmcb->save.dr7;
2132         nested_vmcb->save.dr6    = vmcb->save.dr6;
2133         nested_vmcb->save.cpl    = vmcb->save.cpl;
2134
2135         nested_vmcb->control.int_ctl           = vmcb->control.int_ctl;
2136         nested_vmcb->control.int_vector        = vmcb->control.int_vector;
2137         nested_vmcb->control.int_state         = vmcb->control.int_state;
2138         nested_vmcb->control.exit_code         = vmcb->control.exit_code;
2139         nested_vmcb->control.exit_code_hi      = vmcb->control.exit_code_hi;
2140         nested_vmcb->control.exit_info_1       = vmcb->control.exit_info_1;
2141         nested_vmcb->control.exit_info_2       = vmcb->control.exit_info_2;
2142         nested_vmcb->control.exit_int_info     = vmcb->control.exit_int_info;
2143         nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err;
2144         nested_vmcb->control.next_rip          = vmcb->control.next_rip;
2145
2146         /*
2147          * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have
2148          * to make sure that we do not lose injected events. So check event_inj
2149          * here and copy it to exit_int_info if it is valid.
2150          * Exit_int_info and event_inj can't be both valid because the case
2151          * below only happens on a VMRUN instruction intercept which has
2152          * no valid exit_int_info set.
2153          */
2154         if (vmcb->control.event_inj & SVM_EVTINJ_VALID) {
2155                 struct vmcb_control_area *nc = &nested_vmcb->control;
2156
2157                 nc->exit_int_info     = vmcb->control.event_inj;
2158                 nc->exit_int_info_err = vmcb->control.event_inj_err;
2159         }
2160
2161         nested_vmcb->control.tlb_ctl           = 0;
2162         nested_vmcb->control.event_inj         = 0;
2163         nested_vmcb->control.event_inj_err     = 0;
2164
2165         /* We always set V_INTR_MASKING and remember the old value in hflags */
2166         if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
2167                 nested_vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
2168
2169         /* Restore the original control entries */
2170         copy_vmcb_control_area(vmcb, hsave);
2171
2172         kvm_clear_exception_queue(&svm->vcpu);
2173         kvm_clear_interrupt_queue(&svm->vcpu);
2174
2175         svm->nested.nested_cr3 = 0;
2176
2177         /* Restore selected save entries */
2178         svm->vmcb->save.es = hsave->save.es;
2179         svm->vmcb->save.cs = hsave->save.cs;
2180         svm->vmcb->save.ss = hsave->save.ss;
2181         svm->vmcb->save.ds = hsave->save.ds;
2182         svm->vmcb->save.gdtr = hsave->save.gdtr;
2183         svm->vmcb->save.idtr = hsave->save.idtr;
2184         kvm_set_rflags(&svm->vcpu, hsave->save.rflags);
2185         svm_set_efer(&svm->vcpu, hsave->save.efer);
2186         svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE);
2187         svm_set_cr4(&svm->vcpu, hsave->save.cr4);
2188         if (npt_enabled) {
2189                 svm->vmcb->save.cr3 = hsave->save.cr3;
2190                 svm->vcpu.arch.cr3 = hsave->save.cr3;
2191         } else {
2192                 (void)kvm_set_cr3(&svm->vcpu, hsave->save.cr3);
2193         }
2194         kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, hsave->save.rax);
2195         kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, hsave->save.rsp);
2196         kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, hsave->save.rip);
2197         svm->vmcb->save.dr7 = 0;
2198         svm->vmcb->save.cpl = 0;
2199         svm->vmcb->control.exit_int_info = 0;
2200
2201         mark_all_dirty(svm->vmcb);
2202
2203         nested_svm_unmap(page);
2204
2205         nested_svm_uninit_mmu_context(&svm->vcpu);
2206         kvm_mmu_reset_context(&svm->vcpu);
2207         kvm_mmu_load(&svm->vcpu);
2208
2209         return 0;
2210 }
2211
2212 static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
2213 {
2214         /*
2215          * This function merges the msr permission bitmaps of kvm and the
2216          * nested vmcb. It is omptimized in that it only merges the parts where
2217          * the kvm msr permission bitmap may contain zero bits
2218          */
2219         int i;
2220
2221         if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT)))
2222                 return true;
2223
2224         for (i = 0; i < MSRPM_OFFSETS; i++) {
2225                 u32 value, p;
2226                 u64 offset;
2227
2228                 if (msrpm_offsets[i] == 0xffffffff)
2229                         break;
2230
2231                 p      = msrpm_offsets[i];
2232                 offset = svm->nested.vmcb_msrpm + (p * 4);
2233
2234                 if (kvm_read_guest(svm->vcpu.kvm, offset, &value, 4))
2235                         return false;
2236
2237                 svm->nested.msrpm[p] = svm->msrpm[p] | value;
2238         }
2239
2240         svm->vmcb->control.msrpm_base_pa = __pa(svm->nested.msrpm);
2241
2242         return true;
2243 }
2244
2245 static bool nested_vmcb_checks(struct vmcb *vmcb)
2246 {
2247         if ((vmcb->control.intercept & (1ULL << INTERCEPT_VMRUN)) == 0)
2248                 return false;
2249
2250         if (vmcb->control.asid == 0)
2251                 return false;
2252
2253         if (vmcb->control.nested_ctl && !npt_enabled)
2254                 return false;
2255
2256         return true;
2257 }
2258
2259 static bool nested_svm_vmrun(struct vcpu_svm *svm)
2260 {
2261         struct vmcb *nested_vmcb;
2262         struct vmcb *hsave = svm->nested.hsave;
2263         struct vmcb *vmcb = svm->vmcb;
2264         struct page *page;
2265         u64 vmcb_gpa;
2266
2267         vmcb_gpa = svm->vmcb->save.rax;
2268
2269         nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
2270         if (!nested_vmcb)
2271                 return false;
2272
2273         if (!nested_vmcb_checks(nested_vmcb)) {
2274                 nested_vmcb->control.exit_code    = SVM_EXIT_ERR;
2275                 nested_vmcb->control.exit_code_hi = 0;
2276                 nested_vmcb->control.exit_info_1  = 0;
2277                 nested_vmcb->control.exit_info_2  = 0;
2278
2279                 nested_svm_unmap(page);
2280
2281                 return false;
2282         }
2283
2284         trace_kvm_nested_vmrun(svm->vmcb->save.rip, vmcb_gpa,
2285                                nested_vmcb->save.rip,
2286                                nested_vmcb->control.int_ctl,
2287                                nested_vmcb->control.event_inj,
2288                                nested_vmcb->control.nested_ctl);
2289
2290         trace_kvm_nested_intercepts(nested_vmcb->control.intercept_cr & 0xffff,
2291                                     nested_vmcb->control.intercept_cr >> 16,
2292                                     nested_vmcb->control.intercept_exceptions,
2293                                     nested_vmcb->control.intercept);
2294
2295         /* Clear internal status */
2296         kvm_clear_exception_queue(&svm->vcpu);
2297         kvm_clear_interrupt_queue(&svm->vcpu);
2298
2299         /*
2300          * Save the old vmcb, so we don't need to pick what we save, but can
2301          * restore everything when a VMEXIT occurs
2302          */
2303         hsave->save.es     = vmcb->save.es;
2304         hsave->save.cs     = vmcb->save.cs;
2305         hsave->save.ss     = vmcb->save.ss;
2306         hsave->save.ds     = vmcb->save.ds;
2307         hsave->save.gdtr   = vmcb->save.gdtr;
2308         hsave->save.idtr   = vmcb->save.idtr;
2309         hsave->save.efer   = svm->vcpu.arch.efer;
2310         hsave->save.cr0    = kvm_read_cr0(&svm->vcpu);
2311         hsave->save.cr4    = svm->vcpu.arch.cr4;
2312         hsave->save.rflags = kvm_get_rflags(&svm->vcpu);
2313         hsave->save.rip    = kvm_rip_read(&svm->vcpu);
2314         hsave->save.rsp    = vmcb->save.rsp;
2315         hsave->save.rax    = vmcb->save.rax;
2316         if (npt_enabled)
2317                 hsave->save.cr3    = vmcb->save.cr3;
2318         else
2319                 hsave->save.cr3    = kvm_read_cr3(&svm->vcpu);
2320
2321         copy_vmcb_control_area(hsave, vmcb);
2322
2323         if (kvm_get_rflags(&svm->vcpu) & X86_EFLAGS_IF)
2324                 svm->vcpu.arch.hflags |= HF_HIF_MASK;
2325         else
2326                 svm->vcpu.arch.hflags &= ~HF_HIF_MASK;
2327
2328         if (nested_vmcb->control.nested_ctl) {
2329                 kvm_mmu_unload(&svm->vcpu);
2330                 svm->nested.nested_cr3 = nested_vmcb->control.nested_cr3;
2331                 nested_svm_init_mmu_context(&svm->vcpu);
2332         }
2333
2334         /* Load the nested guest state */
2335         svm->vmcb->save.es = nested_vmcb->save.es;
2336         svm->vmcb->save.cs = nested_vmcb->save.cs;
2337         svm->vmcb->save.ss = nested_vmcb->save.ss;
2338         svm->vmcb->save.ds = nested_vmcb->save.ds;
2339         svm->vmcb->save.gdtr = nested_vmcb->save.gdtr;
2340         svm->vmcb->save.idtr = nested_vmcb->save.idtr;
2341         kvm_set_rflags(&svm->vcpu, nested_vmcb->save.rflags);
2342         svm_set_efer(&svm->vcpu, nested_vmcb->save.efer);
2343         svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0);
2344         svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4);
2345         if (npt_enabled) {
2346                 svm->vmcb->save.cr3 = nested_vmcb->save.cr3;
2347                 svm->vcpu.arch.cr3 = nested_vmcb->save.cr3;
2348         } else
2349                 (void)kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3);
2350
2351         /* Guest paging mode is active - reset mmu */
2352         kvm_mmu_reset_context(&svm->vcpu);
2353
2354         svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2;
2355         kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax);
2356         kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, nested_vmcb->save.rsp);
2357         kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, nested_vmcb->save.rip);
2358
2359         /* In case we don't even reach vcpu_run, the fields are not updated */
2360         svm->vmcb->save.rax = nested_vmcb->save.rax;
2361         svm->vmcb->save.rsp = nested_vmcb->save.rsp;
2362         svm->vmcb->save.rip = nested_vmcb->save.rip;
2363         svm->vmcb->save.dr7 = nested_vmcb->save.dr7;
2364         svm->vmcb->save.dr6 = nested_vmcb->save.dr6;
2365         svm->vmcb->save.cpl = nested_vmcb->save.cpl;
2366
2367         svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa & ~0x0fffULL;
2368         svm->nested.vmcb_iopm  = nested_vmcb->control.iopm_base_pa  & ~0x0fffULL;
2369
2370         /* cache intercepts */
2371         svm->nested.intercept_cr         = nested_vmcb->control.intercept_cr;
2372         svm->nested.intercept_dr         = nested_vmcb->control.intercept_dr;
2373         svm->nested.intercept_exceptions = nested_vmcb->control.intercept_exceptions;
2374         svm->nested.intercept            = nested_vmcb->control.intercept;
2375
2376         svm_flush_tlb(&svm->vcpu);
2377         svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
2378         if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
2379                 svm->vcpu.arch.hflags |= HF_VINTR_MASK;
2380         else
2381                 svm->vcpu.arch.hflags &= ~HF_VINTR_MASK;
2382
2383         if (svm->vcpu.arch.hflags & HF_VINTR_MASK) {
2384                 /* We only want the cr8 intercept bits of the guest */
2385                 clr_cr_intercept(svm, INTERCEPT_CR8_READ);
2386                 clr_cr_intercept(svm, INTERCEPT_CR8_WRITE);
2387         }
2388
2389         /* We don't want to see VMMCALLs from a nested guest */
2390         clr_intercept(svm, INTERCEPT_VMMCALL);
2391
2392         svm->vmcb->control.lbr_ctl = nested_vmcb->control.lbr_ctl;
2393         svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
2394         svm->vmcb->control.int_state = nested_vmcb->control.int_state;
2395         svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset;
2396         svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
2397         svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;
2398
2399         nested_svm_unmap(page);
2400
2401         /* Enter Guest-Mode */
2402         enter_guest_mode(&svm->vcpu);
2403
2404         /*
2405          * Merge guest and host intercepts - must be called  with vcpu in
2406          * guest-mode to take affect here
2407          */
2408         recalc_intercepts(svm);
2409
2410         svm->nested.vmcb = vmcb_gpa;
2411
2412         enable_gif(svm);
2413
2414         mark_all_dirty(svm->vmcb);
2415
2416         return true;
2417 }
2418
2419 static void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
2420 {
2421         to_vmcb->save.fs = from_vmcb->save.fs;
2422         to_vmcb->save.gs = from_vmcb->save.gs;
2423         to_vmcb->save.tr = from_vmcb->save.tr;
2424         to_vmcb->save.ldtr = from_vmcb->save.ldtr;
2425         to_vmcb->save.kernel_gs_base = from_vmcb->save.kernel_gs_base;
2426         to_vmcb->save.star = from_vmcb->save.star;
2427         to_vmcb->save.lstar = from_vmcb->save.lstar;
2428         to_vmcb->save.cstar = from_vmcb->save.cstar;
2429         to_vmcb->save.sfmask = from_vmcb->save.sfmask;
2430         to_vmcb->save.sysenter_cs = from_vmcb->save.sysenter_cs;
2431         to_vmcb->save.sysenter_esp = from_vmcb->save.sysenter_esp;
2432         to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip;
2433 }
2434
2435 static int vmload_interception(struct vcpu_svm *svm)
2436 {
2437         struct vmcb *nested_vmcb;
2438         struct page *page;
2439
2440         if (nested_svm_check_permissions(svm))
2441                 return 1;
2442
2443         svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
2444         skip_emulated_instruction(&svm->vcpu);
2445
2446         nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
2447         if (!nested_vmcb)
2448                 return 1;
2449
2450         nested_svm_vmloadsave(nested_vmcb, svm->vmcb);
2451         nested_svm_unmap(page);
2452
2453         return 1;
2454 }
2455
2456 static int vmsave_interception(struct vcpu_svm *svm)
2457 {
2458         struct vmcb *nested_vmcb;
2459         struct page *page;
2460
2461         if (nested_svm_check_permissions(svm))
2462                 return 1;
2463
2464         svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
2465         skip_emulated_instruction(&svm->vcpu);
2466
2467         nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
2468         if (!nested_vmcb)
2469                 return 1;
2470
2471         nested_svm_vmloadsave(svm->vmcb, nested_vmcb);
2472         nested_svm_unmap(page);
2473
2474         return 1;
2475 }
2476
2477 static int vmrun_interception(struct vcpu_svm *svm)
2478 {
2479         if (nested_svm_check_permissions(svm))
2480                 return 1;
2481
2482         /* Save rip after vmrun instruction */
2483         kvm_rip_write(&svm->vcpu, kvm_rip_read(&svm->vcpu) + 3);
2484
2485         if (!nested_svm_vmrun(svm))
2486                 return 1;
2487
2488         if (!nested_svm_vmrun_msrpm(svm))
2489                 goto failed;
2490
2491         return 1;
2492
2493 failed:
2494
2495         svm->vmcb->control.exit_code    = SVM_EXIT_ERR;
2496         svm->vmcb->control.exit_code_hi = 0;
2497         svm->vmcb->control.exit_info_1  = 0;
2498         svm->vmcb->control.exit_info_2  = 0;
2499
2500         nested_svm_vmexit(svm);
2501
2502         return 1;
2503 }
2504
2505 static int stgi_interception(struct vcpu_svm *svm)
2506 {
2507         if (nested_svm_check_permissions(svm))
2508                 return 1;
2509
2510         svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
2511         skip_emulated_instruction(&svm->vcpu);
2512         kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
2513
2514         enable_gif(svm);
2515
2516         return 1;
2517 }
2518
2519 static int clgi_interception(struct vcpu_svm *svm)
2520 {
2521         if (nested_svm_check_permissions(svm))
2522                 return 1;
2523
2524         svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
2525         skip_emulated_instruction(&svm->vcpu);
2526
2527         disable_gif(svm);
2528
2529         /* After a CLGI no interrupts should come */
2530         svm_clear_vintr(svm);
2531         svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
2532
2533         mark_dirty(svm->vmcb, VMCB_INTR);
2534
2535         return 1;
2536 }
2537
2538 static int invlpga_interception(struct vcpu_svm *svm)
2539 {
2540         struct kvm_vcpu *vcpu = &svm->vcpu;
2541
2542         trace_kvm_invlpga(svm->vmcb->save.rip, vcpu->arch.regs[VCPU_REGS_RCX],
2543                           vcpu->arch.regs[VCPU_REGS_RAX]);
2544
2545         /* Let's treat INVLPGA the same as INVLPG (can be optimized!) */
2546         kvm_mmu_invlpg(vcpu, vcpu->arch.regs[VCPU_REGS_RAX]);
2547
2548         svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
2549         skip_emulated_instruction(&svm->vcpu);
2550         return 1;
2551 }
2552
2553 static int skinit_interception(struct vcpu_svm *svm)
2554 {
2555         trace_kvm_skinit(svm->vmcb->save.rip, svm->vcpu.arch.regs[VCPU_REGS_RAX]);
2556
2557         kvm_queue_exception(&svm->vcpu, UD_VECTOR);
2558         return 1;
2559 }
2560
2561 static int xsetbv_interception(struct vcpu_svm *svm)
2562 {
2563         u64 new_bv = kvm_read_edx_eax(&svm->vcpu);
2564         u32 index = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX);
2565
2566         if (kvm_set_xcr(&svm->vcpu, index, new_bv) == 0) {
2567                 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
2568                 skip_emulated_instruction(&svm->vcpu);
2569         }
2570
2571         return 1;
2572 }
2573
2574 static int invalid_op_interception(struct vcpu_svm *svm)
2575 {
2576         kvm_queue_exception(&svm->vcpu, UD_VECTOR);
2577         return 1;
2578 }
2579
2580 static int task_switch_interception(struct vcpu_svm *svm)
2581 {
2582         u16 tss_selector;
2583         int reason;
2584         int int_type = svm->vmcb->control.exit_int_info &
2585                 SVM_EXITINTINFO_TYPE_MASK;
2586         int int_vec = svm->vmcb->control.exit_int_info & SVM_EVTINJ_VEC_MASK;
2587         uint32_t type =
2588                 svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_TYPE_MASK;
2589         uint32_t idt_v =
2590                 svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID;
2591         bool has_error_code = false;
2592         u32 error_code = 0;
2593
2594         tss_selector = (u16)svm->vmcb->control.exit_info_1;
2595
2596         if (svm->vmcb->control.exit_info_2 &
2597             (1ULL << SVM_EXITINFOSHIFT_TS_REASON_IRET))
2598                 reason = TASK_SWITCH_IRET;
2599         else if (svm->vmcb->control.exit_info_2 &
2600                  (1ULL << SVM_EXITINFOSHIFT_TS_REASON_JMP))
2601                 reason = TASK_SWITCH_JMP;
2602         else if (idt_v)
2603                 reason = TASK_SWITCH_GATE;
2604         else
2605                 reason = TASK_SWITCH_CALL;
2606
2607         if (reason == TASK_SWITCH_GATE) {
2608                 switch (type) {
2609                 case SVM_EXITINTINFO_TYPE_NMI:
2610                         svm->vcpu.arch.nmi_injected = false;
2611                         break;
2612                 case SVM_EXITINTINFO_TYPE_EXEPT:
2613                         if (svm->vmcb->control.exit_info_2 &
2614                             (1ULL << SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE)) {
2615                                 has_error_code = true;
2616                                 error_code =
2617                                         (u32)svm->vmcb->control.exit_info_2;
2618                         }
2619                         kvm_clear_exception_queue(&svm->vcpu);
2620                         break;
2621                 case SVM_EXITINTINFO_TYPE_INTR:
2622                         kvm_clear_interrupt_queue(&svm->vcpu);
2623                         break;
2624                 default:
2625                         break;
2626                 }
2627         }
2628
2629         if (reason != TASK_SWITCH_GATE ||
2630             int_type == SVM_EXITINTINFO_TYPE_SOFT ||
2631             (int_type == SVM_EXITINTINFO_TYPE_EXEPT &&
2632              (int_vec == OF_VECTOR || int_vec == BP_VECTOR)))
2633                 skip_emulated_instruction(&svm->vcpu);
2634
2635         if (kvm_task_switch(&svm->vcpu, tss_selector, reason,
2636                                 has_error_code, error_code) == EMULATE_FAIL) {
2637                 svm->vcpu.run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
2638                 svm->vcpu.run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
2639                 svm->vcpu.run->internal.ndata = 0;
2640                 return 0;
2641         }
2642         return 1;
2643 }
2644
2645 static int cpuid_interception(struct vcpu_svm *svm)
2646 {
2647         svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
2648         kvm_emulate_cpuid(&svm->vcpu);
2649         return 1;
2650 }
2651
2652 static int iret_interception(struct vcpu_svm *svm)
2653 {
2654         ++svm->vcpu.stat.nmi_window_exits;
2655         clr_intercept(svm, INTERCEPT_IRET);
2656         svm->vcpu.arch.hflags |= HF_IRET_MASK;
2657         svm->nmi_iret_rip = kvm_rip_read(&svm->vcpu);
2658         return 1;
2659 }
2660
2661 static int invlpg_interception(struct vcpu_svm *svm)
2662 {
2663         if (!static_cpu_has(X86_FEATURE_DECODEASSISTS))
2664                 return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
2665
2666         kvm_mmu_invlpg(&svm->vcpu, svm->vmcb->control.exit_info_1);
2667         skip_emulated_instruction(&svm->vcpu);
2668         return 1;
2669 }
2670
2671 static int emulate_on_interception(struct vcpu_svm *svm)
2672 {
2673         return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
2674 }
2675
2676 #define CR_VALID (1ULL << 63)
2677
2678 static int cr_interception(struct vcpu_svm *svm)
2679 {
2680         int reg, cr;
2681         unsigned long val;
2682         int err;
2683
2684         if (!static_cpu_has(X86_FEATURE_DECODEASSISTS))
2685                 return emulate_on_interception(svm);
2686
2687         if (unlikely((svm->vmcb->control.exit_info_1 & CR_VALID) == 0))
2688                 return emulate_on_interception(svm);
2689
2690         reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK;
2691         cr = svm->vmcb->control.exit_code - SVM_EXIT_READ_CR0;
2692
2693         err = 0;
2694         if (cr >= 16) { /* mov to cr */
2695                 cr -= 16;
2696                 val = kvm_register_read(&svm->vcpu, reg);
2697                 switch (cr) {
2698                 case 0:
2699                         err = kvm_set_cr0(&svm->vcpu, val);
2700                         break;
2701                 case 3:
2702                         err = kvm_set_cr3(&svm->vcpu, val);
2703                         break;
2704                 case 4:
2705                         err = kvm_set_cr4(&svm->vcpu, val);
2706                         break;
2707                 case 8:
2708                         err = kvm_set_cr8(&svm->vcpu, val);
2709                         break;
2710                 default:
2711                         WARN(1, "unhandled write to CR%d", cr);
2712                         kvm_queue_exception(&svm->vcpu, UD_VECTOR);
2713                         return 1;
2714                 }
2715         } else { /* mov from cr */
2716                 switch (cr) {
2717                 case 0:
2718                         val = kvm_read_cr0(&svm->vcpu);
2719                         break;
2720                 case 2:
2721                         val = svm->vcpu.arch.cr2;
2722                         break;
2723                 case 3:
2724                         val = kvm_read_cr3(&svm->vcpu);
2725                         break;
2726                 case 4:
2727                         val = kvm_read_cr4(&svm->vcpu);
2728                         break;
2729                 case 8:
2730                         val = kvm_get_cr8(&svm->vcpu);
2731                         break;
2732                 default:
2733                         WARN(1, "unhandled read from CR%d", cr);
2734                         kvm_queue_exception(&svm->vcpu, UD_VECTOR);
2735                         return 1;
2736                 }
2737                 kvm_register_write(&svm->vcpu, reg, val);
2738         }
2739         kvm_complete_insn_gp(&svm->vcpu, err);
2740
2741         return 1;
2742 }
2743
2744 static int cr0_write_interception(struct vcpu_svm *svm)
2745 {
2746         struct kvm_vcpu *vcpu = &svm->vcpu;
2747         int r;
2748
2749         r = cr_interception(svm);
2750
2751         if (svm->nested.vmexit_rip) {
2752                 kvm_register_write(vcpu, VCPU_REGS_RIP, svm->nested.vmexit_rip);
2753                 kvm_register_write(vcpu, VCPU_REGS_RSP, svm->nested.vmexit_rsp);
2754                 kvm_register_write(vcpu, VCPU_REGS_RAX, svm->nested.vmexit_rax);
2755                 svm->nested.vmexit_rip = 0;
2756         }
2757
2758         return r;
2759 }
2760
2761 static int dr_interception(struct vcpu_svm *svm)
2762 {
2763         int reg, dr;
2764         unsigned long val;
2765         int err;
2766
2767         if (!boot_cpu_has(X86_FEATURE_DECODEASSISTS))
2768                 return emulate_on_interception(svm);
2769
2770         reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK;
2771         dr = svm->vmcb->control.exit_code - SVM_EXIT_READ_DR0;
2772
2773         if (dr >= 16) { /* mov to DRn */
2774                 val = kvm_register_read(&svm->vcpu, reg);
2775                 kvm_set_dr(&svm->vcpu, dr - 16, val);
2776         } else {
2777                 err = kvm_get_dr(&svm->vcpu, dr, &val);
2778                 if (!err)
2779                         kvm_register_write(&svm->vcpu, reg, val);
2780         }
2781
2782         skip_emulated_instruction(&svm->vcpu);
2783
2784         return 1;
2785 }
2786
2787 static int cr8_write_interception(struct vcpu_svm *svm)
2788 {
2789         struct kvm_run *kvm_run = svm->vcpu.run;
2790         int r;
2791
2792         u8 cr8_prev = kvm_get_cr8(&svm->vcpu);
2793         /* instruction emulation calls kvm_set_cr8() */
2794         r = cr_interception(svm);
2795         if (irqchip_in_kernel(svm->vcpu.kvm)) {
2796                 clr_cr_intercept(svm, INTERCEPT_CR8_WRITE);
2797                 return r;
2798         }
2799         if (cr8_prev <= kvm_get_cr8(&svm->vcpu))
2800                 return r;
2801         kvm_run->exit_reason = KVM_EXIT_SET_TPR;
2802         return 0;
2803 }
2804
2805 static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
2806 {
2807         struct vcpu_svm *svm = to_svm(vcpu);
2808
2809         switch (ecx) {
2810         case MSR_IA32_TSC: {
2811                 struct vmcb *vmcb = get_host_vmcb(svm);
2812
2813                 *data = vmcb->control.tsc_offset + native_read_tsc();
2814                 break;
2815         }
2816         case MSR_STAR:
2817                 *data = svm->vmcb->save.star;
2818                 break;
2819 #ifdef CONFIG_X86_64
2820         case MSR_LSTAR:
2821                 *data = svm->vmcb->save.lstar;
2822                 break;
2823         case MSR_CSTAR:
2824                 *data = svm->vmcb->save.cstar;
2825                 break;
2826         case MSR_KERNEL_GS_BASE:
2827                 *data = svm->vmcb->save.kernel_gs_base;
2828                 break;
2829         case MSR_SYSCALL_MASK:
2830                 *data = svm->vmcb->save.sfmask;
2831                 break;
2832 #endif
2833         case MSR_IA32_SYSENTER_CS:
2834                 *data = svm->vmcb->save.sysenter_cs;
2835                 break;
2836         case MSR_IA32_SYSENTER_EIP:
2837                 *data = svm->sysenter_eip;
2838                 break;
2839         case MSR_IA32_SYSENTER_ESP:
2840                 *data = svm->sysenter_esp;
2841                 break;
2842         /*
2843          * Nobody will change the following 5 values in the VMCB so we can
2844          * safely return them on rdmsr. They will always be 0 until LBRV is
2845          * implemented.
2846          */
2847         case MSR_IA32_DEBUGCTLMSR:
2848                 *data = svm->vmcb->save.dbgctl;
2849                 break;
2850         case MSR_IA32_LASTBRANCHFROMIP:
2851                 *data = svm->vmcb->save.br_from;
2852                 break;
2853         case MSR_IA32_LASTBRANCHTOIP:
2854                 *data = svm->vmcb->save.br_to;
2855                 break;
2856         case MSR_IA32_LASTINTFROMIP:
2857                 *data = svm->vmcb->save.last_excp_from;
2858                 break;
2859         case MSR_IA32_LASTINTTOIP:
2860                 *data = svm->vmcb->save.last_excp_to;
2861                 break;
2862         case MSR_VM_HSAVE_PA:
2863                 *data = svm->nested.hsave_msr;
2864                 break;
2865         case MSR_VM_CR:
2866                 *data = svm->nested.vm_cr_msr;
2867                 break;
2868         case MSR_IA32_UCODE_REV:
2869                 *data = 0x01000065;
2870                 break;
2871         default:
2872                 return kvm_get_msr_common(vcpu, ecx, data);
2873         }
2874         return 0;
2875 }
2876
2877 static int rdmsr_interception(struct vcpu_svm *svm)
2878 {
2879         u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
2880         u64 data;
2881
2882         if (svm_get_msr(&svm->vcpu, ecx, &data)) {
2883                 trace_kvm_msr_read_ex(ecx);
2884                 kvm_inject_gp(&svm->vcpu, 0);
2885         } else {
2886                 trace_kvm_msr_read(ecx, data);
2887
2888                 svm->vcpu.arch.regs[VCPU_REGS_RAX] = data & 0xffffffff;
2889                 svm->vcpu.arch.regs[VCPU_REGS_RDX] = data >> 32;
2890                 svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
2891                 skip_emulated_instruction(&svm->vcpu);
2892         }
2893         return 1;
2894 }
2895
2896 static int svm_set_vm_cr(struct kvm_vcpu *vcpu, u64 data)
2897 {
2898         struct vcpu_svm *svm = to_svm(vcpu);
2899         int svm_dis, chg_mask;
2900
2901         if (data & ~SVM_VM_CR_VALID_MASK)
2902                 return 1;
2903
2904         chg_mask = SVM_VM_CR_VALID_MASK;
2905
2906         if (svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK)
2907                 chg_mask &= ~(SVM_VM_CR_SVM_LOCK_MASK | SVM_VM_CR_SVM_DIS_MASK);
2908
2909         svm->nested.vm_cr_msr &= ~chg_mask;
2910         svm->nested.vm_cr_msr |= (data & chg_mask);
2911
2912         svm_dis = svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK;
2913
2914         /* check for svm_disable while efer.svme is set */
2915         if (svm_dis && (vcpu->arch.efer & EFER_SVME))
2916                 return 1;
2917
2918         return 0;
2919 }
2920
2921 static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
2922 {
2923         struct vcpu_svm *svm = to_svm(vcpu);
2924
2925         switch (ecx) {
2926         case MSR_IA32_TSC:
2927                 kvm_write_tsc(vcpu, data);
2928                 break;
2929         case MSR_STAR:
2930                 svm->vmcb->save.star = data;
2931                 break;
2932 #ifdef CONFIG_X86_64
2933         case MSR_LSTAR:
2934                 svm->vmcb->save.lstar = data;
2935                 break;
2936         case MSR_CSTAR:
2937                 svm->vmcb->save.cstar = data;
2938                 break;
2939         case MSR_KERNEL_GS_BASE:
2940                 svm->vmcb->save.kernel_gs_base = data;
2941                 break;
2942         case MSR_SYSCALL_MASK:
2943                 svm->vmcb->save.sfmask = data;
2944                 break;
2945 #endif
2946         case MSR_IA32_SYSENTER_CS:
2947                 svm->vmcb->save.sysenter_cs = data;
2948                 break;
2949         case MSR_IA32_SYSENTER_EIP:
2950                 svm->sysenter_eip = data;
2951                 svm->vmcb->save.sysenter_eip = data;
2952                 break;
2953         case MSR_IA32_SYSENTER_ESP:
2954                 svm->sysenter_esp = data;
2955                 svm->vmcb->save.sysenter_esp = data;
2956                 break;
2957         case MSR_IA32_DEBUGCTLMSR:
2958                 if (!boot_cpu_has(X86_FEATURE_LBRV)) {
2959                         pr_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTL 0x%llx, nop\n",
2960                                         __func__, data);
2961                         break;
2962                 }
2963                 if (data & DEBUGCTL_RESERVED_BITS)
2964                         return 1;
2965
2966                 svm->vmcb->save.dbgctl = data;
2967                 mark_dirty(svm->vmcb, VMCB_LBR);
2968                 if (data & (1ULL<<0))
2969                         svm_enable_lbrv(svm);
2970                 else
2971                         svm_disable_lbrv(svm);
2972                 break;
2973         case MSR_VM_HSAVE_PA:
2974                 svm->nested.hsave_msr = data;
2975                 break;
2976         case MSR_VM_CR:
2977                 return svm_set_vm_cr(vcpu, data);
2978         case MSR_VM_IGNNE:
2979                 pr_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
2980                 break;
2981         default:
2982                 return kvm_set_msr_common(vcpu, ecx, data);
2983         }
2984         return 0;
2985 }
2986
2987 static int wrmsr_interception(struct vcpu_svm *svm)
2988 {
2989         u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
2990         u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u)
2991                 | ((u64)(svm->vcpu.arch.regs[VCPU_REGS_RDX] & -1u) << 32);
2992
2993
2994         svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
2995         if (svm_set_msr(&svm->vcpu, ecx, data)) {
2996                 trace_kvm_msr_write_ex(ecx, data);
2997                 kvm_inject_gp(&svm->vcpu, 0);
2998         } else {
2999                 trace_kvm_msr_write(ecx, data);
3000                 skip_emulated_instruction(&svm->vcpu);
3001         }
3002         return 1;
3003 }
3004
3005 static int msr_interception(struct vcpu_svm *svm)
3006 {
3007         if (svm->vmcb->control.exit_info_1)
3008                 return wrmsr_interception(svm);
3009         else
3010                 return rdmsr_interception(svm);
3011 }
3012
3013 static int interrupt_window_interception(struct vcpu_svm *svm)
3014 {
3015         struct kvm_run *kvm_run = svm->vcpu.run;
3016
3017         kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
3018         svm_clear_vintr(svm);
3019         svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
3020         mark_dirty(svm->vmcb, VMCB_INTR);
3021         /*
3022          * If the user space waits to inject interrupts, exit as soon as
3023          * possible
3024          */
3025         if (!irqchip_in_kernel(svm->vcpu.kvm) &&
3026             kvm_run->request_interrupt_window &&
3027             !kvm_cpu_has_interrupt(&svm->vcpu)) {
3028                 ++svm->vcpu.stat.irq_window_exits;
3029                 kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
3030                 return 0;
3031         }
3032
3033         return 1;
3034 }
3035
3036 static int pause_interception(struct vcpu_svm *svm)
3037 {
3038         kvm_vcpu_on_spin(&(svm->vcpu));
3039         return 1;
3040 }
3041
3042 static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = {
3043         [SVM_EXIT_READ_CR0]                     = cr_interception,
3044         [SVM_EXIT_READ_CR3]                     = cr_interception,
3045         [SVM_EXIT_READ_CR4]                     = cr_interception,
3046         [SVM_EXIT_READ_CR8]                     = cr_interception,
3047         [SVM_EXIT_CR0_SEL_WRITE]                = emulate_on_interception,
3048         [SVM_EXIT_WRITE_CR0]                    = cr0_write_interception,
3049         [SVM_EXIT_WRITE_CR3]                    = cr_interception,
3050         [SVM_EXIT_WRITE_CR4]                    = cr_interception,
3051         [SVM_EXIT_WRITE_CR8]                    = cr8_write_interception,
3052         [SVM_EXIT_READ_DR0]                     = dr_interception,
3053         [SVM_EXIT_READ_DR1]                     = dr_interception,
3054         [SVM_EXIT_READ_DR2]                     = dr_interception,
3055         [SVM_EXIT_READ_DR3]                     = dr_interception,
3056         [SVM_EXIT_READ_DR4]                     = dr_interception,
3057         [SVM_EXIT_READ_DR5]                     = dr_interception,
3058         [SVM_EXIT_READ_DR6]                     = dr_interception,
3059         [SVM_EXIT_READ_DR7]                     = dr_interception,
3060         [SVM_EXIT_WRITE_DR0]                    = dr_interception,
3061         [SVM_EXIT_WRITE_DR1]                    = dr_interception,
3062         [SVM_EXIT_WRITE_DR2]                    = dr_interception,
3063         [SVM_EXIT_WRITE_DR3]                    = dr_interception,
3064         [SVM_EXIT_WRITE_DR4]                    = dr_interception,
3065         [SVM_EXIT_WRITE_DR5]                    = dr_interception,
3066         [SVM_EXIT_WRITE_DR6]                    = dr_interception,
3067         [SVM_EXIT_WRITE_DR7]                    = dr_interception,
3068         [SVM_EXIT_EXCP_BASE + DB_VECTOR]        = db_interception,
3069         [SVM_EXIT_EXCP_BASE + BP_VECTOR]        = bp_interception,
3070         [SVM_EXIT_EXCP_BASE + UD_VECTOR]        = ud_interception,
3071         [SVM_EXIT_EXCP_BASE + PF_VECTOR]        = pf_interception,
3072         [SVM_EXIT_EXCP_BASE + NM_VECTOR]        = nm_interception,
3073         [SVM_EXIT_EXCP_BASE + MC_VECTOR]        = mc_interception,
3074         [SVM_EXIT_INTR]                         = intr_interception,
3075         [SVM_EXIT_NMI]                          = nmi_interception,
3076         [SVM_EXIT_SMI]                          = nop_on_interception,
3077         [SVM_EXIT_INIT]                         = nop_on_interception,
3078         [SVM_EXIT_VINTR]                        = interrupt_window_interception,
3079         [SVM_EXIT_CPUID]                        = cpuid_interception,
3080         [SVM_EXIT_IRET]                         = iret_interception,
3081         [SVM_EXIT_INVD]                         = emulate_on_interception,
3082         [SVM_EXIT_PAUSE]                        = pause_interception,
3083         [SVM_EXIT_HLT]                          = halt_interception,
3084         [SVM_EXIT_INVLPG]                       = invlpg_interception,
3085         [SVM_EXIT_INVLPGA]                      = invlpga_interception,
3086         [SVM_EXIT_IOIO]                         = io_interception,
3087         [SVM_EXIT_MSR]                          = msr_interception,
3088         [SVM_EXIT_TASK_SWITCH]                  = task_switch_interception,
3089         [SVM_EXIT_SHUTDOWN]                     = shutdown_interception,
3090         [SVM_EXIT_VMRUN]                        = vmrun_interception,
3091         [SVM_EXIT_VMMCALL]                      = vmmcall_interception,
3092         [SVM_EXIT_VMLOAD]                       = vmload_interception,
3093         [SVM_EXIT_VMSAVE]                       = vmsave_interception,
3094         [SVM_EXIT_STGI]                         = stgi_interception,
3095         [SVM_EXIT_CLGI]                         = clgi_interception,
3096         [SVM_EXIT_SKINIT]                       = skinit_interception,
3097         [SVM_EXIT_WBINVD]                       = emulate_on_interception,
3098         [SVM_EXIT_MONITOR]                      = invalid_op_interception,
3099         [SVM_EXIT_MWAIT]                        = invalid_op_interception,
3100         [SVM_EXIT_XSETBV]                       = xsetbv_interception,
3101         [SVM_EXIT_NPF]                          = pf_interception,
3102 };
3103
3104 void dump_vmcb(struct kvm_vcpu *vcpu)
3105 {
3106         struct vcpu_svm *svm = to_svm(vcpu);
3107         struct vmcb_control_area *control = &svm->vmcb->control;
3108         struct vmcb_save_area *save = &svm->vmcb->save;
3109
3110         pr_err("VMCB Control Area:\n");
3111         pr_err("cr_read:            %04x\n", control->intercept_cr & 0xffff);
3112         pr_err("cr_write:           %04x\n", control->intercept_cr >> 16);
3113         pr_err("dr_read:            %04x\n", control->intercept_dr & 0xffff);
3114         pr_err("dr_write:           %04x\n", control->intercept_dr >> 16);
3115         pr_err("exceptions:         %08x\n", control->intercept_exceptions);
3116         pr_err("intercepts:         %016llx\n", control->intercept);
3117         pr_err("pause filter count: %d\n", control->pause_filter_count);
3118         pr_err("iopm_base_pa:       %016llx\n", control->iopm_base_pa);
3119         pr_err("msrpm_base_pa:      %016llx\n", control->msrpm_base_pa);
3120         pr_err("tsc_offset:         %016llx\n", control->tsc_offset);
3121         pr_err("asid:               %d\n", control->asid);
3122         pr_err("tlb_ctl:            %d\n", control->tlb_ctl);
3123         pr_err("int_ctl:            %08x\n", control->int_ctl);
3124         pr_err("int_vector:         %08x\n", control->int_vector);
3125         pr_err("int_state:          %08x\n", control->int_state);
3126         pr_err("exit_code:          %08x\n", control->exit_code);
3127         pr_err("exit_info1:         %016llx\n", control->exit_info_1);
3128         pr_err("exit_info2:         %016llx\n", control->exit_info_2);
3129         pr_err("exit_int_info:      %08x\n", control->exit_int_info);
3130         pr_err("exit_int_info_err:  %08x\n", control->exit_int_info_err);
3131         pr_err("nested_ctl:         %lld\n", control->nested_ctl);
3132         pr_err("nested_cr3:         %016llx\n", control->nested_cr3);
3133         pr_err("event_inj:          %08x\n", control->event_inj);
3134         pr_err("event_inj_err:      %08x\n", control->event_inj_err);
3135         pr_err("lbr_ctl:            %lld\n", control->lbr_ctl);
3136         pr_err("next_rip:           %016llx\n", control->next_rip);
3137         pr_err("VMCB State Save Area:\n");
3138         pr_err("es:   s: %04x a: %04x l: %08x b: %016llx\n",
3139                 save->es.selector, save->es.attrib,
3140                 save->es.limit, save->es.base);
3141         pr_err("cs:   s: %04x a: %04x l: %08x b: %016llx\n",
3142                 save->cs.selector, save->cs.attrib,
3143                 save->cs.limit, save->cs.base);
3144         pr_err("ss:   s: %04x a: %04x l: %08x b: %016llx\n",
3145                 save->ss.selector, save->ss.attrib,
3146                 save->ss.limit, save->ss.base);
3147         pr_err("ds:   s: %04x a: %04x l: %08x b: %016llx\n",
3148                 save->ds.selector, save->ds.attrib,
3149                 save->ds.limit, save->ds.base);
3150         pr_err("fs:   s: %04x a: %04x l: %08x b: %016llx\n",
3151                 save->fs.selector, save->fs.attrib,
3152                 save->fs.limit, save->fs.base);
3153         pr_err("gs:   s: %04x a: %04x l: %08x b: %016llx\n",
3154                 save->gs.selector, save->gs.attrib,
3155                 save->gs.limit, save->gs.base);
3156         pr_err("gdtr: s: %04x a: %04x l: %08x b: %016llx\n",
3157                 save->gdtr.selector, save->gdtr.attrib,
3158                 save->gdtr.limit, save->gdtr.base);
3159         pr_err("ldtr: s: %04x a: %04x l: %08x b: %016llx\n",
3160                 save->ldtr.selector, save->ldtr.attrib,
3161                 save->ldtr.limit, save->ldtr.base);
3162         pr_err("idtr: s: %04x a: %04x l: %08x b: %016llx\n",
3163                 save->idtr.selector, save->idtr.attrib,
3164                 save->idtr.limit, save->idtr.base);
3165         pr_err("tr:   s: %04x a: %04x l: %08x b: %016llx\n",
3166                 save->tr.selector, save->tr.attrib,
3167                 save->tr.limit, save->tr.base);
3168         pr_err("cpl:            %d                efer:         %016llx\n",
3169                 save->cpl, save->efer);
3170         pr_err("cr0:            %016llx cr2:          %016llx\n",
3171                 save->cr0, save->cr2);
3172         pr_err("cr3:            %016llx cr4:          %016llx\n",
3173                 save->cr3, save->cr4);
3174         pr_err("dr6:            %016llx dr7:          %016llx\n",
3175                 save->dr6, save->dr7);
3176         pr_err("rip:            %016llx rflags:       %016llx\n",
3177                 save->rip, save->rflags);
3178         pr_err("rsp:            %016llx rax:          %016llx\n",
3179                 save->rsp, save->rax);
3180         pr_err("star:           %016llx lstar:        %016llx\n",
3181                 save->star, save->lstar);
3182         pr_err("cstar:          %016llx sfmask:       %016llx\n",
3183                 save->cstar, save->sfmask);
3184         pr_err("kernel_gs_base: %016llx sysenter_cs:  %016llx\n",
3185                 save->kernel_gs_base, save->sysenter_cs);
3186         pr_err("sysenter_esp:   %016llx sysenter_eip: %016llx\n",
3187                 save->sysenter_esp, save->sysenter_eip);
3188         pr_err("gpat:           %016llx dbgctl:       %016llx\n",
3189                 save->g_pat, save->dbgctl);
3190         pr_err("br_from:        %016llx br_to:        %016llx\n",
3191                 save->br_from, save->br_to);
3192         pr_err("excp_from:      %016llx excp_to:      %016llx\n",
3193                 save->last_excp_from, save->last_excp_to);
3194
3195 }
3196
3197 static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
3198 {
3199         struct vmcb_control_area *control = &to_svm(vcpu)->vmcb->control;
3200
3201         *info1 = control->exit_info_1;
3202         *info2 = control->exit_info_2;
3203 }
3204
3205 static int handle_exit(struct kvm_vcpu *vcpu)
3206 {
3207         struct vcpu_svm *svm = to_svm(vcpu);
3208         struct kvm_run *kvm_run = vcpu->run;
3209         u32 exit_code = svm->vmcb->control.exit_code;
3210
3211         trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM);
3212
3213         if (!is_cr_intercept(svm, INTERCEPT_CR0_WRITE))
3214                 vcpu->arch.cr0 = svm->vmcb->save.cr0;
3215         if (npt_enabled)
3216                 vcpu->arch.cr3 = svm->vmcb->save.cr3;
3217
3218         if (unlikely(svm->nested.exit_required)) {
3219                 nested_svm_vmexit(svm);
3220                 svm->nested.exit_required = false;
3221
3222                 return 1;
3223         }
3224
3225         if (is_guest_mode(vcpu)) {
3226                 int vmexit;
3227
3228                 trace_kvm_nested_vmexit(svm->vmcb->save.rip, exit_code,
3229                                         svm->vmcb->control.exit_info_1,
3230                                         svm->vmcb->control.exit_info_2,
3231                                         svm->vmcb->control.exit_int_info,
3232                                         svm->vmcb->control.exit_int_info_err);
3233
3234                 vmexit = nested_svm_exit_special(svm);
3235
3236                 if (vmexit == NESTED_EXIT_CONTINUE)
3237                         vmexit = nested_svm_exit_handled(svm);
3238
3239                 if (vmexit == NESTED_EXIT_DONE)
3240                         return 1;
3241         }
3242
3243         svm_complete_interrupts(svm);
3244
3245         if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) {
3246                 kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
3247                 kvm_run->fail_entry.hardware_entry_failure_reason
3248                         = svm->vmcb->control.exit_code;
3249                 pr_err("KVM: FAILED VMRUN WITH VMCB:\n");
3250                 dump_vmcb(vcpu);
3251                 return 0;
3252         }
3253
3254         if (is_external_interrupt(svm->vmcb->control.exit_int_info) &&
3255             exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR &&
3256             exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH &&
3257             exit_code != SVM_EXIT_INTR && exit_code != SVM_EXIT_NMI)
3258                 printk(KERN_ERR "%s: unexpected exit_ini_info 0x%x "
3259                        "exit_code 0x%x\n",
3260                        __func__, svm->vmcb->control.exit_int_info,
3261                        exit_code);
3262
3263         if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
3264             || !svm_exit_handlers[exit_code]) {
3265                 kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
3266                 kvm_run->hw.hardware_exit_reason = exit_code;
3267                 return 0;
3268         }
3269
3270         return svm_exit_handlers[exit_code](svm);
3271 }
3272
3273 static void reload_tss(struct kvm_vcpu *vcpu)
3274 {
3275         int cpu = raw_smp_processor_id();
3276
3277         struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
3278         sd->tss_desc->type = 9; /* available 32/64-bit TSS */
3279         load_TR_desc();
3280 }
3281
3282 static void pre_svm_run(struct vcpu_svm *svm)
3283 {
3284         int cpu = raw_smp_processor_id();
3285
3286         struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
3287
3288         /* FIXME: handle wraparound of asid_generation */
3289         if (svm->asid_generation != sd->asid_generation)
3290                 new_asid(svm, sd);
3291 }
3292
3293 static void svm_inject_nmi(struct kvm_vcpu *vcpu)
3294 {
3295         struct vcpu_svm *svm = to_svm(vcpu);
3296
3297         svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;
3298         vcpu->arch.hflags |= HF_NMI_MASK;
3299         set_intercept(svm, INTERCEPT_IRET);
3300         ++vcpu->stat.nmi_injections;
3301 }
3302
3303 static inline void svm_inject_irq(struct vcpu_svm *svm, int irq)
3304 {
3305         struct vmcb_control_area *control;
3306
3307         control = &svm->vmcb->control;
3308         control->int_vector = irq;
3309         control->int_ctl &= ~V_INTR_PRIO_MASK;
3310         control->int_ctl |= V_IRQ_MASK |
3311                 ((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT);
3312         mark_dirty(svm->vmcb, VMCB_INTR);
3313 }
3314
3315 static void svm_set_irq(struct kvm_vcpu *vcpu)
3316 {
3317         struct vcpu_svm *svm = to_svm(vcpu);
3318
3319         BUG_ON(!(gif_set(svm)));
3320
3321         trace_kvm_inj_virq(vcpu->arch.interrupt.nr);
3322         ++vcpu->stat.irq_injections;
3323
3324         svm->vmcb->control.event_inj = vcpu->arch.interrupt.nr |
3325                 SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR;
3326 }
3327
3328 static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
3329 {
3330         struct vcpu_svm *svm = to_svm(vcpu);
3331
3332         if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK))
3333                 return;
3334
3335         if (irr == -1)
3336                 return;
3337
3338         if (tpr >= irr)
3339                 set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
3340 }
3341
3342 static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
3343 {
3344         struct vcpu_svm *svm = to_svm(vcpu);
3345         struct vmcb *vmcb = svm->vmcb;
3346         int ret;
3347         ret = !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) &&
3348               !(svm->vcpu.arch.hflags & HF_NMI_MASK);
3349         ret = ret && gif_set(svm) && nested_svm_nmi(svm);
3350
3351         return ret;
3352 }
3353
3354 static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu)
3355 {
3356         struct vcpu_svm *svm = to_svm(vcpu);
3357
3358         return !!(svm->vcpu.arch.hflags & HF_NMI_MASK);
3359 }
3360
3361 static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
3362 {
3363         struct vcpu_svm *svm = to_svm(vcpu);
3364
3365         if (masked) {
3366                 svm->vcpu.arch.hflags |= HF_NMI_MASK;
3367                 set_intercept(svm, INTERCEPT_IRET);
3368         } else {
3369                 svm->vcpu.arch.hflags &= ~HF_NMI_MASK;
3370                 clr_intercept(svm, INTERCEPT_IRET);
3371         }
3372 }
3373
3374 static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
3375 {
3376         struct vcpu_svm *svm = to_svm(vcpu);
3377         struct vmcb *vmcb = svm->vmcb;
3378         int ret;
3379
3380         if (!gif_set(svm) ||
3381              (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK))
3382                 return 0;
3383
3384         ret = !!(kvm_get_rflags(vcpu) & X86_EFLAGS_IF);
3385
3386         if (is_guest_mode(vcpu))
3387                 return ret && !(svm->vcpu.arch.hflags & HF_VINTR_MASK);
3388
3389         return ret;
3390 }
3391
/*
 * Request an interrupt-window exit by arming the virtual interrupt
 * intercept and programming a virtual IRQ with vector 0.
 */
static void enable_irq_window(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	/*
	 * With GIF=0 the CPU cannot tell us when GIF becomes 1, because
	 * that is a separate STGI/VMRUN intercept.  This function is called
	 * again the next time that intercept fires, and the vintr intercept
	 * is armed then.
	 */
	if (!gif_set(svm))
		return;

	if (!nested_svm_intr(svm))
		return;

	svm_set_vintr(svm);
	svm_inject_irq(svm, 0x0);
}
3407
3408 static void enable_nmi_window(struct kvm_vcpu *vcpu)
3409 {
3410         struct vcpu_svm *svm = to_svm(vcpu);
3411
3412         if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK))
3413             == HF_NMI_MASK)
3414                 return; /* IRET will cause a vm exit */
3415
3416         /*
3417          * Something prevents NMI from been injected. Single step over possible
3418          * problem (IRET or exception injection or interrupt shadow)
3419          */
3420         svm->nmi_singlestep = true;
3421         svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
3422         update_db_intercept(vcpu);
3423 }
3424
/* Setting the TSS address is a no-op here: both arguments are ignored. */
static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
{
	/* nothing to do; always report success */
	return 0;
}
3429
3430 static void svm_flush_tlb(struct kvm_vcpu *vcpu)
3431 {
3432         struct vcpu_svm *svm = to_svm(vcpu);
3433
3434         if (static_cpu_has(X86_FEATURE_FLUSHBYASID))
3435                 svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID;
3436         else
3437                 svm->asid_generation--;
3438 }
3439
/* Hook invoked before switching to the guest; intentionally empty. */
static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu)
{
	/* nothing to prepare */
}
3443
3444 static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu)
3445 {
3446         struct vcpu_svm *svm = to_svm(vcpu);
3447
3448         if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK))
3449                 return;
3450
3451         if (!is_cr_intercept(svm, INTERCEPT_CR8_WRITE)) {
3452                 int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK;
3453                 kvm_set_cr8(vcpu, cr8);
3454         }
3455 }
3456
3457 static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu)
3458 {
3459         struct vcpu_svm *svm = to_svm(vcpu);
3460         u64 cr8;
3461
3462         if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK))
3463                 return;
3464
3465         cr8 = kvm_get_cr8(vcpu);
3466         svm->vmcb->control.int_ctl &= ~V_TPR_MASK;
3467         svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK;
3468 }
3469
3470 static void svm_complete_interrupts(struct vcpu_svm *svm)
3471 {
3472         u8 vector;
3473         int type;
3474         u32 exitintinfo = svm->vmcb->control.exit_int_info;
3475         unsigned int3_injected = svm->int3_injected;
3476
3477         svm->int3_injected = 0;
3478
3479         /*
3480          * If we've made progress since setting HF_IRET_MASK, we've
3481          * executed an IRET and can allow NMI injection.
3482          */
3483         if ((svm->vcpu.arch.hflags & HF_IRET_MASK)
3484             && kvm_rip_read(&svm->vcpu) != svm->nmi_iret_rip) {
3485                 svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK);
3486                 kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
3487         }
3488
3489         svm->vcpu.arch.nmi_injected = false;
3490         kvm_clear_exception_queue(&svm->vcpu);
3491         kvm_clear_interrupt_queue(&svm->vcpu);
3492
3493         if (!(exitintinfo & SVM_EXITINTINFO_VALID))
3494                 return;
3495
3496         kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
3497
3498         vector = exitintinfo & SVM_EXITINTINFO_VEC_MASK;
3499         type = exitintinfo & SVM_EXITINTINFO_TYPE_MASK;
3500
3501         switch (type) {
3502         case SVM_EXITINTINFO_TYPE_NMI:
3503                 svm->vcpu.arch.nmi_injected = true;
3504                 break;
3505         case SVM_EXITINTINFO_TYPE_EXEPT:
3506                 /*
3507                  * In case of software exceptions, do not reinject the vector,
3508                  * but re-execute the instruction instead. Rewind RIP first
3509                  * if we emulated INT3 before.
3510                  */
3511                 if (kvm_exception_is_soft(vector)) {
3512                         if (vector == BP_VECTOR && int3_injected &&
3513                             kvm_is_linear_rip(&svm->vcpu, svm->int3_rip))
3514                                 kvm_rip_write(&svm->vcpu,
3515                                               kvm_rip_read(&svm->vcpu) -
3516                                               int3_injected);
3517                         break;
3518                 }
3519                 if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) {
3520                         u32 err = svm->vmcb->control.exit_int_info_err;
3521                         kvm_requeue_exception_e(&svm->vcpu, vector, err);
3522
3523                 } else
3524                         kvm_requeue_exception(&svm->vcpu, vector);
3525                 break;
3526         case SVM_EXITINTINFO_TYPE_INTR:
3527                 kvm_queue_interrupt(&svm->vcpu, vector, false);
3528                 break;
3529         default:
3530                 break;
3531         }
3532 }
3533
3534 static void svm_cancel_injection(struct kvm_vcpu *vcpu)
3535 {
3536         struct vcpu_svm *svm = to_svm(vcpu);
3537         struct vmcb_control_area *control = &svm->vmcb->control;
3538
3539         control->exit_int_info = control->event_inj;
3540         control->exit_int_info_err = control->event_inj_err;
3541         control->event_inj = 0;
3542         svm_complete_interrupts(svm);
3543 }
3544
/*
 * Register-name prefix spliced into the asm template of svm_vcpu_run():
 * "r" selects the 64-bit register names (rbx, rbp, ...), "e" the 32-bit
 * ones (ebx, ebp, ...).  Undefined again right after the function.
 */
3545 #ifdef CONFIG_X86_64
3546 #define R "r"
3547 #else
3548 #define R "e"
3549 #endif
3550
/*
 * svm_vcpu_run - enter the guest once and collect its state after the exit.
 *
 * Before entry:
 *  - RAX, RSP and RIP are copied from vcpu->arch.regs[] into the VMCB save
 *    area, and CR2 from vcpu->arch.cr2;
 *  - if svm->nested.exit_required is set the function returns without
 *    running the guest;
 *  - pre_svm_run() and sync_lapic_to_cr8() are called.
 *
 * The inline assembly loads the remaining general purpose registers from
 * vcpu->arch.regs[], executes VMLOAD / VMRUN / VMSAVE with the value of
 * svm->vmcb_pa in rAX, and stores the guest registers back into
 * vcpu->arch.regs[].  The host rBP and rAX are preserved with push/pop.
 *
 * After the exit:
 *  - host GS base (64-bit) or FS/GS selectors (32-bit) are restored from
 *    svm->host and reload_tss() is called;
 *  - CR2, RAX, RSP and RIP are copied back from the VMCB save area;
 *  - stgi() is bracketed by kvm_before/after_handle_nmi() when the exit
 *    code is SVM_EXIT_NMI;
 *  - CR8 is synced back, next_rip and tlb_ctl are reset, the async page
 *    fault reason is fetched on #PF exits, the PDPTR cache bits are dropped
 *    when NPT is enabled, #MC exits are handled immediately, and finally
 *    the whole VMCB is marked clean.
 */
3551 static void svm_vcpu_run(struct kvm_vcpu *vcpu)
3552 {
3553         struct vcpu_svm *svm = to_svm(vcpu);
3554
             /* These three registers travel through the VMCB save area. */
3555         svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
3556         svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
3557         svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];
3558
3559         /*
3560          * A vmexit emulation is required before the vcpu can be executed
3561          * again.
3562          */
3563         if (unlikely(svm->nested.exit_required))
3564                 return;
3565
3566         pre_svm_run(svm);
3567
3568         sync_lapic_to_cr8(vcpu);
3569
3570         svm->vmcb->save.cr2 = vcpu->arch.cr2;
3571
             /*
              * NOTE(review): interrupts are enabled only after clgi();
              * presumably the cleared global interrupt flag keeps them
              * pending until VMRUN - confirm against the AMD manual.
              */
3572         clgi();
3573
3574         local_irq_enable();
3575
3576         asm volatile (
3577                 "push %%"R"bp; \n\t"
3578                 "mov %c[rbx](%[svm]), %%"R"bx \n\t"
3579                 "mov %c[rcx](%[svm]), %%"R"cx \n\t"
3580                 "mov %c[rdx](%[svm]), %%"R"dx \n\t"
3581                 "mov %c[rsi](%[svm]), %%"R"si \n\t"
3582                 "mov %c[rdi](%[svm]), %%"R"di \n\t"
3583                 "mov %c[rbp](%[svm]), %%"R"bp \n\t"
3584 #ifdef CONFIG_X86_64
3585                 "mov %c[r8](%[svm]),  %%r8  \n\t"
3586                 "mov %c[r9](%[svm]),  %%r9  \n\t"
3587                 "mov %c[r10](%[svm]), %%r10 \n\t"
3588                 "mov %c[r11](%[svm]), %%r11 \n\t"
3589                 "mov %c[r12](%[svm]), %%r12 \n\t"
3590                 "mov %c[r13](%[svm]), %%r13 \n\t"
3591                 "mov %c[r14](%[svm]), %%r14 \n\t"
3592                 "mov %c[r15](%[svm]), %%r15 \n\t"
3593 #endif
3594
3595                 /* Enter guest mode */
3596                 "push %%"R"ax \n\t"
3597                 "mov %c[vmcb](%[svm]), %%"R"ax \n\t"
3598                 __ex(SVM_VMLOAD) "\n\t"
3599                 __ex(SVM_VMRUN) "\n\t"
3600                 __ex(SVM_VMSAVE) "\n\t"
3601                 "pop %%"R"ax \n\t"
3602
3603                 /* Save guest registers, load host registers */
3604                 "mov %%"R"bx, %c[rbx](%[svm]) \n\t"
3605                 "mov %%"R"cx, %c[rcx](%[svm]) \n\t"
3606                 "mov %%"R"dx, %c[rdx](%[svm]) \n\t"
3607                 "mov %%"R"si, %c[rsi](%[svm]) \n\t"
3608                 "mov %%"R"di, %c[rdi](%[svm]) \n\t"
3609                 "mov %%"R"bp, %c[rbp](%[svm]) \n\t"
3610 #ifdef CONFIG_X86_64
3611                 "mov %%r8,  %c[r8](%[svm]) \n\t"
3612                 "mov %%r9,  %c[r9](%[svm]) \n\t"
3613                 "mov %%r10, %c[r10](%[svm]) \n\t"
3614                 "mov %%r11, %c[r11](%[svm]) \n\t"
3615                 "mov %%r12, %c[r12](%[svm]) \n\t"
3616                 "mov %%r13, %c[r13](%[svm]) \n\t"
3617                 "mov %%r14, %c[r14](%[svm]) \n\t"
3618                 "mov %%r15, %c[r15](%[svm]) \n\t"
3619 #endif
3620                 "pop %%"R"bp"
3621                 :
3622                 : [svm]"a"(svm),
3623                   [vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)),
3624                   [rbx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RBX])),
3625                   [rcx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RCX])),
3626                   [rdx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RDX])),
3627                   [rsi]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RSI])),
3628                   [rdi]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RDI])),
3629                   [rbp]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RBP]))
3630 #ifdef CONFIG_X86_64
3631                   , [r8]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R8])),
3632                   [r9]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R9])),
3633                   [r10]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R10])),
3634                   [r11]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R11])),
3635                   [r12]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R12])),
3636                   [r13]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R13])),
3637                   [r14]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R14])),
3638                   [r15]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R15]))
3639 #endif
3640                 : "cc", "memory"
3641                 , R"bx", R"cx", R"dx", R"si", R"di"
3642 #ifdef CONFIG_X86_64
3643                 , "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15"
3644 #endif
3645                 );
3646
             /* Put back the host segment state kept in svm->host. */
3647 #ifdef CONFIG_X86_64
3648         wrmsrl(MSR_GS_BASE, svm->host.gs_base);
3649 #else
3650         loadsegment(fs, svm->host.fs);
3651 #ifndef CONFIG_X86_32_LAZY_GS
3652         loadsegment(gs, svm->host.gs);
3653 #endif
3654 #endif
3655
3656         reload_tss(vcpu);
3657
3658         local_irq_disable();
3659
             /* Pull the VMCB-resident registers back into the vcpu cache. */
3660         vcpu->arch.cr2 = svm->vmcb->save.cr2;
3661         vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax;
3662         vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
3663         vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip;
3664
3665         if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
3666                 kvm_before_handle_nmi(&svm->vcpu);
3667
3668         stgi();
3669
3670         /* Any pending NMI will happen here */
3671
3672         if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
3673                 kvm_after_handle_nmi(&svm->vcpu);
3674
3675         sync_cr8_to_lapic(vcpu);
3676
3677         svm->next_rip = 0;
3678
3679         svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
3680
3681         /* if exit due to PF check for async PF */
3682         if (svm->vmcb->control.exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR)
3683                 svm->apf_reason = kvm_read_and_reset_pf_reason();
3684
             /* With NPT the cached PDPTRs are neither available nor dirty. */
3685         if (npt_enabled) {
3686                 vcpu->arch.regs_avail &= ~(1 << VCPU_EXREG_PDPTR);
3687                 vcpu->arch.regs_dirty &= ~(1 << VCPU_EXREG_PDPTR);
3688         }
3689
3690         /*
3691          * We need to handle MC intercepts here before the vcpu has a chance to
3692          * change the physical cpu
3693          */
3694         if (unlikely(svm->vmcb->control.exit_code ==
3695                      SVM_EXIT_EXCP_BASE + MC_VECTOR))
3696                 svm_handle_mce(svm);
3697
3698         mark_all_clean(svm->vmcb);
3699 }
3700
3701 #undef R
3702
3703 static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
3704 {
3705         struct vcpu_svm *svm = to_svm(vcpu);
3706
3707         svm->vmcb->save.cr3 = root;
3708         mark_dirty(svm->vmcb, VMCB_CR);
3709         svm_flush_tlb(vcpu);
3710 }
3711
3712 static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root)
3713 {
3714         struct vcpu_svm *svm = to_svm(vcpu);
3715
3716         svm->vmcb->control.nested_cr3 = root;
3717         mark_dirty(svm->vmcb, VMCB_NPT);
3718
3719         /* Also sync guest cr3 here in case we live migrate */
3720         svm->vmcb->save.cr3 = kvm_read_cr3(vcpu);
3721         mark_dirty(svm->vmcb, VMCB_CR);
3722
3723         svm_flush_tlb(vcpu);
3724 }
3725
3726 static int is_disabled(void)
3727 {
3728         u64 vm_cr;
3729
3730         rdmsrl(MSR_VM_CR, vm_cr);
3731         if (vm_cr & (1 << SVM_VM_CR_SVM_DISABLE))
3732                 return 1;
3733
3734         return 0;
3735 }
3736
/*
 * patch_hypercall callback: write the three-byte VMMCALL instruction
 * (0f 01 d9) to @hypercall.
 *
 * @vcpu is unused.  @hypercall must have room for at least three bytes;
 * nothing beyond the third byte is touched.
 */
static void
svm_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
{
	static const unsigned char vmmcall_insn[] = { 0x0f, 0x01, 0xd9 };
	unsigned int i;

	for (i = 0; i < sizeof(vmmcall_insn); i++)
		hypercall[i] = vmmcall_insn[i];
}
3747
/*
 * check_processor_compatibility callback.
 *
 * @rtn points to an int that receives the result; this implementation
 * always stores 0 there.
 */
static void svm_check_processor_compat(void *rtn)
{
	int *result = rtn;

	*result = 0;
}
3752
/*
 * cpu_has_accelerated_tpr callback: this implementation always answers
 * false.
 */
static bool svm_cpu_has_accelerated_tpr(void)
{
	return false;
}
3757
3758 static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
3759 {
3760         return 0;
3761 }
3762
/*
 * cpuid_update callback (see svm_x86_ops).
 * Intentionally empty: this implementation takes no action.
 */
static void svm_cpuid_update(struct kvm_vcpu *vcpu)
{
}
3766
3767 static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
3768 {
3769         switch (func) {
3770         case 0x80000001:
3771                 if (nested)
3772                         entry->ecx |= (1 << 2); /* Set SVM bit */
3773                 break;
3774         case 0x8000000A:
3775                 entry->eax = 1; /* SVM revision 1 */
3776                 entry->ebx = 8; /* Lets support 8 ASIDs in case we add proper
3777                                    ASID emulation to nested SVM */
3778                 entry->ecx = 0; /* Reserved */
3779                 entry->edx = 0; /* Per default do not support any
3780                                    additional features */
3781
3782                 /* Support next_rip if host supports it */
3783                 if (boot_cpu_has(X86_FEATURE_NRIPS))
3784                         entry->edx |= SVM_FEATURE_NRIP;
3785
3786                 /* Support NPT for the guest if enabled */
3787                 if (npt_enabled)
3788                         entry->edx |= SVM_FEATURE_NPT;
3789
3790                 break;
3791         }
3792 }
3793
3794 static const struct trace_print_flags svm_exit_reasons_str[] = {
3795         { SVM_EXIT_READ_CR0,                    "read_cr0" },
3796         { SVM_EXIT_READ_CR3,                    "read_cr3" },
3797         { SVM_EXIT_READ_CR4,                    "read_cr4" },
3798         { SVM_EXIT_READ_CR8,                    "read_cr8" },
3799         { SVM_EXIT_WRITE_CR0,                   "write_cr0" },
3800         { SVM_EXIT_WRITE_CR3,                   "write_cr3" },
3801         { SVM_EXIT_WRITE_CR4,                   "write_cr4" },
3802         { SVM_EXIT_WRITE_CR8,                   "write_cr8" },
3803         { SVM_EXIT_READ_DR0,                    "read_dr0" },
3804         { SVM_EXIT_READ_DR1,                    "read_dr1" },
3805         { SVM_EXIT_READ_DR2,                    "read_dr2" },
3806         { SVM_EXIT_READ_DR3,                    "read_dr3" },
3807         { SVM_EXIT_WRITE_DR0,                   "write_dr0" },
3808         { SVM_EXIT_WRITE_DR1,                   "write_dr1" },
3809         { SVM_EXIT_WRITE_DR2,                   "write_dr2" },
3810         { SVM_EXIT_WRITE_DR3,                   "write_dr3" },
3811         { SVM_EXIT_WRITE_DR5,                   "write_dr5" },
3812         { SVM_EXIT_WRITE_DR7,                   "write_dr7" },
3813         { SVM_EXIT_EXCP_BASE + DB_VECTOR,       "DB excp" },
3814         { SVM_EXIT_EXCP_BASE + BP_VECTOR,       "BP excp" },
3815         { SVM_EXIT_EXCP_BASE + UD_VECTOR,       "UD excp" },
3816         { SVM_EXIT_EXCP_BASE + PF_VECTOR,       "PF excp" },
3817         { SVM_EXIT_EXCP_BASE + NM_VECTOR,       "NM excp" },
3818         { SVM_EXIT_EXCP_BASE + MC_VECTOR,       "MC excp" },
3819         { SVM_EXIT_INTR,                        "interrupt" },
3820         { SVM_EXIT_NMI,                         "nmi" },
3821         { SVM_EXIT_SMI,                         "smi" },
3822         { SVM_EXIT_INIT,                        "init" },
3823         { SVM_EXIT_VINTR,                       "vintr" },
3824         { SVM_EXIT_CPUID,                       "cpuid" },
3825         { SVM_EXIT_INVD,                        "invd" },
3826         { SVM_EXIT_HLT,                         "hlt" },
3827         { SVM_EXIT_INVLPG,                      "invlpg" },
3828         { SVM_EXIT_INVLPGA,                     "invlpga" },
3829         { SVM_EXIT_IOIO,                        "io" },
3830         { SVM_EXIT_MSR,                         "msr" },
3831         { SVM_EXIT_TASK_SWITCH,                 "task_switch" },
3832         { SVM_EXIT_SHUTDOWN,                    "shutdown" },
3833         { SVM_EXIT_VMRUN,                       "vmrun" },
3834         { SVM_EXIT_VMMCALL,                     "hypercall" },
3835         { SVM_EXIT_VMLOAD,                      "vmload" },
3836         { SVM_EXIT_VMSAVE,                      "vmsave" },
3837         { SVM_EXIT_STGI,                        "stgi" },
3838         { SVM_EXIT_CLGI,                        "clgi" },
3839         { SVM_EXIT_SKINIT,                      "skinit" },
3840         { SVM_EXIT_WBINVD,                      "wbinvd" },
3841         { SVM_EXIT_MONITOR,                     "monitor" },
3842         { SVM_EXIT_MWAIT,                       "mwait" },
3843         { SVM_EXIT_XSETBV,                      "xsetbv" },
3844         { SVM_EXIT_NPF,                         "npf" },
3845         { -1, NULL }
3846 };
3847
3848 static int svm_get_lpage_level(void)
3849 {
3850         return PT_PDPE_LEVEL;
3851 }
3852
/*
 * rdtscp_supported callback: this implementation always answers false.
 */
static bool svm_rdtscp_supported(void)
{
	return false;
}
3857
/*
 * has_wbinvd_exit callback: this implementation always answers true.
 */
static bool svm_has_wbinvd_exit(void)
{
	return true;
}
3862
3863 static void svm_fpu_deactivate(struct kvm_vcpu *vcpu)
3864 {
3865         struct vcpu_svm *svm = to_svm(vcpu);
3866
3867         set_exception_intercept(svm, NM_VECTOR);
3868         update_cr0_intercept(svm);
3869 }
3870
3871 #define PRE_EX(exit)  { .exit_code = (exit), \
3872                         .stage = X86_ICPT_PRE_EXCEPT, \
3873                         .valid = true }
3874 #define POST_EX(exit) { .exit_code = (exit), \
3875                         .stage = X86_ICPT_POST_EXCEPT, \
3876                         .valid = true }
3877 #define POST_MEM(exit) { .exit_code = (exit), \
3878                          .stage = X86_ICPT_POST_MEMACCESS, \
3879                          .valid = true }
3880
3881 static struct __x86_intercept {
3882         u32 exit_code;
3883         enum x86_intercept_stage stage;
3884         bool valid;
3885 } x86_intercept_map[] = {
3886         [x86_intercept_cr_read]         = POST_EX(SVM_EXIT_READ_CR0),
3887         [x86_intercept_cr_write]        = POST_EX(SVM_EXIT_WRITE_CR0),
3888         [x86_intercept_clts]            = POST_EX(SVM_EXIT_WRITE_CR0),
3889         [x86_intercept_lmsw]            = POST_EX(SVM_EXIT_WRITE_CR0),
3890         [x86_intercept_smsw]            = POST_EX(SVM_EXIT_READ_CR0),
3891         [x86_intercept_dr_read]         = POST_EX(SVM_EXIT_READ_DR0),
3892         [x86_intercept_dr_write]        = POST_EX(SVM_EXIT_WRITE_DR0),
3893         [x86_intercept_sldt]            = POST_EX(SVM_EXIT_LDTR_READ),
3894         [x86_intercept_str]             = POST_EX(SVM_EXIT_TR_READ),
3895         [x86_intercept_lldt]            = POST_EX(SVM_EXIT_LDTR_WRITE),
3896         [x86_intercept_ltr]             = POST_EX(SVM_EXIT_TR_WRITE),
3897         [x86_intercept_sgdt]            = POST_EX(SVM_EXIT_GDTR_READ),
3898         [x86_intercept_sidt]            = POST_EX(SVM_EXIT_IDTR_READ),
3899         [x86_intercept_lgdt]            = POST_EX(SVM_EXIT_GDTR_WRITE),
3900         [x86_intercept_lidt]            = POST_EX(SVM_EXIT_IDTR_WRITE),
3901         [x86_intercept_vmrun]           = POST_EX(SVM_EXIT_VMRUN),
3902         [x86_intercept_vmmcall]         = POST_EX(SVM_EXIT_VMMCALL),
3903         [x86_intercept_vmload]          = POST_EX(SVM_EXIT_VMLOAD),
3904         [x86_intercept_vmsave]          = POST_EX(SVM_EXIT_VMSAVE),
3905         [x86_intercept_stgi]            = POST_EX(SVM_EXIT_STGI),
3906         [x86_intercept_clgi]            = POST_EX(SVM_EXIT_CLGI),
3907         [x86_intercept_skinit]          = POST_EX(SVM_EXIT_SKINIT),
3908         [x86_intercept_invlpga]         = POST_EX(SVM_EXIT_INVLPGA),
3909         [x86_intercept_rdtscp]          = POST_EX(SVM_EXIT_RDTSCP),
3910         [x86_intercept_monitor]         = POST_MEM(SVM_EXIT_MONITOR),
3911         [x86_intercept_mwait]           = POST_EX(SVM_EXIT_MWAIT),
3912         [x86_intercept_invlpg]          = POST_EX(SVM_EXIT_INVLPG),
3913         [x86_intercept_invd]            = POST_EX(SVM_EXIT_INVD),
3914         [x86_intercept_wbinvd]          = POST_EX(SVM_EXIT_WBINVD),
3915         [x86_intercept_wrmsr]           = POST_EX(SVM_EXIT_MSR),
3916         [x86_intercept_rdtsc]           = POST_EX(SVM_EXIT_RDTSC),
3917         [x86_intercept_rdmsr]           = POST_EX(SVM_EXIT_MSR),
3918         [x86_intercept_rdpmc]           = POST_EX(SVM_EXIT_RDPMC),
3919         [x86_intercept_cpuid]           = PRE_EX(SVM_EXIT_CPUID),
3920         [x86_intercept_rsm]             = PRE_EX(SVM_EXIT_RSM),
3921 };
3922
3923 #undef PRE_EX
3924 #undef POST_EX
3925 #undef POST_MEM
3926
3927 static int svm_check_intercept(struct kvm_vcpu *vcpu,
3928                                struct x86_instruction_info *info,
3929                                enum x86_intercept_stage stage)
3930 {
3931         struct vcpu_svm *svm = to_svm(vcpu);
3932         int vmexit, ret = X86EMUL_CONTINUE;
3933         struct __x86_intercept icpt_info;
3934         struct vmcb *vmcb = svm->vmcb;
3935
3936         if (info->intercept >= ARRAY_SIZE(x86_intercept_map))
3937                 goto out;
3938
3939         icpt_info = x86_intercept_map[info->intercept];
3940
3941         if (!icpt_info.valid || stage != icpt_info.stage)
3942                 goto out;
3943
3944         switch (icpt_info.exit_code) {
3945         case SVM_EXIT_READ_CR0:
3946                 if (info->intercept == x86_intercept_cr_read)
3947                         icpt_info.exit_code += info->modrm_reg;
3948                 break;
3949         case SVM_EXIT_WRITE_CR0: {
3950                 unsigned long cr0, val;
3951                 u64 intercept;
3952
3953                 if (info->intercept == x86_intercept_cr_write)
3954                         icpt_info.exit_code += info->modrm_reg;
3955
3956                 if (icpt_info.exit_code != SVM_EXIT_WRITE_CR0)
3957                         break;
3958
3959                 intercept = svm->nested.intercept;
3960
3961                 if (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0)))
3962                         break;
3963
3964                 cr0 = vcpu->arch.cr0 & ~SVM_CR0_SELECTIVE_MASK;
3965                 val = info->src_val  & ~SVM_CR0_SELECTIVE_MASK;
3966
3967                 if (info->intercept == x86_intercept_lmsw) {
3968                         cr0 &= 0xfUL;
3969                         val &= 0xfUL;
3970                         /* lmsw can't clear PE - catch this here */
3971                         if (cr0 & X86_CR0_PE)
3972                                 val |= X86_CR0_PE;
3973                 }
3974
3975                 if (cr0 ^ val)
3976                         icpt_info.exit_code = SVM_EXIT_CR0_SEL_WRITE;
3977
3978                 break;
3979         }
3980         case SVM_EXIT_READ_DR0:
3981         case SVM_EXIT_WRITE_DR0:
3982                 icpt_info.exit_code += info->modrm_reg;
3983                 break;
3984         case SVM_EXIT_MSR:
3985                 if (info->intercept == x86_intercept_wrmsr)
3986                         vmcb->control.exit_info_1 = 1;
3987                 else
3988                         vmcb->control.exit_info_1 = 0;
3989                 break;
3990         default:
3991                 break;
3992         }
3993
3994         vmcb->control.next_rip  = info->next_rip;
3995         vmcb->control.exit_code = icpt_info.exit_code;
3996         vmexit = nested_svm_exit_handled(svm);
3997
3998         ret = (vmexit == NESTED_EXIT_DONE) ? X86EMUL_INTERCEPTED
3999                                            : X86EMUL_CONTINUE;
4000
4001 out:
4002         return ret;
4003 }
4004
4005 static struct kvm_x86_ops svm_x86_ops = {
4006         .cpu_has_kvm_support = has_svm,
4007         .disabled_by_bios = is_disabled,
4008         .hardware_setup = svm_hardware_setup,
4009         .hardware_unsetup = svm_hardware_unsetup,
4010         .check_processor_compatibility = svm_check_processor_compat,
4011         .hardware_enable = svm_hardware_enable,
4012         .hardware_disable = svm_hardware_disable,
4013         .cpu_has_accelerated_tpr = svm_cpu_has_accelerated_tpr,
4014
4015         .vcpu_create = svm_create_vcpu,
4016         .vcpu_free = svm_free_vcpu,
4017         .vcpu_reset = svm_vcpu_reset,
4018
4019         .prepare_guest_switch = svm_prepare_guest_switch,
4020         .vcpu_load = svm_vcpu_load,
4021         .vcpu_put = svm_vcpu_put,
4022
4023         .set_guest_debug = svm_guest_debug,
4024         .get_msr = svm_get_msr,
4025         .set_msr = svm_set_msr,
4026         .get_segment_base = svm_get_segment_base,
4027         .get_segment = svm_get_segment,
4028         .set_segment = svm_set_segment,
4029         .get_cpl = svm_get_cpl,
4030         .get_cs_db_l_bits = kvm_get_cs_db_l_bits,
4031         .decache_cr0_guest_bits = svm_decache_cr0_guest_bits,
4032         .decache_cr3 = svm_decache_cr3,
4033         .decache_cr4_guest_bits = svm_decache_cr4_guest_bits,
4034         .set_cr0 = svm_set_cr0,
4035         .set_cr3 = svm_set_cr3,
4036         .set_cr4 = svm_set_cr4,
4037         .set_efer = svm_set_efer,
4038         .get_idt = svm_get_idt,
4039         .set_idt = svm_set_idt,
4040         .get_gdt = svm_get_gdt,
4041         .set_gdt = svm_set_gdt,
4042         .set_dr7 = svm_set_dr7,
4043         .cache_reg = svm_cache_reg,
4044         .get_rflags = svm_get_rflags,
4045         .set_rflags = svm_set_rflags,
4046         .fpu_activate = svm_fpu_activate,
4047         .fpu_deactivate = svm_fpu_deactivate,
4048
4049         .tlb_flush = svm_flush_tlb,
4050
4051         .run = svm_vcpu_run,
4052         .handle_exit = handle_exit,
4053         .skip_emulated_instruction = skip_emulated_instruction,
4054         .set_interrupt_shadow = svm_set_interrupt_shadow,
4055         .get_interrupt_shadow = svm_get_interrupt_shadow,
4056         .patch_hypercall = svm_patch_hypercall,
4057         .set_irq = svm_set_irq,
4058         .set_nmi = svm_inject_nmi,
4059         .queue_exception = svm_queue_exception,
4060         .cancel_injection = svm_cancel_injection,
4061         .interrupt_allowed = svm_interrupt_allowed,
4062         .nmi_allowed = svm_nmi_allowed,
4063         .get_nmi_mask = svm_get_nmi_mask,
4064         .set_nmi_mask = svm_set_nmi_mask,
4065         .enable_nmi_window = enable_nmi_window,
4066         .enable_irq_window = enable_irq_window,
4067         .update_cr8_intercept = update_cr8_intercept,
4068
4069         .set_tss_addr = svm_set_tss_addr,
4070         .get_tdp_level = get_npt_level,
4071         .get_mt_mask = svm_get_mt_mask,
4072
4073         .get_exit_info = svm_get_exit_info,
4074         .exit_reasons_str = svm_exit_reasons_str,
4075
4076         .get_lpage_level = svm_get_lpage_level,
4077
4078         .cpuid_update = svm_cpuid_update,
4079
4080         .rdtscp_supported = svm_rdtscp_supported,
4081
4082         .set_supported_cpuid = svm_set_supported_cpuid,
4083
4084         .has_wbinvd_exit = svm_has_wbinvd_exit,
4085
4086         .write_tsc_offset = svm_write_tsc_offset,
4087         .adjust_tsc_offset = svm_adjust_tsc_offset,
4088
4089         .set_tdp_cr3 = set_tdp_cr3,
4090
4091         .check_intercept = svm_check_intercept,
4092 };
4093
4094 static int __init svm_init(void)
4095 {
4096         return kvm_init(&svm_x86_ops, sizeof(struct vcpu_svm),
4097                         __alignof__(struct vcpu_svm), THIS_MODULE);
4098 }
4099
4100 static void __exit svm_exit(void)
4101 {
4102         kvm_exit();
4103 }
4104
4105 module_init(svm_init)
4106 module_exit(svm_exit)