KVM: fix XSAVE bit scanning (now properly)
[pandora-kernel.git] / arch / x86 / kvm / x86.c
index 77c9d86..84f4607 100644 (file)
@@ -347,6 +347,7 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
        vcpu->arch.cr2 = fault->address;
        kvm_queue_exception_e(vcpu, PF_VECTOR, fault->error_code);
 }
+EXPORT_SYMBOL_GPL(kvm_inject_page_fault);
 
 void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
 {
@@ -579,6 +580,14 @@ static bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu)
        return best && (best->ecx & bit(X86_FEATURE_XSAVE));
 }
 
+static bool guest_cpuid_has_smep(struct kvm_vcpu *vcpu)
+{
+       struct kvm_cpuid_entry2 *best;  /* CPUID entry 7.0; NULL-checked below */
+
+       best = kvm_find_cpuid_entry(vcpu, 7, 0);
+       return best && (best->ebx & bit(X86_FEATURE_SMEP));
+}
+
 static void update_cpuid(struct kvm_vcpu *vcpu)
 {
        struct kvm_cpuid_entry2 *best;
@@ -598,14 +607,17 @@ static void update_cpuid(struct kvm_vcpu *vcpu)
 int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
        unsigned long old_cr4 = kvm_read_cr4(vcpu);
-       unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE;
-
+       unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE |
+                                  X86_CR4_PAE | X86_CR4_SMEP;
        if (cr4 & CR4_RESERVED_BITS)
                return 1;
 
        if (!guest_cpuid_has_xsave(vcpu) && (cr4 & X86_CR4_OSXSAVE))
                return 1;
 
+       if (!guest_cpuid_has_smep(vcpu) && (cr4 & X86_CR4_SMEP))
+               return 1;
+
        if (is_long_mode(vcpu)) {
                if (!(cr4 & X86_CR4_PAE))
                        return 1;
@@ -615,11 +627,9 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
                                   kvm_read_cr3(vcpu)))
                return 1;
 
-       if (cr4 & X86_CR4_VMXE)
+       if (kvm_x86_ops->set_cr4(vcpu, cr4))
                return 1;
 
-       kvm_x86_ops->set_cr4(vcpu, cr4);
-
        if ((cr4 ^ old_cr4) & pdptr_bits)
                kvm_mmu_reset_context(vcpu);
 
@@ -1388,7 +1398,7 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
                        return 1;
                kvm_x86_ops->patch_hypercall(vcpu, instructions);
                ((unsigned char *)instructions)[3] = 0xc3; /* ret */
-               if (copy_to_user((void __user *)addr, instructions, 4))
+               if (__copy_to_user((void __user *)addr, instructions, 4))
                        return 1;
                kvm->arch.hv_hypercall = data;
                break;
@@ -1415,7 +1425,7 @@ static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data)
                                  HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT);
                if (kvm_is_error_hva(addr))
                        return 1;
-               if (clear_user((void __user *)addr, PAGE_SIZE))
+               if (__clear_user((void __user *)addr, PAGE_SIZE))
                        return 1;
                vcpu->arch.hv_vapic = data;
                break;
@@ -2283,6 +2293,13 @@ static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function,
        entry->flags = 0;
 }
 
+static bool supported_xcr0_bit(unsigned bit)
+{
+       u64 mask = ((u64)1 << bit); /* must be FP/SSE/YMM and in host_xcr0 */
+
+       return mask & (XSTATE_FP | XSTATE_SSE | XSTATE_YMM) & host_xcr0;
+}
+
 #define F(x) bit(X86_FEATURE_##x)
 
 static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
@@ -2342,6 +2359,10 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
                F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) |
                F(PMM) | F(PMM_EN);
 
+       /* cpuid 7.0.ebx */
+       const u32 kvm_supported_word9_x86_features =
+               F(SMEP);
+
        /* all calls to cpuid_count() should be made on the same cpu */
        get_cpu();
        do_cpuid_1_ent(entry, function, index);
@@ -2376,7 +2397,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
                }
                break;
        }
-       /* function 4 and 0xb have additional index. */
+       /* function 4 has additional index. */
        case 4: {
                int i, cache_type;
 
@@ -2393,6 +2414,22 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
                }
                break;
        }
+       case 7: {
+               entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+               /* Mask ebx against host capability word 9 */
+               if (index == 0) {
+                       entry->ebx &= kvm_supported_word9_x86_features;
+                       cpuid_mask(&entry->ebx, 9);
+               } else
+                       entry->ebx = 0;
+               entry->eax = 0;
+               entry->ecx = 0;
+               entry->edx = 0;
+               break;
+       }
+       case 9:
+               break;
+       /* function 0xb has additional index. */
        case 0xb: {
                int i, level_type;
 
@@ -2410,16 +2447,17 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
                break;
        }
        case 0xd: {
-               int i;
+               int idx, i;
 
                entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
-               for (i = 1; *nent < maxnent && i < 64; ++i) {
-                       if (entry[i].eax == 0)
+               for (idx = 1, i = 1; *nent < maxnent && idx < 64; ++idx) {
+                       do_cpuid_1_ent(&entry[i], function, idx);
+                       if (entry[i].eax == 0 || !supported_xcr0_bit(idx))
                                continue;
-                       do_cpuid_1_ent(&entry[i], function, i);
                        entry[i].flags |=
                               KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
                        ++*nent;
+                       ++i;
                }
                break;
        }
@@ -2451,6 +2489,24 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
                entry->ecx &= kvm_supported_word6_x86_features;
                cpuid_mask(&entry->ecx, 6);
                break;
+       case 0x80000008: {
+               unsigned g_phys_as = (entry->eax >> 16) & 0xff;
+               unsigned virt_as = max((entry->eax >> 8) & 0xff, 48U);
+               unsigned phys_as = entry->eax & 0xff;
+
+               if (!g_phys_as)
+                       g_phys_as = phys_as;
+               entry->eax = g_phys_as | (virt_as << 8);
+               entry->ebx = entry->edx = 0;
+               break;
+       }
+       case 0x80000019:
+               entry->ecx = entry->edx = 0;
+               break;
+       case 0x8000001a:
+               break;
+       case 0x8000001d:
+               break;
        /*Add support for Centaur's CPUID instruction*/
        case 0xC0000000:
                /*Just support up to 0xC0000004 now*/
@@ -2460,10 +2516,16 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
                entry->edx &= kvm_supported_word5_x86_features;
                cpuid_mask(&entry->edx, 5);
                break;
+       case 3: /* Processor serial number */
+       case 5: /* MONITOR/MWAIT */
+       case 6: /* Thermal management */
+       case 0xA: /* Architectural Performance Monitoring */
+       case 0x80000007: /* Advanced power management */
        case 0xC0000002:
        case 0xC0000003:
        case 0xC0000004:
-               /*Now nothing to do, reserved for the future*/
+       default:
+               entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
                break;
        }
 
@@ -3817,7 +3879,7 @@ static int kvm_fetch_guest_virt(struct x86_emulate_ctxt *ctxt,
                                          exception);
 }
 
-static int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt,
+int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt,
                               gva_t addr, void *val, unsigned int bytes,
                               struct x86_exception *exception)
 {
@@ -3827,6 +3889,7 @@ static int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt,
        return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access,
                                          exception);
 }
+EXPORT_SYMBOL_GPL(kvm_read_guest_virt);
 
 static int kvm_read_guest_virt_system(struct x86_emulate_ctxt *ctxt,
                                      gva_t addr, void *val, unsigned int bytes,
@@ -3836,7 +3899,7 @@ static int kvm_read_guest_virt_system(struct x86_emulate_ctxt *ctxt,
        return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, exception);
 }
 
-static int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
+int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
                                       gva_t addr, void *val,
                                       unsigned int bytes,
                                       struct x86_exception *exception)
@@ -3868,6 +3931,7 @@ static int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
 out:
        return r;
 }
+EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system);
 
 static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt,
                                  unsigned long addr,
@@ -4473,9 +4537,24 @@ static void inject_emulated_exception(struct kvm_vcpu *vcpu)
                kvm_queue_exception(vcpu, ctxt->exception.vector);
 }
 
+static void init_decode_cache(struct x86_emulate_ctxt *ctxt,
+                             const unsigned long *regs)
+{
+       memset(&ctxt->twobyte, 0,       /* zero from twobyte up to regs */
+              (void *)&ctxt->regs - (void *)&ctxt->twobyte);
+       memcpy(ctxt->regs, regs, sizeof(ctxt->regs)); /* copy in caller's regs */
+
+       ctxt->fetch.start = 0;  /* zero the fetch/io_read/mem_read markers */
+       ctxt->fetch.end = 0;
+       ctxt->io_read.pos = 0;
+       ctxt->io_read.end = 0;
+       ctxt->mem_read.pos = 0;
+       ctxt->mem_read.end = 0;
+}
+
 static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
 {
-       struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode;
+       struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
        int cs_db, cs_l;
 
        /*
@@ -4488,40 +4567,38 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
 
        kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
 
-       vcpu->arch.emulate_ctxt.eflags = kvm_get_rflags(vcpu);
-       vcpu->arch.emulate_ctxt.eip = kvm_rip_read(vcpu);
-       vcpu->arch.emulate_ctxt.mode =
-               (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL :
-               (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM)
-               ? X86EMUL_MODE_VM86 : cs_l
-               ? X86EMUL_MODE_PROT64 : cs_db
-               ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
-       vcpu->arch.emulate_ctxt.guest_mode = is_guest_mode(vcpu);
-       memset(c, 0, sizeof(struct decode_cache));
-       memcpy(c->regs, vcpu->arch.regs, sizeof c->regs);
+       ctxt->eflags = kvm_get_rflags(vcpu);
+       ctxt->eip = kvm_rip_read(vcpu);
+       ctxt->mode = (!is_protmode(vcpu))               ? X86EMUL_MODE_REAL :
+                    (ctxt->eflags & X86_EFLAGS_VM)     ? X86EMUL_MODE_VM86 :
+                    cs_l                               ? X86EMUL_MODE_PROT64 :
+                    cs_db                              ? X86EMUL_MODE_PROT32 :
+                                                         X86EMUL_MODE_PROT16;
+       ctxt->guest_mode = is_guest_mode(vcpu);
+
+       init_decode_cache(ctxt, vcpu->arch.regs);
        vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
 }
 
 int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
 {
-       struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode;
+       struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
        int ret;
 
        init_emulate_ctxt(vcpu);
 
-       vcpu->arch.emulate_ctxt.decode.op_bytes = 2;
-       vcpu->arch.emulate_ctxt.decode.ad_bytes = 2;
-       vcpu->arch.emulate_ctxt.decode.eip = vcpu->arch.emulate_ctxt.eip +
-                                                                inc_eip;
-       ret = emulate_int_real(&vcpu->arch.emulate_ctxt, &emulate_ops, irq);
+       ctxt->op_bytes = 2;
+       ctxt->ad_bytes = 2;
+       ctxt->_eip = ctxt->eip + inc_eip;
+       ret = emulate_int_real(ctxt, irq);
 
        if (ret != X86EMUL_CONTINUE)
                return EMULATE_FAIL;
 
-       vcpu->arch.emulate_ctxt.eip = c->eip;
-       memcpy(vcpu->arch.regs, c->regs, sizeof c->regs);
-       kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip);
-       kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
+       ctxt->eip = ctxt->_eip;
+       memcpy(vcpu->arch.regs, ctxt->regs, sizeof ctxt->regs);
+       kvm_rip_write(vcpu, ctxt->eip);
+       kvm_set_rflags(vcpu, ctxt->eflags);
 
        if (irq == NMI_VECTOR)
                vcpu->arch.nmi_pending = false;
@@ -4582,21 +4659,21 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
                            int insn_len)
 {
        int r;
-       struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode;
+       struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
        bool writeback = true;
 
        kvm_clear_exception_queue(vcpu);
 
        if (!(emulation_type & EMULTYPE_NO_DECODE)) {
                init_emulate_ctxt(vcpu);
-               vcpu->arch.emulate_ctxt.interruptibility = 0;
-               vcpu->arch.emulate_ctxt.have_exception = false;
-               vcpu->arch.emulate_ctxt.perm_ok = false;
+               ctxt->interruptibility = 0;
+               ctxt->have_exception = false;
+               ctxt->perm_ok = false;
 
-               vcpu->arch.emulate_ctxt.only_vendor_specific_insn
+               ctxt->only_vendor_specific_insn
                        = emulation_type & EMULTYPE_TRAP_UD;
 
-               r = x86_decode_insn(&vcpu->arch.emulate_ctxt, insn, insn_len);
+               r = x86_decode_insn(ctxt, insn, insn_len);
 
                trace_kvm_emulate_insn_start(vcpu);
                ++vcpu->stat.insn_emulation;
@@ -4612,7 +4689,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
        }
 
        if (emulation_type & EMULTYPE_SKIP) {
-               kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.decode.eip);
+               kvm_rip_write(vcpu, ctxt->_eip);
                return EMULATE_DONE;
        }
 
@@ -4620,11 +4697,11 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
           changes registers values  during IO operation */
        if (vcpu->arch.emulate_regs_need_sync_from_vcpu) {
                vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
-               memcpy(c->regs, vcpu->arch.regs, sizeof c->regs);
+               memcpy(ctxt->regs, vcpu->arch.regs, sizeof ctxt->regs);
        }
 
 restart:
-       r = x86_emulate_insn(&vcpu->arch.emulate_ctxt);
+       r = x86_emulate_insn(ctxt);
 
        if (r == EMULATION_INTERCEPTED)
                return EMULATE_DONE;
@@ -4636,7 +4713,7 @@ restart:
                return handle_emulation_failure(vcpu);
        }
 
-       if (vcpu->arch.emulate_ctxt.have_exception) {
+       if (ctxt->have_exception) {
                inject_emulated_exception(vcpu);
                r = EMULATE_DONE;
        } else if (vcpu->arch.pio.count) {
@@ -4655,13 +4732,12 @@ restart:
                r = EMULATE_DONE;
 
        if (writeback) {
-               toggle_interruptibility(vcpu,
-                               vcpu->arch.emulate_ctxt.interruptibility);
-               kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
+               toggle_interruptibility(vcpu, ctxt->interruptibility);
+               kvm_set_rflags(vcpu, ctxt->eflags);
                kvm_make_request(KVM_REQ_EVENT, vcpu);
-               memcpy(vcpu->arch.regs, c->regs, sizeof c->regs);
+               memcpy(vcpu->arch.regs, ctxt->regs, sizeof ctxt->regs);
                vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
-               kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip);
+               kvm_rip_write(vcpu, ctxt->eip);
        } else
                vcpu->arch.emulate_regs_need_sync_to_vcpu = true;
 
@@ -5082,8 +5158,7 @@ int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
 
        kvm_x86_ops->patch_hypercall(vcpu, instruction);
 
-       return emulator_write_emulated(&vcpu->arch.emulate_ctxt,
-                                      rip, instruction, 3, NULL);
+       return emulator_write_emulated(ctxt, rip, instruction, 3, NULL);
 }
 
 static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i)
@@ -5671,8 +5746,8 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
                 * that usually, but some bad designed PV devices (vmware
                 * backdoor interface) need this to work
                 */
-               struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode;
-               memcpy(vcpu->arch.regs, c->regs, sizeof c->regs);
+               struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
+               memcpy(vcpu->arch.regs, ctxt->regs, sizeof ctxt->regs);
                vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
        }
        regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX);
@@ -5801,21 +5876,20 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
 int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason,
                    bool has_error_code, u32 error_code)
 {
-       struct decode_cache *c = &vcpu->arch.emulate_ctxt.decode;
+       struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
        int ret;
 
        init_emulate_ctxt(vcpu);
 
-       ret = emulator_task_switch(&vcpu->arch.emulate_ctxt,
-                                  tss_selector, reason, has_error_code,
-                                  error_code);
+       ret = emulator_task_switch(ctxt, tss_selector, reason,
+                                  has_error_code, error_code);
 
        if (ret)
                return EMULATE_FAIL;
 
-       memcpy(vcpu->arch.regs, c->regs, sizeof c->regs);
-       kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.eip);
-       kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);
+       memcpy(vcpu->arch.regs, ctxt->regs, sizeof ctxt->regs);
+       kvm_rip_write(vcpu, ctxt->eip);
+       kvm_set_rflags(vcpu, ctxt->eflags);
        kvm_make_request(KVM_REQ_EVENT, vcpu);
        return EMULATE_DONE;
 }
@@ -6093,12 +6167,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
        if (r == 0)
                r = kvm_mmu_setup(vcpu);
        vcpu_put(vcpu);
-       if (r < 0)
-               goto free_vcpu;
 
-       return 0;
-free_vcpu:
-       kvm_x86_ops->vcpu_free(vcpu);
        return r;
 }