KVM: VMX: add module parameter to avoid trapping HLT instructions (v5)
authorAnthony Liguori <aliguori@us.ibm.com>
Mon, 6 Dec 2010 16:53:38 +0000 (10:53 -0600)
committerAvi Kivity <avi@redhat.com>
Wed, 12 Jan 2011 09:30:46 +0000 (11:30 +0200)
In certain use-cases, we want to allocate guests fixed time slices where idle
guest cycles leave the machine idling.  There are many approaches to achieve
this but the most direct is to simply avoid trapping the HLT instruction which
lets the guest directly execute the instruction putting the processor to sleep.

Introduce this as a module-level option for kvm-vmx.ko since if you do this
for one guest, you probably want to do it for all.

Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
arch/x86/include/asm/vmx.h
arch/x86/kvm/vmx.c

index 42d9590..9642c22 100644 (file)
@@ -297,6 +297,12 @@ enum vmcs_field {
 #define GUEST_INTR_STATE_SMI           0x00000004
 #define GUEST_INTR_STATE_NMI           0x00000008
 
+/* GUEST_ACTIVITY_STATE flags */
+#define GUEST_ACTIVITY_ACTIVE          0
+#define GUEST_ACTIVITY_HLT             1
+#define GUEST_ACTIVITY_SHUTDOWN                2
+#define GUEST_ACTIVITY_WAIT_SIPI       3
+
 /*
  * Exit Qualifications for MOV for Control Register Access
  */
index f3693ca..c195260 100644 (file)
@@ -69,6 +69,9 @@ module_param(emulate_invalid_guest_state, bool, S_IRUGO);
 static int __read_mostly vmm_exclusive = 1;
 module_param(vmm_exclusive, bool, S_IRUGO);
 
+static int __read_mostly yield_on_hlt = 1;
+module_param(yield_on_hlt, bool, S_IRUGO);
+
 #define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST                          \
        (X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD)
 #define KVM_GUEST_CR0_MASK                                             \
@@ -1009,6 +1012,17 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
        vmx_set_interrupt_shadow(vcpu, 0);
 }
 
+static void vmx_clear_hlt(struct kvm_vcpu *vcpu)
+{
+       /* Ensure that we clear the HLT state in the VMCS.  We don't need to
+        * explicitly skip the instruction because if the HLT state is set, then
+        * the instruction is already executing and RIP has already been
+        * advanced. */
+       if (!yield_on_hlt &&
+           vmcs_read32(GUEST_ACTIVITY_STATE) == GUEST_ACTIVITY_HLT)
+               vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
+}
+
 static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
                                bool has_error_code, u32 error_code,
                                bool reinject)
@@ -1035,6 +1049,7 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
                intr_info |= INTR_TYPE_HARD_EXCEPTION;
 
        vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
+       vmx_clear_hlt(vcpu);
 }
 
 static bool vmx_rdtscp_supported(void)
@@ -1419,7 +1434,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
                                &_pin_based_exec_control) < 0)
                return -EIO;
 
-       min = CPU_BASED_HLT_EXITING |
+       min =
 #ifdef CONFIG_X86_64
              CPU_BASED_CR8_LOAD_EXITING |
              CPU_BASED_CR8_STORE_EXITING |
@@ -1432,6 +1447,10 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
              CPU_BASED_MWAIT_EXITING |
              CPU_BASED_MONITOR_EXITING |
              CPU_BASED_INVLPG_EXITING;
+
+       if (yield_on_hlt)
+               min |= CPU_BASED_HLT_EXITING;
+
        opt = CPU_BASED_TPR_SHADOW |
              CPU_BASED_USE_MSR_BITMAPS |
              CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
@@ -2728,7 +2747,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
        vmcs_writel(GUEST_IDTR_BASE, 0);
        vmcs_write32(GUEST_IDTR_LIMIT, 0xffff);
 
-       vmcs_write32(GUEST_ACTIVITY_STATE, 0);
+       vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
        vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
        vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0);
 
@@ -2821,6 +2840,7 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu)
        } else
                intr |= INTR_TYPE_EXT_INTR;
        vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr);
+       vmx_clear_hlt(vcpu);
 }
 
 static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
@@ -2848,6 +2868,7 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
        }
        vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
                        INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);
+       vmx_clear_hlt(vcpu);
 }
 
 static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)