x86/iopl/64: Properly context-switch IOPL on Xen PV

[pandora-kernel.git] / arch / x86 / xen / enlighten.c
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c

index 69b9ef6..b255312 100644 (file)
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -129,6 +129,21 @@ static void xen_vcpu_setup(int cpu)
  
         BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
  
+       /*
+        * This path is called twice on PVHVM - first during bootup via
+        * smp_init -> xen_hvm_cpu_notify, and then if the VCPU is being
+        * hotplugged: cpu_up -> xen_hvm_cpu_notify.
+        * As we can only do the VCPUOP_register_vcpu_info once, let's
+        * not overwrite its result.
+        *
+        * For PV it is called during restore (xen_vcpu_restore) and bootup
+        * (xen_setup_vcpu_info_placement). The hotplug mechanism does not
+        * use this function.
+        */
+       if (xen_hvm_domain()) {
+               if (per_cpu(xen_vcpu, cpu) == &per_cpu(xen_vcpu_info, cpu))
+                       return;
+       }
         if (cpu < MAX_VIRT_CPUS)
                 per_cpu(xen_vcpu,cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
  
@@ -306,6 +321,7 @@ static void set_aliased_prot(void *v, pgprot_t prot)
         pte_t pte;
         unsigned long pfn;
         struct page *page;
+       unsigned char dummy;
  
         ptep = lookup_address((unsigned long)v, &level);
         BUG_ON(ptep == NULL);
@@ -315,6 +331,32 @@ static void set_aliased_prot(void *v, pgprot_t prot)
  
         pte = pfn_pte(pfn, prot);
  
+       /*
+        * Careful: update_va_mapping() will fail if the virtual address
+        * we're poking isn't populated in the page tables.  We don't
+        * need to worry about the direct map (that's always in the page
+        * tables), but we need to be careful about vmap space.  In
+        * particular, the top level page table can lazily propagate
+        * entries between processes, so if we've switched mms since we
+        * vmapped the target in the first place, we might not have the
+        * top-level page table entry populated.
+        *
+        * We disable preemption because we want the same mm active when
+        * we probe the target and when we issue the hypercall.  We'll
+        * have the same nominal mm, but if we're a kernel thread, lazy
+        * mm dropping could change our pgd.
+        *
+        * Out of an abundance of caution, this uses __get_user() to fault
+        * in the target address just in case there's some obscure case
+        * in which the target address isn't readable.
+        */
+
+       preempt_disable();
+
+       pagefault_disable();    /* Avoid warnings due to being atomic. */
+       __get_user(dummy, (unsigned char __user __force *)v);
+       pagefault_enable();
+
         if (HYPERVISOR_update_va_mapping((unsigned long)v, pte, 0))
                 BUG();
  
@@ -326,6 +368,8 @@ static void set_aliased_prot(void *v, pgprot_t prot)
                                 BUG();
         } else
                 kmap_flush_unused();
+
+       preempt_enable();
  }
  
  static void xen_alloc_ldt(struct desc_struct *ldt, unsigned entries)
@@ -333,6 +377,17 @@ static void xen_alloc_ldt(struct desc_struct *ldt, unsigned entries)
         const unsigned entries_per_page = PAGE_SIZE / LDT_ENTRY_SIZE;
         int i;
  
+       /*
+        * We need to mark all the aliases of the LDT pages RO.  We
+        * don't need to call vm_flush_aliases(), though, since that's
+        * only responsible for flushing aliases out of the TLBs, not the
+        * page tables, and Xen will flush the TLB for us if needed.
+        *
+        * To avoid confusing future readers: none of this is necessary
+        * to load the LDT.  The hypervisor only checks this when the
+        * LDT is faulted in due to subsequent descriptor access.
+        */
+
         for(i = 0; i < entries; i += entries_per_page)
                 set_aliased_prot(ldt + i, PAGE_KERNEL_RO);
  }
@@ -713,7 +768,7 @@ static void xen_load_sp0(struct tss_struct *tss,
         xen_mc_issue(PARAVIRT_LAZY_CPU);
  }
  
-static void xen_set_iopl_mask(unsigned mask)
+void xen_set_iopl_mask(unsigned mask)
  {
         struct physdev_set_iopl set_iopl;
  
@@ -1391,8 +1446,11 @@ static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self,
         switch (action) {
         case CPU_UP_PREPARE:
                 xen_vcpu_setup(cpu);
-               if (xen_have_vector_callback)
+               if (xen_have_vector_callback) {
                         xen_init_lock_cpu(cpu);
+                       if (xen_feature(XENFEAT_hvm_safe_pvclock))
+                               xen_setup_timer(cpu);
+               }
                 break;
         default:
                 break;