Merge branch 'for-linus' of git://neil.brown.name/md

[pandora-kernel.git] / arch / x86 / xen / xen-asm_32.S
diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S

index 42786f5..88e15de 100644 (file)
--- a/arch/x86/xen/xen-asm_32.S
+++ b/arch/x86/xen/xen-asm_32.S
@@ -1,117 +1,43 @@
  /*
-       Asm versions of Xen pv-ops, suitable for either direct use or inlining.
-       The inline versions are the same as the direct-use versions, with the
-       pre- and post-amble chopped off.
-
-       This code is encoded for size rather than absolute efficiency,
-       with a view to being able to inline as much as possible.
-
-       We only bother with direct forms (ie, vcpu in pda) of the operations
-       here; the indirect forms are better handled in C, since they're
-       generally too large to inline anyway.
+ * Asm versions of Xen pv-ops, suitable for either direct use or
+ * inlining.  The inline versions are the same as the direct-use
+ * versions, with the pre- and post-amble chopped off.
+ *
+ * This code is encoded for size rather than absolute efficiency, with
+ * a view to being able to inline as much as possible.
+ *
+ * We only bother with direct forms (ie, vcpu in pda) of the
+ * operations here; the indirect forms are better handled in C, since
+ * they're generally too large to inline anyway.
   */
  
-#include <linux/linkage.h>
-
-#include <asm/asm-offsets.h>
  #include <asm/thread_info.h>
-#include <asm/percpu.h>
  #include <asm/processor-flags.h>
  #include <asm/segment.h>
  
  #include <xen/interface/xen.h>
  
-#define RELOC(x, v)    .globl x##_reloc; x##_reloc=v
-#define ENDPATCH(x)    .globl x##_end; x##_end=.
-
-/* Pseudo-flag used for virtual NMI, which we don't implement yet */
-#define XEN_EFLAGS_NMI 0x80000000
-
-/*
-       Enable events.  This clears the event mask and tests the pending
-       event status with one and operation.  If there are pending
-       events, then enter the hypervisor to get them handled.
- */
-ENTRY(xen_irq_enable_direct)
-       /* Unmask events */
-       movb $0, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask
-
-       /* Preempt here doesn't matter because that will deal with
-          any pending interrupts.  The pending check may end up being
-          run on the wrong CPU, but that doesn't hurt. */
-
-       /* Test for pending */
-       testb $0xff, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_pending
-       jz 1f
-
-2:     call check_events
-1:
-ENDPATCH(xen_irq_enable_direct)
-       ret
-       ENDPROC(xen_irq_enable_direct)
-       RELOC(xen_irq_enable_direct, 2b+1)
-
-
-/*
-       Disabling events is simply a matter of making the event mask
-       non-zero.
- */
-ENTRY(xen_irq_disable_direct)
-       movb $1, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask
-ENDPATCH(xen_irq_disable_direct)
-       ret
-       ENDPROC(xen_irq_disable_direct)
-       RELOC(xen_irq_disable_direct, 0)
+#include "xen-asm.h"
  
  /*
-       (xen_)save_fl is used to get the current interrupt enable status.
-       Callers expect the status to be in X86_EFLAGS_IF, and other bits
-       may be set in the return value.  We take advantage of this by
-       making sure that X86_EFLAGS_IF has the right value (and other bits
-       in that byte are 0), but other bits in the return value are
-       undefined.  We need to toggle the state of the bit, because
-       Xen and x86 use opposite senses (mask vs enable).
+ * Force an event check by making a hypercall, but preserve regs
+ * before making the call.
   */
-ENTRY(xen_save_fl_direct)
-       testb $0xff, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask
-       setz %ah
-       addb %ah,%ah
-ENDPATCH(xen_save_fl_direct)
-       ret
-       ENDPROC(xen_save_fl_direct)
-       RELOC(xen_save_fl_direct, 0)
-
-
-/*
-       In principle the caller should be passing us a value return
-       from xen_save_fl_direct, but for robustness sake we test only
-       the X86_EFLAGS_IF flag rather than the whole byte. After
-       setting the interrupt mask state, it checks for unmasked
-       pending events and enters the hypervisor to get them delivered
-       if so.
- */
-ENTRY(xen_restore_fl_direct)
-       testb $X86_EFLAGS_IF>>8, %ah
-       setz PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask
-       /* Preempt here doesn't matter because that will deal with
-          any pending interrupts.  The pending check may end up being
-          run on the wrong CPU, but that doesn't hurt. */
-
-       /* check for unmasked and pending */
-       cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_pending
-       jz 1f
-2:     call check_events
-1:
-ENDPATCH(xen_restore_fl_direct)
+check_events:
+       push %eax
+       push %ecx
+       push %edx
+       call xen_force_evtchn_callback
+       pop %edx
+       pop %ecx
+       pop %eax
         ret
-       ENDPROC(xen_restore_fl_direct)
-       RELOC(xen_restore_fl_direct, 2b+1)
  
  /*
-       We can't use sysexit directly, because we're not running in ring0.
-       But we can easily fake it up using iret.  Assuming xen_sysexit
-       is jumped to with a standard stack frame, we can just strip it
-       back to a standard iret frame and use iret.
+ * We can't use sysexit directly, because we're not running in ring0.
+ * But we can easily fake it up using iret.  Assuming xen_sysexit is
+ * jumped to with a standard stack frame, we can just strip it back to
+ * a standard iret frame and use iret.
   */
  ENTRY(xen_sysexit)
         movl PT_EAX(%esp), %eax                 /* Shouldn't be necessary? */
@@ -122,33 +48,31 @@ ENTRY(xen_sysexit)
  ENDPROC(xen_sysexit)
  
  /*
-       This is run where a normal iret would be run, with the same stack setup:
-             8: eflags
-             4: cs
-       esp-> 0: eip
-
-       This attempts to make sure that any pending events are dealt
-       with on return to usermode, but there is a small window in
-       which an event can happen just before entering usermode.  If
-       the nested interrupt ends up setting one of the TIF_WORK_MASK
-       pending work flags, they will not be tested again before
-       returning to usermode. This means that a process can end up
-       with pending work, which will be unprocessed until the process
-       enters and leaves the kernel again, which could be an
-       unbounded amount of time.  This means that a pending signal or
-       reschedule event could be indefinitely delayed.
-
-       The fix is to notice a nested interrupt in the critical
-       window, and if one occurs, then fold the nested interrupt into
-       the current interrupt stack frame, and re-process it
-       iteratively rather than recursively.  This means that it will
-       exit via the normal path, and all pending work will be dealt
-       with appropriately.
-
-       Because the nested interrupt handler needs to deal with the
-       current stack state in whatever form its in, we keep things
-       simple by only using a single register which is pushed/popped
-       on the stack.
+ * This is run where a normal iret would be run, with the same stack setup:
+ *     8: eflags
+ *     4: cs
+ *     esp-> 0: eip
+ *
+ * This attempts to make sure that any pending events are dealt with
+ * on return to usermode, but there is a small window in which an
+ * event can happen just before entering usermode.  If the nested
+ * interrupt ends up setting one of the TIF_WORK_MASK pending work
+ * flags, they will not be tested again before returning to
+ * usermode. This means that a process can end up with pending work,
+ * which will be unprocessed until the process enters and leaves the
+ * kernel again, which could be an unbounded amount of time.  This
+ * means that a pending signal or reschedule event could be
+ * indefinitely delayed.
+ *
+ * The fix is to notice a nested interrupt in the critical window, and
+ * if one occurs, then fold the nested interrupt into the current
+ * interrupt stack frame, and re-process it iteratively rather than
+ * recursively.  This means that it will exit via the normal path, and
+ * all pending work will be dealt with appropriately.
+ *
+ * Because the nested interrupt handler needs to deal with the current
+ * stack state in whatever form its in, we keep things simple by only
+ * using a single register which is pushed/popped on the stack.
   */
  ENTRY(xen_iret)
         /* test eflags for special cases */
@@ -158,13 +82,15 @@ ENTRY(xen_iret)
         push %eax
         ESP_OFFSET=4    # bytes pushed onto stack
  
-       /* Store vcpu_info pointer for easy access.  Do it this
-          way to avoid having to reload %fs */
+       /*
+        * Store vcpu_info pointer for easy access.  Do it this way to
+        * avoid having to reload %fs
+        */
  #ifdef CONFIG_SMP
         GET_THREAD_INFO(%eax)
-       movl TI_cpu(%eax),%eax
-       movl __per_cpu_offset(,%eax,4),%eax
-       mov per_cpu__xen_vcpu(%eax),%eax
+       movl TI_cpu(%eax), %eax
+       movl __per_cpu_offset(,%eax,4), %eax
+       mov per_cpu__xen_vcpu(%eax), %eax
  #else
         movl per_cpu__xen_vcpu, %eax
  #endif
@@ -172,37 +98,46 @@ ENTRY(xen_iret)
         /* check IF state we're restoring */
         testb $X86_EFLAGS_IF>>8, 8+1+ESP_OFFSET(%esp)
  
-       /* Maybe enable events.  Once this happens we could get a
-          recursive event, so the critical region starts immediately
-          afterwards.  However, if that happens we don't end up
-          resuming the code, so we don't have to be worried about
-          being preempted to another CPU. */
+       /*
+        * Maybe enable events.  Once this happens we could get a
+        * recursive event, so the critical region starts immediately
+        * afterwards.  However, if that happens we don't end up
+        * resuming the code, so we don't have to be worried about
+        * being preempted to another CPU.
+        */
         setz XEN_vcpu_info_mask(%eax)
  xen_iret_start_crit:
  
         /* check for unmasked and pending */
         cmpw $0x0001, XEN_vcpu_info_pending(%eax)
  
-       /* If there's something pending, mask events again so we
-          can jump back into xen_hypervisor_callback */
+       /*
+        * If there's something pending, mask events again so we can
+        * jump back into xen_hypervisor_callback
+        */
         sete XEN_vcpu_info_mask(%eax)
  
         popl %eax
  
-       /* From this point on the registers are restored and the stack
-          updated, so we don't need to worry about it if we're preempted */
+       /*
+        * From this point on the registers are restored and the stack
+        * updated, so we don't need to worry about it if we're
+        * preempted
+        */
  iret_restore_end:
  
-       /* Jump to hypervisor_callback after fixing up the stack.
-          Events are masked, so jumping out of the critical
-          region is OK. */
+       /*
+        * Jump to hypervisor_callback after fixing up the stack.
+        * Events are masked, so jumping out of the critical region is
+        * OK.
+        */
         je xen_hypervisor_callback
  
  1:     iret
  xen_iret_end_crit:
-.section __ex_table,"a"
+.section __ex_table, "a"
         .align 4
-       .long 1b,iret_exc
+       .long 1b, iret_exc
  .previous
  
  hyper_iret:
@@ -212,55 +147,55 @@ hyper_iret:
         .globl xen_iret_start_crit, xen_iret_end_crit
  
  /*
-   This is called by xen_hypervisor_callback in entry.S when it sees
-   that the EIP at the time of interrupt was between xen_iret_start_crit
-   and xen_iret_end_crit.  We're passed the EIP in %eax so we can do
-   a more refined determination of what to do.
-
-   The stack format at this point is:
-       ----------------
-        ss             : (ss/esp may be present if we came from usermode)
-        esp            :
-        eflags         }  outer exception info
-        cs             }
-        eip            }
-       ---------------- <- edi (copy dest)
-        eax            :  outer eax if it hasn't been restored
-       ----------------
-        eflags         }  nested exception info
-        cs             }   (no ss/esp because we're nested
-        eip            }    from the same ring)
-        orig_eax       }<- esi (copy src)
-        - - - - - - - -
-        fs             }
-        es             }
-        ds             }  SAVE_ALL state
-        eax            }
-         :             :
-        ebx            }<- esp
-       ----------------
-
-   In order to deliver the nested exception properly, we need to shift
-   everything from the return addr up to the error code so it
-   sits just under the outer exception info.  This means that when we
-   handle the exception, we do it in the context of the outer exception
-   rather than starting a new one.
-
-   The only caveat is that if the outer eax hasn't been
-   restored yet (ie, it's still on stack), we need to insert
-   its value into the SAVE_ALL state before going on, since
-   it's usermode state which we eventually need to restore.
+ * This is called by xen_hypervisor_callback in entry.S when it sees
+ * that the EIP at the time of interrupt was between
+ * xen_iret_start_crit and xen_iret_end_crit.  We're passed the EIP in
+ * %eax so we can do a more refined determination of what to do.
+ *
+ * The stack format at this point is:
+ *     ----------------
+ *      ss             : (ss/esp may be present if we came from usermode)
+ *      esp            :
+ *      eflags         }  outer exception info
+ *      cs             }
+ *      eip            }
+ *     ---------------- <- edi (copy dest)
+ *      eax            :  outer eax if it hasn't been restored
+ *     ----------------
+ *      eflags         }  nested exception info
+ *      cs             }   (no ss/esp because we're nested
+ *      eip            }    from the same ring)
+ *      orig_eax       }<- esi (copy src)
+ *      - - - - - - - -
+ *      fs             }
+ *      es             }
+ *      ds             }  SAVE_ALL state
+ *      eax            }
+ *       :             :
+ *      ebx            }<- esp
+ *     ----------------
+ *
+ * In order to deliver the nested exception properly, we need to shift
+ * everything from the return addr up to the error code so it sits
+ * just under the outer exception info.  This means that when we
+ * handle the exception, we do it in the context of the outer
+ * exception rather than starting a new one.
+ *
+ * The only caveat is that if the outer eax hasn't been restored yet
+ * (ie, it's still on stack), we need to insert its value into the
+ * SAVE_ALL state before going on, since it's usermode state which we
+ * eventually need to restore.
   */
  ENTRY(xen_iret_crit_fixup)
         /*
-          Paranoia: Make sure we're really coming from kernel space.
-          One could imagine a case where userspace jumps into the
-          critical range address, but just before the CPU delivers a GP,
-          it decides to deliver an interrupt instead.  Unlikely?
-          Definitely.  Easy to avoid?  Yes.  The Intel documents
-          explicitly say that the reported EIP for a bad jump is the
-          jump instruction itself, not the destination, but some virtual
-          environments get this wrong.
+        * Paranoia: Make sure we're really coming from kernel space.
+        * One could imagine a case where userspace jumps into the
+        * critical range address, but just before the CPU delivers a
+        * GP, it decides to deliver an interrupt instead.  Unlikely?
+        * Definitely.  Easy to avoid?  Yes.  The Intel documents
+        * explicitly say that the reported EIP for a bad jump is the
+        * jump instruction itself, not the destination, but some
+        * virtual environments get this wrong.
          */
         movl PT_CS(%esp), %ecx
         andl $SEGMENT_RPL_MASK, %ecx
@@ -270,15 +205,17 @@ ENTRY(xen_iret_crit_fixup)
         lea PT_ORIG_EAX(%esp), %esi
         lea PT_EFLAGS(%esp), %edi
  
-       /* If eip is before iret_restore_end then stack
-          hasn't been restored yet. */
+       /*
+        * If eip is before iret_restore_end then stack
+        * hasn't been restored yet.
+        */
         cmp $iret_restore_end, %eax
         jae 1f
  
-       movl 0+4(%edi),%eax             /* copy EAX (just above top of frame) */
+       movl 0+4(%edi), %eax            /* copy EAX (just above top of frame) */
         movl %eax, PT_EAX(%esp)
  
-       lea ESP_OFFSET(%edi),%edi       /* move dest up over saved regs */
+       lea ESP_OFFSET(%edi), %edi      /* move dest up over saved regs */
  
         /* set up the copy */
  1:     std
@@ -286,20 +223,6 @@ ENTRY(xen_iret_crit_fixup)
         rep movsl
         cld
  
-       lea 4(%edi),%esp                /* point esp to new frame */
+       lea 4(%edi), %esp               /* point esp to new frame */
  2:     jmp xen_do_upcall
  
-
-/*
-       Force an event check by making a hypercall,
-       but preserve regs before making the call.
- */
-check_events:
-       push %eax
-       push %ecx
-       push %edx
-       call xen_force_evtchn_callback
-       pop %edx
-       pop %ecx
-       pop %eax
-       ret