x86-64: Fix CFI annotations for NMI nesting code

[pandora-kernel.git] / arch / x86 / kernel / entry_64.S
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S

index 940ba71..e0eca00 100644 (file)
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -55,6 +55,7 @@
  #include <asm/paravirt.h>
  #include <asm/ftrace.h>
  #include <asm/percpu.h>
+#include <linux/err.h>
  
  /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
  #include <linux/elf-em.h>
@@ -548,7 +549,7 @@ badsys:
  #ifdef CONFIG_AUDITSYSCALL
         /*
          * Fast path for syscall audit without full syscall trace.
-        * We just call audit_syscall_entry() directly, and then
+        * We just call __audit_syscall_entry() directly, and then
          * jump back to the normal fast path.
          */
  auditsys:
@@ -558,22 +559,21 @@ auditsys:
         movq %rdi,%rdx                  /* 3rd arg: 1st syscall arg */
         movq %rax,%rsi                  /* 2nd arg: syscall number */
         movl $AUDIT_ARCH_X86_64,%edi    /* 1st arg: audit arch */
-       call audit_syscall_entry
+       call __audit_syscall_entry
         LOAD_ARGS 0             /* reload call-clobbered registers */
         jmp system_call_fastpath
  
         /*
-        * Return fast path for syscall audit.  Call audit_syscall_exit()
+        * Return fast path for syscall audit.  Call __audit_syscall_exit()
          * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT
          * masked off.
          */
  sysret_audit:
         movq RAX-ARGOFFSET(%rsp),%rsi   /* second arg, syscall return value */
-       cmpq $0,%rsi            /* is it < 0? */
-       setl %al                /* 1 if so, 0 if not */
+       cmpq $-MAX_ERRNO,%rsi   /* is it <= -MAX_ERRNO (unsigned)? */
+       setbe %al               /* 1 if so, 0 if not */
         movzbl %al,%edi         /* zero-extend that into %edi */
-       inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
-       call audit_syscall_exit
+       call __audit_syscall_exit
         movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
         jmp sysret_check
  #endif /* CONFIG_AUDITSYSCALL */
@@ -1530,12 +1530,20 @@ ENTRY(nmi)
  
         /* Use %rdx as our temp variable throughout */
         pushq_cfi %rdx
+       CFI_REL_OFFSET rdx, 0
+
+       /*
+        * If %cs was not the kernel segment, then the NMI triggered in user
+        * space, which means it is definitely not nested.
+        */
+       cmpl $__KERNEL_CS, 16(%rsp)
+       jne first_nmi
  
         /*
          * Check the special variable on the stack to see if NMIs are
          * executing.
          */
-       cmp $1, -8(%rsp)
+       cmpl $1, -8(%rsp)
         je nested_nmi
  
         /*
@@ -1547,6 +1555,7 @@ ENTRY(nmi)
          */
         lea 6*8(%rsp), %rdx
         test_in_nmi rdx, 4*8(%rsp), nested_nmi, first_nmi
+       CFI_REMEMBER_STATE
  
  nested_nmi:
         /*
@@ -1578,10 +1587,12 @@ nested_nmi:
  
  nested_nmi_out:
         popq_cfi %rdx
+       CFI_RESTORE rdx
  
         /* No need to check faults here */
         INTERRUPT_RETURN
  
+       CFI_RESTORE_STATE
  first_nmi:
         /*
          * Because nested NMIs will use the pushed location that we
@@ -1617,6 +1628,10 @@ first_nmi:
          * NMI may zero out. The original stack frame and the temp storage
          * is also used by nested NMIs and can not be trusted on exit.
          */
+       /* Do not pop rdx, nested NMIs will corrupt it */
+       movq (%rsp), %rdx
+       CFI_RESTORE rdx
+
         /* Set the NMI executing variable on the stack. */
         pushq_cfi $1
  
@@ -1624,14 +1639,31 @@ first_nmi:
         .rept 5
         pushq_cfi 6*8(%rsp)
         .endr
+       CFI_DEF_CFA_OFFSET SS+8-RIP
+
+       /*
+        * If there was a nested NMI, the first NMI's iret will return
+        * here. But NMIs are still enabled and we can take another
+        * nested NMI. The nested NMI checks the interrupted RIP to see
+        * if it is between repeat_nmi and end_repeat_nmi, and if so
+        * it will just return, as we are about to repeat an NMI anyway.
+        * This makes it safe to copy to the stack frame that a nested
+        * NMI will update.
+        */
+repeat_nmi:
+       /*
+        * Update the stack variable to say we are still in NMI (the update
+        * is benign for the non-repeat case, where 1 was pushed just above
+        * to this very stack slot).
+        */
+       movq $1, 5*8(%rsp)
  
         /* Make another copy, this one may be modified by nested NMIs */
         .rept 5
         pushq_cfi 4*8(%rsp)
         .endr
-
-       /* Do not pop rdx, nested NMIs will corrupt it */
-       movq 11*8(%rsp), %rdx
+       CFI_DEF_CFA_OFFSET SS+8-RIP
+end_repeat_nmi:
  
         /*
          * Everything below this point can be preempted by a nested
@@ -1639,7 +1671,6 @@ first_nmi:
          * caused by an exception and nested NMI will start here, and
          * can still be preempted by another NMI.
          */
-restart_nmi:
         pushq_cfi $-1           /* ORIG_RAX: no syscall to restart */
         subq $ORIG_RAX-R15, %rsp
         CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
@@ -1668,26 +1699,6 @@ nmi_restore:
         CFI_ENDPROC
  END(nmi)
  
-       /*
-        * If an NMI hit an iret because of an exception or breakpoint,
-        * it can lose its NMI context, and a nested NMI may come in.
-        * In that case, the nested NMI will change the preempted NMI's
-        * stack to jump to here when it does the final iret.
-        */
-repeat_nmi:
-       INTR_FRAME
-       /* Update the stack variable to say we are still in NMI */
-       movq $1, 5*8(%rsp)
-
-       /* copy the saved stack back to copy stack */
-       .rept 5
-       pushq_cfi 4*8(%rsp)
-       .endr
-
-       jmp restart_nmi
-       CFI_ENDPROC
-end_repeat_nmi:
-
  ENTRY(ignore_sysret)
         CFI_STARTPROC
         mov $-ENOSYS,%eax