[PATCH] lockdep: irqtrace subsystem, x86_64 support
author     Ingo Molnar <mingo@elte.hu>
           Mon, 3 Jul 2006 07:24:45 +0000 (00:24 -0700)
committer  Linus Torvalds <torvalds@g5.osdl.org>
           Mon, 3 Jul 2006 22:27:03 +0000 (15:27 -0700)
Add irqflags-tracing support to x86_64.
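
Lockdep has to know the hardirq-enabled state of every context, so each
place where the low-level entry code disables or (re-)enables interrupts
gets a TRACE_IRQS_OFF/TRACE_IRQS_ON annotation, and iretq/sysretq paths
that may re-enable interrupts behind our back are annotated as well. A
minimal C sketch of the ordering contract the annotations follow (the
trace_hardirqs_*() hooks come from the lockdep core; the traced_irq_*()
helpers are illustrative only, not part of this patch):

    void trace_hardirqs_on(void);   /* lockdep core hook */
    void trace_hardirqs_off(void);  /* lockdep core hook */

    static inline void traced_irq_enable(void)
    {
            trace_hardirqs_on();              /* soft state first, while irqs are still off */
            asm volatile("sti" ::: "memory"); /* then set the hardware IF flag */
    }

    static inline void traced_irq_disable(void)
    {
            asm volatile("cli" ::: "memory"); /* clear the hardware IF flag first */
            trace_hardirqs_off();             /* then mark the soft state off */
    }

This is why, throughout the patch, TRACE_IRQS_ON sits before sti and
TRACE_IRQS_OFF comes after cli.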

[akpm@osdl.org: build fix]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: Andi Kleen <ak@muc.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
arch/x86_64/Kconfig.debug
arch/x86_64/ia32/ia32entry.S
arch/x86_64/kernel/entry.S
arch/x86_64/kernel/irq.c
include/asm-x86_64/irqflags.h [new file with mode: 0644]
include/asm-x86_64/system.h

diff --git a/arch/x86_64/Kconfig.debug b/arch/x86_64/Kconfig.debug
index 1d92ab5..775d211 100644
--- a/arch/x86_64/Kconfig.debug
+++ b/arch/x86_64/Kconfig.debug
@@ -1,5 +1,9 @@
 menu "Kernel hacking"
 
+config TRACE_IRQFLAGS_SUPPORT
+       bool
+       default y
+
 source "lib/Kconfig.debug"
 
 config DEBUG_RODATA
diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S
index c536fa9..9b5bb41 100644
--- a/arch/x86_64/ia32/ia32entry.S
+++ b/arch/x86_64/ia32/ia32entry.S
@@ -13,6 +13,7 @@
 #include <asm/thread_info.h>   
 #include <asm/segment.h>
 #include <asm/vsyscall32.h>
+#include <asm/irqflags.h>
 #include <linux/linkage.h>
 
 #define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8)
@@ -75,6 +76,10 @@ ENTRY(ia32_sysenter_target)
        swapgs
        movq    %gs:pda_kernelstack, %rsp
        addq    $(PDA_STACKOFFSET),%rsp 
+       /*
+        * No need to follow this irqs on/off section: the syscall
+        * disabled irqs and here we enable them straight after entry:
+        */
        sti     
        movl    %ebp,%ebp               /* zero extension */
        pushq   $__USER32_DS
@@ -118,6 +123,7 @@ sysenter_do_call:
        movq    %rax,RAX-ARGOFFSET(%rsp)
        GET_THREAD_INFO(%r10)
        cli
+       TRACE_IRQS_OFF
        testl   $_TIF_ALLWORK_MASK,threadinfo_flags(%r10)
        jnz     int_ret_from_sys_call
        andl    $~TS_COMPAT,threadinfo_status(%r10)
@@ -132,6 +138,7 @@ sysenter_do_call:
        CFI_REGISTER rsp,rcx
        movl    $VSYSCALL32_SYSEXIT,%edx        /* User %eip */
        CFI_REGISTER rip,rdx
+       TRACE_IRQS_ON
        swapgs
        sti             /* sti only takes effect after the next instruction */
        /* sysexit */
@@ -186,6 +193,10 @@ ENTRY(ia32_cstar_target)
        movl    %esp,%r8d
        CFI_REGISTER    rsp,r8
        movq    %gs:pda_kernelstack,%rsp
+       /*
+        * No need to follow this irqs on/off section: the syscall
+        * disabled irqs and here we enable them straight after entry:
+        */
        sti
        SAVE_ARGS 8,1,1
        movl    %eax,%eax       /* zero extension */
@@ -220,6 +231,7 @@ cstar_do_call:
        movq %rax,RAX-ARGOFFSET(%rsp)
        GET_THREAD_INFO(%r10)
        cli
+       TRACE_IRQS_OFF
        testl $_TIF_ALLWORK_MASK,threadinfo_flags(%r10)
        jnz  int_ret_from_sys_call
        andl $~TS_COMPAT,threadinfo_status(%r10)
@@ -228,6 +240,7 @@ cstar_do_call:
        CFI_REGISTER rip,rcx
        movl EFLAGS-ARGOFFSET(%rsp),%r11d       
        /*CFI_REGISTER rflags,r11*/
+       TRACE_IRQS_ON
        movl RSP-ARGOFFSET(%rsp),%esp
        CFI_RESTORE rsp
        swapgs
@@ -286,7 +299,11 @@ ENTRY(ia32_syscall)
        /*CFI_REL_OFFSET        rflags,EFLAGS-RIP*/
        /*CFI_REL_OFFSET        cs,CS-RIP*/
        CFI_REL_OFFSET  rip,RIP-RIP
-       swapgs  
+       swapgs
+       /*
+        * No need to follow this irqs on/off section: the syscall
+        * disabled irqs and here we enable them straight after entry:
+        */
        sti
        movl %eax,%eax
        pushq %rax
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
index ed92c29..d464dde 100644
--- a/arch/x86_64/kernel/entry.S
+++ b/arch/x86_64/kernel/entry.S
 #include <asm/thread_info.h>
 #include <asm/hw_irq.h>
 #include <asm/page.h>
+#include <asm/irqflags.h>
 
        .code64
 
 #ifndef CONFIG_PREEMPT
 #define retint_kernel retint_restore_args
 #endif 
-       
+
+
+.macro TRACE_IRQS_IRETQ offset=ARGOFFSET
+#ifdef CONFIG_TRACE_IRQFLAGS
+       bt   $9,EFLAGS-\offset(%rsp)    /* interrupts off? */
+       jnc  1f
+       TRACE_IRQS_ON
+1:
+#endif
+.endm
+
 /*
  * C code is not supposed to know about undefined top of stack. Every time 
 * a C function with a pt_regs argument is called from the SYSCALL based 
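
The TRACE_IRQS_IRETQ macro above covers return paths where iretq itself
may re-enable interrupts by restoring the saved EFLAGS image. In C terms
it amounts to roughly the following (a sketch: the eflags parameter
stands for the word the entry code saved at EFLAGS-\offset(%rsp), and
trace_hardirqs_on() is the lockdep core hook):

    #define X86_EFLAGS_IF (1UL << 9)        /* hardware interrupt-enable flag */

    static void trace_irqs_iretq(unsigned long eflags)
    {
            if (eflags & X86_EFLAGS_IF)     /* iretq is about to re-enable irqs */
                    trace_hardirqs_on();    /* so update lockdep's view first */
    }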
@@ -194,6 +205,10 @@ ENTRY(system_call)
        swapgs
        movq    %rsp,%gs:pda_oldrsp 
        movq    %gs:pda_kernelstack,%rsp
+       /*
+        * No need to follow this irqs off/on section - it's straight
+        * and short:
+        */
        sti                                     
        SAVE_ARGS 8,1
        movq  %rax,ORIG_RAX-ARGOFFSET(%rsp) 
@@ -219,10 +234,15 @@ ret_from_sys_call:
 sysret_check:          
        GET_THREAD_INFO(%rcx)
        cli
+       TRACE_IRQS_OFF
        movl threadinfo_flags(%rcx),%edx
        andl %edi,%edx
        CFI_REMEMBER_STATE
        jnz  sysret_careful 
+       /*
+        * sysretq will re-enable interrupts:
+        */
+       TRACE_IRQS_ON
        movq RIP-ARGOFFSET(%rsp),%rcx
        CFI_REGISTER    rip,rcx
        RESTORE_ARGS 0,-ARG_SKIP,1
@@ -237,6 +257,7 @@ sysret_careful:
        CFI_RESTORE_STATE
        bt $TIF_NEED_RESCHED,%edx
        jnc sysret_signal
+       TRACE_IRQS_ON
        sti
        pushq %rdi
        CFI_ADJUST_CFA_OFFSET 8
@@ -247,6 +268,7 @@ sysret_careful:
 
        /* Handle a signal */ 
 sysret_signal:
+       TRACE_IRQS_ON
        sti
        testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
        jz    1f
@@ -261,6 +283,7 @@ sysret_signal:
        /* Use IRET because user could have changed frame. This
           works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
        cli
+       TRACE_IRQS_OFF
        jmp int_with_check
        
 badsys:
@@ -309,6 +332,7 @@ ENTRY(int_ret_from_sys_call)
        CFI_REL_OFFSET  r10,R10-ARGOFFSET
        CFI_REL_OFFSET  r11,R11-ARGOFFSET
        cli
+       TRACE_IRQS_OFF
        testl $3,CS-ARGOFFSET(%rsp)
        je retint_restore_args
        movl $_TIF_ALLWORK_MASK,%edi
@@ -327,6 +351,7 @@ int_with_check:
 int_careful:
        bt $TIF_NEED_RESCHED,%edx
        jnc  int_very_careful
+       TRACE_IRQS_ON
        sti
        pushq %rdi
        CFI_ADJUST_CFA_OFFSET 8
@@ -334,10 +359,12 @@ int_careful:
        popq %rdi
        CFI_ADJUST_CFA_OFFSET -8
        cli
+       TRACE_IRQS_OFF
        jmp int_with_check
 
        /* handle signals and tracing -- both require a full stack frame */
 int_very_careful:
+       TRACE_IRQS_ON
        sti
        SAVE_REST
        /* Check for syscall exit trace */      
@@ -351,6 +378,7 @@ int_very_careful:
        CFI_ADJUST_CFA_OFFSET -8
        andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
        cli
+       TRACE_IRQS_OFF
        jmp int_restore_rest
        
 int_signal:
@@ -363,6 +391,7 @@ int_signal:
 int_restore_rest:
        RESTORE_REST
        cli
+       TRACE_IRQS_OFF
        jmp int_with_check
        CFI_ENDPROC
 END(int_ret_from_sys_call)
@@ -484,6 +513,10 @@ END(stub_rt_sigreturn)
        swapgs  
 1:     incl    %gs:pda_irqcount        # RED-PEN should check preempt count
        cmoveq %gs:pda_irqstackptr,%rsp
+       /*
+        * We entered an interrupt context - irqs are off:
+        */
+       TRACE_IRQS_OFF
        call \func
        .endm
 
@@ -493,6 +526,7 @@ ENTRY(common_interrupt)
        /* 0(%rsp): oldrsp-ARGOFFSET */
 ret_from_intr:
        cli     
+       TRACE_IRQS_OFF
        decl %gs:pda_irqcount
        leaveq
        CFI_DEF_CFA_REGISTER    rsp
@@ -515,9 +549,21 @@ retint_check:
        CFI_REMEMBER_STATE
        jnz  retint_careful
 retint_swapgs:         
+       /*
+        * The iretq could re-enable interrupts:
+        */
+       cli
+       TRACE_IRQS_IRETQ
        swapgs 
+       jmp restore_args
+
 retint_restore_args:                           
        cli
+       /*
+        * The iretq could re-enable interrupts:
+        */
+       TRACE_IRQS_IRETQ
+restore_args:
        RESTORE_ARGS 0,8,0                                              
 iret_label:    
        iretq
@@ -530,6 +576,7 @@ iret_label:
        /* running with kernel gs */
 bad_iret:
        movq $11,%rdi   /* SIGSEGV */
+       TRACE_IRQS_ON
        sti
        jmp do_exit                     
        .previous       
@@ -539,6 +586,7 @@ retint_careful:
        CFI_RESTORE_STATE
        bt    $TIF_NEED_RESCHED,%edx
        jnc   retint_signal
+       TRACE_IRQS_ON
        sti
        pushq %rdi
        CFI_ADJUST_CFA_OFFSET   8
@@ -547,11 +595,13 @@ retint_careful:
        CFI_ADJUST_CFA_OFFSET   -8
        GET_THREAD_INFO(%rcx)
        cli
+       TRACE_IRQS_OFF
        jmp retint_check
        
 retint_signal:
        testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
        jz    retint_swapgs
+       TRACE_IRQS_ON
        sti
        SAVE_REST
        movq $-1,ORIG_RAX(%rsp)                         
@@ -560,6 +610,7 @@ retint_signal:
        call do_notify_resume
        RESTORE_REST
        cli
+       TRACE_IRQS_OFF
        movl $_TIF_NEED_RESCHED,%edi
        GET_THREAD_INFO(%rcx)
        jmp retint_check
@@ -666,7 +717,7 @@ END(spurious_interrupt)
 
        /* error code is on the stack already */
        /* handle NMI like exceptions that can happen everywhere */
-       .macro paranoidentry sym, ist=0
+       .macro paranoidentry sym, ist=0, irqtrace=1
        SAVE_ALL
        cld
        movl $1,%ebx
@@ -691,8 +742,73 @@ END(spurious_interrupt)
        addq    $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
        .endif
        cli
+       .if \irqtrace
+       TRACE_IRQS_OFF
+       .endif
        .endm
-       
+
+       /*
+        * "Paranoid" exit path from exception stack.
+        * Paranoid because this is used by NMIs and cannot take
+        * any kernel state for granted.
+        * We don't do kernel preemption checks here, because only
+        * NMI should be common and it does not enable IRQs and
+        * cannot get reschedule ticks.
+        *
+        * "trace" is 0 for the NMI handler only, because irq-tracing
+        * is fundamentally NMI-unsafe. (we cannot change the soft and
+        * hard flags at once, atomically)
+        */
+       .macro paranoidexit trace=1
+       /* ebx: no swapgs flag */
+paranoid_exit\trace:
+       testl %ebx,%ebx                         /* swapgs needed? */
+       jnz paranoid_restore\trace
+       testl $3,CS(%rsp)
+       jnz   paranoid_userspace\trace
+paranoid_swapgs\trace:
+       TRACE_IRQS_IRETQ 0
+       swapgs
+paranoid_restore\trace:
+       RESTORE_ALL 8
+       iretq
+paranoid_userspace\trace:
+       GET_THREAD_INFO(%rcx)
+       movl threadinfo_flags(%rcx),%ebx
+       andl $_TIF_WORK_MASK,%ebx
+       jz paranoid_swapgs\trace
+       movq %rsp,%rdi                  /* &pt_regs */
+       call sync_regs
+       movq %rax,%rsp                  /* switch stack for scheduling */
+       testl $_TIF_NEED_RESCHED,%ebx
+       jnz paranoid_schedule\trace
+       movl %ebx,%edx                  /* arg3: thread flags */
+       .if \trace
+       TRACE_IRQS_ON
+       .endif
+       sti
+       xorl %esi,%esi                  /* arg2: oldset */
+       movq %rsp,%rdi                  /* arg1: &pt_regs */
+       call do_notify_resume
+       cli
+       .if \trace
+       TRACE_IRQS_OFF
+       .endif
+       jmp paranoid_userspace\trace
+paranoid_schedule\trace:
+       .if \trace
+       TRACE_IRQS_ON
+       .endif
+       sti
+       call schedule
+       cli
+       .if \trace
+       TRACE_IRQS_OFF
+       .endif
+       jmp paranoid_userspace\trace
+       CFI_ENDPROC
+       .endm
+
 /*
  * Exception entry point. This expects an error code/orig_rax on the stack
  * and the exception handler in %rax.  
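
The trace=0 variant exists because irq-state tracing is fundamentally
NMI-unsafe: the hardware IF flag and lockdep's software mirror cannot be
updated in a single atomic step, so an NMI can always observe the two
out of sync. A sketch of the window (illustrative helper, not from this
patch):

    static void traced_irq_disable_window(void)
    {
            asm volatile("cli" ::: "memory"); /* hardware IF now clear */
            /*
             * An NMI arriving here sees IF=0 while lockdep still thinks
             * irqs are on - and NMI context has no safe way to fix that
             * up. Hence do_nmi uses the untraced paranoid_exit0 path.
             */
            trace_hardirqs_off();             /* soft state catches up */
    }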
@@ -748,6 +864,7 @@ error_exit:
        movl %ebx,%eax          
        RESTORE_REST
        cli
+       TRACE_IRQS_OFF
        GET_THREAD_INFO(%rcx)   
        testl %eax,%eax
        jne  retint_kernel
@@ -755,6 +872,10 @@ error_exit:
        movl  $_TIF_WORK_MASK,%edi
        andl  %edi,%edx
        jnz  retint_careful
+       /*
+        * The iret might restore flags:
+        */
+       TRACE_IRQS_IRETQ
        swapgs 
        RESTORE_ARGS 0,8,0                                              
        jmp iret_label
@@ -916,8 +1037,7 @@ KPROBE_ENTRY(debug)
        pushq $0
        CFI_ADJUST_CFA_OFFSET 8         
        paranoidentry do_debug, DEBUG_STACK
-       jmp paranoid_exit
-       CFI_ENDPROC
+       paranoidexit
 END(debug)
        .previous .text
 
@@ -926,49 +1046,13 @@ KPROBE_ENTRY(nmi)
        INTR_FRAME
        pushq $-1
        CFI_ADJUST_CFA_OFFSET 8
-       paranoidentry do_nmi
-       /*
-        * "Paranoid" exit path from exception stack.
-        * Paranoid because this is used by NMIs and cannot take
-        * any kernel state for granted.
-        * We don't do kernel preemption checks here, because only
-        * NMI should be common and it does not enable IRQs and
-        * cannot get reschedule ticks.
-        */
-       /* ebx: no swapgs flag */
-paranoid_exit:
-       testl %ebx,%ebx                         /* swapgs needed? */
-       jnz paranoid_restore
-       testl $3,CS(%rsp)
-       jnz   paranoid_userspace
-paranoid_swapgs:       
-       swapgs
-paranoid_restore:      
-       RESTORE_ALL 8
-       iretq
-paranoid_userspace:    
-       GET_THREAD_INFO(%rcx)
-       movl threadinfo_flags(%rcx),%ebx
-       andl $_TIF_WORK_MASK,%ebx
-       jz paranoid_swapgs
-       movq %rsp,%rdi                  /* &pt_regs */
-       call sync_regs
-       movq %rax,%rsp                  /* switch stack for scheduling */
-       testl $_TIF_NEED_RESCHED,%ebx
-       jnz paranoid_schedule
-       movl %ebx,%edx                  /* arg3: thread flags */
-       sti
-       xorl %esi,%esi                  /* arg2: oldset */
-       movq %rsp,%rdi                  /* arg1: &pt_regs */
-       call do_notify_resume
-       cli
-       jmp paranoid_userspace
-paranoid_schedule:
-       sti
-       call schedule
-       cli
-       jmp paranoid_userspace
-       CFI_ENDPROC
+       paranoidentry do_nmi, 0, 0
+#ifdef CONFIG_TRACE_IRQFLAGS
+       paranoidexit 0
+#else
+       jmp paranoid_exit1
+       CFI_ENDPROC
+#endif
 END(nmi)
        .previous .text
 
@@ -977,7 +1061,7 @@ KPROBE_ENTRY(int3)
        pushq $0
        CFI_ADJUST_CFA_OFFSET 8
        paranoidentry do_int3, DEBUG_STACK
-       jmp paranoid_exit
+       jmp paranoid_exit1
        CFI_ENDPROC
 END(int3)
        .previous .text
@@ -1006,7 +1090,7 @@ END(reserved)
 ENTRY(double_fault)
        XCPT_FRAME
        paranoidentry do_double_fault
-       jmp paranoid_exit
+       jmp paranoid_exit1
        CFI_ENDPROC
 END(double_fault)
 
@@ -1022,7 +1106,7 @@ END(segment_not_present)
 ENTRY(stack_segment)
        XCPT_FRAME
        paranoidentry do_stack_segment
-       jmp paranoid_exit
+       jmp paranoid_exit1
        CFI_ENDPROC
 END(stack_segment)
 
@@ -1050,7 +1134,7 @@ ENTRY(machine_check)
        pushq $0
        CFI_ADJUST_CFA_OFFSET 8 
        paranoidentry do_machine_check
-       jmp paranoid_exit
+       jmp paranoid_exit1
        CFI_ENDPROC
 END(machine_check)
 #endif
diff --git a/arch/x86_64/kernel/irq.c b/arch/x86_64/kernel/irq.c
index a1f1df5..5221a53 100644
--- a/arch/x86_64/kernel/irq.c
+++ b/arch/x86_64/kernel/irq.c
@@ -177,8 +177,10 @@ asmlinkage void do_softirq(void)
        local_irq_save(flags);
        pending = local_softirq_pending();
        /* Switch to interrupt stack */
-       if (pending)
+       if (pending) {
                call_softirq();
+               WARN_ON_ONCE(softirq_count());
+       }
        local_irq_restore(flags);
 }
 EXPORT_SYMBOL(do_softirq);
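
The WARN_ON_ONCE() guards against a softirq-count leak: once
call_softirq() has returned, softirq_count() (the SOFTIRQ bits of
preempt_count) must have dropped back to zero. A handler like the
hypothetical one below would now trigger the warning:

    /* illustrative buggy handler - not from this patch: */
    static void buggy_softirq_action(struct softirq_action *a)
    {
            local_bh_disable();
            /*
             * Missing local_bh_enable(): softirq_count() stays nonzero
             * after __do_softirq() returns, so the new WARN_ON_ONCE()
             * in do_softirq() fires (once).
             */
    }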
diff --git a/include/asm-x86_64/irqflags.h b/include/asm-x86_64/irqflags.h
new file mode 100644
index 0000000..22f3c06
--- /dev/null
+++ b/include/asm-x86_64/irqflags.h
@@ -0,0 +1,66 @@
+/*
+ * include/asm-x86_64/irqflags.h
+ *
+ * IRQ flags handling
+ *
+ * This file gets included from lowlevel asm headers too, to provide
+ * wrapped versions of the local_irq_*() APIs, based on the
+ * raw_local_irq_*() macros from the lowlevel headers.
+ */
+#ifndef _ASM_IRQFLAGS_H
+#define _ASM_IRQFLAGS_H
+
+#ifndef __ASSEMBLY__
+
+/* interrupt control.. */
+#define raw_local_save_flags(x)        do { warn_if_not_ulong(x); __asm__ __volatile__("# save_flags \n\t pushfq ; popq %q0":"=g" (x): /* no input */ :"memory"); } while (0)
+#define raw_local_irq_restore(x)       __asm__ __volatile__("# restore_flags \n\t pushq %0 ; popfq": /* no output */ :"g" (x):"memory", "cc")
+
+#ifdef CONFIG_X86_VSMP
+/* Interrupt control for VSMP  architecture */
+#define raw_local_irq_disable()        do { unsigned long flags; raw_local_save_flags(flags); raw_local_irq_restore((flags & ~(1 << 9)) | (1 << 18)); } while (0)
+#define raw_local_irq_enable() do { unsigned long flags; raw_local_save_flags(flags); raw_local_irq_restore((flags | (1 << 9)) & ~(1 << 18)); } while (0)
+
+#define raw_irqs_disabled_flags(flags) \
+({                                             \
+       (flags & (1<<18)) || !(flags & (1<<9)); \
+})
+
+/* For spinlocks etc */
+#define raw_local_irq_save(x)  do { raw_local_save_flags(x); raw_local_irq_restore((x & ~(1 << 9)) | (1 << 18)); } while (0)
+#else  /* CONFIG_X86_VSMP */
+#define raw_local_irq_disable()        __asm__ __volatile__("cli": : :"memory")
+#define raw_local_irq_enable() __asm__ __volatile__("sti": : :"memory")
+
+#define raw_irqs_disabled_flags(flags) \
+({                                             \
+       !(flags & (1<<9));                      \
+})
+
+/* For spinlocks etc */
+#define raw_local_irq_save(x)  do { warn_if_not_ulong(x); __asm__ __volatile__("# raw_local_irq_save \n\t pushfq ; popq %0 ; cli":"=g" (x): /* no input */ :"memory"); } while (0)
+#endif
+
+#define raw_irqs_disabled()                    \
+({                                             \
+       unsigned long flags;                    \
+       raw_local_save_flags(flags);            \
+       raw_irqs_disabled_flags(flags);         \
+})
+
+/* used in the idle loop; sti takes one instruction cycle to complete */
+#define raw_safe_halt()        __asm__ __volatile__("sti; hlt": : :"memory")
+/* used when interrupts are already enabled or to shutdown the processor */
+#define halt()                 __asm__ __volatile__("hlt": : :"memory")
+
+#else /* __ASSEMBLY__: */
+# ifdef CONFIG_TRACE_IRQFLAGS
+#  define TRACE_IRQS_ON                call trace_hardirqs_on_thunk
+#  define TRACE_IRQS_OFF               call trace_hardirqs_off_thunk
+# else
+#  define TRACE_IRQS_ON
+#  define TRACE_IRQS_OFF
+# endif
+#endif
+
+#endif
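
When CONFIG_TRACE_IRQFLAGS is set, the assembly-side TRACE_IRQS_* macros
expand to calls of the register-preserving thunks
(trace_hardirqs_on_thunk/trace_hardirqs_off_thunk, provided in
arch/x86_64/lib/thunk.S); otherwise they compile away. All of the C
macros test or toggle bit 9 of RFLAGS, the IF (interrupt-enable) flag;
the VSMP variants additionally encode the irq state in bit 18 (AC). The
pushfq/popq sequence can be tried from userspace (a standalone sketch,
assuming x86-64 and gcc):

    #include <stdio.h>

    int main(void)
    {
            unsigned long flags;

            /* same sequence as raw_local_save_flags() above */
            asm volatile("# save_flags\n\t"
                         "pushfq ; popq %0" : "=g" (flags) : : "memory");
            printf("IF = %lu (1 means interrupts enabled)\n",
                   (flags >> 9) & 1);
            return 0;
    }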
diff --git a/include/asm-x86_64/system.h b/include/asm-x86_64/system.h
index 68e559f..f67f287 100644
--- a/include/asm-x86_64/system.h
+++ b/include/asm-x86_64/system.h
@@ -244,43 +244,7 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
 
 #define warn_if_not_ulong(x) do { unsigned long foo; (void) (&(x) == &foo); } while (0)
 
-/* interrupt control.. */
-#define local_save_flags(x)    do { warn_if_not_ulong(x); __asm__ __volatile__("# save_flags \n\t pushfq ; popq %q0":"=g" (x): /* no input */ :"memory"); } while (0)
-#define local_irq_restore(x)   __asm__ __volatile__("# restore_flags \n\t pushq %0 ; popfq": /* no output */ :"g" (x):"memory", "cc")
-
-#ifdef CONFIG_X86_VSMP
-/* Interrupt control for VSMP  architecture */
-#define local_irq_disable()    do { unsigned long flags; local_save_flags(flags); local_irq_restore((flags & ~(1 << 9)) | (1 << 18)); } while (0)
-#define local_irq_enable()     do { unsigned long flags; local_save_flags(flags); local_irq_restore((flags | (1 << 9)) & ~(1 << 18)); } while (0)
-
-#define irqs_disabled()                                        \
-({                                                     \
-       unsigned long flags;                            \
-       local_save_flags(flags);                        \
-       (flags & (1<<18)) || !(flags & (1<<9));         \
-})
-
-/* For spinlocks etc */
-#define local_irq_save(x)      do { local_save_flags(x); local_irq_restore((x & ~(1 << 9)) | (1 << 18)); } while (0)
-#else  /* CONFIG_X86_VSMP */
-#define local_irq_disable()    __asm__ __volatile__("cli": : :"memory")
-#define local_irq_enable()     __asm__ __volatile__("sti": : :"memory")
-
-#define irqs_disabled()                        \
-({                                     \
-       unsigned long flags;            \
-       local_save_flags(flags);        \
-       !(flags & (1<<9));              \
-})
-
-/* For spinlocks etc */
-#define local_irq_save(x)      do { warn_if_not_ulong(x); __asm__ __volatile__("# local_irq_save \n\t pushfq ; popq %0 ; cli":"=g" (x): /* no input */ :"memory"); } while (0)
-#endif
-
-/* used in the idle loop; sti takes one instruction cycle to complete */
-#define safe_halt()            __asm__ __volatile__("sti; hlt": : :"memory")
-/* used when interrupts are already enabled or to shutdown the processor */
-#define halt()                 __asm__ __volatile__("hlt": : :"memory")
+#include <linux/irqflags.h>
 
 void cpu_idle_wait(void);
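
With the raw_*() primitives moved into asm/irqflags.h, system.h now pulls
in <linux/irqflags.h>, which the generic irqtrace patch in this series
adds to wrap them with the tracing hooks. A simplified sketch of that
layering (not part of this file):

    #ifdef CONFIG_TRACE_IRQFLAGS
    # define local_irq_enable() \
            do { trace_hardirqs_on(); raw_local_irq_enable(); } while (0)
    # define local_irq_disable() \
            do { raw_local_irq_disable(); trace_hardirqs_off(); } while (0)
    #else
    # define local_irq_enable()     raw_local_irq_enable()
    # define local_irq_disable()    raw_local_irq_disable()
    #endif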