Merge branch 'tip-x86-fpu' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp...

author Ingo Molnar <mingo@kernel.org>

Thu, 19 Feb 2015 10:17:42 +0000 (11:17 +0100)

committer Ingo Molnar <mingo@kernel.org>

Thu, 19 Feb 2015 10:19:05 +0000 (11:19 +0100)
author Ingo Molnar <mingo@kernel.org>
Thu, 19 Feb 2015 10:17:42 +0000 (11:17 +0100)
committer Ingo Molnar <mingo@kernel.org>
Thu, 19 Feb 2015 10:19:05 +0000 (11:19 +0100)
diff --combined arch/x86/include/asm/fpu-internal.h

index 0dbc082,19fb41c..61609b9
--- 1/arch/x86/include/asm/fpu-internal.h
--- 2/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@@ -67,6 -67,34 +67,34 @@@ extern void finit_soft_fpu(struct i387_
   static inline void finit_soft_fpu(struct i387_soft_struct *soft) {}
   #endif
   
+ /*
+  * Must be run with preemption disabled: this clears the fpu_owner_task,
+  * on this CPU.
+  *
+  * This will disable any lazy FPU state restore of the current FPU state,
+  * but if the current thread owns the FPU, it will still be saved.
+  */
+ static inline void __cpu_disable_lazy_restore(unsigned int cpu)
+ {
+       per_cpu(fpu_owner_task, cpu) = NULL;
+ }
+ 
+ /*
+  * Used to indicate that the FPU state in memory is newer than the FPU
+  * state in registers, and the FPU state should be reloaded next time the
+  * task is run. Only safe on the current task, or non-running tasks.
+  */
+ static inline void task_disable_lazy_fpu_restore(struct task_struct *tsk)
+ {
+       tsk->thread.fpu.last_cpu = ~0;
+ }
+ 
+ static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu)
+ {
+       return new == this_cpu_read_stable(fpu_owner_task) &&
+               cpu == new->thread.fpu.last_cpu;
+ }
+ 
   static inline int is_ia32_compat_frame(void)
   {
         return config_enabled(CONFIG_IA32_EMULATION) &&
@@@ -207,7 -235,7 +235,7 @@@ static inline void fpu_fxsave(struct fp
         if (config_enabled(CONFIG_X86_32))
                 asm volatile( "fxsave %[fx]" : [fx] "=m" (fpu->state->fxsave));
         else if (config_enabled(CONFIG_AS_FXSAVEQ))
- -              asm volatile("fxsaveq %0" : "=m" (fpu->state->fxsave));
+ +              asm volatile("fxsaveq %[fx]" : [fx] "=m" (fpu->state->fxsave));
         else {
                 /* Using "rex64; fxsave %0" is broken because, if the memory
                  * operand uses any extended registers for addressing, a second
@@@ -290,11 -318,9 +318,11 @@@ static inline int fpu_restore_checking(
   
   static inline int restore_fpu_checking(struct task_struct *tsk)
   {
- -      /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
- -         is pending.  Clear the x87 state here by setting it to fixed
- -         values. "m" is a random variable that should be in L1 */
+ +      /*
+ +       * AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception is
+ +       * pending. Clear the x87 state here by setting it to fixed values.
+ +       * "m" is a random variable that should be in L1.
+ +       */
         if (unlikely(static_cpu_has_bug_safe(X86_BUG_FXSAVE_LEAK))) {
                 asm volatile(
                         "fnclex\n\t"
@@@ -400,24 -426,6 +428,6 @@@ static inline void drop_init_fpu(struc
    */
   typedef struct { int preload; } fpu_switch_t;
   
- /*
-  * Must be run with preemption disabled: this clears the fpu_owner_task,
-  * on this CPU.
-  *
-  * This will disable any lazy FPU state restore of the current FPU state,
-  * but if the current thread owns the FPU, it will still be saved by.
-  */
- static inline void __cpu_disable_lazy_restore(unsigned int cpu)
- {
-       per_cpu(fpu_owner_task, cpu) = NULL;
- }
- 
- static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu)
- {
-       return new == this_cpu_read_stable(fpu_owner_task) &&
-               cpu == new->thread.fpu.last_cpu;
- }
- 
   static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new, int cpu)
   {
         fpu_switch_t fpu;
@@@ -426,13 -434,17 +436,17 @@@
          * If the task has used the math, pre-load the FPU on xsave processors
          * or if the past 5 consecutive context-switches used math.
          */
-       fpu.preload = tsk_used_math(new) && (use_eager_fpu() ||
-                                            new->thread.fpu_counter > 5);
+       fpu.preload = tsk_used_math(new) &&
+                     (use_eager_fpu() || new->thread.fpu_counter > 5);
+ 
         if (__thread_has_fpu(old)) {
                 if (!__save_init_fpu(old))
-                       cpu = ~0;
-               old->thread.fpu.last_cpu = cpu;
-               old->thread.fpu.has_fpu = 0;    /* But leave fpu_owner_task! */
+                       task_disable_lazy_fpu_restore(old);
+               else
+                       old->thread.fpu.last_cpu = cpu;
+ 
+               /* But leave fpu_owner_task! */
+               old->thread.fpu.has_fpu = 0;
   
                 /* Don't change CR0.TS if we just switch! */
                 if (fpu.preload) {
@@@ -443,10 -455,10 +457,10 @@@
                         stts();
         } else {
                 old->thread.fpu_counter = 0;
-               old->thread.fpu.last_cpu = ~0;
+               task_disable_lazy_fpu_restore(old);
                 if (fpu.preload) {
                         new->thread.fpu_counter++;
-                       if (!use_eager_fpu() && fpu_lazy_restore(new, cpu))
+                       if (fpu_lazy_restore(new, cpu))
                                 fpu.preload = 0;
                         else
                                 prefetch(new->thread.fpu.state);
@@@ -519,24 -531,6 +533,6 @@@ static inline void __save_fpu(struct ta
                 fpu_fxsave(&tsk->thread.fpu);
   }
   
- /*
-  * These disable preemption on their own and are safe
-  */
- static inline void save_init_fpu(struct task_struct *tsk)
- {
-       WARN_ON_ONCE(!__thread_has_fpu(tsk));
- 
-       if (use_eager_fpu()) {
-               __save_fpu(tsk);
-               return;
-       }
- 
-       preempt_disable();
-       __save_init_fpu(tsk);
-       __thread_fpu_end(tsk);
-       preempt_enable();
- }
- 
   /*
    * i387 state interaction
    */
diff --combined arch/x86/kernel/i387.c

index 81049ff,5722ab6..f59d806
--- 1/arch/x86/kernel/i387.c
--- 2/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@@ -19,19 -19,6 +19,19 @@@
   #include <asm/fpu-internal.h>
   #include <asm/user.h>
   
+ +static DEFINE_PER_CPU(bool, in_kernel_fpu);
+ +
+ +void kernel_fpu_disable(void)
+ +{
+ +      WARN_ON(this_cpu_read(in_kernel_fpu));
+ +      this_cpu_write(in_kernel_fpu, true);
+ +}
+ +
+ +void kernel_fpu_enable(void)
+ +{
+ +      this_cpu_write(in_kernel_fpu, false);
+ +}
+ +
   /*
    * Were we in an interrupt that interrupted kernel mode?
    *
@@@ -46,9 -33,6 +46,9 @@@
    */
   static inline bool interrupted_kernel_fpu_idle(void)
   {
+ +      if (this_cpu_read(in_kernel_fpu))
+ +              return false;
+ +
         if (use_eager_fpu())
                 return __thread_has_fpu(current);
   
@@@ -89,10 -73,10 +89,10 @@@ void __kernel_fpu_begin(void
   {
         struct task_struct *me = current;
   
+ +      this_cpu_write(in_kernel_fpu, true);
+ +
         if (__thread_has_fpu(me)) {
- -              __thread_clear_has_fpu(me);
                 __save_init_fpu(me);
- -              /* We do 'stts()' in __kernel_fpu_end() */
         } else if (!use_eager_fpu()) {
                 this_cpu_write(fpu_owner_task, NULL);
                 clts();
@@@ -102,16 -86,19 +102,16 @@@ EXPORT_SYMBOL(__kernel_fpu_begin)
   
   void __kernel_fpu_end(void)
   {
- -      if (use_eager_fpu()) {
- -              /*
- -               * For eager fpu, most the time, tsk_used_math() is true.
- -               * Restore the user math as we are done with the kernel usage.
- -               * At few instances during thread exit, signal handling etc,
- -               * tsk_used_math() is false. Those few places will take proper
- -               * actions, so we don't need to restore the math here.
- -               */
- -              if (likely(tsk_used_math(current)))
- -                      math_state_restore();
- -      } else {
+ +      struct task_struct *me = current;
+ +
+ +      if (__thread_has_fpu(me)) {
+ +              if (WARN_ON(restore_fpu_checking(me)))
+ +                      drop_init_fpu(me);
+ +      } else if (!use_eager_fpu()) {
                 stts();
         }
+ +
+ +      this_cpu_write(in_kernel_fpu, false);
   }
   EXPORT_SYMBOL(__kernel_fpu_end);
   
@@@ -119,10 -106,13 +119,13 @@@ void unlazy_fpu(struct task_struct *tsk
   {
         preempt_disable();
         if (__thread_has_fpu(tsk)) {
-               __save_init_fpu(tsk);
-               __thread_fpu_end(tsk);
-       } else
-               tsk->thread.fpu_counter = 0;
+               if (use_eager_fpu()) {
+                       __save_fpu(tsk);
+               } else {
+                       __save_init_fpu(tsk);
+                       __thread_fpu_end(tsk);
+               }
+       }
         preempt_enable();
   }
   EXPORT_SYMBOL(unlazy_fpu);
@@@ -246,7 -236,7 +249,7 @@@ int init_fpu(struct task_struct *tsk
         if (tsk_used_math(tsk)) {
                 if (cpu_has_fpu && tsk == current)
                         unlazy_fpu(tsk);
-               tsk->thread.fpu.last_cpu = ~0;
+               task_disable_lazy_fpu_restore(tsk);
                 return 0;
         }
   
diff --combined arch/x86/kernel/traps.c

index 9d2073e,9d889f7..92b83e2
--- 1/arch/x86/kernel/traps.c
--- 2/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@@ -108,88 -108,6 +108,88 @@@ static inline void preempt_conditional_
         preempt_count_dec();
   }
   
+ +enum ctx_state ist_enter(struct pt_regs *regs)
+ +{
+ +      enum ctx_state prev_state;
+ +
+ +      if (user_mode_vm(regs)) {
+ +              /* Other than that, we're just an exception. */
+ +              prev_state = exception_enter();
+ +      } else {
+ +              /*
+ +               * We might have interrupted pretty much anything.  In
+ +               * fact, if we're a machine check, we can even interrupt
+ +               * NMI processing.  We don't want in_nmi() to return true,
+ +               * but we need to notify RCU.
+ +               */
+ +              rcu_nmi_enter();
+ +              prev_state = IN_KERNEL;  /* the value is irrelevant. */
+ +      }
+ +
+ +      /*
+ +       * We are atomic because we're on the IST stack (or we're on x86_32,
+ +       * in which case we still shouldn't schedule).
+ +       *
+ +       * This must be after exception_enter(), because exception_enter()
+ +       * won't do anything if in_interrupt() returns true.
+ +       */
+ +      preempt_count_add(HARDIRQ_OFFSET);
+ +
+ +      /* This code is a bit fragile.  Test it. */
+ +      rcu_lockdep_assert(rcu_is_watching(), "ist_enter didn't work");
+ +
+ +      return prev_state;
+ +}
+ +
+ +void ist_exit(struct pt_regs *regs, enum ctx_state prev_state)
+ +{
+ +      /* Must be before exception_exit. */
+ +      preempt_count_sub(HARDIRQ_OFFSET);
+ +
+ +      if (user_mode_vm(regs))
+ +              return exception_exit(prev_state);
+ +      else
+ +              rcu_nmi_exit();
+ +}
+ +
+ +/**
+ + * ist_begin_non_atomic() - begin a non-atomic section in an IST exception
+ + * @regs:     regs passed to the IST exception handler
+ + *
+ + * IST exception handlers normally cannot schedule.  As a special
+ + * exception, if the exception interrupted userspace code (i.e.
+ + * user_mode_vm(regs) would return true) and the exception was not
+ + * a double fault, it can be safe to schedule.  ist_begin_non_atomic()
+ + * begins a non-atomic section within an ist_enter()/ist_exit() region.
+ + * Callers are responsible for enabling interrupts themselves inside
+ + * the non-atomic section, and callers must call ist_end_non_atomic()
+ + * before ist_exit().
+ + */
+ +void ist_begin_non_atomic(struct pt_regs *regs)
+ +{
+ +      BUG_ON(!user_mode_vm(regs));
+ +
+ +      /*
+ +       * Sanity check: we need to be on the normal thread stack.  This
+ +       * will catch asm bugs and any attempt to use ist_begin_non_atomic
+ +       * from double_fault.
+ +       */
+ +      BUG_ON(((current_stack_pointer() ^ this_cpu_read_stable(kernel_stack))
+ +              & ~(THREAD_SIZE - 1)) != 0);
+ +
+ +      preempt_count_sub(HARDIRQ_OFFSET);
+ +}
+ +
+ +/**
+ + * ist_end_non_atomic() - end a non-atomic section in an IST exception
+ + *
+ + * Ends a non-atomic section started with ist_begin_non_atomic().
+ + */
+ +void ist_end_non_atomic(void)
+ +{
+ +      preempt_count_add(HARDIRQ_OFFSET);
+ +}
+ +
   static nokprobe_inline int
   do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str,
                   struct pt_regs *regs, long error_code)
@@@ -333,8 -251,6 +333,8 @@@ dotraplinkage void do_double_fault(stru
          * end up promoting it to a doublefault.  In that case, modify
          * the stack to make it look like we just entered the #GP
          * handler from user space, similar to bad_iret.
+ +       *
+ +       * No need for ist_enter here because we don't use RCU.
          */
         if (((long)regs->sp >> PGDIR_SHIFT) == ESPFIX_PGD_ENTRY &&
                 regs->cs == __KERNEL_CS &&
@@@ -347,12 -263,12 +347,12 @@@
                 normal_regs->orig_ax = 0;  /* Missing (lost) #GP error code */
                 regs->ip = (unsigned long)general_protection;
                 regs->sp = (unsigned long)&normal_regs->orig_ax;
+ +
                 return;
         }
   #endif
   
- -      exception_enter();
- -      /* Return not checked because double check cannot be ignored */
+ +      ist_enter(regs);  /* Discard prev_state because we won't return. */
         notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV);
   
         tsk->thread.error_code = error_code;
@@@ -518,7 -434,7 +518,7 @@@ dotraplinkage void notrace do_int3(stru
         if (poke_int3_handler(regs))
                 return;
   
- -      prev_state = exception_enter();
+ +      prev_state = ist_enter(regs);
   #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
         if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
                                 SIGTRAP) == NOTIFY_STOP)
@@@ -544,20 -460,33 +544,20 @@@
         preempt_conditional_cli(regs);
         debug_stack_usage_dec();
   exit:
- -      exception_exit(prev_state);
+ +      ist_exit(regs, prev_state);
   }
   NOKPROBE_SYMBOL(do_int3);
   
   #ifdef CONFIG_X86_64
   /*
- - * Help handler running on IST stack to switch back to user stack
- - * for scheduling or signal handling. The actual stack switch is done in
- - * entry.S
+ + * Help handler running on IST stack to switch off the IST stack if the
+ + * interrupted code was in user mode. The actual stack switch is done in
+ + * entry_64.S
    */
   asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs)
   {
- -      struct pt_regs *regs = eregs;
- -      /* Did already sync */
- -      if (eregs == (struct pt_regs *)eregs->sp)
- -              ;
- -      /* Exception from user space */
- -      else if (user_mode(eregs))
- -              regs = task_pt_regs(current);
- -      /*
- -       * Exception from kernel and interrupts are enabled. Move to
- -       * kernel process stack.
- -       */
- -      else if (eregs->flags & X86_EFLAGS_IF)
- -              regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs));
- -      if (eregs != regs)
- -              *regs = *eregs;
+ +      struct pt_regs *regs = task_pt_regs(current);
+ +      *regs = *eregs;
         return regs;
   }
   NOKPROBE_SYMBOL(sync_regs);
@@@ -625,7 -554,7 +625,7 @@@ dotraplinkage void do_debug(struct pt_r
         unsigned long dr6;
         int si_code;
   
- -      prev_state = exception_enter();
+ +      prev_state = ist_enter(regs);
   
         get_debugreg(dr6, 6);
   
@@@ -700,7 -629,7 +700,7 @@@
         debug_stack_usage_dec();
   
   exit:
- -      exception_exit(prev_state);
+ +      ist_exit(regs, prev_state);
   }
   NOKPROBE_SYMBOL(do_debug);
   
@@@ -734,7 -663,7 +734,7 @@@ static void math_error(struct pt_regs *
         /*
          * Save the info for the exception handler and clear the error.
          */
-       save_init_fpu(task);
+       unlazy_fpu(task);
         task->thread.trap_nr = trapnr;
         task->thread.error_code = error_code;
         info.si_signo = SIGFPE;
@@@ -859,16 -788,18 +859,16 @@@ void math_state_restore(void
                 local_irq_disable();
         }
   
+ +      /* Avoid __kernel_fpu_begin() right after __thread_fpu_begin() */
+ +      kernel_fpu_disable();
         __thread_fpu_begin(tsk);
- -
- -      /*
- -       * Paranoid restore. send a SIGSEGV if we fail to restore the state.
- -       */
         if (unlikely(restore_fpu_checking(tsk))) {
                 drop_init_fpu(tsk);
                 force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk);
- -              return;
+ +      } else {
+ +              tsk->thread.fpu_counter++;
         }
- -
- -      tsk->thread.fpu_counter++;
+ +      kernel_fpu_enable();
   }
   EXPORT_SYMBOL_GPL(math_state_restore);
author	Ingo Molnar <mingo@kernel.org>
	Thu, 19 Feb 2015 10:17:42 +0000 (11:17 +0100)
committer	Ingo Molnar <mingo@kernel.org>
	Thu, 19 Feb 2015 10:19:05 +0000 (11:19 +0100)
		1	2
arch/x86/include/asm/fpu-internal.h	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/i387.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/traps.c	patch \|	diff1 \|	diff2 \|	blob \| history