[MIPS] FPU affinity for MT ASE.
author	Ralf Baechle <ralf@linux-mips.org>
	Wed, 5 Apr 2006 08:45:47 +0000 (09:45 +0100)
committer	Ralf Baechle <ralf@linux-mips.org>
	Wed, 19 Apr 2006 02:14:28 +0000 (04:14 +0200)
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
arch/mips/Kconfig
arch/mips/kernel/process.c
arch/mips/kernel/scall32-o32.S
arch/mips/kernel/setup.c
arch/mips/kernel/smp-mt.c
arch/mips/kernel/traps.c
include/asm-mips/cpu-features.h
include/asm-mips/fpu.h
include/asm-mips/processor.h
include/asm-mips/system.h

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index f9be549..87f0b79 100644
@@ -1464,6 +1464,11 @@ config MIPS_VPE_LOADER
 
 endchoice
 
+config MIPS_MT_FPAFF
+       bool "Dynamic FPU affinity for FP-intensive threads"
+       depends on MIPS_MT
+       default y
+
 config MIPS_VPE_LOADER_TOM
        bool "Load VPE program into memory hidden from linux"
        depends on MIPS_VPE_LOADER
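
Since the new option depends on MIPS_MT and defaults to y, an MT kernel
configuration will normally end up carrying:

    CONFIG_MIPS_MT_FPAFF=y

so the affinity hooks in the hunks below are compiled in unless the option
is deselected explicitly.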
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index 8b393df..199a06e 100644
@@ -185,6 +185,17 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long usp,
        childregs->cp0_status &= ~(ST0_CU2|ST0_CU1);
        clear_tsk_thread_flag(p, TIF_USEDFPU);
 
+#ifdef CONFIG_MIPS_MT_FPAFF
+       /*
+        * FPU affinity support is cleaner if we track the
+        * user-visible CPU affinity from the very beginning.
+        * The generic cpus_allowed mask will already have
+        * been copied from the parent before copy_thread
+        * is invoked.
+        */
+       p->thread.user_cpus_allowed = p->cpus_allowed;
+#endif /* CONFIG_MIPS_MT_FPAFF */
+
        if (clone_flags & CLONE_SETTLS)
                ti->tp_value = regs->regs[7];
 
diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S
index 4e36b87..a0ac0e5 100644
@@ -569,8 +569,19 @@ einval:    li      v0, -EINVAL
        sys     sys_tkill               2
        sys     sys_sendfile64          5
        sys     sys_futex               6
+#ifdef CONFIG_MIPS_MT_FPAFF
+       /*
+        * For FPU affinity scheduling on MIPS MT processors, we need to
+        * intercept sys_sched_xxxaffinity() calls until we get a proper hook
+        * in kernel/sched.c.  As this is considered only a temporary measure,
+        * we provide these hooks for the 32-bit kernel only - there is no
+        * MIPS64 MT processor at the moment.
+        */
+       sys     mipsmt_sys_sched_setaffinity    3
+       sys     mipsmt_sys_sched_getaffinity    3
+#else
        sys     sys_sched_setaffinity   3
        sys     sys_sched_getaffinity   3       /* 4240 */
+#endif /* CONFIG_MIPS_MT_FPAFF */
        sys     sys_io_setup            2
        sys     sys_io_destroy          1
        sys     sys_io_getevents        5
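
The bodies of the two MT-specific wrappers are not part of this diff. As a
rough, hypothetical sketch of the intercept concept (permission checks
omitted; everything except user_cpus_allowed, mflags, MF_FPUBOUND and
mt_fpu_cpumask is assumed rather than taken from the actual arch/mips
implementation), a setaffinity wrapper would record the mask the user asked
for in thread.user_cpus_allowed and keep an FPU-bound task inside
mt_fpu_cpumask:

    asmlinkage long mipsmt_sys_sched_setaffinity(pid_t pid, unsigned int len,
                                            unsigned long __user *user_mask_ptr)
    {
            cpumask_t new_mask, effective_mask;
            struct task_struct *p;
            int retval;

            if (len < sizeof(new_mask))
                    return -EINVAL;
            if (copy_from_user(&new_mask, user_mask_ptr, sizeof(new_mask)))
                    return -EFAULT;

            read_lock(&tasklist_lock);
            p = pid ? find_task_by_pid(pid) : current;
            if (!p) {
                    read_unlock(&tasklist_lock);
                    return -ESRCH;
            }
            get_task_struct(p);
            read_unlock(&tasklist_lock);

            /* Record what the user asked for ... */
            p->thread.user_cpus_allowed = new_mask;

            /* ... but keep an FPU-bound thread on FPU-capable CPUs. */
            if (p->thread.mflags & MF_FPUBOUND)
                    cpus_and(effective_mask, new_mask, mt_fpu_cpumask);
            else
                    effective_mask = new_mask;

            retval = set_cpus_allowed(p, effective_mask);
            put_task_struct(p);
            return retval;
    }

The matching getaffinity wrapper would simply report user_cpus_allowed
instead of the possibly narrowed cpus_allowed.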
diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index dcbfd27..bcf1b10 100644
@@ -529,7 +529,10 @@ void __init setup_arch(char **cmdline_p)
 
 int __init fpu_disable(char *s)
 {
-       cpu_data[0].options &= ~MIPS_CPU_FPU;
+       int i;
+
+       for (i = 0; i < NR_CPUS; i++)
+               cpu_data[i].options &= ~MIPS_CPU_FPU;
 
        return 1;
 }
diff --git a/arch/mips/kernel/smp-mt.c b/arch/mips/kernel/smp-mt.c
index 19b8e4b..5777090 100644
@@ -150,6 +150,11 @@ void plat_smp_setup(void)
        unsigned long val;
        int i, num;
 
+#ifdef CONFIG_MIPS_MT_FPAFF
+       /* If we have an FPU, enroll ourselves in the FPU-full mask */
+       if (cpu_has_fpu)
+               cpu_set(0, mt_fpu_cpumask);
+#endif /* CONFIG_MIPS_MT_FPAFF */
        if (!cpu_has_mipsmt)
                return;
 
@@ -312,6 +317,12 @@ void prom_smp_finish(void)
 {
        write_c0_compare(read_c0_count() + (8* mips_hpt_frequency/HZ));
 
+#ifdef CONFIG_MIPS_MT_FPAFF
+       /* If we have an FPU, enroll ourselves in the FPU-full mask */
+       if (cpu_has_fpu)
+               cpu_set(smp_processor_id(), mt_fpu_cpumask);
+#endif /* CONFIG_MIPS_MT_FPAFF */
+
        local_irq_enable();
 }
 
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 6336fe8..e9902d8 100644
@@ -758,6 +758,36 @@ asmlinkage void do_cpu(struct pt_regs *regs)
                                                &current->thread.fpu.soft);
                        if (sig)
                                force_sig(sig, current);
+#ifdef CONFIG_MIPS_MT_FPAFF
+                       else {
+                               /*
+                                * MIPS MT processors may have fewer FPU
+                                * contexts than CPU threads.  If we've
+                                * emulated more than some threshold number
+                                * of instructions, force migration to a
+                                * "CPU" that has FP support.
+                                */
+                               if (mt_fpemul_threshold > 0 &&
+                                   current->thread.emulated_fp++ >
+                                       mt_fpemul_threshold) {
+                                       /*
+                                        * If there's no FPU present, or if
+                                        * the application has already
+                                        * restricted the allowed set to
+                                        * exclude any CPUs with FPUs, we'll
+                                        * skip the procedure.
+                                        */
+                                       if (cpus_intersects(current->cpus_allowed,
+                                                           mt_fpu_cpumask)) {
+                                               cpumask_t tmask;
+
+                                               cpus_and(tmask,
+                                                        current->thread.user_cpus_allowed,
+                                                        mt_fpu_cpumask);
+                                               set_cpus_allowed(current, tmask);
+                                               current->thread.mflags |= MF_FPUBOUND;
+                                       }
+                               }
+                       }
+#endif /* CONFIG_MIPS_MT_FPAFF */
                }
 
                return;
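
A purely illustrative user-space probe of the behaviour above (it assumes an
MT system with CONFIG_MIPS_MT_FPAFF and a nonzero mt_fpemul_threshold, and
that the thread starts on a VPE without an FPU context): after enough
emulated FP work, sched_getaffinity() should report the narrowed,
FPU-capable set rather than the full mask.

    #define _GNU_SOURCE
    #include <sched.h>
    #include <stdio.h>

    int main(void)
    {
            cpu_set_t mask;
            volatile double x = 1.0;
            long i;

            /* Enough FP work to push emulated_fp past mt_fpemul_threshold
             * when running on a VPE without an FPU context. */
            for (i = 0; i < 10000000; i++)
                    x *= 1.0000001;

            /* Reads back cpus_allowed as narrowed (or not) by do_cpu(). */
            sched_getaffinity(0, sizeof(mask), &mask);
            for (i = 0; i < CPU_SETSIZE; i++)
                    if (CPU_ISSET(i, &mask))
                            printf("runnable on cpu %ld\n", i);
            return 0;
    }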
diff --git a/include/asm-mips/cpu-features.h b/include/asm-mips/cpu-features.h
index 3f2b6d9..254e11e 100644
@@ -40,7 +40,7 @@
 #define cpu_has_sb1_cache      (cpu_data[0].options & MIPS_CPU_SB1_CACHE)
 #endif
 #ifndef cpu_has_fpu
-#define cpu_has_fpu            (cpu_data[0].options & MIPS_CPU_FPU)
+#define cpu_has_fpu            (current_cpu_data.options & MIPS_CPU_FPU)
 #endif
 #ifndef cpu_has_32fpr
 #define cpu_has_32fpr          (cpu_data[0].options & MIPS_CPU_32FPR)
diff --git a/include/asm-mips/fpu.h b/include/asm-mips/fpu.h
index 9c828b1..b0f5001 100644
 #include <asm/processor.h>
 #include <asm/current.h>
 
+#ifdef CONFIG_MIPS_MT_FPAFF
+#include <asm/mips_mt.h>
+#endif
+
 struct sigcontext;
 struct sigcontext32;
 
diff --git a/include/asm-mips/processor.h b/include/asm-mips/processor.h
index 7866513..0fb75f0 100644
@@ -134,6 +134,12 @@ struct thread_struct {
 
        /* Saved fpu/fpu emulator stuff. */
        union mips_fpu_union fpu;
+#ifdef CONFIG_MIPS_MT_FPAFF
+       /* Emulated instruction count */
+       unsigned long emulated_fp;
+       /* Saved per-thread scheduler affinity mask */
+       cpumask_t user_cpus_allowed;
+#endif /* CONFIG_MIPS_MT_FPAFF */
 
        /* Saved state of the DSP ASE, if available. */
        struct mips_dsp_state dsp;
@@ -159,6 +165,12 @@ struct thread_struct {
 #define MF_N32         MF_32BIT_ADDR
 #define MF_N64         0
 
+#ifdef CONFIG_MIPS_MT_FPAFF
+#define FPAFF_INIT 0, INIT_CPUMASK,
+#else
+#define FPAFF_INIT
+#endif /* CONFIG_MIPS_MT_FPAFF */
+
 #define INIT_THREAD  { \
         /* \
          * saved main processor registers \
@@ -173,6 +185,10 @@ struct thread_struct {
         * saved fpu/fpu emulator stuff \
         */ \
        INIT_FPU, \
+       /* \
+        * fpu affinity state (expands to nothing without FPAFF) \
+        */ \
+       FPAFF_INIT \
        /* \
         * saved dsp/dsp emulator stuff \
         */ \
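
To illustrate the splice: with CONFIG_MIPS_MT_FPAFF set, the middle of
INIT_THREAD effectively expands as below, matching the field order added to
struct thread_struct above; without the option, FPAFF_INIT expands to
nothing and the initializer is unchanged.

    INIT_FPU,           /* union mips_fpu_union fpu     */
    0,                  /* unsigned long emulated_fp    */
    INIT_CPUMASK,       /* cpumask_t user_cpus_allowed  */
    /* dsp initializer and the rest follow unchanged */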
diff --git a/include/asm-mips/system.h b/include/asm-mips/system.h
index 3902669..261f71d 100644
@@ -155,6 +155,37 @@ extern asmlinkage void *resume(void *last, void *next, void *next_ti);
 
 struct task_struct;
 
+#ifdef CONFIG_MIPS_MT_FPAFF
+
+/*
+ * Handle the scheduler resume end of FPU affinity management.  We do this
+ * inline to try to keep the overhead down. If we have been forced to run on
+ * a "CPU" with an FPU because of a previous high level of FP computation,
+ * but did not actually use the FPU during the most recent time-slice (CU1
+ * isn't set), we undo the restriction on cpus_allowed.
+ *
+ * We're not calling set_cpus_allowed() here, because we have no need to
+ * force prompt migration - we're already switching the current CPU to a
+ * different thread.
+ */
+
+#define switch_to(prev,next,last)                                      \
+do {                                                                   \
+       if (cpu_has_fpu &&                                              \
+           (prev->thread.mflags & MF_FPUBOUND) &&                      \
+           (!(KSTK_STATUS(prev) & ST0_CU1))) {                         \
+               prev->thread.mflags &= ~MF_FPUBOUND;                    \
+               prev->cpus_allowed = prev->thread.user_cpus_allowed;    \
+       }                                                               \
+       if (cpu_has_dsp)                                                \
+               __save_dsp(prev);                                       \
+       next->thread.emulated_fp = 0;                                   \
+       (last) = resume(prev, next, next->thread_info);                 \
+       if (cpu_has_dsp)                                                \
+               __restore_dsp(current);                                 \
+} while(0)
+
+#else
 #define switch_to(prev,next,last)                                      \
 do {                                                                   \
        if (cpu_has_dsp)                                                \
@@ -163,6 +194,7 @@ do {                                                                        \
        if (cpu_has_dsp)                                                \
                __restore_dsp(current);                                 \
 } while(0)
+#endif
 
 /*
  * On SMP systems, when the scheduler does migration-cost autodetection,