Pull sn2-mmio-writes into release branch
authorTony Luck <tony.luck@intel.com>
Tue, 21 Mar 2006 16:21:26 +0000 (08:21 -0800)
committerTony Luck <tony.luck@intel.com>
Tue, 21 Mar 2006 16:21:26 +0000 (08:21 -0800)
Hand-fixed conflicts:
include/asm-ia64/machvec_sn2.h

Signed-off-by: Tony Luck <tony.luck@intel.com>
arch/ia64/sn/kernel/setup.c
arch/ia64/sn/kernel/sn2/sn2_smp.c
include/asm-ia64/machvec.h
include/asm-ia64/machvec_sn2.h
include/asm-ia64/processor.h
include/asm-ia64/system.h
include/asm-ia64/thread_info.h

index 5b84836..8b6d5c8 100644 (file)
@@ -3,7 +3,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (C) 1999,2001-2005 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 1999,2001-2006 Silicon Graphics, Inc. All rights reserved.
  */
 
 #include <linux/config.h>
@@ -498,6 +498,7 @@ void __init sn_setup(char **cmdline_p)
         * for sn.
         */
        pm_power_off = ia64_sn_power_down;
+       current->thread.flags |= IA64_THREAD_MIGRATION;
 }
 
 /**
@@ -660,7 +661,8 @@ void __init sn_cpu_init(void)
                        SH2_PIO_WRITE_STATUS_1, SH2_PIO_WRITE_STATUS_3};
                u64 *pio;
                pio = is_shub1() ? pio1 : pio2;
-               pda->pio_write_status_addr = (volatile unsigned long *) LOCAL_MMR_ADDR(pio[slice]);
+               pda->pio_write_status_addr =
+                  (volatile unsigned long *)GLOBAL_MMR_ADDR(nasid, pio[slice]);
                pda->pio_write_status_val = is_shub1() ? SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK : 0;
        }
 
index b2e1e74..d9d306c 100644 (file)
@@ -93,6 +93,27 @@ static inline unsigned long wait_piowc(void)
        return (ws & SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_MASK) != 0;
 }
 
+/**
+ * sn_migrate - SN-specific task migration actions
+ * @task: Task being migrated to new CPU
+ *
+ * SN2 PIO writes from separate CPUs are not guaranteed to arrive in order.
+ * Context switching user threads which have memory-mapped MMIO may cause
+ * PIOs to issue from seperate CPUs, thus the PIO writes must be drained
+ * from the previous CPU's Shub before execution resumes on the new CPU.
+ */
+void sn_migrate(struct task_struct *task)
+{
+       pda_t *last_pda = pdacpu(task_thread_info(task)->last_cpu);
+       volatile unsigned long *adr = last_pda->pio_write_status_addr;
+       unsigned long val = last_pda->pio_write_status_val;
+
+       /* Drain PIO writes from old CPU's Shub */
+       while (unlikely((*adr & SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK)
+                       != val))
+               cpu_relax();
+}
+
 void sn_tlb_migrate_finish(struct mm_struct *mm)
 {
        /* flush_tlb_mm is inefficient if more than 1 users of mm */
index ca5ea99..c3e4ed8 100644 (file)
@@ -20,6 +20,7 @@ struct scatterlist;
 struct page;
 struct mm_struct;
 struct pci_bus;
+struct task_struct;
 
 typedef void ia64_mv_setup_t (char **);
 typedef void ia64_mv_cpu_init_t (void);
@@ -34,6 +35,7 @@ typedef int ia64_mv_pci_legacy_read_t (struct pci_bus *, u16 port, u32 *val,
                                       u8 size);
 typedef int ia64_mv_pci_legacy_write_t (struct pci_bus *, u16 port, u32 val,
                                        u8 size);
+typedef void ia64_mv_migrate_t(struct task_struct * task);
 
 /* DMA-mapping interface: */
 typedef void ia64_mv_dma_init (void);
@@ -85,6 +87,11 @@ machvec_noop_mm (struct mm_struct *mm)
 {
 }
 
+static inline void
+machvec_noop_task (struct task_struct *task)
+{
+}
+
 extern void machvec_setup (char **);
 extern void machvec_timer_interrupt (int, void *, struct pt_regs *);
 extern void machvec_dma_sync_single (struct device *, dma_addr_t, size_t, int);
@@ -146,6 +153,7 @@ extern void machvec_tlb_migrate_finish (struct mm_struct *);
 #  define platform_readw_relaxed        ia64_mv.readw_relaxed
 #  define platform_readl_relaxed        ia64_mv.readl_relaxed
 #  define platform_readq_relaxed        ia64_mv.readq_relaxed
+#  define platform_migrate             ia64_mv.migrate
 # endif
 
 /* __attribute__((__aligned__(16))) is required to make size of the
@@ -194,6 +202,7 @@ struct ia64_machine_vector {
        ia64_mv_readw_relaxed_t *readw_relaxed;
        ia64_mv_readl_relaxed_t *readl_relaxed;
        ia64_mv_readq_relaxed_t *readq_relaxed;
+       ia64_mv_migrate_t *migrate;
 } __attribute__((__aligned__(16))); /* align attrib? see above comment */
 
 #define MACHVEC_INIT(name)                     \
@@ -238,6 +247,7 @@ struct ia64_machine_vector {
        platform_readw_relaxed,                 \
        platform_readl_relaxed,                 \
        platform_readq_relaxed,                 \
+       platform_migrate,                       \
 }
 
 extern struct ia64_machine_vector ia64_mv;
@@ -386,5 +396,8 @@ extern ia64_mv_dma_supported                swiotlb_dma_supported;
 #ifndef platform_readq_relaxed
 # define platform_readq_relaxed        __ia64_readq_relaxed
 #endif
+#ifndef platform_migrate
+# define platform_migrate machvec_noop_task
+#endif
 
 #endif /* _ASM_IA64_MACHVEC_H */
index 03d00fa..da1d437 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002-2003, 2006 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2002-2003,2006 Silicon Graphics, Inc.  All Rights Reserved.
  * 
  * This program is free software; you can redistribute it and/or modify it 
  * under the terms of version 2 of the GNU General Public License 
@@ -66,6 +66,7 @@ extern ia64_mv_dma_sync_single_for_device sn_dma_sync_single_for_device;
 extern ia64_mv_dma_sync_sg_for_device  sn_dma_sync_sg_for_device;
 extern ia64_mv_dma_mapping_error       sn_dma_mapping_error;
 extern ia64_mv_dma_supported           sn_dma_supported;
+extern ia64_mv_migrate_t               sn_migrate;
 
 /*
  * This stuff has dual use!
@@ -115,6 +116,7 @@ extern ia64_mv_dma_supported                sn_dma_supported;
 #define platform_dma_sync_sg_for_device        sn_dma_sync_sg_for_device
 #define platform_dma_mapping_error             sn_dma_mapping_error
 #define platform_dma_supported         sn_dma_supported
+#define platform_migrate               sn_migrate
 
 #include <asm/sn/io.h>
 
index 23c8e1b..128fefd 100644 (file)
@@ -50,7 +50,8 @@
 #define IA64_THREAD_PM_VALID   (__IA64_UL(1) << 2)     /* performance registers valid? */
 #define IA64_THREAD_UAC_NOPRINT        (__IA64_UL(1) << 3)     /* don't log unaligned accesses */
 #define IA64_THREAD_UAC_SIGBUS (__IA64_UL(1) << 4)     /* generate SIGBUS on unaligned acc. */
-                                                       /* bit 5 is currently unused */
+#define IA64_THREAD_MIGRATION  (__IA64_UL(1) << 5)     /* require migration
+                                                          sync at ctx sw */
 #define IA64_THREAD_FPEMU_NOPRINT (__IA64_UL(1) << 6)  /* don't log any fpswa faults */
 #define IA64_THREAD_FPEMU_SIGFPE  (__IA64_UL(1) << 7)  /* send a SIGFPE for fpswa faults */
 
index 0625387..cd4233d 100644 (file)
@@ -244,6 +244,13 @@ extern void ia64_load_extra (struct task_struct *task);
                __ia64_save_fpu((prev)->thread.fph);                            \
        }                                                                       \
        __switch_to(prev, next, last);                                          \
+       /* "next" in old context is "current" in new context */                 \
+       if (unlikely((current->thread.flags & IA64_THREAD_MIGRATION) &&        \
+                    (task_cpu(current) !=                                     \
+                                     task_thread_info(current)->last_cpu))) { \
+               platform_migrate(current);                                     \
+               task_thread_info(current)->last_cpu = task_cpu(current);       \
+       }                                                                      \
 } while (0)
 #else
 # define switch_to(prev,next,last)     __switch_to(prev, next, last)
index a6ee273..56394a2 100644 (file)
@@ -26,6 +26,7 @@ struct thread_info {
        struct exec_domain *exec_domain;/* execution domain */
        __u32 flags;                    /* thread_info flags (see TIF_*) */
        __u32 cpu;                      /* current CPU */
+       __u32 last_cpu;                 /* Last CPU thread ran on */
        mm_segment_t addr_limit;        /* user-level address space limit */
        int preempt_count;              /* 0=premptable, <0=BUG; will also serve as bh-counter */
        struct restart_block restart_block;