[POWERPC] spufs: make spu page faults not block scheduling
authorArnd Bergmann <arnd.bergmann@de.ibm.com>
Mon, 23 Apr 2007 19:08:15 +0000 (21:08 +0200)
committerArnd Bergmann <arnd@klappe.arndb.de>
Mon, 23 Apr 2007 19:18:55 +0000 (21:18 +0200)
Until now, we have always entered the spu page fault handler
with a mutex for the spu context held. This has multiple
bad side-effects:
- it becomes impossible to suspend the context during
  page faults
- if an spu program attempts to access its own mmio
  areas through DMA, we get an immediate livelock when
  the nopage function tries to acquire the same mutex

This patch makes the page fault logic operate on a
struct spu_context instead of a struct spu, and moves it
from spu_base.c to a new file fault.c inside of spufs.

We now also need to copy the dar and dsisr contents
of the last fault into the saved context so that they
remain accessible in case we schedule out the context
before activating the page fault handler.

Signed-off-by: Arnd Bergmann <arnd.bergmann@de.ibm.com>
arch/powerpc/platforms/cell/spu_base.c
arch/powerpc/platforms/cell/spufs/Makefile
arch/powerpc/platforms/cell/spufs/backing_ops.c
arch/powerpc/platforms/cell/spufs/fault.c [new file with mode: 0644]
arch/powerpc/platforms/cell/spufs/hw_ops.c
arch/powerpc/platforms/cell/spufs/run.c
arch/powerpc/platforms/cell/spufs/spufs.h
arch/powerpc/platforms/cell/spufs/switch.c
include/asm-powerpc/mmu.h
include/asm-powerpc/spu_csa.h

index 6242f3c..31fa55f 100644 (file)
@@ -290,7 +290,6 @@ spu_irq_class_1(int irq, void *data)
 
        return stat ? IRQ_HANDLED : IRQ_NONE;
 }
-EXPORT_SYMBOL_GPL(spu_irq_class_1_bottom);
 
 static irqreturn_t
 spu_irq_class_2(int irq, void *data)
@@ -462,108 +461,6 @@ void spu_free(struct spu *spu)
 }
 EXPORT_SYMBOL_GPL(spu_free);
 
-static int spu_handle_mm_fault(struct spu *spu)
-{
-       struct mm_struct *mm = spu->mm;
-       struct vm_area_struct *vma;
-       u64 ea, dsisr, is_write;
-       int ret;
-
-       ea = spu->dar;
-       dsisr = spu->dsisr;
-#if 0
-       if (!IS_VALID_EA(ea)) {
-               return -EFAULT;
-       }
-#endif /* XXX */
-       if (mm == NULL) {
-               return -EFAULT;
-       }
-       if (mm->pgd == NULL) {
-               return -EFAULT;
-       }
-
-       down_read(&mm->mmap_sem);
-       vma = find_vma(mm, ea);
-       if (!vma)
-               goto bad_area;
-       if (vma->vm_start <= ea)
-               goto good_area;
-       if (!(vma->vm_flags & VM_GROWSDOWN))
-               goto bad_area;
-#if 0
-       if (expand_stack(vma, ea))
-               goto bad_area;
-#endif /* XXX */
-good_area:
-       is_write = dsisr & MFC_DSISR_ACCESS_PUT;
-       if (is_write) {
-               if (!(vma->vm_flags & VM_WRITE))
-                       goto bad_area;
-       } else {
-               if (dsisr & MFC_DSISR_ACCESS_DENIED)
-                       goto bad_area;
-               if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
-                       goto bad_area;
-       }
-       ret = 0;
-       switch (handle_mm_fault(mm, vma, ea, is_write)) {
-       case VM_FAULT_MINOR:
-               current->min_flt++;
-               break;
-       case VM_FAULT_MAJOR:
-               current->maj_flt++;
-               break;
-       case VM_FAULT_SIGBUS:
-               ret = -EFAULT;
-               goto bad_area;
-       case VM_FAULT_OOM:
-               ret = -ENOMEM;
-               goto bad_area;
-       default:
-               BUG();
-       }
-       up_read(&mm->mmap_sem);
-       return ret;
-
-bad_area:
-       up_read(&mm->mmap_sem);
-       return -EFAULT;
-}
-
-int spu_irq_class_1_bottom(struct spu *spu)
-{
-       u64 ea, dsisr, access, error = 0UL;
-       int ret = 0;
-
-       ea = spu->dar;
-       dsisr = spu->dsisr;
-       if (dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED)) {
-               u64 flags;
-
-               access = (_PAGE_PRESENT | _PAGE_USER);
-               access |= (dsisr & MFC_DSISR_ACCESS_PUT) ? _PAGE_RW : 0UL;
-               local_irq_save(flags);
-               if (hash_page(ea, access, 0x300) != 0)
-                       error |= CLASS1_ENABLE_STORAGE_FAULT_INTR;
-               local_irq_restore(flags);
-       }
-       if (error & CLASS1_ENABLE_STORAGE_FAULT_INTR) {
-               if ((ret = spu_handle_mm_fault(spu)) != 0)
-                       error |= CLASS1_ENABLE_STORAGE_FAULT_INTR;
-               else
-                       error &= ~CLASS1_ENABLE_STORAGE_FAULT_INTR;
-       }
-       spu->dar = 0UL;
-       spu->dsisr = 0UL;
-       if (!error) {
-               spu_restart_dma(spu);
-       } else {
-               spu->dma_callback(spu, SPE_EVENT_SPE_DATA_STORAGE);
-       }
-       return ret;
-}
-
 struct sysdev_class spu_sysdev_class = {
        set_kset_name("spu")
 };
index 472217d..2cd89c1 100644 (file)
@@ -1,4 +1,4 @@
-obj-y += switch.o
+obj-y += switch.o fault.o
 
 obj-$(CONFIG_SPU_FS) += spufs.o
 spufs-y += inode.o file.o context.o syscalls.o coredump.o
index 1898f0d..3322528 100644 (file)
@@ -350,6 +350,11 @@ static int spu_backing_send_mfc_command(struct spu_context *ctx,
        return ret;
 }
 
+static void spu_backing_restart_dma(struct spu_context *ctx)
+{
+       /* no-op; presumably a saved context has no DMA to restart -- confirm */
+}
+
 struct spu_context_ops spu_backing_ops = {
        .mbox_read = spu_backing_mbox_read,
        .mbox_stat_read = spu_backing_mbox_stat_read,
@@ -376,4 +381,5 @@ struct spu_context_ops spu_backing_ops = {
        .read_mfc_tagstatus = spu_backing_read_mfc_tagstatus,
        .get_mfc_free_elements = spu_backing_get_mfc_free_elements,
        .send_mfc_command = spu_backing_send_mfc_command,
+       .restart_dma = spu_backing_restart_dma,
 };
diff --git a/arch/powerpc/platforms/cell/spufs/fault.c b/arch/powerpc/platforms/cell/spufs/fault.c
new file mode 100644 (file)
index 0000000..182dc91
--- /dev/null
@@ -0,0 +1,193 @@
+/*
+ * SPU page fault handling
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+
+#include <asm/spu.h>
+#include <asm/spu_csa.h>
+
+#include "spufs.h"
+
+/*
+ * Resolve an SPU page fault at effective address @ea in address space @mm;
+ * @dsisr holds the MFC fault status bits (PUT access, access denied).
+ * Holds mm->mmap_sem for reading while the vma is looked up and faulted in.
+ * Returns 0 on success, -ENOMEM if handle_mm_fault() reports OOM, and
+ * -EFAULT for any other failure (no mm, bad vma, permissions, SIGBUS).
+ *
+ * This ought to be kept in sync with the powerpc specific do_page_fault
+ * function. Fortunately, there are a few corner cases that we have not
+ * had to handle so far.
+ */
+static int spu_handle_mm_fault(struct mm_struct *mm, unsigned long ea, unsigned long dsisr)
+{
+       struct vm_area_struct *vma;
+       unsigned long is_write;
+       int ret = -EFAULT;
+
+       /* no address space to fault against */
+       if (mm == NULL || mm->pgd == NULL)
+               return -EFAULT;
+
+       down_read(&mm->mmap_sem);
+       vma = find_vma(mm, ea);
+       if (!vma)
+               goto out_unlock;
+       if (vma->vm_start <= ea)
+               goto good_area;
+       /* ea is below the vma: only valid for a stack we can grow */
+       if (!(vma->vm_flags & VM_GROWSDOWN))
+               goto out_unlock;
+       if (expand_stack(vma, ea))
+               goto out_unlock;
+good_area:
+       /* check the access type against the vma permissions */
+       is_write = dsisr & MFC_DSISR_ACCESS_PUT;
+       if (is_write) {
+               if (!(vma->vm_flags & VM_WRITE))
+                       goto out_unlock;
+       } else {
+               if (dsisr & MFC_DSISR_ACCESS_DENIED)
+                       goto out_unlock;
+               if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
+                       goto out_unlock;
+       }
+       ret = 0;
+       switch (handle_mm_fault(mm, vma, ea, is_write)) {
+       case VM_FAULT_MINOR:
+               current->min_flt++;
+               break;
+       case VM_FAULT_MAJOR:
+               current->maj_flt++;
+               break;
+       case VM_FAULT_SIGBUS:
+               ret = -EFAULT;
+               break;
+       case VM_FAULT_OOM:
+               /* report OOM as such instead of folding it into -EFAULT */
+               ret = -ENOMEM;
+               break;
+       default:
+               BUG();
+       }
+
+out_unlock:
+       /* single exit so mmap_sem is dropped exactly once on every path */
+       up_read(&mm->mmap_sem);
+       return ret;
+}
+
+static void spufs_handle_dma_error(struct spu_context *ctx, int type)
+{
+       if (ctx->flags & SPU_CREATE_EVENTS_ENABLED) {   /* record the event */
+               ctx->event_return |= type;      /* accumulate event bits */
+               wake_up_all(&ctx->stop_wq);     /* wake waiters on stop_wq */
+       } else {        /* events not enabled: signal current instead */
+               switch (type) {
+               case SPE_EVENT_DMA_ALIGNMENT:
+               case SPE_EVENT_SPE_DATA_STORAGE:
+               case SPE_EVENT_INVALID_DMA:
+                       force_sig(SIGBUS, /* info, */ current);
+                       break;
+               case SPE_EVENT_SPE_ERROR:
+                       force_sig(SIGILL, /* info */ current);
+                       break;
+               }       /* any other type is silently ignored */
+       }
+}
+
+void spufs_dma_callback(struct spu *spu, int type)
+{
+       spufs_handle_dma_error(spu->ctx, type); /* act on the spu's context */
+}
+EXPORT_SYMBOL_GPL(spufs_dma_callback);
+
+/*
+ * Bottom half handler for class 1 (page fault) events.  This cannot run
+ * in interrupt context because it may sleep.  Called with the context
+ * lock held; the lock is dropped around the fault handling so the
+ * context can be scheduled out meanwhile, and re-taken before returning.
+ * Returns 0 if no fault was pending or it was resolved, else an error.
+ * TODO: try calling hash_page from the interrupt handler first
+ *       in order to speed up the easy case.
+ */
+int spufs_handle_class1(struct spu_context *ctx)
+{
+       u64 ea, dsisr, access;
+       unsigned long flags;
+       int ret;
+
+       /*
+        * dar and dsisr get passed from the registers to the spu_context,
+        * to this function, but not back to the spu if it gets scheduled
+        * again.  If we don't handle the fault for a saved context in time,
+        * we can still expect the same fault right after the context restore.
+        * NOTE(review): spu_save() in this patch copies the fault into the
+        * new spu_state dar/dsisr fields, while the saved-context branch
+        * below reads csa.priv1.mfc_*_RW -- confirm which one is intended.
+        */
+       if (ctx->state == SPU_STATE_RUNNABLE) {
+               ea = ctx->spu->dar;
+               dsisr = ctx->spu->dsisr;
+               ctx->spu->dar= ctx->spu->dsisr = 0;
+       } else {
+               ea = ctx->csa.priv1.mfc_dar_RW;
+               dsisr = ctx->csa.priv1.mfc_dsisr_RW;
+               ctx->csa.priv1.mfc_dar_RW = 0;
+               ctx->csa.priv1.mfc_dsisr_RW = 0;
+       }
+
+       if (!(dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED)))
+               return 0;
+
+       pr_debug("ctx %p: ea %016lx, dsisr %016lx state %d\n", ctx, ea,
+               dsisr, ctx->state);
+
+       /* we must not hold the lock when entering spu_handle_mm_fault */
+       spu_release(ctx);
+
+       access = (_PAGE_PRESENT | _PAGE_USER);
+       access |= (dsisr & MFC_DSISR_ACCESS_PUT) ? _PAGE_RW : 0UL;
+       local_irq_save(flags);
+       ret = hash_page(ea, access, 0x300);
+       local_irq_restore(flags);
+
+       /* hashing failed, so try the actual fault handler */
+       if (ret)
+               ret = spu_handle_mm_fault(current->mm, ea, dsisr);
+
+       spu_acquire(ctx);
+       /*
+        * If we handled the fault successfully and the context has an
+        * spu attached (ctx->spu is set), restart the DMA.
+        * In case of unhandled error report the problem to user space.
+        */
+       if (!ret) {
+               if (ctx->spu)
+                       ctx->ops->restart_dma(ctx);
+       } else
+               spufs_handle_dma_error(ctx, SPE_EVENT_SPE_DATA_STORAGE);
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(spufs_handle_class1);
index ae42e03..428875c 100644 (file)
@@ -296,6 +296,14 @@ static int spu_hw_send_mfc_command(struct spu_context *ctx,
        }
 }
 
+static void spu_hw_restart_dma(struct spu_context *ctx)
+{
+       struct spu_priv2 __iomem *priv2 = ctx->spu->priv2;
+       /* restart MFC DMA unless a context switch is pending on this spu */
+       if (!test_bit(SPU_CONTEXT_SWITCH_PENDING, &ctx->spu->flags))
+               out_be64(&priv2->mfc_control_RW, MFC_CNTL_RESTART_DMA_COMMAND);
+}
+
 struct spu_context_ops spu_hw_ops = {
        .mbox_read = spu_hw_mbox_read,
        .mbox_stat_read = spu_hw_mbox_stat_read,
@@ -320,4 +328,5 @@ struct spu_context_ops spu_hw_ops = {
        .read_mfc_tagstatus = spu_hw_read_mfc_tagstatus,
        .get_mfc_free_elements = spu_hw_get_mfc_free_elements,
        .send_mfc_command = spu_hw_send_mfc_command,
+       .restart_dma = spu_hw_restart_dma,
 };
index 7df5202..1a8195b 100644 (file)
@@ -18,27 +18,6 @@ void spufs_stop_callback(struct spu *spu)
        wake_up_all(&ctx->stop_wq);
 }
 
-void spufs_dma_callback(struct spu *spu, int type)
-{
-       struct spu_context *ctx = spu->ctx;
-
-       if (ctx->flags & SPU_CREATE_EVENTS_ENABLED) {
-               ctx->event_return |= type;
-               wake_up_all(&ctx->stop_wq);
-       } else {
-               switch (type) {
-               case SPE_EVENT_DMA_ALIGNMENT:
-               case SPE_EVENT_SPE_DATA_STORAGE:
-               case SPE_EVENT_INVALID_DMA:
-                       force_sig(SIGBUS, /* info, */ current);
-                       break;
-               case SPE_EVENT_SPE_ERROR:
-                       force_sig(SIGILL, /* info */ current);
-                       break;
-               }
-       }
-}
-
 static inline int spu_stopped(struct spu_context *ctx, u32 * stat)
 {
        struct spu *spu;
@@ -294,11 +273,8 @@ int spu_process_callback(struct spu_context *ctx)
 static inline int spu_process_events(struct spu_context *ctx)
 {
        struct spu *spu = ctx->spu;
-       u64 pte_fault = MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED;
        int ret = 0;
 
-       if (spu->dsisr & pte_fault)
-               ret = spu_irq_class_1_bottom(spu);
        if (spu->class_0_pending)
                ret = spu_irq_class_0_bottom(spu);
        if (!ret && signal_pending(current))
@@ -332,6 +308,10 @@ long spufs_run_spu(struct file *file, struct spu_context *ctx,
                                break;
                        status &= ~SPU_STATUS_STOPPED_BY_STOP;
                }
+               ret = spufs_handle_class1(ctx);
+               if (ret)
+                       break;
+
                if (unlikely(ctx->state != SPU_STATE_RUNNABLE)) {
                        ret = spu_reacquire_runnable(ctx, npc, &status);
                        if (ret) {
index cae2ad4..9993c9b 100644 (file)
@@ -141,6 +141,7 @@ struct spu_context_ops {
                               struct spu_dma_info * info);
        void (*proxydma_info_read) (struct spu_context * ctx,
                                    struct spu_proxydma_info * info);
+       void (*restart_dma)(struct spu_context *ctx);
 };
 
 extern struct spu_context_ops spu_hw_ops;
@@ -172,6 +173,9 @@ int put_spu_gang(struct spu_gang *gang);
 void spu_gang_remove_ctx(struct spu_gang *gang, struct spu_context *ctx);
 void spu_gang_add_ctx(struct spu_gang *gang, struct spu_context *ctx);
 
+/* fault handling */
+int spufs_handle_class1(struct spu_context *ctx);
+
 /* context management */
 static inline void spu_acquire(struct spu_context *ctx)
 {
index fd91c73..8347c4a 100644 (file)
@@ -2084,6 +2084,10 @@ int spu_save(struct spu_state *prev, struct spu *spu)
        int rc;
 
        acquire_spu_lock(spu);          /* Step 1.     */
+       prev->dar = spu->dar;
+       prev->dsisr = spu->dsisr;
+       spu->dar = 0;
+       spu->dsisr = 0;
        rc = __do_spu_save(prev, spu);  /* Steps 2-53. */
        release_spu_lock(spu);
        if (rc != 0 && rc != 2 && rc != 6) {
@@ -2109,9 +2113,9 @@ int spu_restore(struct spu_state *new, struct spu *spu)
 
        acquire_spu_lock(spu);
        harvest(NULL, spu);
-       spu->dar = 0;
-       spu->dsisr = 0;
        spu->slb_replace = 0;
+       new->dar = 0;
+       new->dsisr = 0;
        spu->class_0_pending = 0;
        rc = __do_spu_restore(new, spu);
        release_spu_lock(spu);
index 200055a..e22fd88 100644 (file)
@@ -234,6 +234,7 @@ extern int __hash_page_64K(unsigned long ea, unsigned long access,
                           unsigned long vsid, pte_t *ptep, unsigned long trap,
                           unsigned int local);
 struct mm_struct;
+extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap);
 extern int hash_huge_page(struct mm_struct *mm, unsigned long access,
                          unsigned long ea, unsigned long vsid, int local,
                          unsigned long trap);
index 8aad061..02e56a6 100644 (file)
@@ -242,6 +242,7 @@ struct spu_state {
        u64 spu_chnldata_RW[32];
        u32 spu_mailbox_data[4];
        u32 pu_mailbox_data[1];
+       u64 dar, dsisr;
        unsigned long suspend_time;
        spinlock_t register_lock;
 };