Merge branch 'for-2.6.25' of git://git.kernel.org/pub/scm/linux/kernel/git/paulus...
authorLinus Torvalds <torvalds@woody.linux-foundation.org>
Fri, 8 Feb 2008 17:31:42 +0000 (09:31 -0800)
committerLinus Torvalds <torvalds@woody.linux-foundation.org>
Fri, 8 Feb 2008 17:31:42 +0000 (09:31 -0800)
* 'for-2.6.25' of git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc:
  [POWERPC] Add arch-specific walk_memory_remove() for 64-bit powerpc
  [POWERPC] Enable hotplug memory remove for 64-bit powerpc
  [POWERPC] Add remove_memory() for 64-bit powerpc
  [POWERPC] Make cell IOMMU fixed mapping printk more useful
  [POWERPC] Fix potential cell IOMMU bug when switching back to default DMA ops
  [POWERPC] Don't enable cell IOMMU fixed mapping if there are no dma-ranges
  [POWERPC] Fix cell IOMMU null pointer explosion on old firmwares
  [POWERPC] spufs: Fix timing dependent false return from spufs_run_spu
  [POWERPC] spufs: No need to have a runnable SPU for libassist update
  [POWERPC] spufs: Update SPU_Status[CISHP] in backing runcntl write
  [POWERPC] spufs: Fix state_mutex leaks
  [POWERPC] Disable G5 NAP mode during SMU commands on U3

12 files changed:
arch/powerpc/Kconfig
arch/powerpc/mm/mem.c
arch/powerpc/platforms/cell/iommu.c
arch/powerpc/platforms/cell/spufs/backing_ops.c
arch/powerpc/platforms/cell/spufs/fault.c
arch/powerpc/platforms/cell/spufs/file.c
arch/powerpc/platforms/cell/spufs/run.c
arch/powerpc/platforms/cell/spufs/spufs.h
arch/powerpc/platforms/powermac/feature.c
drivers/macintosh/smu.c
include/asm-powerpc/pmac_feature.h
kernel/resource.c

index 8dcac0b..26b963c 100644 (file)
@@ -272,6 +272,12 @@ config HOTPLUG_CPU
 config ARCH_ENABLE_MEMORY_HOTPLUG
        def_bool y
 
+config ARCH_HAS_WALK_MEMORY
+       def_bool y
+
+config ARCH_ENABLE_MEMORY_HOTREMOVE
+       def_bool y
+
 config KEXEC
        bool "kexec system call (EXPERIMENTAL)"
        depends on (PPC_PRPMC2800 || PPC_MULTIPLATFORM) && EXPERIMENTAL
index 93a5c53..be5c506 100644 (file)
@@ -129,6 +129,39 @@ int __devinit arch_add_memory(int nid, u64 start, u64 size)
        return __add_pages(zone, start_pfn, nr_pages);
 }
 
+#ifdef CONFIG_MEMORY_HOTREMOVE
+int remove_memory(u64 start, u64 size)
+{
+       unsigned long start_pfn, end_pfn;
+       int ret;
+
+       start_pfn = start >> PAGE_SHIFT;
+       end_pfn = start_pfn + (size >> PAGE_SHIFT);
+       ret = offline_pages(start_pfn, end_pfn, 120 * HZ);
+       if (ret)
+               goto out;
+       /* Arch-specific calls go here - next patch */
+out:
+       return ret;
+}
+#endif /* CONFIG_MEMORY_HOTREMOVE */
+
+/*
+ * walk_memory_resource() needs to make sure there is no holes in a given
+ * memory range. On PPC64, since this range comes from /sysfs, the range
+ * is guaranteed to be valid, non-overlapping and can not contain any
+ * holes. By the time we get here (memory add or remove), /proc/device-tree
+ * is updated and correct. Only reason we need to check against device-tree
+ * would be if we allow user-land to specify a memory range through a
+ * system call/ioctl etc. instead of doing offline/online through /sysfs.
+ */
+int
+walk_memory_resource(unsigned long start_pfn, unsigned long nr_pages, void *arg,
+                       int (*func)(unsigned long, unsigned long, void *))
+{
+       return  (*func)(start_pfn, nr_pages, arg);
+}
+
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
 void show_mem(void)
index df33066..edab631 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/notifier.h>
+#include <linux/of.h>
 #include <linux/of_platform.h>
 
 #include <asm/prom.h>
@@ -789,18 +790,16 @@ static int __init cell_iommu_init_disabled(void)
 static u64 cell_iommu_get_fixed_address(struct device *dev)
 {
        u64 cpu_addr, size, best_size, pci_addr = OF_BAD_ADDR;
-       struct device_node *tmp, *np;
+       struct device_node *np;
        const u32 *ranges = NULL;
        int i, len, best;
 
-       np = dev->archdata.of_node;
-       of_node_get(np);
-       ranges = of_get_property(np, "dma-ranges", &len);
-       while (!ranges && np) {
-               tmp = of_get_parent(np);
-               of_node_put(np);
-               np = tmp;
+       np = of_node_get(dev->archdata.of_node);
+       while (np) {
                ranges = of_get_property(np, "dma-ranges", &len);
+               if (ranges)
+                       break;
+               np = of_get_next_parent(np);
        }
 
        if (!ranges) {
@@ -842,19 +841,18 @@ static int dma_set_mask_and_switch(struct device *dev, u64 dma_mask)
        if (!dev->dma_mask || !dma_supported(dev, dma_mask))
                return -EIO;
 
-       if (dma_mask == DMA_BIT_MASK(64)) {
-               if (cell_iommu_get_fixed_address(dev) == OF_BAD_ADDR)
-                       dev_dbg(dev, "iommu: 64-bit OK, but bad addr\n");
-               else {
-                       dev_dbg(dev, "iommu: 64-bit OK, using fixed ops\n");
-                       set_dma_ops(dev, &dma_iommu_fixed_ops);
-                       cell_dma_dev_setup(dev);
-               }
+       if (dma_mask == DMA_BIT_MASK(64) &&
+               cell_iommu_get_fixed_address(dev) != OF_BAD_ADDR)
+       {
+               dev_dbg(dev, "iommu: 64-bit OK, using fixed ops\n");
+               set_dma_ops(dev, &dma_iommu_fixed_ops);
        } else {
                dev_dbg(dev, "iommu: not 64-bit, using default ops\n");
                set_dma_ops(dev, get_pci_dma_ops());
        }
 
+       cell_dma_dev_setup(dev);
+
        *dev->dma_mask = dma_mask;
 
        return 0;
@@ -918,6 +916,18 @@ static int __init cell_iommu_fixed_mapping_init(void)
                return -1;
        }
 
+       /* We must have dma-ranges properties for fixed mapping to work */
+       for (np = NULL; (np = of_find_all_nodes(np));) {
+               if (of_find_property(np, "dma-ranges", NULL))
+                       break;
+       }
+       of_node_put(np);
+
+       if (!np) {
+               pr_debug("iommu: no dma-ranges found, no fixed mapping\n");
+               return -1;
+       }
+
        /* The default setup is to have the fixed mapping sit after the
         * dynamic region, so find the top of the largest IOMMU window
         * on any axon, then add the size of RAM and that's our max value.
@@ -981,8 +991,8 @@ static int __init cell_iommu_fixed_mapping_init(void)
                        dsize = htab_size_bytes;
                }
 
-               pr_debug("iommu: setting up %d, dynamic window %lx-%lx " \
-                        "fixed window %lx-%lx\n", iommu->nid, dbase,
+               printk(KERN_DEBUG "iommu: node %d, dynamic window 0x%lx-0x%lx "
+                       "fixed window 0x%lx-0x%lx\n", iommu->nid, dbase,
                         dbase + dsize, fbase, fbase + fsize);
 
                cell_iommu_setup_page_tables(iommu, dbase, dsize, fbase, fsize);
@@ -998,8 +1008,6 @@ static int __init cell_iommu_fixed_mapping_init(void)
        dma_iommu_ops.set_dma_mask = dma_set_mask_and_switch;
        set_pci_dma_ops(&dma_iommu_ops);
 
-       printk(KERN_DEBUG "IOMMU fixed mapping established.\n");
-
        return 0;
 }
 
index 50d98a1..64eb15b 100644 (file)
@@ -288,6 +288,12 @@ static void spu_backing_runcntl_write(struct spu_context *ctx, u32 val)
        spin_lock(&ctx->csa.register_lock);
        ctx->csa.prob.spu_runcntl_RW = val;
        if (val & SPU_RUNCNTL_RUNNABLE) {
+               ctx->csa.prob.spu_status_R &=
+                       ~SPU_STATUS_STOPPED_BY_STOP &
+                       ~SPU_STATUS_STOPPED_BY_HALT &
+                       ~SPU_STATUS_SINGLE_STEP &
+                       ~SPU_STATUS_INVALID_INSTR &
+                       ~SPU_STATUS_INVALID_CH;
                ctx->csa.prob.spu_status_R |= SPU_STATUS_RUNNING;
        } else {
                ctx->csa.prob.spu_status_R &= ~SPU_STATUS_RUNNING;
index eff4d29..e46d300 100644 (file)
@@ -108,7 +108,7 @@ int spufs_handle_class1(struct spu_context *ctx)
        u64 ea, dsisr, access;
        unsigned long flags;
        unsigned flt = 0;
-       int ret, ret2;
+       int ret;
 
        /*
         * dar and dsisr get passed from the registers
@@ -148,13 +148,10 @@ int spufs_handle_class1(struct spu_context *ctx)
                ret = spu_handle_mm_fault(current->mm, ea, dsisr, &flt);
 
        /*
-        * If spu_acquire fails due to a pending signal we just want to return
-        * EINTR to userspace even if that means missing the dma restart or
-        * updating the page fault statistics.
+        * This is nasty: we need the state_mutex for all the bookkeeping even
+        * if the syscall was interrupted by a signal. ewww.
         */
-       ret2 = spu_acquire(ctx);
-       if (ret2)
-               goto out;
+       mutex_lock(&ctx->state_mutex);
 
        /*
         * Clear dsisr under ctxt lock after handling the fault, so that
@@ -185,7 +182,6 @@ int spufs_handle_class1(struct spu_context *ctx)
        } else
                spufs_handle_event(ctx, ea, SPE_EVENT_SPE_DATA_STORAGE);
 
- out:
        spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
        return ret;
 }
index e393144..c66c375 100644 (file)
@@ -358,6 +358,7 @@ static unsigned long spufs_ps_nopfn(struct vm_area_struct *vma,
 {
        struct spu_context *ctx = vma->vm_file->private_data;
        unsigned long area, offset = address - vma->vm_start;
+       int ret = 0;
 
        spu_context_nospu_trace(spufs_ps_nopfn__enter, ctx);
 
@@ -379,7 +380,7 @@ static unsigned long spufs_ps_nopfn(struct vm_area_struct *vma,
        if (ctx->state == SPU_STATE_SAVED) {
                up_read(&current->mm->mmap_sem);
                spu_context_nospu_trace(spufs_ps_nopfn__sleep, ctx);
-               spufs_wait(ctx->run_wq, ctx->state == SPU_STATE_RUNNABLE);
+               ret = spufs_wait(ctx->run_wq, ctx->state == SPU_STATE_RUNNABLE);
                spu_context_trace(spufs_ps_nopfn__wake, ctx, ctx->spu);
                down_read(&current->mm->mmap_sem);
        } else {
@@ -388,7 +389,8 @@ static unsigned long spufs_ps_nopfn(struct vm_area_struct *vma,
                spu_context_trace(spufs_ps_nopfn__insert, ctx, ctx->spu);
        }
 
-       spu_release(ctx);
+       if (!ret)
+               spu_release(ctx);
        return NOPFN_REFAULT;
 }
 
@@ -755,23 +757,25 @@ static ssize_t spufs_ibox_read(struct file *file, char __user *buf,
 
        count = spu_acquire(ctx);
        if (count)
-               return count;
+               goto out;
 
        /* wait only for the first element */
        count = 0;
        if (file->f_flags & O_NONBLOCK) {
-               if (!spu_ibox_read(ctx, &ibox_data))
+               if (!spu_ibox_read(ctx, &ibox_data)) {
                        count = -EAGAIN;
+                       goto out_unlock;
+               }
        } else {
                count = spufs_wait(ctx->ibox_wq, spu_ibox_read(ctx, &ibox_data));
+               if (count)
+                       goto out;
        }
-       if (count)
-               goto out;
 
        /* if we can't write at all, return -EFAULT */
        count = __put_user(ibox_data, udata);
        if (count)
-               goto out;
+               goto out_unlock;
 
        for (count = 4, udata++; (count + 4) <= len; count += 4, udata++) {
                int ret;
@@ -788,9 +792,9 @@ static ssize_t spufs_ibox_read(struct file *file, char __user *buf,
                        break;
        }
 
-out:
+out_unlock:
        spu_release(ctx);
-
+out:
        return count;
 }
 
@@ -905,7 +909,7 @@ static ssize_t spufs_wbox_write(struct file *file, const char __user *buf,
 
        count = spu_acquire(ctx);
        if (count)
-               return count;
+               goto out;
 
        /*
         * make sure we can at least write one element, by waiting
@@ -913,14 +917,16 @@ static ssize_t spufs_wbox_write(struct file *file, const char __user *buf,
         */
        count = 0;
        if (file->f_flags & O_NONBLOCK) {
-               if (!spu_wbox_write(ctx, wbox_data))
+               if (!spu_wbox_write(ctx, wbox_data)) {
                        count = -EAGAIN;
+                       goto out_unlock;
+               }
        } else {
                count = spufs_wait(ctx->wbox_wq, spu_wbox_write(ctx, wbox_data));
+               if (count)
+                       goto out;
        }
 
-       if (count)
-               goto out;
 
        /* write as much as possible */
        for (count = 4, udata++; (count + 4) <= len; count += 4, udata++) {
@@ -934,8 +940,9 @@ static ssize_t spufs_wbox_write(struct file *file, const char __user *buf,
                        break;
        }
 
-out:
+out_unlock:
        spu_release(ctx);
+out:
        return count;
 }
 
@@ -1598,12 +1605,11 @@ static ssize_t spufs_mfc_read(struct file *file, char __user *buffer,
        } else {
                ret = spufs_wait(ctx->mfc_wq,
                           spufs_read_mfc_tagstatus(ctx, &status));
+               if (ret)
+                       goto out;
        }
        spu_release(ctx);
 
-       if (ret)
-               goto out;
-
        ret = 4;
        if (copy_to_user(buffer, &status, 4))
                ret = -EFAULT;
@@ -1732,6 +1738,8 @@ static ssize_t spufs_mfc_write(struct file *file, const char __user *buffer,
                int status;
                ret = spufs_wait(ctx->mfc_wq,
                                 spu_send_mfc_command(ctx, cmd, &status));
+               if (ret)
+                       goto out;
                if (status)
                        ret = status;
        }
@@ -1785,7 +1793,7 @@ static int spufs_mfc_flush(struct file *file, fl_owner_t id)
 
        ret = spu_acquire(ctx);
        if (ret)
-               return ret;
+               goto out;
 #if 0
 /* this currently hangs */
        ret = spufs_wait(ctx->mfc_wq,
@@ -1794,12 +1802,13 @@ static int spufs_mfc_flush(struct file *file, fl_owner_t id)
                goto out;
        ret = spufs_wait(ctx->mfc_wq,
                         ctx->ops->read_mfc_tagstatus(ctx) == ctx->tagwait);
-out:
+       if (ret)
+               goto out;
 #else
        ret = 0;
 #endif
        spu_release(ctx);
-
+out:
        return ret;
 }
 
index b4814c7..fca22e1 100644 (file)
@@ -53,7 +53,7 @@ int spu_stopped(struct spu_context *ctx, u32 *stat)
 
        stopped = SPU_STATUS_INVALID_INSTR | SPU_STATUS_SINGLE_STEP |
                SPU_STATUS_STOPPED_BY_HALT | SPU_STATUS_STOPPED_BY_STOP;
-       if (*stat & stopped)
+       if (!(*stat & SPU_STATUS_RUNNING) && (*stat & stopped))
                return 1;
 
        dsisr = ctx->csa.dsisr;
@@ -354,8 +354,15 @@ long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *event)
 
        do {
                ret = spufs_wait(ctx->stop_wq, spu_stopped(ctx, &status));
-               if (unlikely(ret))
+               if (unlikely(ret)) {
+                       /*
+                        * This is nasty: we need the state_mutex for all the
+                        * bookkeeping even if the syscall was interrupted by
+                        * a signal. ewww.
+                        */
+                       mutex_lock(&ctx->state_mutex);
                        break;
+               }
                spu = ctx->spu;
                if (unlikely(test_and_clear_bit(SPU_SCHED_NOTIFY_ACTIVE,
                                                &ctx->sched_flags))) {
@@ -388,16 +395,14 @@ long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *event)
                                      SPU_STATUS_STOPPED_BY_HALT |
                                       SPU_STATUS_SINGLE_STEP)));
 
-       if ((status & SPU_STATUS_STOPPED_BY_STOP) &&
-           (((status >> SPU_STOP_STATUS_SHIFT) & 0x3f00) == 0x2100) &&
-           (ctx->state == SPU_STATE_RUNNABLE))
-               ctx->stats.libassist++;
-
-
        spu_disable_spu(ctx);
        ret = spu_run_fini(ctx, npc, &status);
        spu_yield(ctx);
 
+       if ((status & SPU_STATUS_STOPPED_BY_STOP) &&
+           (((status >> SPU_STOP_STATUS_SHIFT) & 0x3f00) == 0x2100))
+               ctx->stats.libassist++;
+
        if ((ret == 0) ||
            ((ret == -ERESTARTSYS) &&
             ((status & SPU_STATUS_STOPPED_BY_HALT) ||
index 795a1b5..2c2fe3c 100644 (file)
@@ -268,6 +268,9 @@ extern char *isolated_loader;
  *     Same as wait_event_interruptible(), except that here
  *     we need to call spu_release(ctx) before sleeping, and
  *     then spu_acquire(ctx) when awoken.
+ *
+ *     Returns with state_mutex re-acquired when successfull or
+ *     with -ERESTARTSYS and the state_mutex dropped when interrupted.
  */
 
 #define spufs_wait(wq, condition)                                      \
@@ -278,11 +281,11 @@ extern char *isolated_loader;
                prepare_to_wait(&(wq), &__wait, TASK_INTERRUPTIBLE);    \
                if (condition)                                          \
                        break;                                          \
+               spu_release(ctx);                                       \
                if (signal_pending(current)) {                          \
                        __ret = -ERESTARTSYS;                           \
                        break;                                          \
                }                                                       \
-               spu_release(ctx);                                       \
                schedule();                                             \
                __ret = spu_acquire(ctx);                               \
                if (__ret)                                              \
index ba931be..5169ecc 100644 (file)
@@ -2565,6 +2565,8 @@ static void __init probe_uninorth(void)
 
        /* Locate core99 Uni-N */
        uninorth_node = of_find_node_by_name(NULL, "uni-n");
+       uninorth_maj = 1;
+
        /* Locate G5 u3 */
        if (uninorth_node == NULL) {
                uninorth_node = of_find_node_by_name(NULL, "u3");
@@ -2575,8 +2577,10 @@ static void __init probe_uninorth(void)
                uninorth_node = of_find_node_by_name(NULL, "u4");
                uninorth_maj = 4;
        }
-       if (uninorth_node == NULL)
+       if (uninorth_node == NULL) {
+               uninorth_maj = 0;
                return;
+       }
 
        addrp = of_get_property(uninorth_node, "reg", NULL);
        if (addrp == NULL)
@@ -3029,3 +3033,8 @@ void pmac_resume_agp_for_card(struct pci_dev *dev)
        pmac_agp_resume(pmac_agp_bridge);
 }
 EXPORT_SYMBOL(pmac_resume_agp_for_card);
+
+int pmac_get_uninorth_variant(void)
+{
+       return uninorth_maj;
+}
index 8ba4938..77ad192 100644 (file)
@@ -85,6 +85,7 @@ struct smu_device {
        u32                     cmd_buf_abs;    /* command buffer absolute */
        struct list_head        cmd_list;
        struct smu_cmd          *cmd_cur;       /* pending command */
+       int                     broken_nap;
        struct list_head        cmd_i2c_list;
        struct smu_i2c_cmd      *cmd_i2c_cur;   /* pending i2c command */
        struct timer_list       i2c_timer;
@@ -135,6 +136,19 @@ static void smu_start_cmd(void)
        fend = faddr + smu->cmd_buf->length + 2;
        flush_inval_dcache_range(faddr, fend);
 
+
+       /* We also disable NAP mode for the duration of the command
+        * on U3 based machines.
+        * This is slightly racy as it can be written back to 1 by a sysctl
+        * but that never happens in practice. There seem to be an issue with
+        * U3 based machines such as the iMac G5 where napping for the
+        * whole duration of the command prevents the SMU from fetching it
+        * from memory. This might be related to the strange i2c based
+        * mechanism the SMU uses to access memory.
+        */
+       if (smu->broken_nap)
+               powersave_nap = 0;
+
        /* This isn't exactly a DMA mapping here, I suspect
         * the SMU is actually communicating with us via i2c to the
         * northbridge or the CPU to access RAM.
@@ -211,6 +225,10 @@ static irqreturn_t smu_db_intr(int irq, void *arg)
        misc = cmd->misc;
        mb();
        cmd->status = rc;
+
+       /* Re-enable NAP mode */
+       if (smu->broken_nap)
+               powersave_nap = 1;
  bail:
        /* Start next command if any */
        smu_start_cmd();
@@ -461,7 +479,7 @@ int __init smu_init (void)
         if (np == NULL)
                return -ENODEV;
 
-       printk(KERN_INFO "SMU driver %s %s\n", VERSION, AUTHOR);
+       printk(KERN_INFO "SMU: Driver %s %s\n", VERSION, AUTHOR);
 
        if (smu_cmdbuf_abs == 0) {
                printk(KERN_ERR "SMU: Command buffer not allocated !\n");
@@ -533,6 +551,11 @@ int __init smu_init (void)
                goto fail;
        }
 
+       /* U3 has an issue with NAP mode when issuing SMU commands */
+       smu->broken_nap = pmac_get_uninorth_variant() < 4;
+       if (smu->broken_nap)
+               printk(KERN_INFO "SMU: using NAP mode workaround\n");
+
        sys_ctrler = SYS_CTRLER_SMU;
        return 0;
 
index 26bcb0a..877c35a 100644 (file)
@@ -392,6 +392,14 @@ extern u32 __iomem *uninorth_base;
 #define UN_BIS(r,v)    (UN_OUT((r), UN_IN(r) | (v)))
 #define UN_BIC(r,v)    (UN_OUT((r), UN_IN(r) & ~(v)))
 
+/* Uninorth variant:
+ *
+ * 0 = not uninorth
+ * 1 = U1.x or U2.x
+ * 3 = U3
+ * 4 = U4
+ */
+extern int pmac_get_uninorth_variant(void);
 
 #endif /* __ASM_POWERPC_PMAC_FEATURE_H */
 #endif /* __KERNEL__ */
index 2eb553d..82aea81 100644 (file)
@@ -228,7 +228,7 @@ int release_resource(struct resource *old)
 
 EXPORT_SYMBOL(release_resource);
 
-#ifdef CONFIG_MEMORY_HOTPLUG
+#if defined(CONFIG_MEMORY_HOTPLUG) && !defined(CONFIG_ARCH_HAS_WALK_MEMORY)
 /*
  * Finds the lowest memory reosurce exists within [res->start.res->end)
  * the caller must specify res->start, res->end, res->flags.