Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 22 Jul 2011 21:43:13 +0000 (14:43 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 22 Jul 2011 21:43:13 +0000 (14:43 -0700)
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1287 commits)
  icmp: Fix regression in nexthop resolution during replies.
  net: Fix ppc64 BPF JIT dependencies.
  acenic: include NET_SKB_PAD headroom to incoming skbs
  ixgbe: convert to ndo_fix_features
  ixgbe: only enable WoL for magic packet by default
  ixgbe: remove ifdef check for non-existent define
  ixgbe: Pass staterr instead of re-reading status and error bits from descriptor
  ixgbe: Move interrupt related values out of ring and into q_vector
  ixgbe: add structure for containing RX/TX rings to q_vector
  ixgbe: inline the ixgbe_maybe_stop_tx function
  ixgbe: Update ATR to use recorded TX queues instead of CPU for routing
  igb: Fix for DH89xxCC near end loopback test
  e1000: always call e1000_check_for_link() on e1000_ce4100 MACs.
  netxen: add fw version compatibility check
  be2net: request native mode each time the card is reset
  ipv4: Constrain UFO fragment sizes to multiples of 8 bytes
  virtio_net: Fix panic in virtnet_remove
  ipv6: make fragment identifications less predictable
  ipv6: unshare inetpeers
  can: make function can_get_bittiming static
  ...

278 files changed:
Documentation/filesystems/ubifs.txt
Documentation/mmc/00-INDEX
Documentation/mmc/mmc-async-req.txt [new file with mode: 0644]
Documentation/virtual/lguest/lguest.c
Documentation/x86/boot.txt
MAINTAINERS
Makefile
arch/arm/configs/mmp2_defconfig
arch/arm/mach-mmp/brownstone.c
arch/arm/mach-mmp/include/mach/mmp2.h
arch/arm/mach-mmp/jasper.c
arch/arm/mach-mmp/mmp2.c
arch/arm/plat-pxa/include/plat/sdhci.h [deleted file]
arch/sparc/include/asm/ptrace.h
arch/x86/Kconfig
arch/x86/include/asm/lguest_hcall.h
arch/x86/include/asm/xen/pci.h
arch/x86/kernel/asm-offsets_32.c
arch/x86/kernel/reboot.c
arch/x86/lguest/boot.c
arch/x86/lguest/i386_head.S
arch/x86/pci/xen.c
arch/x86/xen/Makefile
arch/x86/xen/enlighten.c
arch/x86/xen/platform-pci-unplug.c
arch/x86/xen/vga.c [new file with mode: 0644]
arch/x86/xen/xen-ops.h
drivers/block/xen-blkback/xenbus.c
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/i915_gem_tiling.c
drivers/gpu/drm/i915/intel_display.c
drivers/lguest/core.c
drivers/lguest/interrupts_and_traps.c
drivers/lguest/lg.h
drivers/lguest/lguest_device.c
drivers/lguest/lguest_user.c
drivers/lguest/page_tables.c
drivers/lguest/x86/core.c
drivers/mmc/card/block.c
drivers/mmc/card/mmc_test.c
drivers/mmc/card/queue.c
drivers/mmc/card/queue.h
drivers/mmc/core/core.c
drivers/mmc/core/sd.c
drivers/mmc/core/sdio_bus.c
drivers/mmc/host/Kconfig
drivers/mmc/host/Makefile
drivers/mmc/host/at91_mci.c
drivers/mmc/host/at91_mci.h [moved from arch/arm/mach-at91/include/mach/at91_mci.h with 99% similarity]
drivers/mmc/host/atmel-mci.c
drivers/mmc/host/dw_mmc.c
drivers/mmc/host/dw_mmc.h
drivers/mmc/host/mmci.c
drivers/mmc/host/mmci.h
drivers/mmc/host/mxs-mmc.c
drivers/mmc/host/omap_hsmmc.c
drivers/mmc/host/sdhci-cns3xxx.c
drivers/mmc/host/sdhci-dove.c
drivers/mmc/host/sdhci-esdhc-imx.c
drivers/mmc/host/sdhci-of-core.c [deleted file]
drivers/mmc/host/sdhci-of-esdhc.c
drivers/mmc/host/sdhci-of-hlwd.c
drivers/mmc/host/sdhci-of.h [deleted file]
drivers/mmc/host/sdhci-pci.c
drivers/mmc/host/sdhci-pltfm.c
drivers/mmc/host/sdhci-pltfm.h
drivers/mmc/host/sdhci-pxa.c [deleted file]
drivers/mmc/host/sdhci-pxav2.c [new file with mode: 0644]
drivers/mmc/host/sdhci-pxav3.c [new file with mode: 0644]
drivers/mmc/host/sdhci-s3c.c
drivers/mmc/host/sdhci-tegra.c
drivers/mmc/host/sdhci.c
drivers/mmc/host/sh_mmcif.c
drivers/mmc/host/sh_mobile_sdhi.c
drivers/mmc/host/tmio_mmc.h
drivers/mmc/host/tmio_mmc_dma.c
drivers/mmc/host/tmio_mmc_pio.c
drivers/mtd/ubi/build.c
drivers/mtd/ubi/debug.c
drivers/mtd/ubi/debug.h
drivers/mtd/ubi/io.c
drivers/mtd/ubi/scan.c
drivers/mtd/ubi/ubi.h
drivers/mtd/ubi/vmt.c
drivers/mtd/ubi/vtbl.c
drivers/mtd/ubi/wl.c
drivers/pci/quirks.c
drivers/xen/Kconfig
drivers/xen/Makefile
drivers/xen/events.c
drivers/xen/tmem.c
drivers/xen/xen-balloon.c
drivers/xen/xen-pciback/Makefile [new file with mode: 0644]
drivers/xen/xen-pciback/conf_space.c [new file with mode: 0644]
drivers/xen/xen-pciback/conf_space.h [new file with mode: 0644]
drivers/xen/xen-pciback/conf_space_capability.c [new file with mode: 0644]
drivers/xen/xen-pciback/conf_space_header.c [new file with mode: 0644]
drivers/xen/xen-pciback/conf_space_quirks.c [new file with mode: 0644]
drivers/xen/xen-pciback/conf_space_quirks.h [new file with mode: 0644]
drivers/xen/xen-pciback/passthrough.c [new file with mode: 0644]
drivers/xen/xen-pciback/pci_stub.c [new file with mode: 0644]
drivers/xen/xen-pciback/pciback.h [new file with mode: 0644]
drivers/xen/xen-pciback/pciback_ops.c [new file with mode: 0644]
drivers/xen/xen-pciback/vpci.c [new file with mode: 0644]
drivers/xen/xen-pciback/xenbus.c [new file with mode: 0644]
drivers/xen/xen-selfballoon.c [new file with mode: 0644]
drivers/xen/xenbus/xenbus_probe.c
drivers/xen/xenbus/xenbus_probe.h
drivers/xen/xenbus/xenbus_probe_backend.c
drivers/xen/xenbus/xenbus_probe_frontend.c
fs/cifs/file.c
fs/dcache.c
fs/dlm/ast.c
fs/dlm/ast.h
fs/dlm/config.c
fs/dlm/config.h
fs/dlm/dlm_internal.h
fs/dlm/lock.c
fs/dlm/lockspace.c
fs/dlm/lowcomms.c
fs/dlm/memory.c
fs/dlm/memory.h
fs/dlm/recoverd.c
fs/dlm/user.c
fs/fscache/page.c
fs/gfs2/bmap.c
fs/gfs2/dir.c
fs/gfs2/dir.h
fs/gfs2/file.c
fs/gfs2/glock.c
fs/gfs2/glock.h
fs/gfs2/glops.c
fs/gfs2/incore.h
fs/gfs2/main.c
fs/gfs2/ops_fstype.c
fs/gfs2/rgrp.c
fs/gfs2/rgrp.h
fs/gfs2/super.c
fs/hfsplus/brec.c
fs/hfsplus/catalog.c
fs/hfsplus/dir.c
fs/hfsplus/extents.c
fs/hfsplus/hfsplus_fs.h
fs/hfsplus/inode.c
fs/hfsplus/part_tbl.c
fs/hfsplus/super.c
fs/hfsplus/unicode.c
fs/hfsplus/wrapper.c
fs/ubifs/commit.c
fs/ubifs/debug.c
fs/ubifs/debug.h
fs/ubifs/dir.c
fs/ubifs/file.c
fs/ubifs/io.c
fs/ubifs/log.c
fs/ubifs/lprops.c
fs/ubifs/lpt.c
fs/ubifs/lpt_commit.c
fs/ubifs/misc.h
fs/ubifs/orphan.c
fs/ubifs/recovery.c
fs/ubifs/replay.c
fs/ubifs/sb.c
fs/ubifs/scan.c
fs/ubifs/super.c
fs/ubifs/tnc.c
fs/ubifs/tnc_commit.c
fs/ubifs/ubifs.h
fs/xfs/Makefile
fs/xfs/linux-2.6/xfs_acl.c
fs/xfs/linux-2.6/xfs_aops.c
fs/xfs/linux-2.6/xfs_buf.c
fs/xfs/linux-2.6/xfs_buf.h
fs/xfs/linux-2.6/xfs_export.c
fs/xfs/linux-2.6/xfs_file.c
fs/xfs/linux-2.6/xfs_iops.c
fs/xfs/linux-2.6/xfs_linux.h
fs/xfs/linux-2.6/xfs_super.c
fs/xfs/linux-2.6/xfs_sync.c
fs/xfs/linux-2.6/xfs_sync.h
fs/xfs/linux-2.6/xfs_trace.h
fs/xfs/quota/xfs_dquot.c
fs/xfs/quota/xfs_dquot.h
fs/xfs/quota/xfs_qm.c
fs/xfs/quota/xfs_qm.h
fs/xfs/quota/xfs_qm_syscalls.c
fs/xfs/quota/xfs_trans_dquot.c
fs/xfs/xfs.h
fs/xfs/xfs_alloc.c
fs/xfs/xfs_alloc_btree.c
fs/xfs/xfs_arch.h [deleted file]
fs/xfs/xfs_attr.c
fs/xfs/xfs_attr_leaf.c
fs/xfs/xfs_bmap.c
fs/xfs/xfs_bmap_btree.c
fs/xfs/xfs_btree.c
fs/xfs/xfs_btree.h
fs/xfs/xfs_btree_trace.c [deleted file]
fs/xfs/xfs_btree_trace.h [deleted file]
fs/xfs/xfs_buf_item.c
fs/xfs/xfs_da_btree.c
fs/xfs/xfs_da_btree.h
fs/xfs/xfs_dir2.c
fs/xfs/xfs_dir2.h
fs/xfs/xfs_dir2_block.c
fs/xfs/xfs_dir2_block.h [deleted file]
fs/xfs/xfs_dir2_data.c
fs/xfs/xfs_dir2_data.h [deleted file]
fs/xfs/xfs_dir2_format.h [new file with mode: 0644]
fs/xfs/xfs_dir2_leaf.c
fs/xfs/xfs_dir2_leaf.h [deleted file]
fs/xfs/xfs_dir2_node.c
fs/xfs/xfs_dir2_node.h [deleted file]
fs/xfs/xfs_dir2_priv.h [new file with mode: 0644]
fs/xfs/xfs_dir2_sf.c
fs/xfs/xfs_dir2_sf.h [deleted file]
fs/xfs/xfs_fs.h
fs/xfs/xfs_ialloc.c
fs/xfs/xfs_ialloc_btree.c
fs/xfs/xfs_iget.c
fs/xfs/xfs_inode.c
fs/xfs/xfs_inode.h
fs/xfs/xfs_inode_item.c
fs/xfs/xfs_inum.h
fs/xfs/xfs_log.c
fs/xfs/xfs_log_recover.c
fs/xfs/xfs_mount.c
fs/xfs/xfs_mount.h
fs/xfs/xfs_trans.c
fs/xfs/xfs_trans_ail.c
fs/xfs/xfs_trans_buf.c
fs/xfs/xfs_trans_inode.c
fs/xfs/xfs_trans_priv.h
fs/xfs/xfs_vnodeops.c
fs/xfs/xfs_vnodeops.h
include/linux/lguest.h
include/linux/mfd/tmio.h
include/linux/mmc/boot.h
include/linux/mmc/card.h
include/linux/mmc/core.h
include/linux/mmc/dw_mmc.h
include/linux/mmc/host.h
include/linux/mmc/ioctl.h
include/linux/mmc/mmc.h
include/linux/mmc/pm.h
include/linux/mmc/sd.h
include/linux/mmc/sdhci-pltfm.h [deleted file]
include/linux/mmc/sdhci-spear.h
include/linux/mmc/sdhci.h
include/linux/mmc/sdio.h
include/linux/mmc/sdio_func.h
include/linux/mmc/sdio_ids.h
include/linux/mmc/sh_mmcif.h
include/linux/mmc/sh_mobile_sdhi.h
include/linux/mmc/tmio.h
include/linux/mtd/ubi.h
include/linux/platform_data/pxa_sdhci.h [new file with mode: 0644]
include/linux/sched.h
include/linux/slab.h
include/linux/slab_def.h
include/linux/slob_def.h
include/linux/slub_def.h
include/xen/balloon.h
include/xen/events.h
include/xen/hvc-console.h
include/xen/interface/xen.h
include/xen/tmem.h [new file with mode: 0644]
include/xen/xenbus.h
kernel/rcutree_plugin.h
kernel/sched.c
kernel/sched_fair.c
kernel/sched_features.h
kernel/signal.c
kernel/softirq.c
mm/slab.c
mm/slob.c
mm/slub.c

index 8e4fab6..a0a61d2 100644 (file)
@@ -111,34 +111,6 @@ The following is an example of the kernel boot arguments to attach mtd0
 to UBI and mount volume "rootfs":
 ubi.mtd=0 root=ubi0:rootfs rootfstype=ubifs
 
-
-Module Parameters for Debugging
-===============================
-
-When UBIFS has been compiled with debugging enabled, there are 2 module
-parameters that are available to control aspects of testing and debugging.
-
-debug_chks     Selects extra checks that UBIFS can do while running:
-
-               Check                                   Flag value
-
-               General checks                          1
-               Check Tree Node Cache (TNC)             2
-               Check indexing tree size                4
-               Check orphan area                       8
-               Check old indexing tree                 16
-               Check LEB properties (lprops)           32
-               Check leaf nodes and inodes             64
-
-debug_tsts     Selects a mode of testing, as follows:
-
-               Test mode                               Flag value
-
-               Failure mode for recovery testing       4
-
-For example, set debug_chks to 3 to enable general and TNC checks.
-
-
 References
 ==========
 
index 93dd7a7..a9ba672 100644 (file)
@@ -4,3 +4,5 @@ mmc-dev-attrs.txt
         - info on SD and MMC device attributes
 mmc-dev-parts.txt
         - info on SD and MMC device partitions
+mmc-async-req.txt
+        - info on mmc asynchronous requests
diff --git a/Documentation/mmc/mmc-async-req.txt b/Documentation/mmc/mmc-async-req.txt
new file mode 100644 (file)
index 0000000..ae1907b
--- /dev/null
@@ -0,0 +1,87 @@
+Rationale
+=========
+
+How significant is the cache maintenance overhead?
+It depends. Fast eMMC and multiple cache levels with speculative cache
+pre-fetch make the cache overhead relatively significant. If the DMA
+preparations for the next request are done in parallel with the current
+transfer, the DMA preparation overhead would not affect the MMC performance.
+The intention of non-blocking (asynchronous) MMC requests is to minimize the
+time between when an MMC request ends and another MMC request begins.
+Using mmc_wait_for_req(), the MMC controller is idle while dma_map_sg and
+dma_unmap_sg are processing. Using non-blocking MMC requests makes it
+possible to prepare the caches for the next job in parallel with an active
+MMC request.
+
+MMC block driver
+================
+
+The mmc_blk_issue_rw_rq() in the MMC block driver is made non-blocking.
+The increase in throughput is proportional to the time it takes to
+prepare a request (the major part of which is dma_map_sg() and dma_unmap_sg())
+and how fast the memory is. The faster the MMC/SD is, the
+more significant the request preparation time becomes. Roughly, the expected
+performance gain is 5% for large writes and 10% on large reads on an L2 cache
+platform. In power save mode, when clocks run at a lower frequency, the DMA
+preparation may cost even more. As long as these slower preparations are run
+in parallel with the transfer, performance won't be affected.
+
+Details on measurements from IOZone and mmc_test
+================================================
+
+https://wiki.linaro.org/WorkingGroups/Kernel/Specs/StoragePerfMMC-async-req
+
+MMC core API extension
+======================
+
+There is one new public function, mmc_start_req().
+It starts a new MMC command request for a host. The function isn't
+truly non-blocking: if there is an ongoing async request, it waits for
+that request to complete, starts the new one, and returns. It
+doesn't wait for the new request to complete. If there is no ongoing
+request, it starts the new request and returns immediately.
+
+MMC host extensions
+===================
+
+There are two optional members in the mmc_host_ops -- pre_req() and
+post_req() -- that the host driver may implement in order to move work
+to before and after the actual mmc_host_ops.request() function is called.
+In the DMA case pre_req() may do dma_map_sg() and prepare the DMA
+descriptor, and post_req() runs the dma_unmap_sg().
+
+Optimize for the first request
+==============================
+
+The first request in a series of requests can't be prepared in parallel
+with the previous transfer, since there is no previous request.
+The argument is_first_req in pre_req() indicates that there is no previous
+request. The host driver may optimize for this scenario to minimize
+the performance loss. A way to optimize for this is to split the current
+request in two chunks, prepare the first chunk and start the request,
+and finally prepare the second chunk and start the transfer.
+
+Pseudocode to handle is_first_req scenario with minimal prepare overhead:
+
+if (is_first_req && req->size > threshold)
+   /* start MMC transfer for the complete transfer size */
+   mmc_start_command(MMC_CMD_TRANSFER_FULL_SIZE);
+
+   /*
+    * Begin to prepare DMA while cmd is being processed by MMC.
+    * The first chunk of the request should take the same time
+    * to prepare as the "MMC process command time".
+    * If the prepare time exceeds the MMC cmd time,
+    * the transfer is delayed; guesstimate max 4k as the first chunk size.
+    */
+    prepare_1st_chunk_for_dma(req);
+    /* flush pending desc to the DMAC (dmaengine.h) */
+    dma_issue_pending(req->dma_desc);
+
+    prepare_2nd_chunk_for_dma(req);
+    /*
+     * The second issue_pending should be called before MMC runs out
+     * of the first chunk. If the MMC runs out of the first data chunk
+     * before this call, the transfer is delayed.
+     */
+    dma_issue_pending(req->dma_desc);
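
To make the pre_req()/post_req() hooks documented above concrete, a minimal
host-driver sketch follows. It is illustrative only, not part of this merge:
it assumes the mmc_host_ops prototypes added by this series (pre_req() taking
an is_first_req flag, post_req() taking an error code) and the host_cookie
field in struct mmc_data; the foo_* names are hypothetical, and the use of
host_cookie as a "pre-mapped" marker is a per-driver convention. The division
of work follows the documentation: dma_map_sg() in pre_req(), so the mapping
overlaps the transfer in flight, and dma_unmap_sg() in post_req().

    #include <linux/dma-mapping.h>
    #include <linux/mmc/core.h>
    #include <linux/mmc/host.h>

    static void foo_pre_req(struct mmc_host *mmc, struct mmc_request *mrq,
                            bool is_first_req)
    {
            struct mmc_data *data = mrq->data;

            if (!data || data->host_cookie)
                    return;

            /* Map the sglist now, while the previous transfer still runs. */
            dma_map_sg(mmc_dev(mmc), data->sg, data->sg_len,
                       (data->flags & MMC_DATA_WRITE) ?
                       DMA_TO_DEVICE : DMA_FROM_DEVICE);
            data->host_cookie = 1;  /* mark the request as pre-mapped */
    }

    static void foo_post_req(struct mmc_host *mmc, struct mmc_request *mrq,
                             int err)
    {
            struct mmc_data *data = mrq->data;

            if (!data || !data->host_cookie)
                    return;

            /* Undo the mapping once the transfer (or its error path) is done. */
            dma_unmap_sg(mmc_dev(mmc), data->sg, data->sg_len,
                         (data->flags & MMC_DATA_WRITE) ?
                         DMA_TO_DEVICE : DMA_FROM_DEVICE);
            data->host_cookie = 0;
    }

A block-driver-style caller would then hand requests to the core with
mmc_start_req(), which, as described above, waits for the previous
asynchronous request, starts the new one, and returns without waiting for
the new request to complete.
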
index cd9d6af..043bd7d 100644 (file)
@@ -51,7 +51,7 @@
 #include <asm/bootparam.h>
 #include "../../../include/linux/lguest_launcher.h"
 /*L:110
- * We can ignore the 42 include files we need for this program, but I do want
+ * We can ignore the 43 include files we need for this program, but I do want
  * to draw attention to the use of kernel-style types.
  *
  * As Linus said, "C is a Spartan language, and so should your naming be."  I
@@ -65,7 +65,6 @@ typedef uint16_t u16;
 typedef uint8_t u8;
 /*:*/
 
-#define PAGE_PRESENT 0x7       /* Present, RW, Execute */
 #define BRIDGE_PFX "bridge:"
 #ifndef SIOCBRADDIF
 #define SIOCBRADDIF    0x89a2          /* add interface to bridge      */
@@ -861,8 +860,10 @@ static void console_output(struct virtqueue *vq)
        /* writev can return a partial write, so we loop here. */
        while (!iov_empty(iov, out)) {
                int len = writev(STDOUT_FILENO, iov, out);
-               if (len <= 0)
-                       err(1, "Write to stdout gave %i", len);
+               if (len <= 0) {
+                       warn("Write to stdout gave %i (%d)", len, errno);
+                       break;
+               }
                iov_consume(iov, out, len);
        }
 
@@ -898,7 +899,7 @@ static void net_output(struct virtqueue *vq)
         * same format: what a coincidence!
         */
        if (writev(net_info->tunfd, iov, out) < 0)
-               errx(1, "Write to tun failed?");
+               warnx("Write to tun failed (%d)?", errno);
 
        /*
         * Done with that one; wait_for_vq_desc() will send the interrupt if
@@ -955,7 +956,7 @@ static void net_input(struct virtqueue *vq)
         */
        len = readv(net_info->tunfd, iov, in);
        if (len <= 0)
-               err(1, "Failed to read from tun.");
+               warn("Failed to read from tun (%d).", errno);
 
        /*
         * Mark that packet buffer as used, but don't interrupt here.  We want
@@ -1093,9 +1094,10 @@ static void update_device_status(struct device *dev)
                warnx("Device %s configuration FAILED", dev->name);
                if (dev->running)
                        reset_device(dev);
-       } else if (dev->desc->status & VIRTIO_CONFIG_S_DRIVER_OK) {
-               if (!dev->running)
-                       start_device(dev);
+       } else {
+               if (dev->running)
+                       err(1, "Device %s features finalized twice", dev->name);
+               start_device(dev);
        }
 }
 
@@ -1120,25 +1122,11 @@ static void handle_output(unsigned long addr)
                        return;
                }
 
-               /*
-                * Devices *can* be used before status is set to DRIVER_OK.
-                * The original plan was that they would never do this: they
-                * would always finish setting up their status bits before
-                * actually touching the virtqueues.  In practice, we allowed
-                * them to, and they do (eg. the disk probes for partition
-                * tables as part of initialization).
-                *
-                * If we see this, we start the device: once it's running, we
-                * expect the device to catch all the notifications.
-                */
+               /* Devices should not be used before features are finalized. */
                for (vq = i->vq; vq; vq = vq->next) {
                        if (addr != vq->config.pfn*getpagesize())
                                continue;
-                       if (i->running)
-                               errx(1, "Notification on running %s", i->name);
-                       /* This just calls create_thread() for each virtqueue */
-                       start_device(i);
-                       return;
+                       errx(1, "Notification on %s before setup!", i->name);
                }
        }
 
@@ -1370,7 +1358,7 @@ static void setup_console(void)
  * --sharenet=<name> option which opens or creates a named pipe.  This can be
  * used to send packets to another guest in a 1:1 manner.
  *
- * More sopisticated is to use one of the tools developed for project like UML
+ * More sophisticated is to use one of the tools developed for project like UML
  * to do networking.
  *
  * Faster is to do virtio bonding in kernel.  Doing this 1:1 would be
@@ -1380,7 +1368,7 @@ static void setup_console(void)
  * multiple inter-guest channels behind one interface, although it would
  * require some manner of hotplugging new virtio channels.
  *
- * Finally, we could implement a virtio network switch in the kernel.
+ * Finally, we could use a virtio network switch in the kernel, ie. vhost.
 :*/
 
 static u32 str2ip(const char *ipaddr)
@@ -2017,10 +2005,7 @@ int main(int argc, char *argv[])
        /* Tell the entry path not to try to reload segment registers. */
        boot->hdr.loadflags |= KEEP_SEGMENTS;
 
-       /*
-        * We tell the kernel to initialize the Guest: this returns the open
-        * /dev/lguest file descriptor.
-        */
+       /* We tell the kernel to initialize the Guest. */
        tell_kernel(start);
 
        /* Ensure that we terminate if a device-servicing child dies. */
index 9b7221a..7c3a880 100644 (file)
@@ -674,7 +674,7 @@ Protocol:   2.10+
 
 Field name:    init_size
 Type:          read
-Offset/size:   0x25c/4
+Offset/size:   0x260/4
 
   This field indicates the amount of linear contiguous memory starting
   at the kernel runtime start address that the kernel needs before it
index 7a9569e..81cf5fb 100644 (file)
@@ -1,4 +1,5 @@
 
+
        List of maintainers and how to submit kernel changes
 
 Please try to follow the guidelines below.  This will make things
@@ -4584,9 +4585,8 @@ S:        Maintained
 F:     drivers/mmc/host/omap.c
 
 OMAP HS MMC SUPPORT
-M:     Madhusudhan Chikkature <madhu.cr@ti.com>
 L:     linux-omap@vger.kernel.org
-S:     Maintained
+S:     Orphan
 F:     drivers/mmc/host/omap_hsmmc.c
 
 OMAP RANDOM NUMBER GENERATOR SUPPORT
@@ -6242,9 +6242,14 @@ F:       drivers/char/toshiba.c
 F:     include/linux/toshiba.h
 
 TMIO MMC DRIVER
+M:     Guennadi Liakhovetski <g.liakhovetski@gmx.de>
 M:     Ian Molton <ian@mnementh.co.uk>
+L:     linux-mmc@vger.kernel.org
 S:     Maintained
-F:     drivers/mmc/host/tmio_mmc.*
+F:     drivers/mmc/host/tmio_mmc*
+F:     drivers/mmc/host/sh_mobile_sdhi.c
+F:     include/linux/mmc/tmio.h
+F:     include/linux/mmc/sh_mobile_sdhi.h
 
 TMPFS (SHMEM FILESYSTEM)
 M:     Hugh Dickins <hughd@google.com>
@@ -6321,7 +6326,7 @@ F:        drivers/scsi/u14-34f.c
 
 UBI FILE SYSTEM (UBIFS)
 M:     Artem Bityutskiy <dedekind1@gmail.com>
-M:     Adrian Hunter <adrian.hunter@nokia.com>
+M:     Adrian Hunter <adrian.hunter@intel.com>
 L:     linux-mtd@lists.infradead.org
 T:     git git://git.infradead.org/ubifs-2.6.git
 W:     http://www.linux-mtd.infradead.org/doc/ubifs.html
index 60d91f7..6a5bdad 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 3
 PATCHLEVEL = 0
 SUBLEVEL = 0
-EXTRAVERSION = -rc7
+EXTRAVERSION =
 NAME = Sneaky Weasel
 
 # *DOCUMENTATION*
index 47ad3b1..5a58452 100644 (file)
@@ -8,6 +8,7 @@ CONFIG_MODULE_UNLOAD=y
 CONFIG_MODULE_FORCE_UNLOAD=y
 # CONFIG_BLK_DEV_BSG is not set
 CONFIG_ARCH_MMP=y
+CONFIG_MACH_BROWNSTONE=y
 CONFIG_MACH_FLINT=y
 CONFIG_MACH_MARVELL_JASPER=y
 CONFIG_HIGH_RES_TIMERS=y
@@ -63,10 +64,16 @@ CONFIG_BACKLIGHT_MAX8925=y
 # CONFIG_USB_SUPPORT is not set
 CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_MAX8925=y
+CONFIG_MMC=y
 # CONFIG_DNOTIFY is not set
 CONFIG_INOTIFY=y
 CONFIG_TMPFS=y
 CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_EXT2_FS=y
+CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_MSDOS_FS=y
+CONFIG_FAT_DEFAULT_CODEPAGE=437
 CONFIG_JFFS2_FS=y
 CONFIG_CRAMFS=y
 CONFIG_NFS_FS=y
@@ -81,7 +88,7 @@ CONFIG_DEBUG_KERNEL=y
 # CONFIG_DEBUG_PREEMPT is not set
 CONFIG_DEBUG_INFO=y
 # CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_DYNAMIC_DEBUG=y
+# CONFIG_DYNAMIC_DEBUG is not set
 CONFIG_DEBUG_USER=y
 CONFIG_DEBUG_ERRORS=y
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
index 7bb78fd..c79162a 100644 (file)
@@ -177,9 +177,16 @@ static struct i2c_board_info brownstone_twsi1_info[] = {
 };
 
 static struct sdhci_pxa_platdata mmp2_sdh_platdata_mmc0 = {
-       .max_speed      = 25000000,
+       .clk_delay_cycles = 0x1f,
 };
 
+static struct sdhci_pxa_platdata mmp2_sdh_platdata_mmc2 = {
+       .clk_delay_cycles = 0x1f,
+       .flags = PXA_FLAG_CARD_PERMANENT
+               | PXA_FLAG_SD_8_BIT_CAPABLE_SLOT,
+};
+
+
 static void __init brownstone_init(void)
 {
        mfp_config(ARRAY_AND_SIZE(brownstone_pin_config));
@@ -189,6 +196,7 @@ static void __init brownstone_init(void)
        mmp2_add_uart(3);
        mmp2_add_twsi(1, NULL, ARRAY_AND_SIZE(brownstone_twsi1_info));
        mmp2_add_sdhost(0, &mmp2_sdh_platdata_mmc0); /* SD/MMC */
+       mmp2_add_sdhost(2, &mmp2_sdh_platdata_mmc2); /* eMMC */
 
        /* enable 5v regulator */
        platform_device_register(&brownstone_v_5vp_device);
index 2cbf6df..de7b888 100644 (file)
@@ -1,7 +1,7 @@
 #ifndef __ASM_MACH_MMP2_H
 #define __ASM_MACH_MMP2_H
 
-#include <plat/sdhci.h>
+#include <linux/platform_data/pxa_sdhci.h>
 
 struct sys_timer;
 
index 24172a0..5d6421d 100644 (file)
@@ -154,7 +154,7 @@ static struct i2c_board_info jasper_twsi1_info[] = {
 };
 
 static struct sdhci_pxa_platdata mmp2_sdh_platdata_mmc0 = {
-       .max_speed      = 25000000,
+       .clk_delay_cycles = 0x1f,
 };
 
 static void __init jasper_init(void)
index 8e6c3ac..079c188 100644 (file)
@@ -168,10 +168,10 @@ static struct clk_lookup mmp2_clkregs[] = {
        INIT_CLKREG(&clk_twsi5, "pxa2xx-i2c.4", NULL),
        INIT_CLKREG(&clk_twsi6, "pxa2xx-i2c.5", NULL),
        INIT_CLKREG(&clk_nand, "pxa3xx-nand", NULL),
-       INIT_CLKREG(&clk_sdh0, "sdhci-pxa.0", "PXA-SDHCLK"),
-       INIT_CLKREG(&clk_sdh1, "sdhci-pxa.1", "PXA-SDHCLK"),
-       INIT_CLKREG(&clk_sdh2, "sdhci-pxa.2", "PXA-SDHCLK"),
-       INIT_CLKREG(&clk_sdh3, "sdhci-pxa.3", "PXA-SDHCLK"),
+       INIT_CLKREG(&clk_sdh0, "sdhci-pxav3.0", "PXA-SDHCLK"),
+       INIT_CLKREG(&clk_sdh1, "sdhci-pxav3.1", "PXA-SDHCLK"),
+       INIT_CLKREG(&clk_sdh2, "sdhci-pxav3.2", "PXA-SDHCLK"),
+       INIT_CLKREG(&clk_sdh3, "sdhci-pxav3.3", "PXA-SDHCLK"),
 };
 
 static int __init mmp2_init(void)
@@ -222,8 +222,8 @@ MMP2_DEVICE(twsi4, "pxa2xx-i2c", 3, TWSI4, 0xd4033000, 0x70);
 MMP2_DEVICE(twsi5, "pxa2xx-i2c", 4, TWSI5, 0xd4033800, 0x70);
 MMP2_DEVICE(twsi6, "pxa2xx-i2c", 5, TWSI6, 0xd4034000, 0x70);
 MMP2_DEVICE(nand, "pxa3xx-nand", -1, NAND, 0xd4283000, 0x100, 28, 29);
-MMP2_DEVICE(sdh0, "sdhci-pxa", 0, MMC, 0xd4280000, 0x120);
-MMP2_DEVICE(sdh1, "sdhci-pxa", 1, MMC2, 0xd4280800, 0x120);
-MMP2_DEVICE(sdh2, "sdhci-pxa", 2, MMC3, 0xd4281000, 0x120);
-MMP2_DEVICE(sdh3, "sdhci-pxa", 3, MMC4, 0xd4281800, 0x120);
+MMP2_DEVICE(sdh0, "sdhci-pxav3", 0, MMC, 0xd4280000, 0x120);
+MMP2_DEVICE(sdh1, "sdhci-pxav3", 1, MMC2, 0xd4280800, 0x120);
+MMP2_DEVICE(sdh2, "sdhci-pxav3", 2, MMC3, 0xd4281000, 0x120);
+MMP2_DEVICE(sdh3, "sdhci-pxav3", 3, MMC4, 0xd4281800, 0x120);
 
diff --git a/arch/arm/plat-pxa/include/plat/sdhci.h b/arch/arm/plat-pxa/include/plat/sdhci.h
deleted file mode 100644 (file)
index 1ab332e..0000000
+++ /dev/null
@@ -1,35 +0,0 @@
-/* linux/arch/arm/plat-pxa/include/plat/sdhci.h
- *
- * Copyright 2010 Marvell
- *     Zhangfei Gao <zhangfei.gao@marvell.com>
- *
- * PXA Platform - SDHCI platform data definitions
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef __PLAT_PXA_SDHCI_H
-#define __PLAT_PXA_SDHCI_H
-
-/* pxa specific flag */
-/* Require clock free running */
-#define PXA_FLAG_DISABLE_CLOCK_GATING (1<<0)
-
-/* Board design supports 8-bit data on SD/SDIO BUS */
-#define PXA_FLAG_SD_8_BIT_CAPABLE_SLOT (1<<2)
-
-/*
- * struct pxa_sdhci_platdata() - Platform device data for PXA SDHCI
- * @max_speed: the maximum speed supported
- * @quirks: quirks of specific device
- * @flags: flags for platform requirement
- */
-struct sdhci_pxa_platdata {
-       unsigned int    max_speed;
-       unsigned int    quirks;
-       unsigned int    flags;
-};
-
-#endif /* __PLAT_PXA_SDHCI_H */
index c7ad3fe..b928b31 100644 (file)
@@ -205,6 +205,7 @@ do {        current_thread_info()->syscall_noerror = 1; \
 } while (0)
 #define user_mode(regs) (!((regs)->tstate & TSTATE_PRIV))
 #define instruction_pointer(regs) ((regs)->tpc)
+#define instruction_pointer_set(regs, val) ((regs)->tpc = (val))
 #define user_stack_pointer(regs) ((regs)->u_regs[UREG_FP])
 #define regs_return_value(regs) ((regs)->u_regs[UREG_I0])
 #ifdef CONFIG_SMP
index da34972..37357a5 100644 (file)
@@ -1170,7 +1170,7 @@ comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI"
 config AMD_NUMA
        def_bool y
        prompt "Old style AMD Opteron NUMA detection"
-       depends on NUMA && PCI
+       depends on X86_64 && NUMA && PCI
        ---help---
          Enable AMD NUMA node topology detection.  You should say Y here if
          you have a multi processor AMD system. This uses an old method to
index b60f292..879fd7d 100644 (file)
@@ -61,6 +61,7 @@ hcall(unsigned long call,
                     : "memory");
        return call;
 }
+/*:*/
 
 /* Can't use our min() macro here: needs to be a constant */
 #define LGUEST_IRQS (NR_IRQS < 32 ? NR_IRQS: 32)
index 4fbda9a..968d57d 100644 (file)
@@ -14,13 +14,14 @@ static inline int pci_xen_hvm_init(void)
 }
 #endif
 #if defined(CONFIG_XEN_DOM0)
-void __init xen_setup_pirqs(void);
+int __init pci_xen_initial_domain(void);
 int xen_find_device_domain_owner(struct pci_dev *dev);
 int xen_register_device_domain_owner(struct pci_dev *dev, uint16_t domain);
 int xen_unregister_device_domain_owner(struct pci_dev *dev);
 #else
-static inline void __init xen_setup_pirqs(void)
+static inline int __init pci_xen_initial_domain(void)
 {
+       return -1;
 }
 static inline int xen_find_device_domain_owner(struct pci_dev *dev)
 {
index c29d631..395a10e 100644 (file)
@@ -63,7 +63,6 @@ void foo(void)
        BLANK();
        OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled);
        OFFSET(LGUEST_DATA_irq_pending, lguest_data, irq_pending);
-       OFFSET(LGUEST_DATA_pgdir, lguest_data, pgdir);
 
        BLANK();
        OFFSET(LGUEST_PAGES_host_gdt_desc, lguest_pages, state.host_gdt_desc);
index 4f0d46f..9242436 100644 (file)
@@ -419,6 +419,30 @@ static struct dmi_system_id __initdata pci_reboot_dmi_table[] = {
                        DMI_MATCH(DMI_PRODUCT_NAME, "iMac9,1"),
                },
        },
+       {       /* Handle problems with rebooting on the Latitude E6320. */
+               .callback = set_pci_reboot,
+               .ident = "Dell Latitude E6320",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E6320"),
+               },
+       },
+       {       /* Handle problems with rebooting on the Latitude E5420. */
+               .callback = set_pci_reboot,
+               .ident = "Dell Latitude E5420",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E5420"),
+               },
+       },
+       {       /* Handle problems with rebooting on the Latitude E6420. */
+               .callback = set_pci_reboot,
+               .ident = "Dell Latitude E6420",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E6420"),
+               },
+       },
        { }
 };
 
index db832fd..13ee258 100644 (file)
@@ -71,7 +71,8 @@
 #include <asm/stackprotector.h>
 #include <asm/reboot.h>                /* for struct machine_ops */
 
-/*G:010 Welcome to the Guest!
+/*G:010
+ * Welcome to the Guest!
  *
  * The Guest in our tale is a simple creature: identical to the Host but
  * behaving in simplified but equivalent ways.  In particular, the Guest is the
@@ -190,15 +191,23 @@ static void lazy_hcall4(unsigned long call,
 #endif
 
 /*G:036
- * When lazy mode is turned off reset the per-cpu lazy mode variable and then
- * issue the do-nothing hypercall to flush any stored calls.
-:*/
+ * When lazy mode is turned off, we issue the do-nothing hypercall to
+ * flush any stored calls, and call the generic helper to reset the
+ * per-cpu lazy mode variable.
+ */
 static void lguest_leave_lazy_mmu_mode(void)
 {
        hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0, 0);
        paravirt_leave_lazy_mmu();
 }
 
+/*
+ * We also catch the end of context switch; we enter lazy mode for much of
+ * that too, so again we need to flush here.
+ *
+ * (Technically, this is lazy CPU mode, and normally we're in lazy MMU
+ * mode, but unlike Xen, lguest doesn't care about the difference).
+ */
 static void lguest_end_context_switch(struct task_struct *next)
 {
        hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0, 0);
@@ -391,7 +400,7 @@ static void lguest_load_tr_desc(void)
  * giant ball of hair.  Its entry in the current Intel manual runs to 28 pages.
  *
  * This instruction even it has its own Wikipedia entry.  The Wikipedia entry
- * has been translated into 5 languages.  I am not making this up!
+ * has been translated into 6 languages.  I am not making this up!
  *
  * We could get funky here and identify ourselves as "GenuineLguest", but
  * instead we just use the real "cpuid" instruction.  Then I pretty much turned
@@ -458,7 +467,7 @@ static void lguest_cpuid(unsigned int *ax, unsigned int *bx,
        /*
         * PAE systems can mark pages as non-executable.  Linux calls this the
         * NX bit.  Intel calls it XD (eXecute Disable), AMD EVP (Enhanced
-        * Virus Protection).  We just switch turn if off here, since we don't
+        * Virus Protection).  We just switch it off here, since we don't
         * support it.
         */
        case 0x80000001:
@@ -520,17 +529,16 @@ static unsigned long lguest_read_cr2(void)
 
 /* See lguest_set_pte() below. */
 static bool cr3_changed = false;
+static unsigned long current_cr3;
 
 /*
  * cr3 is the current toplevel pagetable page: the principle is the same as
- * cr0.  Keep a local copy, and tell the Host when it changes.  The only
- * difference is that our local copy is in lguest_data because the Host needs
- * to set it upon our initial hypercall.
+ * cr0.  Keep a local copy, and tell the Host when it changes.
  */
 static void lguest_write_cr3(unsigned long cr3)
 {
-       lguest_data.pgdir = cr3;
        lazy_hcall1(LHCALL_NEW_PGTABLE, cr3);
+       current_cr3 = cr3;
 
        /* These two page tables are simple, linear, and used during boot */
        if (cr3 != __pa(swapper_pg_dir) && cr3 != __pa(initial_page_table))
@@ -539,7 +547,7 @@ static void lguest_write_cr3(unsigned long cr3)
 
 static unsigned long lguest_read_cr3(void)
 {
-       return lguest_data.pgdir;
+       return current_cr3;
 }
 
 /* cr4 is used to enable and disable PGE, but we don't care. */
@@ -641,7 +649,7 @@ static void lguest_write_cr4(unsigned long val)
 
 /*
  * The Guest calls this after it has set a second-level entry (pte), ie. to map
- * a page into a process' address space.  Wetell the Host the toplevel and
+ * a page into a process' address space.  We tell the Host the toplevel and
  * address this corresponds to.  The Guest uses one pagetable per process, so
  * we need to tell the Host which one we're changing (mm->pgd).
  */
@@ -758,7 +766,7 @@ static void lguest_pmd_clear(pmd_t *pmdp)
 static void lguest_flush_tlb_single(unsigned long addr)
 {
        /* Simply set it to zero: if it was not, it will fault back in. */
-       lazy_hcall3(LHCALL_SET_PTE, lguest_data.pgdir, addr, 0);
+       lazy_hcall3(LHCALL_SET_PTE, current_cr3, addr, 0);
 }
 
 /*
@@ -1140,7 +1148,7 @@ static struct notifier_block paniced = {
 static __init char *lguest_memory_setup(void)
 {
        /*
-        *The Linux bootloader header contains an "e820" memory map: the
+        * The Linux bootloader header contains an "e820" memory map: the
         * Launcher populated the first entry with our memory limit.
         */
        e820_add_region(boot_params.e820_map[0].addr,
index 4f420c2..6ddfe4f 100644 (file)
@@ -6,18 +6,22 @@
 #include <asm/processor-flags.h>
 
 /*G:020
- * Our story starts with the kernel booting into startup_32 in
- * arch/x86/kernel/head_32.S.  It expects a boot header, which is created by
- * the bootloader (the Launcher in our case).
+
+ * Our story starts with the bzImage: booting starts at startup_32 in
+ * arch/x86/boot/compressed/head_32.S.  This merely uncompresses the real
+ * kernel in place and then jumps into it: startup_32 in
+ * arch/x86/kernel/head_32.S.  Both routines expects a boot header in the %esi
+ * register, which is created by the bootloader (the Launcher in our case).
  *
  * The startup_32 function does very little: it clears the uninitialized global
  * C variables which we expect to be zero (ie. BSS) and then copies the boot
- * header and kernel command line somewhere safe.  Finally it checks the
- * 'hardware_subarch' field.  This was introduced in 2.6.24 for lguest and Xen:
- * if it's set to '1' (lguest's assigned number), then it calls us here.
+ * header and kernel command line somewhere safe, and populates some initial
+ * page tables.  Finally it checks the 'hardware_subarch' field.  This was
+ * introduced in 2.6.24 for lguest and Xen: if it's set to '1' (lguest's
+ * assigned number), then it calls us here.
  *
  * WARNING: be very careful here!  We're running at addresses equal to physical
- * addesses (around 0), not above PAGE_OFFSET as most code expectes
+ * addresses (around 0), not above PAGE_OFFSET as most code expects
  * (eg. 0xC0000000).  Jumps are relative, so they're OK, but we can't touch any
  * data without remembering to subtract __PAGE_OFFSET!
  *
 .section .init.text, "ax", @progbits
 ENTRY(lguest_entry)
        /*
-        * We make the "initialization" hypercall now to tell the Host about
-        * us, and also find out where it put our page tables.
+        * We make the "initialization" hypercall now to tell the Host where
+        * our lguest_data struct is.
         */
        movl $LHCALL_LGUEST_INIT, %eax
        movl $lguest_data - __PAGE_OFFSET, %ebx
        int $LGUEST_TRAP_ENTRY
 
+       /* Now turn our pagetables on; setup by arch/x86/kernel/head_32.S. */
+       movl $LHCALL_NEW_PGTABLE, %eax
+       movl $(initial_page_table - __PAGE_OFFSET), %ebx
+       int $LGUEST_TRAP_ENTRY
+
        /* Set up the initial stack so we can run C code. */
        movl $(init_thread_union+THREAD_SIZE),%esp
 
@@ -96,12 +105,8 @@ send_interrupts:
         */
        pushl %eax
        movl $LHCALL_SEND_INTERRUPTS, %eax
-       /*
-        * This is a vmcall instruction (same thing that KVM uses).  Older
-        * assembler versions might not know the "vmcall" instruction, so we
-        * create one manually here.
-        */
-       .byte 0x0f,0x01,0xc1 /* KVM_HYPERCALL */
+       /* This is the actual hypercall trap. */
+       int  $LGUEST_TRAP_ENTRY
        /* Put eax back the way we found it. */
        popl %eax
        ret
index f567965..1017c7b 100644 (file)
@@ -1,8 +1,13 @@
 /*
- * Xen PCI Frontend Stub - puts some "dummy" functions in to the Linux
- *                        x86 PCI core to support the Xen PCI Frontend
+ * Xen PCI - handle PCI (INTx) and MSI infrastructure calls for PV, HVM and
+ * initial domain support. We also handle the DSDT _PRT callbacks for GSI's
+ * used in HVM and initial domain mode (PV does not parse ACPI, so it has no
+ * concept of GSIs). Under PV we hook under the pnbbios API for IRQs and
+ * 0xcf8 PCI configuration read/write.
  *
  *   Author: Ryan Wilson <hap9@epoch.ncsc.mil>
+ *           Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+ *           Stefano Stabellini <stefano.stabellini@eu.citrix.com>
  */
 #include <linux/module.h>
 #include <linux/init.h>
 #include <xen/events.h>
 #include <asm/xen/pci.h>
 
+static int xen_pcifront_enable_irq(struct pci_dev *dev)
+{
+       int rc;
+       int share = 1;
+       int pirq;
+       u8 gsi;
+
+       rc = pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &gsi);
+       if (rc < 0) {
+               dev_warn(&dev->dev, "Xen PCI: failed to read interrupt line: %d\n",
+                        rc);
+               return rc;
+       }
+       /* In PV DomU the Xen PCI backend puts the PIRQ in the interrupt line.*/
+       pirq = gsi;
+
+       if (gsi < NR_IRQS_LEGACY)
+               share = 0;
+
+       rc = xen_bind_pirq_gsi_to_irq(gsi, pirq, share, "pcifront");
+       if (rc < 0) {
+               dev_warn(&dev->dev, "Xen PCI: failed to bind GSI%d (PIRQ%d) to IRQ: %d\n",
+                        gsi, pirq, rc);
+               return rc;
+       }
+
+       dev->irq = rc;
+       dev_info(&dev->dev, "Xen PCI mapped GSI%d to IRQ%d\n", gsi, dev->irq);
+       return 0;
+}
+
 #ifdef CONFIG_ACPI
-static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi,
-                                int trigger, int polarity)
+static int xen_register_pirq(u32 gsi, int gsi_override, int triggering,
+                            bool set_pirq)
 {
-       int rc, irq;
+       int rc, pirq = -1, irq = -1;
        struct physdev_map_pirq map_irq;
        int shareable = 0;
        char *name;
 
-       if (!xen_hvm_domain())
-               return -1;
+       if (set_pirq)
+               pirq = gsi;
 
        map_irq.domid = DOMID_SELF;
        map_irq.type = MAP_PIRQ_TYPE_GSI;
        map_irq.index = gsi;
-       map_irq.pirq = -1;
+       map_irq.pirq = pirq;
 
        rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
        if (rc) {
@@ -42,7 +78,7 @@ static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi,
                return -1;
        }
 
-       if (trigger == ACPI_EDGE_SENSITIVE) {
+       if (triggering == ACPI_EDGE_SENSITIVE) {
                shareable = 0;
                name = "ioapic-edge";
        } else {
@@ -50,12 +86,63 @@ static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi,
                name = "ioapic-level";
        }
 
+       if (gsi_override >= 0)
+               gsi = gsi_override;
+
        irq = xen_bind_pirq_gsi_to_irq(gsi, map_irq.pirq, shareable, name);
+       if (irq < 0)
+               goto out;
+
+       printk(KERN_DEBUG "xen: --> pirq=%d -> irq=%d (gsi=%d)\n", map_irq.pirq, irq, gsi);
+out:
+       return irq;
+}
+
+static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi,
+                                    int trigger, int polarity)
+{
+       if (!xen_hvm_domain())
+               return -1;
 
-       printk(KERN_DEBUG "xen: --> irq=%d, pirq=%d\n", irq, map_irq.pirq);
+       return xen_register_pirq(gsi, -1 /* no GSI override */, trigger,
+                                false /* no mapping of GSI to PIRQ */);
+}
+
+#ifdef CONFIG_XEN_DOM0
+static int xen_register_gsi(u32 gsi, int gsi_override, int triggering, int polarity)
+{
+       int rc, irq;
+       struct physdev_setup_gsi setup_gsi;
+
+       if (!xen_pv_domain())
+               return -1;
+
+       printk(KERN_DEBUG "xen: registering gsi %u triggering %d polarity %d\n",
+                       gsi, triggering, polarity);
+
+       irq = xen_register_pirq(gsi, gsi_override, triggering, true);
+
+       setup_gsi.gsi = gsi;
+       setup_gsi.triggering = (triggering == ACPI_EDGE_SENSITIVE ? 0 : 1);
+       setup_gsi.polarity = (polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
+
+       rc = HYPERVISOR_physdev_op(PHYSDEVOP_setup_gsi, &setup_gsi);
+       if (rc == -EEXIST)
+               printk(KERN_INFO "Already setup the GSI :%d\n", gsi);
+       else if (rc) {
+               printk(KERN_ERR "Failed to setup GSI :%d, err_code:%d\n",
+                               gsi, rc);
+       }
 
        return irq;
 }
+
+static int acpi_register_gsi_xen(struct device *dev, u32 gsi,
+                                int trigger, int polarity)
+{
+       return xen_register_gsi(gsi, -1 /* no GSI override */, trigger, polarity);
+}
+#endif
 #endif
 
 #if defined(CONFIG_PCI_MSI)
@@ -65,6 +152,43 @@ static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi,
 struct xen_pci_frontend_ops *xen_pci_frontend;
 EXPORT_SYMBOL_GPL(xen_pci_frontend);
 
+static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+{
+       int irq, ret, i;
+       struct msi_desc *msidesc;
+       int *v;
+
+       v = kzalloc(sizeof(int) * max(1, nvec), GFP_KERNEL);
+       if (!v)
+               return -ENOMEM;
+
+       if (type == PCI_CAP_ID_MSIX)
+               ret = xen_pci_frontend_enable_msix(dev, v, nvec);
+       else
+               ret = xen_pci_frontend_enable_msi(dev, v);
+       if (ret)
+               goto error;
+       i = 0;
+       list_for_each_entry(msidesc, &dev->msi_list, list) {
+               irq = xen_bind_pirq_msi_to_irq(dev, msidesc, v[i], 0,
+                                              (type == PCI_CAP_ID_MSIX) ?
+                                              "pcifront-msi-x" :
+                                              "pcifront-msi",
+                                               DOMID_SELF);
+               if (irq < 0)
+                       goto free;
+               i++;
+       }
+       kfree(v);
+       return 0;
+
+error:
+       dev_err(&dev->dev, "Xen PCI frontend has not registered MSI/MSI-X support!\n");
+free:
+       kfree(v);
+       return ret;
+}
+
 #define XEN_PIRQ_MSI_DATA  (MSI_DATA_TRIGGER_EDGE | \
                MSI_DATA_LEVEL_ASSERT | (3 << 8) | MSI_DATA_VECTOR(0))
 
@@ -123,67 +247,6 @@ error:
        return -ENODEV;
 }
 
-/*
- * For MSI interrupts we have to use drivers/xen/event.s functions to
- * allocate an irq_desc and setup the right */
-
-
-static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
-{
-       int irq, ret, i;
-       struct msi_desc *msidesc;
-       int *v;
-
-       v = kzalloc(sizeof(int) * max(1, nvec), GFP_KERNEL);
-       if (!v)
-               return -ENOMEM;
-
-       if (type == PCI_CAP_ID_MSIX)
-               ret = xen_pci_frontend_enable_msix(dev, v, nvec);
-       else
-               ret = xen_pci_frontend_enable_msi(dev, v);
-       if (ret)
-               goto error;
-       i = 0;
-       list_for_each_entry(msidesc, &dev->msi_list, list) {
-               irq = xen_bind_pirq_msi_to_irq(dev, msidesc, v[i], 0,
-                                              (type == PCI_CAP_ID_MSIX) ?
-                                              "pcifront-msi-x" :
-                                              "pcifront-msi",
-                                               DOMID_SELF);
-               if (irq < 0)
-                       goto free;
-               i++;
-       }
-       kfree(v);
-       return 0;
-
-error:
-       dev_err(&dev->dev, "Xen PCI frontend has not registered MSI/MSI-X support!\n");
-free:
-       kfree(v);
-       return ret;
-}
-
-static void xen_teardown_msi_irqs(struct pci_dev *dev)
-{
-       struct msi_desc *msidesc;
-
-       msidesc = list_entry(dev->msi_list.next, struct msi_desc, list);
-       if (msidesc->msi_attrib.is_msix)
-               xen_pci_frontend_disable_msix(dev);
-       else
-               xen_pci_frontend_disable_msi(dev);
-
-       /* Free the IRQ's and the msidesc using the generic code. */
-       default_teardown_msi_irqs(dev);
-}
-
-static void xen_teardown_msi_irq(unsigned int irq)
-{
-       xen_destroy_irq(irq);
-}
-
 #ifdef CONFIG_XEN_DOM0
 static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 {
@@ -242,45 +305,28 @@ out:
        return ret;
 }
 #endif
-#endif
 
-static int xen_pcifront_enable_irq(struct pci_dev *dev)
+static void xen_teardown_msi_irqs(struct pci_dev *dev)
 {
-       int rc;
-       int share = 1;
-       int pirq;
-       u8 gsi;
-
-       rc = pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &gsi);
-       if (rc < 0) {
-               dev_warn(&dev->dev, "Xen PCI: failed to read interrupt line: %d\n",
-                        rc);
-               return rc;
-       }
-
-       rc = xen_allocate_pirq_gsi(gsi);
-       if (rc < 0) {
-               dev_warn(&dev->dev, "Xen PCI: failed to allocate a PIRQ for GSI%d: %d\n",
-                        gsi, rc);
-               return rc;
-       }
-       pirq = rc;
+       struct msi_desc *msidesc;
 
-       if (gsi < NR_IRQS_LEGACY)
-               share = 0;
+       msidesc = list_entry(dev->msi_list.next, struct msi_desc, list);
+       if (msidesc->msi_attrib.is_msix)
+               xen_pci_frontend_disable_msix(dev);
+       else
+               xen_pci_frontend_disable_msi(dev);
 
-       rc = xen_bind_pirq_gsi_to_irq(gsi, pirq, share, "pcifront");
-       if (rc < 0) {
-               dev_warn(&dev->dev, "Xen PCI: failed to bind GSI%d (PIRQ%d) to IRQ: %d\n",
-                        gsi, pirq, rc);
-               return rc;
-       }
+       /* Free the IRQ's and the msidesc using the generic code. */
+       default_teardown_msi_irqs(dev);
+}
 
-       dev->irq = rc;
-       dev_info(&dev->dev, "Xen PCI mapped GSI%d to IRQ%d\n", gsi, dev->irq);
-       return 0;
+static void xen_teardown_msi_irq(unsigned int irq)
+{
+       xen_destroy_irq(irq);
 }
 
+#endif
+
 int __init pci_xen_init(void)
 {
        if (!xen_pv_domain() || xen_initial_domain())
@@ -327,79 +373,6 @@ int __init pci_xen_hvm_init(void)
 }
 
 #ifdef CONFIG_XEN_DOM0
-static int xen_register_pirq(u32 gsi, int gsi_override, int triggering)
-{
-       int rc, pirq, irq = -1;
-       struct physdev_map_pirq map_irq;
-       int shareable = 0;
-       char *name;
-
-       if (!xen_pv_domain())
-               return -1;
-
-       if (triggering == ACPI_EDGE_SENSITIVE) {
-               shareable = 0;
-               name = "ioapic-edge";
-       } else {
-               shareable = 1;
-               name = "ioapic-level";
-       }
-       pirq = xen_allocate_pirq_gsi(gsi);
-       if (pirq < 0)
-               goto out;
-
-       if (gsi_override >= 0)
-               irq = xen_bind_pirq_gsi_to_irq(gsi_override, pirq, shareable, name);
-       else
-               irq = xen_bind_pirq_gsi_to_irq(gsi, pirq, shareable, name);
-       if (irq < 0)
-               goto out;
-
-       printk(KERN_DEBUG "xen: --> pirq=%d -> irq=%d (gsi=%d)\n", pirq, irq, gsi);
-
-       map_irq.domid = DOMID_SELF;
-       map_irq.type = MAP_PIRQ_TYPE_GSI;
-       map_irq.index = gsi;
-       map_irq.pirq = pirq;
-
-       rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
-       if (rc) {
-               printk(KERN_WARNING "xen map irq failed %d\n", rc);
-               return -1;
-       }
-
-out:
-       return irq;
-}
-
-static int xen_register_gsi(u32 gsi, int gsi_override, int triggering, int polarity)
-{
-       int rc, irq;
-       struct physdev_setup_gsi setup_gsi;
-
-       if (!xen_pv_domain())
-               return -1;
-
-       printk(KERN_DEBUG "xen: registering gsi %u triggering %d polarity %d\n",
-                       gsi, triggering, polarity);
-
-       irq = xen_register_pirq(gsi, gsi_override, triggering);
-
-       setup_gsi.gsi = gsi;
-       setup_gsi.triggering = (triggering == ACPI_EDGE_SENSITIVE ? 0 : 1);
-       setup_gsi.polarity = (polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
-
-       rc = HYPERVISOR_physdev_op(PHYSDEVOP_setup_gsi, &setup_gsi);
-       if (rc == -EEXIST)
-               printk(KERN_INFO "Already setup the GSI :%d\n", gsi);
-       else if (rc) {
-               printk(KERN_ERR "Failed to setup GSI :%d, err_code:%d\n",
-                               gsi, rc);
-       }
-
-       return irq;
-}
-
 static __init void xen_setup_acpi_sci(void)
 {
        int rc;
@@ -419,7 +392,7 @@ static __init void xen_setup_acpi_sci(void)
        }
        trigger = trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE;
        polarity = polarity ? ACPI_ACTIVE_LOW : ACPI_ACTIVE_HIGH;
-       
+
        printk(KERN_INFO "xen: sci override: global_irq=%d trigger=%d "
                        "polarity=%d\n", gsi, trigger, polarity);
 
@@ -434,10 +407,9 @@ static __init void xen_setup_acpi_sci(void)
         * the ACPI interpreter and keels over since IRQ 9 has not been
         * setup as we had setup IRQ 20 for it).
         */
-       /* Check whether the GSI != IRQ */
        if (acpi_gsi_to_irq(gsi, &irq) == 0) {
-               if (irq >= 0 && irq != gsi)
-                       /* Bugger, we MUST have that IRQ. */
+               /* Use the provided value if it's valid. */
+               if (irq >= 0)
                        gsi_override = irq;
        }
 
@@ -447,41 +419,16 @@ static __init void xen_setup_acpi_sci(void)
        return;
 }
 
-static int acpi_register_gsi_xen(struct device *dev, u32 gsi,
-                                int trigger, int polarity)
+int __init pci_xen_initial_domain(void)
 {
-       return xen_register_gsi(gsi, -1 /* no GSI override */, trigger, polarity);
-}
+       int irq;
 
-static int __init pci_xen_initial_domain(void)
-{
 #ifdef CONFIG_PCI_MSI
        x86_msi.setup_msi_irqs = xen_initdom_setup_msi_irqs;
        x86_msi.teardown_msi_irq = xen_teardown_msi_irq;
 #endif
        xen_setup_acpi_sci();
        __acpi_register_gsi = acpi_register_gsi_xen;
-
-       return 0;
-}
-
-void __init xen_setup_pirqs(void)
-{
-       int pirq, irq;
-
-       pci_xen_initial_domain();
-
-       if (0 == nr_ioapics) {
-               for (irq = 0; irq < NR_IRQS_LEGACY; irq++) {
-                       pirq = xen_allocate_pirq_gsi(irq);
-                       if (WARN(pirq < 0,
-                                "Could not allocate PIRQ for legacy interrupt\n"))
-                               break;
-                       irq = xen_bind_pirq_gsi_to_irq(irq, pirq, 0, "xt-pic");
-               }
-               return;
-       }
-
        /* Pre-allocate legacy irqs */
        for (irq = 0; irq < NR_IRQS_LEGACY; irq++) {
                int trigger, polarity;
@@ -490,12 +437,16 @@ void __init xen_setup_pirqs(void)
                        continue;
 
                xen_register_pirq(irq, -1 /* no GSI override */,
-                       trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE);
+                       trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE,
+                       true /* Map GSI to PIRQ */);
        }
+       if (0 == nr_ioapics) {
+               for (irq = 0; irq < NR_IRQS_LEGACY; irq++)
+                       xen_bind_pirq_gsi_to_irq(irq, irq, 0, "xt-pic");
+       }
+       return 0;
 }
-#endif
 
-#ifdef CONFIG_XEN_DOM0
 struct xen_device_domain_owner {
        domid_t domain;
        struct pci_dev *dev;
index 17c565d..a6575b9 100644 (file)
@@ -18,5 +18,5 @@ obj-y         := enlighten.o setup.o multicalls.o mmu.o irq.o \
 obj-$(CONFIG_SMP)              += smp.o
 obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o
 obj-$(CONFIG_XEN_DEBUG_FS)     += debugfs.o
-
+obj-$(CONFIG_XEN_DOM0)         += vga.o
 obj-$(CONFIG_SWIOTLB_XEN)      += pci-swiotlb-xen.o
index 5525163..5325742 100644 (file)
@@ -1248,6 +1248,14 @@ asmlinkage void __init xen_start_kernel(void)
                if (pci_xen)
                        x86_init.pci.arch_init = pci_xen_init;
        } else {
+               const struct dom0_vga_console_info *info =
+                       (void *)((char *)xen_start_info +
+                                xen_start_info->console.dom0.info_off);
+
+               xen_init_vga(info, xen_start_info->console.dom0.info_size);
+               xen_start_info->console.domU.mfn = 0;
+               xen_start_info->console.domU.evtchn = 0;
+
                /* Make sure ACS will be enabled */
                pci_request_acs();
        }
index 25c52f9..ffcf261 100644 (file)
@@ -35,7 +35,7 @@ EXPORT_SYMBOL_GPL(xen_platform_pci_unplug);
 #ifdef CONFIG_XEN_PVHVM
 static int xen_emul_unplug;
 
-static int __init check_platform_magic(void)
+static int check_platform_magic(void)
 {
        short magic;
        char protocol;
diff --git a/arch/x86/xen/vga.c b/arch/x86/xen/vga.c
new file mode 100644 (file)
index 0000000..1cd7f4d
--- /dev/null
@@ -0,0 +1,67 @@
+#include <linux/screen_info.h>
+#include <linux/init.h>
+
+#include <asm/bootparam.h>
+#include <asm/setup.h>
+
+#include <xen/interface/xen.h>
+
+#include "xen-ops.h"
+
+void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size)
+{
+       struct screen_info *screen_info = &boot_params.screen_info;
+
+       /* This is drawn from a dump from vgacon:startup in
+        * standard Linux. */
+       screen_info->orig_video_mode = 3;
+       screen_info->orig_video_isVGA = 1;
+       screen_info->orig_video_lines = 25;
+       screen_info->orig_video_cols = 80;
+       screen_info->orig_video_ega_bx = 3;
+       screen_info->orig_video_points = 16;
+       screen_info->orig_y = screen_info->orig_video_lines - 1;
+
+       switch (info->video_type) {
+       case XEN_VGATYPE_TEXT_MODE_3:
+               if (size < offsetof(struct dom0_vga_console_info, u.text_mode_3)
+                   + sizeof(info->u.text_mode_3))
+                       break;
+               screen_info->orig_video_lines = info->u.text_mode_3.rows;
+               screen_info->orig_video_cols = info->u.text_mode_3.columns;
+               screen_info->orig_x = info->u.text_mode_3.cursor_x;
+               screen_info->orig_y = info->u.text_mode_3.cursor_y;
+               screen_info->orig_video_points =
+                       info->u.text_mode_3.font_height;
+               break;
+
+       case XEN_VGATYPE_VESA_LFB:
+               if (size < offsetof(struct dom0_vga_console_info,
+                                   u.vesa_lfb.gbl_caps))
+                       break;
+               screen_info->orig_video_isVGA = VIDEO_TYPE_VLFB;
+               screen_info->lfb_width = info->u.vesa_lfb.width;
+               screen_info->lfb_height = info->u.vesa_lfb.height;
+               screen_info->lfb_depth = info->u.vesa_lfb.bits_per_pixel;
+               screen_info->lfb_base = info->u.vesa_lfb.lfb_base;
+               screen_info->lfb_size = info->u.vesa_lfb.lfb_size;
+               screen_info->lfb_linelength = info->u.vesa_lfb.bytes_per_line;
+               screen_info->red_size = info->u.vesa_lfb.red_size;
+               screen_info->red_pos = info->u.vesa_lfb.red_pos;
+               screen_info->green_size = info->u.vesa_lfb.green_size;
+               screen_info->green_pos = info->u.vesa_lfb.green_pos;
+               screen_info->blue_size = info->u.vesa_lfb.blue_size;
+               screen_info->blue_pos = info->u.vesa_lfb.blue_pos;
+               screen_info->rsvd_size = info->u.vesa_lfb.rsvd_size;
+               screen_info->rsvd_pos = info->u.vesa_lfb.rsvd_pos;
+               if (size >= offsetof(struct dom0_vga_console_info,
+                                    u.vesa_lfb.gbl_caps)
+                   + sizeof(info->u.vesa_lfb.gbl_caps))
+                       screen_info->capabilities = info->u.vesa_lfb.gbl_caps;
+               if (size >= offsetof(struct dom0_vga_console_info,
+                                    u.vesa_lfb.mode_attrs)
+                   + sizeof(info->u.vesa_lfb.mode_attrs))
+                       screen_info->vesa_attributes = info->u.vesa_lfb.mode_attrs;
+               break;
+       }
+}
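
The size checks in the new vga.c follow a common pattern for versioned structures: a field is only trusted if the caller-supplied size reaches past its end. A stand-alone sketch of that check, assuming nothing beyond <stddef.h>; the FIELD_WITHIN() macro is hypothetical and not something vga.c defines.

	#include <stddef.h>

	/* True if a structure of 'size' bytes extends far enough to contain 'field'.
	 * Hypothetical helper; vga.c open-codes the same offsetof() arithmetic. */
	#define FIELD_WITHIN(size, type, field) \
		((size) >= offsetof(type, field) + sizeof(((type *)0)->field))

With such a helper, the text-mode branch above would read roughly as "if (FIELD_WITHIN(size, struct dom0_vga_console_info, u.text_mode_3)) ...", which is the condition the patch spells out by hand.
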
index 97dfdc8..b095739 100644 (file)
@@ -88,6 +88,17 @@ static inline void xen_uninit_lock_cpu(int cpu)
 }
 #endif
 
+struct dom0_vga_console_info;
+
+#ifdef CONFIG_XEN_DOM0
+void __init xen_init_vga(const struct dom0_vga_console_info *, size_t size);
+#else
+static inline void __init xen_init_vga(const struct dom0_vga_console_info *info,
+                                      size_t size)
+{
+}
+#endif
+
 /* Declare an asm function, along with symbols needed to make it
    inlineable */
 #define DECL_ASM(ret, name, ...)               \
index 6cc0db1..3f129b4 100644 (file)
@@ -684,7 +684,7 @@ again:
 
        err = xenbus_switch_state(dev, XenbusStateConnected);
        if (err)
-               xenbus_dev_fatal(dev, err, "switching to Connected state",
+               xenbus_dev_fatal(dev, err, "%s: switching to Connected state",
                                 dev->nodename);
 
        return;
index f245c58..ce7914c 100644 (file)
@@ -262,6 +262,7 @@ enum intel_pch {
 };
 
 #define QUIRK_PIPEA_FORCE (1<<0)
+#define QUIRK_LVDS_SSC_DISABLE (1<<1)
 
 struct intel_fbdev;
 
@@ -1194,7 +1195,9 @@ void i915_gem_free_all_phys_object(struct drm_device *dev);
 void i915_gem_release(struct drm_device *dev, struct drm_file *file);
 
 uint32_t
-i915_gem_get_unfenced_gtt_alignment(struct drm_i915_gem_object *obj);
+i915_gem_get_unfenced_gtt_alignment(struct drm_device *dev,
+                                   uint32_t size,
+                                   int tiling_mode);
 
 /* i915_gem_gtt.c */
 void i915_gem_restore_gtt_mappings(struct drm_device *dev);
index 5c0d124..a087e1b 100644 (file)
@@ -1374,25 +1374,24 @@ i915_gem_free_mmap_offset(struct drm_i915_gem_object *obj)
 }
 
 static uint32_t
-i915_gem_get_gtt_size(struct drm_i915_gem_object *obj)
+i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
 {
-       struct drm_device *dev = obj->base.dev;
-       uint32_t size;
+       uint32_t gtt_size;
 
        if (INTEL_INFO(dev)->gen >= 4 ||
-           obj->tiling_mode == I915_TILING_NONE)
-               return obj->base.size;
+           tiling_mode == I915_TILING_NONE)
+               return size;
 
        /* Previous chips need a power-of-two fence region when tiling */
        if (INTEL_INFO(dev)->gen == 3)
-               size = 1024*1024;
+               gtt_size = 1024*1024;
        else
-               size = 512*1024;
+               gtt_size = 512*1024;
 
-       while (size < obj->base.size)
-               size <<= 1;
+       while (gtt_size < size)
+               gtt_size <<= 1;
 
-       return size;
+       return gtt_size;
 }
 
 /**
@@ -1403,59 +1402,52 @@ i915_gem_get_gtt_size(struct drm_i915_gem_object *obj)
  * potential fence register mapping.
  */
 static uint32_t
-i915_gem_get_gtt_alignment(struct drm_i915_gem_object *obj)
+i915_gem_get_gtt_alignment(struct drm_device *dev,
+                          uint32_t size,
+                          int tiling_mode)
 {
-       struct drm_device *dev = obj->base.dev;
-
        /*
         * Minimum alignment is 4k (GTT page size), but might be greater
         * if a fence register is needed for the object.
         */
        if (INTEL_INFO(dev)->gen >= 4 ||
-           obj->tiling_mode == I915_TILING_NONE)
+           tiling_mode == I915_TILING_NONE)
                return 4096;
 
        /*
         * Previous chips need to be aligned to the size of the smallest
         * fence register that can contain the object.
         */
-       return i915_gem_get_gtt_size(obj);
+       return i915_gem_get_gtt_size(dev, size, tiling_mode);
 }
 
 /**
  * i915_gem_get_unfenced_gtt_alignment - return required GTT alignment for an
  *                                      unfenced object
- * @obj: object to check
+ * @dev: the device
+ * @size: size of the object
+ * @tiling_mode: tiling mode of the object
  *
  * Return the required GTT alignment for an object, only taking into account
  * unfenced tiled surface requirements.
  */
 uint32_t
-i915_gem_get_unfenced_gtt_alignment(struct drm_i915_gem_object *obj)
+i915_gem_get_unfenced_gtt_alignment(struct drm_device *dev,
+                                   uint32_t size,
+                                   int tiling_mode)
 {
-       struct drm_device *dev = obj->base.dev;
-       int tile_height;
-
        /*
         * Minimum alignment is 4k (GTT page size) for sane hw.
         */
        if (INTEL_INFO(dev)->gen >= 4 || IS_G33(dev) ||
-           obj->tiling_mode == I915_TILING_NONE)
+           tiling_mode == I915_TILING_NONE)
                return 4096;
 
-       /*
-        * Older chips need unfenced tiled buffers to be aligned to the left
-        * edge of an even tile row (where tile rows are counted as if the bo is
-        * placed in a fenced gtt region).
+       /* Previous hardware however needs to be aligned to a power-of-two
+        * tile height. The simplest method for determining this is to reuse
+        * the power-of-two object size.
         */
-       if (IS_GEN2(dev))
-               tile_height = 16;
-       else if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
-               tile_height = 32;
-       else
-               tile_height = 8;
-
-       return tile_height * obj->stride * 2;
+       return i915_gem_get_gtt_size(dev, size, tiling_mode);
 }
 
 int
@@ -2744,9 +2736,16 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
                return -EINVAL;
        }
 
-       fence_size = i915_gem_get_gtt_size(obj);
-       fence_alignment = i915_gem_get_gtt_alignment(obj);
-       unfenced_alignment = i915_gem_get_unfenced_gtt_alignment(obj);
+       fence_size = i915_gem_get_gtt_size(dev,
+                                          obj->base.size,
+                                          obj->tiling_mode);
+       fence_alignment = i915_gem_get_gtt_alignment(dev,
+                                                    obj->base.size,
+                                                    obj->tiling_mode);
+       unfenced_alignment =
+               i915_gem_get_unfenced_gtt_alignment(dev,
+                                                   obj->base.size,
+                                                   obj->tiling_mode);
 
        if (alignment == 0)
                alignment = map_and_fenceable ? fence_alignment :
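
A hedged, stand-alone sketch of what the reworked i915_gem_get_gtt_size() computes for pre-gen4 hardware: the fence region must be a power of two, starting at 1MB on gen3 and 512KB on older parts, doubled until it covers the object. The function below is illustrative and uses plain C types rather than the driver's structures.

	#include <stdint.h>
	#include <stdbool.h>

	/* Illustrative stand-in for the pre-gen4 branch of i915_gem_get_gtt_size(). */
	static uint32_t fence_region_size(uint32_t obj_size, bool is_gen3)
	{
		uint32_t gtt_size = is_gen3 ? 1024 * 1024 : 512 * 1024;

		while (gtt_size < obj_size)
			gtt_size <<= 1;

		return gtt_size;
	}
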
index 82d70fd..99c4faa 100644 (file)
@@ -348,7 +348,9 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
                /* Rebind if we need a change of alignment */
                if (!obj->map_and_fenceable) {
                        u32 unfenced_alignment =
-                               i915_gem_get_unfenced_gtt_alignment(obj);
+                               i915_gem_get_unfenced_gtt_alignment(dev,
+                                                                   obj->base.size,
+                                                                   args->tiling_mode);
                        if (obj->gtt_offset & (unfenced_alignment - 1))
                                ret = i915_gem_object_unbind(obj);
                }
index 21b6f93..0f1c799 100644 (file)
@@ -4305,7 +4305,8 @@ static void intel_update_watermarks(struct drm_device *dev)
 
 static inline bool intel_panel_use_ssc(struct drm_i915_private *dev_priv)
 {
-       return dev_priv->lvds_use_ssc && i915_panel_use_ssc;
+       return dev_priv->lvds_use_ssc && i915_panel_use_ssc
+               && !(dev_priv->quirks & QUIRK_LVDS_SSC_DISABLE);
 }
 
 static int i9xx_crtc_mode_set(struct drm_crtc *crtc,
@@ -7810,6 +7811,15 @@ static void quirk_pipea_force (struct drm_device *dev)
        DRM_DEBUG_DRIVER("applying pipe a force quirk\n");
 }
 
+/*
+ * Some machines (Lenovo U160) do not work with SSC on LVDS for some reason
+ */
+static void quirk_ssc_force_disable(struct drm_device *dev)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       dev_priv->quirks |= QUIRK_LVDS_SSC_DISABLE;
+}
+
 struct intel_quirk {
        int device;
        int subsystem_vendor;
@@ -7838,6 +7848,9 @@ struct intel_quirk intel_quirks[] = {
        /* 855 & before need to leave pipe A & dpll A up */
        { 0x3582, PCI_ANY_ID, PCI_ANY_ID, quirk_pipea_force },
        { 0x2562, PCI_ANY_ID, PCI_ANY_ID, quirk_pipea_force },
+
+       /* Lenovo U160 cannot use SSC on LVDS */
+       { 0x0046, 0x17aa, 0x3920, quirk_ssc_force_disable },
 };
 
 static void intel_init_quirks(struct drm_device *dev)
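
A hedged sketch of how a table like intel_quirks[] is typically walked by intel_init_quirks(): each entry matches the PCI device ID plus subsystem vendor/device, PCI_ANY_ID acts as a wildcard, and the entry's hook runs on a match. The loop below illustrates that pattern rather than copying the driver; the subsystem_device and hook member names are assumed from the table initializers above.

	/* Illustrative quirk-table walk; 'pdev' stands in for the probed PCI device. */
	static void apply_intel_quirks(struct drm_device *dev, struct pci_dev *pdev,
				       struct intel_quirk *q, int count)
	{
		int i;

		for (i = 0; i < count; i++) {
			if (pdev->device == q[i].device &&
			    (q[i].subsystem_vendor == PCI_ANY_ID ||
			     pdev->subsystem_vendor == q[i].subsystem_vendor) &&
			    (q[i].subsystem_device == PCI_ANY_ID ||
			     pdev->subsystem_device == q[i].subsystem_device))
				q[i].hook(dev);	/* assumed member name */
		}
	}
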
index efa2024..2535933 100644 (file)
@@ -117,7 +117,7 @@ static __init int map_switcher(void)
 
        /*
         * Now the Switcher is mapped at the right address, we can't fail!
-        * Copy in the compiled-in Switcher code (from <arch>_switcher.S).
+        * Copy in the compiled-in Switcher code (from x86/switcher_32.S).
         */
        memcpy(switcher_vma->addr, start_switcher_text,
               end_switcher_text - start_switcher_text);
index daaf866..28433a1 100644 (file)
@@ -375,11 +375,9 @@ static bool direct_trap(unsigned int num)
        /*
         * The Host needs to see page faults (for shadow paging and to save the
         * fault address), general protection faults (in/out emulation) and
-        * device not available (TS handling), invalid opcode fault (kvm hcall),
-        * and of course, the hypercall trap.
+        * device not available (TS handling) and of course, the hypercall trap.
         */
-       return num != 14 && num != 13 && num != 7 &&
-                       num != 6 && num != LGUEST_TRAP_ENTRY;
+       return num != 14 && num != 13 && num != 7 && num != LGUEST_TRAP_ENTRY;
 }
 /*:*/
 
@@ -429,8 +427,8 @@ void pin_stack_pages(struct lg_cpu *cpu)
 
 /*
  * Direct traps also mean that we need to know whenever the Guest wants to use
- * a different kernel stack, so we can change the IDT entries to use that
- * stack.  The IDT entries expect a virtual address, so unlike most addresses
+ * a different kernel stack, so we can change the guest TSS to use that
+ * stack.  The TSS entries expect a virtual address, so unlike most addresses
  * the Guest gives us, the "esp" (stack pointer) value here is virtual, not
  * physical.
  *
index 9136411..295df06 100644 (file)
@@ -59,6 +59,8 @@ struct lg_cpu {
 
        struct lguest_pages *last_pages;
 
+       /* Initialization mode: linear map everything. */
+       bool linear_pages;
        int cpu_pgd; /* Which pgd this cpu is currently using */
 
        /* If a hypercall was asked for, this points to the arguments. */
index 69c84a1..5289ffa 100644 (file)
@@ -108,6 +108,17 @@ static u32 lg_get_features(struct virtio_device *vdev)
        return features;
 }
 
+/*
+ * To notify on reset or feature finalization, we (ab)use the NOTIFY
+ * hypercall, with the descriptor address of the device.
+ */
+static void status_notify(struct virtio_device *vdev)
+{
+       unsigned long offset = (void *)to_lgdev(vdev)->desc - lguest_devices;
+
+       hcall(LHCALL_NOTIFY, (max_pfn << PAGE_SHIFT) + offset, 0, 0, 0);
+}
+
 /*
  * The virtio core takes the features the Host offers, and copies the ones
  * supported by the driver into the vdev->features array.  Once that's all
@@ -135,6 +146,9 @@ static void lg_finalize_features(struct virtio_device *vdev)
                if (test_bit(i, vdev->features))
                        out_features[i / 8] |= (1 << (i % 8));
        }
+
+       /* Tell Host we've finished with this device's feature negotiation */
+       status_notify(vdev);
 }
 
 /* Once they've found a field, getting a copy of it is easy. */
@@ -168,28 +182,21 @@ static u8 lg_get_status(struct virtio_device *vdev)
        return to_lgdev(vdev)->desc->status;
 }
 
-/*
- * To notify on status updates, we (ab)use the NOTIFY hypercall, with the
- * descriptor address of the device.  A zero status means "reset".
- */
-static void set_status(struct virtio_device *vdev, u8 status)
-{
-       unsigned long offset = (void *)to_lgdev(vdev)->desc - lguest_devices;
-
-       /* We set the status. */
-       to_lgdev(vdev)->desc->status = status;
-       hcall(LHCALL_NOTIFY, (max_pfn << PAGE_SHIFT) + offset, 0, 0, 0);
-}
-
 static void lg_set_status(struct virtio_device *vdev, u8 status)
 {
        BUG_ON(!status);
-       set_status(vdev, status);
+       to_lgdev(vdev)->desc->status = status;
+
+       /* Tell Host immediately if we failed. */
+       if (status & VIRTIO_CONFIG_S_FAILED)
+               status_notify(vdev);
 }
 
 static void lg_reset(struct virtio_device *vdev)
 {
-       set_status(vdev, 0);
+       /* 0 status means "reset" */
+       to_lgdev(vdev)->desc->status = 0;
+       status_notify(vdev);
 }
 
 /*
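
A hedged sketch of the NOTIFY argument that status_notify() builds above: the device descriptor's byte offset within the device page, addressed just past the top of guest RAM so the Launcher can tell which device changed state. The helper name is made up; the patch computes this inline.

	/* Illustrative only: mirrors the hcall() argument used in status_notify(). */
	static unsigned long status_notify_addr(struct virtio_device *vdev)
	{
		unsigned long offset = (void *)to_lgdev(vdev)->desc - lguest_devices;

		return (max_pfn << PAGE_SHIFT) + offset;
	}
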
index 948c547..f97e625 100644 (file)
@@ -1,8 +1,10 @@
-/*P:200 This contains all the /dev/lguest code, whereby the userspace launcher
- * controls and communicates with the Guest.  For example, the first write will
- * tell us the Guest's memory layout and entry point.  A read will run the
- * Guest until something happens, such as a signal or the Guest doing a NOTIFY
- * out to the Launcher.
+/*P:200 This contains all the /dev/lguest code, whereby the userspace
+ * launcher controls and communicates with the Guest.  For example,
+ * the first write will tell us the Guest's memory layout and entry
+ * point.  A read will run the Guest until something happens, such as
+ * a signal or the Guest doing a NOTIFY out to the Launcher.  There is
+ * also a way for the Launcher to attach eventfds to particular NOTIFY
+ * values instead of returning from the read() call.
 :*/
 #include <linux/uaccess.h>
 #include <linux/miscdevice.h>
@@ -357,8 +359,8 @@ static int initialize(struct file *file, const unsigned long __user *input)
                goto free_eventfds;
 
        /*
-        * Initialize the Guest's shadow page tables, using the toplevel
-        * address the Launcher gave us.  This allocates memory, so can fail.
+        * Initialize the Guest's shadow page tables.  This allocates
+        * memory, so can fail.
         */
        err = init_guest_pagetable(lg);
        if (err)
@@ -516,6 +518,7 @@ static const struct file_operations lguest_fops = {
        .read    = read,
        .llseek  = default_llseek,
 };
+/*:*/
 
 /*
  * This is a textbook example of a "misc" character device.  Populate a "struct
index d21578e..3b62be1 100644 (file)
@@ -17,7 +17,6 @@
 #include <linux/percpu.h>
 #include <asm/tlbflush.h>
 #include <asm/uaccess.h>
-#include <asm/bootparam.h>
 #include "lg.h"
 
 /*M:008
@@ -156,7 +155,7 @@ static pte_t *spte_addr(struct lg_cpu *cpu, pgd_t spgd, unsigned long vaddr)
 }
 
 /*
- * These functions are just like the above two, except they access the Guest
+ * These functions are just like the above, except they access the Guest
  * page tables.  Hence they return a Guest address.
  */
 static unsigned long gpgd_addr(struct lg_cpu *cpu, unsigned long vaddr)
@@ -196,7 +195,7 @@ static unsigned long gpte_addr(struct lg_cpu *cpu,
 #endif
 /*:*/
 
-/*M:014
+/*M:007
  * get_pfn is slow: we could probably try to grab batches of pages here as
  * an optimization (ie. pre-faulting).
 :*/
@@ -325,10 +324,15 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
 #endif
 
        /* First step: get the top-level Guest page table entry. */
-       gpgd = lgread(cpu, gpgd_addr(cpu, vaddr), pgd_t);
-       /* Toplevel not present?  We can't map it in. */
-       if (!(pgd_flags(gpgd) & _PAGE_PRESENT))
-               return false;
+       if (unlikely(cpu->linear_pages)) {
+               /* Faking up a linear mapping. */
+               gpgd = __pgd(CHECK_GPGD_MASK);
+       } else {
+               gpgd = lgread(cpu, gpgd_addr(cpu, vaddr), pgd_t);
+               /* Toplevel not present?  We can't map it in. */
+               if (!(pgd_flags(gpgd) & _PAGE_PRESENT))
+                       return false;
+       }
 
        /* Now look at the matching shadow entry. */
        spgd = spgd_addr(cpu, cpu->cpu_pgd, vaddr);
@@ -353,10 +357,15 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
        }
 
 #ifdef CONFIG_X86_PAE
-       gpmd = lgread(cpu, gpmd_addr(gpgd, vaddr), pmd_t);
-       /* Middle level not present?  We can't map it in. */
-       if (!(pmd_flags(gpmd) & _PAGE_PRESENT))
-               return false;
+       if (unlikely(cpu->linear_pages)) {
+               /* Faking up a linear mapping. */
+               gpmd = __pmd(_PAGE_TABLE);
+       } else {
+               gpmd = lgread(cpu, gpmd_addr(gpgd, vaddr), pmd_t);
+               /* Middle level not present?  We can't map it in. */
+               if (!(pmd_flags(gpmd) & _PAGE_PRESENT))
+                       return false;
+       }
 
        /* Now look at the matching shadow entry. */
        spmd = spmd_addr(cpu, *spgd, vaddr);
@@ -397,8 +406,13 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
        gpte_ptr = gpte_addr(cpu, gpgd, vaddr);
 #endif
 
-       /* Read the actual PTE value. */
-       gpte = lgread(cpu, gpte_ptr, pte_t);
+       if (unlikely(cpu->linear_pages)) {
+               /* Linear?  Make up a PTE which points to the same page. */
+               gpte = __pte((vaddr & PAGE_MASK) | _PAGE_RW | _PAGE_PRESENT);
+       } else {
+               /* Read the actual PTE value. */
+               gpte = lgread(cpu, gpte_ptr, pte_t);
+       }
 
        /* If this page isn't in the Guest page tables, we can't page it in. */
        if (!(pte_flags(gpte) & _PAGE_PRESENT))
@@ -454,7 +468,8 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
         * Finally, we write the Guest PTE entry back: we've set the
         * _PAGE_ACCESSED and maybe the _PAGE_DIRTY flags.
         */
-       lgwrite(cpu, gpte_ptr, pte_t, gpte);
+       if (likely(!cpu->linear_pages))
+               lgwrite(cpu, gpte_ptr, pte_t, gpte);
 
        /*
         * The fault is fixed, the page table is populated, the mapping
@@ -612,6 +627,11 @@ unsigned long guest_pa(struct lg_cpu *cpu, unsigned long vaddr)
 #ifdef CONFIG_X86_PAE
        pmd_t gpmd;
 #endif
+
+       /* Still not set up?  Just map 1:1. */
+       if (unlikely(cpu->linear_pages))
+               return vaddr;
+
        /* First step: get the top-level Guest page table entry. */
        gpgd = lgread(cpu, gpgd_addr(cpu, vaddr), pgd_t);
        /* Toplevel not present?  We can't map it in. */
@@ -708,32 +728,6 @@ static unsigned int new_pgdir(struct lg_cpu *cpu,
        return next;
 }
 
-/*H:430
- * (iv) Switching page tables
- *
- * Now we've seen all the page table setting and manipulation, let's see
- * what happens when the Guest changes page tables (ie. changes the top-level
- * pgdir).  This occurs on almost every context switch.
- */
-void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable)
-{
-       int newpgdir, repin = 0;
-
-       /* Look to see if we have this one already. */
-       newpgdir = find_pgdir(cpu->lg, pgtable);
-       /*
-        * If not, we allocate or mug an existing one: if it's a fresh one,
-        * repin gets set to 1.
-        */
-       if (newpgdir == ARRAY_SIZE(cpu->lg->pgdirs))
-               newpgdir = new_pgdir(cpu, pgtable, &repin);
-       /* Change the current pgd index to the new one. */
-       cpu->cpu_pgd = newpgdir;
-       /* If it was completely blank, we map in the Guest kernel stack */
-       if (repin)
-               pin_stack_pages(cpu);
-}
-
 /*H:470
  * Finally, a routine which throws away everything: all PGD entries in all
  * the shadow page tables, including the Guest's kernel mappings.  This is used
@@ -780,6 +774,44 @@ void guest_pagetable_clear_all(struct lg_cpu *cpu)
        /* We need the Guest kernel stack mapped again. */
        pin_stack_pages(cpu);
 }
+
+/*H:430
+ * (iv) Switching page tables
+ *
+ * Now we've seen all the page table setting and manipulation, let's see
+ * what happens when the Guest changes page tables (ie. changes the top-level
+ * pgdir).  This occurs on almost every context switch.
+ */
+void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable)
+{
+       int newpgdir, repin = 0;
+
+       /*
+        * The very first time they call this, we're actually running without
+        * any page tables; we've been making it up.  Throw them away now.
+        */
+       if (unlikely(cpu->linear_pages)) {
+               release_all_pagetables(cpu->lg);
+               cpu->linear_pages = false;
+               /* Force allocation of a new pgdir. */
+               newpgdir = ARRAY_SIZE(cpu->lg->pgdirs);
+       } else {
+               /* Look to see if we have this one already. */
+               newpgdir = find_pgdir(cpu->lg, pgtable);
+       }
+
+       /*
+        * If not, we allocate or mug an existing one: if it's a fresh one,
+        * repin gets set to 1.
+        */
+       if (newpgdir == ARRAY_SIZE(cpu->lg->pgdirs))
+               newpgdir = new_pgdir(cpu, pgtable, &repin);
+       /* Change the current pgd index to the new one. */
+       cpu->cpu_pgd = newpgdir;
+       /* If it was completely blank, we map in the Guest kernel stack */
+       if (repin)
+               pin_stack_pages(cpu);
+}
 /*:*/
 
 /*M:009
@@ -919,168 +951,26 @@ void guest_set_pmd(struct lguest *lg, unsigned long pmdp, u32 idx)
 }
 #endif
 
-/*H:505
- * To get through boot, we construct simple identity page mappings (which
- * set virtual == physical) and linear mappings which will get the Guest far
- * enough into the boot to create its own.  The linear mapping means we
- * simplify the Guest boot, but it makes assumptions about their PAGE_OFFSET,
- * as you'll see.
- *
- * We lay them out of the way, just below the initrd (which is why we need to
- * know its size here).
- */
-static unsigned long setup_pagetables(struct lguest *lg,
-                                     unsigned long mem,
-                                     unsigned long initrd_size)
-{
-       pgd_t __user *pgdir;
-       pte_t __user *linear;
-       unsigned long mem_base = (unsigned long)lg->mem_base;
-       unsigned int mapped_pages, i, linear_pages;
-#ifdef CONFIG_X86_PAE
-       pmd_t __user *pmds;
-       unsigned int j;
-       pgd_t pgd;
-       pmd_t pmd;
-#else
-       unsigned int phys_linear;
-#endif
-
-       /*
-        * We have mapped_pages frames to map, so we need linear_pages page
-        * tables to map them.
-        */
-       mapped_pages = mem / PAGE_SIZE;
-       linear_pages = (mapped_pages + PTRS_PER_PTE - 1) / PTRS_PER_PTE;
-
-       /* We put the toplevel page directory page at the top of memory. */
-       pgdir = (pgd_t *)(mem + mem_base - initrd_size - PAGE_SIZE);
-
-       /* Now we use the next linear_pages pages as pte pages */
-       linear = (void *)pgdir - linear_pages * PAGE_SIZE;
-
-#ifdef CONFIG_X86_PAE
-       /*
-        * And the single mid page goes below that.  We only use one, but
-        * that's enough to map 1G, which definitely gets us through boot.
-        */
-       pmds = (void *)linear - PAGE_SIZE;
-#endif
-       /*
-        * Linear mapping is easy: put every page's address into the
-        * mapping in order.
-        */
-       for (i = 0; i < mapped_pages; i++) {
-               pte_t pte;
-               pte = pfn_pte(i, __pgprot(_PAGE_PRESENT|_PAGE_RW|_PAGE_USER));
-               if (copy_to_user(&linear[i], &pte, sizeof(pte)) != 0)
-                       return -EFAULT;
-       }
-
-#ifdef CONFIG_X86_PAE
-       /*
-        * Make the Guest PMD entries point to the corresponding place in the
-        * linear mapping (up to one page worth of PMD).
-        */
-       for (i = j = 0; i < mapped_pages && j < PTRS_PER_PMD;
-            i += PTRS_PER_PTE, j++) {
-               pmd = pfn_pmd(((unsigned long)&linear[i] - mem_base)/PAGE_SIZE,
-                             __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER));
-
-               if (copy_to_user(&pmds[j], &pmd, sizeof(pmd)) != 0)
-                       return -EFAULT;
-       }
-
-       /* One PGD entry, pointing to that PMD page. */
-       pgd = __pgd(((unsigned long)pmds - mem_base) | _PAGE_PRESENT);
-       /* Copy it in as the first PGD entry (ie. addresses 0-1G). */
-       if (copy_to_user(&pgdir[0], &pgd, sizeof(pgd)) != 0)
-               return -EFAULT;
-       /*
-        * And the other PGD entry to make the linear mapping at PAGE_OFFSET
-        */
-       if (copy_to_user(&pgdir[KERNEL_PGD_BOUNDARY], &pgd, sizeof(pgd)))
-               return -EFAULT;
-#else
-       /*
-        * The top level points to the linear page table pages above.
-        * We setup the identity and linear mappings here.
-        */
-       phys_linear = (unsigned long)linear - mem_base;
-       for (i = 0; i < mapped_pages; i += PTRS_PER_PTE) {
-               pgd_t pgd;
-               /*
-                * Create a PGD entry which points to the right part of the
-                * linear PTE pages.
-                */
-               pgd = __pgd((phys_linear + i * sizeof(pte_t)) |
-                           (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER));
-
-               /*
-                * Copy it into the PGD page at 0 and PAGE_OFFSET.
-                */
-               if (copy_to_user(&pgdir[i / PTRS_PER_PTE], &pgd, sizeof(pgd))
-                   || copy_to_user(&pgdir[pgd_index(PAGE_OFFSET)
-                                          + i / PTRS_PER_PTE],
-                                   &pgd, sizeof(pgd)))
-                       return -EFAULT;
-       }
-#endif
-
-       /*
-        * We return the top level (guest-physical) address: we remember where
-        * this is to write it into lguest_data when the Guest initializes.
-        */
-       return (unsigned long)pgdir - mem_base;
-}
-
 /*H:500
  * (vii) Setting up the page tables initially.
  *
- * When a Guest is first created, the Launcher tells us where the toplevel of
- * its first page table is.  We set some things up here:
+ * When a Guest is first created, we initialize a shadow page table which
+ * we will populate on future faults.  The Guest doesn't have any actual
+ * pagetables yet, so we set linear_pages to tell demand_page() to fake it
+ * for the moment.
  */
 int init_guest_pagetable(struct lguest *lg)
 {
-       u64 mem;
-       u32 initrd_size;
-       struct boot_params __user *boot = (struct boot_params *)lg->mem_base;
-#ifdef CONFIG_X86_PAE
-       pgd_t *pgd;
-       pmd_t *pmd_table;
-#endif
-       /*
-        * Get the Guest memory size and the ramdisk size from the boot header
-        * located at lg->mem_base (Guest address 0).
-        */
-       if (copy_from_user(&mem, &boot->e820_map[0].size, sizeof(mem))
-           || get_user(initrd_size, &boot->hdr.ramdisk_size))
-               return -EFAULT;
+       struct lg_cpu *cpu = &lg->cpus[0];
+       int allocated = 0;
 
-       /*
-        * We start on the first shadow page table, and give it a blank PGD
-        * page.
-        */
-       lg->pgdirs[0].gpgdir = setup_pagetables(lg, mem, initrd_size);
-       if (IS_ERR_VALUE(lg->pgdirs[0].gpgdir))
-               return lg->pgdirs[0].gpgdir;
-       lg->pgdirs[0].pgdir = (pgd_t *)get_zeroed_page(GFP_KERNEL);
-       if (!lg->pgdirs[0].pgdir)
+       /* lg (and lg->cpus[]) starts zeroed: this allocates a new pgdir */
+       cpu->cpu_pgd = new_pgdir(cpu, 0, &allocated);
+       if (!allocated)
                return -ENOMEM;
 
-#ifdef CONFIG_X86_PAE
-       /* For PAE, we also create the initial mid-level. */
-       pgd = lg->pgdirs[0].pgdir;
-       pmd_table = (pmd_t *) get_zeroed_page(GFP_KERNEL);
-       if (!pmd_table)
-               return -ENOMEM;
-
-       set_pgd(pgd + SWITCHER_PGD_INDEX,
-               __pgd(__pa(pmd_table) | _PAGE_PRESENT));
-#endif
-
-       /* This is the current page table. */
-       lg->cpus[0].cpu_pgd = 0;
+       /* We start with a linear mapping until the Guest initializes. */
+       cpu->linear_pages = true;
        return 0;
 }
 
@@ -1095,10 +985,10 @@ void page_table_guest_data_init(struct lg_cpu *cpu)
                 * of virtual addresses used by the Switcher.
                 */
                || put_user(RESERVE_MEM * 1024 * 1024,
-                       &cpu->lg->lguest_data->reserve_mem)
-               || put_user(cpu->lg->pgdirs[0].gpgdir,
-                       &cpu->lg->lguest_data->pgdir))
+                           &cpu->lg->lguest_data->reserve_mem)) {
                kill_guest(cpu, "bad guest page %p", cpu->lg->lguest_data);
+               return;
+       }
 
        /*
         * In flush_user_mappings() we loop from 0 to
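
A hedged sketch of the "linear" PTE that demand_page() now fabricates while cpu->linear_pages is set: the faulting Guest virtual address maps straight onto the same guest-physical page, writable and present, until the Guest installs real page tables. The helper below is illustrative; the patch open-codes it at each use.

	/* Illustrative: the fake identity mapping used before the Guest has
	 * page tables of its own. */
	static pte_t linear_pte(unsigned long vaddr)
	{
		return __pte((vaddr & PAGE_MASK) | _PAGE_RW | _PAGE_PRESENT);
	}
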
index 9f1659c..65af42f 100644 (file)
@@ -269,10 +269,10 @@ void lguest_arch_run_guest(struct lg_cpu *cpu)
 static int emulate_insn(struct lg_cpu *cpu)
 {
        u8 insn;
-       unsigned int insnlen = 0, in = 0, shift = 0;
+       unsigned int insnlen = 0, in = 0, small_operand = 0;
        /*
         * The eip contains the *virtual* address of the Guest's instruction:
-        * guest_pa just subtracts the Guest's page_offset.
+        * walk the Guest's page tables to find the "physical" address.
         */
        unsigned long physaddr = guest_pa(cpu, cpu->regs->eip);
 
@@ -300,11 +300,10 @@ static int emulate_insn(struct lg_cpu *cpu)
        }
 
        /*
-        * 0x66 is an "operand prefix".  It means it's using the upper 16 bits
-        * of the eax register.
+        * 0x66 is an "operand prefix".  It means a 16-bit, not 32-bit, in/out.
         */
        if (insn == 0x66) {
-               shift = 16;
+               small_operand = 1;
                /* The instruction is 1 byte so far, read the next byte. */
                insnlen = 1;
                insn = lgread(cpu, physaddr + insnlen, u8);
@@ -340,11 +339,14 @@ static int emulate_insn(struct lg_cpu *cpu)
         * traditionally means "there's nothing there".
         */
        if (in) {
-               /* Lower bit tells is whether it's a 16 or 32 bit access */
-               if (insn & 0x1)
-                       cpu->regs->eax = 0xFFFFFFFF;
-               else
-                       cpu->regs->eax |= (0xFFFF << shift);
+               /* Lower bit means it's a 32/16-bit access */
+               if (insn & 0x1) {
+                       if (small_operand)
+                               cpu->regs->eax |= 0xFFFF;
+                       else
+                               cpu->regs->eax = 0xFFFFFFFF;
+               } else
+                       cpu->regs->eax |= 0xFF;
        }
        /* Finally, we've "done" the instruction, so move past it. */
        cpu->regs->eip += insnlen;
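
A hedged restatement of the IN emulation above as a small helper: the low opcode bit selects a byte versus word/long access, a preceding 0x66 prefix narrows the word case to 16 bits, and the Guest reads back all ones for the missing port. Function and parameter names are illustrative.

	/* Illustrative: value left in eax after an emulated IN from a missing port. */
	static u32 emulated_in_result(u32 eax, u8 insn, int small_operand)
	{
		if (!(insn & 0x1))		/* 8-bit access */
			return eax | 0xFF;
		if (small_operand)		/* 0x66 prefix: 16-bit access */
			return eax | 0xFFFF;
		return 0xFFFFFFFF;		/* full 32-bit access */
	}
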
@@ -352,69 +354,6 @@ static int emulate_insn(struct lg_cpu *cpu)
        return 1;
 }
 
-/*
- * Our hypercalls mechanism used to be based on direct software interrupts.
- * After Anthony's "Refactor hypercall infrastructure" kvm patch, we decided to
- * change over to using kvm hypercalls.
- *
- * KVM_HYPERCALL is actually a "vmcall" instruction, which generates an invalid
- * opcode fault (fault 6) on non-VT cpus, so the easiest solution seemed to be
- * an *emulation approach*: if the fault was really produced by an hypercall
- * (is_hypercall() does exactly this check), we can just call the corresponding
- * hypercall host implementation function.
- *
- * But these invalid opcode faults are notably slower than software interrupts.
- * So we implemented the *patching (or rewriting) approach*: every time we hit
- * the KVM_HYPERCALL opcode in Guest code, we patch it to the old "int 0x1f"
- * opcode, so next time the Guest calls this hypercall it will use the
- * faster trap mechanism.
- *
- * Matias even benchmarked it to convince you: this shows the average cycle
- * cost of a hypercall.  For each alternative solution mentioned above we've
- * made 5 runs of the benchmark:
- *
- * 1) direct software interrupt: 2915, 2789, 2764, 2721, 2898
- * 2) emulation technique: 3410, 3681, 3466, 3392, 3780
- * 3) patching (rewrite) technique: 2977, 2975, 2891, 2637, 2884
- *
- * One two-line function is worth a 20% hypercall speed boost!
- */
-static void rewrite_hypercall(struct lg_cpu *cpu)
-{
-       /*
-        * This are the opcodes we use to patch the Guest.  The opcode for "int
-        * $0x1f" is "0xcd 0x1f" but vmcall instruction is 3 bytes long, so we
-        * complete the sequence with a NOP (0x90).
-        */
-       u8 insn[3] = {0xcd, 0x1f, 0x90};
-
-       __lgwrite(cpu, guest_pa(cpu, cpu->regs->eip), insn, sizeof(insn));
-       /*
-        * The above write might have caused a copy of that page to be made
-        * (if it was read-only).  We need to make sure the Guest has
-        * up-to-date pagetables.  As this doesn't happen often, we can just
-        * drop them all.
-        */
-       guest_pagetable_clear_all(cpu);
-}
-
-static bool is_hypercall(struct lg_cpu *cpu)
-{
-       u8 insn[3];
-
-       /*
-        * This must be the Guest kernel trying to do something.
-        * The bottom two bits of the CS segment register are the privilege
-        * level.
-        */
-       if ((cpu->regs->cs & 3) != GUEST_PL)
-               return false;
-
-       /* Is it a vmcall? */
-       __lgread(cpu, insn, guest_pa(cpu, cpu->regs->eip), sizeof(insn));
-       return insn[0] == 0x0f && insn[1] == 0x01 && insn[2] == 0xc1;
-}
-
 /*H:050 Once we've re-enabled interrupts, we look at why the Guest exited. */
 void lguest_arch_handle_trap(struct lg_cpu *cpu)
 {
@@ -429,20 +368,6 @@ void lguest_arch_handle_trap(struct lg_cpu *cpu)
                        if (emulate_insn(cpu))
                                return;
                }
-               /*
-                * If KVM is active, the vmcall instruction triggers a General
-                * Protection Fault.  Normally it triggers an invalid opcode
-                * fault (6):
-                */
-       case 6:
-               /*
-                * We need to check if ring == GUEST_PL and faulting
-                * instruction == vmcall.
-                */
-               if (is_hypercall(cpu)) {
-                       rewrite_hypercall(cpu);
-                       return;
-               }
                break;
        case 14: /* We've intercepted a Page Fault. */
                /*
@@ -486,7 +411,7 @@ void lguest_arch_handle_trap(struct lg_cpu *cpu)
                 * These values mean a real interrupt occurred, in which case
                 * the Host handler has already been run. We just do a
                 * friendly check if another process should now be run, then
-                * return to run the Guest again
+                * return to run the Guest again.
                 */
                cond_resched();
                return;
@@ -536,7 +461,7 @@ void __init lguest_arch_host_init(void)
        int i;
 
        /*
-        * Most of the i386/switcher.S doesn't care that it's been moved; on
+        * Most of the x86/switcher_32.S doesn't care that it's been moved; on
         * Intel, jumps are relative, and it doesn't access any references to
         * external code or data.
         *
@@ -664,7 +589,7 @@ void __init lguest_arch_host_init(void)
                clear_cpu_cap(&boot_cpu_data, X86_FEATURE_PGE);
        }
        put_online_cpus();
-};
+}
 /*:*/
 
 void __exit lguest_arch_host_fini(void)
@@ -747,8 +672,6 @@ int lguest_arch_init_hypercalls(struct lg_cpu *cpu)
 /*:*/
 
 /*L:030
- * lguest_arch_setup_regs()
- *
  * Most of the Guest's registers are left alone: we used get_zeroed_page() to
  * allocate the structure, so they will be 0.
  */
index f85e422..1ff5486 100644 (file)
@@ -106,6 +106,16 @@ struct mmc_blk_data {
 
 static DEFINE_MUTEX(open_lock);
 
+enum mmc_blk_status {
+       MMC_BLK_SUCCESS = 0,
+       MMC_BLK_PARTIAL,
+       MMC_BLK_RETRY,
+       MMC_BLK_RETRY_SINGLE,
+       MMC_BLK_DATA_ERR,
+       MMC_BLK_CMD_ERR,
+       MMC_BLK_ABORT,
+};
+
 module_param(perdev_minors, int, 0444);
 MODULE_PARM_DESC(perdev_minors, "Minors numbers to allocate per device");
 
@@ -427,14 +437,6 @@ static const struct block_device_operations mmc_bdops = {
 #endif
 };
 
-struct mmc_blk_request {
-       struct mmc_request      mrq;
-       struct mmc_command      sbc;
-       struct mmc_command      cmd;
-       struct mmc_command      stop;
-       struct mmc_data         data;
-};
-
 static inline int mmc_blk_part_switch(struct mmc_card *card,
                                      struct mmc_blk_data *md)
 {
@@ -525,7 +527,20 @@ static u32 mmc_sd_num_wr_blocks(struct mmc_card *card)
        return result;
 }
 
-static u32 get_card_status(struct mmc_card *card, struct request *req)
+static int send_stop(struct mmc_card *card, u32 *status)
+{
+       struct mmc_command cmd = {0};
+       int err;
+
+       cmd.opcode = MMC_STOP_TRANSMISSION;
+       cmd.flags = MMC_RSP_SPI_R1B | MMC_RSP_R1B | MMC_CMD_AC;
+       err = mmc_wait_for_cmd(card->host, &cmd, 5);
+       if (err == 0)
+               *status = cmd.resp[0];
+       return err;
+}
+
+static int get_card_status(struct mmc_card *card, u32 *status, int retries)
 {
        struct mmc_command cmd = {0};
        int err;
@@ -534,11 +549,141 @@ static u32 get_card_status(struct mmc_card *card, struct request *req)
        if (!mmc_host_is_spi(card->host))
                cmd.arg = card->rca << 16;
        cmd.flags = MMC_RSP_SPI_R2 | MMC_RSP_R1 | MMC_CMD_AC;
-       err = mmc_wait_for_cmd(card->host, &cmd, 0);
+       err = mmc_wait_for_cmd(card->host, &cmd, retries);
+       if (err == 0)
+               *status = cmd.resp[0];
+       return err;
+}
+
+#define ERR_RETRY      2
+#define ERR_ABORT      1
+#define ERR_CONTINUE   0
+
+static int mmc_blk_cmd_error(struct request *req, const char *name, int error,
+       bool status_valid, u32 status)
+{
+       switch (error) {
+       case -EILSEQ:
+               /* response crc error, retry the r/w cmd */
+               pr_err("%s: %s sending %s command, card status %#x\n",
+                       req->rq_disk->disk_name, "response CRC error",
+                       name, status);
+               return ERR_RETRY;
+
+       case -ETIMEDOUT:
+               pr_err("%s: %s sending %s command, card status %#x\n",
+                       req->rq_disk->disk_name, "timed out", name, status);
+
+               /* If the status cmd initially failed, retry the r/w cmd */
+               if (!status_valid)
+                       return ERR_RETRY;
+
+               /*
+                * If it was an r/w cmd CRC error, or an illegal command
+                * (e.g. issued in the wrong state), then retry - we should
+                * have corrected the state problem above.
+                */
+               if (status & (R1_COM_CRC_ERROR | R1_ILLEGAL_COMMAND))
+                       return ERR_RETRY;
+
+               /* Otherwise abort the command */
+               return ERR_ABORT;
+
+       default:
+               /* We don't understand the error code the driver gave us */
+               pr_err("%s: unknown error %d sending read/write command, card status %#x\n",
+                      req->rq_disk->disk_name, error, status);
+               return ERR_ABORT;
+       }
+}
+
+/*
+ * Initial r/w and stop cmd error recovery.
+ * We don't know whether the card received the r/w cmd or not, so try to
+ * restore things back to a sane state.  Essentially, we do this as follows:
+ * - Obtain card status.  If the first attempt to obtain card status fails,
+ *   the status word will reflect the failed status cmd, not the failed
+ *   r/w cmd.  If we fail to obtain card status, it suggests we can no
+ *   longer communicate with the card.
+ * - Check the card state.  If the card received the cmd but there was a
+ *   transient problem with the response, it might still be in a data transfer
+ *   mode.  Try to send it a stop command.  If this fails, we can't recover.
+ * - If the r/w cmd failed due to a response CRC error, it was probably
+ *   transient, so retry the cmd.
+ * - If the r/w cmd timed out, but we didn't get the r/w cmd status, retry.
+ * - If the r/w cmd timed out, and the r/w cmd failed due to CRC error or
+ *   illegal cmd, retry.
+ * Otherwise we don't understand what happened, so abort.
+ */
+static int mmc_blk_cmd_recovery(struct mmc_card *card, struct request *req,
+       struct mmc_blk_request *brq)
+{
+       bool prev_cmd_status_valid = true;
+       u32 status, stop_status = 0;
+       int err, retry;
+
+       /*
+        * Try to get card status which indicates both the card state
+        * and why there was no response.  If the first attempt fails,
+        * we can't be sure the returned status is for the r/w command.
+        */
+       for (retry = 2; retry >= 0; retry--) {
+               err = get_card_status(card, &status, 0);
+               if (!err)
+                       break;
+
+               prev_cmd_status_valid = false;
+               pr_err("%s: error %d sending status command, %sing\n",
+                      req->rq_disk->disk_name, err, retry ? "retry" : "abort");
+       }
+
+       /* We couldn't get a response from the card.  Give up. */
        if (err)
-               printk(KERN_ERR "%s: error %d sending status command",
-                      req->rq_disk->disk_name, err);
-       return cmd.resp[0];
+               return ERR_ABORT;
+
+       /*
+        * Check the current card state.  If it is in some data transfer
+        * mode, tell it to stop (and hopefully transition back to TRAN).
+        */
+       if (R1_CURRENT_STATE(status) == R1_STATE_DATA ||
+           R1_CURRENT_STATE(status) == R1_STATE_RCV) {
+               err = send_stop(card, &stop_status);
+               if (err)
+                       pr_err("%s: error %d sending stop command\n",
+                              req->rq_disk->disk_name, err);
+
+               /*
+                * If the stop cmd also timed out, the card is probably
+                * not present, so abort.  Other errors are bad news too.
+                */
+               if (err)
+                       return ERR_ABORT;
+       }
+
+       /* Check for set block count errors */
+       if (brq->sbc.error)
+               return mmc_blk_cmd_error(req, "SET_BLOCK_COUNT", brq->sbc.error,
+                               prev_cmd_status_valid, status);
+
+       /* Check for r/w command errors */
+       if (brq->cmd.error)
+               return mmc_blk_cmd_error(req, "r/w cmd", brq->cmd.error,
+                               prev_cmd_status_valid, status);
+
+       /* Now for stop errors.  These aren't fatal to the transfer. */
+       pr_err("%s: error %d sending stop command, original cmd response %#x, card status %#x\n",
+              req->rq_disk->disk_name, brq->stop.error,
+              brq->cmd.resp[0], status);
+
+       /*
+        * Substitute in our own stop status as this will give the error
+        * state which happened during the execution of the r/w command.
+        */
+       if (stop_status) {
+               brq->stop.resp[0] = stop_status;
+               brq->stop.error = 0;
+       }
+       return ERR_CONTINUE;
 }
 
 static int mmc_blk_issue_discard_rq(struct mmc_queue *mq, struct request *req)
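
A hedged usage sketch of the recovery helper added above: once the initial r/w or stop command has failed, a caller runs mmc_blk_cmd_recovery() and maps its ERR_RETRY / ERR_ABORT / ERR_CONTINUE verdict onto the new mmc_blk_status values, roughly as the error-check path later in this patch does. The wrapper is illustrative and simplifies the ERR_CONTINUE case.

	/* Illustrative wrapper; not part of the patch. */
	static enum mmc_blk_status classify_cmd_error(struct mmc_card *card,
						      struct request *req,
						      struct mmc_blk_request *brq)
	{
		switch (mmc_blk_cmd_recovery(card, req, brq)) {
		case ERR_RETRY:
			return MMC_BLK_RETRY;	/* re-issue the whole r/w command */
		case ERR_ABORT:
			return MMC_BLK_ABORT;	/* give up on this request */
		default:			/* ERR_CONTINUE: only the stop cmd failed */
			return MMC_BLK_SUCCESS;	/* keep checking data status */
		}
	}
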
@@ -669,240 +814,324 @@ static inline void mmc_apply_rel_rw(struct mmc_blk_request *brq,
        }
 }
 
-static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *req)
+#define CMD_ERRORS                                                     \
+       (R1_OUT_OF_RANGE |      /* Command argument out of range */     \
+        R1_ADDRESS_ERROR |     /* Misaligned address */                \
+        R1_BLOCK_LEN_ERROR |   /* Transferred block length incorrect */\
+        R1_WP_VIOLATION |      /* Tried to write to protected block */ \
+        R1_CC_ERROR |          /* Card controller error */             \
+        R1_ERROR)              /* General/unknown error */
+
+static int mmc_blk_err_check(struct mmc_card *card,
+                            struct mmc_async_req *areq)
 {
-       struct mmc_blk_data *md = mq->data;
-       struct mmc_card *card = md->queue.card;
-       struct mmc_blk_request brq;
-       int ret = 1, disable_multi = 0;
+       enum mmc_blk_status ret = MMC_BLK_SUCCESS;
+       struct mmc_queue_req *mq_mrq = container_of(areq, struct mmc_queue_req,
+                                                   mmc_active);
+       struct mmc_blk_request *brq = &mq_mrq->brq;
+       struct request *req = mq_mrq->req;
 
        /*
-        * Reliable writes are used to implement Forced Unit Access and
-        * REQ_META accesses, and are supported only on MMCs.
+        * sbc.error indicates a problem with the set block count
+        * command.  No data will have been transferred.
+        *
+        * cmd.error indicates a problem with the r/w command.  No
+        * data will have been transferred.
+        *
+        * stop.error indicates a problem with the stop command.  Data
+        * may have been transferred, or may still be transferring.
         */
-       bool do_rel_wr = ((req->cmd_flags & REQ_FUA) ||
-                         (req->cmd_flags & REQ_META)) &&
-               (rq_data_dir(req) == WRITE) &&
-               (md->flags & MMC_BLK_REL_WR);
+       if (brq->sbc.error || brq->cmd.error || brq->stop.error) {
+               switch (mmc_blk_cmd_recovery(card, req, brq)) {
+               case ERR_RETRY:
+                       return MMC_BLK_RETRY;
+               case ERR_ABORT:
+                       return MMC_BLK_ABORT;
+               case ERR_CONTINUE:
+                       break;
+               }
+       }
 
-       do {
-               struct mmc_command cmd = {0};
-               u32 readcmd, writecmd, status = 0;
-
-               memset(&brq, 0, sizeof(struct mmc_blk_request));
-               brq.mrq.cmd = &brq.cmd;
-               brq.mrq.data = &brq.data;
-
-               brq.cmd.arg = blk_rq_pos(req);
-               if (!mmc_card_blockaddr(card))
-                       brq.cmd.arg <<= 9;
-               brq.cmd.flags = MMC_RSP_SPI_R1 | MMC_RSP_R1 | MMC_CMD_ADTC;
-               brq.data.blksz = 512;
-               brq.stop.opcode = MMC_STOP_TRANSMISSION;
-               brq.stop.arg = 0;
-               brq.stop.flags = MMC_RSP_SPI_R1B | MMC_RSP_R1B | MMC_CMD_AC;
-               brq.data.blocks = blk_rq_sectors(req);
+       /*
+        * Check for errors relating to the execution of the
+        * initial command - such as address errors.  No data
+        * has been transferred.
+        */
+       if (brq->cmd.resp[0] & CMD_ERRORS) {
+               pr_err("%s: r/w command failed, status = %#x\n",
+                      req->rq_disk->disk_name, brq->cmd.resp[0]);
+               return MMC_BLK_ABORT;
+       }
 
-               /*
-                * The block layer doesn't support all sector count
-                * restrictions, so we need to be prepared for too big
-                * requests.
-                */
-               if (brq.data.blocks > card->host->max_blk_count)
-                       brq.data.blocks = card->host->max_blk_count;
+       /*
+        * Everything else is either success, or a data error of some
+        * kind.  If it was a write, we may have transitioned to
+        * program mode, which we have to wait for to complete.
+        */
+       if (!mmc_host_is_spi(card->host) && rq_data_dir(req) != READ) {
+               u32 status;
+               do {
+                       int err = get_card_status(card, &status, 5);
+                       if (err) {
+                               printk(KERN_ERR "%s: error %d requesting status\n",
+                                      req->rq_disk->disk_name, err);
+                               return MMC_BLK_CMD_ERR;
+                       }
+                       /*
+                        * Some cards mishandle the status bits,
+                        * so make sure to check both the busy
+                        * indication and the card state.
+                        */
+               } while (!(status & R1_READY_FOR_DATA) ||
+                        (R1_CURRENT_STATE(status) == R1_STATE_PRG));
+       }
 
-               /*
-                * After a read error, we redo the request one sector at a time
-                * in order to accurately determine which sectors can be read
-                * successfully.
-                */
-               if (disable_multi && brq.data.blocks > 1)
-                       brq.data.blocks = 1;
+       if (brq->data.error) {
+               pr_err("%s: error %d transferring data, sector %u, nr %u, cmd response %#x, card status %#x\n",
+                      req->rq_disk->disk_name, brq->data.error,
+                      (unsigned)blk_rq_pos(req),
+                      (unsigned)blk_rq_sectors(req),
+                      brq->cmd.resp[0], brq->stop.resp[0]);
 
-               if (brq.data.blocks > 1 || do_rel_wr) {
-                       /* SPI multiblock writes terminate using a special
-                        * token, not a STOP_TRANSMISSION request.
-                        */
-                       if (!mmc_host_is_spi(card->host) ||
-                           rq_data_dir(req) == READ)
-                               brq.mrq.stop = &brq.stop;
-                       readcmd = MMC_READ_MULTIPLE_BLOCK;
-                       writecmd = MMC_WRITE_MULTIPLE_BLOCK;
-               } else {
-                       brq.mrq.stop = NULL;
-                       readcmd = MMC_READ_SINGLE_BLOCK;
-                       writecmd = MMC_WRITE_BLOCK;
-               }
                if (rq_data_dir(req) == READ) {
-                       brq.cmd.opcode = readcmd;
-                       brq.data.flags |= MMC_DATA_READ;
+                       if (brq->data.blocks > 1) {
+                               /* Redo read one sector at a time */
+                               pr_warning("%s: retrying using single block read\n",
+                                          req->rq_disk->disk_name);
+                               return MMC_BLK_RETRY_SINGLE;
+                       }
+                       return MMC_BLK_DATA_ERR;
                } else {
-                       brq.cmd.opcode = writecmd;
-                       brq.data.flags |= MMC_DATA_WRITE;
+                       return MMC_BLK_CMD_ERR;
                }
+       }
 
-               if (do_rel_wr)
-                       mmc_apply_rel_rw(&brq, card, req);
+       if (ret == MMC_BLK_SUCCESS &&
+           blk_rq_bytes(req) != brq->data.bytes_xfered)
+               ret = MMC_BLK_PARTIAL;
 
-               /*
-                * Pre-defined multi-block transfers are preferable to
-                * open ended-ones (and necessary for reliable writes).
-                * However, it is not sufficient to just send CMD23,
-                * and avoid the final CMD12, as on an error condition
-                * CMD12 (stop) needs to be sent anyway. This, coupled
-                * with Auto-CMD23 enhancements provided by some
-                * hosts, means that the complexity of dealing
-                * with this is best left to the host. If CMD23 is
-                * supported by card and host, we'll fill sbc in and let
-                * the host deal with handling it correctly. This means
-                * that for hosts that don't expose MMC_CAP_CMD23, no
-                * change of behavior will be observed.
-                *
-                * N.B: Some MMC cards experience perf degradation.
-                * We'll avoid using CMD23-bounded multiblock writes for
-                * these, while retaining features like reliable writes.
-                */
+       return ret;
+}
 
-               if ((md->flags & MMC_BLK_CMD23) &&
-                   mmc_op_multi(brq.cmd.opcode) &&
-                   (do_rel_wr || !(card->quirks & MMC_QUIRK_BLK_NO_CMD23))) {
-                       brq.sbc.opcode = MMC_SET_BLOCK_COUNT;
-                       brq.sbc.arg = brq.data.blocks |
-                               (do_rel_wr ? (1 << 31) : 0);
-                       brq.sbc.flags = MMC_RSP_R1 | MMC_CMD_AC;
-                       brq.mrq.sbc = &brq.sbc;
-               }
+static void mmc_blk_rw_rq_prep(struct mmc_queue_req *mqrq,
+                              struct mmc_card *card,
+                              int disable_multi,
+                              struct mmc_queue *mq)
+{
+       u32 readcmd, writecmd;
+       struct mmc_blk_request *brq = &mqrq->brq;
+       struct request *req = mqrq->req;
+       struct mmc_blk_data *md = mq->data;
 
-               mmc_set_data_timeout(&brq.data, card);
+       /*
+        * Reliable writes are used to implement Forced Unit Access and
+        * REQ_META accesses, and are supported only on MMCs.
+        */
+       bool do_rel_wr = ((req->cmd_flags & REQ_FUA) ||
+                         (req->cmd_flags & REQ_META)) &&
+               (rq_data_dir(req) == WRITE) &&
+               (md->flags & MMC_BLK_REL_WR);
 
-               brq.data.sg = mq->sg;
-               brq.data.sg_len = mmc_queue_map_sg(mq);
+       memset(brq, 0, sizeof(struct mmc_blk_request));
+       brq->mrq.cmd = &brq->cmd;
+       brq->mrq.data = &brq->data;
 
-               /*
-                * Adjust the sg list so it is the same size as the
-                * request.
+       brq->cmd.arg = blk_rq_pos(req);
+       if (!mmc_card_blockaddr(card))
+               brq->cmd.arg <<= 9;
+       brq->cmd.flags = MMC_RSP_SPI_R1 | MMC_RSP_R1 | MMC_CMD_ADTC;
+       brq->data.blksz = 512;
+       brq->stop.opcode = MMC_STOP_TRANSMISSION;
+       brq->stop.arg = 0;
+       brq->stop.flags = MMC_RSP_SPI_R1B | MMC_RSP_R1B | MMC_CMD_AC;
+       brq->data.blocks = blk_rq_sectors(req);
+
+       /*
+        * The block layer doesn't support all sector count
+        * restrictions, so we need to be prepared for too big
+        * requests.
+        */
+       if (brq->data.blocks > card->host->max_blk_count)
+               brq->data.blocks = card->host->max_blk_count;
+
+       /*
+        * After a read error, we redo the request one sector at a time
+        * in order to accurately determine which sectors can be read
+        * successfully.
+        */
+       if (disable_multi && brq->data.blocks > 1)
+               brq->data.blocks = 1;
+
+       if (brq->data.blocks > 1 || do_rel_wr) {
+               /* SPI multiblock writes terminate using a special
+                * token, not a STOP_TRANSMISSION request.
                 */
-               if (brq.data.blocks != blk_rq_sectors(req)) {
-                       int i, data_size = brq.data.blocks << 9;
-                       struct scatterlist *sg;
-
-                       for_each_sg(brq.data.sg, sg, brq.data.sg_len, i) {
-                               data_size -= sg->length;
-                               if (data_size <= 0) {
-                                       sg->length += data_size;
-                                       i++;
-                                       break;
-                               }
-                       }
-                       brq.data.sg_len = i;
-               }
+               if (!mmc_host_is_spi(card->host) ||
+                   rq_data_dir(req) == READ)
+                       brq->mrq.stop = &brq->stop;
+               readcmd = MMC_READ_MULTIPLE_BLOCK;
+               writecmd = MMC_WRITE_MULTIPLE_BLOCK;
+       } else {
+               brq->mrq.stop = NULL;
+               readcmd = MMC_READ_SINGLE_BLOCK;
+               writecmd = MMC_WRITE_BLOCK;
+       }
+       if (rq_data_dir(req) == READ) {
+               brq->cmd.opcode = readcmd;
+               brq->data.flags |= MMC_DATA_READ;
+       } else {
+               brq->cmd.opcode = writecmd;
+               brq->data.flags |= MMC_DATA_WRITE;
+       }
 
-               mmc_queue_bounce_pre(mq);
+       if (do_rel_wr)
+               mmc_apply_rel_rw(brq, card, req);
 
-               mmc_wait_for_req(card->host, &brq.mrq);
+       /*
+        * Pre-defined multi-block transfers are preferable to
+        * open-ended ones (and necessary for reliable writes).
+        * However, it is not sufficient to just send CMD23,
+        * and avoid the final CMD12, as on an error condition
+        * CMD12 (stop) needs to be sent anyway. This, coupled
+        * with Auto-CMD23 enhancements provided by some
+        * hosts, means that the complexity of dealing
+        * with this is best left to the host. If CMD23 is
+        * supported by card and host, we'll fill sbc in and let
+        * the host deal with handling it correctly. This means
+        * that for hosts that don't expose MMC_CAP_CMD23, no
+        * change of behavior will be observed.
+        *
+        * N.B.: Some MMC cards experience performance degradation.
+        * We'll avoid using CMD23-bounded multiblock writes for
+        * these, while retaining features like reliable writes.
+        */
 
-               mmc_queue_bounce_post(mq);
+       if ((md->flags & MMC_BLK_CMD23) &&
+           mmc_op_multi(brq->cmd.opcode) &&
+           (do_rel_wr || !(card->quirks & MMC_QUIRK_BLK_NO_CMD23))) {
+               brq->sbc.opcode = MMC_SET_BLOCK_COUNT;
+               brq->sbc.arg = brq->data.blocks |
+                       (do_rel_wr ? (1 << 31) : 0);
+               brq->sbc.flags = MMC_RSP_R1 | MMC_CMD_AC;
+               brq->mrq.sbc = &brq->sbc;
+       }
 
-               /*
-                * Check for errors here, but don't jump to cmd_err
-                * until later as we need to wait for the card to leave
-                * programming mode even when things go wrong.
-                */
-               if (brq.sbc.error || brq.cmd.error ||
-                   brq.data.error || brq.stop.error) {
-                       if (brq.data.blocks > 1 && rq_data_dir(req) == READ) {
-                               /* Redo read one sector at a time */
-                               printk(KERN_WARNING "%s: retrying using single "
-                                      "block read\n", req->rq_disk->disk_name);
-                               disable_multi = 1;
-                               continue;
-                       }
-                       status = get_card_status(card, req);
-               }
+       mmc_set_data_timeout(&brq->data, card);
 
-               if (brq.sbc.error) {
-                       printk(KERN_ERR "%s: error %d sending SET_BLOCK_COUNT "
-                              "command, response %#x, card status %#x\n",
-                              req->rq_disk->disk_name, brq.sbc.error,
-                              brq.sbc.resp[0], status);
-               }
+       brq->data.sg = mqrq->sg;
+       brq->data.sg_len = mmc_queue_map_sg(mq, mqrq);
 
-               if (brq.cmd.error) {
-                       printk(KERN_ERR "%s: error %d sending read/write "
-                              "command, response %#x, card status %#x\n",
-                              req->rq_disk->disk_name, brq.cmd.error,
-                              brq.cmd.resp[0], status);
+       /*
+        * Adjust the sg list so it is the same size as the
+        * request.
+        */
+       if (brq->data.blocks != blk_rq_sectors(req)) {
+               int i, data_size = brq->data.blocks << 9;
+               struct scatterlist *sg;
+
+               for_each_sg(brq->data.sg, sg, brq->data.sg_len, i) {
+                       data_size -= sg->length;
+                       if (data_size <= 0) {
+                               sg->length += data_size;
+                               i++;
+                               break;
+                       }
                }
+               brq->data.sg_len = i;
+       }
 
-               if (brq.data.error) {
-                       if (brq.data.error == -ETIMEDOUT && brq.mrq.stop)
-                               /* 'Stop' response contains card status */
-                               status = brq.mrq.stop->resp[0];
-                       printk(KERN_ERR "%s: error %d transferring data,"
-                              " sector %u, nr %u, card status %#x\n",
-                              req->rq_disk->disk_name, brq.data.error,
-                              (unsigned)blk_rq_pos(req),
-                              (unsigned)blk_rq_sectors(req), status);
-               }
+       mqrq->mmc_active.mrq = &brq->mrq;
+       mqrq->mmc_active.err_check = mmc_blk_err_check;
 
-               if (brq.stop.error) {
-                       printk(KERN_ERR "%s: error %d sending stop command, "
-                              "response %#x, card status %#x\n",
-                              req->rq_disk->disk_name, brq.stop.error,
-                              brq.stop.resp[0], status);
-               }
+       mmc_queue_bounce_pre(mqrq);
+}
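For illustration, the sbc handling above means that when both the card and the host advertise CMD23 support, the host sends a pre-defined block count before the data command, so no final CMD12 is needed on the success path while the stop command remains available for error handling. A stand-alone sketch of a request built that way (not part of the patch; the helper name, the fixed 8-block transfer and the static buffer are illustrative assumptions):

/*
 * Stand-alone sketch, not from the patch: how a CMD23-bounded
 * multi-block write is described to the MMC core.  The helper name,
 * the 8-block length and the static buffer are illustrative.
 */
static void cmd23_write_sketch(struct mmc_host *host, struct mmc_card *card)
{
	struct scatterlist sg;
	static u8 buf[8 * 512];
	struct mmc_request mrq = {0};
	struct mmc_command sbc = {0}, cmd = {0}, stop = {0};
	struct mmc_data data = {0};

	sbc.opcode = MMC_SET_BLOCK_COUNT;	/* CMD23 */
	sbc.arg = 8;				/* bit 31 would request a reliable write */
	sbc.flags = MMC_RSP_R1 | MMC_CMD_AC;

	cmd.opcode = MMC_WRITE_MULTIPLE_BLOCK;	/* CMD25 */
	cmd.arg = 0;				/* block address 0 on a block-addressed card */
	cmd.flags = MMC_RSP_R1 | MMC_CMD_ADTC;

	stop.opcode = MMC_STOP_TRANSMISSION;	/* CMD12, only used if something goes wrong */
	stop.flags = MMC_RSP_R1B | MMC_CMD_AC;

	sg_init_one(&sg, buf, sizeof(buf));
	data.sg = &sg;
	data.sg_len = 1;
	data.blksz = 512;
	data.blocks = 8;
	data.flags = MMC_DATA_WRITE;
	mmc_set_data_timeout(&data, card);

	mrq.sbc = &sbc;		/* host issues CMD23 (or Auto-CMD23) first */
	mrq.cmd = &cmd;
	mrq.data = &data;
	mrq.stop = &stop;	/* kept so CMD12 can still be sent on error */

	mmc_wait_for_req(host, &mrq);
}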
 
-               if (!mmc_host_is_spi(card->host) && rq_data_dir(req) != READ) {
-                       do {
-                               int err;
-
-                               cmd.opcode = MMC_SEND_STATUS;
-                               cmd.arg = card->rca << 16;
-                               cmd.flags = MMC_RSP_R1 | MMC_CMD_AC;
-                               err = mmc_wait_for_cmd(card->host, &cmd, 5);
-                               if (err) {
-                                       printk(KERN_ERR "%s: error %d requesting status\n",
-                                              req->rq_disk->disk_name, err);
-                                       goto cmd_err;
-                               }
-                               /*
-                                * Some cards mishandle the status bits,
-                                * so make sure to check both the busy
-                                * indication and the card state.
-                                */
-                       } while (!(cmd.resp[0] & R1_READY_FOR_DATA) ||
-                               (R1_CURRENT_STATE(cmd.resp[0]) == 7));
-
-#if 0
-                       if (cmd.resp[0] & ~0x00000900)
-                               printk(KERN_ERR "%s: status = %08x\n",
-                                      req->rq_disk->disk_name, cmd.resp[0]);
-                       if (mmc_decode_status(cmd.resp))
-                               goto cmd_err;
-#endif
-               }
+static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *rqc)
+{
+       struct mmc_blk_data *md = mq->data;
+       struct mmc_card *card = md->queue.card;
+       struct mmc_blk_request *brq = &mq->mqrq_cur->brq;
+       int ret = 1, disable_multi = 0, retry = 0;
+       enum mmc_blk_status status;
+       struct mmc_queue_req *mq_rq;
+       struct request *req;
+       struct mmc_async_req *areq;
+
+       if (!rqc && !mq->mqrq_prev->req)
+               return 0;
 
-               if (brq.cmd.error || brq.stop.error || brq.data.error) {
-                       if (rq_data_dir(req) == READ) {
+       do {
+               if (rqc) {
+                       mmc_blk_rw_rq_prep(mq->mqrq_cur, card, 0, mq);
+                       areq = &mq->mqrq_cur->mmc_active;
+               } else
+                       areq = NULL;
+               areq = mmc_start_req(card->host, areq, (int *) &status);
+               if (!areq)
+                       return 0;
+
+               mq_rq = container_of(areq, struct mmc_queue_req, mmc_active);
+               brq = &mq_rq->brq;
+               req = mq_rq->req;
+               mmc_queue_bounce_post(mq_rq);
+
+               switch (status) {
+               case MMC_BLK_SUCCESS:
+               case MMC_BLK_PARTIAL:
+                       /*
+                        * A block was successfully transferred.
+                        */
+                       spin_lock_irq(&md->lock);
+                       ret = __blk_end_request(req, 0,
+                                               brq->data.bytes_xfered);
+                       spin_unlock_irq(&md->lock);
+                       if (status == MMC_BLK_SUCCESS && ret) {
                                /*
-                                * After an error, we redo I/O one sector at a
-                                * time, so we only reach here after trying to
-                                * read a single sector.
+                                * blk_end_request() has returned non-zero
+                                * even though all data was transferred and
+                                * no errors were returned by the host.
+                                * If this happens, it's a bug.
                                 */
-                               spin_lock_irq(&md->lock);
-                               ret = __blk_end_request(req, -EIO, brq.data.blksz);
-                               spin_unlock_irq(&md->lock);
-                               continue;
+                               printk(KERN_ERR "%s BUG rq_tot %d d_xfer %d\n",
+                                      __func__, blk_rq_bytes(req),
+                                      brq->data.bytes_xfered);
+                               rqc = NULL;
+                               goto cmd_abort;
                        }
+                       break;
+               case MMC_BLK_CMD_ERR:
                        goto cmd_err;
+               case MMC_BLK_RETRY_SINGLE:
+                       disable_multi = 1;
+                       break;
+               case MMC_BLK_RETRY:
+                       if (retry++ < 5)
+                               break;
+               case MMC_BLK_ABORT:
+                       goto cmd_abort;
+               case MMC_BLK_DATA_ERR:
+                       /*
+                        * After an error, we redo I/O one sector at a
+                        * time, so we only reach here after trying to
+                        * read a single sector.
+                        */
+                       spin_lock_irq(&md->lock);
+                       ret = __blk_end_request(req, -EIO,
+                                               brq->data.blksz);
+                       spin_unlock_irq(&md->lock);
+                       if (!ret)
+                               goto start_new_req;
+                       break;
                }
 
-               /*
-                * A block was successfully transferred.
-                */
-               spin_lock_irq(&md->lock);
-               ret = __blk_end_request(req, 0, brq.data.bytes_xfered);
-               spin_unlock_irq(&md->lock);
+               if (ret) {
+                       /*
+                        * In case of an incomplete request,
+                        * prepare it again and resend it.
+                        */
+                       mmc_blk_rw_rq_prep(mq_rq, card, disable_multi, mq);
+                       mmc_start_req(card->host, &mq_rq->mmc_active, NULL);
+               }
        } while (ret);
 
        return 1;
@@ -927,15 +1156,22 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *req)
                }
        } else {
                spin_lock_irq(&md->lock);
-               ret = __blk_end_request(req, 0, brq.data.bytes_xfered);
+               ret = __blk_end_request(req, 0, brq->data.bytes_xfered);
                spin_unlock_irq(&md->lock);
        }
 
+ cmd_abort:
        spin_lock_irq(&md->lock);
        while (ret)
                ret = __blk_end_request(req, -EIO, blk_rq_cur_bytes(req));
        spin_unlock_irq(&md->lock);
 
+ start_new_req:
+       if (rqc) {
+               mmc_blk_rw_rq_prep(mq->mqrq_cur, card, 0, mq);
+               mmc_start_req(card->host, &mq->mqrq_cur->mmc_active, NULL);
+       }
+
        return 0;
 }
 
@@ -945,26 +1181,37 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
        struct mmc_blk_data *md = mq->data;
        struct mmc_card *card = md->queue.card;
 
-       mmc_claim_host(card->host);
+       if (req && !mq->mqrq_prev->req)
+               /* claim host only for the first request */
+               mmc_claim_host(card->host);
+
        ret = mmc_blk_part_switch(card, md);
        if (ret) {
                ret = 0;
                goto out;
        }
 
-       if (req->cmd_flags & REQ_DISCARD) {
+       if (req && req->cmd_flags & REQ_DISCARD) {
+               /* complete ongoing async transfer before issuing discard */
+               if (card->host->areq)
+                       mmc_blk_issue_rw_rq(mq, NULL);
                if (req->cmd_flags & REQ_SECURE)
                        ret = mmc_blk_issue_secdiscard_rq(mq, req);
                else
                        ret = mmc_blk_issue_discard_rq(mq, req);
-       } else if (req->cmd_flags & REQ_FLUSH) {
+       } else if (req && req->cmd_flags & REQ_FLUSH) {
+               /* complete ongoing async transfer before issuing flush */
+               if (card->host->areq)
+                       mmc_blk_issue_rw_rq(mq, NULL);
                ret = mmc_blk_issue_flush(mq, req);
        } else {
                ret = mmc_blk_issue_rw_rq(mq, req);
        }
 
 out:
-       mmc_release_host(card->host);
+       if (!req)
+               /* release host only when there are no more requests */
+               mmc_release_host(card->host);
        return ret;
 }
 
index 233cdfa..006a5e9 100644 (file)
@@ -148,6 +148,27 @@ struct mmc_test_card {
        struct mmc_test_general_result  *gr;
 };
 
+enum mmc_test_prep_media {
+       MMC_TEST_PREP_NONE = 0,
+       MMC_TEST_PREP_WRITE_FULL = 1 << 0,
+       MMC_TEST_PREP_ERASE = 1 << 1,
+};
+
+struct mmc_test_multiple_rw {
+       unsigned int *sg_len;
+       unsigned int *bs;
+       unsigned int len;
+       unsigned int size;
+       bool do_write;
+       bool do_nonblock_req;
+       enum mmc_test_prep_media prepare;
+};
+
+struct mmc_test_async_req {
+       struct mmc_async_req areq;
+       struct mmc_test_card *test;
+};
+
 /*******************************************************************/
 /*  General helper functions                                       */
 /*******************************************************************/
@@ -367,21 +388,26 @@ out_free:
  * Map memory into a scatterlist.  Optionally allow the same memory to be
  * mapped more than once.
  */
-static int mmc_test_map_sg(struct mmc_test_mem *mem, unsigned long sz,
+static int mmc_test_map_sg(struct mmc_test_mem *mem, unsigned long size,
                           struct scatterlist *sglist, int repeat,
                           unsigned int max_segs, unsigned int max_seg_sz,
-                          unsigned int *sg_len)
+                          unsigned int *sg_len, int min_sg_len)
 {
        struct scatterlist *sg = NULL;
        unsigned int i;
+       unsigned long sz = size;
 
        sg_init_table(sglist, max_segs);
+       if (min_sg_len > max_segs)
+               min_sg_len = max_segs;
 
        *sg_len = 0;
        do {
                for (i = 0; i < mem->cnt; i++) {
                        unsigned long len = PAGE_SIZE << mem->arr[i].order;
 
+                       if (min_sg_len && (size / min_sg_len < len))
+                               len = ALIGN(size / min_sg_len, 512);
                        if (len > sz)
                                len = sz;
                        if (len > max_seg_sz)
@@ -554,11 +580,12 @@ static void mmc_test_print_avg_rate(struct mmc_test_card *test, uint64_t bytes,
 
        printk(KERN_INFO "%s: Transfer of %u x %u sectors (%u x %u%s KiB) took "
                         "%lu.%09lu seconds (%u kB/s, %u KiB/s, "
-                        "%u.%02u IOPS)\n",
+                        "%u.%02u IOPS, sg_len %d)\n",
                         mmc_hostname(test->card->host), count, sectors, count,
                         sectors >> 1, (sectors & 1 ? ".5" : ""),
                         (unsigned long)ts.tv_sec, (unsigned long)ts.tv_nsec,
-                        rate / 1000, rate / 1024, iops / 100, iops % 100);
+                        rate / 1000, rate / 1024, iops / 100, iops % 100,
+                        test->area.sg_len);
 
        mmc_test_save_transfer_result(test, count, sectors, ts, rate, iops);
 }
@@ -661,7 +688,7 @@ static void mmc_test_prepare_broken_mrq(struct mmc_test_card *test,
  * Checks that a normal transfer didn't have any errors
  */
 static int mmc_test_check_result(struct mmc_test_card *test,
-       struct mmc_request *mrq)
+                                struct mmc_request *mrq)
 {
        int ret;
 
@@ -685,6 +712,17 @@ static int mmc_test_check_result(struct mmc_test_card *test,
        return ret;
 }
 
+static int mmc_test_check_result_async(struct mmc_card *card,
+                                      struct mmc_async_req *areq)
+{
+       struct mmc_test_async_req *test_async =
+               container_of(areq, struct mmc_test_async_req, areq);
+
+       mmc_test_wait_busy(test_async->test);
+
+       return mmc_test_check_result(test_async->test, areq->mrq);
+}
+
 /*
  * Checks that a "short transfer" behaved as expected
  */
@@ -719,6 +757,85 @@ static int mmc_test_check_broken_result(struct mmc_test_card *test,
        return ret;
 }
 
+/*
+ * Tests nonblock transfer with certain parameters
+ */
+static void mmc_test_nonblock_reset(struct mmc_request *mrq,
+                                   struct mmc_command *cmd,
+                                   struct mmc_command *stop,
+                                   struct mmc_data *data)
+{
+       memset(mrq, 0, sizeof(struct mmc_request));
+       memset(cmd, 0, sizeof(struct mmc_command));
+       memset(data, 0, sizeof(struct mmc_data));
+       memset(stop, 0, sizeof(struct mmc_command));
+
+       mrq->cmd = cmd;
+       mrq->data = data;
+       mrq->stop = stop;
+}
+static int mmc_test_nonblock_transfer(struct mmc_test_card *test,
+                                     struct scatterlist *sg, unsigned sg_len,
+                                     unsigned dev_addr, unsigned blocks,
+                                     unsigned blksz, int write, int count)
+{
+       struct mmc_request mrq1;
+       struct mmc_command cmd1;
+       struct mmc_command stop1;
+       struct mmc_data data1;
+
+       struct mmc_request mrq2;
+       struct mmc_command cmd2;
+       struct mmc_command stop2;
+       struct mmc_data data2;
+
+       struct mmc_test_async_req test_areq[2];
+       struct mmc_async_req *done_areq;
+       struct mmc_async_req *cur_areq = &test_areq[0].areq;
+       struct mmc_async_req *other_areq = &test_areq[1].areq;
+       int i;
+       int ret;
+
+       test_areq[0].test = test;
+       test_areq[1].test = test;
+
+       mmc_test_nonblock_reset(&mrq1, &cmd1, &stop1, &data1);
+       mmc_test_nonblock_reset(&mrq2, &cmd2, &stop2, &data2);
+
+       cur_areq->mrq = &mrq1;
+       cur_areq->err_check = mmc_test_check_result_async;
+       other_areq->mrq = &mrq2;
+       other_areq->err_check = mmc_test_check_result_async;
+
+       for (i = 0; i < count; i++) {
+               mmc_test_prepare_mrq(test, cur_areq->mrq, sg, sg_len, dev_addr,
+                                    blocks, blksz, write);
+               done_areq = mmc_start_req(test->card->host, cur_areq, &ret);
+
+               if (ret || (!done_areq && i > 0))
+                       goto err;
+
+               if (done_areq) {
+                       if (done_areq->mrq == &mrq2)
+                               mmc_test_nonblock_reset(&mrq2, &cmd2,
+                                                       &stop2, &data2);
+                       else
+                               mmc_test_nonblock_reset(&mrq1, &cmd1,
+                                                       &stop1, &data1);
+               }
+               done_areq = cur_areq;
+               cur_areq = other_areq;
+               other_areq = done_areq;
+               dev_addr += blocks;
+       }
+
+       done_areq = mmc_start_req(test->card->host, NULL, &ret);
+
+       return ret;
+err:
+       return ret;
+}
+
 /*
  * Tests a basic transfer with certain parameters
  */
@@ -1302,7 +1419,7 @@ static int mmc_test_no_highmem(struct mmc_test_card *test)
  * Map sz bytes so that it can be transferred.
  */
 static int mmc_test_area_map(struct mmc_test_card *test, unsigned long sz,
-                            int max_scatter)
+                            int max_scatter, int min_sg_len)
 {
        struct mmc_test_area *t = &test->area;
        int err;
@@ -1315,7 +1432,7 @@ static int mmc_test_area_map(struct mmc_test_card *test, unsigned long sz,
                                       &t->sg_len);
        } else {
                err = mmc_test_map_sg(t->mem, sz, t->sg, 1, t->max_segs,
-                                     t->max_seg_sz, &t->sg_len);
+                                     t->max_seg_sz, &t->sg_len, min_sg_len);
        }
        if (err)
                printk(KERN_INFO "%s: Failed to map sg list\n",
@@ -1336,14 +1453,17 @@ static int mmc_test_area_transfer(struct mmc_test_card *test,
 }
 
 /*
- * Map and transfer bytes.
+ * Map and transfer bytes for multiple transfers.
  */
-static int mmc_test_area_io(struct mmc_test_card *test, unsigned long sz,
-                           unsigned int dev_addr, int write, int max_scatter,
-                           int timed)
+static int mmc_test_area_io_seq(struct mmc_test_card *test, unsigned long sz,
+                               unsigned int dev_addr, int write,
+                               int max_scatter, int timed, int count,
+                               bool nonblock, int min_sg_len)
 {
        struct timespec ts1, ts2;
-       int ret;
+       int ret = 0;
+       int i;
+       struct mmc_test_area *t = &test->area;
 
        /*
         * In the case of a maximally scattered transfer, the maximum transfer
@@ -1361,14 +1481,21 @@ static int mmc_test_area_io(struct mmc_test_card *test, unsigned long sz,
                        sz = max_tfr;
        }
 
-       ret = mmc_test_area_map(test, sz, max_scatter);
+       ret = mmc_test_area_map(test, sz, max_scatter, min_sg_len);
        if (ret)
                return ret;
 
        if (timed)
                getnstimeofday(&ts1);
+       if (nonblock)
+               ret = mmc_test_nonblock_transfer(test, t->sg, t->sg_len,
+                                dev_addr, t->blocks, 512, write, count);
+       else
+               for (i = 0; i < count && ret == 0; i++) {
+                       ret = mmc_test_area_transfer(test, dev_addr, write);
+                       dev_addr += sz >> 9;
+               }
 
-       ret = mmc_test_area_transfer(test, dev_addr, write);
        if (ret)
                return ret;
 
@@ -1376,11 +1503,19 @@ static int mmc_test_area_io(struct mmc_test_card *test, unsigned long sz,
                getnstimeofday(&ts2);
 
        if (timed)
-               mmc_test_print_rate(test, sz, &ts1, &ts2);
+               mmc_test_print_avg_rate(test, sz, count, &ts1, &ts2);
 
        return 0;
 }
 
+static int mmc_test_area_io(struct mmc_test_card *test, unsigned long sz,
+                           unsigned int dev_addr, int write, int max_scatter,
+                           int timed)
+{
+       return mmc_test_area_io_seq(test, sz, dev_addr, write, max_scatter,
+                                   timed, 1, false, 0);
+}
+
 /*
  * Write the test area entirely.
  */
@@ -1954,6 +2089,245 @@ static int mmc_test_large_seq_write_perf(struct mmc_test_card *test)
        return mmc_test_large_seq_perf(test, 1);
 }
 
+static int mmc_test_rw_multiple(struct mmc_test_card *test,
+                               struct mmc_test_multiple_rw *tdata,
+                               unsigned int reqsize, unsigned int size,
+                               int min_sg_len)
+{
+       unsigned int dev_addr;
+       struct mmc_test_area *t = &test->area;
+       int ret = 0;
+
+       /* Set up test area */
+       if (size > mmc_test_capacity(test->card) / 2 * 512)
+               size = mmc_test_capacity(test->card) / 2 * 512;
+       if (reqsize > t->max_tfr)
+               reqsize = t->max_tfr;
+       dev_addr = mmc_test_capacity(test->card) / 4;
+       if ((dev_addr & 0xffff0000))
+               dev_addr &= 0xffff0000; /* Round to 64MiB boundary */
+       else
+               dev_addr &= 0xfffff800; /* Round to 1MiB boundary */
+       if (!dev_addr)
+               goto err;
+
+       if (reqsize > size)
+               return 0;
+
+       /* prepare test area */
+       if (mmc_can_erase(test->card) &&
+           tdata->prepare & MMC_TEST_PREP_ERASE) {
+               ret = mmc_erase(test->card, dev_addr,
+                               size / 512, MMC_SECURE_ERASE_ARG);
+               if (ret)
+                       ret = mmc_erase(test->card, dev_addr,
+                                       size / 512, MMC_ERASE_ARG);
+               if (ret)
+                       goto err;
+       }
+
+       /* Run test */
+       ret = mmc_test_area_io_seq(test, reqsize, dev_addr,
+                                  tdata->do_write, 0, 1, size / reqsize,
+                                  tdata->do_nonblock_req, min_sg_len);
+       if (ret)
+               goto err;
+
+       return ret;
+ err:
+       printk(KERN_INFO "[%s] error\n", __func__);
+       return ret;
+}
+
+static int mmc_test_rw_multiple_size(struct mmc_test_card *test,
+                                    struct mmc_test_multiple_rw *rw)
+{
+       int ret = 0;
+       int i;
+       void *pre_req = test->card->host->ops->pre_req;
+       void *post_req = test->card->host->ops->post_req;
+
+       if (rw->do_nonblock_req &&
+           ((!pre_req && post_req) || (pre_req && !post_req))) {
+               printk(KERN_INFO "error: only one of pre/post is defined\n");
+               return -EINVAL;
+       }
+
+       for (i = 0 ; i < rw->len && ret == 0; i++) {
+               ret = mmc_test_rw_multiple(test, rw, rw->bs[i], rw->size, 0);
+               if (ret)
+                       break;
+       }
+       return ret;
+}
+
+static int mmc_test_rw_multiple_sg_len(struct mmc_test_card *test,
+                                      struct mmc_test_multiple_rw *rw)
+{
+       int ret = 0;
+       int i;
+
+       for (i = 0 ; i < rw->len && ret == 0; i++) {
+               ret = mmc_test_rw_multiple(test, rw, 512*1024, rw->size,
+                                          rw->sg_len[i]);
+               if (ret)
+                       break;
+       }
+       return ret;
+}
+
+/*
+ * Multiple blocking write 4k to 4 MB chunks
+ */
+static int mmc_test_profile_mult_write_blocking_perf(struct mmc_test_card *test)
+{
+       unsigned int bs[] = {1 << 12, 1 << 13, 1 << 14, 1 << 15, 1 << 16,
+                            1 << 17, 1 << 18, 1 << 19, 1 << 20, 1 << 22};
+       struct mmc_test_multiple_rw test_data = {
+               .bs = bs,
+               .size = TEST_AREA_MAX_SIZE,
+               .len = ARRAY_SIZE(bs),
+               .do_write = true,
+               .do_nonblock_req = false,
+               .prepare = MMC_TEST_PREP_ERASE,
+       };
+
+       return mmc_test_rw_multiple_size(test, &test_data);
+}
+
+/*
+ * Multiple non-blocking write 4k to 4 MB chunks
+ */
+static int mmc_test_profile_mult_write_nonblock_perf(struct mmc_test_card *test)
+{
+       unsigned int bs[] = {1 << 12, 1 << 13, 1 << 14, 1 << 15, 1 << 16,
+                            1 << 17, 1 << 18, 1 << 19, 1 << 20, 1 << 22};
+       struct mmc_test_multiple_rw test_data = {
+               .bs = bs,
+               .size = TEST_AREA_MAX_SIZE,
+               .len = ARRAY_SIZE(bs),
+               .do_write = true,
+               .do_nonblock_req = true,
+               .prepare = MMC_TEST_PREP_ERASE,
+       };
+
+       return mmc_test_rw_multiple_size(test, &test_data);
+}
+
+/*
+ * Multiple blocking read 4k to 4 MB chunks
+ */
+static int mmc_test_profile_mult_read_blocking_perf(struct mmc_test_card *test)
+{
+       unsigned int bs[] = {1 << 12, 1 << 13, 1 << 14, 1 << 15, 1 << 16,
+                            1 << 17, 1 << 18, 1 << 19, 1 << 20, 1 << 22};
+       struct mmc_test_multiple_rw test_data = {
+               .bs = bs,
+               .size = TEST_AREA_MAX_SIZE,
+               .len = ARRAY_SIZE(bs),
+               .do_write = false,
+               .do_nonblock_req = false,
+               .prepare = MMC_TEST_PREP_NONE,
+       };
+
+       return mmc_test_rw_multiple_size(test, &test_data);
+}
+
+/*
+ * Multiple non-blocking read 4k to 4 MB chunks
+ */
+static int mmc_test_profile_mult_read_nonblock_perf(struct mmc_test_card *test)
+{
+       unsigned int bs[] = {1 << 12, 1 << 13, 1 << 14, 1 << 15, 1 << 16,
+                            1 << 17, 1 << 18, 1 << 19, 1 << 20, 1 << 22};
+       struct mmc_test_multiple_rw test_data = {
+               .bs = bs,
+               .size = TEST_AREA_MAX_SIZE,
+               .len = ARRAY_SIZE(bs),
+               .do_write = false,
+               .do_nonblock_req = true,
+               .prepare = MMC_TEST_PREP_NONE,
+       };
+
+       return mmc_test_rw_multiple_size(test, &test_data);
+}
+
+/*
+ * Multiple blocking write 1 to 512 sg elements
+ */
+static int mmc_test_profile_sglen_wr_blocking_perf(struct mmc_test_card *test)
+{
+       unsigned int sg_len[] = {1, 1 << 3, 1 << 4, 1 << 5, 1 << 6,
+                                1 << 7, 1 << 8, 1 << 9};
+       struct mmc_test_multiple_rw test_data = {
+               .sg_len = sg_len,
+               .size = TEST_AREA_MAX_SIZE,
+               .len = ARRAY_SIZE(sg_len),
+               .do_write = true,
+               .do_nonblock_req = false,
+               .prepare = MMC_TEST_PREP_ERASE,
+       };
+
+       return mmc_test_rw_multiple_sg_len(test, &test_data);
+}
+
+/*
+ * Multiple non-blocking write 1 to 512 sg elements
+ */
+static int mmc_test_profile_sglen_wr_nonblock_perf(struct mmc_test_card *test)
+{
+       unsigned int sg_len[] = {1, 1 << 3, 1 << 4, 1 << 5, 1 << 6,
+                                1 << 7, 1 << 8, 1 << 9};
+       struct mmc_test_multiple_rw test_data = {
+               .sg_len = sg_len,
+               .size = TEST_AREA_MAX_SIZE,
+               .len = ARRAY_SIZE(sg_len),
+               .do_write = true,
+               .do_nonblock_req = true,
+               .prepare = MMC_TEST_PREP_ERASE,
+       };
+
+       return mmc_test_rw_multiple_sg_len(test, &test_data);
+}
+
+/*
+ * Multiple blocking read 1 to 512 sg elements
+ */
+static int mmc_test_profile_sglen_r_blocking_perf(struct mmc_test_card *test)
+{
+       unsigned int sg_len[] = {1, 1 << 3, 1 << 4, 1 << 5, 1 << 6,
+                                1 << 7, 1 << 8, 1 << 9};
+       struct mmc_test_multiple_rw test_data = {
+               .sg_len = sg_len,
+               .size = TEST_AREA_MAX_SIZE,
+               .len = ARRAY_SIZE(sg_len),
+               .do_write = false,
+               .do_nonblock_req = false,
+               .prepare = MMC_TEST_PREP_NONE,
+       };
+
+       return mmc_test_rw_multiple_sg_len(test, &test_data);
+}
+
+/*
+ * Multiple non-blocking read 1 to 512 sg elements
+ */
+static int mmc_test_profile_sglen_r_nonblock_perf(struct mmc_test_card *test)
+{
+       unsigned int sg_len[] = {1, 1 << 3, 1 << 4, 1 << 5, 1 << 6,
+                                1 << 7, 1 << 8, 1 << 9};
+       struct mmc_test_multiple_rw test_data = {
+               .sg_len = sg_len,
+               .size = TEST_AREA_MAX_SIZE,
+               .len = ARRAY_SIZE(sg_len),
+               .do_write = false,
+               .do_nonblock_req = true,
+               .prepare = MMC_TEST_PREP_NONE,
+       };
+
+       return mmc_test_rw_multiple_sg_len(test, &test_data);
+}
+
 static const struct mmc_test_case mmc_test_cases[] = {
        {
                .name = "Basic write (no data verification)",
@@ -2221,6 +2595,61 @@ static const struct mmc_test_case mmc_test_cases[] = {
                .cleanup = mmc_test_area_cleanup,
        },
 
+       {
+               .name = "Write performance with blocking req 4k to 4MB",
+               .prepare = mmc_test_area_prepare,
+               .run = mmc_test_profile_mult_write_blocking_perf,
+               .cleanup = mmc_test_area_cleanup,
+       },
+
+       {
+               .name = "Write performance with non-blocking req 4k to 4MB",
+               .prepare = mmc_test_area_prepare,
+               .run = mmc_test_profile_mult_write_nonblock_perf,
+               .cleanup = mmc_test_area_cleanup,
+       },
+
+       {
+               .name = "Read performance with blocking req 4k to 4MB",
+               .prepare = mmc_test_area_prepare,
+               .run = mmc_test_profile_mult_read_blocking_perf,
+               .cleanup = mmc_test_area_cleanup,
+       },
+
+       {
+               .name = "Read performance with non-blocking req 4k to 4MB",
+               .prepare = mmc_test_area_prepare,
+               .run = mmc_test_profile_mult_read_nonblock_perf,
+               .cleanup = mmc_test_area_cleanup,
+       },
+
+       {
+               .name = "Write performance blocking req 1 to 512 sg elems",
+               .prepare = mmc_test_area_prepare,
+               .run = mmc_test_profile_sglen_wr_blocking_perf,
+               .cleanup = mmc_test_area_cleanup,
+       },
+
+       {
+               .name = "Write performance non-blocking req 1 to 512 sg elems",
+               .prepare = mmc_test_area_prepare,
+               .run = mmc_test_profile_sglen_wr_nonblock_perf,
+               .cleanup = mmc_test_area_cleanup,
+       },
+
+       {
+               .name = "Read performance blocking req 1 to 512 sg elems",
+               .prepare = mmc_test_area_prepare,
+               .run = mmc_test_profile_sglen_r_blocking_perf,
+               .cleanup = mmc_test_area_cleanup,
+       },
+
+       {
+               .name = "Read performance non-blocking req 1 to 512 sg elems",
+               .prepare = mmc_test_area_prepare,
+               .run = mmc_test_profile_sglen_r_nonblock_perf,
+               .cleanup = mmc_test_area_cleanup,
+       },
 };
 
 static DEFINE_MUTEX(mmc_test_lock);
@@ -2445,6 +2874,32 @@ static const struct file_operations mmc_test_fops_test = {
        .release        = single_release,
 };
 
+static int mtf_testlist_show(struct seq_file *sf, void *data)
+{
+       int i;
+
+       mutex_lock(&mmc_test_lock);
+
+       for (i = 0; i < ARRAY_SIZE(mmc_test_cases); i++)
+               seq_printf(sf, "%d:\t%s\n", i+1, mmc_test_cases[i].name);
+
+       mutex_unlock(&mmc_test_lock);
+
+       return 0;
+}
+
+static int mtf_testlist_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, mtf_testlist_show, inode->i_private);
+}
+
+static const struct file_operations mmc_test_fops_testlist = {
+       .open           = mtf_testlist_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
+};
+
 static void mmc_test_free_file_test(struct mmc_card *card)
 {
        struct mmc_test_dbgfs_file *df, *dfs;
@@ -2476,7 +2931,18 @@ static int mmc_test_register_file_test(struct mmc_card *card)
 
        if (IS_ERR_OR_NULL(file)) {
                dev_err(&card->dev,
-                       "Can't create file. Perhaps debugfs is disabled.\n");
+                       "Can't create test. Perhaps debugfs is disabled.\n");
+               ret = -ENODEV;
+               goto err;
+       }
+
+       if (card->debugfs_root)
+               file = debugfs_create_file("testlist", S_IRUGO,
+                       card->debugfs_root, card, &mmc_test_fops_testlist);
+
+       if (IS_ERR_OR_NULL(file)) {
+               dev_err(&card->dev,
+                       "Can't create testlist. Perhaps debugfs is disabled.\n");
                ret = -ENODEV;
                goto err;
        }
index 6413afa..45fb362 100644 (file)
@@ -52,14 +52,18 @@ static int mmc_queue_thread(void *d)
        down(&mq->thread_sem);
        do {
                struct request *req = NULL;
+               struct mmc_queue_req *tmp;
 
                spin_lock_irq(q->queue_lock);
                set_current_state(TASK_INTERRUPTIBLE);
                req = blk_fetch_request(q);
-               mq->req = req;
+               mq->mqrq_cur->req = req;
                spin_unlock_irq(q->queue_lock);
 
-               if (!req) {
+               if (req || mq->mqrq_prev->req) {
+                       set_current_state(TASK_RUNNING);
+                       mq->issue_fn(mq, req);
+               } else {
                        if (kthread_should_stop()) {
                                set_current_state(TASK_RUNNING);
                                break;
@@ -67,11 +71,14 @@ static int mmc_queue_thread(void *d)
                        up(&mq->thread_sem);
                        schedule();
                        down(&mq->thread_sem);
-                       continue;
                }
-               set_current_state(TASK_RUNNING);
 
-               mq->issue_fn(mq, req);
+               /* Current request becomes previous request and vice versa. */
+               mq->mqrq_prev->brq.mrq.data = NULL;
+               mq->mqrq_prev->req = NULL;
+               tmp = mq->mqrq_prev;
+               mq->mqrq_prev = mq->mqrq_cur;
+               mq->mqrq_cur = tmp;
        } while (1);
        up(&mq->thread_sem);
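The swap at the end of the loop above keeps one mmc_queue_req slot free for the newly fetched request while the other may still describe an in-flight transfer; it is also why issue_fn is now called even when blk_fetch_request() returns NULL, so the last outstanding request can be finished. A minimal stand-alone sketch of that two-slot rotation (the slot type and names are illustrative, not from the patch):

/*
 * Illustrative two-slot rotation: "cur" always receives the next
 * fetched request, "prev" holds the one that may still be running.
 */
struct slot_sketch {
	struct request *req;
};

static void rotate_slots_sketch(struct slot_sketch **cur,
				struct slot_sketch **prev)
{
	struct slot_sketch *tmp;

	(*prev)->req = NULL;	/* previous request has been completed */
	tmp = *prev;
	*prev = *cur;		/* current becomes the in-flight "previous" */
	*cur = tmp;		/* recycled slot will take the next request */
}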
 
@@ -97,10 +104,46 @@ static void mmc_request(struct request_queue *q)
                return;
        }
 
-       if (!mq->req)
+       if (!mq->mqrq_cur->req && !mq->mqrq_prev->req)
                wake_up_process(mq->thread);
 }
 
+struct scatterlist *mmc_alloc_sg(int sg_len, int *err)
+{
+       struct scatterlist *sg;
+
+       sg = kmalloc(sizeof(struct scatterlist)*sg_len, GFP_KERNEL);
+       if (!sg)
+               *err = -ENOMEM;
+       else {
+               *err = 0;
+               sg_init_table(sg, sg_len);
+       }
+
+       return sg;
+}
+
+static void mmc_queue_setup_discard(struct request_queue *q,
+                                   struct mmc_card *card)
+{
+       unsigned max_discard;
+
+       max_discard = mmc_calc_max_discard(card);
+       if (!max_discard)
+               return;
+
+       queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
+       q->limits.max_discard_sectors = max_discard;
+       if (card->erased_byte == 0)
+               q->limits.discard_zeroes_data = 1;
+       q->limits.discard_granularity = card->pref_erase << 9;
+       /* granularity must not be greater than max. discard */
+       if (card->pref_erase > max_discard)
+               q->limits.discard_granularity = 0;
+       if (mmc_can_secure_erase_trim(card))
+               queue_flag_set_unlocked(QUEUE_FLAG_SECDISCARD, q);
+}
+
 /**
  * mmc_init_queue - initialise a queue structure.
  * @mq: mmc queue
@@ -116,6 +159,8 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card,
        struct mmc_host *host = card->host;
        u64 limit = BLK_BOUNCE_HIGH;
        int ret;
+       struct mmc_queue_req *mqrq_cur = &mq->mqrq[0];
+       struct mmc_queue_req *mqrq_prev = &mq->mqrq[1];
 
        if (mmc_dev(host)->dma_mask && *mmc_dev(host)->dma_mask)
                limit = *mmc_dev(host)->dma_mask;
@@ -125,21 +170,16 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card,
        if (!mq->queue)
                return -ENOMEM;
 
+       memset(&mq->mqrq_cur, 0, sizeof(mq->mqrq_cur));
+       memset(&mq->mqrq_prev, 0, sizeof(mq->mqrq_prev));
+       mq->mqrq_cur = mqrq_cur;
+       mq->mqrq_prev = mqrq_prev;
        mq->queue->queuedata = mq;
-       mq->req = NULL;
 
        blk_queue_prep_rq(mq->queue, mmc_prep_request);
        queue_flag_set_unlocked(QUEUE_FLAG_NONROT, mq->queue);
-       if (mmc_can_erase(card)) {
-               queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mq->queue);
-               mq->queue->limits.max_discard_sectors = UINT_MAX;
-               if (card->erased_byte == 0)
-                       mq->queue->limits.discard_zeroes_data = 1;
-               mq->queue->limits.discard_granularity = card->pref_erase << 9;
-               if (mmc_can_secure_erase_trim(card))
-                       queue_flag_set_unlocked(QUEUE_FLAG_SECDISCARD,
-                                               mq->queue);
-       }
+       if (mmc_can_erase(card))
+               mmc_queue_setup_discard(mq->queue, card);
 
 #ifdef CONFIG_MMC_BLOCK_BOUNCE
        if (host->max_segs == 1) {
@@ -155,53 +195,64 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card,
                        bouncesz = host->max_blk_count * 512;
 
                if (bouncesz > 512) {
-                       mq->bounce_buf = kmalloc(bouncesz, GFP_KERNEL);
-                       if (!mq->bounce_buf) {
+                       mqrq_cur->bounce_buf = kmalloc(bouncesz, GFP_KERNEL);
+                       if (!mqrq_cur->bounce_buf) {
+                               printk(KERN_WARNING "%s: unable to "
+                                       "allocate bounce cur buffer\n",
+                                       mmc_card_name(card));
+                       }
+                       mqrq_prev->bounce_buf = kmalloc(bouncesz, GFP_KERNEL);
+                       if (!mqrq_prev->bounce_buf) {
                                printk(KERN_WARNING "%s: unable to "
-                                       "allocate bounce buffer\n",
+                                       "allocate bounce prev buffer\n",
                                        mmc_card_name(card));
+                               kfree(mqrq_cur->bounce_buf);
+                               mqrq_cur->bounce_buf = NULL;
                        }
                }
 
-               if (mq->bounce_buf) {
+               if (mqrq_cur->bounce_buf && mqrq_prev->bounce_buf) {
                        blk_queue_bounce_limit(mq->queue, BLK_BOUNCE_ANY);
                        blk_queue_max_hw_sectors(mq->queue, bouncesz / 512);
                        blk_queue_max_segments(mq->queue, bouncesz / 512);
                        blk_queue_max_segment_size(mq->queue, bouncesz);
 
-                       mq->sg = kmalloc(sizeof(struct scatterlist),
-                               GFP_KERNEL);
-                       if (!mq->sg) {
-                               ret = -ENOMEM;
+                       mqrq_cur->sg = mmc_alloc_sg(1, &ret);
+                       if (ret)
                                goto cleanup_queue;
-                       }
-                       sg_init_table(mq->sg, 1);
 
-                       mq->bounce_sg = kmalloc(sizeof(struct scatterlist) *
-                               bouncesz / 512, GFP_KERNEL);
-                       if (!mq->bounce_sg) {
-                               ret = -ENOMEM;
+                       mqrq_cur->bounce_sg =
+                               mmc_alloc_sg(bouncesz / 512, &ret);
+                       if (ret)
+                               goto cleanup_queue;
+
+                       mqrq_prev->sg = mmc_alloc_sg(1, &ret);
+                       if (ret)
+                               goto cleanup_queue;
+
+                       mqrq_prev->bounce_sg =
+                               mmc_alloc_sg(bouncesz / 512, &ret);
+                       if (ret)
                                goto cleanup_queue;
-                       }
-                       sg_init_table(mq->bounce_sg, bouncesz / 512);
                }
        }
 #endif
 
-       if (!mq->bounce_buf) {
+       if (!mqrq_cur->bounce_buf && !mqrq_prev->bounce_buf) {
                blk_queue_bounce_limit(mq->queue, limit);
                blk_queue_max_hw_sectors(mq->queue,
                        min(host->max_blk_count, host->max_req_size / 512));
                blk_queue_max_segments(mq->queue, host->max_segs);
                blk_queue_max_segment_size(mq->queue, host->max_seg_size);
 
-               mq->sg = kmalloc(sizeof(struct scatterlist) *
-                       host->max_segs, GFP_KERNEL);
-               if (!mq->sg) {
-                       ret = -ENOMEM;
+               mqrq_cur->sg = mmc_alloc_sg(host->max_segs, &ret);
+               if (ret)
+                       goto cleanup_queue;
+
+
+               mqrq_prev->sg = mmc_alloc_sg(host->max_segs, &ret);
+               if (ret)
                        goto cleanup_queue;
-               }
-               sg_init_table(mq->sg, host->max_segs);
        }
 
        sema_init(&mq->thread_sem, 1);
@@ -216,16 +267,22 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card,
 
        return 0;
  free_bounce_sg:
-       if (mq->bounce_sg)
-               kfree(mq->bounce_sg);
-       mq->bounce_sg = NULL;
+       kfree(mqrq_cur->bounce_sg);
+       mqrq_cur->bounce_sg = NULL;
+       kfree(mqrq_prev->bounce_sg);
+       mqrq_prev->bounce_sg = NULL;
+
  cleanup_queue:
-       if (mq->sg)
-               kfree(mq->sg);
-       mq->sg = NULL;
-       if (mq->bounce_buf)
-               kfree(mq->bounce_buf);
-       mq->bounce_buf = NULL;
+       kfree(mqrq_cur->sg);
+       mqrq_cur->sg = NULL;
+       kfree(mqrq_cur->bounce_buf);
+       mqrq_cur->bounce_buf = NULL;
+
+       kfree(mqrq_prev->sg);
+       mqrq_prev->sg = NULL;
+       kfree(mqrq_prev->bounce_buf);
+       mqrq_prev->bounce_buf = NULL;
+
        blk_cleanup_queue(mq->queue);
        return ret;
 }
@@ -234,6 +291,8 @@ void mmc_cleanup_queue(struct mmc_queue *mq)
 {
        struct request_queue *q = mq->queue;
        unsigned long flags;
+       struct mmc_queue_req *mqrq_cur = mq->mqrq_cur;
+       struct mmc_queue_req *mqrq_prev = mq->mqrq_prev;
 
        /* Make sure the queue isn't suspended, as that will deadlock */
        mmc_queue_resume(mq);
@@ -247,16 +306,23 @@ void mmc_cleanup_queue(struct mmc_queue *mq)
        blk_start_queue(q);
        spin_unlock_irqrestore(q->queue_lock, flags);
 
-       if (mq->bounce_sg)
-               kfree(mq->bounce_sg);
-       mq->bounce_sg = NULL;
+       kfree(mqrq_cur->bounce_sg);
+       mqrq_cur->bounce_sg = NULL;
 
-       kfree(mq->sg);
-       mq->sg = NULL;
+       kfree(mqrq_cur->sg);
+       mqrq_cur->sg = NULL;
 
-       if (mq->bounce_buf)
-               kfree(mq->bounce_buf);
-       mq->bounce_buf = NULL;
+       kfree(mqrq_cur->bounce_buf);
+       mqrq_cur->bounce_buf = NULL;
+
+       kfree(mqrq_prev->bounce_sg);
+       mqrq_prev->bounce_sg = NULL;
+
+       kfree(mqrq_prev->sg);
+       mqrq_prev->sg = NULL;
+
+       kfree(mqrq_prev->bounce_buf);
+       mqrq_prev->bounce_buf = NULL;
 
        mq->card = NULL;
 }
@@ -309,27 +375,27 @@ void mmc_queue_resume(struct mmc_queue *mq)
 /*
  * Prepare the sg list(s) to be handed off to the host driver
  */
-unsigned int mmc_queue_map_sg(struct mmc_queue *mq)
+unsigned int mmc_queue_map_sg(struct mmc_queue *mq, struct mmc_queue_req *mqrq)
 {
        unsigned int sg_len;
        size_t buflen;
        struct scatterlist *sg;
        int i;
 
-       if (!mq->bounce_buf)
-               return blk_rq_map_sg(mq->queue, mq->req, mq->sg);
+       if (!mqrq->bounce_buf)
+               return blk_rq_map_sg(mq->queue, mqrq->req, mqrq->sg);
 
-       BUG_ON(!mq->bounce_sg);
+       BUG_ON(!mqrq->bounce_sg);
 
-       sg_len = blk_rq_map_sg(mq->queue, mq->req, mq->bounce_sg);
+       sg_len = blk_rq_map_sg(mq->queue, mqrq->req, mqrq->bounce_sg);
 
-       mq->bounce_sg_len = sg_len;
+       mqrq->bounce_sg_len = sg_len;
 
        buflen = 0;
-       for_each_sg(mq->bounce_sg, sg, sg_len, i)
+       for_each_sg(mqrq->bounce_sg, sg, sg_len, i)
                buflen += sg->length;
 
-       sg_init_one(mq->sg, mq->bounce_buf, buflen);
+       sg_init_one(mqrq->sg, mqrq->bounce_buf, buflen);
 
        return 1;
 }
@@ -338,31 +404,30 @@ unsigned int mmc_queue_map_sg(struct mmc_queue *mq)
  * If writing, bounce the data to the buffer before the request
  * is sent to the host driver
  */
-void mmc_queue_bounce_pre(struct mmc_queue *mq)
+void mmc_queue_bounce_pre(struct mmc_queue_req *mqrq)
 {
-       if (!mq->bounce_buf)
+       if (!mqrq->bounce_buf)
                return;
 
-       if (rq_data_dir(mq->req) != WRITE)
+       if (rq_data_dir(mqrq->req) != WRITE)
                return;
 
-       sg_copy_to_buffer(mq->bounce_sg, mq->bounce_sg_len,
-               mq->bounce_buf, mq->sg[0].length);
+       sg_copy_to_buffer(mqrq->bounce_sg, mqrq->bounce_sg_len,
+               mqrq->bounce_buf, mqrq->sg[0].length);
 }
 
 /*
  * If reading, bounce the data from the buffer after the request
  * has been handled by the host driver
  */
-void mmc_queue_bounce_post(struct mmc_queue *mq)
+void mmc_queue_bounce_post(struct mmc_queue_req *mqrq)
 {
-       if (!mq->bounce_buf)
+       if (!mqrq->bounce_buf)
                return;
 
-       if (rq_data_dir(mq->req) != READ)
+       if (rq_data_dir(mqrq->req) != READ)
                return;
 
-       sg_copy_from_buffer(mq->bounce_sg, mq->bounce_sg_len,
-               mq->bounce_buf, mq->sg[0].length);
+       sg_copy_from_buffer(mqrq->bounce_sg, mqrq->bounce_sg_len,
+               mqrq->bounce_buf, mqrq->sg[0].length);
 }
-
index 6223ef8..d2a1eb4 100644 (file)
@@ -4,19 +4,35 @@
 struct request;
 struct task_struct;
 
+struct mmc_blk_request {
+       struct mmc_request      mrq;
+       struct mmc_command      sbc;
+       struct mmc_command      cmd;
+       struct mmc_command      stop;
+       struct mmc_data         data;
+};
+
+struct mmc_queue_req {
+       struct request          *req;
+       struct mmc_blk_request  brq;
+       struct scatterlist      *sg;
+       char                    *bounce_buf;
+       struct scatterlist      *bounce_sg;
+       unsigned int            bounce_sg_len;
+       struct mmc_async_req    mmc_active;
+};
+
 struct mmc_queue {
        struct mmc_card         *card;
        struct task_struct      *thread;
        struct semaphore        thread_sem;
        unsigned int            flags;
-       struct request          *req;
        int                     (*issue_fn)(struct mmc_queue *, struct request *);
        void                    *data;
        struct request_queue    *queue;
-       struct scatterlist      *sg;
-       char                    *bounce_buf;
-       struct scatterlist      *bounce_sg;
-       unsigned int            bounce_sg_len;
+       struct mmc_queue_req    mqrq[2];
+       struct mmc_queue_req    *mqrq_cur;
+       struct mmc_queue_req    *mqrq_prev;
 };
 
 extern int mmc_init_queue(struct mmc_queue *, struct mmc_card *, spinlock_t *,
@@ -25,8 +41,9 @@ extern void mmc_cleanup_queue(struct mmc_queue *);
 extern void mmc_queue_suspend(struct mmc_queue *);
 extern void mmc_queue_resume(struct mmc_queue *);
 
-extern unsigned int mmc_queue_map_sg(struct mmc_queue *);
-extern void mmc_queue_bounce_pre(struct mmc_queue *);
-extern void mmc_queue_bounce_post(struct mmc_queue *);
+extern unsigned int mmc_queue_map_sg(struct mmc_queue *,
+                                    struct mmc_queue_req *);
+extern void mmc_queue_bounce_pre(struct mmc_queue_req *);
+extern void mmc_queue_bounce_post(struct mmc_queue_req *);
 
 #endif
index 7843efe..f091b43 100644 (file)
@@ -198,9 +198,109 @@ mmc_start_request(struct mmc_host *host, struct mmc_request *mrq)
 
 static void mmc_wait_done(struct mmc_request *mrq)
 {
-       complete(mrq->done_data);
+       complete(&mrq->completion);
 }
 
+static void __mmc_start_req(struct mmc_host *host, struct mmc_request *mrq)
+{
+       init_completion(&mrq->completion);
+       mrq->done = mmc_wait_done;
+       mmc_start_request(host, mrq);
+}
+
+static void mmc_wait_for_req_done(struct mmc_host *host,
+                                 struct mmc_request *mrq)
+{
+       wait_for_completion(&mrq->completion);
+}
+
+/**
+ *     mmc_pre_req - Prepare for a new request
+ *     @host: MMC host to prepare command
+ *     @mrq: MMC request to prepare for
+ *     @is_first_req: true if there is no previously started request
+ *                     that may run in parallel to this call, otherwise false
+ *
+ *     mmc_pre_req() is called prior to mmc_start_req() to let the
+ *     host prepare for the new request. Preparation of a request may be
+ *     performed while another request is running on the host.
+ */
+static void mmc_pre_req(struct mmc_host *host, struct mmc_request *mrq,
+                bool is_first_req)
+{
+       if (host->ops->pre_req)
+               host->ops->pre_req(host, mrq, is_first_req);
+}
+
+/**
+ *     mmc_post_req - Post-process a completed request
+ *     @host: MMC host to post-process command
+ *     @mrq: MMC request to post-process for
+ *     @err: Error; if non-zero, clean up any resources made in pre_req
+ *
+ *     Let the host post-process a completed request. Post-processing of
+ *     a request may be performed while another request is running.
+ */
+static void mmc_post_req(struct mmc_host *host, struct mmc_request *mrq,
+                        int err)
+{
+       if (host->ops->post_req)
+               host->ops->post_req(host, mrq, err);
+}
+
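The two hooks above are optional; a typical use in hosts that implement them is to map and unmap the DMA scatterlist outside the transfer itself, so that preparing the next request overlaps the current transfer. A hypothetical host-side sketch (not from this patch; a real driver would also record the mapping count and report mapping failures):

/*
 * Hypothetical host driver sketch of pre_req/post_req: map the data
 * scatterlist ahead of time and unmap it after completion.  Error
 * handling is omitted for brevity.
 */
static void sketch_pre_req(struct mmc_host *host, struct mmc_request *mrq,
			   bool is_first_req)
{
	struct mmc_data *data = mrq->data;

	if (data)
		dma_map_sg(mmc_dev(host), data->sg, data->sg_len,
			   (data->flags & MMC_DATA_WRITE) ?
			   DMA_TO_DEVICE : DMA_FROM_DEVICE);
}

static void sketch_post_req(struct mmc_host *host, struct mmc_request *mrq,
			    int err)
{
	struct mmc_data *data = mrq->data;

	if (data)
		dma_unmap_sg(mmc_dev(host), data->sg, data->sg_len,
			     (data->flags & MMC_DATA_WRITE) ?
			     DMA_TO_DEVICE : DMA_FROM_DEVICE);
}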
+/**
+ *     mmc_start_req - start a non-blocking request
+ *     @host: MMC host to start command
+ *     @areq: async request to start
+ *     @error: out parameter; returns 0 for success, otherwise non-zero
+ *
+ *     Start a new MMC custom command request for a host.
+ *     If there is an ongoing async request, wait for completion
+ *     of that request, start the new one, and return.
+ *     Does not wait for the new request to complete.
+ *
+ *     Returns the completed request, or NULL if none has completed.
+ *     Waits for an ongoing request (previously started) to complete and
+ *     returns the completed request. If there is no ongoing request, NULL
+ *     is returned without waiting. NULL is not an error condition.
+ */
+struct mmc_async_req *mmc_start_req(struct mmc_host *host,
+                                   struct mmc_async_req *areq, int *error)
+{
+       int err = 0;
+       struct mmc_async_req *data = host->areq;
+
+       /* Prepare a new request */
+       if (areq)
+               mmc_pre_req(host, areq->mrq, !host->areq);
+
+       if (host->areq) {
+               mmc_wait_for_req_done(host, host->areq->mrq);
+               err = host->areq->err_check(host->card, host->areq);
+               if (err) {
+                       mmc_post_req(host, host->areq->mrq, 0);
+                       if (areq)
+                               mmc_post_req(host, areq->mrq, -EINVAL);
+
+                       host->areq = NULL;
+                       goto out;
+               }
+       }
+
+       if (areq)
+               __mmc_start_req(host, areq->mrq);
+
+       if (host->areq)
+               mmc_post_req(host, host->areq->mrq, 0);
+
+       host->areq = areq;
+ out:
+       if (error)
+               *error = err;
+       return data;
+}
+EXPORT_SYMBOL(mmc_start_req);
+
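From a caller's point of view the contract above is: hand in the next prepared request and get back the previously started one, already waited for and error-checked; pass NULL at the end to drain the pipeline. A condensed sketch of that calling pattern (the two pre-built async requests are assumed to have been set up with their err_check callbacks, much as mmc_test.c does):

/*
 * Sketch of the intended mmc_start_req() calling pattern.  The two
 * async requests are assumed to be fully prepared by the caller.
 */
static int pipeline_two_requests_sketch(struct mmc_host *host,
					struct mmc_async_req *first,
					struct mmc_async_req *second)
{
	struct mmc_async_req *done;
	int err;

	/* Nothing is in flight yet: 'first' is started, NULL comes back. */
	done = mmc_start_req(host, first, &err);
	if (err)
		return err;

	/* Waits for 'first', error-checks it, then starts 'second'. */
	done = mmc_start_req(host, second, &err);
	if (err)
		return err;		/* 'first' (or its err_check) failed */
	WARN_ON(done != first);

	/* Drain: wait for 'second' without starting anything new. */
	done = mmc_start_req(host, NULL, &err);
	WARN_ON(!err && done != second);

	return err;
}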
 /**
  *     mmc_wait_for_req - start a request and wait for completion
  *     @host: MMC host to start command
@@ -212,16 +312,9 @@ static void mmc_wait_done(struct mmc_request *mrq)
  */
 void mmc_wait_for_req(struct mmc_host *host, struct mmc_request *mrq)
 {
-       DECLARE_COMPLETION_ONSTACK(complete);
-
-       mrq->done_data = &complete;
-       mrq->done = mmc_wait_done;
-
-       mmc_start_request(host, mrq);
-
-       wait_for_completion(&complete);
+       __mmc_start_req(host, mrq);
+       mmc_wait_for_req_done(host, mrq);
 }
-
 EXPORT_SYMBOL(mmc_wait_for_req);
 
 /**
@@ -1516,6 +1609,82 @@ int mmc_erase_group_aligned(struct mmc_card *card, unsigned int from,
 }
 EXPORT_SYMBOL(mmc_erase_group_aligned);
 
+static unsigned int mmc_do_calc_max_discard(struct mmc_card *card,
+                                           unsigned int arg)
+{
+       struct mmc_host *host = card->host;
+       unsigned int max_discard, x, y, qty = 0, max_qty, timeout;
+       unsigned int last_timeout = 0;
+
+       if (card->erase_shift)
+               max_qty = UINT_MAX >> card->erase_shift;
+       else if (mmc_card_sd(card))
+               max_qty = UINT_MAX;
+       else
+               max_qty = UINT_MAX / card->erase_size;
+
+       /* Find the largest qty with an OK timeout */
+       do {
+               y = 0;
+               for (x = 1; x && x <= max_qty && max_qty - x >= qty; x <<= 1) {
+                       timeout = mmc_erase_timeout(card, arg, qty + x);
+                       if (timeout > host->max_discard_to)
+                               break;
+                       if (timeout < last_timeout)
+                               break;
+                       last_timeout = timeout;
+                       y = x;
+               }
+               qty += y;
+       } while (y);
+
+       if (!qty)
+               return 0;
+
+       if (qty == 1)
+               return 1;
+
+       /* Convert qty to sectors */
+       if (card->erase_shift)
+               max_discard = --qty << card->erase_shift;
+       else if (mmc_card_sd(card))
+               max_discard = qty;
+       else
+               max_discard = --qty * card->erase_size;
+
+       return max_discard;
+}
+
+unsigned int mmc_calc_max_discard(struct mmc_card *card)
+{
+       struct mmc_host *host = card->host;
+       unsigned int max_discard, max_trim;
+
+       if (!host->max_discard_to)
+               return UINT_MAX;
+
+       /*
+        * Without erase_group_def set, MMC erase timeout depends on clock
+        * frequency, which can change.  In that case, the best choice is
+        * just the preferred erase size.
+        */
+       if (mmc_card_mmc(card) && !(card->ext_csd.erase_group_def & 1))
+               return card->pref_erase;
+
+       max_discard = mmc_do_calc_max_discard(card, MMC_ERASE_ARG);
+       if (mmc_can_trim(card)) {
+               max_trim = mmc_do_calc_max_discard(card, MMC_TRIM_ARG);
+               if (max_trim < max_discard)
+                       max_discard = max_trim;
+       } else if (max_discard < card->erase_size) {
+               max_discard = 0;
+       }
+       pr_debug("%s: calculated max. discard sectors %u for timeout %u ms\n",
+                mmc_hostname(host), max_discard, host->max_discard_to);
+       return max_discard;
+}
+EXPORT_SYMBOL(mmc_calc_max_discard);
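
A sketch of how the queue-setup side might consume the new helper (block-layer calls written from memory, so treat the exact field names as assumptions): the returned value caps max_discard_sectors so a single discard never exceeds the host's max_discard_to timeout.

    #include <linux/blkdev.h>
    #include <linux/mmc/card.h>
    #include <linux/mmc/core.h>

    static void my_setup_discard(struct request_queue *q, struct mmc_card *card)
    {
            unsigned int max_discard = mmc_calc_max_discard(card);

            if (!max_discard)
                    return;         /* too small to be worth advertising */

            queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
            /* granularity in bytes, limit in 512-byte sectors */
            q->limits.discard_granularity = card->pref_erase << 9;
            q->limits.max_discard_sectors = max_discard;
            if (card->erased_byte == 0)
                    q->limits.discard_zeroes_data = 1;
    }
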
+
 int mmc_set_blocklen(struct mmc_card *card, unsigned int blocklen)
 {
        struct mmc_command cmd = {0};
@@ -1663,6 +1832,10 @@ int mmc_power_save_host(struct mmc_host *host)
 {
        int ret = 0;
 
+#ifdef CONFIG_MMC_DEBUG
+       pr_info("%s: %s: powering down\n", mmc_hostname(host), __func__);
+#endif
+
        mmc_bus_get(host);
 
        if (!host->bus_ops || host->bus_dead || !host->bus_ops->power_restore) {
@@ -1685,6 +1858,10 @@ int mmc_power_restore_host(struct mmc_host *host)
 {
        int ret;
 
+#ifdef CONFIG_MMC_DEBUG
+       pr_info("%s: %s: powering up\n", mmc_hostname(host), __func__);
+#endif
+
        mmc_bus_get(host);
 
        if (!host->bus_ops || host->bus_dead || !host->bus_ops->power_restore) {
index ff27741..633975f 100644 (file)
@@ -409,52 +409,62 @@ out:
 
 static int sd_select_driver_type(struct mmc_card *card, u8 *status)
 {
-       int host_drv_type = 0, card_drv_type = 0;
+       int host_drv_type = SD_DRIVER_TYPE_B;
+       int card_drv_type = SD_DRIVER_TYPE_B;
+       int drive_strength;
        int err;
 
        /*
         * If the host doesn't support any of the Driver Types A,C or D,
-        * default Driver Type B is used.
+        * or there is no board specific handler, then the default Driver
+        * Type B is used.
         */
        if (!(card->host->caps & (MMC_CAP_DRIVER_TYPE_A | MMC_CAP_DRIVER_TYPE_C
            | MMC_CAP_DRIVER_TYPE_D)))
                return 0;
 
-       if (card->host->caps & MMC_CAP_DRIVER_TYPE_A) {
-               host_drv_type = MMC_SET_DRIVER_TYPE_A;
-               if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_A)
-                       card_drv_type = MMC_SET_DRIVER_TYPE_A;
-               else if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_B)
-                       card_drv_type = MMC_SET_DRIVER_TYPE_B;
-               else if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_C)
-                       card_drv_type = MMC_SET_DRIVER_TYPE_C;
-       } else if (card->host->caps & MMC_CAP_DRIVER_TYPE_C) {
-               host_drv_type = MMC_SET_DRIVER_TYPE_C;
-               if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_C)
-                       card_drv_type = MMC_SET_DRIVER_TYPE_C;
-       } else if (!(card->host->caps & MMC_CAP_DRIVER_TYPE_D)) {
-               /*
-                * If we are here, that means only the default driver type
-                * B is supported by the host.
-                */
-               host_drv_type = MMC_SET_DRIVER_TYPE_B;
-               if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_B)
-                       card_drv_type = MMC_SET_DRIVER_TYPE_B;
-               else if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_C)
-                       card_drv_type = MMC_SET_DRIVER_TYPE_C;
-       }
+       if (!card->host->ops->select_drive_strength)
+               return 0;
+
+       if (card->host->caps & MMC_CAP_DRIVER_TYPE_A)
+               host_drv_type |= SD_DRIVER_TYPE_A;
+
+       if (card->host->caps & MMC_CAP_DRIVER_TYPE_C)
+               host_drv_type |= SD_DRIVER_TYPE_C;
+
+       if (card->host->caps & MMC_CAP_DRIVER_TYPE_D)
+               host_drv_type |= SD_DRIVER_TYPE_D;
+
+       if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_A)
+               card_drv_type |= SD_DRIVER_TYPE_A;
+
+       if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_C)
+               card_drv_type |= SD_DRIVER_TYPE_C;
+
+       if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_D)
+               card_drv_type |= SD_DRIVER_TYPE_D;
+
+       /*
+        * The drive strength that the hardware can support
+        * depends on the board design.  Pass the appropriate
+        * information and let the hardware specific code
+        * return what is possible given the options.
+        */
+       drive_strength = card->host->ops->select_drive_strength(
+               card->sw_caps.uhs_max_dtr,
+               host_drv_type, card_drv_type);
 
-       err = mmc_sd_switch(card, 1, 2, card_drv_type, status);
+       err = mmc_sd_switch(card, 1, 2, drive_strength, status);
        if (err)
                return err;
 
-       if ((status[15] & 0xF) != card_drv_type) {
-               printk(KERN_WARNING "%s: Problem setting driver strength!\n",
+       if ((status[15] & 0xF) != drive_strength) {
+               printk(KERN_WARNING "%s: Problem setting drive strength!\n",
                        mmc_hostname(card->host));
                return 0;
        }
 
-       mmc_set_driver_type(card->host, host_drv_type);
+       mmc_set_driver_type(card->host, drive_strength);
 
        return 0;
 }
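
A possible board-side ->select_drive_strength() hook (hypothetical policy; the prototype is inferred from the call site above): it sees the union of host and card capabilities plus the card's maximum data rate and returns one MMC_SET_DRIVER_TYPE_* value for the CMD6 switch.

    #include <linux/mmc/card.h>
    #include <linux/mmc/host.h>

    static int my_select_drive_strength(unsigned int max_dtr,
                                        int host_drv, int card_drv)
    {
            /* Use the stronger Type A drive only for fast UHS cards. */
            if (max_dtr > 100000000 && (host_drv & card_drv & SD_DRIVER_TYPE_A))
                    return MMC_SET_DRIVER_TYPE_A;

            return MMC_SET_DRIVER_TYPE_B;   /* default drive strength */
    }

    static const struct mmc_host_ops my_host_ops = {
            /* .request, .set_ios, ... omitted */
            .select_drive_strength  = my_select_drive_strength,
    };
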
index d2565df..e4e6822 100644 (file)
@@ -167,11 +167,8 @@ static int sdio_bus_remove(struct device *dev)
        int ret = 0;
 
        /* Make sure card is powered before invoking ->remove() */
-       if (func->card->host->caps & MMC_CAP_POWER_OFF_CARD) {
-               ret = pm_runtime_get_sync(dev);
-               if (ret < 0)
-                       goto out;
-       }
+       if (func->card->host->caps & MMC_CAP_POWER_OFF_CARD)
+               pm_runtime_get_sync(dev);
 
        drv->remove(func);
 
@@ -191,7 +188,6 @@ static int sdio_bus_remove(struct device *dev)
        if (func->card->host->caps & MMC_CAP_POWER_OFF_CARD)
                pm_runtime_put_sync(dev);
 
-out:
        return ret;
 }
 
index 56dbf3f..8c87096 100644 (file)
@@ -81,28 +81,32 @@ config MMC_RICOH_MMC
 
          If unsure, say Y.
 
-config MMC_SDHCI_OF
-       tristate "SDHCI support on OpenFirmware platforms"
-       depends on MMC_SDHCI && OF
+config MMC_SDHCI_PLTFM
+       tristate "SDHCI platform and OF driver helper"
+       depends on MMC_SDHCI
        help
-         This selects the OF support for Secure Digital Host Controller
-         Interfaces.
+         This selects common helper functions for Secure Digital Host
+         Controller Interface based platform and OF drivers.
+
+         If you have a controller with this interface, say Y or M here.
 
          If unsure, say N.
 
 config MMC_SDHCI_OF_ESDHC
-       bool "SDHCI OF support for the Freescale eSDHC controller"
-       depends on MMC_SDHCI_OF
+       tristate "SDHCI OF support for the Freescale eSDHC controller"
+       depends on MMC_SDHCI_PLTFM
        depends on PPC_OF
        select MMC_SDHCI_BIG_ENDIAN_32BIT_BYTE_SWAPPER
        help
          This selects the Freescale eSDHC controller support.
 
+         If you have a controller with this interface, say Y or M here.
+
          If unsure, say N.
 
 config MMC_SDHCI_OF_HLWD
-       bool "SDHCI OF support for the Nintendo Wii SDHCI controllers"
-       depends on MMC_SDHCI_OF
+       tristate "SDHCI OF support for the Nintendo Wii SDHCI controllers"
+       depends on MMC_SDHCI_PLTFM
        depends on PPC_OF
        select MMC_SDHCI_BIG_ENDIAN_32BIT_BYTE_SWAPPER
        help
@@ -110,40 +114,36 @@ config MMC_SDHCI_OF_HLWD
          found in the "Hollywood" chipset of the Nintendo Wii video game
          console.
 
-         If unsure, say N.
-
-config MMC_SDHCI_PLTFM
-       tristate "SDHCI support on the platform specific bus"
-       depends on MMC_SDHCI
-       help
-         This selects the platform specific bus support for Secure Digital Host
-         Controller Interface.
-
          If you have a controller with this interface, say Y or M here.
 
          If unsure, say N.
 
 config MMC_SDHCI_CNS3XXX
-       bool "SDHCI support on the Cavium Networks CNS3xxx SoC"
+       tristate "SDHCI support on the Cavium Networks CNS3xxx SoC"
        depends on ARCH_CNS3XXX
        depends on MMC_SDHCI_PLTFM
        help
          This selects the SDHCI support for CNS3xxx System-on-Chip devices.
 
+         If you have a controller with this interface, say Y or M here.
+
          If unsure, say N.
 
 config MMC_SDHCI_ESDHC_IMX
-       bool "SDHCI platform support for the Freescale eSDHC i.MX controller"
-       depends on MMC_SDHCI_PLTFM && (ARCH_MX25 || ARCH_MX35 || ARCH_MX5)
+       tristate "SDHCI platform support for the Freescale eSDHC i.MX controller"
+       depends on ARCH_MX25 || ARCH_MX35 || ARCH_MX5
+       depends on MMC_SDHCI_PLTFM
        select MMC_SDHCI_IO_ACCESSORS
        help
          This selects the Freescale eSDHC controller support on the platform
          bus, found on platforms like mx35/51.
 
+         If you have a controller with this interface, say Y or M here.
+
          If unsure, say N.
 
 config MMC_SDHCI_DOVE
-       bool "SDHCI support on Marvell's Dove SoC"
+       tristate "SDHCI support on Marvell's Dove SoC"
        depends on ARCH_DOVE
        depends on MMC_SDHCI_PLTFM
        select MMC_SDHCI_IO_ACCESSORS
@@ -151,11 +151,14 @@ config MMC_SDHCI_DOVE
          This selects the Secure Digital Host Controller Interface in
          Marvell's Dove SoC.
 
+         If you have a controller with this interface, say Y or M here.
+
          If unsure, say N.
 
 config MMC_SDHCI_TEGRA
-       bool "SDHCI platform support for the Tegra SD/MMC Controller"
-       depends on MMC_SDHCI_PLTFM && ARCH_TEGRA
+       tristate "SDHCI platform support for the Tegra SD/MMC Controller"
+       depends on ARCH_TEGRA
+       depends on MMC_SDHCI_PLTFM
        select MMC_SDHCI_IO_ACCESSORS
        help
          This selects the Tegra SD/MMC controller. If you have a Tegra
@@ -178,14 +181,28 @@ config MMC_SDHCI_S3C
 
          If unsure, say N.
 
-config MMC_SDHCI_PXA
-       tristate "Marvell PXA168/PXA910/MMP2 SD Host Controller support"
-       depends on ARCH_PXA || ARCH_MMP
+config MMC_SDHCI_PXAV3
+       tristate "Marvell MMP2 SD Host Controller support (PXAV3)"
+       depends on CLKDEV_LOOKUP
        select MMC_SDHCI
-       select MMC_SDHCI_IO_ACCESSORS
+       select MMC_SDHCI_PLTFM
+       default CPU_MMP2
+       help
+         This selects the Marvell(R) PXAV3 SD Host Controller.
+         If you have an MMP2 platform with SD Host Controller
+         and a card slot, say Y or M here.
+
+         If unsure, say N.
+
+config MMC_SDHCI_PXAV2
+       tristate "Marvell PXA9XX SD Host Controller support (PXAV2)"
+       depends on CLKDEV_LOOKUP
+       select MMC_SDHCI
+       select MMC_SDHCI_PLTFM
+       default CPU_PXA910
        help
-         This selects the Marvell(R) PXA168/PXA910/MMP2 SD Host Controller.
-         If you have a PXA168/PXA910/MMP2 platform with SD Host Controller
+         This selects the Marvell(R) PXAV2 SD Host Controller.
+         If you have a PXA9XX platform with SD Host Controller
          and a card slot, say Y or M here.
 
          If unsure, say N.
@@ -281,13 +298,12 @@ config MMC_ATMELMCI
 endchoice
 
 config MMC_ATMELMCI_DMA
-       bool "Atmel MCI DMA support (EXPERIMENTAL)"
-       depends on MMC_ATMELMCI && (AVR32 || ARCH_AT91SAM9G45) && DMA_ENGINE && EXPERIMENTAL
+       bool "Atmel MCI DMA support"
+       depends on MMC_ATMELMCI && (AVR32 || ARCH_AT91SAM9G45) && DMA_ENGINE
        help
          Say Y here to have the Atmel MCI driver use a DMA engine to
          do data transfers and thus increase the throughput and
-         reduce the CPU utilization. Note that this is highly
-         experimental and may cause the driver to lock up.
+         reduce the CPU utilization.
 
          If unsure, say N.
 
index 58a5cf7..b4b83f3 100644 (file)
@@ -9,7 +9,8 @@ obj-$(CONFIG_MMC_MXC)           += mxcmmc.o
 obj-$(CONFIG_MMC_MXS)          += mxs-mmc.o
 obj-$(CONFIG_MMC_SDHCI)                += sdhci.o
 obj-$(CONFIG_MMC_SDHCI_PCI)    += sdhci-pci.o
-obj-$(CONFIG_MMC_SDHCI_PXA)    += sdhci-pxa.o
+obj-$(CONFIG_MMC_SDHCI_PXAV3)  += sdhci-pxav3.o
+obj-$(CONFIG_MMC_SDHCI_PXAV2)  += sdhci-pxav2.o
 obj-$(CONFIG_MMC_SDHCI_S3C)    += sdhci-s3c.o
 obj-$(CONFIG_MMC_SDHCI_SPEAR)  += sdhci-spear.o
 obj-$(CONFIG_MMC_WBSD)         += wbsd.o
@@ -31,9 +32,7 @@ obj-$(CONFIG_MMC_SDRICOH_CS)  += sdricoh_cs.o
 obj-$(CONFIG_MMC_TMIO)         += tmio_mmc.o
 obj-$(CONFIG_MMC_TMIO_CORE)    += tmio_mmc_core.o
 tmio_mmc_core-y                        := tmio_mmc_pio.o
-ifneq ($(CONFIG_MMC_SDHI),n)
-tmio_mmc_core-y                        += tmio_mmc_dma.o
-endif
+tmio_mmc_core-$(subst m,y,$(CONFIG_MMC_SDHI))  += tmio_mmc_dma.o
 obj-$(CONFIG_MMC_SDHI)         += sh_mobile_sdhi.o
 obj-$(CONFIG_MMC_CB710)                += cb710-mmc.o
 obj-$(CONFIG_MMC_VIA_SDMMC)    += via-sdmmc.o
@@ -44,17 +43,13 @@ obj-$(CONFIG_MMC_JZ4740)    += jz4740_mmc.o
 obj-$(CONFIG_MMC_VUB300)       += vub300.o
 obj-$(CONFIG_MMC_USHC)         += ushc.o
 
-obj-$(CONFIG_MMC_SDHCI_PLTFM)                  += sdhci-platform.o
-sdhci-platform-y                               := sdhci-pltfm.o
-sdhci-platform-$(CONFIG_MMC_SDHCI_CNS3XXX)     += sdhci-cns3xxx.o
-sdhci-platform-$(CONFIG_MMC_SDHCI_ESDHC_IMX)   += sdhci-esdhc-imx.o
-sdhci-platform-$(CONFIG_MMC_SDHCI_DOVE)                += sdhci-dove.o
-sdhci-platform-$(CONFIG_MMC_SDHCI_TEGRA)       += sdhci-tegra.o
-
-obj-$(CONFIG_MMC_SDHCI_OF)     += sdhci-of.o
-sdhci-of-y                             := sdhci-of-core.o
-sdhci-of-$(CONFIG_MMC_SDHCI_OF_ESDHC)  += sdhci-of-esdhc.o
-sdhci-of-$(CONFIG_MMC_SDHCI_OF_HLWD)   += sdhci-of-hlwd.o
+obj-$(CONFIG_MMC_SDHCI_PLTFM)          += sdhci-pltfm.o
+obj-$(CONFIG_MMC_SDHCI_CNS3XXX)                += sdhci-cns3xxx.o
+obj-$(CONFIG_MMC_SDHCI_ESDHC_IMX)      += sdhci-esdhc-imx.o
+obj-$(CONFIG_MMC_SDHCI_DOVE)           += sdhci-dove.o
+obj-$(CONFIG_MMC_SDHCI_TEGRA)          += sdhci-tegra.o
+obj-$(CONFIG_MMC_SDHCI_OF_ESDHC)       += sdhci-of-esdhc.o
+obj-$(CONFIG_MMC_SDHCI_OF_HLWD)                += sdhci-of-hlwd.o
 
 ifeq ($(CONFIG_CB710_DEBUG),y)
        CFLAGS-cb710-mmc        += -DDEBUG
index d3e6a96..a4aa3af 100644 (file)
@@ -77,7 +77,8 @@
 
 #include <mach/board.h>
 #include <mach/cpu.h>
-#include <mach/at91_mci.h>
+
+#include "at91_mci.h"
 
 #define DRIVER_NAME "at91_mci"
 
similarity index 99%
rename from arch/arm/mach-at91/include/mach/at91_mci.h
rename to drivers/mmc/host/at91_mci.h
index 02182c1..eec3a6b 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * arch/arm/mach-at91/include/mach/at91_mci.h
+ * drivers/mmc/host/at91_mci.h
  *
  * Copyright (C) 2005 Ivan Kokshaysky
  * Copyright (C) SAN People
index aa8039f..fa8cae1 100644 (file)
@@ -203,6 +203,7 @@ struct atmel_mci_slot {
 #define ATMCI_CARD_PRESENT     0
 #define ATMCI_CARD_NEED_INIT   1
 #define ATMCI_SHUTDOWN         2
+#define ATMCI_SUSPENDED                3
 
        int                     detect_pin;
        int                     wp_pin;
@@ -1878,10 +1879,72 @@ static int __exit atmci_remove(struct platform_device *pdev)
        return 0;
 }
 
+#ifdef CONFIG_PM
+static int atmci_suspend(struct device *dev)
+{
+       struct atmel_mci *host = dev_get_drvdata(dev);
+       int i;
+
+       for (i = 0; i < ATMEL_MCI_MAX_NR_SLOTS; i++) {
+               struct atmel_mci_slot *slot = host->slot[i];
+               int ret;
+
+               if (!slot)
+                       continue;
+               ret = mmc_suspend_host(slot->mmc);
+               if (ret < 0) {
+                       while (--i >= 0) {
+                               slot = host->slot[i];
+                               if (slot
+                               && test_bit(ATMCI_SUSPENDED, &slot->flags)) {
+                                       mmc_resume_host(host->slot[i]->mmc);
+                                       clear_bit(ATMCI_SUSPENDED, &slot->flags);
+                               }
+                       }
+                       return ret;
+               } else {
+                       set_bit(ATMCI_SUSPENDED, &slot->flags);
+               }
+       }
+
+       return 0;
+}
+
+static int atmci_resume(struct device *dev)
+{
+       struct atmel_mci *host = dev_get_drvdata(dev);
+       int i;
+       int ret = 0;
+
+       for (i = 0; i < ATMEL_MCI_MAX_NR_SLOTS; i++) {
+               struct atmel_mci_slot *slot = host->slot[i];
+               int err;
+
+               if (!slot)
+                       continue;
+               if (!test_bit(ATMCI_SUSPENDED, &slot->flags))
+                       continue;
+               err = mmc_resume_host(slot->mmc);
+               if (err < 0)
+                       ret = err;
+               else
+                       clear_bit(ATMCI_SUSPENDED, &slot->flags);
+       }
+
+       return ret;
+}
+
+static SIMPLE_DEV_PM_OPS(atmci_pm, atmci_suspend, atmci_resume);
+#define ATMCI_PM_OPS   (&atmci_pm)
+#else
+#define ATMCI_PM_OPS   NULL
+#endif
+
 static struct platform_driver atmci_driver = {
        .remove         = __exit_p(atmci_remove),
        .driver         = {
                .name           = "atmel_mci",
+               .pm             = ATMCI_PM_OPS,
        },
 };
 
index 66dcddb..0c839d3 100644 (file)
@@ -33,6 +33,7 @@
 #include <linux/mmc/dw_mmc.h>
 #include <linux/bitops.h>
 #include <linux/regulator/consumer.h>
+#include <linux/workqueue.h>
 
 #include "dw_mmc.h"
 
@@ -100,6 +101,8 @@ struct dw_mci_slot {
        int                     last_detect_state;
 };
 
+static struct workqueue_struct *dw_mci_card_workqueue;
+
 #if defined(CONFIG_DEBUG_FS)
 static int dw_mci_req_show(struct seq_file *s, void *v)
 {
@@ -284,7 +287,7 @@ static void send_stop_cmd(struct dw_mci *host, struct mmc_data *data)
 /* DMA interface functions */
 static void dw_mci_stop_dma(struct dw_mci *host)
 {
-       if (host->use_dma) {
+       if (host->using_dma) {
                host->dma_ops->stop(host);
                host->dma_ops->cleanup(host);
        } else {
@@ -432,6 +435,8 @@ static int dw_mci_submit_data_dma(struct dw_mci *host, struct mmc_data *data)
        unsigned int i, direction, sg_len;
        u32 temp;
 
+       host->using_dma = 0;
+
        /* If we don't have a channel, we can't do DMA */
        if (!host->use_dma)
                return -ENODEV;
@@ -451,6 +456,8 @@ static int dw_mci_submit_data_dma(struct dw_mci *host, struct mmc_data *data)
                        return -EINVAL;
        }
 
+       host->using_dma = 1;
+
        if (data->flags & MMC_DATA_READ)
                direction = DMA_FROM_DEVICE;
        else
@@ -489,14 +496,18 @@ static void dw_mci_submit_data(struct dw_mci *host, struct mmc_data *data)
        host->sg = NULL;
        host->data = data;
 
+       if (data->flags & MMC_DATA_READ)
+               host->dir_status = DW_MCI_RECV_STATUS;
+       else
+               host->dir_status = DW_MCI_SEND_STATUS;
+
        if (dw_mci_submit_data_dma(host, data)) {
                host->sg = data->sg;
                host->pio_offset = 0;
-               if (data->flags & MMC_DATA_READ)
-                       host->dir_status = DW_MCI_RECV_STATUS;
-               else
-                       host->dir_status = DW_MCI_SEND_STATUS;
+               host->part_buf_start = 0;
+               host->part_buf_count = 0;
 
+               mci_writel(host, RINTSTS, SDMMC_INT_TXDR | SDMMC_INT_RXDR);
                temp = mci_readl(host, INTMASK);
                temp |= SDMMC_INT_TXDR | SDMMC_INT_RXDR;
                mci_writel(host, INTMASK, temp);
@@ -574,7 +585,7 @@ static void dw_mci_setup_bus(struct dw_mci_slot *slot)
        }
 
        /* Set the current slot bus width */
-       mci_writel(host, CTYPE, slot->ctype);
+       mci_writel(host, CTYPE, (slot->ctype << slot->id));
 }
 
 static void dw_mci_start_request(struct dw_mci *host,
@@ -624,13 +635,13 @@ static void dw_mci_start_request(struct dw_mci *host,
                host->stop_cmdr = dw_mci_prepare_command(slot->mmc, mrq->stop);
 }
 
+/* must be called with host->lock held */
 static void dw_mci_queue_request(struct dw_mci *host, struct dw_mci_slot *slot,
                                 struct mmc_request *mrq)
 {
        dev_vdbg(&slot->mmc->class_dev, "queue request: state=%d\n",
                 host->state);
 
-       spin_lock_bh(&host->lock);
        slot->mrq = mrq;
 
        if (host->state == STATE_IDLE) {
@@ -639,8 +650,6 @@ static void dw_mci_queue_request(struct dw_mci *host, struct dw_mci_slot *slot,
        } else {
                list_add_tail(&slot->queue_node, &host->queue);
        }
-
-       spin_unlock_bh(&host->lock);
 }
 
 static void dw_mci_request(struct mmc_host *mmc, struct mmc_request *mrq)
@@ -650,14 +659,23 @@ static void dw_mci_request(struct mmc_host *mmc, struct mmc_request *mrq)
 
        WARN_ON(slot->mrq);
 
+       /*
+        * The check for card presence and queueing of the request must be
+        * atomic, otherwise the card could be removed in between and the
+        * request wouldn't fail until another card was inserted.
+        */
+       spin_lock_bh(&host->lock);
+
        if (!test_bit(DW_MMC_CARD_PRESENT, &slot->flags)) {
+               spin_unlock_bh(&host->lock);
                mrq->cmd->error = -ENOMEDIUM;
                mmc_request_done(mmc, mrq);
                return;
        }
 
-       /* We don't support multiple blocks of weird lengths. */
        dw_mci_queue_request(host, slot, mrq);
+
+       spin_unlock_bh(&host->lock);
 }
 
 static void dw_mci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
@@ -831,7 +849,7 @@ static void dw_mci_tasklet_func(unsigned long priv)
        struct mmc_command *cmd;
        enum dw_mci_state state;
        enum dw_mci_state prev_state;
-       u32 status;
+       u32 status, ctrl;
 
        spin_lock(&host->lock);
 
@@ -891,13 +909,19 @@ static void dw_mci_tasklet_func(unsigned long priv)
 
                        if (status & DW_MCI_DATA_ERROR_FLAGS) {
                                if (status & SDMMC_INT_DTO) {
-                                       dev_err(&host->pdev->dev,
-                                               "data timeout error\n");
                                        data->error = -ETIMEDOUT;
                                } else if (status & SDMMC_INT_DCRC) {
-                                       dev_err(&host->pdev->dev,
-                                               "data CRC error\n");
                                        data->error = -EILSEQ;
+                               } else if (status & SDMMC_INT_EBE &&
+                                          host->dir_status ==
+                                                       DW_MCI_SEND_STATUS) {
+                                       /*
+                                        * No data CRC status was returned.
+                                        * The number of bytes transferred will
+                                        * be exaggerated in PIO mode.
+                                        */
+                                       data->bytes_xfered = 0;
+                                       data->error = -ETIMEDOUT;
                                } else {
                                        dev_err(&host->pdev->dev,
                                                "data FIFO error "
@@ -905,6 +929,16 @@ static void dw_mci_tasklet_func(unsigned long priv)
                                                status);
                                        data->error = -EIO;
                                }
+                               /*
+                                * After an error, there may be data lingering
+                                * in the FIFO, so reset it.  Resetting
+                                * generates a block interrupt, which is why
+                                * the scatter-gather pointer is cleared first.
+                                */
+                               host->sg = NULL;
+                               ctrl = mci_readl(host, CTRL);
+                               ctrl |= SDMMC_CTRL_FIFO_RESET;
+                               mci_writel(host, CTRL, ctrl);
                        } else {
                                data->bytes_xfered = data->blocks * data->blksz;
                                data->error = 0;
@@ -946,84 +980,278 @@ unlock:
 
 }
 
-static void dw_mci_push_data16(struct dw_mci *host, void *buf, int cnt)
+/* push final bytes to part_buf, only use during push */
+static void dw_mci_set_part_bytes(struct dw_mci *host, void *buf, int cnt)
 {
-       u16 *pdata = (u16 *)buf;
+       memcpy((void *)&host->part_buf, buf, cnt);
+       host->part_buf_count = cnt;
+}
 
-       WARN_ON(cnt % 2 != 0);
+/* append bytes to part_buf, only use during push */
+static int dw_mci_push_part_bytes(struct dw_mci *host, void *buf, int cnt)
+{
+       cnt = min(cnt, (1 << host->data_shift) - host->part_buf_count);
+       memcpy((void *)&host->part_buf + host->part_buf_count, buf, cnt);
+       host->part_buf_count += cnt;
+       return cnt;
+}
 
-       cnt = cnt >> 1;
-       while (cnt > 0) {
-               mci_writew(host, DATA, *pdata++);
-               cnt--;
+/* pull first bytes from part_buf, only use during pull */
+static int dw_mci_pull_part_bytes(struct dw_mci *host, void *buf, int cnt)
+{
+       cnt = min(cnt, (int)host->part_buf_count);
+       if (cnt) {
+               memcpy(buf, (void *)&host->part_buf + host->part_buf_start,
+                      cnt);
+               host->part_buf_count -= cnt;
+               host->part_buf_start += cnt;
        }
+       return cnt;
 }
 
-static void dw_mci_pull_data16(struct dw_mci *host, void *buf, int cnt)
+/* pull final bytes from the part_buf, assuming it's just been filled */
+static void dw_mci_pull_final_bytes(struct dw_mci *host, void *buf, int cnt)
 {
-       u16 *pdata = (u16 *)buf;
+       memcpy(buf, &host->part_buf, cnt);
+       host->part_buf_start = cnt;
+       host->part_buf_count = (1 << host->data_shift) - cnt;
+}
 
-       WARN_ON(cnt % 2 != 0);
+static void dw_mci_push_data16(struct dw_mci *host, void *buf, int cnt)
+{
+       /* try and push anything in the part_buf */
+       if (unlikely(host->part_buf_count)) {
+               int len = dw_mci_push_part_bytes(host, buf, cnt);
+               buf += len;
+               cnt -= len;
+               if (!sg_next(host->sg) || host->part_buf_count == 2) {
+                       mci_writew(host, DATA, host->part_buf16);
+                       host->part_buf_count = 0;
+               }
+       }
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+       if (unlikely((unsigned long)buf & 0x1)) {
+               while (cnt >= 2) {
+                       u16 aligned_buf[64];
+                       int len = min(cnt & -2, (int)sizeof(aligned_buf));
+                       int items = len >> 1;
+                       int i;
+                       /* memcpy from input buffer into aligned buffer */
+                       memcpy(aligned_buf, buf, len);
+                       buf += len;
+                       cnt -= len;
+                       /* push data from aligned buffer into fifo */
+                       for (i = 0; i < items; ++i)
+                               mci_writew(host, DATA, aligned_buf[i]);
+               }
+       } else
+#endif
+       {
+               u16 *pdata = buf;
+               for (; cnt >= 2; cnt -= 2)
+                       mci_writew(host, DATA, *pdata++);
+               buf = pdata;
+       }
+       /* put anything remaining in the part_buf */
+       if (cnt) {
+               dw_mci_set_part_bytes(host, buf, cnt);
+               if (!sg_next(host->sg))
+                       mci_writew(host, DATA, host->part_buf16);
+       }
+}
 
-       cnt = cnt >> 1;
-       while (cnt > 0) {
-               *pdata++ = mci_readw(host, DATA);
-               cnt--;
+static void dw_mci_pull_data16(struct dw_mci *host, void *buf, int cnt)
+{
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+       if (unlikely((unsigned long)buf & 0x1)) {
+               while (cnt >= 2) {
+                       /* pull data from fifo into aligned buffer */
+                       u16 aligned_buf[64];
+                       int len = min(cnt & -2, (int)sizeof(aligned_buf));
+                       int items = len >> 1;
+                       int i;
+                       for (i = 0; i < items; ++i)
+                               aligned_buf[i] = mci_readw(host, DATA);
+                       /* memcpy from aligned buffer into output buffer */
+                       memcpy(buf, aligned_buf, len);
+                       buf += len;
+                       cnt -= len;
+               }
+       } else
+#endif
+       {
+               u16 *pdata = buf;
+               for (; cnt >= 2; cnt -= 2)
+                       *pdata++ = mci_readw(host, DATA);
+               buf = pdata;
+       }
+       if (cnt) {
+               host->part_buf16 = mci_readw(host, DATA);
+               dw_mci_pull_final_bytes(host, buf, cnt);
        }
 }
 
 static void dw_mci_push_data32(struct dw_mci *host, void *buf, int cnt)
 {
-       u32 *pdata = (u32 *)buf;
-
-       WARN_ON(cnt % 4 != 0);
-       WARN_ON((unsigned long)pdata & 0x3);
-
-       cnt = cnt >> 2;
-       while (cnt > 0) {
-               mci_writel(host, DATA, *pdata++);
-               cnt--;
+       /* try and push anything in the part_buf */
+       if (unlikely(host->part_buf_count)) {
+               int len = dw_mci_push_part_bytes(host, buf, cnt);
+               buf += len;
+               cnt -= len;
+               if (!sg_next(host->sg) || host->part_buf_count == 4) {
+                       mci_writel(host, DATA, host->part_buf32);
+                       host->part_buf_count = 0;
+               }
+       }
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+       if (unlikely((unsigned long)buf & 0x3)) {
+               while (cnt >= 4) {
+                       u32 aligned_buf[32];
+                       int len = min(cnt & -4, (int)sizeof(aligned_buf));
+                       int items = len >> 2;
+                       int i;
+                       /* memcpy from input buffer into aligned buffer */
+                       memcpy(aligned_buf, buf, len);
+                       buf += len;
+                       cnt -= len;
+                       /* push data from aligned buffer into fifo */
+                       for (i = 0; i < items; ++i)
+                               mci_writel(host, DATA, aligned_buf[i]);
+               }
+       } else
+#endif
+       {
+               u32 *pdata = buf;
+               for (; cnt >= 4; cnt -= 4)
+                       mci_writel(host, DATA, *pdata++);
+               buf = pdata;
+       }
+       /* put anything remaining in the part_buf */
+       if (cnt) {
+               dw_mci_set_part_bytes(host, buf, cnt);
+               if (!sg_next(host->sg))
+                       mci_writel(host, DATA, host->part_buf32);
        }
 }
 
 static void dw_mci_pull_data32(struct dw_mci *host, void *buf, int cnt)
 {
-       u32 *pdata = (u32 *)buf;
-
-       WARN_ON(cnt % 4 != 0);
-       WARN_ON((unsigned long)pdata & 0x3);
-
-       cnt = cnt >> 2;
-       while (cnt > 0) {
-               *pdata++ = mci_readl(host, DATA);
-               cnt--;
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+       if (unlikely((unsigned long)buf & 0x3)) {
+               while (cnt >= 4) {
+                       /* pull data from fifo into aligned buffer */
+                       u32 aligned_buf[32];
+                       int len = min(cnt & -4, (int)sizeof(aligned_buf));
+                       int items = len >> 2;
+                       int i;
+                       for (i = 0; i < items; ++i)
+                               aligned_buf[i] = mci_readl(host, DATA);
+                       /* memcpy from aligned buffer into output buffer */
+                       memcpy(buf, aligned_buf, len);
+                       buf += len;
+                       cnt -= len;
+               }
+       } else
+#endif
+       {
+               u32 *pdata = buf;
+               for (; cnt >= 4; cnt -= 4)
+                       *pdata++ = mci_readl(host, DATA);
+               buf = pdata;
+       }
+       if (cnt) {
+               host->part_buf32 = mci_readl(host, DATA);
+               dw_mci_pull_final_bytes(host, buf, cnt);
        }
 }
 
 static void dw_mci_push_data64(struct dw_mci *host, void *buf, int cnt)
 {
-       u64 *pdata = (u64 *)buf;
-
-       WARN_ON(cnt % 8 != 0);
-
-       cnt = cnt >> 3;
-       while (cnt > 0) {
-               mci_writeq(host, DATA, *pdata++);
-               cnt--;
+       /* try and push anything in the part_buf */
+       if (unlikely(host->part_buf_count)) {
+               int len = dw_mci_push_part_bytes(host, buf, cnt);
+               buf += len;
+               cnt -= len;
+               if (!sg_next(host->sg) || host->part_buf_count == 8) {
+                       mci_writeq(host, DATA, host->part_buf);
+                       host->part_buf_count = 0;
+               }
+       }
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+       if (unlikely((unsigned long)buf & 0x7)) {
+               while (cnt >= 8) {
+                       u64 aligned_buf[16];
+                       int len = min(cnt & -8, (int)sizeof(aligned_buf));
+                       int items = len >> 3;
+                       int i;
+                       /* memcpy from input buffer into aligned buffer */
+                       memcpy(aligned_buf, buf, len);
+                       buf += len;
+                       cnt -= len;
+                       /* push data from aligned buffer into fifo */
+                       for (i = 0; i < items; ++i)
+                               mci_writeq(host, DATA, aligned_buf[i]);
+               }
+       } else
+#endif
+       {
+               u64 *pdata = buf;
+               for (; cnt >= 8; cnt -= 8)
+                       mci_writeq(host, DATA, *pdata++);
+               buf = pdata;
+       }
+       /* put anything remaining in the part_buf */
+       if (cnt) {
+               dw_mci_set_part_bytes(host, buf, cnt);
+               if (!sg_next(host->sg))
+                       mci_writeq(host, DATA, host->part_buf);
        }
 }
 
 static void dw_mci_pull_data64(struct dw_mci *host, void *buf, int cnt)
 {
-       u64 *pdata = (u64 *)buf;
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+       if (unlikely((unsigned long)buf & 0x7)) {
+               while (cnt >= 8) {
+                       /* pull data from fifo into aligned buffer */
+                       u64 aligned_buf[16];
+                       int len = min(cnt & -8, (int)sizeof(aligned_buf));
+                       int items = len >> 3;
+                       int i;
+                       for (i = 0; i < items; ++i)
+                               aligned_buf[i] = mci_readq(host, DATA);
+                       /* memcpy from aligned buffer into output buffer */
+                       memcpy(buf, aligned_buf, len);
+                       buf += len;
+                       cnt -= len;
+               }
+       } else
+#endif
+       {
+               u64 *pdata = buf;
+               for (; cnt >= 8; cnt -= 8)
+                       *pdata++ = mci_readq(host, DATA);
+               buf = pdata;
+       }
+       if (cnt) {
+               host->part_buf = mci_readq(host, DATA);
+               dw_mci_pull_final_bytes(host, buf, cnt);
+       }
+}
 
-       WARN_ON(cnt % 8 != 0);
+static void dw_mci_pull_data(struct dw_mci *host, void *buf, int cnt)
+{
+       int len;
 
-       cnt = cnt >> 3;
-       while (cnt > 0) {
-               *pdata++ = mci_readq(host, DATA);
-               cnt--;
-       }
+       /* get remaining partial bytes */
+       len = dw_mci_pull_part_bytes(host, buf, cnt);
+       if (unlikely(len == cnt))
+               return;
+       buf += len;
+       cnt -= len;
+
+       /* get the rest of the data */
+       host->pull_data(host, buf, cnt);
 }
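
The same partial-word idea in isolation, as a plain user-space sketch (all names invented): a 16-bit FIFO only ever receives whole words, so an odd trailing byte is parked and completed by the first byte of the next scatter-gather segment, or padded out when there is no next segment.

    #include <stdint.h>
    #include <string.h>

    static uint16_t part_buf16;     /* parked partial word */
    static int part_buf_count;      /* bytes currently parked (0 or 1) */

    static void fifo_write16(uint16_t w);   /* stand-in for mci_writew() */

    /* 'last' mimics !sg_next(host->sg) for the final segment. */
    static void push16(const uint8_t *buf, int cnt, int last)
    {
            if (part_buf_count && cnt) {
                    /* complete the previously parked byte */
                    ((uint8_t *)&part_buf16)[1] = *buf++;
                    cnt--;
                    fifo_write16(part_buf16);
                    part_buf_count = 0;
            }
            for (; cnt >= 2; cnt -= 2, buf += 2) {
                    uint16_t w;

                    memcpy(&w, buf, 2);     /* no unaligned load */
                    fifo_write16(w);
            }
            if (cnt) {
                    /* park the odd trailing byte */
                    ((uint8_t *)&part_buf16)[0] = *buf;
                    part_buf_count = 1;
                    if (last)       /* pad and flush; high byte is don't-care */
                            fifo_write16(part_buf16);
            }
    }
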
 
 static void dw_mci_read_data_pio(struct dw_mci *host)
@@ -1037,9 +1265,10 @@ static void dw_mci_read_data_pio(struct dw_mci *host)
        unsigned int nbytes = 0, len;
 
        do {
-               len = SDMMC_GET_FCNT(mci_readl(host, STATUS)) << shift;
+               len = host->part_buf_count +
+                       (SDMMC_GET_FCNT(mci_readl(host, STATUS)) << shift);
                if (offset + len <= sg->length) {
-                       host->pull_data(host, (void *)(buf + offset), len);
+                       dw_mci_pull_data(host, (void *)(buf + offset), len);
 
                        offset += len;
                        nbytes += len;
@@ -1055,8 +1284,8 @@ static void dw_mci_read_data_pio(struct dw_mci *host)
                        }
                } else {
                        unsigned int remaining = sg->length - offset;
-                       host->pull_data(host, (void *)(buf + offset),
-                                       remaining);
+                       dw_mci_pull_data(host, (void *)(buf + offset),
+                                        remaining);
                        nbytes += remaining;
 
                        flush_dcache_page(sg_page(sg));
@@ -1066,7 +1295,7 @@ static void dw_mci_read_data_pio(struct dw_mci *host)
 
                        offset = len - remaining;
                        buf = sg_virt(sg);
-                       host->pull_data(host, buf, offset);
+                       dw_mci_pull_data(host, buf, offset);
                        nbytes += offset;
                }
 
@@ -1083,7 +1312,6 @@ stat