SUNRPC: Add API to acquire source address
[pandora-kernel.git] / fs / direct-io.c
index d740ab6..4a588db 100644 (file)
@@ -36,6 +36,7 @@
 #include <linux/rwsem.h>
 #include <linux/uio.h>
 #include <linux/atomic.h>
+#include <linux/prefetch.h>
 
 /*
  * How many user pages to map in one call to get_user_pages().  This determines
@@ -580,9 +581,8 @@ static int get_more_blocks(struct dio *dio, struct dio_submit *sdio,
 {
        int ret;
        sector_t fs_startblk;   /* Into file, in filesystem-sized blocks */
+       sector_t fs_endblk;     /* Into file, in filesystem-sized blocks */
        unsigned long fs_count; /* Number of filesystem-sized blocks */
-       unsigned long dio_count;/* Number of dio_block-sized blocks */
-       unsigned long blkmask;
        int create;
 
        /*
@@ -593,11 +593,9 @@ static int get_more_blocks(struct dio *dio, struct dio_submit *sdio,
        if (ret == 0) {
                BUG_ON(sdio->block_in_file >= sdio->final_block_in_request);
                fs_startblk = sdio->block_in_file >> sdio->blkfactor;
-               dio_count = sdio->final_block_in_request - sdio->block_in_file;
-               fs_count = dio_count >> sdio->blkfactor;
-               blkmask = (1 << sdio->blkfactor) - 1;
-               if (dio_count & blkmask)        
-                       fs_count++;
+               fs_endblk = (sdio->final_block_in_request - 1) >>
+                                       sdio->blkfactor;
+               fs_count = fs_endblk - fs_startblk + 1;
 
                map_bh->b_state = 0;
                map_bh->b_size = fs_count << dio->inode->i_blkbits;
@@ -1090,8 +1088,8 @@ static inline int drop_refcount(struct dio *dio)
  * individual fields and will generate much worse code. This is important
  * for the whole file.
  */
-ssize_t
-__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
+static inline ssize_t
+do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
        struct block_device *bdev, const struct iovec *iov, loff_t offset, 
        unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
        dio_submit_t submit_io, int flags)
@@ -1100,7 +1098,6 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
        size_t size;
        unsigned long addr;
        unsigned blkbits = inode->i_blkbits;
-       unsigned bdev_blkbits = 0;
        unsigned blocksize_mask = (1 << blkbits) - 1;
        ssize_t retval = -EINVAL;
        loff_t end = offset;
@@ -1113,12 +1110,14 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
        if (rw & WRITE)
                rw = WRITE_ODIRECT;
 
-       if (bdev)
-               bdev_blkbits = blksize_bits(bdev_logical_block_size(bdev));
+       /*
+        * Avoid references to bdev if not absolutely needed to give
+        * the early prefetch in the caller enough time.
+        */
 
        if (offset & blocksize_mask) {
                if (bdev)
-                        blkbits = bdev_blkbits;
+                       blkbits = blksize_bits(bdev_logical_block_size(bdev));
                blocksize_mask = (1 << blkbits) - 1;
                if (offset & blocksize_mask)
                        goto out;
@@ -1129,11 +1128,13 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
                addr = (unsigned long)iov[seg].iov_base;
                size = iov[seg].iov_len;
                end += size;
-               if ((addr & blocksize_mask) || (size & blocksize_mask))  {
+               if (unlikely((addr & blocksize_mask) ||
+                            (size & blocksize_mask))) {
                        if (bdev)
-                                blkbits = bdev_blkbits;
+                               blkbits = blksize_bits(
+                                        bdev_logical_block_size(bdev));
                        blocksize_mask = (1 << blkbits) - 1;
-                       if ((addr & blocksize_mask) || (size & blocksize_mask))  
+                       if ((addr & blocksize_mask) || (size & blocksize_mask))
                                goto out;
                }
        }
@@ -1316,6 +1317,30 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 out:
        return retval;
 }
+
+ssize_t
+__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
+       struct block_device *bdev, const struct iovec *iov, loff_t offset,
+       unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
+       dio_submit_t submit_io, int flags)
+{
+       /*
+        * The block device state is needed in the end to finally
+        * submit everything.  Since it's likely to be cache cold
+        * prefetch it here as first thing to hide some of the
+        * latency.
+        *
+        * Attempt to prefetch the pieces we likely need later.
+        */
+       prefetch(&bdev->bd_disk->part_tbl);
+       prefetch(bdev->bd_queue);
+       prefetch((char *)bdev->bd_queue + SMP_CACHE_BYTES);
+
+       return do_blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset,
+                                    nr_segs, get_block, end_io,
+                                    submit_io, flags);
+}
+
 EXPORT_SYMBOL(__blockdev_direct_IO);
 
 static __init int dio_init(void)