->splice_write() via ->write_iter()
authorAl Viro <viro@zeniv.linux.org.uk>
Sat, 5 Apr 2014 08:27:08 +0000 (04:27 -0400)
committerAl Viro <viro@zeniv.linux.org.uk>
Thu, 12 Jun 2014 04:18:51 +0000 (00:18 -0400)
iter_file_splice_write() - a ->splice_write() instance that gathers the
pipe buffers, builds a bio_vec-based iov_iter covering those and feeds
it to ->write_iter().  A bunch of simple cases coverted to that...

[AV: fixed the braino spotted by Cyrill]

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
16 files changed:
fs/block_dev.c
fs/exofs/file.c
fs/ext2/file.c
fs/ext3/file.c
fs/ext4/file.c
fs/f2fs/file.c
fs/gfs2/file.c
fs/jfs/file.c
fs/ramfs/file-mmu.c
fs/ramfs/file-nommu.c
fs/reiserfs/file.c
fs/splice.c
fs/ubifs/file.c
fs/xfs/xfs_file.c
fs/xfs/xfs_trace.h
include/linux/fs.h

index 4e36b8e..e68e150 100644 (file)
@@ -1583,7 +1583,7 @@ const struct file_operations def_blk_fops = {
        .compat_ioctl   = compat_blkdev_ioctl,
 #endif
        .splice_read    = generic_file_splice_read,
-       .splice_write   = generic_file_splice_write,
+       .splice_write   = iter_file_splice_write,
 };
 
 int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg)
index 5b7f6be..71bf8e4 100644 (file)
@@ -77,7 +77,7 @@ const struct file_operations exofs_file_operations = {
        .fsync          = exofs_file_fsync,
        .flush          = exofs_flush,
        .splice_read    = generic_file_splice_read,
-       .splice_write   = generic_file_splice_write,
+       .splice_write   = iter_file_splice_write,
 };
 
 const struct inode_operations exofs_file_inode_operations = {
index 970c6ac..7c87b22 100644 (file)
@@ -75,7 +75,7 @@ const struct file_operations ext2_file_operations = {
        .release        = ext2_release_file,
        .fsync          = ext2_fsync,
        .splice_read    = generic_file_splice_read,
-       .splice_write   = generic_file_splice_write,
+       .splice_write   = iter_file_splice_write,
 };
 
 #ifdef CONFIG_EXT2_FS_XIP
index c833b12..a062fa1 100644 (file)
@@ -63,7 +63,7 @@ const struct file_operations ext3_file_operations = {
        .release        = ext3_release_file,
        .fsync          = ext3_sync_file,
        .splice_read    = generic_file_splice_read,
-       .splice_write   = generic_file_splice_write,
+       .splice_write   = iter_file_splice_write,
 };
 
 const struct inode_operations ext3_file_inode_operations = {
index 48383a5..708aad7 100644 (file)
@@ -599,7 +599,7 @@ const struct file_operations ext4_file_operations = {
        .release        = ext4_release_file,
        .fsync          = ext4_sync_file,
        .splice_read    = generic_file_splice_read,
-       .splice_write   = generic_file_splice_write,
+       .splice_write   = iter_file_splice_write,
        .fallocate      = ext4_fallocate,
 };
 
index 22f4900..e4ba4b9 100644 (file)
@@ -692,5 +692,5 @@ const struct file_operations f2fs_file_operations = {
        .compat_ioctl   = f2fs_compat_ioctl,
 #endif
        .splice_read    = generic_file_splice_read,
-       .splice_write   = generic_file_splice_write,
+       .splice_write   = iter_file_splice_write,
 };
index ca932cd..01b4c5b 100644 (file)
@@ -1068,7 +1068,7 @@ const struct file_operations gfs2_file_fops = {
        .lock           = gfs2_lock,
        .flock          = gfs2_flock,
        .splice_read    = generic_file_splice_read,
-       .splice_write   = generic_file_splice_write,
+       .splice_write   = iter_file_splice_write,
        .setlease       = gfs2_setlease,
        .fallocate      = gfs2_fallocate,
 };
@@ -1098,7 +1098,7 @@ const struct file_operations gfs2_file_fops_nolock = {
        .release        = gfs2_release,
        .fsync          = gfs2_fsync,
        .splice_read    = generic_file_splice_read,
-       .splice_write   = generic_file_splice_write,
+       .splice_write   = iter_file_splice_write,
        .setlease       = generic_setlease,
        .fallocate      = gfs2_fallocate,
 };
index cc744ec..33aa0cc 100644 (file)
@@ -157,7 +157,7 @@ const struct file_operations jfs_file_operations = {
        .write_iter     = generic_file_write_iter,
        .mmap           = generic_file_mmap,
        .splice_read    = generic_file_splice_read,
-       .splice_write   = generic_file_splice_write,
+       .splice_write   = iter_file_splice_write,
        .fsync          = jfs_fsync,
        .release        = jfs_release,
        .unlocked_ioctl = jfs_ioctl,
index 6ea0b97..4f56de8 100644 (file)
@@ -38,7 +38,7 @@ const struct file_operations ramfs_file_operations = {
        .mmap           = generic_file_mmap,
        .fsync          = noop_fsync,
        .splice_read    = generic_file_splice_read,
-       .splice_write   = generic_file_splice_write,
+       .splice_write   = iter_file_splice_write,
        .llseek         = generic_file_llseek,
 };
 
index 9ed420f..dda012a 100644 (file)
@@ -43,7 +43,7 @@ const struct file_operations ramfs_file_operations = {
        .write_iter             = generic_file_write_iter,
        .fsync                  = noop_fsync,
        .splice_read            = generic_file_splice_read,
-       .splice_write           = generic_file_splice_write,
+       .splice_write           = iter_file_splice_write,
        .llseek                 = generic_file_llseek,
 };
 
index 7c8ecd6..f070cc8 100644 (file)
@@ -248,7 +248,7 @@ const struct file_operations reiserfs_file_operations = {
        .read_iter = generic_file_read_iter,
        .write_iter = generic_file_write_iter,
        .splice_read = generic_file_splice_read,
-       .splice_write = generic_file_splice_write,
+       .splice_write = iter_file_splice_write,
        .llseek = generic_file_llseek,
 };
 
index f99e420..f195a9b 100644 (file)
@@ -32,6 +32,7 @@
 #include <linux/gfp.h>
 #include <linux/socket.h>
 #include <linux/compat.h>
+#include <linux/aio.h>
 #include "internal.h"
 
 /*
@@ -1052,6 +1053,145 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
 
 EXPORT_SYMBOL(generic_file_splice_write);
 
+/**
+ * iter_file_splice_write - splice data from a pipe to a file
+ * @pipe:      pipe info
+ * @out:       file to write to
+ * @ppos:      position in @out
+ * @len:       number of bytes to splice
+ * @flags:     splice modifier flags
+ *
+ * Description:
+ *    Will either move or copy pages (determined by @flags options) from
+ *    the given pipe inode to the given file.
+ *    This one is ->write_iter-based.
+ *
+ */
+ssize_t
+iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
+                         loff_t *ppos, size_t len, unsigned int flags)
+{
+       struct splice_desc sd = {
+               .total_len = len,
+               .flags = flags,
+               .pos = *ppos,
+               .u.file = out,
+       };
+       int nbufs = pipe->buffers;
+       struct bio_vec *array = kcalloc(nbufs, sizeof(struct bio_vec),
+                                       GFP_KERNEL);
+       ssize_t ret;
+
+       if (unlikely(!array))
+               return -ENOMEM;
+
+       pipe_lock(pipe);
+
+       splice_from_pipe_begin(&sd);
+       while (sd.total_len) {
+               struct iov_iter from;
+               struct kiocb kiocb;
+               size_t left;
+               int n, idx;
+
+               ret = splice_from_pipe_next(pipe, &sd);
+               if (ret <= 0)
+                       break;
+
+               if (unlikely(nbufs < pipe->buffers)) {
+                       kfree(array);
+                       nbufs = pipe->buffers;
+                       array = kcalloc(nbufs, sizeof(struct bio_vec),
+                                       GFP_KERNEL);
+                       if (!array) {
+                               ret = -ENOMEM;
+                               break;
+                       }
+               }
+
+               /* build the vector */
+               left = sd.total_len;
+               for (n = 0, idx = pipe->curbuf; left && n < pipe->nrbufs; n++, idx++) {
+                       struct pipe_buffer *buf = pipe->bufs + idx;
+                       size_t this_len = buf->len;
+
+                       if (this_len > left)
+                               this_len = left;
+
+                       if (idx == pipe->buffers - 1)
+                               idx = -1;
+
+                       ret = buf->ops->confirm(pipe, buf);
+                       if (unlikely(ret)) {
+                               if (ret == -ENODATA)
+                                       ret = 0;
+                               goto done;
+                       }
+
+                       array[n].bv_page = buf->page;
+                       array[n].bv_len = this_len;
+                       array[n].bv_offset = buf->offset;
+                       left -= this_len;
+               }
+
+               /* ... iov_iter */
+               from.type = ITER_BVEC | WRITE;
+               from.bvec = array;
+               from.nr_segs = n;
+               from.count = sd.total_len - left;
+               from.iov_offset = 0;
+
+               /* ... and iocb */
+               init_sync_kiocb(&kiocb, out);
+               kiocb.ki_pos = sd.pos;
+               kiocb.ki_nbytes = sd.total_len - left;
+
+               /* now, send it */
+               ret = out->f_op->write_iter(&kiocb, &from);
+               if (-EIOCBQUEUED == ret)
+                       ret = wait_on_sync_kiocb(&kiocb);
+
+               if (ret <= 0)
+                       break;
+
+               sd.num_spliced += ret;
+               sd.total_len -= ret;
+               *ppos = sd.pos = kiocb.ki_pos;
+
+               /* dismiss the fully eaten buffers, adjust the partial one */
+               while (ret) {
+                       struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;
+                       if (ret >= buf->len) {
+                               const struct pipe_buf_operations *ops = buf->ops;
+                               ret -= buf->len;
+                               buf->len = 0;
+                               buf->ops = NULL;
+                               ops->release(pipe, buf);
+                               pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
+                               pipe->nrbufs--;
+                               if (pipe->files)
+                                       sd.need_wakeup = true;
+                       } else {
+                               buf->offset += ret;
+                               buf->len -= ret;
+                               ret = 0;
+                       }
+               }
+       }
+done:
+       kfree(array);
+       splice_from_pipe_end(pipe, &sd);
+
+       pipe_unlock(pipe);
+
+       if (sd.num_spliced)
+               ret = sd.num_spliced;
+
+       return ret;
+}
+
+EXPORT_SYMBOL(iter_file_splice_write);
+
 static int write_pipe_buf(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
                          struct splice_desc *sd)
 {
index 6bc4e8e..0888502 100644 (file)
@@ -1585,7 +1585,7 @@ const struct file_operations ubifs_file_operations = {
        .fsync          = ubifs_fsync,
        .unlocked_ioctl = ubifs_ioctl,
        .splice_read    = generic_file_splice_read,
-       .splice_write   = generic_file_splice_write,
+       .splice_write   = iter_file_splice_write,
 #ifdef CONFIG_COMPAT
        .compat_ioctl   = ubifs_compat_ioctl,
 #endif
index 5446e86..b1c489c 100644 (file)
@@ -342,47 +342,6 @@ xfs_file_splice_read(
        return ret;
 }
 
-/*
- * xfs_file_splice_write() does not use xfs_rw_ilock() because
- * generic_file_splice_write() takes the i_mutex itself. This, in theory,
- * couuld cause lock inversions between the aio_write path and the splice path
- * if someone is doing concurrent splice(2) based writes and write(2) based
- * writes to the same inode. The only real way to fix this is to re-implement
- * the generic code here with correct locking orders.
- */
-STATIC ssize_t
-xfs_file_splice_write(
-       struct pipe_inode_info  *pipe,
-       struct file             *outfilp,
-       loff_t                  *ppos,
-       size_t                  count,
-       unsigned int            flags)
-{
-       struct inode            *inode = outfilp->f_mapping->host;
-       struct xfs_inode        *ip = XFS_I(inode);
-       int                     ioflags = 0;
-       ssize_t                 ret;
-
-       XFS_STATS_INC(xs_write_calls);
-
-       if (outfilp->f_mode & FMODE_NOCMTIME)
-               ioflags |= IO_INVIS;
-
-       if (XFS_FORCED_SHUTDOWN(ip->i_mount))
-               return -EIO;
-
-       xfs_ilock(ip, XFS_IOLOCK_EXCL);
-
-       trace_xfs_file_splice_write(ip, count, *ppos, ioflags);
-
-       ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags);
-       if (ret > 0)
-               XFS_STATS_ADD(xs_write_bytes, ret);
-
-       xfs_iunlock(ip, XFS_IOLOCK_EXCL);
-       return ret;
-}
-
 /*
  * This routine is called to handle zeroing any space in the last block of the
  * file that is beyond the EOF.  We do this since the size is being increased
@@ -1442,7 +1401,7 @@ const struct file_operations xfs_file_operations = {
        .read_iter      = xfs_file_read_iter,
        .write_iter     = xfs_file_write_iter,
        .splice_read    = xfs_file_splice_read,
-       .splice_write   = xfs_file_splice_write,
+       .splice_write   = iter_file_splice_write,
        .unlocked_ioctl = xfs_file_ioctl,
 #ifdef CONFIG_COMPAT
        .compat_ioctl   = xfs_file_compat_ioctl,
index 65d8c79..53182f9 100644 (file)
@@ -1060,7 +1060,6 @@ DEFINE_RW_EVENT(xfs_file_read);
 DEFINE_RW_EVENT(xfs_file_buffered_write);
 DEFINE_RW_EVENT(xfs_file_direct_write);
 DEFINE_RW_EVENT(xfs_file_splice_read);
-DEFINE_RW_EVENT(xfs_file_splice_write);
 
 DECLARE_EVENT_CLASS(xfs_page_class,
        TP_PROTO(struct inode *inode, struct page *page, unsigned long off,
index a644884..8bd8ed3 100644 (file)
@@ -2434,6 +2434,8 @@ extern ssize_t default_file_splice_read(struct file *, loff_t *,
                struct pipe_inode_info *, size_t, unsigned int);
 extern ssize_t generic_file_splice_write(struct pipe_inode_info *,
                struct file *, loff_t *, size_t, unsigned int);
+extern ssize_t iter_file_splice_write(struct pipe_inode_info *,
+               struct file *, loff_t *, size_t, unsigned int);
 extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe,
                struct file *out, loff_t *, size_t len, unsigned int flags);