[GFS2] Put back O_DIRECT support
authorSteven Whitehouse <swhiteho@redhat.com>
Tue, 14 Feb 2006 11:54:42 +0000 (11:54 +0000)
committerSteven Whitehouse <swhiteho@redhat.com>
Tue, 14 Feb 2006 11:54:42 +0000 (11:54 +0000)
This patch adds back O_DIRECT support with various caveats
attached:

 1. Journaled data can be read via O_DIRECT since it now has the
    same on-disk format as normal data files.
 2. Journaled data writes with O_DIRECT will silently fall
    back to normal writes (should we really do this, I wonder, or
    should we return an error instead?)
 3. Stuffed files will fall back to normal buffered I/O
 4. All the usual corner cases (write beyond current end of file,
    write to an unallocated block) will also revert to normal buffered I/O.

The I/O path is slightly odd as reads arrive at the page cache layer
with the lock for the file already held, but writes arrive unlocked.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
fs/gfs2/ops_address.c
fs/gfs2/ops_file.c

index b14357e..74706f3 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/buffer_head.h>
 #include <linux/pagemap.h>
 #include <linux/mpage.h>
+#include <linux/fs.h>
 #include <asm/semaphore.h>
 
 #include "gfs2.h"
@@ -555,30 +556,73 @@ static int gfs2_invalidatepage(struct page *page, unsigned long offset)
        return ret;
 }
 
-static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
-                         loff_t offset, unsigned long nr_segs)
+static ssize_t gfs2_direct_IO_write(struct kiocb *iocb, const struct iovec *iov,
+                                   loff_t offset, unsigned long nr_segs)
+{
+       struct file *file = iocb->ki_filp;
+       struct inode *inode = file->f_mapping->host;
+       struct gfs2_inode *ip = get_v2ip(inode);
+       struct gfs2_holder gh;
+       int rv;
+
+       /*
+        * Shared lock, even though its write, since we do no allocation
+        * on this path. All we need change is atime.
+        */
+       gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh);
+       rv = gfs2_glock_nq_m_atime(1, &gh);
+       if (rv)
+               goto out;
+
+       /*
+        * Should we return an error here? I can't see that O_DIRECT for
+        * a journaled file makes any sense. For now we'll silently fall
+        * back to buffered I/O, likewise we do the same for stuffed
+        * files since they are (a) small and (b) unaligned.
+        */
+       if (gfs2_is_jdata(ip))
+               goto out;
+
+       if (gfs2_is_stuffed(ip))
+               goto out;
+
+       rv = __blockdev_direct_IO(WRITE, iocb, inode, inode->i_sb->s_bdev,
+                                 iov, offset, nr_segs, get_blocks_noalloc,
+                                 NULL, DIO_OWN_LOCKING);
+out:
+       gfs2_glock_dq_m(1, &gh);
+       gfs2_holder_uninit(&gh);
+
+       return rv;
+}
+
+/**
+ * gfs2_direct_IO
+ *
+ * This is called with a shared lock already held for the read path.
+ * Currently, no locks are held when the write path is called.
+ */
+static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
+                             const struct iovec *iov, loff_t offset,
+                             unsigned long nr_segs)
 {
        struct file *file = iocb->ki_filp;
        struct inode *inode = file->f_mapping->host;
        struct gfs2_inode *ip = get_v2ip(inode);
        struct gfs2_sbd *sdp = ip->i_sbd;
-       get_blocks_t *gb = get_blocks;
 
        atomic_inc(&sdp->sd_ops_address);
 
-       if (gfs2_is_jdata(ip))
-               return -EINVAL;
+       if (rw == WRITE)
+               return gfs2_direct_IO_write(iocb, iov, offset, nr_segs);
 
-       if (rw == WRITE) {
-               return -EOPNOTSUPP; /* for now */
-       } else {
-               if (gfs2_assert_warn(sdp, gfs2_glock_is_locked_by_me(ip->i_gl)) ||
-                   gfs2_assert_warn(sdp, !gfs2_is_stuffed(ip)))
-                       return -EINVAL;
-       }
+       if (gfs2_assert_warn(sdp, gfs2_glock_is_locked_by_me(ip->i_gl)) ||
+           gfs2_assert_warn(sdp, !gfs2_is_stuffed(ip)))
+               return -EINVAL;
 
-       return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
-                                 offset, nr_segs, gb, NULL);
+       return __blockdev_direct_IO(READ, iocb, inode, inode->i_sb->s_bdev, iov,
+                                   offset, nr_segs, get_blocks, NULL,
+                                   DIO_OWN_LOCKING);
 }
 
 struct address_space_operations gfs2_file_aops = {
index 56820b3..bcde7a0 100644 (file)
@@ -176,16 +176,16 @@ static ssize_t __gfs2_file_aio_read(struct kiocb *iocb,
                 * If any segment has a negative length, or the cumulative
                 * length ever wraps negative then return -EINVAL.
                 */
-       count += iv->iov_len;
-       if (unlikely((ssize_t)(count|iv->iov_len) < 0))
-               return -EINVAL;
-       if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len))
-               continue;
-       if (seg == 0)
-               return -EFAULT;
-       nr_segs = seg;
-       count -= iv->iov_len;   /* This segment is no good */
-       break;
+               count += iv->iov_len;
+               if (unlikely((ssize_t)(count|iv->iov_len) < 0))
+                       return -EINVAL;
+               if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len))
+                       continue;
+               if (seg == 0)
+                       return -EFAULT;
+               nr_segs = seg;
+               count -= iv->iov_len;   /* This segment is no good */
+               break;
        }
 
        /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
@@ -204,10 +204,14 @@ static ssize_t __gfs2_file_aio_read(struct kiocb *iocb,
                retval = gfs2_glock_nq_m_atime(1, &gh);
                if (retval)
                        goto out;
-
+               if (gfs2_is_stuffed(ip)) {
+                       gfs2_glock_dq_m(1, &gh);
+                       gfs2_holder_uninit(&gh);
+                       goto fallback_to_normal;
+               }
                size = i_size_read(inode);
                if (pos < size) {
-                        retval = gfs2_direct_IO_read(iocb, iov, pos, nr_segs);
+                       retval = gfs2_direct_IO_read(iocb, iov, pos, nr_segs);
                        if (retval > 0 && !is_sync_kiocb(iocb))
                                retval = -EIOCBQUEUED;
                        if (retval > 0)
@@ -219,6 +223,7 @@ static ssize_t __gfs2_file_aio_read(struct kiocb *iocb,
                goto out;
        }
 
+fallback_to_normal:
        retval = 0;
        if (count) {
                for (seg = 0; seg < nr_segs; seg++) {