fs: remove prepare_write/commit_write

author Nick Piggin <npiggin@suse.de>

Wed, 29 Oct 2008 21:00:55 +0000 (14:00 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Thu, 30 Oct 2008 18:38:45 +0000 (11:38 -0700)
author Nick Piggin <npiggin@suse.de>
Wed, 29 Oct 2008 21:00:55 +0000 (14:00 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 30 Oct 2008 18:38:45 +0000 (11:38 -0700)
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking

index 8362860..23d2f44 100644 (file)
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -161,8 +161,12 @@ prototypes:
         int (*set_page_dirty)(struct page *page);
         int (*readpages)(struct file *filp, struct address_space *mapping,
                         struct list_head *pages, unsigned nr_pages);
-       int (*prepare_write)(struct file *, struct page *, unsigned, unsigned);
-       int (*commit_write)(struct file *, struct page *, unsigned, unsigned);
+       int (*write_begin)(struct file *, struct address_space *mapping,
+                               loff_t pos, unsigned len, unsigned flags,
+                               struct page **pagep, void **fsdata);
+       int (*write_end)(struct file *, struct address_space *mapping,
+                               loff_t pos, unsigned len, unsigned copied,
+                               struct page *page, void *fsdata);
         sector_t (*bmap)(struct address_space *, sector_t);
         int (*invalidatepage) (struct page *, unsigned long);
         int (*releasepage) (struct page *, int);
@@ -180,8 +184,6 @@ sync_page:          no      maybe
  writepages:            no
  set_page_dirty         no      no
  readpages:             no
-prepare_write:         no      yes                     yes
-commit_write:          no      yes                     yes
  write_begin:           no      locks the page          yes
  write_end:             no      yes, unlocks            yes
  perform_write:         no      n/a                     yes
@@ -191,7 +193,7 @@ releasepage:                no      yes
  direct_IO:             no
  launder_page:          no      yes
  
-       ->prepare_write(), ->commit_write(), ->sync_page() and ->readpage()
+       ->write_begin(), ->write_end(), ->sync_page() and ->readpage()
  may be called from the request handler (/dev/loop).
  
         ->readpage() unlocks the page, either synchronously or via I/O
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt

index c4d348d..5579bda 100644 (file)
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -492,7 +492,7 @@ written-back to storage typically in whole pages, however the
  address_space has finer control of write sizes.
  
  The read process essentially only requires 'readpage'.  The write
-process is more complicated and uses prepare_write/commit_write or
+process is more complicated and uses write_begin/write_end or
  set_page_dirty to write data into the address_space, and writepage,
  sync_page, and writepages to writeback data to storage.
  
@@ -521,8 +521,6 @@ struct address_space_operations {
         int (*set_page_dirty)(struct page *page);
         int (*readpages)(struct file *filp, struct address_space *mapping,
                         struct list_head *pages, unsigned nr_pages);
-       int (*prepare_write)(struct file *, struct page *, unsigned, unsigned);
-       int (*commit_write)(struct file *, struct page *, unsigned, unsigned);
         int (*write_begin)(struct file *, struct address_space *mapping,
                                 loff_t pos, unsigned len, unsigned flags,
                                 struct page **pagep, void **fsdata);
@@ -598,37 +596,7 @@ struct address_space_operations {
         readpages is only used for read-ahead, so read errors are
         ignored.  If anything goes wrong, feel free to give up.
  
-  prepare_write: called by the generic write path in VM to set up a write
-       request for a page.  This indicates to the address space that
-       the given range of bytes is about to be written.  The
-       address_space should check that the write will be able to
-       complete, by allocating space if necessary and doing any other
-       internal housekeeping.  If the write will update parts of
-       any basic-blocks on storage, then those blocks should be
-       pre-read (if they haven't been read already) so that the
-       updated blocks can be written out properly.
-       The page will be locked.
-
-       Note: the page _must not_ be marked uptodate in this function
-       (or anywhere else) unless it actually is uptodate right now. As
-       soon as a page is marked uptodate, it is possible for a concurrent
-       read(2) to copy it to userspace.
-
-  commit_write: If prepare_write succeeds, new data will be copied
-        into the page and then commit_write will be called.  It will
-        typically update the size of the file (if appropriate) and
-        mark the inode as dirty, and do any other related housekeeping
-        operations.  It should avoid returning an error if possible -
-        errors should have been handled by prepare_write.
-
-  write_begin: This is intended as a replacement for prepare_write. The
-       key differences being that:
-               - it returns a locked page (in *pagep) rather than being
-                 given a pre locked page;
-               - it must be able to cope with short writes (where the
-                 length passed to write_begin is greater than the number
-                 of bytes copied into the page).
-
+  write_begin:
         Called by the generic buffered write code to ask the filesystem to
         prepare to write len bytes at the given offset in the file. The
         address_space should check that the write will be able to complete,
@@ -640,6 +608,9 @@ struct address_space_operations {
          The filesystem must return the locked pagecache page for the specified
         offset, in *pagep, for the caller to write into.
  
+       It must be able to cope with short writes (where the length passed to
+       write_begin is greater than the number of bytes copied into the page).
+
         flags is a field for AOP_FLAG_xxx flags, described in
         include/linux/fs.h.
  
diff --git a/drivers/block/loop.c b/drivers/block/loop.c

index 3f09cd8..5c4ee70 100644 (file)
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -40,8 +40,7 @@
   * Heinz Mauelshagen <mge@sistina.com>, Feb 2002
   *
   * Support for falling back on the write file operation when the address space
- * operations prepare_write and/or commit_write are not available on the
- * backing filesystem.
+ * operations write_begin is not available on the backing filesystem.
   * Anton Altaparmakov, 16 Feb 2005
   *
   * Still To Fix:
@@ -765,7 +764,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
                  */
                 if (!file->f_op->splice_read)
                         goto out_putf;
-               if (aops->prepare_write || aops->write_begin)
+               if (aops->write_begin)
                         lo_flags |= LO_FLAGS_USE_AOPS;
                 if (!(lo_flags & LO_FLAGS_USE_AOPS) && !file->f_op->write)
                         lo_flags |= LO_FLAGS_READ_ONLY;
diff --git a/fs/fat/inode.c b/fs/fat/inode.c

index 19eafbe..2b2eec1 100644 (file)
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -175,7 +175,7 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb,
  
         if (rw == WRITE) {
                 /*
-                * FIXME: blockdev_direct_IO() doesn't use ->prepare_write(),
+                * FIXME: blockdev_direct_IO() doesn't use ->write_begin(),
                  * so we need to update the ->mmu_private to block boundary.
                  *
                  * But we must fill the remaining area or hole by nul for
diff --git a/fs/libfs.c b/fs/libfs.c

index 7468859..e960a83 100644 (file)
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -814,7 +814,7 @@ EXPORT_SYMBOL(simple_getattr);
  EXPORT_SYMBOL(simple_link);
  EXPORT_SYMBOL(simple_lookup);
  EXPORT_SYMBOL(simple_pin_fs);
-EXPORT_SYMBOL(simple_prepare_write);
+EXPORT_UNUSED_SYMBOL(simple_prepare_write);
  EXPORT_SYMBOL(simple_readpage);
  EXPORT_SYMBOL(simple_release_fs);
  EXPORT_SYMBOL(simple_rename);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c

index 8d3225a..7efe937 100644 (file)
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -679,8 +679,7 @@ leave:
  
  /* Some parts of this taken from generic_cont_expand, which turned out
   * to be too fragile to do exactly what we need without us having to
- * worry about recursive locking in ->prepare_write() and
- * ->commit_write(). */
+ * worry about recursive locking in ->write_begin() and ->write_end(). */
  static int ocfs2_write_zero_page(struct inode *inode,
                                  u64 size)
  {
diff --git a/fs/splice.c b/fs/splice.c

index a1e701c..1abab5c 100644 (file)
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -731,8 +731,8 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
         };
  
         /*
-        * The actor worker might be calling ->prepare_write and
-        * ->commit_write. Most of the time, these expect i_mutex to
+        * The actor worker might be calling ->write_begin and
+        * ->write_end. Most of the time, these expect i_mutex to
          * be held. Since this may result in an ABBA deadlock with
          * pipe->inode, we have to order lock acquiry here.
          */
diff --git a/include/linux/fs.h b/include/linux/fs.h

index 5b248d6..0dcdd94 100644 (file)
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -489,13 +489,6 @@ struct address_space_operations {
         int (*readpages)(struct file *filp, struct address_space *mapping,
                         struct list_head *pages, unsigned nr_pages);
  
-       /*
-        * ext3 requires that a successful prepare_write() call be followed
-        * by a commit_write() call - they must be balanced
-        */
-       int (*prepare_write)(struct file *, struct page *, unsigned, unsigned);
-       int (*commit_write)(struct file *, struct page *, unsigned, unsigned);
-
         int (*write_begin)(struct file *, struct address_space *mapping,
                                 loff_t pos, unsigned len, unsigned flags,
                                 struct page **pagep, void **fsdata);
diff --git a/mm/filemap.c b/mm/filemap.c

index ab85536..f3e5f89 100644 (file)
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2029,48 +2029,8 @@ int pagecache_write_begin(struct file *file, struct address_space *mapping,
  {
         const struct address_space_operations *aops = mapping->a_ops;
  
-       if (aops->write_begin) {
-               return aops->write_begin(file, mapping, pos, len, flags,
+       return aops->write_begin(file, mapping, pos, len, flags,
                                                         pagep, fsdata);
-       } else {
-               int ret;
-               pgoff_t index = pos >> PAGE_CACHE_SHIFT;
-               unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
-               struct inode *inode = mapping->host;
-               struct page *page;
-again:
-               page = __grab_cache_page(mapping, index);
-               *pagep = page;
-               if (!page)
-                       return -ENOMEM;
-
-               if (flags & AOP_FLAG_UNINTERRUPTIBLE && !PageUptodate(page)) {
-                       /*
-                        * There is no way to resolve a short write situation
-                        * for a !Uptodate page (except by double copying in
-                        * the caller done by generic_perform_write_2copy).
-                        *
-                        * Instead, we have to bring it uptodate here.
-                        */
-                       ret = aops->readpage(file, page);
-                       page_cache_release(page);
-                       if (ret) {
-                               if (ret == AOP_TRUNCATED_PAGE)
-                                       goto again;
-                               return ret;
-                       }
-                       goto again;
-               }
-
-               ret = aops->prepare_write(file, page, offset, offset+len);
-               if (ret) {
-                       unlock_page(page);
-                       page_cache_release(page);
-                       if (pos + len > inode->i_size)
-                               vmtruncate(inode, inode->i_size);
-               }
-               return ret;
-       }
  }
  EXPORT_SYMBOL(pagecache_write_begin);
  
@@ -2079,32 +2039,9 @@ int pagecache_write_end(struct file *file, struct address_space *mapping,
                                 struct page *page, void *fsdata)
  {
         const struct address_space_operations *aops = mapping->a_ops;
-       int ret;
-
-       if (aops->write_end) {
-               mark_page_accessed(page);
-               ret = aops->write_end(file, mapping, pos, len, copied,
-                                                       page, fsdata);
-       } else {
-               unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
-               struct inode *inode = mapping->host;
-
-               flush_dcache_page(page);
-               ret = aops->commit_write(file, page, offset, offset+len);
-               unlock_page(page);
-               mark_page_accessed(page);
-               page_cache_release(page);
-
-               if (ret < 0) {
-                       if (pos + len > inode->i_size)
-                               vmtruncate(inode, inode->i_size);
-               } else if (ret > 0)
-                       ret = min_t(size_t, copied, ret);
-               else
-                       ret = copied;
-       }
  
-       return ret;
+       mark_page_accessed(page);
+       return aops->write_end(file, mapping, pos, len, copied, page, fsdata);
  }
  EXPORT_SYMBOL(pagecache_write_end);
  
@@ -2226,174 +2163,6 @@ repeat:
  }
  EXPORT_SYMBOL(__grab_cache_page);
  
-static ssize_t generic_perform_write_2copy(struct file *file,
-                               struct iov_iter *i, loff_t pos)
-{
-       struct address_space *mapping = file->f_mapping;
-       const struct address_space_operations *a_ops = mapping->a_ops;
-       struct inode *inode = mapping->host;
-       long status = 0;
-       ssize_t written = 0;
-
-       do {
-               struct page *src_page;
-               struct page *page;
-               pgoff_t index;          /* Pagecache index for current page */
-               unsigned long offset;   /* Offset into pagecache page */
-               unsigned long bytes;    /* Bytes to write to page */
-               size_t copied;          /* Bytes copied from user */
-
-               offset = (pos & (PAGE_CACHE_SIZE - 1));
-               index = pos >> PAGE_CACHE_SHIFT;
-               bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset,
-                                               iov_iter_count(i));
-
-               /*
-                * a non-NULL src_page indicates that we're doing the
-                * copy via get_user_pages and kmap.
-                */
-               src_page = NULL;
-
-               /*
-                * Bring in the user page that we will copy from _first_.
-                * Otherwise there's a nasty deadlock on copying from the
-                * same page as we're writing to, without it being marked
-                * up-to-date.
-                *
-                * Not only is this an optimisation, but it is also required
-                * to check that the address is actually valid, when atomic
-                * usercopies are used, below.
-                */
-               if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
-                       status = -EFAULT;
-                       break;
-               }
-
-               page = __grab_cache_page(mapping, index);
-               if (!page) {
-                       status = -ENOMEM;
-                       break;
-               }
-
-               /*
-                * non-uptodate pages cannot cope with short copies, and we
-                * cannot take a pagefault with the destination page locked.
-                * So pin the source page to copy it.
-                */
-               if (!PageUptodate(page) && !segment_eq(get_fs(), KERNEL_DS)) {
-                       unlock_page(page);
-
-                       src_page = alloc_page(GFP_KERNEL);
-                       if (!src_page) {
-                               page_cache_release(page);
-                               status = -ENOMEM;
-                               break;
-                       }
-
-                       /*
-                        * Cannot get_user_pages with a page locked for the
-                        * same reason as we can't take a page fault with a
-                        * page locked (as explained below).
-                        */
-                       copied = iov_iter_copy_from_user(src_page, i,
-                                                               offset, bytes);
-                       if (unlikely(copied == 0)) {
-                               status = -EFAULT;
-                               page_cache_release(page);
-                               page_cache_release(src_page);
-                               break;
-                       }
-                       bytes = copied;
-
-                       lock_page(page);
-                       /*
-                        * Can't handle the page going uptodate here, because
-                        * that means we would use non-atomic usercopies, which
-                        * zero out the tail of the page, which can cause
-                        * zeroes to become transiently visible. We could just
-                        * use a non-zeroing copy, but the APIs aren't too
-                        * consistent.
-                        */
-                       if (unlikely(!page->mapping || PageUptodate(page))) {
-                               unlock_page(page);
-                               page_cache_release(page);
-                               page_cache_release(src_page);
-                               continue;
-                       }
-               }
-
-               status = a_ops->prepare_write(file, page, offset, offset+bytes);
-               if (unlikely(status))
-                       goto fs_write_aop_error;
-
-               if (!src_page) {
-                       /*
-                        * Must not enter the pagefault handler here, because
-                        * we hold the page lock, so we might recursively
-                        * deadlock on the same lock, or get an ABBA deadlock
-                        * against a different lock, or against the mmap_sem
-                        * (which nests outside the page lock).  So increment
-                        * preempt count, and use _atomic usercopies.
-                        *
-                        * The page is uptodate so we are OK to encounter a
-                        * short copy: if unmodified parts of the page are
-                        * marked dirty and written out to disk, it doesn't
-                        * really matter.
-                        */
-                       pagefault_disable();
-                       copied = iov_iter_copy_from_user_atomic(page, i,
-                                                               offset, bytes);
-                       pagefault_enable();
-               } else {
-                       void *src, *dst;
-                       src = kmap_atomic(src_page, KM_USER0);
-                       dst = kmap_atomic(page, KM_USER1);
-                       memcpy(dst + offset, src + offset, bytes);
-                       kunmap_atomic(dst, KM_USER1);
-                       kunmap_atomic(src, KM_USER0);
-                       copied = bytes;
-               }
-               flush_dcache_page(page);
-
-               status = a_ops->commit_write(file, page, offset, offset+bytes);
-               if (unlikely(status < 0))
-                       goto fs_write_aop_error;
-               if (unlikely(status > 0)) /* filesystem did partial write */
-                       copied = min_t(size_t, copied, status);
-
-               unlock_page(page);
-               mark_page_accessed(page);
-               page_cache_release(page);
-               if (src_page)
-                       page_cache_release(src_page);
-
-               iov_iter_advance(i, copied);
-               pos += copied;
-               written += copied;
-
-               balance_dirty_pages_ratelimited(mapping);
-               cond_resched();
-               continue;
-
-fs_write_aop_error:
-               unlock_page(page);
-               page_cache_release(page);
-               if (src_page)
-                       page_cache_release(src_page);
-
-               /*
-                * prepare_write() may have instantiated a few blocks
-                * outside i_size.  Trim these off again. Don't need
-                * i_size_read because we hold i_mutex.
-                */
-               if (pos + bytes > inode->i_size)
-                       vmtruncate(inode, inode->i_size);
-               break;
-       } while (iov_iter_count(i));
-
-       return written ? written : status;
-}
-
  static ssize_t generic_perform_write(struct file *file,
                                 struct iov_iter *i, loff_t pos)
  {
@@ -2494,10 +2263,7 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
         struct iov_iter i;
  
         iov_iter_init(&i, iov, nr_segs, count, written);
-       if (a_ops->write_begin)
-               status = generic_perform_write(file, &i, pos);
-       else
-               status = generic_perform_write_2copy(file, &i, pos);
+       status = generic_perform_write(file, &i, pos);
  
         if (likely(status >= 0)) {
                 written += status;
author	Nick Piggin <npiggin@suse.de>
	Wed, 29 Oct 2008 21:00:55 +0000 (14:00 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Thu, 30 Oct 2008 18:38:45 +0000 (11:38 -0700)
Documentation/filesystems/Locking		patch \| blob \| history
Documentation/filesystems/vfs.txt		patch \| blob \| history
drivers/block/loop.c		patch \| blob \| history
fs/fat/inode.c		patch \| blob \| history
fs/libfs.c		patch \| blob \| history
fs/ocfs2/file.c		patch \| blob \| history
fs/splice.c		patch \| blob \| history
include/linux/fs.h		patch \| blob \| history
mm/filemap.c		patch \| blob \| history