[PATCH] BLOCK: Dissociate generic_writepages() from mpage stuff [try #6]
authorDavid Howells <dhowells@redhat.com>
Tue, 29 Aug 2006 18:06:09 +0000 (19:06 +0100)
committerJens Axboe <axboe@nelson.home.kernel.dk>
Sat, 30 Sep 2006 18:52:26 +0000 (20:52 +0200)
Dissociate the generic_writepages() function from the mpage stuff, moving its
declaration to linux/mm.h and actually emitting a full implementation into
mm/page-writeback.c.

The implementation is a partial duplicate of mpage_writepages() with all BIO
references removed.

It is used by NFS to do writeback.

Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
fs/block_dev.c
fs/mpage.c
include/linux/mpage.h
include/linux/writeback.h
mm/page-writeback.c

index 20f7333..335c38b 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/module.h>
 #include <linux/blkpg.h>
 #include <linux/buffer_head.h>
+#include <linux/writeback.h>
 #include <linux/mpage.h>
 #include <linux/mount.h>
 #include <linux/uio.h>
index 1e45982..692a3e5 100644 (file)
@@ -693,6 +693,8 @@ out:
  * the call was made get new I/O started against them.  If wbc->sync_mode is
  * WB_SYNC_ALL then we were called for data integrity and we must wait for
  * existing IO to complete.
+ *
+ * If you fix this you should check generic_writepages() also!
  */
 int
 mpage_writepages(struct address_space *mapping,
index 3ca8804..517c098 100644 (file)
@@ -20,9 +20,3 @@ int mpage_writepages(struct address_space *mapping,
                struct writeback_control *wbc, get_block_t get_block);
 int mpage_writepage(struct page *page, get_block_t *get_block,
                struct writeback_control *wbc);
-
-static inline int
-generic_writepages(struct address_space *mapping, struct writeback_control *wbc)
-{
-       return mpage_writepages(mapping, wbc, NULL);
-}
index 9d4074e..4f4d98a 100644 (file)
@@ -111,6 +111,8 @@ balance_dirty_pages_ratelimited(struct address_space *mapping)
 }
 
 int pdflush_operation(void (*fn)(unsigned long), unsigned long arg0);
+extern int generic_writepages(struct address_space *mapping,
+                             struct writeback_control *wbc);
 int do_writepages(struct address_space *mapping, struct writeback_control *wbc);
 int sync_page_range(struct inode *inode, struct address_space *mapping,
                        loff_t pos, loff_t count);
index 9fdcc79..ecf2783 100644 (file)
@@ -31,6 +31,7 @@
 #include <linux/cpu.h>
 #include <linux/syscalls.h>
 #include <linux/buffer_head.h>
+#include <linux/pagevec.h>
 
 /*
  * The maximum number of pages to writeout in a single bdflush/kupdate
@@ -551,6 +552,139 @@ void __init page_writeback_init(void)
        register_cpu_notifier(&ratelimit_nb);
 }
 
+/**
+ * generic_writepages - walk the list of dirty pages of the given
+ *                      address space and writepage() all of them.
+ *
+ * @mapping: address space structure to write
+ * @wbc: subtract the number of written pages from *@wbc->nr_to_write
+ *
+ * This is a library function, which implements the writepages()
+ * address_space_operation.
+ *
+ * If a page is already under I/O, generic_writepages() skips it, even
+ * if it's dirty.  This is desirable behaviour for memory-cleaning writeback,
+ * but it is INCORRECT for data-integrity system calls such as fsync().  fsync()
+ * and msync() need to guarantee that all the data which was dirty at the time
+ * the call was made get new I/O started against them.  If wbc->sync_mode is
+ * WB_SYNC_ALL then we were called for data integrity and we must wait for
+ * existing IO to complete.
+ *
+ * Derived from mpage_writepages() - if you fix this you should check that
+ * also!
+ */
+int generic_writepages(struct address_space *mapping,
+                      struct writeback_control *wbc)
+{
+       struct backing_dev_info *bdi = mapping->backing_dev_info;
+       int ret = 0;
+       int done = 0;
+       int (*writepage)(struct page *page, struct writeback_control *wbc);
+       struct pagevec pvec;
+       int nr_pages;
+       pgoff_t index;
+       pgoff_t end;            /* Inclusive */
+       int scanned = 0;
+       int range_whole = 0;
+
+       if (wbc->nonblocking && bdi_write_congested(bdi)) {
+               wbc->encountered_congestion = 1;
+               return 0;
+       }
+
+       writepage = mapping->a_ops->writepage;
+
+       /* deal with chardevs and other special file */
+       if (!writepage)
+               return 0;
+
+       pagevec_init(&pvec, 0);
+       if (wbc->range_cyclic) {
+               index = mapping->writeback_index; /* Start from prev offset */
+               end = -1;
+       } else {
+               index = wbc->range_start >> PAGE_CACHE_SHIFT;
+               end = wbc->range_end >> PAGE_CACHE_SHIFT;
+               if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
+                       range_whole = 1;
+               scanned = 1;
+       }
+retry:
+       while (!done && (index <= end) &&
+              (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
+                                             PAGECACHE_TAG_DIRTY,
+                                             min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
+               unsigned i;
+
+               scanned = 1;
+               for (i = 0; i < nr_pages; i++) {
+                       struct page *page = pvec.pages[i];
+
+                       /*
+                        * At this point we hold neither mapping->tree_lock nor
+                        * lock on the page itself: the page may be truncated or
+                        * invalidated (changing page->mapping to NULL), or even
+                        * swizzled back from swapper_space to tmpfs file
+                        * mapping
+                        */
+                       lock_page(page);
+
+                       if (unlikely(page->mapping != mapping)) {
+                               unlock_page(page);
+                               continue;
+                       }
+
+                       if (!wbc->range_cyclic && page->index > end) {
+                               done = 1;
+                               unlock_page(page);
+                               continue;
+                       }
+
+                       if (wbc->sync_mode != WB_SYNC_NONE)
+                               wait_on_page_writeback(page);
+
+                       if (PageWriteback(page) ||
+                           !clear_page_dirty_for_io(page)) {
+                               unlock_page(page);
+                               continue;
+                       }
+
+                       ret = (*writepage)(page, wbc);
+                       if (ret) {
+                               if (ret == -ENOSPC)
+                                       set_bit(AS_ENOSPC, &mapping->flags);
+                               else
+                                       set_bit(AS_EIO, &mapping->flags);
+                       }
+
+                       if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE))
+                               unlock_page(page);
+                       if (ret || (--(wbc->nr_to_write) <= 0))
+                               done = 1;
+                       if (wbc->nonblocking && bdi_write_congested(bdi)) {
+                               wbc->encountered_congestion = 1;
+                               done = 1;
+                       }
+               }
+               pagevec_release(&pvec);
+               cond_resched();
+       }
+       if (!scanned && !done) {
+               /*
+                * We hit the last page and there is more work to be done: wrap
+                * back to the start of the file
+                */
+               scanned = 1;
+               index = 0;
+               goto retry;
+       }
+       if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
+               mapping->writeback_index = index;
+       return ret;
+}
+
+EXPORT_SYMBOL(generic_writepages);
+
 int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
 {
        int ret;