Merge branch 'for-linus' of git://oss.sgi.com/xfs/xfs
[pandora-kernel.git] / fs / fs-writeback.c
index 482de0a..59c6e49 100644 (file)
@@ -630,6 +630,7 @@ static long wb_writeback(struct bdi_writeback *wb,
        };
        unsigned long oldest_jif;
        long wrote = 0;
+       long write_chunk;
        struct inode *inode;
 
        if (wbc.for_kupdate) {
@@ -642,6 +643,24 @@ static long wb_writeback(struct bdi_writeback *wb,
                wbc.range_end = LLONG_MAX;
        }
 
+       /*
+        * WB_SYNC_ALL mode does livelock avoidance by syncing dirty
+        * inodes/pages in one big loop. Setting wbc.nr_to_write=LONG_MAX
+        * here avoids calling into writeback_inodes_wb() more than once.
+        *
+        * The intended call sequence for WB_SYNC_ALL writeback is:
+        *
+        *      wb_writeback()
+        *          __writeback_inodes_sb()     <== called only once
+        *              write_cache_pages()     <== called once for each inode
+        *                   (quickly) tag currently dirty pages
+        *                   (maybe slowly) sync all tagged pages
+        */
+       if (wbc.sync_mode == WB_SYNC_NONE)
+               write_chunk = MAX_WRITEBACK_PAGES;
+       else
+               write_chunk = LONG_MAX;
+
        wbc.wb_start = jiffies; /* livelock avoidance */
        for (;;) {
                /*
@@ -650,6 +669,16 @@ static long wb_writeback(struct bdi_writeback *wb,
                if (work->nr_pages <= 0)
                        break;
 
+               /*
+                * Background writeout and kupdate-style writeback may
+                * run forever. Stop them if there is other work to do
+                * so that e.g. sync can proceed. They'll be restarted
+                * after the other work items are all done.
+                */
+               if ((work->for_background || work->for_kupdate) &&
+                   !list_empty(&wb->bdi->work_list))
+                       break;
+
                /*
                 * For background writeout, stop when we are below the
                 * background dirty threshold
@@ -658,7 +687,7 @@ static long wb_writeback(struct bdi_writeback *wb,
                        break;
 
                wbc.more_io = 0;
-               wbc.nr_to_write = MAX_WRITEBACK_PAGES;
+               wbc.nr_to_write = write_chunk;
                wbc.pages_skipped = 0;
 
                trace_wbc_writeback_start(&wbc, wb->bdi);
@@ -668,8 +697,8 @@ static long wb_writeback(struct bdi_writeback *wb,
                        writeback_inodes_wb(wb, &wbc);
                trace_wbc_writeback_written(&wbc, wb->bdi);
 
-               work->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
-               wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write;
+               work->nr_pages -= write_chunk - wbc.nr_to_write;
+               wrote += write_chunk - wbc.nr_to_write;
 
                /*
                 * If we consumed everything, see if we have more
@@ -684,7 +713,7 @@ static long wb_writeback(struct bdi_writeback *wb,
                /*
                 * Did we write something? Try for more
                 */
-               if (wbc.nr_to_write < MAX_WRITEBACK_PAGES)
+               if (wbc.nr_to_write < write_chunk)
                        continue;
                /*
                 * Nothing written. Wait for some inode to
@@ -1196,7 +1225,7 @@ EXPORT_SYMBOL(writeback_inodes_sb_nr_if_idle);
  * @sb: the superblock
  *
  * This function writes and waits on any dirty inode belonging to this
- * super_block. The number of pages synced is returned.
+ * super_block.
  */
 void sync_inodes_sb(struct super_block *sb)
 {
@@ -1274,11 +1303,11 @@ int sync_inode(struct inode *inode, struct writeback_control *wbc)
 EXPORT_SYMBOL(sync_inode);
 
 /**
- * sync_inode - write an inode to disk
+ * sync_inode_metadata - write an inode to disk
  * @inode: the inode to sync
  * @wait: wait for I/O to complete.
  *
- * Write an inode to disk and adjust it's dirty state after completion.
+ * Write an inode to disk and adjust its dirty state after completion.
  *
  * Note: only writes the actual inode, no associated data or other metadata.
  */