Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs-2.6
[pandora-kernel.git] / fs / fs-writeback.c
index 24e85ce..5c4161f 100644 (file)
@@ -45,6 +45,7 @@ struct wb_writeback_args {
        unsigned int for_kupdate:1;
        unsigned int range_cyclic:1;
        unsigned int for_background:1;
+       unsigned int sb_pinned:1;
 };
 
 /*
@@ -192,7 +193,8 @@ static void bdi_wait_on_work_clear(struct bdi_work *work)
 }
 
 static void bdi_alloc_queue_work(struct backing_dev_info *bdi,
-                                struct wb_writeback_args *args)
+                                struct wb_writeback_args *args,
+                                int wait)
 {
        struct bdi_work *work;
 
@@ -204,6 +206,8 @@ static void bdi_alloc_queue_work(struct backing_dev_info *bdi,
        if (work) {
                bdi_work_init(work, args);
                bdi_queue_work(bdi, work);
+               if (wait)
+                       bdi_wait_on_work_clear(work);
        } else {
                struct bdi_writeback *wb = &bdi->wb;
 
@@ -230,6 +234,11 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi,
                .sync_mode      = WB_SYNC_ALL,
                .nr_pages       = LONG_MAX,
                .range_cyclic   = 0,
+               /*
+                * Setting sb_pinned is not necessary for WB_SYNC_ALL, but
+                * lets make it explicitly clear.
+                */
+               .sb_pinned      = 1,
        };
        struct bdi_work work;
 
@@ -245,21 +254,23 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi,
  * @bdi: the backing device to write from
  * @sb: write inodes from this super_block
  * @nr_pages: the number of pages to write
+ * @sb_locked: caller already holds sb umount sem.
  *
  * Description:
  *   This does WB_SYNC_NONE opportunistic writeback. The IO is only
  *   started when this function returns, we make no guarentees on
- *   completion. Caller need not hold sb s_umount semaphore.
+ *   completion. Caller specifies whether sb umount sem is held already or not.
  *
  */
 void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb,
-                        long nr_pages)
+                        long nr_pages, int sb_locked)
 {
        struct wb_writeback_args args = {
                .sb             = sb,
                .sync_mode      = WB_SYNC_NONE,
                .nr_pages       = nr_pages,
                .range_cyclic   = 1,
+               .sb_pinned      = sb_locked,
        };
 
        /*
@@ -271,7 +282,7 @@ void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb,
                args.for_background = 1;
        }
 
-       bdi_alloc_queue_work(bdi, &args);
+       bdi_alloc_queue_work(bdi, &args, sb_locked);
 }
 
 /*
@@ -452,11 +463,9 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 
        BUG_ON(inode->i_state & I_SYNC);
 
-       /* Set I_SYNC, reset I_DIRTY */
-       dirty = inode->i_state & I_DIRTY;
+       /* Set I_SYNC, reset I_DIRTY_PAGES */
        inode->i_state |= I_SYNC;
-       inode->i_state &= ~I_DIRTY;
-
+       inode->i_state &= ~I_DIRTY_PAGES;
        spin_unlock(&inode_lock);
 
        ret = do_writepages(mapping, wbc);
@@ -472,6 +481,15 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
                        ret = err;
        }
 
+       /*
+        * Some filesystems may redirty the inode during the writeback
+        * due to delalloc, clear dirty metadata flags right before
+        * write_inode()
+        */
+       spin_lock(&inode_lock);
+       dirty = inode->i_state & I_DIRTY;
+       inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC);
+       spin_unlock(&inode_lock);
        /* Don't write the inode if only I_DIRTY_PAGES was set */
        if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
                int err = write_inode(inode, wbc);
@@ -577,7 +595,7 @@ static enum sb_pin_state pin_sb_for_writeback(struct writeback_control *wbc,
        /*
         * Caller must already hold the ref for this
         */
-       if (wbc->sync_mode == WB_SYNC_ALL) {
+       if (wbc->sync_mode == WB_SYNC_ALL || wbc->sb_pinned) {
                WARN_ON(!rwsem_is_locked(&sb->s_umount));
                return SB_NOT_PINNED;
        }
@@ -751,6 +769,7 @@ static long wb_writeback(struct bdi_writeback *wb,
                .for_kupdate            = args->for_kupdate,
                .for_background         = args->for_background,
                .range_cyclic           = args->range_cyclic,
+               .sb_pinned              = args->sb_pinned,
        };
        unsigned long oldest_jif;
        long wrote = 0;
@@ -852,6 +871,12 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb)
        unsigned long expired;
        long nr_pages;
 
+       /*
+        * When set to zero, disable periodic writeback
+        */
+       if (!dirty_writeback_interval)
+               return 0;
+
        expired = wb->last_old_flush +
                        msecs_to_jiffies(dirty_writeback_interval * 10);
        if (time_before(jiffies, expired))
@@ -887,6 +912,7 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
 
        while ((work = get_next_work_item(bdi, wb)) != NULL) {
                struct wb_writeback_args args = work->args;
+               int post_clear;
 
                /*
                 * Override sync mode, in case we must wait for completion
@@ -894,11 +920,13 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
                if (force_wait)
                        work->args.sync_mode = args.sync_mode = WB_SYNC_ALL;
 
+               post_clear = WB_SYNC_ALL || args.sb_pinned;
+
                /*
                 * If this isn't a data integrity operation, just notify
                 * that we have seen this work and we are now starting it.
                 */
-               if (args.sync_mode == WB_SYNC_NONE)
+               if (!post_clear)
                        wb_clear_pending(wb, work);
 
                wrote += wb_writeback(wb, &args);
@@ -907,7 +935,7 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
                 * This is a data integrity writeback, so only do the
                 * notification when we have completed the work.
                 */
-               if (args.sync_mode == WB_SYNC_ALL)
+               if (post_clear)
                        wb_clear_pending(wb, work);
        }
 
@@ -947,8 +975,17 @@ int bdi_writeback_task(struct bdi_writeback *wb)
                                break;
                }
 
-               wait_jiffies = msecs_to_jiffies(dirty_writeback_interval * 10);
-               schedule_timeout_interruptible(wait_jiffies);
+               if (dirty_writeback_interval) {
+                       wait_jiffies = msecs_to_jiffies(dirty_writeback_interval * 10);
+                       schedule_timeout_interruptible(wait_jiffies);
+               } else {
+                       set_current_state(TASK_INTERRUPTIBLE);
+                       if (list_empty_careful(&wb->bdi->work_list) &&
+                           !kthread_should_stop())
+                               schedule();
+                       __set_current_state(TASK_RUNNING);
+               }
+
                try_to_freeze();
        }
 
@@ -974,7 +1011,7 @@ static void bdi_writeback_all(struct super_block *sb, long nr_pages)
                if (!bdi_has_dirty_io(bdi))
                        continue;
 
-               bdi_alloc_queue_work(bdi, &args);
+               bdi_alloc_queue_work(bdi, &args, 0);
        }
 
        rcu_read_unlock();
@@ -1183,6 +1220,18 @@ static void wait_sb_inodes(struct super_block *sb)
        iput(old_inode);
 }
 
+static void __writeback_inodes_sb(struct super_block *sb, int sb_locked)
+{
+       unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
+       unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
+       long nr_to_write;
+
+       nr_to_write = nr_dirty + nr_unstable +
+                       (inodes_stat.nr_inodes - inodes_stat.nr_unused);
+
+       bdi_start_writeback(sb->s_bdi, sb, nr_to_write, sb_locked);
+}
+
 /**
  * writeback_inodes_sb -       writeback dirty inodes from given super_block
  * @sb: the superblock
@@ -1194,17 +1243,22 @@ static void wait_sb_inodes(struct super_block *sb)
  */
 void writeback_inodes_sb(struct super_block *sb)
 {
-       unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
-       unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
-       long nr_to_write;
-
-       nr_to_write = nr_dirty + nr_unstable +
-                       (inodes_stat.nr_inodes - inodes_stat.nr_unused);
-
-       bdi_start_writeback(sb->s_bdi, sb, nr_to_write);
+       __writeback_inodes_sb(sb, 0);
 }
 EXPORT_SYMBOL(writeback_inodes_sb);
 
+/**
+ * writeback_inodes_sb_locked  - writeback dirty inodes from given super_block
+ * @sb: the superblock
+ *
+ * Like writeback_inodes_sb(), except the caller already holds the
+ * sb umount sem.
+ */
+void writeback_inodes_sb_locked(struct super_block *sb)
+{
+       __writeback_inodes_sb(sb, 1);
+}
+
 /**
  * writeback_inodes_sb_if_idle -       start writeback if none underway
  * @sb: the superblock