[pandora-kernel.git] / fs / fs-writeback.c
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 04cf3b9..7286eb4 100644
@@ -41,19 +41,12 @@ struct wb_writeback_work {
        unsigned int for_kupdate:1;
        unsigned int range_cyclic:1;
        unsigned int for_background:1;
+       enum wb_reason reason;          /* why was writeback initiated? */
 
        struct list_head list;          /* pending work list */
        struct completion *done;        /* set if the caller waits */
 };
 
-/*
- * Include the creation of the trace points after defining the
- * wb_writeback_work structure so that the definition remains local to this
- * file.
- */
-#define CREATE_TRACE_POINTS
-#include <trace/events/writeback.h>
-
 /*
  * We don't actually have pdflush, but this one is exported through /proc...
  */
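
The new reason field records why writeback was kicked off, so the tracepoints included further down can report it. The enum itself lives in include/linux/writeback.h and is not part of this diff; the sketch below is only an illustration built from the values this patch actually uses (WB_REASON_BACKGROUND, WB_REASON_PERIODIC, WB_REASON_SYNC), and any other members are assumptions.

/* Illustrative sketch only: the real enum wb_reason is defined in
 * include/linux/writeback.h and carries more members than shown here.
 */
enum wb_reason {
	WB_REASON_BACKGROUND,	/* dirty memory exceeded the background threshold */
	WB_REASON_PERIODIC,	/* kupdate-style periodic flush of old data */
	WB_REASON_SYNC,		/* explicit sync_inodes_sb()/sync */

	WB_REASON_MAX,		/* assumed sentinel, not shown in this patch */
};
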
@@ -70,6 +63,7 @@ int writeback_in_progress(struct backing_dev_info *bdi)
 {
        return test_bit(BDI_writeback_running, &bdi->state);
 }
+EXPORT_SYMBOL(writeback_in_progress);
 
 static inline struct backing_dev_info *inode_to_bdi(struct inode *inode)
 {
@@ -86,6 +80,14 @@ static inline struct inode *wb_inode(struct list_head *head)
        return list_entry(head, struct inode, i_wb_list);
 }
 
+/*
+ * Include the creation of the trace points after defining the
+ * wb_writeback_work structure and inline functions so that the definition
+ * remains local to this file.
+ */
+#define CREATE_TRACE_POINTS
+#include <trace/events/writeback.h>
+
 /* Wakeup flusher thread or forker thread to fork it. Requires bdi->wb_lock. */
 static void bdi_wakeup_flusher(struct backing_dev_info *bdi)
 {
@@ -115,7 +117,7 @@ static void bdi_queue_work(struct backing_dev_info *bdi,
 
 static void
 __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
-                     bool range_cyclic)
+                     bool range_cyclic, enum wb_reason reason)
 {
        struct wb_writeback_work *work;
 
@@ -135,6 +137,7 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
        work->sync_mode = WB_SYNC_NONE;
        work->nr_pages  = nr_pages;
        work->range_cyclic = range_cyclic;
+       work->reason    = reason;
 
        bdi_queue_work(bdi, work);
 }
@@ -143,6 +146,7 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
  * bdi_start_writeback - start writeback
  * @bdi: the backing device to write from
  * @nr_pages: the number of pages to write
+ * @reason: reason why some writeback work was initiated
  *
  * Description:
  *   This does WB_SYNC_NONE opportunistic writeback. The IO is only
@@ -150,9 +154,10 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
  *   completion. Caller need not hold sb s_umount semaphore.
  *
  */
-void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages)
+void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
+                       enum wb_reason reason)
 {
-       __bdi_start_writeback(bdi, nr_pages, true);
+       __bdi_start_writeback(bdi, nr_pages, true, reason);
 }
 
 /**
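
With the signature change, every caller of bdi_start_writeback() now has to state why it wants writeback. A hypothetical call site, assuming the usual kernel headers; only the function and the reason value come from this patch, the surrounding context is invented:

#include <linux/backing-dev.h>
#include <linux/writeback.h>

/* Hypothetical example: queue opportunistic (WB_SYNC_NONE) writeback of
 * 1024 pages on this bdi, tagged as background-initiated.  The call only
 * queues work; it does not wait for the IO to complete.
 */
static void example_kick_writeback(struct backing_dev_info *bdi)
{
	bdi_start_writeback(bdi, 1024, WB_REASON_BACKGROUND);
}
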
@@ -251,7 +256,7 @@ static bool inode_dirtied_after(struct inode *inode, unsigned long t)
  */
 static int move_expired_inodes(struct list_head *delaying_queue,
                               struct list_head *dispatch_queue,
-                              unsigned long *older_than_this)
+                              struct wb_writeback_work *work)
 {
        LIST_HEAD(tmp);
        struct list_head *pos, *node;
@@ -262,8 +267,8 @@ static int move_expired_inodes(struct list_head *delaying_queue,
 
        while (!list_empty(delaying_queue)) {
                inode = wb_inode(delaying_queue->prev);
-               if (older_than_this &&
-                   inode_dirtied_after(inode, *older_than_this))
+               if (work->older_than_this &&
+                   inode_dirtied_after(inode, *work->older_than_this))
                        break;
                if (sb && sb != inode->i_sb)
                        do_sb_sort = 1;
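
The loop above walks b_dirty from the oldest end and stops at the first inode dirtied after *work->older_than_this. A minimal userspace illustration of the wrap-safe jiffies comparison this relies on, assuming inode_dirtied_after() is essentially a time_after() style check on inode->dirtied_when (the helper itself is outside this hunk):

#include <stdbool.h>

/* Wrap-safe "was dirtied after t" test in the style of the kernel's
 * time_after(): stays correct across counter overflow as long as the two
 * timestamps are less than half the counter range apart.
 */
static bool dirtied_after(unsigned long dirtied_when, unsigned long t)
{
	return (long)(t - dirtied_when) < 0;	/* dirtied_when is newer than t */
}
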
@@ -302,13 +307,13 @@ out:
  *                                           |
  *                                           +--> dequeue for IO
  */
-static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this)
+static void queue_io(struct bdi_writeback *wb, struct wb_writeback_work *work)
 {
        int moved;
        assert_spin_locked(&wb->list_lock);
        list_splice_init(&wb->b_more_io, &wb->b_io);
-       moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this);
-       trace_writeback_queue_io(wb, older_than_this, moved);
+       moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, work);
+       trace_writeback_queue_io(wb, work, moved);
 }
 
 static int write_inode(struct inode *inode, struct writeback_control *wbc)
@@ -391,7 +396,6 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
 
        /* Set I_SYNC, reset I_DIRTY_PAGES */
        inode->i_state |= I_SYNC;
-       inode->i_state &= ~I_DIRTY_PAGES;
        spin_unlock(&inode->i_lock);
        spin_unlock(&wb->list_lock);
 
@@ -414,9 +418,28 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
         * write_inode()
         */
        spin_lock(&inode->i_lock);
+
        dirty = inode->i_state & I_DIRTY;
-       inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC);
+       inode->i_state &= ~I_DIRTY;
+
+       /*
+        * Paired with smp_mb() in __mark_inode_dirty().  This allows
+        * __mark_inode_dirty() to test i_state without grabbing i_lock -
+        * either they see the I_DIRTY bits cleared or we see the dirtied
+        * inode.
+        *
+        * I_DIRTY_PAGES is always cleared together above even if @mapping
+        * still has dirty pages.  The flag is reinstated after smp_mb() if
+        * necessary.  This guarantees that either __mark_inode_dirty()
+        * sees clear I_DIRTY_PAGES or we see PAGECACHE_TAG_DIRTY.
+        */
+       smp_mb();
+
+       if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
+               inode->i_state |= I_DIRTY_PAGES;
+
        spin_unlock(&inode->i_lock);
+
        /* Don't write the inode if only I_DIRTY_PAGES was set */
        if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
                int err = write_inode(inode, wbc);
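
The new comment describes a classic store/barrier/load pairing: this side clears I_DIRTY, issues a full barrier, then re-checks the page-cache dirty tag, while __mark_inode_dirty() dirties pages, issues its own barrier, then tests i_state without taking i_lock. A userspace analogue of that pairing, using C11 atomics in place of smp_mb() (names and the two stand-in flags are mine, purely illustrative):

#include <stdatomic.h>

static atomic_int i_dirty;	/* stands in for the inode's I_DIRTY bits */
static atomic_int tag_dirty;	/* stands in for PAGECACHE_TAG_DIRTY */

/* Plays __writeback_single_inode(): clear, full fence, re-check the tag. */
static void flusher_side(void)
{
	atomic_store_explicit(&i_dirty, 0, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);		/* smp_mb() */
	if (atomic_load_explicit(&tag_dirty, memory_order_relaxed))
		atomic_store_explicit(&i_dirty, 1, memory_order_relaxed);
}

/* Plays __mark_inode_dirty(): dirty a page, full fence, lockless test. */
static void dirtier_side(void)
{
	atomic_store_explicit(&tag_dirty, 1, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);		/* smp_mb() */
	if (atomic_load_explicit(&i_dirty, memory_order_relaxed))
		return;	/* flusher still sees the inode as dirty */
	/* otherwise take i_lock and redirty the inode (not modelled here) */
}

Because both sides do store, fence, load, at least one of them is guaranteed to observe the other's store, so a freshly dirtied page can never be missed by both.
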
@@ -442,7 +465,6 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
                         * We didn't write back all the pages.  nfs_writepages()
                         * sometimes bales out without doing anything.
                         */
-                       inode->i_state |= I_DIRTY_PAGES;
                        if (wbc->nr_to_write <= 0) {
                                /*
                                 * slice used up: queue for next turn
@@ -641,31 +663,40 @@ static long __writeback_inodes_wb(struct bdi_writeback *wb,
        return wrote;
 }
 
-long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages)
+long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
+                               enum wb_reason reason)
 {
        struct wb_writeback_work work = {
                .nr_pages       = nr_pages,
                .sync_mode      = WB_SYNC_NONE,
                .range_cyclic   = 1,
+               .reason         = reason,
        };
 
        spin_lock(&wb->list_lock);
        if (list_empty(&wb->b_io))
-               queue_io(wb, NULL);
+               queue_io(wb, &work);
        __writeback_inodes_wb(wb, &work);
        spin_unlock(&wb->list_lock);
 
        return nr_pages - work.nr_pages;
 }
 
-static inline bool over_bground_thresh(void)
+static bool over_bground_thresh(struct backing_dev_info *bdi)
 {
        unsigned long background_thresh, dirty_thresh;
 
        global_dirty_limits(&background_thresh, &dirty_thresh);
 
-       return (global_page_state(NR_FILE_DIRTY) +
-               global_page_state(NR_UNSTABLE_NFS) > background_thresh);
+       if (global_page_state(NR_FILE_DIRTY) +
+           global_page_state(NR_UNSTABLE_NFS) > background_thresh)
+               return true;
+
+       if (bdi_stat(bdi, BDI_RECLAIMABLE) >
+                               bdi_dirty_limit(bdi, background_thresh))
+               return true;
+
+       return false;
 }
 
 /*
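
Background flushing now stops only when both checks pass: the system-wide dirty count must be under background_thresh, and this device's reclaimable pages must be under its own share of that threshold as returned by bdi_dirty_limit(). A toy model with hypothetical numbers (all values invented for illustration) showing the per-bdi check firing even though the global one would not:

#include <stdbool.h>

/* Toy model of the two-level check in over_bground_thresh(), with made-up
 * numbers: the global dirty count is below the background threshold, but
 * this one device holds more reclaimable pages than its share, so
 * background writeback keeps running for it.
 */
static bool toy_over_bground_thresh(void)
{
	unsigned long background_thresh = 50000;	/* global threshold, pages */
	unsigned long global_dirty      = 30000;	/* NR_FILE_DIRTY + NR_UNSTABLE_NFS */
	unsigned long bdi_share         =  5000;	/* bdi_dirty_limit() result */
	unsigned long bdi_reclaimable   =  8000;	/* BDI_RECLAIMABLE for this bdi */

	/* global check: 30000 <= 50000, does not fire */
	if (global_dirty > background_thresh)
		return true;
	/* per-bdi check: 8000 > 5000, fires */
	if (bdi_reclaimable > bdi_share)
		return true;
	return false;
}
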
@@ -675,7 +706,7 @@ static inline bool over_bground_thresh(void)
 static void wb_update_bandwidth(struct bdi_writeback *wb,
                                unsigned long start_time)
 {
-       __bdi_update_bandwidth(wb->bdi, 0, 0, 0, 0, start_time);
+       __bdi_update_bandwidth(wb->bdi, 0, 0, 0, 0, 0, start_time);
 }
 
 /*
@@ -727,7 +758,7 @@ static long wb_writeback(struct bdi_writeback *wb,
                 * For background writeout, stop when we are below the
                 * background dirty threshold
                 */
-               if (work->for_background && !over_bground_thresh())
+               if (work->for_background && !over_bground_thresh(wb->bdi))
                        break;
 
                if (work->for_kupdate) {
@@ -738,7 +769,7 @@ static long wb_writeback(struct bdi_writeback *wb,
 
                trace_writeback_start(wb->bdi, work);
                if (list_empty(&wb->b_io))
-                       queue_io(wb, work->older_than_this);
+                       queue_io(wb, work);
                if (work->sb)
                        progress = writeback_sb_inodes(work->sb, wb, work);
                else
@@ -811,13 +842,14 @@ static unsigned long get_nr_dirty_pages(void)
 
 static long wb_check_background_flush(struct bdi_writeback *wb)
 {
-       if (over_bground_thresh()) {
+       if (over_bground_thresh(wb->bdi)) {
 
                struct wb_writeback_work work = {
                        .nr_pages       = LONG_MAX,
                        .sync_mode      = WB_SYNC_NONE,
                        .for_background = 1,
                        .range_cyclic   = 1,
+                       .reason         = WB_REASON_BACKGROUND,
                };
 
                return wb_writeback(wb, &work);
@@ -851,6 +883,7 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb)
                        .sync_mode      = WB_SYNC_NONE,
                        .for_kupdate    = 1,
                        .range_cyclic   = 1,
+                       .reason         = WB_REASON_PERIODIC,
                };
 
                return wb_writeback(wb, &work);
@@ -969,7 +1002,7 @@ int bdi_writeback_thread(void *data)
  * Start writeback of `nr_pages' pages.  If `nr_pages' is zero, write back
  * the whole world.
  */
-void wakeup_flusher_threads(long nr_pages)
+void wakeup_flusher_threads(long nr_pages, enum wb_reason reason)
 {
        struct backing_dev_info *bdi;
 
@@ -982,7 +1015,7 @@ void wakeup_flusher_threads(long nr_pages)
        list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
                if (!bdi_has_dirty_io(bdi))
                        continue;
-               __bdi_start_writeback(bdi, nr_pages, false);
+               __bdi_start_writeback(bdi, nr_pages, false, reason);
        }
        rcu_read_unlock();
 }
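
wakeup_flusher_threads() now carries the reason too, and __bdi_start_writeback() copies it into every work item it queues. A hypothetical call site (the function and WB_REASON_SYNC come from this patch; the wrapper is invented):

#include <linux/writeback.h>

/* Hypothetical example: ask every bdi with dirty IO to flush.  Passing 0
 * for nr_pages means "write back the whole world", per the comment above.
 */
static void example_flush_everything(void)
{
	wakeup_flusher_threads(0, WB_REASON_SYNC);
}
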
@@ -1048,12 +1081,11 @@ void __mark_inode_dirty(struct inode *inode, int flags)
        }
 
        /*
-        * make sure that changes are seen by all cpus before we test i_state
-        * -- mikulas
+        * Paired with smp_mb() in __writeback_single_inode() for the
+        * following lockless i_state test.  See there for details.
         */
        smp_mb();
 
-       /* avoid the locking if we can */
        if ((inode->i_state & flags) == flags)
                return;
 
@@ -1198,12 +1230,15 @@ static void wait_sb_inodes(struct super_block *sb)
  * writeback_inodes_sb_nr -    writeback dirty inodes from given super_block
  * @sb: the superblock
  * @nr: the number of pages to write
+ * @reason: reason why some writeback work was initiated
  *
  * Start writeback on some inodes on this super_block. No guarantees are made
  * on how many (if any) will be written, and this function does not wait
  * for IO completion of submitted IO.
  */
-void writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr)
+void writeback_inodes_sb_nr(struct super_block *sb,
+                           unsigned long nr,
+                           enum wb_reason reason)
 {
        DECLARE_COMPLETION_ONSTACK(done);
        struct wb_writeback_work work = {
@@ -1212,6 +1247,7 @@ void writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr)
                .tagged_writepages      = 1,
                .done                   = &done,
                .nr_pages               = nr,
+               .reason                 = reason,
        };
 
        WARN_ON(!rwsem_is_locked(&sb->s_umount));
@@ -1223,29 +1259,31 @@ EXPORT_SYMBOL(writeback_inodes_sb_nr);
 /**
  * writeback_inodes_sb -       writeback dirty inodes from given super_block
  * @sb: the superblock
+ * @reason: reason why some writeback work was initiated
  *
  * Start writeback on some inodes on this super_block. No guarantees are made
  * on how many (if any) will be written, and this function does not wait
  * for IO completion of submitted IO.
  */
-void writeback_inodes_sb(struct super_block *sb)
+void writeback_inodes_sb(struct super_block *sb, enum wb_reason reason)
 {
-       return writeback_inodes_sb_nr(sb, get_nr_dirty_pages());
+       return writeback_inodes_sb_nr(sb, get_nr_dirty_pages(), reason);
 }
 EXPORT_SYMBOL(writeback_inodes_sb);
 
 /**
  * writeback_inodes_sb_if_idle -       start writeback if none underway
  * @sb: the superblock
+ * @reason: reason why some writeback work was initiated
  *
  * Invoke writeback_inodes_sb if no writeback is currently underway.
  * Returns 1 if writeback was started, 0 if not.
  */
-int writeback_inodes_sb_if_idle(struct super_block *sb)
+int writeback_inodes_sb_if_idle(struct super_block *sb, enum wb_reason reason)
 {
        if (!writeback_in_progress(sb->s_bdi)) {
                down_read(&sb->s_umount);
-               writeback_inodes_sb(sb);
+               writeback_inodes_sb(sb, reason);
                up_read(&sb->s_umount);
                return 1;
        } else
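
The *_if_idle variants only start writeback when the bdi is not already flushing, and they take s_umount themselves. A hypothetical filesystem-side caller; the choice of WB_REASON_BACKGROUND here is arbitrary, just one of the values this patch introduces:

#include <linux/fs.h>
#include <linux/writeback.h>

/* Hypothetical example: try to push dirty inodes of this superblock out
 * before retrying an operation, but only if nothing is flushing already.
 * Returns 1 if writeback was started, 0 if the bdi was busy.
 */
static int example_try_flush(struct super_block *sb)
{
	return writeback_inodes_sb_if_idle(sb, WB_REASON_BACKGROUND);
}
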
@@ -1257,16 +1295,18 @@ EXPORT_SYMBOL(writeback_inodes_sb_if_idle);
  * writeback_inodes_sb_nr_if_idle -    start writeback if none underway
  * @sb: the superblock
  * @nr: the number of pages to write
+ * @reason: reason why some writeback work was initiated
  *
  * Invoke writeback_inodes_sb if no writeback is currently underway.
  * Returns 1 if writeback was started, 0 if not.
  */
 int writeback_inodes_sb_nr_if_idle(struct super_block *sb,
-                                  unsigned long nr)
+                                  unsigned long nr,
+                                  enum wb_reason reason)
 {
        if (!writeback_in_progress(sb->s_bdi)) {
                down_read(&sb->s_umount);
-               writeback_inodes_sb_nr(sb, nr);
+               writeback_inodes_sb_nr(sb, nr, reason);
                up_read(&sb->s_umount);
                return 1;
        } else
@@ -1290,6 +1330,7 @@ void sync_inodes_sb(struct super_block *sb)
                .nr_pages       = LONG_MAX,
                .range_cyclic   = 0,
                .done           = &done,
+               .reason         = WB_REASON_SYNC,
        };
 
        WARN_ON(!rwsem_is_locked(&sb->s_umount));