return list_entry(head, struct inode, i_wb_list);
}
-static void bdi_queue_work(struct backing_dev_info *bdi,
- struct wb_writeback_work *work)
+/* Wakeup flusher thread or forker thread to fork it. Requires bdi->wb_lock. */
+static void bdi_wakeup_flusher(struct backing_dev_info *bdi)
{
- trace_writeback_queue(bdi, work);
-
- spin_lock_bh(&bdi->wb_lock);
- list_add_tail(&work->list, &bdi->work_list);
if (bdi->wb.task) {
wake_up_process(bdi->wb.task);
} else {
* The bdi thread isn't there, wake up the forker thread which
* will create and run it.
*/
- trace_writeback_nothread(bdi, work);
wake_up_process(default_backing_dev_info.wb.task);
}
+}
+
+/*
+ * Queue @work on @bdi and make sure some thread is woken to process it.
+ *
+ * With bdi->wb_lock taken via spin_lock_bh(), the work item is appended to
+ * the tail of bdi->work_list and bdi_wakeup_flusher() is called. When
+ * bdi->wb.task is not set, a writeback_nothread trace event is emitted
+ * before the wakeup.
+ */
+static void bdi_queue_work(struct backing_dev_info *bdi,
+ struct wb_writeback_work *work)
+{
+ trace_writeback_queue(bdi, work);
+
+ spin_lock_bh(&bdi->wb_lock);
+ list_add_tail(&work->list, &bdi->work_list);
+ if (!bdi->wb.task)
+ trace_writeback_nothread(bdi, work);
+ bdi_wakeup_flusher(bdi);
spin_unlock_bh(&bdi->wb_lock);
}
static void
__bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
- bool range_cyclic, bool for_background)
+ bool range_cyclic)
{
struct wb_writeback_work *work;
work->sync_mode = WB_SYNC_NONE;
work->nr_pages = nr_pages;
work->range_cyclic = range_cyclic;
- work->for_background = for_background;
bdi_queue_work(bdi, work);
}
*/
void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages)
{
- __bdi_start_writeback(bdi, nr_pages, true, false);
+ __bdi_start_writeback(bdi, nr_pages, true);
}
/**
* @bdi: the backing device to write from
*
* Description:
- * This does WB_SYNC_NONE background writeback. The IO is only
- * started when this function returns, we make no guarentees on
- * completion. Caller need not hold sb s_umount semaphore.
+ * This makes sure WB_SYNC_NONE background writeback happens. When
+ * this function returns, it is only guaranteed that for given BDI
+ * some IO is happening if we are over background dirty threshold.
+ * Caller need not hold sb s_umount semaphore.
*/
void bdi_start_background_writeback(struct backing_dev_info *bdi)
{
- __bdi_start_writeback(bdi, LONG_MAX, true, true);
+ /*
+ * We just wake up the flusher thread. It will perform background
+ * writeback as soon as there is no other work to do.
+ */
+ trace_writeback_wake_background(bdi);
+ spin_lock_bh(&bdi->wb_lock);
+ bdi_wakeup_flusher(bdi);
+ spin_unlock_bh(&bdi->wb_lock);
}
/*
};
unsigned long oldest_jif;
long wrote = 0;
+ long write_chunk;
struct inode *inode;
if (wbc.for_kupdate) {
wbc.range_end = LLONG_MAX;
}
+ /*
+ * WB_SYNC_ALL mode does livelock avoidance by syncing dirty
+ * inodes/pages in one big loop. Setting wbc.nr_to_write=LONG_MAX
+ * here avoids calling into writeback_inodes_wb() more than once.
+ *
+ * The intended call sequence for WB_SYNC_ALL writeback is:
+ *
+ * wb_writeback()
+ * __writeback_inodes_sb() <== called only once
+ * write_cache_pages() <== called once for each inode
+ * (quickly) tag currently dirty pages
+ * (maybe slowly) sync all tagged pages
+ */
+ if (wbc.sync_mode == WB_SYNC_NONE)
+ write_chunk = MAX_WRITEBACK_PAGES;
+ else
+ write_chunk = LONG_MAX;
+
wbc.wb_start = jiffies; /* livelock avoidance */
for (;;) {
/*
if (work->nr_pages <= 0)
break;
+ /*
+ * Background writeout and kupdate-style writeback may
+ * run forever. Stop them if there is other work to do
+ * so that e.g. sync can proceed. They'll be restarted
+ * after the other works are all done.
+ */
+ if ((work->for_background || work->for_kupdate) &&
+ !list_empty(&wb->bdi->work_list))
+ break;
+
/*
* For background writeout, stop when we are below the
* background dirty threshold
break;
wbc.more_io = 0;
- wbc.nr_to_write = MAX_WRITEBACK_PAGES;
+ wbc.nr_to_write = write_chunk;
wbc.pages_skipped = 0;
trace_wbc_writeback_start(&wbc, wb->bdi);
writeback_inodes_wb(wb, &wbc);
trace_wbc_writeback_written(&wbc, wb->bdi);
- work->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
- wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write;
+ work->nr_pages -= write_chunk - wbc.nr_to_write;
+ wrote += write_chunk - wbc.nr_to_write;
/*
* If we consumed everything, see if we have more
/*
* Did we write something? Try for more
*/
- if (wbc.nr_to_write < MAX_WRITEBACK_PAGES)
+ if (wbc.nr_to_write < write_chunk)
continue;
/*
* Nothing written. Wait for some inode to
return work;
}
+/*
+ * Return an estimate of how much dirty data there is to write back: the
+ * global NR_FILE_DIRTY and NR_UNSTABLE_NFS page counts plus the value of
+ * get_nr_dirty_inodes().
+ *
+ * Add in the number of potentially dirty inodes, because each inode
+ * write can dirty pagecache in the underlying blockdev.
+ */
+static unsigned long get_nr_dirty_pages(void)
+{
+ return global_page_state(NR_FILE_DIRTY) +
+ global_page_state(NR_UNSTABLE_NFS) +
+ get_nr_dirty_inodes();
+}
+
+/*
+ * Start background writeback on @wb if over_bground_thresh() says so.
+ *
+ * The work item lives on the stack and asks for an unbounded number of
+ * pages (LONG_MAX) in WB_SYNC_NONE mode, flagged as for_background and
+ * range_cyclic.
+ *
+ * Returns the value of wb_writeback() when writeback was run, 0 otherwise.
+ */
+static long wb_check_background_flush(struct bdi_writeback *wb)
+{
+ if (over_bground_thresh()) {
+
+ struct wb_writeback_work work = {
+ .nr_pages = LONG_MAX,
+ .sync_mode = WB_SYNC_NONE,
+ .for_background = 1,
+ .range_cyclic = 1,
+ };
+
+ return wb_writeback(wb, &work);
+ }
+
+ return 0;
+}
+
static long wb_check_old_data_flush(struct bdi_writeback *wb)
{
unsigned long expired;
return 0;
wb->last_old_flush = jiffies;
- /*
- * Add in the number of potentially dirty inodes, because each inode
- * write can dirty pagecache in the underlying blockdev.
- */
- nr_pages = global_page_state(NR_FILE_DIRTY) +
- global_page_state(NR_UNSTABLE_NFS) +
- get_nr_dirty_inodes();
+ nr_pages = get_nr_dirty_pages();
if (nr_pages) {
struct wb_writeback_work work = {
* Check for periodic writeback, kupdated() style
*/
wrote += wb_check_old_data_flush(wb);
+ wrote += wb_check_background_flush(wb);
clear_bit(BDI_writeback_running, &wb->bdi->state);
return wrote;
list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
if (!bdi_has_dirty_io(bdi))
continue;
- __bdi_start_writeback(bdi, nr_pages, false, false);
+ __bdi_start_writeback(bdi, nr_pages, false);
}
rcu_read_unlock();
}
}
/**
- * writeback_inodes_sb - writeback dirty inodes from given super_block
+ * writeback_inodes_sb_nr - writeback dirty inodes from given super_block
* @sb: the superblock
+ * @nr: the number of pages to write
*
* Start writeback on some inodes on this super_block. No guarantees are made
* on how many (if any) will be written, and this function does not wait
- * for IO completion of submitted IO. The number of pages submitted is
- * returned.
+ * for IO completion of submitted IO.
*/
-void writeback_inodes_sb(struct super_block *sb)
+void writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr)
{
- unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
- unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
DECLARE_COMPLETION_ONSTACK(done);
struct wb_writeback_work work = {
.sb = sb,
.sync_mode = WB_SYNC_NONE,
.done = &done,
+ .nr_pages = nr,
};
WARN_ON(!rwsem_is_locked(&sb->s_umount));
-
- work.nr_pages = nr_dirty + nr_unstable + get_nr_dirty_inodes();
-
bdi_queue_work(sb->s_bdi, &work);
wait_for_completion(&done);
}
+EXPORT_SYMBOL(writeback_inodes_sb_nr);
+
+/**
+ * writeback_inodes_sb - writeback dirty inodes from given super_block
+ * @sb: the superblock
+ *
+ * Start writeback on some inodes on this super_block. No guarantees are made
+ * on how many (if any) will be written, and this function does not wait
+ * for IO completion of submitted IO.
+ *
+ * The request is sized with get_nr_dirty_pages() and passed on to
+ * writeback_inodes_sb_nr().
+ */
+void writeback_inodes_sb(struct super_block *sb)
+{
+ writeback_inodes_sb_nr(sb, get_nr_dirty_pages());
+}
EXPORT_SYMBOL(writeback_inodes_sb);
/**
}
EXPORT_SYMBOL(writeback_inodes_sb_if_idle);
+/**
+ * writeback_inodes_sb_nr_if_idle - start writeback if none underway
+ * @sb: the superblock
+ * @nr: the number of pages to write
+ *
+ * Invoke writeback_inodes_sb_nr if no writeback is currently underway on
+ * sb->s_bdi. sb->s_umount is taken for read around the call.
+ * Returns 1 if writeback was started, 0 if not.
+ */
+int writeback_inodes_sb_nr_if_idle(struct super_block *sb,
+ unsigned long nr)
+{
+ if (!writeback_in_progress(sb->s_bdi)) {
+ down_read(&sb->s_umount);
+ writeback_inodes_sb_nr(sb, nr);
+ up_read(&sb->s_umount);
+ return 1;
+ } else
+ return 0;
+}
+EXPORT_SYMBOL(writeback_inodes_sb_nr_if_idle);
+
/**
* sync_inodes_sb - sync sb inode pages
* @sb: the superblock
*
* This function writes and waits on any dirty inode belonging to this
- * super_block. The number of pages synced is returned.
+ * super_block.
*/
void sync_inodes_sb(struct super_block *sb)
{
EXPORT_SYMBOL(sync_inode);
/**
- * sync_inode - write an inode to disk
+ * sync_inode_metadata - write an inode to disk
* @inode: the inode to sync
* @wait: wait for I/O to complete.
*
- * Write an inode to disk and adjust it's dirty state after completion.
+ * Write an inode to disk and adjust its dirty state after completion.
*
* Note: only writes the actual inode, no associated data or other metadata.
*/