X-Git-Url: https://git.openpandora.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=mm%2Fpage-writeback.c;h=ea3f83b21197edbb5d8b50e78b7a27d3473c8796;hb=d9aab1cfdd30ff43df1e45f9430c30d99c2c6e47;hp=71252486bc6f1f161b87592ded6e8b7e6c0dc8ee;hpb=f10cdea68b70bd85706baed0decab59618f9c353;p=pandora-kernel.git diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 71252486bc6f..ea3f83b21197 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -411,8 +411,13 @@ void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty) * * Returns @bdi's dirty limit in pages. The term "dirty" in the context of * dirty balancing includes all PG_dirty, PG_writeback and NFS unstable pages. - * And the "limit" in the name is not seriously taken as hard limit in - * balance_dirty_pages(). + * + * Note that balance_dirty_pages() will only seriously take it as a hard limit + * when sleeping max_pause per page is not enough to keep the dirty pages under + * control. For example, when the device is completely stalled due to some error + * conditions, or when there are 1000 dd tasks writing to a slow 10MB/s USB key. + * In the other normal situations, it acts more gently by throttling the tasks + * more (rather than completely block them) when the bdi dirty pages go high. * * It allocates high/low dirty limits to fast/slow devices, in order to prevent * - starving fast devices @@ -594,6 +599,13 @@ static unsigned long bdi_position_ratio(struct backing_dev_info *bdi, */ if (unlikely(bdi_thresh > thresh)) bdi_thresh = thresh; + /* + * It's very possible that bdi_thresh is close to 0 not because the + * device is slow, but that it has remained inactive for long time. + * Honour such devices a reasonable good (hopefully IO efficient) + * threshold, so that the occasional writes won't be blocked and active + * writes can rampup the threshold quickly. + */ bdi_thresh = max(bdi_thresh, (limit - dirty) / 8); /* * scale global setpoint to bdi's: @@ -977,8 +989,7 @@ static unsigned long bdi_max_pause(struct backing_dev_info *bdi, * * 8 serves as the safety ratio. */ - if (bdi_dirty) - t = min(t, bdi_dirty * HZ / (8 * bw + 1)); + t = min(t, bdi_dirty * HZ / (8 * bw + 1)); /* * The pause time will be settled within range (max_pause/4, max_pause). @@ -1136,6 +1147,19 @@ pause: if (task_ratelimit) break; + /* + * In the case of an unresponding NFS server and the NFS dirty + * pages exceeds dirty_thresh, give the other good bdi's a pipe + * to go through, so that tasks on them still remain responsive. + * + * In theory 1 page is enough to keep the comsumer-producer + * pipe going: the flusher cleans 1 page => the task dirties 1 + * more page. However bdi_dirty has accounting errors. So use + * the larger and more IO friendly bdi_stat_error. + */ + if (bdi_dirty <= bdi_stat_error(bdi)) + break; + if (fatal_signal_pending(current)) break; } @@ -1776,6 +1800,24 @@ int __set_page_dirty_nobuffers(struct page *page) } EXPORT_SYMBOL(__set_page_dirty_nobuffers); +/* + * Call this whenever redirtying a page, to de-account the dirty counters + * (NR_DIRTIED, BDI_DIRTIED, tsk->nr_dirtied), so that they match the written + * counters (NR_WRITTEN, BDI_WRITTEN) in long term. The mismatches will lead to + * systematic errors in balanced_dirty_ratelimit and the dirty pages position + * control. + */ +void account_page_redirty(struct page *page) +{ + struct address_space *mapping = page->mapping; + if (mapping && mapping_cap_account_dirty(mapping)) { + current->nr_dirtied--; + dec_zone_page_state(page, NR_DIRTIED); + dec_bdi_stat(mapping->backing_dev_info, BDI_DIRTIED); + } +} +EXPORT_SYMBOL(account_page_redirty); + /* * When a writepage implementation decides that it doesn't want to write this * page for some reason, it should redirty the locked page via @@ -1784,6 +1826,7 @@ EXPORT_SYMBOL(__set_page_dirty_nobuffers); int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page) { wbc->pages_skipped++; + account_page_redirty(page); return __set_page_dirty_nobuffers(page); } EXPORT_SYMBOL(redirty_page_for_writepage);