X-Git-Url: https://git.openpandora.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=mm%2Fpage-writeback.c;h=ecf27839c2035a138000743753d3c2d8cefc6c07;hb=811d736f9e8013966e1a5a930c0db09508bdbb15;hp=4ec7026c7bab14e1f4a9e512742e318a6630607f;hpb=a144a5633c1e625c3134c2ce8d549a054468fd98;p=pandora-kernel.git diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 4ec7026c7bab..ecf27839c203 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -23,12 +23,15 @@ #include #include #include +#include #include #include #include #include #include #include +#include +#include /* * The maximum number of pages to writeout in a single bdflush/kupdate @@ -45,7 +48,6 @@ */ static long ratelimit_pages = 32; -static long total_pages; /* The total number of pages in the machine. */ static int dirty_exceeded __cacheline_aligned_in_smp; /* Dirty mem may be over limit */ /* @@ -99,22 +101,6 @@ EXPORT_SYMBOL(laptop_mode); static void background_writeout(unsigned long _min_pages); -struct writeback_state -{ - unsigned long nr_dirty; - unsigned long nr_unstable; - unsigned long nr_mapped; - unsigned long nr_writeback; -}; - -static void get_writeback_state(struct writeback_state *wbs) -{ - wbs->nr_dirty = read_page_state(nr_dirty); - wbs->nr_unstable = read_page_state(nr_unstable); - wbs->nr_mapped = read_page_state(nr_mapped); - wbs->nr_writeback = read_page_state(nr_writeback); -} - /* * Work out the current dirty-memory clamping and background writeout * thresholds. @@ -133,19 +119,17 @@ static void get_writeback_state(struct writeback_state *wbs) * clamping level. */ static void -get_dirty_limits(struct writeback_state *wbs, long *pbackground, long *pdirty, - struct address_space *mapping) +get_dirty_limits(long *pbackground, long *pdirty, + struct address_space *mapping) { int background_ratio; /* Percentages */ int dirty_ratio; int unmapped_ratio; long background; long dirty; - unsigned long available_memory = total_pages; + unsigned long available_memory = vm_total_pages; struct task_struct *tsk; - get_writeback_state(wbs); - #ifdef CONFIG_HIGHMEM /* * If this mapping can only allocate from low memory, @@ -156,7 +140,9 @@ get_dirty_limits(struct writeback_state *wbs, long *pbackground, long *pdirty, #endif - unmapped_ratio = 100 - (wbs->nr_mapped * 100) / total_pages; + unmapped_ratio = 100 - ((global_page_state(NR_FILE_MAPPED) + + global_page_state(NR_ANON_PAGES)) * 100) / + vm_total_pages; dirty_ratio = vm_dirty_ratio; if (dirty_ratio > unmapped_ratio / 2) @@ -189,7 +175,6 @@ get_dirty_limits(struct writeback_state *wbs, long *pbackground, long *pdirty, */ static void balance_dirty_pages(struct address_space *mapping) { - struct writeback_state wbs; long nr_reclaimable; long background_thresh; long dirty_thresh; @@ -207,11 +192,12 @@ static void balance_dirty_pages(struct address_space *mapping) .range_cyclic = 1, }; - get_dirty_limits(&wbs, &background_thresh, - &dirty_thresh, mapping); - nr_reclaimable = wbs.nr_dirty + wbs.nr_unstable; - if (nr_reclaimable + wbs.nr_writeback <= dirty_thresh) - break; + get_dirty_limits(&background_thresh, &dirty_thresh, mapping); + nr_reclaimable = global_page_state(NR_FILE_DIRTY) + + global_page_state(NR_UNSTABLE_NFS); + if (nr_reclaimable + global_page_state(NR_WRITEBACK) <= + dirty_thresh) + break; if (!dirty_exceeded) dirty_exceeded = 1; @@ -224,11 +210,14 @@ static void balance_dirty_pages(struct address_space *mapping) */ if (nr_reclaimable) { writeback_inodes(&wbc); - get_dirty_limits(&wbs, &background_thresh, - &dirty_thresh, mapping); - nr_reclaimable = wbs.nr_dirty + wbs.nr_unstable; - if (nr_reclaimable + wbs.nr_writeback <= dirty_thresh) - break; + get_dirty_limits(&background_thresh, + &dirty_thresh, mapping); + nr_reclaimable = global_page_state(NR_FILE_DIRTY) + + global_page_state(NR_UNSTABLE_NFS); + if (nr_reclaimable + + global_page_state(NR_WRITEBACK) + <= dirty_thresh) + break; pages_written += write_chunk - wbc.nr_to_write; if (pages_written >= write_chunk) break; /* We've done our duty */ @@ -236,8 +225,9 @@ static void balance_dirty_pages(struct address_space *mapping) blk_congestion_wait(WRITE, HZ/10); } - if (nr_reclaimable + wbs.nr_writeback <= dirty_thresh && dirty_exceeded) - dirty_exceeded = 0; + if (nr_reclaimable + global_page_state(NR_WRITEBACK) + <= dirty_thresh && dirty_exceeded) + dirty_exceeded = 0; if (writeback_in_progress(bdi)) return; /* pdflush is already working this queue */ @@ -255,6 +245,16 @@ static void balance_dirty_pages(struct address_space *mapping) pdflush_operation(background_writeout, 0); } +void set_page_dirty_balance(struct page *page) +{ + if (set_page_dirty(page)) { + struct address_space *mapping = page_mapping(page); + + if (mapping) + balance_dirty_pages_ratelimited(mapping); + } +} + /** * balance_dirty_pages_ratelimited_nr - balance dirty memory state * @mapping: address_space which was dirtied @@ -299,12 +299,11 @@ EXPORT_SYMBOL(balance_dirty_pages_ratelimited_nr); void throttle_vm_writeout(void) { - struct writeback_state wbs; long background_thresh; long dirty_thresh; for ( ; ; ) { - get_dirty_limits(&wbs, &background_thresh, &dirty_thresh, NULL); + get_dirty_limits(&background_thresh, &dirty_thresh, NULL); /* * Boost the allowable dirty threshold a bit for page @@ -312,8 +311,9 @@ void throttle_vm_writeout(void) */ dirty_thresh += dirty_thresh / 10; /* wheeee... */ - if (wbs.nr_unstable + wbs.nr_writeback <= dirty_thresh) - break; + if (global_page_state(NR_UNSTABLE_NFS) + + global_page_state(NR_WRITEBACK) <= dirty_thresh) + break; blk_congestion_wait(WRITE, HZ/10); } } @@ -336,12 +336,12 @@ static void background_writeout(unsigned long _min_pages) }; for ( ; ; ) { - struct writeback_state wbs; long background_thresh; long dirty_thresh; - get_dirty_limits(&wbs, &background_thresh, &dirty_thresh, NULL); - if (wbs.nr_dirty + wbs.nr_unstable < background_thresh + get_dirty_limits(&background_thresh, &dirty_thresh, NULL); + if (global_page_state(NR_FILE_DIRTY) + + global_page_state(NR_UNSTABLE_NFS) < background_thresh && min_pages <= 0) break; wbc.encountered_congestion = 0; @@ -365,12 +365,9 @@ static void background_writeout(unsigned long _min_pages) */ int wakeup_pdflush(long nr_pages) { - if (nr_pages == 0) { - struct writeback_state wbs; - - get_writeback_state(&wbs); - nr_pages = wbs.nr_dirty + wbs.nr_unstable; - } + if (nr_pages == 0) + nr_pages = global_page_state(NR_FILE_DIRTY) + + global_page_state(NR_UNSTABLE_NFS); return pdflush_operation(background_writeout, nr_pages); } @@ -401,7 +398,6 @@ static void wb_kupdate(unsigned long arg) unsigned long start_jif; unsigned long next_jif; long nr_to_write; - struct writeback_state wbs; struct writeback_control wbc = { .bdi = NULL, .sync_mode = WB_SYNC_NONE, @@ -414,11 +410,11 @@ static void wb_kupdate(unsigned long arg) sync_supers(); - get_writeback_state(&wbs); oldest_jif = jiffies - dirty_expire_interval; start_jif = jiffies; next_jif = start_jif + dirty_writeback_interval; - nr_to_write = wbs.nr_dirty + wbs.nr_unstable + + nr_to_write = global_page_state(NR_FILE_DIRTY) + + global_page_state(NR_UNSTABLE_NFS) + (inodes_stat.nr_inodes - inodes_stat.nr_unused); while (nr_to_write > 0) { wbc.encountered_congestion = 0; @@ -507,9 +503,9 @@ void laptop_sync_completion(void) * will write six megabyte chunks, max. */ -static void set_ratelimit(void) +void writeback_set_ratelimit(void) { - ratelimit_pages = total_pages / (num_online_cpus() * 32); + ratelimit_pages = vm_total_pages / (num_online_cpus() * 32); if (ratelimit_pages < 16) ratelimit_pages = 16; if (ratelimit_pages * PAGE_CACHE_SIZE > 4096 * 1024) @@ -519,7 +515,7 @@ static void set_ratelimit(void) static int __cpuinit ratelimit_handler(struct notifier_block *self, unsigned long u, void *v) { - set_ratelimit(); + writeback_set_ratelimit(); return 0; } @@ -538,9 +534,7 @@ void __init page_writeback_init(void) long buffer_pages = nr_free_buffer_pages(); long correction; - total_pages = nr_free_pagecache_pages(); - - correction = (100 * 4 * buffer_pages) / total_pages; + correction = (100 * 4 * buffer_pages) / vm_total_pages; if (correction < 100) { dirty_background_ratio *= correction; @@ -554,10 +548,143 @@ void __init page_writeback_init(void) vm_dirty_ratio = 1; } mod_timer(&wb_timer, jiffies + dirty_writeback_interval); - set_ratelimit(); + writeback_set_ratelimit(); register_cpu_notifier(&ratelimit_nb); } +/** + * generic_writepages - walk the list of dirty pages of the given + * address space and writepage() all of them. + * + * @mapping: address space structure to write + * @wbc: subtract the number of written pages from *@wbc->nr_to_write + * + * This is a library function, which implements the writepages() + * address_space_operation. + * + * If a page is already under I/O, generic_writepages() skips it, even + * if it's dirty. This is desirable behaviour for memory-cleaning writeback, + * but it is INCORRECT for data-integrity system calls such as fsync(). fsync() + * and msync() need to guarantee that all the data which was dirty at the time + * the call was made get new I/O started against them. If wbc->sync_mode is + * WB_SYNC_ALL then we were called for data integrity and we must wait for + * existing IO to complete. + * + * Derived from mpage_writepages() - if you fix this you should check that + * also! + */ +int generic_writepages(struct address_space *mapping, + struct writeback_control *wbc) +{ + struct backing_dev_info *bdi = mapping->backing_dev_info; + int ret = 0; + int done = 0; + int (*writepage)(struct page *page, struct writeback_control *wbc); + struct pagevec pvec; + int nr_pages; + pgoff_t index; + pgoff_t end; /* Inclusive */ + int scanned = 0; + int range_whole = 0; + + if (wbc->nonblocking && bdi_write_congested(bdi)) { + wbc->encountered_congestion = 1; + return 0; + } + + writepage = mapping->a_ops->writepage; + + /* deal with chardevs and other special file */ + if (!writepage) + return 0; + + pagevec_init(&pvec, 0); + if (wbc->range_cyclic) { + index = mapping->writeback_index; /* Start from prev offset */ + end = -1; + } else { + index = wbc->range_start >> PAGE_CACHE_SHIFT; + end = wbc->range_end >> PAGE_CACHE_SHIFT; + if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) + range_whole = 1; + scanned = 1; + } +retry: + while (!done && (index <= end) && + (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, + PAGECACHE_TAG_DIRTY, + min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) { + unsigned i; + + scanned = 1; + for (i = 0; i < nr_pages; i++) { + struct page *page = pvec.pages[i]; + + /* + * At this point we hold neither mapping->tree_lock nor + * lock on the page itself: the page may be truncated or + * invalidated (changing page->mapping to NULL), or even + * swizzled back from swapper_space to tmpfs file + * mapping + */ + lock_page(page); + + if (unlikely(page->mapping != mapping)) { + unlock_page(page); + continue; + } + + if (!wbc->range_cyclic && page->index > end) { + done = 1; + unlock_page(page); + continue; + } + + if (wbc->sync_mode != WB_SYNC_NONE) + wait_on_page_writeback(page); + + if (PageWriteback(page) || + !clear_page_dirty_for_io(page)) { + unlock_page(page); + continue; + } + + ret = (*writepage)(page, wbc); + if (ret) { + if (ret == -ENOSPC) + set_bit(AS_ENOSPC, &mapping->flags); + else + set_bit(AS_EIO, &mapping->flags); + } + + if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) + unlock_page(page); + if (ret || (--(wbc->nr_to_write) <= 0)) + done = 1; + if (wbc->nonblocking && bdi_write_congested(bdi)) { + wbc->encountered_congestion = 1; + done = 1; + } + } + pagevec_release(&pvec); + cond_resched(); + } + if (!scanned && !done) { + /* + * We hit the last page and there is more work to be done: wrap + * back to the start of the file + */ + scanned = 1; + index = 0; + goto retry; + } + if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) + mapping->writeback_index = index; + return ret; +} + +EXPORT_SYMBOL(generic_writepages); + int do_writepages(struct address_space *mapping, struct writeback_control *wbc) { int ret; @@ -566,7 +693,7 @@ int do_writepages(struct address_space *mapping, struct writeback_control *wbc) return 0; wbc->for_writepages = 1; if (mapping->a_ops->writepages) - ret = mapping->a_ops->writepages(mapping, wbc); + ret = mapping->a_ops->writepages(mapping, wbc); else ret = generic_writepages(mapping, wbc); wbc->for_writepages = 0; @@ -640,7 +767,8 @@ int __set_page_dirty_nobuffers(struct page *page) if (mapping2) { /* Race with truncate? */ BUG_ON(mapping2 != mapping); if (mapping_cap_account_dirty(mapping)) - inc_page_state(nr_dirty); + __inc_zone_page_state(page, + NR_FILE_DIRTY); radix_tree_tag_set(&mapping->page_tree, page_index(page), PAGECACHE_TAG_DIRTY); } @@ -705,7 +833,7 @@ int set_page_dirty_lock(struct page *page) { int ret; - lock_page(page); + lock_page_nosync(page); ret = set_page_dirty(page); unlock_page(page); return ret; @@ -728,8 +856,14 @@ int test_clear_page_dirty(struct page *page) page_index(page), PAGECACHE_TAG_DIRTY); write_unlock_irqrestore(&mapping->tree_lock, flags); - if (mapping_cap_account_dirty(mapping)) - dec_page_state(nr_dirty); + /* + * We can continue to use `mapping' here because the + * page is locked, which pins the address_space + */ + if (mapping_cap_account_dirty(mapping)) { + page_mkclean(page); + dec_zone_page_state(page, NR_FILE_DIRTY); + } return 1; } write_unlock_irqrestore(&mapping->tree_lock, flags); @@ -759,8 +893,10 @@ int clear_page_dirty_for_io(struct page *page) if (mapping) { if (TestClearPageDirty(page)) { - if (mapping_cap_account_dirty(mapping)) - dec_page_state(nr_dirty); + if (mapping_cap_account_dirty(mapping)) { + page_mkclean(page); + dec_zone_page_state(page, NR_FILE_DIRTY); + } return 1; } return 0; @@ -817,6 +953,15 @@ int test_set_page_writeback(struct page *page) } EXPORT_SYMBOL(test_set_page_writeback); +/* + * Wakes up tasks that are being throttled due to writeback congestion + */ +void writeback_congestion_end(void) +{ + blk_congestion_end(WRITE); +} +EXPORT_SYMBOL(writeback_congestion_end); + /* * Return true if any of the pages in the mapping are marged with the * passed tag.