mm: vmscan: throttle reclaim if encountering too many dirty pages under writeback

author Mel Gorman <mgorman@suse.de>

Tue, 1 Nov 2011 00:07:56 +0000 (17:07 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Tue, 1 Nov 2011 00:30:46 +0000 (17:30 -0700)
author Mel Gorman <mgorman@suse.de>
Tue, 1 Nov 2011 00:07:56 +0000 (17:07 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 1 Nov 2011 00:30:46 +0000 (17:30 -0700)
diff --git a/mm/vmscan.c b/mm/vmscan.c

index 15e3a29..7b0573f 100644 (file)
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -751,7 +751,9 @@ static noinline_for_stack void free_page_list(struct list_head *free_pages)
  static unsigned long shrink_page_list(struct list_head *page_list,
                                       struct zone *zone,
                                       struct scan_control *sc,
-                                     int priority)
+                                     int priority,
+                                     unsigned long *ret_nr_dirty,
+                                     unsigned long *ret_nr_writeback)
  {
         LIST_HEAD(ret_pages);
         LIST_HEAD(free_pages);
@@ -759,6 +761,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
         unsigned long nr_dirty = 0;
         unsigned long nr_congested = 0;
         unsigned long nr_reclaimed = 0;
+       unsigned long nr_writeback = 0;
  
         cond_resched();
  
@@ -795,6 +798,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                         (PageSwapCache(page) && (sc->gfp_mask & __GFP_IO));
  
                 if (PageWriteback(page)) {
+                       nr_writeback++;
                         /*
                          * Synchronous reclaim cannot queue pages for
                          * writeback due to the possibility of stack overflow
@@ -1000,6 +1004,8 @@ keep_lumpy:
  
         list_splice(&ret_pages, page_list);
         count_vm_events(PGACTIVATE, pgactivate);
+       *ret_nr_dirty += nr_dirty;
+       *ret_nr_writeback += nr_writeback;
         return nr_reclaimed;
  }
  
@@ -1460,6 +1466,8 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
         unsigned long nr_taken;
         unsigned long nr_anon;
         unsigned long nr_file;
+       unsigned long nr_dirty = 0;
+       unsigned long nr_writeback = 0;
         isolate_mode_t reclaim_mode = ISOLATE_INACTIVE;
  
         while (unlikely(too_many_isolated(zone, file, sc))) {
@@ -1512,12 +1520,14 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
  
         spin_unlock_irq(&zone->lru_lock);
  
-       nr_reclaimed = shrink_page_list(&page_list, zone, sc, priority);
+       nr_reclaimed = shrink_page_list(&page_list, zone, sc, priority,
+                                               &nr_dirty, &nr_writeback);
  
         /* Check if we should syncronously wait for writeback */
         if (should_reclaim_stall(nr_taken, nr_reclaimed, priority, sc)) {
                 set_reclaim_mode(priority, sc, true);
-               nr_reclaimed += shrink_page_list(&page_list, zone, sc, priority);
+               nr_reclaimed += shrink_page_list(&page_list, zone, sc,
+                                       priority, &nr_dirty, &nr_writeback);
         }
  
         local_irq_disable();
@@ -1527,6 +1537,32 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
  
         putback_lru_pages(zone, sc, nr_anon, nr_file, &page_list);
  
+       /*
+        * If reclaim is isolating dirty pages under writeback, it implies
+        * that the long-lived page allocation rate is exceeding the page
+        * laundering rate. Either the global limits are not being effective
+        * at throttling processes due to the page distribution throughout
+        * zones or there is heavy usage of a slow backing device. The
+        * only option is to throttle from reclaim context which is not ideal
+        * as there is no guarantee the dirtying process is throttled in the
+        * same way balance_dirty_pages() manages.
+        *
+        * This scales the number of dirty pages that must be under writeback
+        * before throttling depending on priority. It is a simple backoff
+        * function that has the most effect in the range DEF_PRIORITY to
+        * DEF_PRIORITY-2, which is the priority at which reclaim is
+        * considered to be in trouble.
+        *
+        * DEF_PRIORITY   100% isolated pages must be PageWriteback to throttle
+        * DEF_PRIORITY-1  50% must be PageWriteback
+        * DEF_PRIORITY-2  25% must be PageWriteback, kswapd in trouble
+        * ...
+        * DEF_PRIORITY-6 For SWAP_CLUSTER_MAX isolated pages, throttle if any
+        *                     isolated page is PageWriteback
+        */
+       if (nr_writeback && nr_writeback >= (nr_taken >> (DEF_PRIORITY-priority)))
+               wait_iff_congested(zone, BLK_RW_ASYNC, HZ/10);
+
         trace_mm_vmscan_lru_shrink_inactive(zone->zone_pgdat->node_id,
                 zone_idx(zone),
                 nr_scanned, nr_reclaimed,
author	Mel Gorman <mgorman@suse.de>
	Tue, 1 Nov 2011 00:07:56 +0000 (17:07 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Tue, 1 Nov 2011 00:30:46 +0000 (17:30 -0700)