vmscan.c: fix invalid strict_strtoul() check in write_scan_unevictable_node()
[pandora-kernel.git] / mm / vmscan.c
index 15e3a29..ac644fe 100644 (file)
@@ -751,7 +751,9 @@ static noinline_for_stack void free_page_list(struct list_head *free_pages)
 static unsigned long shrink_page_list(struct list_head *page_list,
                                      struct zone *zone,
                                      struct scan_control *sc,
-                                     int priority)
+                                     int priority,
+                                     unsigned long *ret_nr_dirty,
+                                     unsigned long *ret_nr_writeback)
 {
        LIST_HEAD(ret_pages);
        LIST_HEAD(free_pages);
@@ -759,6 +761,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
        unsigned long nr_dirty = 0;
        unsigned long nr_congested = 0;
        unsigned long nr_reclaimed = 0;
+       unsigned long nr_writeback = 0;
 
        cond_resched();
 
@@ -795,6 +798,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                        (PageSwapCache(page) && (sc->gfp_mask & __GFP_IO));
 
                if (PageWriteback(page)) {
+                       nr_writeback++;
                        /*
                         * Synchronous reclaim cannot queue pages for
                         * writeback due to the possibility of stack overflow
@@ -862,7 +866,15 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                         */
                        if (page_is_file_cache(page) &&
                                        (!current_is_kswapd() || priority >= DEF_PRIORITY - 2)) {
-                               inc_zone_page_state(page, NR_VMSCAN_WRITE_SKIP);
+                               /*
+                                * Immediately reclaim when written back.
+                                * Similar in principal to deactivate_page()
+                                * except we already have the page isolated
+                                * and know it's dirty
+                                */
+                               inc_zone_page_state(page, NR_VMSCAN_IMMEDIATE);
+                               SetPageReclaim(page);
+
                                goto keep_locked;
                        }
 
@@ -1000,6 +1012,8 @@ keep_lumpy:
 
        list_splice(&ret_pages, page_list);
        count_vm_events(PGACTIVATE, pgactivate);
+       *ret_nr_dirty += nr_dirty;
+       *ret_nr_writeback += nr_writeback;
        return nr_reclaimed;
 }
 
@@ -1460,6 +1474,8 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
        unsigned long nr_taken;
        unsigned long nr_anon;
        unsigned long nr_file;
+       unsigned long nr_dirty = 0;
+       unsigned long nr_writeback = 0;
        isolate_mode_t reclaim_mode = ISOLATE_INACTIVE;
 
        while (unlikely(too_many_isolated(zone, file, sc))) {
@@ -1512,12 +1528,14 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
 
        spin_unlock_irq(&zone->lru_lock);
 
-       nr_reclaimed = shrink_page_list(&page_list, zone, sc, priority);
+       nr_reclaimed = shrink_page_list(&page_list, zone, sc, priority,
+                                               &nr_dirty, &nr_writeback);
 
        /* Check if we should syncronously wait for writeback */
        if (should_reclaim_stall(nr_taken, nr_reclaimed, priority, sc)) {
                set_reclaim_mode(priority, sc, true);
-               nr_reclaimed += shrink_page_list(&page_list, zone, sc, priority);
+               nr_reclaimed += shrink_page_list(&page_list, zone, sc,
+                                       priority, &nr_dirty, &nr_writeback);
        }
 
        local_irq_disable();
@@ -1527,6 +1545,32 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
 
        putback_lru_pages(zone, sc, nr_anon, nr_file, &page_list);
 
+       /*
+        * If reclaim is isolating dirty pages under writeback, it implies
+        * that the long-lived page allocation rate is exceeding the page
+        * laundering rate. Either the global limits are not being effective
+        * at throttling processes due to the page distribution throughout
+        * zones or there is heavy usage of a slow backing device. The
+        * only option is to throttle from reclaim context which is not ideal
+        * as there is no guarantee the dirtying process is throttled in the
+        * same way balance_dirty_pages() manages.
+        *
+        * This scales the number of dirty pages that must be under writeback
+        * before throttling depending on priority. It is a simple backoff
+        * function that has the most effect in the range DEF_PRIORITY to
+        * DEF_PRIORITY-2 which is the priority reclaim is considered to be
+        * in trouble and reclaim is considered to be in trouble.
+        *
+        * DEF_PRIORITY   100% isolated pages must be PageWriteback to throttle
+        * DEF_PRIORITY-1  50% must be PageWriteback
+        * DEF_PRIORITY-2  25% must be PageWriteback, kswapd in trouble
+        * ...
+        * DEF_PRIORITY-6 For SWAP_CLUSTER_MAX isolated pages, throttle if any
+        *                     isolated page is PageWriteback
+        */
+       if (nr_writeback && nr_writeback >= (nr_taken >> (DEF_PRIORITY-priority)))
+               wait_iff_congested(zone, BLK_RW_ASYNC, HZ/10);
+
        trace_mm_vmscan_lru_shrink_inactive(zone->zone_pgdat->node_id,
                zone_idx(zone),
                nr_scanned, nr_reclaimed,
@@ -2704,6 +2748,8 @@ out:
 
                        /* If balanced, clear the congested flag */
                        zone_clear_flag(zone, ZONE_CONGESTED);
+                       if (i <= *classzone_idx)
+                               balanced += zone->present_pages;
                }
        }
 
@@ -2777,7 +2823,9 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int order, int classzone_idx)
 static int kswapd(void *p)
 {
        unsigned long order, new_order;
+       unsigned balanced_order;
        int classzone_idx, new_classzone_idx;
+       int balanced_classzone_idx;
        pg_data_t *pgdat = (pg_data_t*)p;
        struct task_struct *tsk = current;
 
@@ -2808,7 +2856,9 @@ static int kswapd(void *p)
        set_freezable();
 
        order = new_order = 0;
+       balanced_order = 0;
        classzone_idx = new_classzone_idx = pgdat->nr_zones - 1;
+       balanced_classzone_idx = classzone_idx;
        for ( ; ; ) {
                int ret;
 
@@ -2817,7 +2867,8 @@ static int kswapd(void *p)
                 * new request of a similar or harder type will succeed soon
                 * so consider going to sleep on the basis we reclaimed at
                 */
-               if (classzone_idx >= new_classzone_idx && order == new_order) {
+               if (balanced_classzone_idx >= new_classzone_idx &&
+                                       balanced_order == new_order) {
                        new_order = pgdat->kswapd_max_order;
                        new_classzone_idx = pgdat->classzone_idx;
                        pgdat->kswapd_max_order =  0;
@@ -2832,9 +2883,12 @@ static int kswapd(void *p)
                        order = new_order;
                        classzone_idx = new_classzone_idx;
                } else {
-                       kswapd_try_to_sleep(pgdat, order, classzone_idx);
+                       kswapd_try_to_sleep(pgdat, balanced_order,
+                                               balanced_classzone_idx);
                        order = pgdat->kswapd_max_order;
                        classzone_idx = pgdat->classzone_idx;
+                       new_order = order;
+                       new_classzone_idx = classzone_idx;
                        pgdat->kswapd_max_order = 0;
                        pgdat->classzone_idx = pgdat->nr_zones - 1;
                }
@@ -2849,7 +2903,9 @@ static int kswapd(void *p)
                 */
                if (!ret) {
                        trace_mm_vmscan_kswapd_wake(pgdat->node_id, order);
-                       order = balance_pgdat(pgdat, order, &classzone_idx);
+                       balanced_classzone_idx = classzone_idx;
+                       balanced_order = balance_pgdat(pgdat, order,
+                                               &balanced_classzone_idx);
                }
        }
        return 0;
@@ -3464,8 +3520,8 @@ static ssize_t write_scan_unevictable_node(struct sys_device *dev,
        unsigned long res;
        unsigned long req = strict_strtoul(buf, 10, &res);
 
-       if (!req)
-               return 1;       /* zero is no-op */
+       if (req || !res)
+               return 1; /* Invalid input or zero is no-op */
 
        for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
                if (!populated_zone(zone))