Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/linville/wirel...
[pandora-kernel.git] / mm / vmscan.c
index d348882..47a5096 100644 (file)
@@ -41,6 +41,7 @@
 #include <linux/memcontrol.h>
 #include <linux/delayacct.h>
 #include <linux/sysctl.h>
+#include <linux/compaction.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -1044,7 +1045,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
                case 0:
                        list_move(&page->lru, dst);
                        mem_cgroup_del_lru(page);
-                       nr_taken++;
+                       nr_taken += hpage_nr_pages(page);
                        break;
 
                case -EBUSY:
@@ -1102,7 +1103,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
                        if (__isolate_lru_page(cursor_page, mode, file) == 0) {
                                list_move(&cursor_page->lru, dst);
                                mem_cgroup_del_lru(cursor_page);
-                               nr_taken++;
+                               nr_taken += hpage_nr_pages(page);
                                nr_lumpy_taken++;
                                if (PageDirty(cursor_page))
                                        nr_lumpy_dirty++;
@@ -1157,14 +1158,15 @@ static unsigned long clear_active_flags(struct list_head *page_list,
        struct page *page;
 
        list_for_each_entry(page, page_list, lru) {
+               int numpages = hpage_nr_pages(page);
                lru = page_lru_base_type(page);
                if (PageActive(page)) {
                        lru += LRU_ACTIVE;
                        ClearPageActive(page);
-                       nr_active++;
+                       nr_active += numpages;
                }
                if (count)
-                       count[lru]++;
+                       count[lru] += numpages;
        }
 
        return nr_active;
@@ -1274,7 +1276,8 @@ putback_lru_pages(struct zone *zone, struct scan_control *sc,
                add_page_to_lru_list(zone, page, lru);
                if (is_active_lru(lru)) {
                        int file = is_file_lru(lru);
-                       reclaim_stat->recent_rotated[file]++;
+                       int numpages = hpage_nr_pages(page);
+                       reclaim_stat->recent_rotated[file] += numpages;
                }
                if (!pagevec_add(&pvec, page)) {
                        spin_unlock_irq(&zone->lru_lock);
@@ -1482,7 +1485,7 @@ static void move_active_pages_to_lru(struct zone *zone,
 
                list_move(&page->lru, &zone->lru[lru].list);
                mem_cgroup_add_lru_list(page, lru);
-               pgmoved++;
+               pgmoved += hpage_nr_pages(page);
 
                if (!pagevec_add(&pvec, page) || list_empty(list)) {
                        spin_unlock_irq(&zone->lru_lock);
@@ -1550,7 +1553,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
                }
 
                if (page_referenced(page, 0, sc->mem_cgroup, &vm_flags)) {
-                       nr_rotated++;
+                       nr_rotated += hpage_nr_pages(page);
                        /*
                         * Identify referenced, file-backed active pages and
                         * give them one more trip around the active list. So
@@ -2227,7 +2230,8 @@ static bool pgdat_balanced(pg_data_t *pgdat, unsigned long balanced_pages,
 }
 
 /* is kswapd sleeping prematurely? */
-static int sleeping_prematurely(pg_data_t *pgdat, int order, long remaining)
+static bool sleeping_prematurely(pg_data_t *pgdat, int order, long remaining,
+                                       int classzone_idx)
 {
        int i;
        unsigned long balanced = 0;
@@ -2235,20 +2239,28 @@ static int sleeping_prematurely(pg_data_t *pgdat, int order, long remaining)
 
        /* If a direct reclaimer woke kswapd within HZ/10, it's premature */
        if (remaining)
-               return 1;
+               return true;
 
-       /* If after HZ/10, a zone is below the high mark, it's premature */
+       /* Check the watermark levels */
        for (i = 0; i < pgdat->nr_zones; i++) {
                struct zone *zone = pgdat->node_zones + i;
 
                if (!populated_zone(zone))
                        continue;
 
-               if (zone->all_unreclaimable)
+               /*
+                * balance_pgdat() skips over all_unreclaimable after
+                * DEF_PRIORITY. Effectively, it considers them balanced so
+                * they must be considered balanced here as well if kswapd
+                * is to sleep
+                */
+               if (zone->all_unreclaimable) {
+                       balanced += zone->present_pages;
                        continue;
+               }
 
                if (!zone_watermark_ok_safe(zone, order, high_wmark_pages(zone),
-                                                               0, 0))
+                                                       classzone_idx, 0))
                        all_zones_ok = false;
                else
                        balanced += zone->present_pages;
@@ -2260,7 +2272,7 @@ static int sleeping_prematurely(pg_data_t *pgdat, int order, long remaining)
         * must be balanced
         */
        if (order)
-               return pgdat_balanced(pgdat, balanced, 0);
+               return pgdat_balanced(pgdat, balanced, classzone_idx);
        else
                return !all_zones_ok;
 }
@@ -2269,7 +2281,7 @@ static int sleeping_prematurely(pg_data_t *pgdat, int order, long remaining)
  * For kswapd, balance_pgdat() will work across all this node's zones until
  * they are all at high_wmark_pages(zone).
  *
- * Returns the number of pages which were actually freed.
+ * Returns the final order kswapd was reclaiming at
  *
  * There is special handling here for zones which are full of pinned pages.
  * This can happen if the pages are all mlocked, or if they are all used by
@@ -2287,7 +2299,7 @@ static int sleeping_prematurely(pg_data_t *pgdat, int order, long remaining)
  * of pages is balanced across the zones.
  */
 static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
-                                                       int classzone_idx)
+                                                       int *classzone_idx)
 {
        int all_zones_ok;
        unsigned long balanced;
@@ -2350,6 +2362,7 @@ loop_again:
                        if (!zone_watermark_ok_safe(zone, order,
                                        high_wmark_pages(zone), 0, 0)) {
                                end_zone = i;
+                               *classzone_idx = i;
                                break;
                        }
                }
@@ -2372,6 +2385,7 @@ loop_again:
                 * cause too much scanning of the lower zones.
                 */
                for (i = 0; i <= end_zone; i++) {
+                       int compaction;
                        struct zone *zone = pgdat->node_zones + i;
                        int nr_slab;
 
@@ -2401,9 +2415,26 @@ loop_again:
                                                lru_pages);
                        sc.nr_reclaimed += reclaim_state->reclaimed_slab;
                        total_scanned += sc.nr_scanned;
+
+                       compaction = 0;
+                       if (order &&
+                           zone_watermark_ok(zone, 0,
+                                              high_wmark_pages(zone),
+                                             end_zone, 0) &&
+                           !zone_watermark_ok(zone, order,
+                                              high_wmark_pages(zone),
+                                              end_zone, 0)) {
+                               compact_zone_order(zone,
+                                                  order,
+                                                  sc.gfp_mask, false,
+                                                  COMPACT_MODE_KSWAPD);
+                               compaction = 1;
+                       }
+
                        if (zone->all_unreclaimable)
                                continue;
-                       if (nr_slab == 0 && !zone_reclaimable(zone))
+                       if (!compaction && nr_slab == 0 &&
+                           !zone_reclaimable(zone))
                                zone->all_unreclaimable = 1;
                        /*
                         * If we've done a decent amount of scanning and
@@ -2414,15 +2445,6 @@ loop_again:
                            total_scanned > sc.nr_reclaimed + sc.nr_reclaimed / 2)
                                sc.may_writepage = 1;
 
-                       /*
-                        * Compact the zone for higher orders to reduce
-                        * latencies for higher-order allocations that
-                        * would ordinarily call try_to_compact_pages()
-                        */
-                       if (sc.order > PAGE_ALLOC_COSTLY_ORDER)
-                               compact_zone_order(zone, sc.order, sc.gfp_mask,
-                                                       false);
-
                        if (!zone_watermark_ok_safe(zone, order,
                                        high_wmark_pages(zone), end_zone, 0)) {
                                all_zones_ok = 0;
@@ -2443,12 +2465,12 @@ loop_again:
                                 * spectulatively avoid congestion waits
                                 */
                                zone_clear_flag(zone, ZONE_CONGESTED);
-                               if (i <= classzone_idx)
+                               if (i <= *classzone_idx)
                                        balanced += zone->present_pages;
                        }
 
                }
-               if (all_zones_ok || (order && pgdat_balanced(pgdat, balanced, classzone_idx)))
+               if (all_zones_ok || (order && pgdat_balanced(pgdat, balanced, *classzone_idx)))
                        break;          /* kswapd: all done */
                /*
                 * OK, kswapd is getting into trouble.  Take a nap, then take
@@ -2477,7 +2499,7 @@ out:
         * high-order: Balanced zones must make up at least 25% of the node
         *             for the node to be balanced
         */
-       if (!(all_zones_ok || (order && pgdat_balanced(pgdat, balanced, classzone_idx)))) {
+       if (!(all_zones_ok || (order && pgdat_balanced(pgdat, balanced, *classzone_idx)))) {
                cond_resched();
 
                try_to_freeze();
@@ -2532,10 +2554,17 @@ out:
                }
        }
 
-       return sc.nr_reclaimed;
+       /*
+        * Return the order we were reclaiming at so sleeping_prematurely()
+        * makes a decision on the order we were last reclaiming at. However,
+        * if another caller entered the allocator slow path while kswapd
+        * was awake, order will remain at the higher level
+        */
+       *classzone_idx = end_zone;
+       return order;
 }
 
-static void kswapd_try_to_sleep(pg_data_t *pgdat, int order)
+static void kswapd_try_to_sleep(pg_data_t *pgdat, int order, int classzone_idx)
 {
        long remaining = 0;
        DEFINE_WAIT(wait);
@@ -2546,7 +2575,7 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int order)
        prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE);
 
        /* Try to sleep for a short interval */
-       if (!sleeping_prematurely(pgdat, order, remaining)) {
+       if (!sleeping_prematurely(pgdat, order, remaining, classzone_idx)) {
                remaining = schedule_timeout(HZ/10);
                finish_wait(&pgdat->kswapd_wait, &wait);
                prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE);
@@ -2556,7 +2585,7 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int order)
         * After a short sleep, check if it was a premature sleep. If not, then
         * go fully to sleep until explicitly woken up.
         */
-       if (!sleeping_prematurely(pgdat, order, remaining)) {
+       if (!sleeping_prematurely(pgdat, order, remaining, classzone_idx)) {
                trace_mm_vmscan_kswapd_sleep(pgdat->node_id);
 
                /*
@@ -2644,9 +2673,11 @@ static int kswapd(void *p)
                        order = new_order;
                        classzone_idx = new_classzone_idx;
                } else {
-                       kswapd_try_to_sleep(pgdat, order);
+                       kswapd_try_to_sleep(pgdat, order, classzone_idx);
                        order = pgdat->kswapd_max_order;
                        classzone_idx = pgdat->classzone_idx;
+                       pgdat->kswapd_max_order = 0;
+                       pgdat->classzone_idx = MAX_NR_ZONES - 1;
                }
 
                ret = try_to_freeze();
@@ -2659,7 +2690,7 @@ static int kswapd(void *p)
                 */
                if (!ret) {
                        trace_mm_vmscan_kswapd_wake(pgdat->node_id, order);
-                       balance_pgdat(pgdat, order, classzone_idx);
+                       order = balance_pgdat(pgdat, order, &classzone_idx);
                }
        }
        return 0;