mm: update the description for vm_total_pages
[pandora-kernel.git] / mm / vmscan.c
index 0f16ffe..5fec1ba 100644 (file)
 #include <trace/events/vmscan.h>
 
 struct scan_control {
-       /* Incremented by the number of inactive pages that were scanned */
-       unsigned long nr_scanned;
-
-       /* Number of pages freed so far during a call to shrink_zones() */
-       unsigned long nr_reclaimed;
-
        /* How many pages shrink_list() should reclaim */
        unsigned long nr_to_reclaim;
 
-       unsigned long hibernation_mode;
-
        /* This context's GFP mask */
        gfp_t gfp_mask;
 
-       int may_writepage;
-
-       /* Can mapped pages be reclaimed? */
-       int may_unmap;
-
-       /* Can pages be swapped as part of reclaim? */
-       int may_swap;
-
+       /* Allocation order */
        int order;
 
-       /* Scan (total_size >> priority) pages at once */
-       int priority;
-
-       /* anon vs. file LRUs scanning "ratio" */
-       int swappiness;
+       /*
+        * Nodemask of nodes allowed by the caller. If NULL, all nodes
+        * are scanned.
+        */
+       nodemask_t      *nodemask;
 
        /*
         * The memory cgroup that hit its limit and as a result is the
@@ -95,11 +80,27 @@ struct scan_control {
         */
        struct mem_cgroup *target_mem_cgroup;
 
-       /*
-        * Nodemask of nodes allowed by the caller. If NULL, all nodes
-        * are scanned.
-        */
-       nodemask_t      *nodemask;
+       /* Scan (total_size >> priority) pages at once */
+       int priority;
+
+       unsigned int may_writepage:1;
+
+       /* Can mapped pages be reclaimed? */
+       unsigned int may_unmap:1;
+
+       /* Can pages be swapped as part of reclaim? */
+       unsigned int may_swap:1;
+
+       unsigned int hibernation_mode:1;
+
+       /* One of the zones is ready for compaction */
+       unsigned int compaction_ready:1;
+
+       /* Incremented by the number of inactive pages that were scanned */
+       unsigned long nr_scanned;
+
+       /* Number of pages freed so far during a call to shrink_zones() */
+       unsigned long nr_reclaimed;
 };
 
 #define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
@@ -136,7 +137,11 @@ struct scan_control {
  * From 0 .. 100.  Higher means more swappy.
  */
 int vm_swappiness = 60;
-unsigned long vm_total_pages;  /* The total number of pages which the VM controls */
+/*
+ * The total number of pages, summed over all zones, that are beyond each
+ * zone's high watermark.
+ */
+unsigned long vm_total_pages;
 
 static LIST_HEAD(shrinker_list);
 static DECLARE_RWSEM(shrinker_rwsem);
@@ -1865,8 +1870,8 @@ enum scan_balance {
  * nr[0] = anon inactive pages to scan; nr[1] = anon active pages to scan
  * nr[2] = file inactive pages to scan; nr[3] = file active pages to scan
  */
-static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
-                          unsigned long *nr)
+static void get_scan_count(struct lruvec *lruvec, int swappiness,
+                          struct scan_control *sc, unsigned long *nr)
 {
        struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
        u64 fraction[2];
@@ -1909,7 +1914,7 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
         * using the memory controller's swap limit feature would be
         * too expensive.
         */
-       if (!global_reclaim(sc) && !sc->swappiness) {
+       if (!global_reclaim(sc) && !swappiness) {
                scan_balance = SCAN_FILE;
                goto out;
        }
@@ -1919,7 +1924,7 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
         * system is close to OOM, scan both anon and file equally
         * (unless the swappiness setting disagrees with swapping).
         */
-       if (!sc->priority && sc->swappiness) {
+       if (!sc->priority && swappiness) {
                scan_balance = SCAN_EQUAL;
                goto out;
        }
@@ -1962,7 +1967,7 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
         * With swappiness at 100, anonymous and file have the same priority.
         * This scanning priority is essentially the inverse of IO cost.
         */
-       anon_prio = sc->swappiness;
+       anon_prio = swappiness;
        file_prio = 200 - anon_prio;
 
        /*
@@ -2052,7 +2057,8 @@ out:
 /*
  * This is a basic per-zone page freer.  Used by both kswapd and direct reclaim.
  */
-static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
+static void shrink_lruvec(struct lruvec *lruvec, int swappiness,
+                         struct scan_control *sc)
 {
        unsigned long nr[NR_LRU_LISTS];
        unsigned long targets[NR_LRU_LISTS];
@@ -2063,7 +2069,7 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
        struct blk_plug plug;
        bool scan_adjusted;
 
-       get_scan_count(lruvec, sc, nr);
+       get_scan_count(lruvec, swappiness, sc, nr);
 
        /* Record the original scan target for proportional adjustments later */
        memcpy(targets, nr, sizeof(nr));
@@ -2241,9 +2247,10 @@ static inline bool should_continue_reclaim(struct zone *zone,
        }
 }
 
-static void shrink_zone(struct zone *zone, struct scan_control *sc)
+static bool shrink_zone(struct zone *zone, struct scan_control *sc)
 {
        unsigned long nr_reclaimed, nr_scanned;
+       bool reclaimable = false;
 
        do {
                struct mem_cgroup *root = sc->target_mem_cgroup;
@@ -2259,11 +2266,12 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc)
                memcg = mem_cgroup_iter(root, NULL, &reclaim);
                do {
                        struct lruvec *lruvec;
+                       int swappiness;
 
                        lruvec = mem_cgroup_zone_lruvec(zone, memcg);
+                       swappiness = mem_cgroup_swappiness(memcg);
 
-                       sc->swappiness = mem_cgroup_swappiness(memcg);
-                       shrink_lruvec(lruvec, sc);
+                       shrink_lruvec(lruvec, swappiness, sc);
 
                        /*
                         * Direct reclaim and kswapd have to scan all memory
@@ -2287,20 +2295,21 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc)
                           sc->nr_scanned - nr_scanned,
                           sc->nr_reclaimed - nr_reclaimed);
 
+               if (sc->nr_reclaimed - nr_reclaimed)
+                       reclaimable = true;
+
        } while (should_continue_reclaim(zone, sc->nr_reclaimed - nr_reclaimed,
                                         sc->nr_scanned - nr_scanned, sc));
+
+       return reclaimable;
 }
 
 /* Returns true if compaction should go ahead for a high-order request */
-static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
+static inline bool compaction_ready(struct zone *zone, int order)
 {
        unsigned long balance_gap, watermark;
        bool watermark_ok;
 
-       /* Do not consider compaction for orders reclaim is meant to satisfy */
-       if (sc->order <= PAGE_ALLOC_COSTLY_ORDER)
-               return false;
-
        /*
         * Compaction takes time to run and there are potentially other
         * callers using the pages just freed. Continue reclaiming until
@@ -2309,18 +2318,18 @@ static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
         */
        balance_gap = min(low_wmark_pages(zone), DIV_ROUND_UP(
                        zone->managed_pages, KSWAPD_ZONE_BALANCE_GAP_RATIO));
-       watermark = high_wmark_pages(zone) + balance_gap + (2UL << sc->order);
+       watermark = high_wmark_pages(zone) + balance_gap + (2UL << order);
        watermark_ok = zone_watermark_ok_safe(zone, 0, watermark, 0, 0);
 
        /*
         * If compaction is deferred, reclaim up to a point where
         * compaction will have a chance of success when re-enabled
         */
-       if (compaction_deferred(zone, sc->order))
+       if (compaction_deferred(zone, order))
                return watermark_ok;
 
        /* If compaction is not ready to start, keep reclaiming */
-       if (!compaction_suitable(zone, sc->order))
+       if (!compaction_suitable(zone, order))
                return false;
 
        return watermark_ok;
@@ -2342,10 +2351,7 @@ static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
  * If a zone is deemed to be full of pinned pages then just give it a light
  * scan then give up on it.
  *
- * This function returns true if a zone is being reclaimed for a costly
- * high-order allocation and compaction is ready to begin. This indicates to
- * the caller that it should consider retrying the allocation instead of
- * further reclaim.
+ * Returns true if at least one zone was reclaimable.
  */
 static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
 {
@@ -2354,13 +2360,13 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
        unsigned long nr_soft_reclaimed;
        unsigned long nr_soft_scanned;
        unsigned long lru_pages = 0;
-       bool aborted_reclaim = false;
        struct reclaim_state *reclaim_state = current->reclaim_state;
        gfp_t orig_mask;
        struct shrink_control shrink = {
                .gfp_mask = sc->gfp_mask,
        };
        enum zone_type requested_highidx = gfp_zone(sc->gfp_mask);
+       bool reclaimable = false;
 
        /*
         * If the number of buffer_heads in the machine exceeds the maximum
@@ -2391,22 +2397,24 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
                        if (sc->priority != DEF_PRIORITY &&
                            !zone_reclaimable(zone))
                                continue;       /* Let kswapd poll it */
-                       if (IS_ENABLED(CONFIG_COMPACTION)) {
-                               /*
-                                * If we already have plenty of memory free for
-                                * compaction in this zone, don't free any more.
-                                * Even though compaction is invoked for any
-                                * non-zero order, only frequent costly order
-                                * reclamation is disruptive enough to become a
-                                * noticeable problem, like transparent huge
-                                * page allocations.
-                                */
-                               if ((zonelist_zone_idx(z) <= requested_highidx)
-                                   && compaction_ready(zone, sc)) {
-                                       aborted_reclaim = true;
-                                       continue;
-                               }
+
+                       /*
+                        * If we already have plenty of memory free for
+                        * compaction in this zone, don't free any more.
+                        * Even though compaction is invoked for any
+                        * non-zero order, only frequent costly order
+                        * reclamation is disruptive enough to become a
+                        * noticeable problem, like transparent huge
+                        * page allocations.
+                        */
+                       if (IS_ENABLED(CONFIG_COMPACTION) &&
+                           sc->order > PAGE_ALLOC_COSTLY_ORDER &&
+                           zonelist_zone_idx(z) <= requested_highidx &&
+                           compaction_ready(zone, sc->order)) {
+                               sc->compaction_ready = true;
+                               continue;
                        }
+
                        /*
                         * This steals pages from memory cgroups over softlimit
                         * and returns the number of reclaimed pages and
@@ -2419,10 +2427,17 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
                                                &nr_soft_scanned);
                        sc->nr_reclaimed += nr_soft_reclaimed;
                        sc->nr_scanned += nr_soft_scanned;
+                       if (nr_soft_reclaimed)
+                               reclaimable = true;
                        /* need some check for avoid more shrink_zone() */
                }
 
-               shrink_zone(zone, sc);
+               if (shrink_zone(zone, sc))
+                       reclaimable = true;
+
+               if (global_reclaim(sc) &&
+                   !reclaimable && zone_reclaimable(zone))
+                       reclaimable = true;
        }
 
        /*
@@ -2445,27 +2460,7 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
         */
        sc->gfp_mask = orig_mask;
 
-       return aborted_reclaim;
-}
-
-/* All zones in zonelist are unreclaimable? */
-static bool all_unreclaimable(struct zonelist *zonelist,
-               struct scan_control *sc)
-{
-       struct zoneref *z;
-       struct zone *zone;
-
-       for_each_zone_zonelist_nodemask(zone, z, zonelist,
-                       gfp_zone(sc->gfp_mask), sc->nodemask) {
-               if (!populated_zone(zone))
-                       continue;
-               if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
-                       continue;
-               if (zone_reclaimable(zone))
-                       return false;
-       }
-
-       return true;
+       return reclaimable;
 }
 
 /*
@@ -2489,7 +2484,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 {
        unsigned long total_scanned = 0;
        unsigned long writeback_threshold;
-       bool aborted_reclaim;
+       bool zones_reclaimable;
 
        delayacct_freepages_start();
 
@@ -2500,11 +2495,14 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
                vmpressure_prio(sc->gfp_mask, sc->target_mem_cgroup,
                                sc->priority);
                sc->nr_scanned = 0;
-               aborted_reclaim = shrink_zones(zonelist, sc);
+               zones_reclaimable = shrink_zones(zonelist, sc);
 
                total_scanned += sc->nr_scanned;
                if (sc->nr_reclaimed >= sc->nr_to_reclaim)
-                       goto out;
+                       break;
+
+               if (sc->compaction_ready)
+                       break;
 
                /*
                 * If we're getting trouble reclaiming, start doing
@@ -2526,28 +2524,19 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
                                                WB_REASON_TRY_TO_FREE_PAGES);
                        sc->may_writepage = 1;
                }
-       } while (--sc->priority >= 0 && !aborted_reclaim);
+       } while (--sc->priority >= 0);
 
-out:
        delayacct_freepages_end();
 
        if (sc->nr_reclaimed)
                return sc->nr_reclaimed;
 
-       /*
-        * As hibernation is going on, kswapd is freezed so that it can't mark
-        * the zone into all_unreclaimable. Thus bypassing all_unreclaimable
-        * check.
-        */
-       if (oom_killer_disabled)
-               return 0;
-
        /* Aborted reclaim to try compaction? don't OOM, then */
-       if (aborted_reclaim)
+       if (sc->compaction_ready)
                return 1;
 
-       /* top priority shrink_zones still had more to do? don't OOM, then */
-       if (global_reclaim(sc) && !all_unreclaimable(zonelist, sc))
+       /* Any of the zones still reclaimable?  Don't OOM. */
+       if (zones_reclaimable)
                return 1;
 
        return 0;
@@ -2684,15 +2673,14 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 {
        unsigned long nr_reclaimed;
        struct scan_control sc = {
+               .nr_to_reclaim = SWAP_CLUSTER_MAX,
                .gfp_mask = (gfp_mask = memalloc_noio_flags(gfp_mask)),
+               .order = order,
+               .nodemask = nodemask,
+               .priority = DEF_PRIORITY,
                .may_writepage = !laptop_mode,
-               .nr_to_reclaim = SWAP_CLUSTER_MAX,
                .may_unmap = 1,
                .may_swap = 1,
-               .order = order,
-               .priority = DEF_PRIORITY,
-               .target_mem_cgroup = NULL,
-               .nodemask = nodemask,
        };
 
        /*
@@ -2722,17 +2710,14 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg,
                                                unsigned long *nr_scanned)
 {
        struct scan_control sc = {
-               .nr_scanned = 0,
                .nr_to_reclaim = SWAP_CLUSTER_MAX,
+               .target_mem_cgroup = memcg,
                .may_writepage = !laptop_mode,
                .may_unmap = 1,
                .may_swap = !noswap,
-               .order = 0,
-               .priority = 0,
-               .swappiness = mem_cgroup_swappiness(memcg),
-               .target_mem_cgroup = memcg,
        };
        struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg);
+       int swappiness = mem_cgroup_swappiness(memcg);
 
        sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
                        (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
@@ -2748,7 +2733,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg,
         * will pick up pages from other mem cgroup's as well. We hack
         * the priority and make it zero.
         */
-       shrink_lruvec(lruvec, &sc);
+       shrink_lruvec(lruvec, swappiness, &sc);
 
        trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);
 
@@ -2764,16 +2749,14 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
        unsigned long nr_reclaimed;
        int nid;
        struct scan_control sc = {
-               .may_writepage = !laptop_mode,
-               .may_unmap = 1,
-               .may_swap = !noswap,
                .nr_to_reclaim = SWAP_CLUSTER_MAX,
-               .order = 0,
-               .priority = DEF_PRIORITY,
-               .target_mem_cgroup = memcg,
-               .nodemask = NULL, /* we don't care the placement */
                .gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
                                (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK),
+               .target_mem_cgroup = memcg,
+               .priority = DEF_PRIORITY,
+               .may_writepage = !laptop_mode,
+               .may_unmap = 1,
+               .may_swap = !noswap,
        };
 
        /*
@@ -3031,12 +3014,11 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
        unsigned long nr_soft_scanned;
        struct scan_control sc = {
                .gfp_mask = GFP_KERNEL,
+               .order = order,
                .priority = DEF_PRIORITY,
+               .may_writepage = !laptop_mode,
                .may_unmap = 1,
                .may_swap = 1,
-               .may_writepage = !laptop_mode,
-               .order = order,
-               .target_mem_cgroup = NULL,
        };
        count_vm_event(PAGEOUTRUN);
 
@@ -3417,14 +3399,13 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
 {
        struct reclaim_state reclaim_state;
        struct scan_control sc = {
+               .nr_to_reclaim = nr_to_reclaim,
                .gfp_mask = GFP_HIGHUSER_MOVABLE,
-               .may_swap = 1,
-               .may_unmap = 1,
+               .priority = DEF_PRIORITY,
                .may_writepage = 1,
-               .nr_to_reclaim = nr_to_reclaim,
+               .may_unmap = 1,
+               .may_swap = 1,
                .hibernation_mode = 1,
-               .order = 0,
-               .priority = DEF_PRIORITY,
        };
        struct zonelist *zonelist = node_zonelist(numa_node_id(), sc.gfp_mask);
        struct task_struct *p = current;
@@ -3604,13 +3585,13 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
        struct task_struct *p = current;
        struct reclaim_state reclaim_state;
        struct scan_control sc = {
-               .may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE),
-               .may_unmap = !!(zone_reclaim_mode & RECLAIM_SWAP),
-               .may_swap = 1,
                .nr_to_reclaim = max(nr_pages, SWAP_CLUSTER_MAX),
                .gfp_mask = (gfp_mask = memalloc_noio_flags(gfp_mask)),
                .order = order,
                .priority = ZONE_RECLAIM_PRIORITY,
+               .may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE),
+               .may_unmap = !!(zone_reclaim_mode & RECLAIM_SWAP),
+               .may_swap = 1,
        };
        struct shrink_control shrink = {
                .gfp_mask = sc.gfp_mask,