mm: update the description for vm_total_pages

[pandora-kernel.git] / mm / vmscan.c
diff --git a/mm/vmscan.c b/mm/vmscan.c

index 0f16ffe..5fec1ba 100644 (file)
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -59,35 +59,20 @@
  #include <trace/events/vmscan.h>
  
  struct scan_control {
-       /* Incremented by the number of inactive pages that were scanned */
-       unsigned long nr_scanned;
-
-       /* Number of pages freed so far during a call to shrink_zones() */
-       unsigned long nr_reclaimed;
-
         /* How many pages shrink_list() should reclaim */
         unsigned long nr_to_reclaim;
  
-       unsigned long hibernation_mode;
-
         /* This context's GFP mask */
         gfp_t gfp_mask;
  
-       int may_writepage;
-
-       /* Can mapped pages be reclaimed? */
-       int may_unmap;
-
-       /* Can pages be swapped as part of reclaim? */
-       int may_swap;
-
+       /* Allocation order */
         int order;
  
-       /* Scan (total_size >> priority) pages at once */
-       int priority;
-
-       /* anon vs. file LRUs scanning "ratio" */
-       int swappiness;
+       /*
+        * Nodemask of nodes allowed by the caller. If NULL, all nodes
+        * are scanned.
+        */
+       nodemask_t      *nodemask;
  
         /*
          * The memory cgroup that hit its limit and as a result is the
@@ -95,11 +80,27 @@ struct scan_control {
          */
         struct mem_cgroup *target_mem_cgroup;
  
-       /*
-        * Nodemask of nodes allowed by the caller. If NULL, all nodes
-        * are scanned.
-        */
-       nodemask_t      *nodemask;
+       /* Scan (total_size >> priority) pages at once */
+       int priority;
+
+       unsigned int may_writepage:1;
+
+       /* Can mapped pages be reclaimed? */
+       unsigned int may_unmap:1;
+
+       /* Can pages be swapped as part of reclaim? */
+       unsigned int may_swap:1;
+
+       unsigned int hibernation_mode:1;
+
+       /* One of the zones is ready for compaction */
+       unsigned int compaction_ready:1;
+
+       /* Incremented by the number of inactive pages that were scanned */
+       unsigned long nr_scanned;
+
+       /* Number of pages freed so far during a call to shrink_zones() */
+       unsigned long nr_reclaimed;
  };
  
  #define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
@@ -136,7 +137,11 @@ struct scan_control {
   * From 0 .. 100.  Higher means more swappy.
   */
  int vm_swappiness = 60;
-unsigned long vm_total_pages;  /* The total number of pages which the VM controls */
+/*
+ * The total number of pages which are beyond the high watermark within all
+ * zones.
+ */
+unsigned long vm_total_pages;
  
  static LIST_HEAD(shrinker_list);
  static DECLARE_RWSEM(shrinker_rwsem);
@@ -1865,8 +1870,8 @@ enum scan_balance {
   * nr[0] = anon inactive pages to scan; nr[1] = anon active pages to scan
   * nr[2] = file inactive pages to scan; nr[3] = file active pages to scan
   */
-static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
-                          unsigned long *nr)
+static void get_scan_count(struct lruvec *lruvec, int swappiness,
+                          struct scan_control *sc, unsigned long *nr)
  {
         struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
         u64 fraction[2];
@@ -1909,7 +1914,7 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
          * using the memory controller's swap limit feature would be
          * too expensive.
          */
-       if (!global_reclaim(sc) && !sc->swappiness) {
+       if (!global_reclaim(sc) && !swappiness) {
                 scan_balance = SCAN_FILE;
                 goto out;
         }
@@ -1919,7 +1924,7 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
          * system is close to OOM, scan both anon and file equally
          * (unless the swappiness setting disagrees with swapping).
          */
-       if (!sc->priority && sc->swappiness) {
+       if (!sc->priority && swappiness) {
                 scan_balance = SCAN_EQUAL;
                 goto out;
         }
@@ -1962,7 +1967,7 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
          * With swappiness at 100, anonymous and file have the same priority.
          * This scanning priority is essentially the inverse of IO cost.
          */
-       anon_prio = sc->swappiness;
+       anon_prio = swappiness;
         file_prio = 200 - anon_prio;
  
         /*
@@ -2052,7 +2057,8 @@ out:
  /*
   * This is a basic per-zone page freer.  Used by both kswapd and direct reclaim.
   */
-static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
+static void shrink_lruvec(struct lruvec *lruvec, int swappiness,
+                         struct scan_control *sc)
  {
         unsigned long nr[NR_LRU_LISTS];
         unsigned long targets[NR_LRU_LISTS];
@@ -2063,7 +2069,7 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
         struct blk_plug plug;
         bool scan_adjusted;
  
-       get_scan_count(lruvec, sc, nr);
+       get_scan_count(lruvec, swappiness, sc, nr);
  
         /* Record the original scan target for proportional adjustments later */
         memcpy(targets, nr, sizeof(nr));
@@ -2241,9 +2247,10 @@ static inline bool should_continue_reclaim(struct zone *zone,
         }
  }
  
-static void shrink_zone(struct zone *zone, struct scan_control *sc)
+static bool shrink_zone(struct zone *zone, struct scan_control *sc)
  {
         unsigned long nr_reclaimed, nr_scanned;
+       bool reclaimable = false;
  
         do {
                 struct mem_cgroup *root = sc->target_mem_cgroup;
@@ -2259,11 +2266,12 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc)
                 memcg = mem_cgroup_iter(root, NULL, &reclaim);
                 do {
                         struct lruvec *lruvec;
+                       int swappiness;
  
                         lruvec = mem_cgroup_zone_lruvec(zone, memcg);
+                       swappiness = mem_cgroup_swappiness(memcg);
  
-                       sc->swappiness = mem_cgroup_swappiness(memcg);
-                       shrink_lruvec(lruvec, sc);
+                       shrink_lruvec(lruvec, swappiness, sc);
  
                         /*
                          * Direct reclaim and kswapd have to scan all memory
@@ -2287,20 +2295,21 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc)
                            sc->nr_scanned - nr_scanned,
                            sc->nr_reclaimed - nr_reclaimed);
  
+               if (sc->nr_reclaimed - nr_reclaimed)
+                       reclaimable = true;
+
         } while (should_continue_reclaim(zone, sc->nr_reclaimed - nr_reclaimed,
                                          sc->nr_scanned - nr_scanned, sc));
+
+       return reclaimable;
  }
  
  /* Returns true if compaction should go ahead for a high-order request */
-static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
+static inline bool compaction_ready(struct zone *zone, int order)
  {
         unsigned long balance_gap, watermark;
         bool watermark_ok;
  
-       /* Do not consider compaction for orders reclaim is meant to satisfy */
-       if (sc->order <= PAGE_ALLOC_COSTLY_ORDER)
-               return false;
-
         /*
          * Compaction takes time to run and there are potentially other
          * callers using the pages just freed. Continue reclaiming until
@@ -2309,18 +2318,18 @@ static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
          */
         balance_gap = min(low_wmark_pages(zone), DIV_ROUND_UP(
                         zone->managed_pages, KSWAPD_ZONE_BALANCE_GAP_RATIO));
-       watermark = high_wmark_pages(zone) + balance_gap + (2UL << sc->order);
+       watermark = high_wmark_pages(zone) + balance_gap + (2UL << order);
         watermark_ok = zone_watermark_ok_safe(zone, 0, watermark, 0, 0);
  
         /*
          * If compaction is deferred, reclaim up to a point where
          * compaction will have a chance of success when re-enabled
          */
-       if (compaction_deferred(zone, sc->order))
+       if (compaction_deferred(zone, order))
                 return watermark_ok;
  
         /* If compaction is not ready to start, keep reclaiming */
-       if (!compaction_suitable(zone, sc->order))
+       if (!compaction_suitable(zone, order))
                 return false;
  
         return watermark_ok;
@@ -2342,10 +2351,7 @@ static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
   * If a zone is deemed to be full of pinned pages then just give it a light
   * scan then give up on it.
   *
- * This function returns true if a zone is being reclaimed for a costly
- * high-order allocation and compaction is ready to begin. This indicates to
- * the caller that it should consider retrying the allocation instead of
- * further reclaim.
+ * Returns true if a zone was reclaimable.
   */
  static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
  {
@@ -2354,13 +2360,13 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
         unsigned long nr_soft_reclaimed;
         unsigned long nr_soft_scanned;
         unsigned long lru_pages = 0;
-       bool aborted_reclaim = false;
         struct reclaim_state *reclaim_state = current->reclaim_state;
         gfp_t orig_mask;
         struct shrink_control shrink = {
                 .gfp_mask = sc->gfp_mask,
         };
         enum zone_type requested_highidx = gfp_zone(sc->gfp_mask);
+       bool reclaimable = false;
  
         /*
          * If the number of buffer_heads in the machine exceeds the maximum
@@ -2391,22 +2397,24 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
                         if (sc->priority != DEF_PRIORITY &&
                             !zone_reclaimable(zone))
                                 continue;       /* Let kswapd poll it */
-                       if (IS_ENABLED(CONFIG_COMPACTION)) {
-                               /*
-                                * If we already have plenty of memory free for
-                                * compaction in this zone, don't free any more.
-                                * Even though compaction is invoked for any
-                                * non-zero order, only frequent costly order
-                                * reclamation is disruptive enough to become a
-                                * noticeable problem, like transparent huge
-                                * page allocations.
-                                */
-                               if ((zonelist_zone_idx(z) <= requested_highidx)
-                                   && compaction_ready(zone, sc)) {
-                                       aborted_reclaim = true;
-                                       continue;
-                               }
+
+                       /*
+                        * If we already have plenty of memory free for
+                        * compaction in this zone, don't free any more.
+                        * Even though compaction is invoked for any
+                        * non-zero order, only frequent costly order
+                        * reclamation is disruptive enough to become a
+                        * noticeable problem, like transparent huge
+                        * page allocations.
+                        */
+                       if (IS_ENABLED(CONFIG_COMPACTION) &&
+                           sc->order > PAGE_ALLOC_COSTLY_ORDER &&
+                           zonelist_zone_idx(z) <= requested_highidx &&
+                           compaction_ready(zone, sc->order)) {
+                               sc->compaction_ready = true;
+                               continue;
                         }
+
                         /*
                          * This steals pages from memory cgroups over softlimit
                          * and returns the number of reclaimed pages and
@@ -2419,10 +2427,17 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
                                                 &nr_soft_scanned);
                         sc->nr_reclaimed += nr_soft_reclaimed;
                         sc->nr_scanned += nr_soft_scanned;
+                       if (nr_soft_reclaimed)
+                               reclaimable = true;
                         /* need some check for avoid more shrink_zone() */
                 }
  
-               shrink_zone(zone, sc);
+               if (shrink_zone(zone, sc))
+                       reclaimable = true;
+
+               if (global_reclaim(sc) &&
+                   !reclaimable && zone_reclaimable(zone))
+                       reclaimable = true;
         }
  
         /*
@@ -2445,27 +2460,7 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
          */
         sc->gfp_mask = orig_mask;
  
-       return aborted_reclaim;
-}
-
-/* All zones in zonelist are unreclaimable? */
-static bool all_unreclaimable(struct zonelist *zonelist,
-               struct scan_control *sc)
-{
-       struct zoneref *z;
-       struct zone *zone;
-
-       for_each_zone_zonelist_nodemask(zone, z, zonelist,
-                       gfp_zone(sc->gfp_mask), sc->nodemask) {
-               if (!populated_zone(zone))
-                       continue;
-               if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
-                       continue;
-               if (zone_reclaimable(zone))
-                       return false;
-       }
-
-       return true;
+       return reclaimable;
  }
  
  /*
@@ -2489,7 +2484,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
  {
         unsigned long total_scanned = 0;
         unsigned long writeback_threshold;
-       bool aborted_reclaim;
+       bool zones_reclaimable;
  
         delayacct_freepages_start();
  
@@ -2500,11 +2495,14 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
                 vmpressure_prio(sc->gfp_mask, sc->target_mem_cgroup,
                                 sc->priority);
                 sc->nr_scanned = 0;
-               aborted_reclaim = shrink_zones(zonelist, sc);
+               zones_reclaimable = shrink_zones(zonelist, sc);
  
                 total_scanned += sc->nr_scanned;
                 if (sc->nr_reclaimed >= sc->nr_to_reclaim)
-                       goto out;
+                       break;
+
+               if (sc->compaction_ready)
+                       break;
  
                 /*
                  * If we're getting trouble reclaiming, start doing
@@ -2526,28 +2524,19 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
                                                 WB_REASON_TRY_TO_FREE_PAGES);
                         sc->may_writepage = 1;
                 }
-       } while (--sc->priority >= 0 && !aborted_reclaim);
+       } while (--sc->priority >= 0);
  
-out:
         delayacct_freepages_end();
  
         if (sc->nr_reclaimed)
                 return sc->nr_reclaimed;
  
-       /*
-        * As hibernation is going on, kswapd is freezed so that it can't mark
-        * the zone into all_unreclaimable. Thus bypassing all_unreclaimable
-        * check.
-        */
-       if (oom_killer_disabled)
-               return 0;
-
         /* Aborted reclaim to try compaction? don't OOM, then */
-       if (aborted_reclaim)
+       if (sc->compaction_ready)
                 return 1;
  
-       /* top priority shrink_zones still had more to do? don't OOM, then */
-       if (global_reclaim(sc) && !all_unreclaimable(zonelist, sc))
+       /* Any of the zones still reclaimable?  Don't OOM. */
+       if (zones_reclaimable)
                 return 1;
  
         return 0;
@@ -2684,15 +2673,14 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
  {
         unsigned long nr_reclaimed;
         struct scan_control sc = {
+               .nr_to_reclaim = SWAP_CLUSTER_MAX,
                 .gfp_mask = (gfp_mask = memalloc_noio_flags(gfp_mask)),
+               .order = order,
+               .nodemask = nodemask,
+               .priority = DEF_PRIORITY,
                 .may_writepage = !laptop_mode,
-               .nr_to_reclaim = SWAP_CLUSTER_MAX,
                 .may_unmap = 1,
                 .may_swap = 1,
-               .order = order,
-               .priority = DEF_PRIORITY,
-               .target_mem_cgroup = NULL,
-               .nodemask = nodemask,
         };
  
         /*
@@ -2722,17 +2710,14 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg,
                                                 unsigned long *nr_scanned)
  {
         struct scan_control sc = {
-               .nr_scanned = 0,
                 .nr_to_reclaim = SWAP_CLUSTER_MAX,
+               .target_mem_cgroup = memcg,
                 .may_writepage = !laptop_mode,
                 .may_unmap = 1,
                 .may_swap = !noswap,
-               .order = 0,
-               .priority = 0,
-               .swappiness = mem_cgroup_swappiness(memcg),
-               .target_mem_cgroup = memcg,
         };
         struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg);
+       int swappiness = mem_cgroup_swappiness(memcg);
  
         sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
                         (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
@@ -2748,7 +2733,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg,
          * will pick up pages from other mem cgroup's as well. We hack
          * the priority and make it zero.
          */
-       shrink_lruvec(lruvec, &sc);
+       shrink_lruvec(lruvec, swappiness, &sc);
  
         trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);
  
@@ -2764,16 +2749,14 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
         unsigned long nr_reclaimed;
         int nid;
         struct scan_control sc = {
-               .may_writepage = !laptop_mode,
-               .may_unmap = 1,
-               .may_swap = !noswap,
                 .nr_to_reclaim = SWAP_CLUSTER_MAX,
-               .order = 0,
-               .priority = DEF_PRIORITY,
-               .target_mem_cgroup = memcg,
-               .nodemask = NULL, /* we don't care the placement */
                 .gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
                                 (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK),
+               .target_mem_cgroup = memcg,
+               .priority = DEF_PRIORITY,
+               .may_writepage = !laptop_mode,
+               .may_unmap = 1,
+               .may_swap = !noswap,
         };
  
         /*
@@ -3031,12 +3014,11 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
         unsigned long nr_soft_scanned;
         struct scan_control sc = {
                 .gfp_mask = GFP_KERNEL,
+               .order = order,
                 .priority = DEF_PRIORITY,
+               .may_writepage = !laptop_mode,
                 .may_unmap = 1,
                 .may_swap = 1,
-               .may_writepage = !laptop_mode,
-               .order = order,
-               .target_mem_cgroup = NULL,
         };
         count_vm_event(PAGEOUTRUN);
  
@@ -3417,14 +3399,13 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
  {
         struct reclaim_state reclaim_state;
         struct scan_control sc = {
+               .nr_to_reclaim = nr_to_reclaim,
                 .gfp_mask = GFP_HIGHUSER_MOVABLE,
-               .may_swap = 1,
-               .may_unmap = 1,
+               .priority = DEF_PRIORITY,
                 .may_writepage = 1,
-               .nr_to_reclaim = nr_to_reclaim,
+               .may_unmap = 1,
+               .may_swap = 1,
                 .hibernation_mode = 1,
-               .order = 0,
-               .priority = DEF_PRIORITY,
         };
         struct zonelist *zonelist = node_zonelist(numa_node_id(), sc.gfp_mask);
         struct task_struct *p = current;
@@ -3604,13 +3585,13 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
         struct task_struct *p = current;
         struct reclaim_state reclaim_state;
         struct scan_control sc = {
-               .may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE),
-               .may_unmap = !!(zone_reclaim_mode & RECLAIM_SWAP),
-               .may_swap = 1,
                 .nr_to_reclaim = max(nr_pages, SWAP_CLUSTER_MAX),
                 .gfp_mask = (gfp_mask = memalloc_noio_flags(gfp_mask)),
                 .order = order,
                 .priority = ZONE_RECLAIM_PRIORITY,
+               .may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE),
+               .may_unmap = !!(zone_reclaim_mode & RECLAIM_SWAP),
+               .may_swap = 1,
         };
         struct shrink_control shrink = {
                 .gfp_mask = sc.gfp_mask,