memcg: move charges to root cgroup if use_hierarchy=0

[pandora-kernel.git] / mm / vmscan.c
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 33dc256..4c5453f 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -53,24 +53,6 @@
  #define CREATE_TRACE_POINTS
  #include <trace/events/vmscan.h>
  
-/*
- * reclaim_mode determines how the inactive list is shrunk
- * RECLAIM_MODE_SINGLE: Reclaim only order-0 pages
- * RECLAIM_MODE_ASYNC:  Do not block
- * RECLAIM_MODE_SYNC:   Allow blocking e.g. call wait_on_page_writeback
- * RECLAIM_MODE_LUMPYRECLAIM: For high-order allocations, take a reference
- *                     page from the LRU and reclaim all pages within a
- *                     naturally aligned range
- * RECLAIM_MODE_COMPACTION: For high-order allocations, reclaim a number of
- *                     order-0 pages and then compact the zone
- */
-typedef unsigned __bitwise__ reclaim_mode_t;
-#define RECLAIM_MODE_SINGLE            ((__force reclaim_mode_t)0x01u)
-#define RECLAIM_MODE_ASYNC             ((__force reclaim_mode_t)0x02u)
-#define RECLAIM_MODE_SYNC              ((__force reclaim_mode_t)0x04u)
-#define RECLAIM_MODE_LUMPYRECLAIM      ((__force reclaim_mode_t)0x08u)
-#define RECLAIM_MODE_COMPACTION                ((__force reclaim_mode_t)0x10u)
-
  struct scan_control {
         /* Incremented by the number of inactive pages that were scanned */
         unsigned long nr_scanned;
@@ -96,11 +78,8 @@ struct scan_control {
  
         int order;
  
-       /*
-        * Intend to reclaim enough continuous memory rather than reclaim
-        * enough amount of memory. i.e, mode for high order allocation.
-        */
-       reclaim_mode_t reclaim_mode;
+       /* Scan (total_size >> priority) pages at once */
+       int priority;
  
         /*
          * The memory cgroup that hit its limit and as a result is the
@@ -115,11 +94,6 @@ struct scan_control {
         nodemask_t      *nodemask;
  };
  
-struct mem_cgroup_zone {
-       struct mem_cgroup *mem_cgroup;
-       struct zone *zone;
-};
-
  #define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
  
  #ifdef ARCH_HAS_PREFETCH
@@ -164,44 +138,21 @@ static bool global_reclaim(struct scan_control *sc)
  {
         return !sc->target_mem_cgroup;
  }
-
-static bool scanning_global_lru(struct mem_cgroup_zone *mz)
-{
-       return !mz->mem_cgroup;
-}
  #else
  static bool global_reclaim(struct scan_control *sc)
  {
         return true;
  }
-
-static bool scanning_global_lru(struct mem_cgroup_zone *mz)
-{
-       return true;
-}
  #endif
  
-static struct zone_reclaim_stat *get_reclaim_stat(struct mem_cgroup_zone *mz)
-{
-       if (!scanning_global_lru(mz))
-               return mem_cgroup_get_reclaim_stat(mz->mem_cgroup, mz->zone);
-
-       return &mz->zone->reclaim_stat;
-}
-
-static unsigned long zone_nr_lru_pages(struct mem_cgroup_zone *mz,
-                                      enum lru_list lru)
+static unsigned long get_lruvec_size(struct lruvec *lruvec, enum lru_list lru)
  {
-       if (!scanning_global_lru(mz))
-               return mem_cgroup_zone_nr_lru_pages(mz->mem_cgroup,
-                                                   zone_to_nid(mz->zone),
-                                                   zone_idx(mz->zone),
-                                                   BIT(lru));
+       if (!mem_cgroup_disabled())
+               return mem_cgroup_get_lruvec_size(lruvec, lru);
  
-       return zone_page_state(mz->zone, NR_LRU_BASE + lru);
+       return zone_page_state(lruvec_zone(lruvec), NR_LRU_BASE + lru);
  }
  
-
  /*
   * Add a shrinker callback to be called from the vm
   */
@@ -364,39 +315,6 @@ out:
         return ret;
  }
  
-static void set_reclaim_mode(int priority, struct scan_control *sc,
-                                  bool sync)
-{
-       reclaim_mode_t syncmode = sync ? RECLAIM_MODE_SYNC : RECLAIM_MODE_ASYNC;
-
-       /*
-        * Initially assume we are entering either lumpy reclaim or
-        * reclaim/compaction.Depending on the order, we will either set the
-        * sync mode or just reclaim order-0 pages later.
-        */
-       if (COMPACTION_BUILD)
-               sc->reclaim_mode = RECLAIM_MODE_COMPACTION;
-       else
-               sc->reclaim_mode = RECLAIM_MODE_LUMPYRECLAIM;
-
-       /*
-        * Avoid using lumpy reclaim or reclaim/compaction if possible by
-        * restricting when its set to either costly allocations or when
-        * under memory pressure
-        */
-       if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
-               sc->reclaim_mode |= syncmode;
-       else if (sc->order && priority < DEF_PRIORITY - 2)
-               sc->reclaim_mode |= syncmode;
-       else
-               sc->reclaim_mode = RECLAIM_MODE_SINGLE | RECLAIM_MODE_ASYNC;
-}
-
-static void reset_reclaim_mode(struct scan_control *sc)
-{
-       sc->reclaim_mode = RECLAIM_MODE_SINGLE | RECLAIM_MODE_ASYNC;
-}
-
  static inline int is_page_cache_freeable(struct page *page)
  {
         /*
@@ -416,10 +334,6 @@ static int may_write_to_queue(struct backing_dev_info *bdi,
                 return 1;
         if (bdi == current->backing_dev_info)
                 return 1;
-
-       /* lumpy reclaim for hugepage often need a lot of write */
-       if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
-               return 1;
         return 0;
  }
  
@@ -523,8 +437,7 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
                         /* synchronous write or broken a_ops? */
                         ClearPageReclaim(page);
                 }
-               trace_mm_vmscan_writepage(page,
-                       trace_reclaim_flags(page, sc->reclaim_mode));
+               trace_mm_vmscan_writepage(page, trace_reclaim_flags(page));
                 inc_zone_page_state(page, NR_VMSCAN_WRITE);
                 return PAGE_SUCCESS;
         }
@@ -701,19 +614,15 @@ enum page_references {
  };
  
  static enum page_references page_check_references(struct page *page,
-                                                 struct mem_cgroup_zone *mz,
                                                   struct scan_control *sc)
  {
         int referenced_ptes, referenced_page;
         unsigned long vm_flags;
  
-       referenced_ptes = page_referenced(page, 1, mz->mem_cgroup, &vm_flags);
+       referenced_ptes = page_referenced(page, 1, sc->target_mem_cgroup,
+                                         &vm_flags);
         referenced_page = TestClearPageReferenced(page);
  
-       /* Lumpy reclaim - ignore references */
-       if (sc->reclaim_mode & RECLAIM_MODE_LUMPYRECLAIM)
-               return PAGEREF_RECLAIM;
-
         /*
          * Mlock lost the isolation race with us.  Let try_to_unmap()
          * move the page to the unevictable list.
@@ -722,7 +631,7 @@ static enum page_references page_check_references(struct page *page,
                 return PAGEREF_RECLAIM;
  
         if (referenced_ptes) {
-               if (PageAnon(page))
+               if (PageSwapBacked(page))
                         return PAGEREF_ACTIVATE;
                 /*
                  * All mapped pages start out with page table
@@ -763,9 +672,8 @@ static enum page_references page_check_references(struct page *page,
   * shrink_page_list() returns the number of reclaimed pages
   */
  static unsigned long shrink_page_list(struct list_head *page_list,
-                                     struct mem_cgroup_zone *mz,
+                                     struct zone *zone,
                                       struct scan_control *sc,
-                                     int priority,
                                       unsigned long *ret_nr_dirty,
                                       unsigned long *ret_nr_writeback)
  {
@@ -794,7 +702,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                         goto keep;
  
                 VM_BUG_ON(PageActive(page));
-               VM_BUG_ON(page_zone(page) != mz->zone);
+               VM_BUG_ON(page_zone(page) != zone);
  
                 sc->nr_scanned++;
  
@@ -813,22 +721,11 @@ static unsigned long shrink_page_list(struct list_head *page_list,
  
                 if (PageWriteback(page)) {
                         nr_writeback++;
-                       /*
-                        * Synchronous reclaim cannot queue pages for
-                        * writeback due to the possibility of stack overflow
-                        * but if it encounters a page under writeback, wait
-                        * for the IO to complete.
-                        */
-                       if ((sc->reclaim_mode & RECLAIM_MODE_SYNC) &&
-                           may_enter_fs)
-                               wait_on_page_writeback(page);
-                       else {
-                               unlock_page(page);
-                               goto keep_lumpy;
-                       }
+                       unlock_page(page);
+                       goto keep;
                 }
  
-               references = page_check_references(page, mz, sc);
+               references = page_check_references(page, sc);
                 switch (references) {
                 case PAGEREF_ACTIVATE:
                         goto activate_locked;
@@ -879,7 +776,8 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                          * unless under significant pressure.
                          */
                         if (page_is_file_cache(page) &&
-                                       (!current_is_kswapd() || priority >= DEF_PRIORITY - 2)) {
+                                       (!current_is_kswapd() ||
+                                        sc->priority >= DEF_PRIORITY - 2)) {
                                 /*
                                  * Immediately reclaim when written back.
                                  * Similar in principal to deactivate_page()
@@ -908,7 +806,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                                 goto activate_locked;
                         case PAGE_SUCCESS:
                                 if (PageWriteback(page))
-                                       goto keep_lumpy;
+                                       goto keep;
                                 if (PageDirty(page))
                                         goto keep;
  
@@ -994,7 +892,6 @@ cull_mlocked:
                         try_to_free_swap(page);
                 unlock_page(page);
                 putback_lru_page(page);
-               reset_reclaim_mode(sc);
                 continue;
  
  activate_locked:
@@ -1007,8 +904,6 @@ activate_locked:
  keep_locked:
                 unlock_page(page);
  keep:
-               reset_reclaim_mode(sc);
-keep_lumpy:
                 list_add(&page->lru, &ret_pages);
                 VM_BUG_ON(PageLRU(page) || PageUnevictable(page));
         }
@@ -1020,7 +915,7 @@ keep_lumpy:
          * will encounter the same problem
          */
         if (nr_dirty && nr_dirty == nr_congested && global_reclaim(sc))
-               zone_set_flag(mz->zone, ZONE_CONGESTED);
+               zone_set_flag(zone, ZONE_CONGESTED);
  
         free_hot_cold_page_list(&free_pages, 1);
  
@@ -1041,34 +936,15 @@ keep_lumpy:
   *
   * returns 0 on success, -ve errno on failure.
   */
-int __isolate_lru_page(struct page *page, isolate_mode_t mode, int file)
+int __isolate_lru_page(struct page *page, isolate_mode_t mode)
  {
-       bool all_lru_mode;
         int ret = -EINVAL;
  
         /* Only take pages on the LRU. */
         if (!PageLRU(page))
                 return ret;
  
-       all_lru_mode = (mode & (ISOLATE_ACTIVE|ISOLATE_INACTIVE)) ==
-               (ISOLATE_ACTIVE|ISOLATE_INACTIVE);
-
-       /*
-        * When checking the active state, we need to be sure we are
-        * dealing with comparible boolean values.  Take the logical not
-        * of each.
-        */
-       if (!all_lru_mode && !PageActive(page) != !(mode & ISOLATE_ACTIVE))
-               return ret;
-
-       if (!all_lru_mode && !!page_is_file_cache(page) != file)
-               return ret;
-
-       /*
-        * When this function is being called for lumpy reclaim, we
-        * initially look into all LRU pages, active, inactive and
-        * unevictable; only give shrink_page_list evictable pages.
-        */
+       /* Do not give back unevictable pages for compaction */
         if (PageUnevictable(page))
                 return ret;
  
@@ -1135,52 +1011,38 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode, int file)
   * Appropriate locks must be held before calling this function.
   *
   * @nr_to_scan:        The number of pages to look through on the list.
- * @mz:                The mem_cgroup_zone to pull pages from.
+ * @lruvec:    The LRU vector to pull pages from.
   * @dst:       The temp list to put pages on to.
   * @nr_scanned:        The number of pages that were scanned.
   * @sc:                The scan_control struct for this reclaim session
   * @mode:      One of the LRU isolation modes
- * @active:    True [1] if isolating active pages
- * @file:      True [1] if isolating file [!anon] pages
+ * @lru:       LRU list id for isolating
   *
   * returns how many pages were moved onto *@dst.
   */
  static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
-               struct mem_cgroup_zone *mz, struct list_head *dst,
+               struct lruvec *lruvec, struct list_head *dst,
                 unsigned long *nr_scanned, struct scan_control *sc,
-               isolate_mode_t mode, int active, int file)
+               isolate_mode_t mode, enum lru_list lru)
  {
-       struct lruvec *lruvec;
         struct list_head *src;
         unsigned long nr_taken = 0;
-       unsigned long nr_lumpy_taken = 0;
-       unsigned long nr_lumpy_dirty = 0;
-       unsigned long nr_lumpy_failed = 0;
         unsigned long scan;
-       int lru = LRU_BASE;
+       int file = is_file_lru(lru);
  
-       lruvec = mem_cgroup_zone_lruvec(mz->zone, mz->mem_cgroup);
-       if (active)
-               lru += LRU_ACTIVE;
-       if (file)
-               lru += LRU_FILE;
         src = &lruvec->lists[lru];
  
         for (scan = 0; scan < nr_to_scan && !list_empty(src); scan++) {
                 struct page *page;
-               unsigned long pfn;
-               unsigned long end_pfn;
-               unsigned long page_pfn;
-               int zone_id;
  
                 page = lru_to_page(src);
                 prefetchw_prev_lru_page(page, src, flags);
  
                 VM_BUG_ON(!PageLRU(page));
  
-               switch (__isolate_lru_page(page, mode, file)) {
+               switch (__isolate_lru_page(page, mode)) {
                 case 0:
-                       mem_cgroup_lru_del(page);
+                       mem_cgroup_lru_del_list(page, lru);
                         list_move(&page->lru, dst);
                         nr_taken += hpage_nr_pages(page);
                         break;
@@ -1193,84 +1055,6 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
                 default:
                         BUG();
                 }
-
-               if (!sc->order || !(sc->reclaim_mode & RECLAIM_MODE_LUMPYRECLAIM))
-                       continue;
-
-               /*
-                * Attempt to take all pages in the order aligned region
-                * surrounding the tag page.  Only take those pages of
-                * the same active state as that tag page.  We may safely
-                * round the target page pfn down to the requested order
-                * as the mem_map is guaranteed valid out to MAX_ORDER,
-                * where that page is in a different zone we will detect
-                * it from its zone id and abort this block scan.
-                */
-               zone_id = page_zone_id(page);
-               page_pfn = page_to_pfn(page);
-               pfn = page_pfn & ~((1 << sc->order) - 1);
-               end_pfn = pfn + (1 << sc->order);
-               for (; pfn < end_pfn; pfn++) {
-                       struct page *cursor_page;
-
-                       /* The target page is in the block, ignore it. */
-                       if (unlikely(pfn == page_pfn))
-                               continue;
-
-                       /* Avoid holes within the zone. */
-                       if (unlikely(!pfn_valid_within(pfn)))
-                               break;
-
-                       cursor_page = pfn_to_page(pfn);
-
-                       /* Check that we have not crossed a zone boundary. */
-                       if (unlikely(page_zone_id(cursor_page) != zone_id))
-                               break;
-
-                       /*
-                        * If we don't have enough swap space, reclaiming of
-                        * anon page which don't already have a swap slot is
-                        * pointless.
-                        */
-                       if (nr_swap_pages <= 0 && PageSwapBacked(cursor_page) &&
-                           !PageSwapCache(cursor_page))
-                               break;
-
-                       if (__isolate_lru_page(cursor_page, mode, file) == 0) {
-                               unsigned int isolated_pages;
-
-                               mem_cgroup_lru_del(cursor_page);
-                               list_move(&cursor_page->lru, dst);
-                               isolated_pages = hpage_nr_pages(cursor_page);
-                               nr_taken += isolated_pages;
-                               nr_lumpy_taken += isolated_pages;
-                               if (PageDirty(cursor_page))
-                                       nr_lumpy_dirty += isolated_pages;
-                               scan++;
-                               pfn += isolated_pages - 1;
-                       } else {
-                               /*
-                                * Check if the page is freed already.
-                                *
-                                * We can't use page_count() as that
-                                * requires compound_head and we don't
-                                * have a pin on the page here. If a
-                                * page is tail, we may or may not
-                                * have isolated the head, so assume
-                                * it's not free, it'd be tricky to
-                                * track the head status without a
-                                * page pin.
-                                */
-                               if (!PageTail(cursor_page) &&
-                                   !atomic_read(&cursor_page->_count))
-                                       continue;
-                               break;
-                       }
-               }
-
-               /* If we break out of the loop above, lumpy reclaim failed */
-               if (pfn < end_pfn)
-                       nr_lumpy_failed++;
         }
  
         *nr_scanned = scan;
@@ -1278,7 +1062,6 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
         trace_mm_vmscan_lru_isolate(sc->order,
                         nr_to_scan, scan,
                         nr_taken,
-                       nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed,
                         mode, file);
         return nr_taken;
  }
@@ -1357,11 +1140,11 @@ static int too_many_isolated(struct zone *zone, int file,
  }
  
  static noinline_for_stack void
-putback_inactive_pages(struct mem_cgroup_zone *mz,
+putback_inactive_pages(struct lruvec *lruvec,
                        struct list_head *page_list)
  {
-       struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz);
-       struct zone *zone = mz->zone;
+       struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
+       struct zone *zone = lruvec_zone(lruvec);
         LIST_HEAD(pages_to_free);
  
         /*
@@ -1407,112 +1190,24 @@ putback_inactive_pages(struct mem_cgroup_zone *mz,
         list_splice(&pages_to_free, page_list);
  }
  
-static noinline_for_stack void
-update_isolated_counts(struct mem_cgroup_zone *mz,
-                      struct list_head *page_list,
-                      unsigned long *nr_anon,
-                      unsigned long *nr_file)
-{
-       struct zone *zone = mz->zone;
-       unsigned int count[NR_LRU_LISTS] = { 0, };
-       unsigned long nr_active = 0;
-       struct page *page;
-       int lru;
-
-       /*
-        * Count pages and clear active flags
-        */
-       list_for_each_entry(page, page_list, lru) {
-               int numpages = hpage_nr_pages(page);
-               lru = page_lru_base_type(page);
-               if (PageActive(page)) {
-                       lru += LRU_ACTIVE;
-                       ClearPageActive(page);
-                       nr_active += numpages;
-               }
-               count[lru] += numpages;
-       }
-
-       preempt_disable();
-       __count_vm_events(PGDEACTIVATE, nr_active);
-
-       __mod_zone_page_state(zone, NR_ACTIVE_FILE,
-                             -count[LRU_ACTIVE_FILE]);
-       __mod_zone_page_state(zone, NR_INACTIVE_FILE,
-                             -count[LRU_INACTIVE_FILE]);
-       __mod_zone_page_state(zone, NR_ACTIVE_ANON,
-                             -count[LRU_ACTIVE_ANON]);
-       __mod_zone_page_state(zone, NR_INACTIVE_ANON,
-                             -count[LRU_INACTIVE_ANON]);
-
-       *nr_anon = count[LRU_ACTIVE_ANON] + count[LRU_INACTIVE_ANON];
-       *nr_file = count[LRU_ACTIVE_FILE] + count[LRU_INACTIVE_FILE];
-
-       __mod_zone_page_state(zone, NR_ISOLATED_ANON, *nr_anon);
-       __mod_zone_page_state(zone, NR_ISOLATED_FILE, *nr_file);
-       preempt_enable();
-}
-
-/*
- * Returns true if a direct reclaim should wait on pages under writeback.
- *
- * If we are direct reclaiming for contiguous pages and we do not reclaim
- * everything in the list, try again and wait for writeback IO to complete.
- * This will stall high-order allocations noticeably. Only do that when really
- * need to free the pages under high memory pressure.
- */
-static inline bool should_reclaim_stall(unsigned long nr_taken,
-                                       unsigned long nr_freed,
-                                       int priority,
-                                       struct scan_control *sc)
-{
-       int lumpy_stall_priority;
-
-       /* kswapd should not stall on sync IO */
-       if (current_is_kswapd())
-               return false;
-
-       /* Only stall on lumpy reclaim */
-       if (sc->reclaim_mode & RECLAIM_MODE_SINGLE)
-               return false;
-
-       /* If we have reclaimed everything on the isolated list, no stall */
-       if (nr_freed == nr_taken)
-               return false;
-
-       /*
-        * For high-order allocations, there are two stall thresholds.
-        * High-cost allocations stall immediately where as lower
-        * order allocations such as stacks require the scanning
-        * priority to be much higher before stalling.
-        */
-       if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
-               lumpy_stall_priority = DEF_PRIORITY;
-       else
-               lumpy_stall_priority = DEF_PRIORITY / 3;
-
-       return priority <= lumpy_stall_priority;
-}
-
  /*
   * shrink_inactive_list() is a helper for shrink_zone().  It returns the number
   * of reclaimed pages
   */
  static noinline_for_stack unsigned long
-shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz,
-                    struct scan_control *sc, int priority, int file)
+shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
+                    struct scan_control *sc, enum lru_list lru)
  {
         LIST_HEAD(page_list);
         unsigned long nr_scanned;
         unsigned long nr_reclaimed = 0;
         unsigned long nr_taken;
-       unsigned long nr_anon;
-       unsigned long nr_file;
         unsigned long nr_dirty = 0;
         unsigned long nr_writeback = 0;
-       isolate_mode_t isolate_mode = ISOLATE_INACTIVE;
-       struct zone *zone = mz->zone;
-       struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz);
+       isolate_mode_t isolate_mode = 0;
+       int file = is_file_lru(lru);
+       struct zone *zone = lruvec_zone(lruvec);
+       struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
  
         while (unlikely(too_many_isolated(zone, file, sc))) {
                 congestion_wait(BLK_RW_ASYNC, HZ/10);
@@ -1522,10 +1217,6 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz,
                         return SWAP_CLUSTER_MAX;
         }
  
-       set_reclaim_mode(priority, sc, false);
-       if (sc->reclaim_mode & RECLAIM_MODE_LUMPYRECLAIM)
-               isolate_mode |= ISOLATE_ACTIVE;
-
         lru_add_drain();
  
         if (!sc->may_unmap)
@@ -1535,8 +1226,12 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz,
  
         spin_lock_irq(&zone->lru_lock);
  
-       nr_taken = isolate_lru_pages(nr_to_scan, mz, &page_list, &nr_scanned,
-                                    sc, isolate_mode, 0, file);
+       nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &page_list,
+                                    &nr_scanned, sc, isolate_mode, lru);
+
+       __mod_zone_page_state(zone, NR_LRU_BASE + lru, -nr_taken);
+       __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken);
+
         if (global_reclaim(sc)) {
                 zone->pages_scanned += nr_scanned;
                 if (current_is_kswapd())
@@ -1551,22 +1246,12 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz,
         if (nr_taken == 0)
                 return 0;
  
-       update_isolated_counts(mz, &page_list, &nr_anon, &nr_file);
-
-       nr_reclaimed = shrink_page_list(&page_list, mz, sc, priority,
+       nr_reclaimed = shrink_page_list(&page_list, zone, sc,
                                                 &nr_dirty, &nr_writeback);
  
-       /* Check if we should syncronously wait for writeback */
-       if (should_reclaim_stall(nr_taken, nr_reclaimed, priority, sc)) {
-               set_reclaim_mode(priority, sc, true);
-               nr_reclaimed += shrink_page_list(&page_list, mz, sc,
-                                       priority, &nr_dirty, &nr_writeback);
-       }
-
         spin_lock_irq(&zone->lru_lock);
  
-       reclaim_stat->recent_scanned[0] += nr_anon;
-       reclaim_stat->recent_scanned[1] += nr_file;
+       reclaim_stat->recent_scanned[file] += nr_taken;
  
         if (global_reclaim(sc)) {
                 if (current_is_kswapd())
@@ -1577,10 +1262,9 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz,
                                                nr_reclaimed);
         }
  
-       putback_inactive_pages(mz, &page_list);
+       putback_inactive_pages(lruvec, &page_list);
  
-       __mod_zone_page_state(zone, NR_ISOLATED_ANON, -nr_anon);
-       __mod_zone_page_state(zone, NR_ISOLATED_FILE, -nr_file);
+       __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken);
  
         spin_unlock_irq(&zone->lru_lock);
  
@@ -1609,14 +1293,15 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz,
          * DEF_PRIORITY-6 For SWAP_CLUSTER_MAX isolated pages, throttle if any
          *                     isolated page is PageWriteback
          */
-       if (nr_writeback && nr_writeback >= (nr_taken >> (DEF_PRIORITY-priority)))
+       if (nr_writeback && nr_writeback >=
+                       (nr_taken >> (DEF_PRIORITY - sc->priority)))
                 wait_iff_congested(zone, BLK_RW_ASYNC, HZ/10);
  
         trace_mm_vmscan_lru_shrink_inactive(zone->zone_pgdat->node_id,
                 zone_idx(zone),
                 nr_scanned, nr_reclaimed,
-               priority,
-               trace_shrink_flags(file, sc->reclaim_mode));
+               sc->priority,
+               trace_shrink_flags(file));
         return nr_reclaimed;
  }
  
@@ -1677,9 +1362,9 @@ static void move_active_pages_to_lru(struct zone *zone,
  }
  
  static void shrink_active_list(unsigned long nr_to_scan,
-                              struct mem_cgroup_zone *mz,
+                              struct lruvec *lruvec,
                                struct scan_control *sc,
-                              int priority, int file)
+                              enum lru_list lru)
  {
         unsigned long nr_taken;
         unsigned long nr_scanned;
@@ -1688,15 +1373,14 @@ static void shrink_active_list(unsigned long nr_to_scan,
         LIST_HEAD(l_active);
         LIST_HEAD(l_inactive);
         struct page *page;
-       struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz);
+       struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
         unsigned long nr_rotated = 0;
-       isolate_mode_t isolate_mode = ISOLATE_ACTIVE;
-       struct zone *zone = mz->zone;
+       isolate_mode_t isolate_mode = 0;
+       int file = is_file_lru(lru);
+       struct zone *zone = lruvec_zone(lruvec);
  
         lru_add_drain();
  
-       reset_reclaim_mode(sc);
-
         if (!sc->may_unmap)
                 isolate_mode |= ISOLATE_UNMAPPED;
         if (!sc->may_writepage)
@@ -1704,18 +1388,15 @@ static void shrink_active_list(unsigned long nr_to_scan,
  
         spin_lock_irq(&zone->lru_lock);
  
-       nr_taken = isolate_lru_pages(nr_to_scan, mz, &l_hold, &nr_scanned, sc,
-                                    isolate_mode, 1, file);
+       nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &l_hold,
+                                    &nr_scanned, sc, isolate_mode, lru);
         if (global_reclaim(sc))
                 zone->pages_scanned += nr_scanned;
  
         reclaim_stat->recent_scanned[file] += nr_taken;
  
         __count_zone_vm_events(PGREFILL, zone, nr_scanned);
-       if (file)
-               __mod_zone_page_state(zone, NR_ACTIVE_FILE, -nr_taken);
-       else
-               __mod_zone_page_state(zone, NR_ACTIVE_ANON, -nr_taken);
+       __mod_zone_page_state(zone, NR_LRU_BASE + lru, -nr_taken);
         __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken);
         spin_unlock_irq(&zone->lru_lock);
  
@@ -1737,7 +1418,8 @@ static void shrink_active_list(unsigned long nr_to_scan,
                         }
                 }
  
-               if (page_referenced(page, 0, mz->mem_cgroup, &vm_flags)) {
+               if (page_referenced(page, 0, sc->target_mem_cgroup,
+                                   &vm_flags)) {
                         nr_rotated += hpage_nr_pages(page);
                         /*
                          * Identify referenced, file-backed active pages and
@@ -1770,10 +1452,8 @@ static void shrink_active_list(unsigned long nr_to_scan,
          */
         reclaim_stat->recent_rotated[file] += nr_rotated;
  
-       move_active_pages_to_lru(zone, &l_active, &l_hold,
-                                               LRU_ACTIVE + file * LRU_FILE);
-       move_active_pages_to_lru(zone, &l_inactive, &l_hold,
-                                               LRU_BASE   + file * LRU_FILE);
+       move_active_pages_to_lru(zone, &l_active, &l_hold, lru);
+       move_active_pages_to_lru(zone, &l_inactive, &l_hold, lru - LRU_ACTIVE);
         __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken);
         spin_unlock_irq(&zone->lru_lock);
  
@@ -1796,13 +1476,12 @@ static int inactive_anon_is_low_global(struct zone *zone)
  
  /**
   * inactive_anon_is_low - check if anonymous pages need to be deactivated
- * @zone: zone to check
- * @sc:   scan control of this context
+ * @lruvec: LRU vector to check
   *
   * Returns true if the zone does not have enough inactive anon pages,
   * meaning some active anon pages need to be deactivated.
   */
-static int inactive_anon_is_low(struct mem_cgroup_zone *mz)
+static int inactive_anon_is_low(struct lruvec *lruvec)
  {
         /*
          * If we don't have swap space, anonymous page deactivation
@@ -1811,14 +1490,13 @@ static int inactive_anon_is_low(struct mem_cgroup_zone *mz)
         if (!total_swap_pages)
                 return 0;
  
-       if (!scanning_global_lru(mz))
-               return mem_cgroup_inactive_anon_is_low(mz->mem_cgroup,
-                                                      mz->zone);
+       if (!mem_cgroup_disabled())
+               return mem_cgroup_inactive_anon_is_low(lruvec);
  
-       return inactive_anon_is_low_global(mz->zone);
+       return inactive_anon_is_low_global(lruvec_zone(lruvec));
  }
  #else
-static inline int inactive_anon_is_low(struct mem_cgroup_zone *mz)
+static inline int inactive_anon_is_low(struct lruvec *lruvec)
  {
         return 0;
  }
@@ -1836,7 +1514,7 @@ static int inactive_file_is_low_global(struct zone *zone)
  
  /**
   * inactive_file_is_low - check if file pages need to be deactivated
- * @mz: memory cgroup and zone to check
+ * @lruvec: LRU vector to check
   *
   * When the system is doing streaming IO, memory pressure here
   * ensures that active file pages get deactivated, until more
@@ -1848,44 +1526,41 @@ static int inactive_file_is_low_global(struct zone *zone)
   * This uses a different ratio than the anonymous pages, because
   * the page cache uses a use-once replacement algorithm.
   */
-static int inactive_file_is_low(struct mem_cgroup_zone *mz)
+static int inactive_file_is_low(struct lruvec *lruvec)
  {
-       if (!scanning_global_lru(mz))
-               return mem_cgroup_inactive_file_is_low(mz->mem_cgroup,
-                                                      mz->zone);
+       if (!mem_cgroup_disabled())
+               return mem_cgroup_inactive_file_is_low(lruvec);
  
-       return inactive_file_is_low_global(mz->zone);
+       return inactive_file_is_low_global(lruvec_zone(lruvec));
  }
  
-static int inactive_list_is_low(struct mem_cgroup_zone *mz, int file)
+static int inactive_list_is_low(struct lruvec *lruvec, int file)
  {
         if (file)
-               return inactive_file_is_low(mz);
+               return inactive_file_is_low(lruvec);
         else
-               return inactive_anon_is_low(mz);
+               return inactive_anon_is_low(lruvec);
  }
  
  static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
-                                struct mem_cgroup_zone *mz,
-                                struct scan_control *sc, int priority)
+                                struct lruvec *lruvec, struct scan_control *sc)
  {
         int file = is_file_lru(lru);
  
         if (is_active_lru(lru)) {
-               if (inactive_list_is_low(mz, file))
-                       shrink_active_list(nr_to_scan, mz, sc, priority, file);
+               if (inactive_list_is_low(lruvec, file))
+                       shrink_active_list(nr_to_scan, lruvec, sc, lru);
                 return 0;
         }
  
-       return shrink_inactive_list(nr_to_scan, mz, sc, priority, file);
+       return shrink_inactive_list(nr_to_scan, lruvec, sc, lru);
  }
  
-static int vmscan_swappiness(struct mem_cgroup_zone *mz,
-                            struct scan_control *sc)
+static int vmscan_swappiness(struct scan_control *sc)
  {
         if (global_reclaim(sc))
                 return vm_swappiness;
-       return mem_cgroup_swappiness(mz->mem_cgroup);
+       return mem_cgroup_swappiness(sc->target_mem_cgroup);
  }
  
  /*
@@ -1896,17 +1571,18 @@ static int vmscan_swappiness(struct mem_cgroup_zone *mz,
   *
   * nr[0] = anon pages to scan; nr[1] = file pages to scan
   */
-static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc,
-                          unsigned long *nr, int priority)
+static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
+                          unsigned long *nr)
  {
         unsigned long anon, file, free;
         unsigned long anon_prio, file_prio;
         unsigned long ap, fp;
-       struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz);
+       struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
         u64 fraction[2], denominator;
         enum lru_list lru;
         int noswap = 0;
         bool force_scan = false;
+       struct zone *zone = lruvec_zone(lruvec);
  
         /*
          * If the zone or memcg is small, nr[l] can be 0.  This
@@ -1918,7 +1594,7 @@ static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc,
          * latencies, so it's better to scan a minimum amount there as
          * well.
          */
-       if (current_is_kswapd() && mz->zone->all_unreclaimable)
+       if (current_is_kswapd() && zone->all_unreclaimable)
                 force_scan = true;
         if (!global_reclaim(sc))
                 force_scan = true;
@@ -1932,16 +1608,16 @@ static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc,
                 goto out;
         }
  
-       anon  = zone_nr_lru_pages(mz, LRU_ACTIVE_ANON) +
-               zone_nr_lru_pages(mz, LRU_INACTIVE_ANON);
-       file  = zone_nr_lru_pages(mz, LRU_ACTIVE_FILE) +
-               zone_nr_lru_pages(mz, LRU_INACTIVE_FILE);
+       anon  = get_lruvec_size(lruvec, LRU_ACTIVE_ANON) +
+               get_lruvec_size(lruvec, LRU_INACTIVE_ANON);
+       file  = get_lruvec_size(lruvec, LRU_ACTIVE_FILE) +
+               get_lruvec_size(lruvec, LRU_INACTIVE_FILE);
  
         if (global_reclaim(sc)) {
-               free  = zone_page_state(mz->zone, NR_FREE_PAGES);
+               free  = zone_page_state(zone, NR_FREE_PAGES);
                 /* If we have very few page cache pages,
                    force-scan anon pages. */
-               if (unlikely(file + free <= high_wmark_pages(mz->zone))) {
+               if (unlikely(file + free <= high_wmark_pages(zone))) {
                         fraction[0] = 1;
                         fraction[1] = 0;
                         denominator = 1;
@@ -1953,8 +1629,8 @@ static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc,
          * With swappiness at 100, anonymous and file have the same priority.
          * This scanning priority is essentially the inverse of IO cost.
          */
-       anon_prio = vmscan_swappiness(mz, sc);
-       file_prio = 200 - vmscan_swappiness(mz, sc);
+       anon_prio = vmscan_swappiness(sc);
+       file_prio = 200 - vmscan_swappiness(sc);
  
         /*
          * OK, so we have swap space and a fair amount of page cache
@@ -1967,7 +1643,7 @@ static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc,
          *
          * anon in [0], file in [1]
          */
-       spin_lock_irq(&mz->zone->lru_lock);
+       spin_lock_irq(&zone->lru_lock);
         if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) {
                 reclaim_stat->recent_scanned[0] /= 2;
                 reclaim_stat->recent_rotated[0] /= 2;
@@ -1983,12 +1659,12 @@ static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc,
          * proportional to the fraction of recently scanned pages on
          * each list that were recently referenced and in active use.
          */
-       ap = (anon_prio + 1) * (reclaim_stat->recent_scanned[0] + 1);
+       ap = anon_prio * (reclaim_stat->recent_scanned[0] + 1);
         ap /= reclaim_stat->recent_rotated[0] + 1;
  
-       fp = (file_prio + 1) * (reclaim_stat->recent_scanned[1] + 1);
+       fp = file_prio * (reclaim_stat->recent_scanned[1] + 1);
         fp /= reclaim_stat->recent_rotated[1] + 1;
-       spin_unlock_irq(&mz->zone->lru_lock);
+       spin_unlock_irq(&zone->lru_lock);
  
         fraction[0] = ap;
         fraction[1] = fp;
@@ -1998,9 +1674,9 @@ out:
                 int file = is_file_lru(lru);
                 unsigned long scan;
  
-               scan = zone_nr_lru_pages(mz, lru);
-               if (priority || noswap) {
-                       scan >>= priority;
+               scan = get_lruvec_size(lruvec, lru);
+               if (sc->priority || noswap || !vmscan_swappiness(sc)) {
+                       scan >>= sc->priority;
                         if (!scan && force_scan)
                                 scan = SWAP_CLUSTER_MAX;
                         scan = div64_u64(scan * fraction[file], denominator);
@@ -2009,14 +1685,25 @@ out:
         }
  }
  
+/* Use reclaim/compaction for costly allocs or under memory pressure */
+static bool in_reclaim_compaction(struct scan_control *sc)
+{
+       if (COMPACTION_BUILD && sc->order &&
+                       (sc->order > PAGE_ALLOC_COSTLY_ORDER ||
+                        sc->priority < DEF_PRIORITY - 2))
+               return true;
+
+       return false;
+}
+
  /*
- * Reclaim/compaction depends on a number of pages being freed. To avoid
- * disruption to the system, a small number of order-0 pages continue to be
- * rotated and reclaimed in the normal fashion. However, by the time we get
- * back to the allocator and call try_to_compact_zone(), we ensure that
- * there are enough free pages for it to be likely successful
+ * Reclaim/compaction is used for high-order allocation requests. It reclaims
+ * order-0 pages before compacting the zone. should_continue_reclaim() returns
+ * true if more pages should be reclaimed such that when the page allocator
+ * calls try_to_compact_zone(), it will have enough free pages to succeed.
+ * It will give up earlier than that if there is difficulty reclaiming pages.
   */
-static inline bool should_continue_reclaim(struct mem_cgroup_zone *mz,
+static inline bool should_continue_reclaim(struct lruvec *lruvec,
                                         unsigned long nr_reclaimed,
                                         unsigned long nr_scanned,
                                         struct scan_control *sc)
@@ -2025,7 +1712,7 @@ static inline bool should_continue_reclaim(struct mem_cgroup_zone *mz,
         unsigned long inactive_lru_pages;
  
         /* If not in reclaim/compaction mode, stop */
-       if (!(sc->reclaim_mode & RECLAIM_MODE_COMPACTION))
+       if (!in_reclaim_compaction(sc))
                 return false;
  
         /* Consider stopping depending on scan and reclaim activity */
@@ -2056,15 +1743,16 @@ static inline bool should_continue_reclaim(struct mem_cgroup_zone *mz,
          * inactive lists are large enough, continue reclaiming
          */
         pages_for_compaction = (2UL << sc->order);
-       inactive_lru_pages = zone_nr_lru_pages(mz, LRU_INACTIVE_FILE);
+       inactive_lru_pages = get_lruvec_size(lruvec, LRU_INACTIVE_FILE);
         if (nr_swap_pages > 0)
-               inactive_lru_pages += zone_nr_lru_pages(mz, LRU_INACTIVE_ANON);
+               inactive_lru_pages += get_lruvec_size(lruvec,
+                                                     LRU_INACTIVE_ANON);
         if (sc->nr_reclaimed < pages_for_compaction &&
                         inactive_lru_pages > pages_for_compaction)
                 return true;
  
         /* If compaction would go ahead or the allocation would succeed, stop */
-       switch (compaction_suitable(mz->zone, sc->order)) {
+       switch (compaction_suitable(lruvec_zone(lruvec), sc->order)) {
         case COMPACT_PARTIAL:
         case COMPACT_CONTINUE:
                 return false;
@@ -2076,8 +1764,7 @@ static inline bool should_continue_reclaim(struct mem_cgroup_zone *mz,
  /*
   * This is a basic per-zone page freer.  Used by both kswapd and direct reclaim.
   */
-static void shrink_mem_cgroup_zone(int priority, struct mem_cgroup_zone *mz,
-                                  struct scan_control *sc)
+static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
  {
         unsigned long nr[NR_LRU_LISTS];
         unsigned long nr_to_scan;
@@ -2089,7 +1776,7 @@ static void shrink_mem_cgroup_zone(int priority, struct mem_cgroup_zone *mz,
  restart:
         nr_reclaimed = 0;
         nr_scanned = sc->nr_scanned;
-       get_scan_count(mz, sc, nr, priority);
+       get_scan_count(lruvec, sc, nr);
  
         blk_start_plug(&plug);
         while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
@@ -2101,7 +1788,7 @@ restart:
                                 nr[lru] -= nr_to_scan;
  
                                 nr_reclaimed += shrink_list(lru, nr_to_scan,
-                                                           mz, sc, priority);
+                                                           lruvec, sc);
                         }
                 }
                 /*
@@ -2112,7 +1799,8 @@ restart:
                  * with multiple processes reclaiming pages, the total
                  * freeing target can get unreasonably large.
                  */
-               if (nr_reclaimed >= nr_to_reclaim && priority < DEF_PRIORITY)
+               if (nr_reclaimed >= nr_to_reclaim &&
+                   sc->priority < DEF_PRIORITY)
                         break;
         }
         blk_finish_plug(&plug);
@@ -2122,35 +1810,33 @@ restart:
          * Even if we did not try to evict anon pages at all, we want to
          * rebalance the anon lru active/inactive ratio.
          */
-       if (inactive_anon_is_low(mz))
-               shrink_active_list(SWAP_CLUSTER_MAX, mz, sc, priority, 0);
+       if (inactive_anon_is_low(lruvec))
+               shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
+                                  sc, LRU_ACTIVE_ANON);
  
         /* reclaim/compaction might need reclaim to continue */
-       if (should_continue_reclaim(mz, nr_reclaimed,
-                                       sc->nr_scanned - nr_scanned, sc))
+       if (should_continue_reclaim(lruvec, nr_reclaimed,
+                                   sc->nr_scanned - nr_scanned, sc))
                 goto restart;
  
         throttle_vm_writeout(sc->gfp_mask);
  }
  
-static void shrink_zone(int priority, struct zone *zone,
-                       struct scan_control *sc)
+static void shrink_zone(struct zone *zone, struct scan_control *sc)
  {
         struct mem_cgroup *root = sc->target_mem_cgroup;
         struct mem_cgroup_reclaim_cookie reclaim = {
                 .zone = zone,
-               .priority = priority,
+               .priority = sc->priority,
         };
         struct mem_cgroup *memcg;
  
         memcg = mem_cgroup_iter(root, NULL, &reclaim);
         do {
-               struct mem_cgroup_zone mz = {
-                       .mem_cgroup = memcg,
-                       .zone = zone,
-               };
+               struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg);
+
+               shrink_lruvec(lruvec, sc);
  
-               shrink_mem_cgroup_zone(priority, &mz, sc);
                 /*
                  * Limit reclaim has historically picked one memcg and
                  * scanned it with decreasing priority levels until
@@ -2226,8 +1912,7 @@ static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
   * the caller that it should consider retrying the allocation instead of
   * further reclaim.
   */
-static bool shrink_zones(int priority, struct zonelist *zonelist,
-                                       struct scan_control *sc)
+static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
  {
         struct zoneref *z;
         struct zone *zone;
@@ -2254,7 +1939,8 @@ static bool shrink_zones(int priority, struct zonelist *zonelist,
                 if (global_reclaim(sc)) {
                         if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
                                 continue;
-                       if (zone->all_unreclaimable && priority != DEF_PRIORITY)
+                       if (zone->all_unreclaimable &&
+                                       sc->priority != DEF_PRIORITY)
                                 continue;       /* Let kswapd poll it */
                         if (COMPACTION_BUILD) {
                                 /*
@@ -2286,7 +1972,7 @@ static bool shrink_zones(int priority, struct zonelist *zonelist,
                         /* need some check for avoid more shrink_zone() */
                 }
  
-               shrink_zone(priority, zone, sc);
+               shrink_zone(zone, sc);
         }
  
         return aborted_reclaim;
@@ -2337,7 +2023,6 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
                                         struct scan_control *sc,
                                         struct shrink_control *shrink)
  {
-       int priority;
         unsigned long total_scanned = 0;
         struct reclaim_state *reclaim_state = current->reclaim_state;
         struct zoneref *z;
@@ -2350,11 +2035,9 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
         if (global_reclaim(sc))
                 count_vm_event(ALLOCSTALL);
  
-       for (priority = DEF_PRIORITY; priority >= 0; priority--) {
+       do {
                 sc->nr_scanned = 0;
-               if (!priority)
-                       disable_swap_token(sc->target_mem_cgroup);
-               aborted_reclaim = shrink_zones(priority, zonelist, sc);
+               aborted_reclaim = shrink_zones(zonelist, sc);
  
                 /*
                  * Don't shrink slabs when reclaiming memory from
@@ -2396,7 +2079,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
  
                 /* Take a nap, wait for some writeback to complete */
                 if (!sc->hibernation_mode && sc->nr_scanned &&
-                   priority < DEF_PRIORITY - 2) {
+                   sc->priority < DEF_PRIORITY - 2) {
                         struct zone *preferred_zone;
  
                         first_zones_zonelist(zonelist, gfp_zone(sc->gfp_mask),
@@ -2404,7 +2087,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
                                                 &preferred_zone);
                         wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/10);
                 }
-       }
+       } while (--sc->priority >= 0);
  
  out:
         delayacct_freepages_end();
@@ -2442,6 +2125,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
                 .may_unmap = 1,
                 .may_swap = 1,
                 .order = order,
+               .priority = DEF_PRIORITY,
                 .target_mem_cgroup = NULL,
                 .nodemask = nodemask,
         };
@@ -2474,17 +2158,15 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg,
                 .may_unmap = 1,
                 .may_swap = !noswap,
                 .order = 0,
+               .priority = 0,
                 .target_mem_cgroup = memcg,
         };
-       struct mem_cgroup_zone mz = {
-               .mem_cgroup = memcg,
-               .zone = zone,
-       };
+       struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg);
  
         sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
                         (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
  
-       trace_mm_vmscan_memcg_softlimit_reclaim_begin(0,
+       trace_mm_vmscan_memcg_softlimit_reclaim_begin(sc.order,
                                                       sc.may_writepage,
                                                       sc.gfp_mask);
  
@@ -2495,7 +2177,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg,
          * will pick up pages from other mem cgroup's as well. We hack
          * the priority and make it zero.
          */
-       shrink_mem_cgroup_zone(0, &mz, &sc);
+       shrink_lruvec(lruvec, &sc);
  
         trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);
  
@@ -2516,6 +2198,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
                 .may_swap = !noswap,
                 .nr_to_reclaim = SWAP_CLUSTER_MAX,
                 .order = 0,
+               .priority = DEF_PRIORITY,
                 .target_mem_cgroup = memcg,
                 .nodemask = NULL, /* we don't care the placement */
                 .gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
@@ -2546,8 +2229,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
  }
  #endif
  
-static void age_active_anon(struct zone *zone, struct scan_control *sc,
-                           int priority)
+static void age_active_anon(struct zone *zone, struct scan_control *sc)
  {
         struct mem_cgroup *memcg;
  
@@ -2556,14 +2238,11 @@ static void age_active_anon(struct zone *zone, struct scan_control *sc,
  
         memcg = mem_cgroup_iter(NULL, NULL, NULL);
         do {
-               struct mem_cgroup_zone mz = {
-                       .mem_cgroup = memcg,
-                       .zone = zone,
-               };
+               struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg);
  
-               if (inactive_anon_is_low(&mz))
-                       shrink_active_list(SWAP_CLUSTER_MAX, &mz,
-                                          sc, priority, 0);
+               if (inactive_anon_is_low(lruvec))
+                       shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
+                                          sc, LRU_ACTIVE_ANON);
  
                 memcg = mem_cgroup_iter(NULL, memcg, NULL);
         } while (memcg);
@@ -2672,7 +2351,6 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
  {
         int all_zones_ok;
         unsigned long balanced;
-       int priority;
         int i;
         int end_zone = 0;       /* Inclusive.  0 = ZONE_DMA */
         unsigned long total_scanned;
@@ -2696,18 +2374,15 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
         };
  loop_again:
         total_scanned = 0;
+       sc.priority = DEF_PRIORITY;
         sc.nr_reclaimed = 0;
         sc.may_writepage = !laptop_mode;
         count_vm_event(PAGEOUTRUN);
  
-       for (priority = DEF_PRIORITY; priority >= 0; priority--) {
+       do {
                 unsigned long lru_pages = 0;
                 int has_under_min_watermark_zone = 0;
  
-               /* The swap token gets in the way of swapout... */
-               if (!priority)
-                       disable_swap_token(NULL);
-
                 all_zones_ok = 1;
                 balanced = 0;
  
@@ -2721,14 +2396,15 @@ loop_again:
                         if (!populated_zone(zone))
                                 continue;
  
-                       if (zone->all_unreclaimable && priority != DEF_PRIORITY)
+                       if (zone->all_unreclaimable &&
+                           sc.priority != DEF_PRIORITY)
                                 continue;
  
                         /*
                          * Do some background aging of the anon list, to give
                          * pages a chance to be referenced before reclaiming.
                          */
-                       age_active_anon(zone, &sc, priority);
+                       age_active_anon(zone, &sc);
  
                         /*
                          * If the number of buffer_heads in the machine
@@ -2776,7 +2452,8 @@ loop_again:
                         if (!populated_zone(zone))
                                 continue;
  
-                       if (zone->all_unreclaimable && priority != DEF_PRIORITY)
+                       if (zone->all_unreclaimable &&
+                           sc.priority != DEF_PRIORITY)
                                 continue;
  
                         sc.nr_scanned = 0;
@@ -2820,7 +2497,7 @@ loop_again:
                                     !zone_watermark_ok_safe(zone, testorder,
                                         high_wmark_pages(zone) + balance_gap,
                                         end_zone, 0)) {
-                               shrink_zone(priority, zone, &sc);
+                               shrink_zone(zone, &sc);
  
                                 reclaim_state->reclaimed_slab = 0;
                                 nr_slab = shrink_slab(&shrink, sc.nr_scanned, lru_pages);
@@ -2877,7 +2554,7 @@ loop_again:
                  * OK, kswapd is getting into trouble.  Take a nap, then take
                  * another pass across the zones.
                  */
-               if (total_scanned && (priority < DEF_PRIORITY - 2)) {
+               if (total_scanned && (sc.priority < DEF_PRIORITY - 2)) {
                         if (has_under_min_watermark_zone)
                                 count_vm_event(KSWAPD_SKIP_CONGESTION_WAIT);
                         else
@@ -2892,7 +2569,7 @@ loop_again:
                  */
                 if (sc.nr_reclaimed >= SWAP_CLUSTER_MAX)
                         break;
-       }
+       } while (--sc.priority >= 0);
  out:
  
         /*
@@ -2942,7 +2619,8 @@ out:
                         if (!populated_zone(zone))
                                 continue;
  
-                       if (zone->all_unreclaimable && priority != DEF_PRIORITY)
+                       if (zone->all_unreclaimable &&
+                           sc.priority != DEF_PRIORITY)
                                 continue;
  
                         /* Would compaction fail due to lack of free memory? */
@@ -3209,6 +2887,7 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
                 .nr_to_reclaim = nr_to_reclaim,
                 .hibernation_mode = 1,
                 .order = 0,
+               .priority = DEF_PRIORITY,
         };
         struct shrink_control shrink = {
                 .gfp_mask = sc.gfp_mask,
@@ -3386,7 +3065,6 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
         const unsigned long nr_pages = 1 << order;
         struct task_struct *p = current;
         struct reclaim_state reclaim_state;
-       int priority;
         struct scan_control sc = {
                 .may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE),
                 .may_unmap = !!(zone_reclaim_mode & RECLAIM_SWAP),
@@ -3395,6 +3073,7 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
                                        SWAP_CLUSTER_MAX),
                 .gfp_mask = gfp_mask,
                 .order = order,
+               .priority = ZONE_RECLAIM_PRIORITY,
         };
         struct shrink_control shrink = {
                 .gfp_mask = sc.gfp_mask,
@@ -3417,11 +3096,9 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
                  * Free memory by calling shrink zone with increasing
                  * priorities until we have enough memory freed.
                  */
-               priority = ZONE_RECLAIM_PRIORITY;
                 do {
-                       shrink_zone(priority, zone, &sc);
-                       priority--;
-               } while (priority >= 0 && sc.nr_reclaimed < nr_pages);
+                       shrink_zone(zone, &sc);
+               } while (sc.nr_reclaimed < nr_pages && --sc.priority >= 0);
         }
  
         nr_slab_pages0 = zone_page_state(zone, NR_SLAB_RECLAIMABLE);
@@ -3536,7 +3213,7 @@ int page_evictable(struct page *page, struct vm_area_struct *vma)
         if (mapping_unevictable(page_mapping(page)))
                 return 0;
  
-       if (PageMlocked(page) || (vma && is_mlocked_vma(vma, page)))
+       if (PageMlocked(page) || (vma && mlocked_vma_newpage(vma, page)))
                 return 0;
  
         return 1;