mm: memcg: group swapped-out statistics counter logically

[pandora-kernel.git] / mm / memcontrol.c
diff --git a/mm/memcontrol.c b/mm/memcontrol.c

index f342778..1118e02 100644 (file)
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -59,7 +59,7 @@
  
  struct cgroup_subsys mem_cgroup_subsys __read_mostly;
  #define MEM_CGROUP_RECLAIM_RETRIES     5
-struct mem_cgroup *root_mem_cgroup __read_mostly;
+static struct mem_cgroup *root_mem_cgroup __read_mostly;
  
  #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
  /* Turned on only when memory cgroup is enabled && really_do_swap_account = 1 */
@@ -73,7 +73,7 @@ static int really_do_swap_account __initdata = 0;
  #endif
  
  #else
-#define do_swap_account                (0)
+#define do_swap_account                0
  #endif
  
  
@@ -88,14 +88,12 @@ enum mem_cgroup_stat_index {
         MEM_CGROUP_STAT_RSS,       /* # of pages charged as anon rss */
         MEM_CGROUP_STAT_FILE_MAPPED,  /* # of pages charged as file rss */
         MEM_CGROUP_STAT_SWAPOUT, /* # of pages, swapped out */
-       MEM_CGROUP_STAT_DATA, /* end of data requires synchronization */
         MEM_CGROUP_STAT_NSTATS,
  };
  
  enum mem_cgroup_events_index {
         MEM_CGROUP_EVENTS_PGPGIN,       /* # of pages paged in */
         MEM_CGROUP_EVENTS_PGPGOUT,      /* # of pages paged out */
-       MEM_CGROUP_EVENTS_COUNT,        /* # of pages paged in/out */
         MEM_CGROUP_EVENTS_PGFAULT,      /* # of page-faults */
         MEM_CGROUP_EVENTS_PGMAJFAULT,   /* # of major page-faults */
         MEM_CGROUP_EVENTS_NSTATS,
@@ -112,13 +110,14 @@ enum mem_cgroup_events_target {
         MEM_CGROUP_TARGET_NUMAINFO,
         MEM_CGROUP_NTARGETS,
  };
-#define THRESHOLDS_EVENTS_TARGET (128)
-#define SOFTLIMIT_EVENTS_TARGET (1024)
-#define NUMAINFO_EVENTS_TARGET (1024)
+#define THRESHOLDS_EVENTS_TARGET 128
+#define SOFTLIMIT_EVENTS_TARGET 1024
+#define NUMAINFO_EVENTS_TARGET 1024
  
  struct mem_cgroup_stat_cpu {
         long count[MEM_CGROUP_STAT_NSTATS];
         unsigned long events[MEM_CGROUP_EVENTS_NSTATS];
+       unsigned long nr_page_events;
         unsigned long targets[MEM_CGROUP_NTARGETS];
  };
  
@@ -138,7 +137,6 @@ struct mem_cgroup_per_zone {
  
         struct mem_cgroup_reclaim_iter reclaim_iter[DEF_PRIORITY + 1];
  
-       struct zone_reclaim_stat reclaim_stat;
         struct rb_node          tree_node;      /* RB tree node */
         unsigned long long      usage_in_excess;/* Set to the value by which */
                                                 /* the soft limit is exceeded*/
@@ -182,7 +180,7 @@ struct mem_cgroup_threshold {
  
  /* For threshold */
  struct mem_cgroup_threshold_ary {
-       /* An array index points to threshold just below usage. */
+       /* An array index points to threshold just below or equal to usage. */
         int current_threshold;
         /* Size of entries[] */
         unsigned int size;
@@ -305,7 +303,7 @@ struct mem_cgroup {
         /*
          * percpu counter.
          */
-       struct mem_cgroup_stat_cpu *stat;
+       struct mem_cgroup_stat_cpu __percpu *stat;
         /*
          * used when a cpu is offlined or other synchronizations
          * See mem_cgroup_read_stat().
@@ -360,8 +358,8 @@ static bool move_file(void)
   * Maximum loops in mem_cgroup_hierarchical_reclaim(), used for soft
   * limit reclaim to prevent infinite loops, if they ever occur.
   */
-#define        MEM_CGROUP_MAX_RECLAIM_LOOPS            (100)
-#define        MEM_CGROUP_MAX_SOFT_LIMIT_RECLAIM_LOOPS (2)
+#define        MEM_CGROUP_MAX_RECLAIM_LOOPS            100
+#define        MEM_CGROUP_MAX_SOFT_LIMIT_RECLAIM_LOOPS 2
  
  enum charge_type {
         MEM_CGROUP_CHARGE_TYPE_CACHE = 0,
@@ -377,8 +375,8 @@ enum charge_type {
  #define _MEM                   (0)
  #define _MEMSWAP               (1)
  #define _OOM_TYPE              (2)
-#define MEMFILE_PRIVATE(x, val)        (((x) << 16) | (val))
-#define MEMFILE_TYPE(val)      (((val) >> 16) & 0xffff)
+#define MEMFILE_PRIVATE(x, val)        ((x) << 16 | (val))
+#define MEMFILE_TYPE(val)      ((val) >> 16 & 0xffff)
  #define MEMFILE_ATTR(val)      ((val) & 0xffff)
  /* Used for OOM nofiier */
  #define OOM_CONTROL            (0)
@@ -718,12 +716,21 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,
                 nr_pages = -nr_pages; /* for event */
         }
  
-       __this_cpu_add(memcg->stat->events[MEM_CGROUP_EVENTS_COUNT], nr_pages);
+       __this_cpu_add(memcg->stat->nr_page_events, nr_pages);
  
         preempt_enable();
  }
  
  unsigned long
+mem_cgroup_get_lruvec_size(struct lruvec *lruvec, enum lru_list lru)
+{
+       struct mem_cgroup_per_zone *mz;
+
+       mz = container_of(lruvec, struct mem_cgroup_per_zone, lruvec);
+       return mz->lru_size[lru];
+}
+
+static unsigned long
  mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *memcg, int nid, int zid,
                         unsigned int lru_mask)
  {
@@ -770,7 +777,7 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
  {
         unsigned long val, next;
  
-       val = __this_cpu_read(memcg->stat->events[MEM_CGROUP_EVENTS_COUNT]);
+       val = __this_cpu_read(memcg->stat->nr_page_events);
         next = __this_cpu_read(memcg->stat->targets[target]);
         /* from time_after() in jiffies.h */
         if ((long)next - (long)val < 0) {
@@ -1116,11 +1123,6 @@ void mem_cgroup_lru_del_list(struct page *page, enum lru_list lru)
         mz->lru_size[lru] -= 1 << compound_order(page);
  }
  
-void mem_cgroup_lru_del(struct page *page)
-{
-       mem_cgroup_lru_del_list(page, page_lru(page));
-}
-
  /**
   * mem_cgroup_lru_move_lists - account for moving a page between lrus
   * @zone: zone of the page
@@ -1149,15 +1151,25 @@ struct lruvec *mem_cgroup_lru_move_lists(struct zone *zone,
   * Checks whether given mem is same or in the root_mem_cgroup's
   * hierarchy subtree
   */
+bool __mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg,
+                                 struct mem_cgroup *memcg)
+{
+       if (root_memcg == memcg)
+               return true;
+       if (!root_memcg->use_hierarchy)
+               return false;
+       return css_is_ancestor(&memcg->css, &root_memcg->css);
+}
+
  static bool mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg,
-               struct mem_cgroup *memcg)
+                                      struct mem_cgroup *memcg)
  {
-       if (root_memcg != memcg) {
-               return (root_memcg->use_hierarchy &&
-                       css_is_ancestor(&memcg->css, &root_memcg->css));
-       }
+       bool ret;
  
-       return true;
+       rcu_read_lock();
+       ret = __mem_cgroup_same_or_subtree(root_memcg, memcg);
+       rcu_read_unlock();
+       return ret;
  }
  
  int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *memcg)
@@ -1195,19 +1207,15 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *memcg)
         return ret;
  }
  
-int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone)
+int mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec)
  {
         unsigned long inactive_ratio;
-       int nid = zone_to_nid(zone);
-       int zid = zone_idx(zone);
         unsigned long inactive;
         unsigned long active;
         unsigned long gb;
  
-       inactive = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid,
-                                               BIT(LRU_INACTIVE_ANON));
-       active = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid,
-                                             BIT(LRU_ACTIVE_ANON));
+       inactive = mem_cgroup_get_lruvec_size(lruvec, LRU_INACTIVE_ANON);
+       active = mem_cgroup_get_lruvec_size(lruvec, LRU_ACTIVE_ANON);
  
         gb = (inactive + active) >> (30 - PAGE_SHIFT);
         if (gb)
@@ -1218,31 +1226,17 @@ int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone)
         return inactive * inactive_ratio < active;
  }
  
-int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg, struct zone *zone)
+int mem_cgroup_inactive_file_is_low(struct lruvec *lruvec)
  {
         unsigned long active;
         unsigned long inactive;
-       int zid = zone_idx(zone);
-       int nid = zone_to_nid(zone);
  
-       inactive = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid,
-                                               BIT(LRU_INACTIVE_FILE));
-       active = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid,
-                                             BIT(LRU_ACTIVE_FILE));
+       inactive = mem_cgroup_get_lruvec_size(lruvec, LRU_INACTIVE_FILE);
+       active = mem_cgroup_get_lruvec_size(lruvec, LRU_ACTIVE_FILE);
  
         return (active > inactive);
  }
  
-struct zone_reclaim_stat *mem_cgroup_get_reclaim_stat(struct mem_cgroup *memcg,
-                                                     struct zone *zone)
-{
-       int nid = zone_to_nid(zone);
-       int zid = zone_idx(zone);
-       struct mem_cgroup_per_zone *mz = mem_cgroup_zoneinfo(memcg, nid, zid);
-
-       return &mz->reclaim_stat;
-}
-
  struct zone_reclaim_stat *
  mem_cgroup_get_reclaim_stat_from_page(struct page *page)
  {
@@ -1258,7 +1252,7 @@ mem_cgroup_get_reclaim_stat_from_page(struct page *page)
         /* Ensure pc->mem_cgroup is visible after reading PCG_USED. */
         smp_rmb();
         mz = page_cgroup_zoneinfo(pc->mem_cgroup, page);
-       return &mz->reclaim_stat;
+       return &mz->lruvec.reclaim_stat;
  }
  
  #define mem_cgroup_from_res_counter(counter, member)   \
@@ -1634,7 +1628,7 @@ int mem_cgroup_select_victim_node(struct mem_cgroup *memcg)
   * unused nodes. But scan_nodes is lazily updated and may not cotain
   * enough new information. We need to do double check.
   */
-bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap)
+static bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap)
  {
         int nid;
  
@@ -1669,7 +1663,7 @@ int mem_cgroup_select_victim_node(struct mem_cgroup *memcg)
         return 0;
  }
  
-bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap)
+static bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap)
  {
         return test_mem_cgroup_node_reclaimable(memcg, 0, noswap);
  }
@@ -1843,7 +1837,8 @@ static void memcg_oom_recover(struct mem_cgroup *memcg)
  /*
   * try to call OOM killer. returns false if we should exit memory-reclaim loop.
   */
-bool mem_cgroup_handle_oom(struct mem_cgroup *memcg, gfp_t mask, int order)
+static bool mem_cgroup_handle_oom(struct mem_cgroup *memcg, gfp_t mask,
+                                 int order)
  {
         struct oom_wait_info owait;
         bool locked, need_to_kill;
@@ -1992,7 +1987,7 @@ struct memcg_stock_pcp {
         unsigned int nr_pages;
         struct work_struct work;
         unsigned long flags;
-#define FLUSHING_CACHED_CHARGE (0)
+#define FLUSHING_CACHED_CHARGE 0
  };
  static DEFINE_PER_CPU(struct memcg_stock_pcp, memcg_stock);
  static DEFINE_MUTEX(percpu_charge_mutex);
@@ -2139,7 +2134,7 @@ static void mem_cgroup_drain_pcp_counter(struct mem_cgroup *memcg, int cpu)
         int i;
  
         spin_lock(&memcg->pcp_counter_lock);
-       for (i = 0; i < MEM_CGROUP_STAT_DATA; i++) {
+       for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) {
                 long x = per_cpu(memcg->stat->count[i], cpu);
  
                 per_cpu(memcg->stat->count[i], cpu) = 0;
@@ -2426,6 +2421,24 @@ static void __mem_cgroup_cancel_charge(struct mem_cgroup *memcg,
         }
  }
  
+/*
+ * Cancel charges in this cgroup....doesn't propagate to parent cgroup.
+ * This is useful when moving usage to parent cgroup.
+ */
+static void __mem_cgroup_cancel_local_charge(struct mem_cgroup *memcg,
+                                       unsigned int nr_pages)
+{
+       unsigned long bytes = nr_pages * PAGE_SIZE;
+
+       if (mem_cgroup_is_root(memcg))
+               return;
+
+       res_counter_uncharge_until(&memcg->res, memcg->res.parent, bytes);
+       if (do_swap_account)
+               res_counter_uncharge_until(&memcg->memsw,
+                                               memcg->memsw.parent, bytes);
+}
+
  /*
   * A helper function to get mem_cgroup from ID. must be called under
   * rcu_read_lock(). The caller must check css_is_removed() or some if
@@ -2547,7 +2560,7 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
  
  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
  
-#define PCGF_NOCOPY_AT_SPLIT ((1 << PCG_LOCK) | (1 << PCG_MIGRATION))
+#define PCGF_NOCOPY_AT_SPLIT (1 << PCG_LOCK | 1 << PCG_MIGRATION)
  /*
   * Because tail pages are not marked as "used", set it. We're under
   * zone->lru_lock, 'splitting on pmd' and compound_lock.
@@ -2578,23 +2591,19 @@ void mem_cgroup_split_huge_fixup(struct page *head)
   * @pc:        page_cgroup of the page.
   * @from: mem_cgroup which the page is moved from.
   * @to:        mem_cgroup which the page is moved to. @from != @to.
- * @uncharge: whether we should call uncharge and css_put against @from.
   *
   * The caller must confirm following.
   * - page is not on LRU (isolate_page() is useful.)
   * - compound_lock is held when nr_pages > 1
   *
- * This function doesn't do "charge" nor css_get to new cgroup. It should be
- * done by a caller(__mem_cgroup_try_charge would be useful). If @uncharge is
- * true, this function does "uncharge" from old cgroup, but it doesn't if
- * @uncharge is false, so a caller should do "uncharge".
+ * This function doesn't do "charge" to new cgroup and doesn't do "uncharge"
+ * from old cgroup.
   */
  static int mem_cgroup_move_account(struct page *page,
                                    unsigned int nr_pages,
                                    struct page_cgroup *pc,
                                    struct mem_cgroup *from,
-                                  struct mem_cgroup *to,
-                                  bool uncharge)
+                                  struct mem_cgroup *to)
  {
         unsigned long flags;
         int ret;
@@ -2628,9 +2637,6 @@ static int mem_cgroup_move_account(struct page *page,
                 preempt_enable();
         }
         mem_cgroup_charge_statistics(from, anon, -nr_pages);
-       if (uncharge)
-               /* This is not "cancel", but cancel_charge does all we need. */
-               __mem_cgroup_cancel_charge(from, nr_pages);
  
         /* caller should have done css_get */
         pc->mem_cgroup = to;
@@ -2664,15 +2670,13 @@ static int mem_cgroup_move_parent(struct page *page,
                                   struct mem_cgroup *child,
                                   gfp_t gfp_mask)
  {
-       struct cgroup *cg = child->css.cgroup;
-       struct cgroup *pcg = cg->parent;
         struct mem_cgroup *parent;
         unsigned int nr_pages;
         unsigned long uninitialized_var(flags);
         int ret;
  
         /* Is ROOT ? */
-       if (!pcg)
+       if (mem_cgroup_is_root(child))
                 return -EINVAL;
  
         ret = -EBUSY;
@@ -2683,21 +2687,23 @@ static int mem_cgroup_move_parent(struct page *page,
  
         nr_pages = hpage_nr_pages(page);
  
-       parent = mem_cgroup_from_cont(pcg);
-       ret = __mem_cgroup_try_charge(NULL, gfp_mask, nr_pages, &parent, false);
-       if (ret)
-               goto put_back;
+       parent = parent_mem_cgroup(child);
+       /*
+        * If no parent, move charges to root cgroup.
+        */
+       if (!parent)
+               parent = root_mem_cgroup;
  
         if (nr_pages > 1)
                 flags = compound_lock_irqsave(page);
  
-       ret = mem_cgroup_move_account(page, nr_pages, pc, child, parent, true);
-       if (ret)
-               __mem_cgroup_cancel_charge(parent, nr_pages);
+       ret = mem_cgroup_move_account(page, nr_pages,
+                               pc, child, parent);
+       if (!ret)
+               __mem_cgroup_cancel_local_charge(child, nr_pages);
  
         if (nr_pages > 1)
                 compound_unlock_irqrestore(page, flags);
-put_back:
         putback_lru_page(page);
  put:
         put_page(page);
@@ -2845,24 +2851,7 @@ __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *memcg,
          */
         if (do_swap_account && PageSwapCache(page)) {
                 swp_entry_t ent = {.val = page_private(page)};
-               struct mem_cgroup *swap_memcg;
-               unsigned short id;
-
-               id = swap_cgroup_record(ent, 0);
-               rcu_read_lock();
-               swap_memcg = mem_cgroup_lookup(id);
-               if (swap_memcg) {
-                       /*
-                        * This recorded memcg can be obsolete one. So, avoid
-                        * calling css_tryget
-                        */
-                       if (!mem_cgroup_is_root(swap_memcg))
-                               res_counter_uncharge(&swap_memcg->memsw,
-                                                    PAGE_SIZE);
-                       mem_cgroup_swap_statistics(swap_memcg, false);
-                       mem_cgroup_put(swap_memcg);
-               }
-               rcu_read_unlock();
+               mem_cgroup_uncharge_swap(ent);
         }
         /*
          * At swapin, we may charge account against cgroup which has no tasks.
@@ -3155,7 +3144,6 @@ void mem_cgroup_uncharge_swap(swp_entry_t ent)
   * @entry: swap entry to be moved
   * @from:  mem_cgroup which the entry is moved from
   * @to:  mem_cgroup which the entry is moved to
- * @need_fixup: whether we should fixup res_counters and refcounts.
   *
   * It succeeds only when the swap_cgroup's record for this entry is the same
   * as the mem_cgroup's id of @from.
@@ -3166,7 +3154,7 @@ void mem_cgroup_uncharge_swap(swp_entry_t ent)
   * both res and memsw, and called css_get().
   */
  static int mem_cgroup_move_swap_account(swp_entry_t entry,
-               struct mem_cgroup *from, struct mem_cgroup *to, bool need_fixup)
+                               struct mem_cgroup *from, struct mem_cgroup *to)
  {
         unsigned short old_id, new_id;
  
@@ -3185,24 +3173,13 @@ static int mem_cgroup_move_swap_account(swp_entry_t entry,
                  * swap-in, the refcount of @to might be decreased to 0.
                  */
                 mem_cgroup_get(to);
-               if (need_fixup) {
-                       if (!mem_cgroup_is_root(from))
-                               res_counter_uncharge(&from->memsw, PAGE_SIZE);
-                       mem_cgroup_put(from);
-                       /*
-                        * we charged both to->res and to->memsw, so we should
-                        * uncharge to->res.
-                        */
-                       if (!mem_cgroup_is_root(to))
-                               res_counter_uncharge(&to->res, PAGE_SIZE);
-               }
                 return 0;
         }
         return -EINVAL;
  }
  #else
  static inline int mem_cgroup_move_swap_account(swp_entry_t entry,
-               struct mem_cgroup *from, struct mem_cgroup *to, bool need_fixup)
+                               struct mem_cgroup *from, struct mem_cgroup *to)
  {
         return -EINVAL;
  }
@@ -3363,7 +3340,7 @@ void mem_cgroup_end_migration(struct mem_cgroup *memcg,
  void mem_cgroup_replace_page_cache(struct page *oldpage,
                                   struct page *newpage)
  {
-       struct mem_cgroup *memcg;
+       struct mem_cgroup *memcg = NULL;
         struct page_cgroup *pc;
         enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE;
  
@@ -3373,11 +3350,20 @@ void mem_cgroup_replace_page_cache(struct page *oldpage,
         pc = lookup_page_cgroup(oldpage);
         /* fix accounting on old pages */
         lock_page_cgroup(pc);
-       memcg = pc->mem_cgroup;
-       mem_cgroup_charge_statistics(memcg, false, -1);
-       ClearPageCgroupUsed(pc);
+       if (PageCgroupUsed(pc)) {
+               memcg = pc->mem_cgroup;
+               mem_cgroup_charge_statistics(memcg, false, -1);
+               ClearPageCgroupUsed(pc);
+       }
         unlock_page_cgroup(pc);
  
+       /*
+        * When called from shmem_replace_page(), in some cases the
+        * oldpage has already been charged, and in some cases not.
+        */
+       if (!memcg)
+               return;
+
         if (PageSwapBacked(oldpage))
                 type = MEM_CGROUP_CHARGE_TYPE_SHMEM;
  
@@ -3793,7 +3779,7 @@ try_to_free:
         goto move_account;
  }
  
-int mem_cgroup_force_empty_write(struct cgroup *cont, unsigned int event)
+static int mem_cgroup_force_empty_write(struct cgroup *cont, unsigned int event)
  {
         return mem_cgroup_force_empty(mem_cgroup_from_cont(cont), true);
  }
@@ -4057,9 +4043,9 @@ enum {
         MCS_CACHE,
         MCS_RSS,
         MCS_FILE_MAPPED,
+       MCS_SWAP,
         MCS_PGPGIN,
         MCS_PGPGOUT,
-       MCS_SWAP,
         MCS_PGFAULT,
         MCS_PGMAJFAULT,
         MCS_INACTIVE_ANON,
@@ -4074,26 +4060,22 @@ struct mcs_total_stat {
         s64 stat[NR_MCS_STAT];
  };
  
-struct {
-       char *local_name;
-       char *total_name;
-} memcg_stat_strings[NR_MCS_STAT] = {
-       {"cache", "total_cache"},
-       {"rss", "total_rss"},
-       {"mapped_file", "total_mapped_file"},
-       {"pgpgin", "total_pgpgin"},
-       {"pgpgout", "total_pgpgout"},
-       {"swap", "total_swap"},
-       {"pgfault", "total_pgfault"},
-       {"pgmajfault", "total_pgmajfault"},
-       {"inactive_anon", "total_inactive_anon"},
-       {"active_anon", "total_active_anon"},
-       {"inactive_file", "total_inactive_file"},
-       {"active_file", "total_active_file"},
-       {"unevictable", "total_unevictable"}
+static const char *memcg_stat_strings[NR_MCS_STAT] = {
+       "cache",
+       "rss",
+       "mapped_file",
+       "swap",
+       "pgpgin",
+       "pgpgout",
+       "pgfault",
+       "pgmajfault",
+       "inactive_anon",
+       "active_anon",
+       "inactive_file",
+       "active_file",
+       "unevictable",
  };
  
-
  static void
  mem_cgroup_get_local_stat(struct mem_cgroup *memcg, struct mcs_total_stat *s)
  {
@@ -4142,12 +4124,12 @@ mem_cgroup_get_total_stat(struct mem_cgroup *memcg, struct mcs_total_stat *s)
  }
  
  #ifdef CONFIG_NUMA
-static int mem_control_numa_stat_show(struct seq_file *m, void *arg)
+static int mem_control_numa_stat_show(struct cgroup *cont, struct cftype *cft,
+                                     struct seq_file *m)
  {
         int nid;
         unsigned long total_nr, file_nr, anon_nr, unevictable_nr;
         unsigned long node_nr;
-       struct cgroup *cont = m->private;
         struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
  
         total_nr = mem_cgroup_nr_lru_pages(memcg, LRU_ALL);
@@ -4189,7 +4171,7 @@ static int mem_control_numa_stat_show(struct seq_file *m, void *arg)
  #endif /* CONFIG_NUMA */
  
  static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
-                                struct cgroup_map_cb *cb)
+                                struct seq_file *m)
  {
         struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
         struct mcs_total_stat mystat;
@@ -4202,16 +4184,18 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
         for (i = 0; i < NR_MCS_STAT; i++) {
                 if (i == MCS_SWAP && !do_swap_account)
                         continue;
-               cb->fill(cb, memcg_stat_strings[i].local_name, mystat.stat[i]);
+               seq_printf(m, "%s %llu\n", memcg_stat_strings[i],
+                          (unsigned long long)mystat.stat[i]);
         }
  
         /* Hierarchical information */
         {
                 unsigned long long limit, memsw_limit;
                 memcg_get_hierarchical_limit(memcg, &limit, &memsw_limit);
-               cb->fill(cb, "hierarchical_memory_limit", limit);
+               seq_printf(m, "hierarchical_memory_limit %llu\n", limit);
                 if (do_swap_account)
-                       cb->fill(cb, "hierarchical_memsw_limit", memsw_limit);
+                       seq_printf(m, "hierarchical_memsw_limit %llu\n",
+                                  memsw_limit);
         }
  
         memset(&mystat, 0, sizeof(mystat));
@@ -4219,33 +4203,32 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
         for (i = 0; i < NR_MCS_STAT; i++) {
                 if (i == MCS_SWAP && !do_swap_account)
                         continue;
-               cb->fill(cb, memcg_stat_strings[i].total_name, mystat.stat[i]);
+               seq_printf(m, "total_%s %llu\n", memcg_stat_strings[i],
+                          (unsigned long long)mystat.stat[i]);
         }
  
  #ifdef CONFIG_DEBUG_VM
         {
                 int nid, zid;
                 struct mem_cgroup_per_zone *mz;
+               struct zone_reclaim_stat *rstat;
                 unsigned long recent_rotated[2] = {0, 0};
                 unsigned long recent_scanned[2] = {0, 0};
  
                 for_each_online_node(nid)
                         for (zid = 0; zid < MAX_NR_ZONES; zid++) {
                                 mz = mem_cgroup_zoneinfo(memcg, nid, zid);
+                               rstat = &mz->lruvec.reclaim_stat;
  
-                               recent_rotated[0] +=
-                                       mz->reclaim_stat.recent_rotated[0];
-                               recent_rotated[1] +=
-                                       mz->reclaim_stat.recent_rotated[1];
-                               recent_scanned[0] +=
-                                       mz->reclaim_stat.recent_scanned[0];
-                               recent_scanned[1] +=
-                                       mz->reclaim_stat.recent_scanned[1];
+                               recent_rotated[0] += rstat->recent_rotated[0];
+                               recent_rotated[1] += rstat->recent_rotated[1];
+                               recent_scanned[0] += rstat->recent_scanned[0];
+                               recent_scanned[1] += rstat->recent_scanned[1];
                         }
-               cb->fill(cb, "recent_rotated_anon", recent_rotated[0]);
-               cb->fill(cb, "recent_rotated_file", recent_rotated[1]);
-               cb->fill(cb, "recent_scanned_anon", recent_scanned[0]);
-               cb->fill(cb, "recent_scanned_file", recent_scanned[1]);
+               seq_printf(m, "recent_rotated_anon %lu\n", recent_rotated[0]);
+               seq_printf(m, "recent_rotated_file %lu\n", recent_rotated[1]);
+               seq_printf(m, "recent_scanned_anon %lu\n", recent_scanned[0]);
+               seq_printf(m, "recent_scanned_file %lu\n", recent_scanned[1]);
         }
  #endif
  
@@ -4307,7 +4290,7 @@ static void __mem_cgroup_threshold(struct mem_cgroup *memcg, bool swap)
         usage = mem_cgroup_usage(memcg, swap);
  
         /*
-        * current_threshold points to threshold just below usage.
+        * current_threshold points to threshold just below or equal to usage.
          * If it's not true, a threshold was crossed after last
          * call of __mem_cgroup_threshold().
          */
@@ -4433,14 +4416,15 @@ static int mem_cgroup_usage_register_event(struct cgroup *cgrp,
         /* Find current threshold */
         new->current_threshold = -1;
         for (i = 0; i < size; i++) {
-               if (new->entries[i].threshold < usage) {
+               if (new->entries[i].threshold <= usage) {
                         /*
                          * new->current_threshold will not be used until
                          * rcu_assign_pointer(), so it's safe to increment
                          * it here.
                          */
                         ++new->current_threshold;
-               }
+               } else
+                       break;
         }
  
         /* Free old spare buffer and save old primary buffer as spare */
@@ -4509,7 +4493,7 @@ static void mem_cgroup_usage_unregister_event(struct cgroup *cgrp,
                         continue;
  
                 new->entries[j] = thresholds->primary->entries[i];
-               if (new->entries[j].threshold < usage) {
+               if (new->entries[j].threshold <= usage) {
                         /*
                          * new->current_threshold will not be used
                          * until rcu_assign_pointer(), so it's safe to increment
@@ -4623,22 +4607,6 @@ static int mem_cgroup_oom_control_write(struct cgroup *cgrp,
         return 0;
  }
  
-#ifdef CONFIG_NUMA
-static const struct file_operations mem_control_numa_stat_file_operations = {
-       .read = seq_read,
-       .llseek = seq_lseek,
-       .release = single_release,
-};
-
-static int mem_control_numa_stat_open(struct inode *unused, struct file *file)
-{
-       struct cgroup *cont = file->f_dentry->d_parent->d_fsdata;
-
-       file->f_op = &mem_control_numa_stat_file_operations;
-       return single_open(file, mem_control_numa_stat_show, cont);
-}
-#endif /* CONFIG_NUMA */
-
  #ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
  static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
  {
@@ -4694,7 +4662,7 @@ static struct cftype mem_cgroup_files[] = {
         },
         {
                 .name = "stat",
-               .read_map = mem_control_stat_show,
+               .read_seq_string = mem_control_stat_show,
         },
         {
                 .name = "force_empty",
@@ -4726,8 +4694,7 @@ static struct cftype mem_cgroup_files[] = {
  #ifdef CONFIG_NUMA
         {
                 .name = "numa_stat",
-               .open = mem_control_numa_stat_open,
-               .mode = S_IRUGO,
+               .read_seq_string = mem_control_numa_stat_show,
         },
  #endif
  #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
@@ -4764,7 +4731,6 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node)
  {
         struct mem_cgroup_per_node *pn;
         struct mem_cgroup_per_zone *mz;
-       enum lru_list lru;
         int zone, tmp = node;
         /*
          * This routine is called against possible nodes.
@@ -4782,8 +4748,7 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node)
  
         for (zone = 0; zone < MAX_NR_ZONES; zone++) {
                 mz = &pn->zoneinfo[zone];
-               for_each_lru(lru)
-                       INIT_LIST_HEAD(&mz->lruvec.lists[lru]);
+               lruvec_init(&mz->lruvec, &NODE_DATA(node)->node_zones[zone]);
                 mz->usage_in_excess = 0;
                 mz->on_tree = false;
                 mz->memcg = memcg;
@@ -5135,7 +5100,7 @@ static struct page *mc_handle_present_pte(struct vm_area_struct *vma,
                 return NULL;
         if (PageAnon(page)) {
                 /* we don't move shared anon */
-               if (!move_anon() || page_mapcount(page) > 2)
+               if (!move_anon())
                         return NULL;
         } else if (!move_file())
                 /* we ignore mapcount for file pages */
@@ -5146,32 +5111,37 @@ static struct page *mc_handle_present_pte(struct vm_area_struct *vma,
         return page;
  }
  
+#ifdef CONFIG_SWAP
  static struct page *mc_handle_swap_pte(struct vm_area_struct *vma,
                         unsigned long addr, pte_t ptent, swp_entry_t *entry)
  {
-       int usage_count;
         struct page *page = NULL;
         swp_entry_t ent = pte_to_swp_entry(ptent);
  
         if (!move_anon() || non_swap_entry(ent))
                 return NULL;
-       usage_count = mem_cgroup_count_swap_user(ent, &page);
-       if (usage_count > 1) { /* we don't move shared anon */
-               if (page)
-                       put_page(page);
-               return NULL;
-       }
+       /*
+        * Because lookup_swap_cache() updates some statistics counters,
+        * we call find_get_page() with swapper_space directly.
+        */
+       page = find_get_page(&swapper_space, ent.val);
         if (do_swap_account)
                 entry->val = ent.val;
  
         return page;
  }
+#else
+static struct page *mc_handle_swap_pte(struct vm_area_struct *vma,
+                       unsigned long addr, pte_t ptent, swp_entry_t *entry)
+{
+       return NULL;
+}
+#endif
  
  static struct page *mc_handle_file_pte(struct vm_area_struct *vma,
                         unsigned long addr, pte_t ptent, swp_entry_t *entry)
  {
         struct page *page = NULL;
-       struct inode *inode;
         struct address_space *mapping;
         pgoff_t pgoff;
  
@@ -5180,7 +5150,6 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma,
         if (!move_file())
                 return NULL;
  
-       inode = vma->vm_file->f_path.dentry->d_inode;
         mapping = vma->vm_file->f_mapping;
         if (pte_none(ptent))
                 pgoff = linear_page_index(vma, addr);
@@ -5479,8 +5448,7 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
                         if (!isolate_lru_page(page)) {
                                 pc = lookup_page_cgroup(page);
                                 if (!mem_cgroup_move_account(page, HPAGE_PMD_NR,
-                                                            pc, mc.from, mc.to,
-                                                            false)) {
+                                                       pc, mc.from, mc.to)) {
                                         mc.precharge -= HPAGE_PMD_NR;
                                         mc.moved_charge += HPAGE_PMD_NR;
                                 }
@@ -5510,7 +5478,7 @@ retry:
                                 goto put;
                         pc = lookup_page_cgroup(page);
                         if (!mem_cgroup_move_account(page, 1, pc,
-                                                    mc.from, mc.to, false)) {
+                                                    mc.from, mc.to)) {
                                 mc.precharge--;
                                 /* we uncharge from mc.from later. */
                                 mc.moved_charge++;
@@ -5521,8 +5489,7 @@ put:                      /* get_mctgt_type() gets the page */
                         break;
                 case MC_TARGET_SWAP:
                         ent = target.ent;
-                       if (!mem_cgroup_move_swap_account(ent,
-                                               mc.from, mc.to, false)) {
+                       if (!mem_cgroup_move_swap_account(ent, mc.from, mc.to)) {
                                 mc.precharge--;
                                 /* we fixup refcnts and charges later. */
                                 mc.moved_swap++;
@@ -5598,7 +5565,6 @@ static void mem_cgroup_move_task(struct cgroup *cont,
         if (mm) {
                 if (mc.to)
                         mem_cgroup_move_charge(mm);
-               put_swap_token(mm);
                 mmput(mm);
         }
         if (mc.to)