mm, memcg: fix potential undefined behaviour in page stat accounting
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 3734fd6..031ca34 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1328,41 +1328,24 @@ void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru,
        VM_BUG_ON((long)(*lru_size) < 0);
 }
 
-/*
- * Checks whether given mem is same or in the root_mem_cgroup's
- * hierarchy subtree
- */
-bool __mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg,
-                                 struct mem_cgroup *memcg)
+bool mem_cgroup_is_descendant(struct mem_cgroup *memcg, struct mem_cgroup *root)
 {
-       if (root_memcg == memcg)
+       if (root == memcg)
                return true;
-       if (!root_memcg->use_hierarchy || !memcg)
+       if (!root->use_hierarchy)
                return false;
-       return cgroup_is_descendant(memcg->css.cgroup, root_memcg->css.cgroup);
-}
-
-static bool mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg,
-                                      struct mem_cgroup *memcg)
-{
-       bool ret;
-
-       rcu_read_lock();
-       ret = __mem_cgroup_same_or_subtree(root_memcg, memcg);
-       rcu_read_unlock();
-       return ret;
+       return cgroup_is_descendant(memcg->css.cgroup, root->css.cgroup);
 }
 
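Note the flipped argument order relative to the old helper: the potential
descendant now comes first and the ancestor second, so every call site below
is converted by swapping the operands. The conversion pattern, with
do_something() standing in for the caller's body:

    /* old: is memcg within root_memcg's subtree? */
    if (mem_cgroup_same_or_subtree(root_memcg, memcg))
            do_something();

    /* new: same predicate, child-first argument order */
    if (mem_cgroup_is_descendant(memcg, root_memcg))
            do_something();

The RCU-taking wrapper and the NULL check go away with it; judging by the
css_get()/css_put() pair in task_in_mem_cgroup() below, callers are expected
to hold references on both memcgs, which keeps the css pointers stable.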
-bool task_in_mem_cgroup(struct task_struct *task,
-                       const struct mem_cgroup *memcg)
+bool task_in_mem_cgroup(struct task_struct *task, struct mem_cgroup *memcg)
 {
-       struct mem_cgroup *curr = NULL;
+       struct mem_cgroup *task_memcg;
        struct task_struct *p;
        bool ret;
 
        p = find_lock_task_mm(task);
        if (p) {
-               curr = get_mem_cgroup_from_mm(p->mm);
+               task_memcg = get_mem_cgroup_from_mm(p->mm);
                task_unlock(p);
        } else {
                /*
@@ -1371,19 +1354,12 @@ bool task_in_mem_cgroup(struct task_struct *task,
                 * killed to prevent needlessly killing additional tasks.
                 */
                rcu_read_lock();
-               curr = mem_cgroup_from_task(task);
-               if (curr)
-                       css_get(&curr->css);
+               task_memcg = mem_cgroup_from_task(task);
+               css_get(&task_memcg->css);
                rcu_read_unlock();
        }
-       /*
-        * We should check use_hierarchy of "memcg", not "curr": checking
-        * "curr" would make this function return true whenever hierarchy
-        * is enabled in "curr" and "curr" is a child of "memcg" in the
-        * *cgroup* hierarchy (even if use_hierarchy is disabled in "memcg").
-        */
-       ret = mem_cgroup_same_or_subtree(memcg, curr);
-       css_put(&curr->css);
+       ret = mem_cgroup_is_descendant(task_memcg, memcg);
+       css_put(&task_memcg->css);
        return ret;
 }
 
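With the task's memcg resolved (through its mm when one can be locked,
otherwise from the task's css under RCU), membership reduces to the
descendant test above. A minimal sketch of an OOM-side caller, using a
hypothetical wrapper that is not part of this patch:

    /* sketch: a task outside memcg's subtree cannot relieve its OOM */
    static bool task_outside_oom_domain(struct task_struct *p,
                                        struct mem_cgroup *memcg)
    {
            return memcg && !task_in_mem_cgroup(p, memcg);
    }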
@@ -1468,8 +1444,8 @@ static bool mem_cgroup_under_move(struct mem_cgroup *memcg)
        if (!from)
                goto unlock;
 
-       ret = mem_cgroup_same_or_subtree(memcg, from)
-               || mem_cgroup_same_or_subtree(memcg, to);
+       ret = mem_cgroup_is_descendant(from, memcg) ||
+               mem_cgroup_is_descendant(to, memcg);
 unlock:
        spin_unlock(&mc.lock);
        return ret;
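With the new argument order, mem_cgroup_under_move(memcg) asks whether
either endpoint of the pending charge move lies in memcg's subtree. A worked
example (assuming use_hierarchy is enabled in A):

    /*
     *        A          charges moving from B to C:
     *       / \
     *      B   C        mem_cgroup_under_move(A) -> true  (B, C in A's subtree)
     *                   mem_cgroup_under_move(B) -> true  (B == mc.from)
     *      D            mem_cgroup_under_move(D) -> false (unrelated root)
     */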
@@ -1743,52 +1719,11 @@ int mem_cgroup_select_victim_node(struct mem_cgroup *memcg)
        memcg->last_scanned_node = node;
        return node;
 }
-
-/*
- * Check all nodes for reclaimable pages. For a quick scan, we make use
- * of scan_nodes, which lets us skip unused nodes. But scan_nodes is
- * lazily updated and may not contain enough new information, so we
- * need to double check.
- */
-static bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap)
-{
-       int nid;
-
-       /*
-        * quick check...making use of scan_nodes.
-        * We can skip unused nodes.
-        */
-       if (!nodes_empty(memcg->scan_nodes)) {
-               for (nid = first_node(memcg->scan_nodes);
-                    nid < MAX_NUMNODES;
-                    nid = next_node(nid, memcg->scan_nodes)) {
-
-                       if (test_mem_cgroup_node_reclaimable(memcg, nid, noswap))
-                               return true;
-               }
-       }
-       /*
-        * Check rest of nodes.
-        */
-       for_each_node_state(nid, N_MEMORY) {
-               if (node_isset(nid, memcg->scan_nodes))
-                       continue;
-               if (test_mem_cgroup_node_reclaimable(memcg, nid, noswap))
-                       return true;
-       }
-       return false;
-}
-
 #else
 int mem_cgroup_select_victim_node(struct mem_cgroup *memcg)
 {
        return 0;
 }
-
-static bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap)
-{
-       return test_mem_cgroup_node_reclaimable(memcg, 0, noswap);
-}
 #endif
 
 static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
@@ -1832,8 +1767,6 @@ static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
                        }
                        continue;
                }
-               if (!mem_cgroup_reclaimable(victim, false))
-                       continue;
                total += mem_cgroup_shrink_node_zone(victim, gfp_mask, false,
                                                     zone, &nr_scanned);
                *total_scanned += nr_scanned;
@@ -1944,12 +1877,8 @@ static int memcg_oom_wake_function(wait_queue_t *wait,
        oom_wait_info = container_of(wait, struct oom_wait_info, wait);
        oom_wait_memcg = oom_wait_info->memcg;
 
-       /*
-        * Both oom_wait_info->memcg and wake_memcg are stable under us,
-        * so we can use css_is_ancestor without worrying about RCU.
-        */
-       if (!mem_cgroup_same_or_subtree(oom_wait_memcg, wake_memcg)
-               && !mem_cgroup_same_or_subtree(wake_memcg, oom_wait_memcg))
+       if (!mem_cgroup_is_descendant(wake_memcg, oom_wait_memcg) &&
+           !mem_cgroup_is_descendant(oom_wait_memcg, wake_memcg))
                return 0;
        return autoremove_wake_function(wait, mode, sync, arg);
 }
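The check is deliberately symmetric: a waiter is woken when its memcg and
the memcg whose OOM was handled lie on the same hierarchical path in either
direction, since progress anywhere in a subtree relieves an ancestor's OOM
and vice versa. Factored out for clarity (illustrative only, not part of
the patch):

    /* illustrative: the bidirectional relatedness test used above */
    static bool oom_domains_related(struct mem_cgroup *a, struct mem_cgroup *b)
    {
            return mem_cgroup_is_descendant(a, b) ||
                   mem_cgroup_is_descendant(b, a);
    }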
@@ -2124,11 +2053,11 @@ again:
  * @locked: value received from mem_cgroup_begin_page_stat()
  * @flags: value received from mem_cgroup_begin_page_stat()
  */
-void mem_cgroup_end_page_stat(struct mem_cgroup *memcg, bool locked,
-                             unsigned long flags)
+void mem_cgroup_end_page_stat(struct mem_cgroup *memcg, bool *locked,
+                             unsigned long *flags)
 {
-       if (memcg && locked)
-               spin_unlock_irqrestore(&memcg->move_lock, flags);
+       if (memcg && *locked)
+               spin_unlock_irqrestore(&memcg->move_lock, *flags);
 
        rcu_read_unlock();
 }
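This hunk is the fix the subject line refers to. The IRQ flags word saved by
mem_cgroup_begin_page_stat() was previously handed back to the unlock path
through the by-value parameters above, but the kernel's long-standing rule
is that local_irq_save()/spin_lock_irqsave() flags must stay within one
function: on some architectures the saved state is not guaranteed to survive
being passed around by value. Keeping locked and flags in the caller and
passing their addresses to both halves avoids that. A sketch of the caller
pattern under the new prototypes (the begin_ side and the stat index are
assumed from the same series, not shown in this diff):

    bool locked;
    unsigned long flags;
    struct mem_cgroup *memcg;

    memcg = mem_cgroup_begin_page_stat(page, &locked, &flags);
    if (memcg)
            mem_cgroup_update_page_stat(memcg,
                                        MEM_CGROUP_STAT_FILE_MAPPED, -1);
    mem_cgroup_end_page_stat(memcg, &locked, &flags);   /* NULL-safe */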
@@ -2269,7 +2198,7 @@ static void drain_all_stock(struct mem_cgroup *root_memcg)
                memcg = stock->cached;
                if (!memcg || !stock->nr_pages)
                        continue;
-               if (!mem_cgroup_same_or_subtree(root_memcg, memcg))
+               if (!mem_cgroup_is_descendant(memcg, root_memcg))
                        continue;
                if (!test_and_set_bit(FLUSHING_CACHED_CHARGE, &stock->flags)) {
                        if (cpu == curcpu)
@@ -2590,26 +2519,6 @@ static struct kmem_cache *memcg_params_to_cache(struct memcg_cache_params *p)
        return cache_from_memcg_idx(cachep, memcg_cache_id(p->memcg));
 }
 
-#ifdef CONFIG_SLABINFO
-static int mem_cgroup_slabinfo_read(struct seq_file *m, void *v)
-{
-       struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
-       struct memcg_cache_params *params;
-
-       if (!memcg_kmem_is_active(memcg))
-               return -EIO;
-
-       print_slabinfo_header(m);
-
-       mutex_lock(&memcg_slab_mutex);
-       list_for_each_entry(params, &memcg->memcg_slab_caches, list)
-               cache_show(memcg_params_to_cache(params), m);
-       mutex_unlock(&memcg_slab_mutex);
-
-       return 0;
-}
-#endif
-
 static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp,
                             unsigned long nr_pages)
 {
@@ -4751,7 +4660,10 @@ static struct cftype mem_cgroup_files[] = {
 #ifdef CONFIG_SLABINFO
        {
                .name = "kmem.slabinfo",
-               .seq_show = mem_cgroup_slabinfo_read,
+               .seq_start = slab_start,
+               .seq_next = slab_next,
+               .seq_stop = slab_stop,
+               .seq_show = memcg_slab_show,
        },
 #endif
 #endif
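Together with the removal further up, this replaces the hand-rolled one-shot
seq_show with the generic slab iterator, presumably shared with
/proc/slabinfo's implementation in mm/slab_common.c, so header printing and
locking stay in one place. The seq_file core drives the four hooks roughly
like this (simplified; see fs/seq_file.c for the real loop):

    /*
     *     p = start(m, &pos);
     *     while (p && !IS_ERR(p)) {
     *             show(m, p);
     *             p = next(m, p, &pos);
     *     }
     *     stop(m, p);
     */

Reading memory.kmem.slabinfo from a cgroup directory then yields
/proc/slabinfo-style output restricted to that memcg's caches.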
@@ -5393,8 +5305,6 @@ static void __mem_cgroup_clear_mc(void)
 
 static void mem_cgroup_clear_mc(void)
 {
-       struct mem_cgroup *from = mc.from;
-
        /*
         * we must clear moving_task before waking up waiters at the end of
         * task migration.
@@ -5405,8 +5315,6 @@ static void mem_cgroup_clear_mc(void)
        mc.from = NULL;
        mc.to = NULL;
        spin_unlock(&mc.lock);
-
-       atomic_dec(&from->moving_account);
 }
 
 static int mem_cgroup_can_attach(struct cgroup_subsys_state *css,
@@ -5440,15 +5348,6 @@ static int mem_cgroup_can_attach(struct cgroup_subsys_state *css,
                        VM_BUG_ON(mc.moved_charge);
                        VM_BUG_ON(mc.moved_swap);
 
-                       /*
-                        * Signal mem_cgroup_begin_page_stat() to take
-                        * the memcg's move_lock while we're moving
-                        * its pages to another memcg.  Then wait for
-                        * already started RCU-only updates to finish.
-                        */
-                       atomic_inc(&from->moving_account);
-                       synchronize_rcu();
-
                        spin_lock(&mc.lock);
                        mc.from = from;
                        mc.to = memcg;
@@ -5580,6 +5479,13 @@ static void mem_cgroup_move_charge(struct mm_struct *mm)
        struct vm_area_struct *vma;
 
        lru_add_drain_all();
+       /*
+        * Signal mem_cgroup_begin_page_stat() to take the memcg's
+        * move_lock while we're moving its pages to another memcg.
+        * Then wait for already started RCU-only updates to finish.
+        */
+       atomic_inc(&mc.from->moving_account);
+       synchronize_rcu();
 retry:
        if (unlikely(!down_read_trylock(&mm->mmap_sem))) {
                /*
@@ -5612,6 +5518,7 @@ retry:
                        break;
        }
        up_read(&mm->mmap_sem);
+       atomic_dec(&mc.from->moving_account);
 }
 
 static void mem_cgroup_move_task(struct cgroup_subsys_state *css,
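The final hunks move the moving_account handshake from attach time into
mem_cgroup_move_charge() itself: the atomic_inc()/synchronize_rcu() now
brackets only the page-table walk, with the matching atomic_dec() after
up_read(), so stat updaters pay the move_lock tax for a shorter window (and
mem_cgroup_clear_mc() no longer needs its private copy of mc.from for the
decrement). For reference, the reader side this flag pairs with: a
reconstruction of mem_cgroup_begin_page_stat() consistent with the again:
context marker and the pointer-based interface above, not taken from this
diff:

    struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page,
                                                  bool *locked,
                                                  unsigned long *flags)
    {
            struct page_cgroup *pc = lookup_page_cgroup(page);
            struct mem_cgroup *memcg;

            rcu_read_lock();
    again:
            memcg = pc->mem_cgroup;
            *locked = false;
            /* fast path: no move in flight, RCU alone protects the update */
            if (!memcg || atomic_read(&memcg->moving_account) <= 0)
                    return memcg;

            /* slow path: serialize against a concurrent charge move */
            spin_lock_irqsave(&memcg->move_lock, *flags);
            if (memcg != pc->mem_cgroup) {
                    /* the page moved under us; retry against the new memcg */
                    spin_unlock_irqrestore(&memcg->move_lock, *flags);
                    goto again;
            }
            *locked = true;
            return memcg;
    }

The function returns with the RCU read lock held; mem_cgroup_end_page_stat()
in the earlier hunk is what drops it.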