Merge branch 'stable/drivers' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen
[pandora-kernel.git] / mm / memcontrol.c
index cf7d027..e013b8e 100644 (file)
@@ -35,6 +35,7 @@
 #include <linux/limits.h>
 #include <linux/mutex.h>
 #include <linux/rbtree.h>
+#include <linux/shmem_fs.h>
 #include <linux/slab.h>
 #include <linux/swap.h>
 #include <linux/swapops.h>
@@ -107,10 +108,12 @@ enum mem_cgroup_events_index {
 enum mem_cgroup_events_target {
        MEM_CGROUP_TARGET_THRESH,
        MEM_CGROUP_TARGET_SOFTLIMIT,
+       MEM_CGROUP_TARGET_NUMAINFO,
        MEM_CGROUP_NTARGETS,
 };
 #define THRESHOLDS_EVENTS_TARGET (128)
 #define SOFTLIMIT_EVENTS_TARGET (1024)
+#define NUMAINFO_EVENTS_TARGET (1024)
 
 struct mem_cgroup_stat_cpu {
        long count[MEM_CGROUP_STAT_NSTATS];
@@ -236,7 +239,8 @@ struct mem_cgroup {
        int last_scanned_node;
 #if MAX_NUMNODES > 1
        nodemask_t      scan_nodes;
-       unsigned long   next_scan_node_update;
+       atomic_t        numainfo_events;
+       atomic_t        numainfo_updating;
 #endif
        /*
         * Should the accounting and control be hierarchical, per subtree?
@@ -576,15 +580,6 @@ static long mem_cgroup_read_stat(struct mem_cgroup *mem,
        return val;
 }
 
-static long mem_cgroup_local_usage(struct mem_cgroup *mem)
-{
-       long ret;
-
-       ret = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_RSS);
-       ret += mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_CACHE);
-       return ret;
-}
-
 static void mem_cgroup_swap_statistics(struct mem_cgroup *mem,
                                         bool charge)
 {
@@ -688,6 +683,9 @@ static void __mem_cgroup_target_update(struct mem_cgroup *mem, int target)
        case MEM_CGROUP_TARGET_SOFTLIMIT:
                next = val + SOFTLIMIT_EVENTS_TARGET;
                break;
+       case MEM_CGROUP_TARGET_NUMAINFO:
+               next = val + NUMAINFO_EVENTS_TARGET;
+               break;
        default:
                return;
        }
@@ -706,11 +704,19 @@ static void memcg_check_events(struct mem_cgroup *mem, struct page *page)
                mem_cgroup_threshold(mem);
                __mem_cgroup_target_update(mem, MEM_CGROUP_TARGET_THRESH);
                if (unlikely(__memcg_event_check(mem,
-                       MEM_CGROUP_TARGET_SOFTLIMIT))){
+                            MEM_CGROUP_TARGET_SOFTLIMIT))) {
                        mem_cgroup_update_tree(mem, page);
                        __mem_cgroup_target_update(mem,
-                               MEM_CGROUP_TARGET_SOFTLIMIT);
+                                                  MEM_CGROUP_TARGET_SOFTLIMIT);
+               }
+#if MAX_NUMNODES > 1
+               if (unlikely(__memcg_event_check(mem,
+                       MEM_CGROUP_TARGET_NUMAINFO))) {
+                       atomic_inc(&mem->numainfo_events);
+                       __mem_cgroup_target_update(mem,
+                               MEM_CGROUP_TARGET_NUMAINFO);
                }
+#endif
        }
 }
 
@@ -1128,7 +1134,6 @@ unsigned long mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *memcg,
        return MEM_CGROUP_ZSTAT(mz, lru);
 }
 
-#ifdef CONFIG_NUMA
 static unsigned long mem_cgroup_node_nr_file_lru_pages(struct mem_cgroup *memcg,
                                                        int nid)
 {
@@ -1140,6 +1145,17 @@ static unsigned long mem_cgroup_node_nr_file_lru_pages(struct mem_cgroup *memcg,
        return ret;
 }
 
+static unsigned long mem_cgroup_node_nr_anon_lru_pages(struct mem_cgroup *memcg,
+                                                       int nid)
+{
+       unsigned long ret;
+
+       ret = mem_cgroup_get_zonestat_node(memcg, nid, LRU_INACTIVE_ANON) +
+               mem_cgroup_get_zonestat_node(memcg, nid, LRU_ACTIVE_ANON);
+       return ret;
+}
+
+#if MAX_NUMNODES > 1
 static unsigned long mem_cgroup_nr_file_lru_pages(struct mem_cgroup *memcg)
 {
        u64 total = 0;
@@ -1151,17 +1167,6 @@ static unsigned long mem_cgroup_nr_file_lru_pages(struct mem_cgroup *memcg)
        return total;
 }
 
-static unsigned long mem_cgroup_node_nr_anon_lru_pages(struct mem_cgroup *memcg,
-                                                       int nid)
-{
-       unsigned long ret;
-
-       ret = mem_cgroup_get_zonestat_node(memcg, nid, LRU_INACTIVE_ANON) +
-               mem_cgroup_get_zonestat_node(memcg, nid, LRU_ACTIVE_ANON);
-
-       return ret;
-}
-
 static unsigned long mem_cgroup_nr_anon_lru_pages(struct mem_cgroup *memcg)
 {
        u64 total = 0;
@@ -1558,6 +1563,28 @@ mem_cgroup_select_victim(struct mem_cgroup *root_mem)
        return ret;
 }
 
+/**
+ * test_mem_cgroup_node_reclaimable
+ * @mem: the target memcg
+ * @nid: the node ID to be checked.
+ * @noswap : specify true here if the user wants flle only information.
+ *
+ * This function returns whether the specified memcg contains any
+ * reclaimable pages on a node. Returns true if there are any reclaimable
+ * pages in the node.
+ */
+static bool test_mem_cgroup_node_reclaimable(struct mem_cgroup *mem,
+               int nid, bool noswap)
+{
+       if (mem_cgroup_node_nr_file_lru_pages(mem, nid))
+               return true;
+       if (noswap || !total_swap_pages)
+               return false;
+       if (mem_cgroup_node_nr_anon_lru_pages(mem, nid))
+               return true;
+       return false;
+
+}
 #if MAX_NUMNODES > 1
 
 /*
@@ -1569,26 +1596,26 @@ mem_cgroup_select_victim(struct mem_cgroup *root_mem)
 static void mem_cgroup_may_update_nodemask(struct mem_cgroup *mem)
 {
        int nid;
-
-       if (time_after(mem->next_scan_node_update, jiffies))
+       /*
+        * numainfo_events > 0 means there was at least NUMAINFO_EVENTS_TARGET
+        * pagein/pageout changes since the last update.
+        */
+       if (!atomic_read(&mem->numainfo_events))
+               return;
+       if (atomic_inc_return(&mem->numainfo_updating) > 1)
                return;
 
-       mem->next_scan_node_update = jiffies + 10*HZ;
        /* make a nodemask where this memcg uses memory from */
        mem->scan_nodes = node_states[N_HIGH_MEMORY];
 
        for_each_node_mask(nid, node_states[N_HIGH_MEMORY]) {
 
-               if (mem_cgroup_get_zonestat_node(mem, nid, LRU_INACTIVE_FILE) ||
-                   mem_cgroup_get_zonestat_node(mem, nid, LRU_ACTIVE_FILE))
-                       continue;
-
-               if (total_swap_pages &&
-                   (mem_cgroup_get_zonestat_node(mem, nid, LRU_INACTIVE_ANON) ||
-                    mem_cgroup_get_zonestat_node(mem, nid, LRU_ACTIVE_ANON)))
-                       continue;
-               node_clear(nid, mem->scan_nodes);
+               if (!test_mem_cgroup_node_reclaimable(mem, nid, false))
+                       node_clear(nid, mem->scan_nodes);
        }
+
+       atomic_set(&mem->numainfo_events, 0);
+       atomic_set(&mem->numainfo_updating, 0);
 }
 
 /*
@@ -1626,11 +1653,51 @@ int mem_cgroup_select_victim_node(struct mem_cgroup *mem)
        return node;
 }
 
+/*
+ * Check all nodes whether it contains reclaimable pages or not.
+ * For quick scan, we make use of scan_nodes. This will allow us to skip
+ * unused nodes. But scan_nodes is lazily updated and may not cotain
+ * enough new information. We need to do double check.
+ */
+bool mem_cgroup_reclaimable(struct mem_cgroup *mem, bool noswap)
+{
+       int nid;
+
+       /*
+        * quick check...making use of scan_node.
+        * We can skip unused nodes.
+        */
+       if (!nodes_empty(mem->scan_nodes)) {
+               for (nid = first_node(mem->scan_nodes);
+                    nid < MAX_NUMNODES;
+                    nid = next_node(nid, mem->scan_nodes)) {
+
+                       if (test_mem_cgroup_node_reclaimable(mem, nid, noswap))
+                               return true;
+               }
+       }
+       /*
+        * Check rest of nodes.
+        */
+       for_each_node_state(nid, N_HIGH_MEMORY) {
+               if (node_isset(nid, mem->scan_nodes))
+                       continue;
+               if (test_mem_cgroup_node_reclaimable(mem, nid, noswap))
+                       return true;
+       }
+       return false;
+}
+
 #else
 int mem_cgroup_select_victim_node(struct mem_cgroup *mem)
 {
        return 0;
 }
+
+bool mem_cgroup_reclaimable(struct mem_cgroup *mem, bool noswap)
+{
+       return test_mem_cgroup_node_reclaimable(mem, 0, noswap);
+}
 #endif
 
 /*
@@ -1701,7 +1768,7 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
                                }
                        }
                }
-               if (!mem_cgroup_local_usage(victim)) {
+               if (!mem_cgroup_reclaimable(victim, noswap)) {
                        /* this cgroup's local usage == 0 */
                        css_put(&victim->css);
                        continue;