diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 49d8081..e46451e 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
 #include <linux/backing-dev.h>
 #include <linux/bit_spinlock.h>
 #include <linux/rcupdate.h>
+#include <linux/slab.h>
 #include <linux/swap.h>
 #include <linux/spinlock.h>
 #include <linux/fs.h>
 #include <linux/seq_file.h>
+#include <linux/vmalloc.h>
 
 #include <asm/uaccess.h>
 
 struct cgroup_subsys mem_cgroup_subsys;
 static const int MEM_CGROUP_RECLAIM_RETRIES = 5;
+static struct kmem_cache *page_cgroup_cache;
 
 /*
  * Statistics for memory cgroup.
@@ -45,6 +48,8 @@ enum mem_cgroup_stat_index {
         */
        MEM_CGROUP_STAT_CACHE,     /* # of pages charged as cache */
        MEM_CGROUP_STAT_RSS,       /* # of pages charged as rss */
+       MEM_CGROUP_STAT_PGPGIN_COUNT,   /* # of pages paged in */
+       MEM_CGROUP_STAT_PGPGOUT_COUNT,  /* # of pages paged out */
 
        MEM_CGROUP_STAT_NSTATS,
 };
@@ -196,6 +201,13 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *mem, int flags,
                __mem_cgroup_stat_add_safe(stat, MEM_CGROUP_STAT_CACHE, val);
        else
                __mem_cgroup_stat_add_safe(stat, MEM_CGROUP_STAT_RSS, val);
+
+       if (charge)
+               __mem_cgroup_stat_add_safe(stat,
+                               MEM_CGROUP_STAT_PGPGIN_COUNT, 1);
+       else
+               __mem_cgroup_stat_add_safe(stat,
+                               MEM_CGROUP_STAT_PGPGOUT_COUNT, 1);
 }
 
 static struct mem_cgroup_per_zone *
@@ -273,10 +285,10 @@ static void unlock_page_cgroup(struct page *page)
        bit_spin_unlock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
 }
 
-static void __mem_cgroup_remove_list(struct page_cgroup *pc)
+static void __mem_cgroup_remove_list(struct mem_cgroup_per_zone *mz,
+                       struct page_cgroup *pc)
 {
        int from = pc->flags & PAGE_CGROUP_FLAG_ACTIVE;
-       struct mem_cgroup_per_zone *mz = page_cgroup_zoneinfo(pc);
 
        if (from)
                MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_ACTIVE) -= 1;
@@ -287,10 +299,10 @@ static void __mem_cgroup_remove_list(struct page_cgroup *pc)
        list_del_init(&pc->lru);
 }
 
-static void __mem_cgroup_add_list(struct page_cgroup *pc)
+static void __mem_cgroup_add_list(struct mem_cgroup_per_zone *mz,
+                               struct page_cgroup *pc)
 {
        int to = pc->flags & PAGE_CGROUP_FLAG_ACTIVE;
-       struct mem_cgroup_per_zone *mz = page_cgroup_zoneinfo(pc);
 
        if (!to) {
                MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) += 1;
@@ -547,7 +559,7 @@ retry:
        }
        unlock_page_cgroup(page);
 
-       pc = kzalloc(sizeof(struct page_cgroup), gfp_mask);
+       pc = kmem_cache_zalloc(page_cgroup_cache, gfp_mask);
        if (pc == NULL)
                goto err;
 
@@ -589,7 +601,6 @@ retry:
                        mem_cgroup_out_of_memory(mem, gfp_mask);
                        goto out;
                }
-               congestion_wait(WRITE, HZ/10);
        }
 
        pc->ref_cnt = 1;
@@ -597,7 +608,7 @@ retry:
        pc->page = page;
        pc->flags = PAGE_CGROUP_FLAG_ACTIVE;
        if (ctype == MEM_CGROUP_CHARGE_TYPE_CACHE)
-               pc->flags |= PAGE_CGROUP_FLAG_CACHE;
+               pc->flags = PAGE_CGROUP_FLAG_CACHE;
 
        lock_page_cgroup(page);
        if (page_get_page_cgroup(page)) {
@@ -609,14 +620,14 @@ retry:
                 */
                res_counter_uncharge(&mem->res, PAGE_SIZE);
                css_put(&mem->css);
-               kfree(pc);
+               kmem_cache_free(page_cgroup_cache, pc);
                goto retry;
        }
        page_assign_page_cgroup(page, pc);
 
        mz = page_cgroup_zoneinfo(pc);
        spin_lock_irqsave(&mz->lru_lock, flags);
-       __mem_cgroup_add_list(pc);
+       __mem_cgroup_add_list(mz, pc);
        spin_unlock_irqrestore(&mz->lru_lock, flags);
 
        unlock_page_cgroup(page);
@@ -624,7 +635,7 @@ done:
        return 0;
 out:
        css_put(&mem->css);
-       kfree(pc);
+       kmem_cache_free(page_cgroup_cache, pc);
 err:
        return -ENOMEM;
 }
@@ -672,7 +683,7 @@ void mem_cgroup_uncharge_page(struct page *page)
        if (--(pc->ref_cnt) == 0) {
                mz = page_cgroup_zoneinfo(pc);
                spin_lock_irqsave(&mz->lru_lock, flags);
-               __mem_cgroup_remove_list(pc);
+               __mem_cgroup_remove_list(mz, pc);
                spin_unlock_irqrestore(&mz->lru_lock, flags);
 
                page_assign_page_cgroup(page, NULL);
@@ -682,7 +693,7 @@ void mem_cgroup_uncharge_page(struct page *page)
                res_counter_uncharge(&mem->res, PAGE_SIZE);
                css_put(&mem->css);
 
-               kfree(pc);
+               kmem_cache_free(page_cgroup_cache, pc);
                return;
        }
 
@@ -734,7 +745,7 @@ void mem_cgroup_page_migration(struct page *page, struct page *newpage)
 
        mz = page_cgroup_zoneinfo(pc);
        spin_lock_irqsave(&mz->lru_lock, flags);
-       __mem_cgroup_remove_list(pc);
+       __mem_cgroup_remove_list(mz, pc);
        spin_unlock_irqrestore(&mz->lru_lock, flags);
 
        page_assign_page_cgroup(page, NULL);
@@ -746,7 +757,7 @@ void mem_cgroup_page_migration(struct page *page, struct page *newpage)
 
        mz = page_cgroup_zoneinfo(pc);
        spin_lock_irqsave(&mz->lru_lock, flags);
-       __mem_cgroup_add_list(pc);
+       __mem_cgroup_add_list(mz, pc);
        spin_unlock_irqrestore(&mz->lru_lock, flags);
 
        unlock_page_cgroup(newpage);
@@ -855,16 +866,25 @@ static ssize_t mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
                                mem_cgroup_write_strategy);
 }
 
-static ssize_t mem_force_empty_write(struct cgroup *cont,
-                               struct cftype *cft, struct file *file,
-                               const char __user *userbuf,
-                               size_t nbytes, loff_t *ppos)
+static int mem_cgroup_reset(struct cgroup *cont, unsigned int event)
 {
-       struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
-       int ret = mem_cgroup_force_empty(mem);
-       if (!ret)
-               ret = nbytes;
-       return ret;
+       struct mem_cgroup *mem;
+
+       mem = mem_cgroup_from_cont(cont);
+       switch (event) {
+       case RES_MAX_USAGE:
+               res_counter_reset_max(&mem->res);
+               break;
+       case RES_FAILCNT:
+               res_counter_reset_failcnt(&mem->res);
+               break;
+       }
+       return 0;
+}
+
+static int mem_force_empty_write(struct cgroup *cont, unsigned int event)
+{
+       return mem_cgroup_force_empty(mem_cgroup_from_cont(cont));
 }
 
 static const struct mem_cgroup_stat_desc {
@@ -873,6 +893,8 @@ static const struct mem_cgroup_stat_desc {
 } mem_cgroup_stat_desc[] = {
        [MEM_CGROUP_STAT_CACHE] = { "cache", PAGE_SIZE, },
        [MEM_CGROUP_STAT_RSS] = { "rss", PAGE_SIZE, },
+       [MEM_CGROUP_STAT_PGPGIN_COUNT] = {"pgpgin", 1, },
+       [MEM_CGROUP_STAT_PGPGOUT_COUNT] = {"pgpgout", 1, },
 };
 
 static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
@@ -909,6 +931,12 @@ static struct cftype mem_cgroup_files[] = {
                .private = RES_USAGE,
                .read_u64 = mem_cgroup_read,
        },
+       {
+               .name = "max_usage_in_bytes",
+               .private = RES_MAX_USAGE,
+               .trigger = mem_cgroup_reset,
+               .read_u64 = mem_cgroup_read,
+       },
        {
                .name = "limit_in_bytes",
                .private = RES_LIMIT,
@@ -918,11 +946,12 @@ static struct cftype mem_cgroup_files[] = {
        {
                .name = "failcnt",
                .private = RES_FAILCNT,
+               .trigger = mem_cgroup_reset,
                .read_u64 = mem_cgroup_read,
        },
        {
                .name = "force_empty",
-               .write = mem_force_empty_write,
+               .trigger = mem_force_empty_write,
        },
        {
                .name = "stat",
@@ -966,24 +995,46 @@ static void free_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node)
        kfree(mem->info.nodeinfo[node]);
 }
 
+static struct mem_cgroup *mem_cgroup_alloc(void)
+{
+       struct mem_cgroup *mem;
+
+       if (sizeof(*mem) < PAGE_SIZE)
+               mem = kmalloc(sizeof(*mem), GFP_KERNEL);
+       else
+               mem = vmalloc(sizeof(*mem));
+
+       if (mem)
+               memset(mem, 0, sizeof(*mem));
+       return mem;
+}
+
+static void mem_cgroup_free(struct mem_cgroup *mem)
+{
+       if (sizeof(*mem) < PAGE_SIZE)
+               kfree(mem);
+       else
+               vfree(mem);
+}
+
+
 static struct cgroup_subsys_state *
 mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
 {
        struct mem_cgroup *mem;
        int node;
 
-       if (unlikely((cont->parent) == NULL))
+       if (unlikely((cont->parent) == NULL)) {
                mem = &init_mem_cgroup;
-       else
-               mem = kzalloc(sizeof(struct mem_cgroup), GFP_KERNEL);
-
-       if (mem == NULL)
-               return ERR_PTR(-ENOMEM);
+               page_cgroup_cache = KMEM_CACHE(page_cgroup, SLAB_PANIC);
+       } else {
+               mem = mem_cgroup_alloc();
+               if (!mem)
+                       return ERR_PTR(-ENOMEM);
+       }
 
        res_counter_init(&mem->res);
 
-       memset(&mem->info, 0, sizeof(mem->info));
-
        for_each_node_state(node, N_POSSIBLE)
                if (alloc_mem_cgroup_per_zone_info(mem, node))
                        goto free_out;
@@ -993,7 +1044,7 @@ free_out:
        for_each_node_state(node, N_POSSIBLE)
                free_mem_cgroup_per_zone_info(mem, node);
        if (cont->parent != NULL)
-               kfree(mem);
+               mem_cgroup_free(mem);
        return ERR_PTR(-ENOMEM);
 }
 
@@ -1013,7 +1064,7 @@ static void mem_cgroup_destroy(struct cgroup_subsys *ss,
        for_each_node_state(node, N_POSSIBLE)
                free_mem_cgroup_per_zone_info(mem, node);
 
-       kfree(mem_cgroup_from_cont(cont));
+       mem_cgroup_free(mem_cgroup_from_cont(cont));
 }
 
 static int mem_cgroup_populate(struct cgroup_subsys *ss,
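/*
 * Note (not part of the diff above): the mem_cgroup_alloc()/mem_cgroup_free()
 * pair added by this patch follows a common kernel pattern -- allocate from
 * the slab allocator while the object fits in a page, and fall back to
 * vmalloc() once it grows past PAGE_SIZE. A minimal, hypothetical sketch of
 * the same pattern follows; the struct name and field below are illustrative
 * only and do not appear in this patch.
 */
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/vmalloc.h>

/* Stand-in for a structure whose size may exceed one page. */
struct big_state {
	unsigned long counters[1024];
};

static struct big_state *big_state_alloc(void)
{
	struct big_state *s;

	/* Slab allocation while the object still fits in a page ... */
	if (sizeof(*s) < PAGE_SIZE)
		s = kmalloc(sizeof(*s), GFP_KERNEL);
	else	/* ... otherwise take virtually contiguous pages instead. */
		s = vmalloc(sizeof(*s));

	if (s)
		memset(s, 0, sizeof(*s));
	return s;
}

static void big_state_free(struct big_state *s)
{
	/* The free side must mirror the size check on the allocation side. */
	if (sizeof(*s) < PAGE_SIZE)
		kfree(s);
	else
		vfree(s);
}
/*
 * Later kernels wrap this choice in kvzalloc()/kvfree(), which select the
 * allocator automatically; the open-coded form above mirrors what this patch
 * does for struct mem_cgroup.
 */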