IB/qib: fix false-positive maybe-uninitialized warning
[pandora-kernel.git] / mm/memcontrol.c
index f6c4beb..9ba67dd 100644
@@ -33,6 +33,7 @@
 #include <linux/bit_spinlock.h>
 #include <linux/rcupdate.h>
 #include <linux/limits.h>
+#include <linux/export.h>
 #include <linux/mutex.h>
 #include <linux/rbtree.h>
 #include <linux/slab.h>
@@ -686,8 +687,8 @@ static bool __memcg_event_check(struct mem_cgroup *memcg, int target)
 {
        unsigned long val, next;
 
-       val = this_cpu_read(memcg->stat->events[MEM_CGROUP_EVENTS_COUNT]);
-       next = this_cpu_read(memcg->stat->targets[target]);
+       val = __this_cpu_read(memcg->stat->events[MEM_CGROUP_EVENTS_COUNT]);
+       next = __this_cpu_read(memcg->stat->targets[target]);
        /* from time_after() in jiffies.h */
        return ((long)next - (long)val < 0);
 }
@@ -696,7 +697,7 @@ static void __mem_cgroup_target_update(struct mem_cgroup *memcg, int target)
 {
        unsigned long val, next;
 
-       val = this_cpu_read(memcg->stat->events[MEM_CGROUP_EVENTS_COUNT]);
+       val = __this_cpu_read(memcg->stat->events[MEM_CGROUP_EVENTS_COUNT]);
 
        switch (target) {
        case MEM_CGROUP_TARGET_THRESH:
@@ -712,7 +713,7 @@ static void __mem_cgroup_target_update(struct mem_cgroup *memcg, int target)
                return;
        }
 
-       this_cpu_write(memcg->stat->targets[target], next);
+       __this_cpu_write(memcg->stat->targets[target], next);
 }
 
 /*
@@ -721,6 +722,7 @@ static void __mem_cgroup_target_update(struct mem_cgroup *memcg, int target)
  */
 static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
 {
+       preempt_disable();
        /* threshold event is triggered in finer grain than soft limit */
        if (unlikely(__memcg_event_check(memcg, MEM_CGROUP_TARGET_THRESH))) {
                mem_cgroup_threshold(memcg);
@@ -740,6 +742,7 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
                }
 #endif
        }
+       preempt_enable();
 }
 
 static struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont)
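
The hunk above trades this_cpu_read()/this_cpu_write() for the raw __this_cpu_*() variants and compensates by bracketing memcg_check_events() with preempt_disable()/preempt_enable(): the double-underscore accessors skip the implicit preemption protection and rely on the caller staying on one CPU. A minimal sketch of that contract, using an invented per-cpu counter rather than the memcg statistics:

```c
/*
 * Hedged sketch of the __this_cpu_*() contract, not memcontrol.c code.
 * demo_events and the batch size are invented for illustration.
 */
#include <linux/percpu.h>
#include <linux/preempt.h>
#include <linux/types.h>

static DEFINE_PER_CPU(unsigned long, demo_events);

static void demo_account_event(void)
{
        /* this_cpu_inc() carries its own protection against migration. */
        this_cpu_inc(demo_events);
}

static bool demo_check_batch(void)
{
        unsigned long val;

        /*
         * __this_cpu_read() does not, so the caller pins itself to the
         * CPU first, exactly as the hunk does around memcg_check_events().
         */
        preempt_disable();
        val = __this_cpu_read(demo_events);
        preempt_enable();

        return val > 64;        /* arbitrary batch threshold */
}
```

On x86 both forms compile to the same single instruction; the difference matters on architectures that fall back to the generic, preemption-protected implementation of this_cpu_*().
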
@@ -993,6 +996,16 @@ void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru)
                return;
        pc = lookup_page_cgroup(page);
        VM_BUG_ON(PageCgroupAcctLRU(pc));
+       /*
+        * putback:                             charge:
+        * SetPageLRU                           SetPageCgroupUsed
+        * smp_mb                               smp_mb
+        * PageCgroupUsed && add to memcg LRU   PageLRU && add to memcg LRU
+        *
+        * Ensure that one of the two sides adds the page to the memcg
+        * LRU during a race.
+        */
+       smp_mb();
        if (!PageCgroupUsed(pc))
                return;
        /* Ensure pc->mem_cgroup is visible after reading PCG_USED. */
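
The two smp_mb() hunks (here and in mem_cgroup_lru_add_after_commit() below) implement the store/load pairing described in the comment: each side first publishes its own flag, then issues a full barrier, then tests the other side's flag, which rules out the outcome where both sides miss each other's store and the page lands on no memcg LRU. A stripped-down model of that pairing, with invented names and a plain bitmap standing in for the page and page_cgroup flags:

```c
/*
 * Hedged model of the barrier pairing; demo_flags and the bit numbers
 * are made up, set_bit/test_bit stand in for SetPageLRU/SetPageCgroupUsed.
 */
#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/types.h>

static unsigned long demo_flags;        /* bit 0: "on LRU", bit 1: "used" */

static bool demo_putback_side(void)     /* true: caller adds page to memcg LRU */
{
        set_bit(0, &demo_flags);                /* SetPageLRU             */
        smp_mb();                               /* pairs with charge side */
        return test_bit(1, &demo_flags);        /* PageCgroupUsed?        */
}

static bool demo_charge_side(void)      /* true: caller adds page to memcg LRU */
{
        set_bit(1, &demo_flags);                /* SetPageCgroupUsed       */
        smp_mb();                               /* pairs with putback side */
        return test_bit(0, &demo_flags);        /* PageLRU?                */
}
```

If demo_putback_side() and demo_charge_side() race, the full barriers guarantee that at least one of them returns true, mirroring how at least one of putback and charge links the page into the memcg LRU.
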
@@ -1044,7 +1057,16 @@ static void mem_cgroup_lru_add_after_commit(struct page *page)
        unsigned long flags;
        struct zone *zone = page_zone(page);
        struct page_cgroup *pc = lookup_page_cgroup(page);
-
+       /*
+        * putback:                             charge:
+        * SetPageLRU                           SetPageCgroupUsed
+        * smp_mb                               smp_mb
+        * PageCgroupUsed && add to memcg LRU   PageLRU && add to memcg LRU
+        *
+        * Ensure that one of the two sides adds the page to the memcg
+        * LRU during a race.
+        */
+       smp_mb();
        /* taking care of that the page is added to LRU while we commit it */
        if (likely(!PageLRU(page)))
                return;
@@ -1104,15 +1126,19 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *memcg)
        return ret;
 }
 
-static int calc_inactive_ratio(struct mem_cgroup *memcg, unsigned long *present_pages)
+int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone)
 {
-       unsigned long active;
+       unsigned long inactive_ratio;
+       int nid = zone_to_nid(zone);
+       int zid = zone_idx(zone);
        unsigned long inactive;
+       unsigned long active;
        unsigned long gb;
-       unsigned long inactive_ratio;
 
-       inactive = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_INACTIVE_ANON));
-       active = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_ACTIVE_ANON));
+       inactive = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid,
+                                               BIT(LRU_INACTIVE_ANON));
+       active = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid,
+                                             BIT(LRU_ACTIVE_ANON));
 
        gb = (inactive + active) >> (30 - PAGE_SHIFT);
        if (gb)
@@ -1120,39 +1146,20 @@ static int calc_inactive_ratio(struct mem_cgroup *memcg, unsigned long *present_
        else
                inactive_ratio = 1;
 
-       if (present_pages) {
-               present_pages[0] = inactive;
-               present_pages[1] = active;
-       }
-
-       return inactive_ratio;
+       return inactive * inactive_ratio < active;
 }
 
-int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg)
-{
-       unsigned long active;
-       unsigned long inactive;
-       unsigned long present_pages[2];
-       unsigned long inactive_ratio;
-
-       inactive_ratio = calc_inactive_ratio(memcg, present_pages);
-
-       inactive = present_pages[0];
-       active = present_pages[1];
-
-       if (inactive * inactive_ratio < active)
-               return 1;
-
-       return 0;
-}
-
-int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg)
+int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg, struct zone *zone)
 {
        unsigned long active;
        unsigned long inactive;
+       int zid = zone_idx(zone);
+       int nid = zone_to_nid(zone);
 
-       inactive = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_INACTIVE_FILE));
-       active = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_ACTIVE_FILE));
+       inactive = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid,
+                                               BIT(LRU_INACTIVE_FILE));
+       active = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid,
+                                             BIT(LRU_ACTIVE_FILE));
 
        return (active > inactive);
 }
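
mem_cgroup_inactive_anon_is_low() and mem_cgroup_inactive_file_is_low() now take a zone and use the per-zone LRU counts instead of the memcg-wide totals. The ratio computation under `if (gb)` is unchanged and therefore falls between the two hunks; assuming the usual int_sqrt(10 * gb) formula and 4 KiB pages, a user-space model of the predicate, purely for illustration:

```c
/*
 * User-space model; the int_sqrt(10 * gb) ratio and 4 KiB page size are
 * assumptions, not taken from the hunks above.
 */
#include <math.h>
#include <stdio.h>

static unsigned long inactive_anon_ratio(unsigned long inactive, unsigned long active)
{
        unsigned long gb = (inactive + active) >> (30 - 12);    /* pages -> GiB */

        return gb ? (unsigned long)sqrt(10.0 * gb) : 1;
}

int main(void)
{
        unsigned long active = 1800000, inactive = 200000;      /* ~7.6 GiB of anon */
        unsigned long ratio = inactive_anon_ratio(inactive, active);

        /* ratio is 8 here, so "low" means less than ~1/9 of anon is inactive */
        printf("ratio=%lu low=%d\n", ratio, inactive * ratio < active);
        return 0;
}
```

Build with -lm; with ~7.6 GiB of anonymous memory the ratio comes out as 8, so inactive anon is flagged as low once it drops below roughly a ninth of the anon total for that zone.
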
@@ -1450,17 +1457,26 @@ static int mem_cgroup_count_children(struct mem_cgroup *memcg)
 u64 mem_cgroup_get_limit(struct mem_cgroup *memcg)
 {
        u64 limit;
-       u64 memsw;
 
        limit = res_counter_read_u64(&memcg->res, RES_LIMIT);
-       limit += total_swap_pages << PAGE_SHIFT;
 
-       memsw = res_counter_read_u64(&memcg->memsw, RES_LIMIT);
        /*
-        * If memsw is finite and limits the amount of swap space available
-        * to this memcg, return that limit.
+        * Do not consider swap space if we cannot swap due to swappiness
         */
-       return min(limit, memsw);
+       if (mem_cgroup_swappiness(memcg)) {
+               u64 memsw;
+
+               limit += total_swap_pages << PAGE_SHIFT;
+               memsw = res_counter_read_u64(&memcg->memsw, RES_LIMIT);
+
+               /*
+                * If memsw is finite and limits the amount of swap space
+                * available to this memcg, return that limit.
+                */
+               limit = min(limit, memsw);
+       }
+
+       return limit;
 }
 
 /*
@@ -3359,6 +3375,50 @@ void mem_cgroup_end_migration(struct mem_cgroup *memcg,
        cgroup_release_and_wakeup_rmdir(&memcg->css);
 }
 
+/*
+ * When replacing a page cache page, newpage is not charged to any memcg
+ * but may already be on the LRU. This function therefore does not touch
+ * the res_counter and only fixes up the LRU accounting. Both pages are
+ * locked, so we cannot race with uncharge.
+ */
+void mem_cgroup_replace_page_cache(struct page *oldpage,
+                                 struct page *newpage)
+{
+       struct mem_cgroup *memcg;
+       struct page_cgroup *pc;
+       struct zone *zone;
+       enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE;
+       unsigned long flags;
+
+       if (mem_cgroup_disabled())
+               return;
+
+       pc = lookup_page_cgroup(oldpage);
+       /* fix accounting on old pages */
+       lock_page_cgroup(pc);
+       memcg = pc->mem_cgroup;
+       mem_cgroup_charge_statistics(memcg, PageCgroupCache(pc), -1);
+       ClearPageCgroupUsed(pc);
+       unlock_page_cgroup(pc);
+
+       if (PageSwapBacked(oldpage))
+               type = MEM_CGROUP_CHARGE_TYPE_SHMEM;
+
+       zone = page_zone(newpage);
+       pc = lookup_page_cgroup(newpage);
+       /*
+        * Even if newpage->mapping was NULL before starting replacement,
+        * the newpage may already be on the LRU (or on a pagevec headed
+        * for the LRU). Hold zone->lru_lock while pc->mem_cgroup is
+        * overwritten.
+        */
+       spin_lock_irqsave(&zone->lru_lock, flags);
+       if (PageLRU(newpage))
+               del_page_from_lru_list(zone, newpage, page_lru(newpage));
+       __mem_cgroup_commit_charge(memcg, newpage, 1, pc, type);
+       if (PageLRU(newpage))
+               add_page_to_lru_list(zone, newpage, page_lru(newpage));
+       spin_unlock_irqrestore(&zone->lru_lock, flags);
+}
+
 #ifdef CONFIG_DEBUG_VM
 static struct page_cgroup *lookup_page_cgroup_used(struct page *page)
 {
@@ -4192,8 +4252,6 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
        }
 
 #ifdef CONFIG_DEBUG_VM
-       cb->fill(cb, "inactive_ratio", calc_inactive_ratio(mem_cont, NULL));
-
        {
                int nid, zid;
                struct mem_cgroup_per_zone *mz;
@@ -4327,7 +4385,13 @@ static int compare_thresholds(const void *a, const void *b)
        const struct mem_cgroup_threshold *_a = a;
        const struct mem_cgroup_threshold *_b = b;
 
-       return _a->threshold - _b->threshold;
+       if (_a->threshold > _b->threshold)
+               return 1;
+
+       if (_a->threshold < _b->threshold)
+               return -1;
+
+       return 0;
 }
 
 static int mem_cgroup_oom_notify_cb(struct mem_cgroup *memcg)
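
The old comparator returned `_a->threshold - _b->threshold` directly. The thresholds are u64 usage values, so the difference is truncated to the comparator's int return type: values that differ by a multiple of 2^32 compare as equal, and larger differences can come back with the wrong sign, handing sort() an inconsistent ordering. The hunk replaces it with an explicit three-way compare. A self-contained user-space illustration of the failure, with qsort() standing in for the kernel's sort():

```c
/* Stand-alone demonstration; build with any C compiler and run. */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Buggy: the u64 difference is truncated to the int return value. */
static int cmp_by_subtraction(const void *a, const void *b)
{
        return (int)(*(const uint64_t *)a - *(const uint64_t *)b);
}

/* Fixed: explicit three-way compare, equivalent to the hunk above. */
static int cmp_three_way(const void *a, const void *b)
{
        uint64_t x = *(const uint64_t *)a, y = *(const uint64_t *)b;

        return (x > y) - (x < y);
}

static void dump(const char *tag, const uint64_t *v)
{
        printf("%s: %llu %llu %llu\n", tag, (unsigned long long)v[0],
               (unsigned long long)v[1], (unsigned long long)v[2]);
}

int main(void)
{
        /* differences of 2^32 and 2^32 - 1 truncate to 0 and -1: both wrong */
        uint64_t v[] = { 1ULL << 32, 0, 1 };

        qsort(v, 3, sizeof(v[0]), cmp_by_subtraction);
        dump("buggy", v);               /* ordering is unreliable */

        qsort(v, 3, sizeof(v[0]), cmp_three_way);
        dump("fixed", v);               /* 0 1 4294967296 */
        return 0;
}
```

The `(x > y) - (x < y)` idiom in the fixed comparator is equivalent to the hunk's two-branch version and avoids doing any arithmetic on the values themselves.
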
@@ -4453,6 +4517,9 @@ static void mem_cgroup_usage_unregister_event(struct cgroup *cgrp,
         */
        BUG_ON(!thresholds);
 
+       if (!thresholds->primary)
+               goto unlock;
+
        usage = mem_cgroup_usage(memcg, type == _MEMSWAP);
 
        /* Check if a threshold crossed before removing */
@@ -4497,11 +4564,18 @@ static void mem_cgroup_usage_unregister_event(struct cgroup *cgrp,
 swap_buffers:
        /* Swap primary and spare array */
        thresholds->spare = thresholds->primary;
+
        rcu_assign_pointer(thresholds->primary, new);
 
        /* To be sure that nobody uses thresholds */
        synchronize_rcu();
 
+       /* If all events are unregistered, free the spare array */
+       if (!new) {
+               kfree(thresholds->spare);
+               thresholds->spare = NULL;
+       }
+unlock:
        mutex_unlock(&memcg->thresholds_lock);
 }
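
mem_cgroup_usage_unregister_event() manages the thresholds with an RCU double buffer: readers walk thresholds->primary under rcu_read_lock(), while the writer rebuilds the array in the spare slot, publishes it with rcu_assign_pointer(), and waits in synchronize_rcu() before the old primary may be reused. The two additions above plug holes in that scheme, bailing out when no thresholds are registered and freeing the spare copy once the last event is gone instead of leaking it. A condensed sketch of the publish-then-recycle pattern, using hypothetical demo_* structures rather than the memcg ones:

```c
/*
 * Hedged sketch of the primary/spare RCU double buffer; the demo_* types
 * are invented, only the update protocol mirrors the memcg code.
 */
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct demo_array {
        unsigned int size;
        unsigned long entries[0];
};

struct demo_thresholds {
        struct demo_array __rcu *primary;       /* read under rcu_read_lock() */
        struct demo_array *spare;               /* writer-private scratch copy */
};

/* Caller holds the single writer-side mutex, like thresholds_lock. */
static void demo_publish(struct demo_thresholds *t, struct demo_array *new)
{
        /* The old primary becomes the spare; readers may still see it. */
        t->spare = rcu_dereference_protected(t->primary, 1);

        rcu_assign_pointer(t->primary, new);

        /* Wait for all pre-existing readers before reusing the spare. */
        synchronize_rcu();

        /* Nothing left to publish: drop the spare instead of leaking it. */
        if (!new) {
                kfree(t->spare);
                t->spare = NULL;
        }
}
```

Readers pair this with rcu_read_lock()/rcu_dereference() around their walk of primary->entries, which is why the spare may only be touched after synchronize_rcu() returns.
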
 
@@ -4902,9 +4976,9 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
                int cpu;
                enable_swap_cgroup();
                parent = NULL;
-               root_mem_cgroup = memcg;
                if (mem_cgroup_soft_limit_tree_init())
                        goto free_out;
+               root_mem_cgroup = memcg;
                for_each_possible_cpu(cpu) {
                        struct memcg_stock_pcp *stock =
                                                &per_cpu(memcg_stock, cpu);
@@ -4943,7 +5017,6 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
        return &memcg->css;
 free_out:
        __mem_cgroup_free(memcg);
-       root_mem_cgroup = NULL;
        return ERR_PTR(error);
 }
 
@@ -5186,6 +5259,8 @@ static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd,
        spinlock_t *ptl;
 
        split_huge_page_pmd(walk->mm, pmd);
+       if (pmd_trans_unstable(pmd))
+               return 0;
 
        pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
        for (; addr != end; pte++, addr += PAGE_SIZE)
@@ -5347,6 +5422,8 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
        spinlock_t *ptl;
 
        split_huge_page_pmd(walk->mm, pmd);
+       if (pmd_trans_unstable(pmd))
+               return 0;
 retry:
        pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
        for (; addr != end; addr += PAGE_SIZE) {
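
Both page-table walkers get the same guard: split_huge_page_pmd() is called with only mmap_sem held for read, so by the time pte_offset_map_lock() runs the pmd may have been cleared or refaulted as a transparent huge page again, and mapping it as a pte table would walk garbage. pmd_trans_unstable() detects those transient states and the callback simply skips the range. The resulting shape of such a walker callback, sketched with placeholder names and assuming the usual <linux/mm.h>/<linux/huge_mm.h> environment:

```c
/*
 * Hedged sketch of the guarded pmd-walk pattern; demo_walk_pmd_range()
 * is a placeholder, not a function from this patch.
 */
static int demo_walk_pmd_range(pmd_t *pmd, unsigned long addr,
                               unsigned long end, struct mm_walk *walk)
{
        struct vm_area_struct *vma = walk->private;
        spinlock_t *ptl;
        pte_t *pte;

        /* Turn a huge pmd back into a normal pte table if needed. */
        split_huge_page_pmd(walk->mm, pmd);

        /*
         * Only mmap_sem is held for read, so the pmd can meanwhile be
         * cleared or refaulted as a THP again; bail out rather than map
         * an unstable pmd as a pte table.
         */
        if (pmd_trans_unstable(pmd))
                return 0;

        pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
        for (; addr != end; pte++, addr += PAGE_SIZE) {
                /* per-pte work of the real callbacks goes here */
        }
        pte_unmap_unlock(pte - 1, ptl);

        return 0;
}
```

Returning 0 lets walk_page_range() continue with the next range; the unstable range is simply left unaccounted, which is acceptable for this best-effort charge accounting.
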