#include <linux/bit_spinlock.h>
#include <linux/rcupdate.h>
#include <linux/limits.h>
+#include <linux/export.h>
#include <linux/mutex.h>
#include <linux/rbtree.h>
#include <linux/slab.h>
{
unsigned long val, next;
- val = this_cpu_read(memcg->stat->events[MEM_CGROUP_EVENTS_COUNT]);
- next = this_cpu_read(memcg->stat->targets[target]);
+ val = __this_cpu_read(memcg->stat->events[MEM_CGROUP_EVENTS_COUNT]);
+ next = __this_cpu_read(memcg->stat->targets[target]);
/* from time_after() in jiffies.h */
return ((long)next - (long)val < 0);
}
{
unsigned long val, next;
- val = this_cpu_read(memcg->stat->events[MEM_CGROUP_EVENTS_COUNT]);
+ val = __this_cpu_read(memcg->stat->events[MEM_CGROUP_EVENTS_COUNT]);
switch (target) {
case MEM_CGROUP_TARGET_THRESH:
return;
}
- this_cpu_write(memcg->stat->targets[target], next);
+ __this_cpu_write(memcg->stat->targets[target], next);
}
/*
*/
static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
{
+ preempt_disable();
/* threshold event is triggered in finer grain than soft limit */
if (unlikely(__memcg_event_check(memcg, MEM_CGROUP_TARGET_THRESH))) {
mem_cgroup_threshold(memcg);
}
#endif
}
+ preempt_enable();
}
static struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont)
return;
pc = lookup_page_cgroup(page);
VM_BUG_ON(PageCgroupAcctLRU(pc));
+ /*
+ * putback: charge:
+ * SetPageLRU SetPageCgroupUsed
+ * smp_mb smp_mb
+ * PageCgroupUsed && add to memcg LRU PageLRU && add to memcg LRU
+ *
+ * Ensure that one of the two sides adds the page to the memcg
+ * LRU during a race.
+ */
+ smp_mb();
if (!PageCgroupUsed(pc))
return;
/* Ensure pc->mem_cgroup is visible after reading PCG_USED. */
unsigned long flags;
struct zone *zone = page_zone(page);
struct page_cgroup *pc = lookup_page_cgroup(page);
-
+ /*
+ * putback: charge:
+ * SetPageLRU SetPageCgroupUsed
+ * smp_mb smp_mb
+ * PageCgroupUsed && add to memcg LRU PageLRU && add to memcg LRU
+ *
+ * Ensure that one of the two sides adds the page to the memcg
+ * LRU during a race.
+ */
+ smp_mb();
/* taking care of that the page is added to LRU while we commit it */
if (likely(!PageLRU(page)))
return;
return ret;
}
-static int calc_inactive_ratio(struct mem_cgroup *memcg, unsigned long *present_pages)
+int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone)
{
- unsigned long active;
+ unsigned long inactive_ratio;
+ int nid = zone_to_nid(zone);
+ int zid = zone_idx(zone);
unsigned long inactive;
+ unsigned long active;
unsigned long gb;
- unsigned long inactive_ratio;
- inactive = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_INACTIVE_ANON));
- active = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_ACTIVE_ANON));
+ inactive = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid,
+ BIT(LRU_INACTIVE_ANON));
+ active = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid,
+ BIT(LRU_ACTIVE_ANON));
gb = (inactive + active) >> (30 - PAGE_SHIFT);
if (gb)
else
inactive_ratio = 1;
- if (present_pages) {
- present_pages[0] = inactive;
- present_pages[1] = active;
- }
-
- return inactive_ratio;
-}
-
-int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg)
-{
- unsigned long active;
- unsigned long inactive;
- unsigned long present_pages[2];
- unsigned long inactive_ratio;
-
- inactive_ratio = calc_inactive_ratio(memcg, present_pages);
-
- inactive = present_pages[0];
- active = present_pages[1];
-
- if (inactive * inactive_ratio < active)
- return 1;
-
- return 0;
+ return inactive * inactive_ratio < active;
}
-int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg)
+int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg, struct zone *zone)
{
unsigned long active;
unsigned long inactive;
+ int zid = zone_idx(zone);
+ int nid = zone_to_nid(zone);
- inactive = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_INACTIVE_FILE));
- active = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_ACTIVE_FILE));
+ inactive = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid,
+ BIT(LRU_INACTIVE_FILE));
+ active = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid,
+ BIT(LRU_ACTIVE_FILE));
return (active > inactive);
}
u64 mem_cgroup_get_limit(struct mem_cgroup *memcg)
{
u64 limit;
- u64 memsw;
limit = res_counter_read_u64(&memcg->res, RES_LIMIT);
- limit += total_swap_pages << PAGE_SHIFT;
- memsw = res_counter_read_u64(&memcg->memsw, RES_LIMIT);
/*
- * If memsw is finite and limits the amount of swap space available
- * to this memcg, return that limit.
+ * Do not consider swap space if we cannot swap due to swappiness
*/
- return min(limit, memsw);
+ if (mem_cgroup_swappiness(memcg)) {
+ u64 memsw;
+
+ limit += total_swap_pages << PAGE_SHIFT;
+ memsw = res_counter_read_u64(&memcg->memsw, RES_LIMIT);
+
+ /*
+ * If memsw is finite and limits the amount of swap space
+ * available to this memcg, return that limit.
+ */
+ limit = min(limit, memsw);
+ }
+
+ return limit;
}
/*
if (test_thread_flag(TIF_MEMDIE) || fatal_signal_pending(current))
return false;
/* Give chance to dying process */
- schedule_timeout(1);
+ schedule_timeout_uninterruptible(1);
return true;
}
cgroup_release_and_wakeup_rmdir(&memcg->css);
}
+/*
+ * At replace page cache, newpage is not under any memcg but it's on
+ * LRU. So, this function doesn't touch res_counter but handles LRU
+ * in correct way. Both pages are locked so we cannot race with uncharge.
+ */
+void mem_cgroup_replace_page_cache(struct page *oldpage,
+ struct page *newpage)
+{
+ struct mem_cgroup *memcg;
+ struct page_cgroup *pc;
+ struct zone *zone;
+ enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE;
+ unsigned long flags;
+
+ if (mem_cgroup_disabled())
+ return;
+
+ pc = lookup_page_cgroup(oldpage);
+ /* fix accounting on old pages */
+ lock_page_cgroup(pc);
+ memcg = pc->mem_cgroup;
+ mem_cgroup_charge_statistics(memcg, PageCgroupCache(pc), -1);
+ ClearPageCgroupUsed(pc);
+ unlock_page_cgroup(pc);
+
+ if (PageSwapBacked(oldpage))
+ type = MEM_CGROUP_CHARGE_TYPE_SHMEM;
+
+ zone = page_zone(newpage);
+ pc = lookup_page_cgroup(newpage);
+ /*
+ * Even if newpage->mapping was NULL before starting replacement,
+ * the newpage may be on LRU(or pagevec for LRU) already. We lock
+ * LRU while we overwrite pc->mem_cgroup.
+ */
+ spin_lock_irqsave(&zone->lru_lock, flags);
+ if (PageLRU(newpage))
+ del_page_from_lru_list(zone, newpage, page_lru(newpage));
+ __mem_cgroup_commit_charge(memcg, newpage, 1, pc, type);
+ if (PageLRU(newpage))
+ add_page_to_lru_list(zone, newpage, page_lru(newpage));
+ spin_unlock_irqrestore(&zone->lru_lock, flags);
+}
+
#ifdef CONFIG_DEBUG_VM
static struct page_cgroup *lookup_page_cgroup_used(struct page *page)
{
}
#ifdef CONFIG_DEBUG_VM
- cb->fill(cb, "inactive_ratio", calc_inactive_ratio(mem_cont, NULL));
-
{
int nid, zid;
struct mem_cgroup_per_zone *mz;
const struct mem_cgroup_threshold *_a = a;
const struct mem_cgroup_threshold *_b = b;
- return _a->threshold - _b->threshold;
+ if (_a->threshold > _b->threshold)
+ return 1;
+
+ if (_a->threshold < _b->threshold)
+ return -1;
+
+ return 0;
}
static int mem_cgroup_oom_notify_cb(struct mem_cgroup *memcg)
*/
BUG_ON(!thresholds);
+ if (!thresholds->primary)
+ goto unlock;
+
usage = mem_cgroup_usage(memcg, type == _MEMSWAP);
/* Check if a threshold crossed before removing */
swap_buffers:
/* Swap primary and spare array */
thresholds->spare = thresholds->primary;
+
rcu_assign_pointer(thresholds->primary, new);
/* To be sure that nobody uses thresholds */
synchronize_rcu();
+ /* If all events are unregistered, free the spare array */
+ if (!new) {
+ kfree(thresholds->spare);
+ thresholds->spare = NULL;
+ }
+unlock:
mutex_unlock(&memcg->thresholds_lock);
}
if (!pn)
return 1;
- memcg->info.nodeinfo[node] = pn;
for (zone = 0; zone < MAX_NR_ZONES; zone++) {
mz = &pn->zoneinfo[zone];
for_each_lru(l)
mz->on_tree = false;
mz->mem = memcg;
}
+ memcg->info.nodeinfo[node] = pn;
return 0;
}
int cpu;
enable_swap_cgroup();
parent = NULL;
- root_mem_cgroup = memcg;
if (mem_cgroup_soft_limit_tree_init())
goto free_out;
+ root_mem_cgroup = memcg;
for_each_possible_cpu(cpu) {
struct memcg_stock_pcp *stock =
&per_cpu(memcg_stock, cpu);
return &memcg->css;
free_out:
__mem_cgroup_free(memcg);
- root_mem_cgroup = NULL;
return ERR_PTR(error);
}
spinlock_t *ptl;
split_huge_page_pmd(walk->mm, pmd);
+ if (pmd_trans_unstable(pmd))
+ return 0;
pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
for (; addr != end; pte++, addr += PAGE_SIZE)
spinlock_t *ptl;
split_huge_page_pmd(walk->mm, pmd);
+ if (pmd_trans_unstable(pmd))
+ return 0;
retry:
pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
for (; addr != end; addr += PAGE_SIZE) {