ret = -EBUSY;
- if ((mode & ISOLATE_CLEAN) && (PageDirty(page) || PageWriteback(page)))
- return ret;
+ /*
+ * To minimise LRU disruption, the caller can indicate that it only
+ * wants to isolate pages it will be able to operate on without
+ * blocking - clean pages for the most part.
+ *
+ * ISOLATE_CLEAN means that only clean pages should be isolated. This
+ * is used by reclaim when it cannot write to backing storage
+ *
+ * ISOLATE_ASYNC_MIGRATE is used to indicate that the caller only wants
+ * pages that it is possible to migrate without blocking
+ */
+ if (mode & (ISOLATE_CLEAN|ISOLATE_ASYNC_MIGRATE)) {
+ /* All the caller can do on PageWriteback is block */
+ if (PageWriteback(page))
+ return ret;
+
+ if (PageDirty(page)) {
+ struct address_space *mapping;
+
+ /* ISOLATE_CLEAN means only clean pages */
+ if (mode & ISOLATE_CLEAN)
+ return ret;
+
+ /*
+ * Only pages without mappings or that have a
+ * ->migratepage callback are possible to migrate
+ * without blocking
+ */
+ mapping = page_mapping(page);
+ if (mapping && !mapping->a_ops->migratepage)
+ return ret;
+ }
+ }
if ((mode & ISOLATE_UNMAPPED) && page_mapped(page))
return ret;
* Appropriate locks must be held before calling this function.
*
* @nr_to_scan: The number of pages to look through on the list.
- * @src: The LRU list to pull pages off.
+ * @mz: The mem_cgroup_zone to pull pages from.
* @dst: The temp list to put pages on to.
- * @scanned: The number of pages that were scanned.
+ * @nr_scanned: The number of pages that were scanned.
* @order: The caller's attempted allocation order
* @mode: One of the LRU isolation modes
+ * @active: True [1] if isolating active pages
* @file: True [1] if isolating file [!anon] pages
*
* returns how many pages were moved onto *@dst.
*/
static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
- struct list_head *src, struct list_head *dst,
- unsigned long *scanned, int order, isolate_mode_t mode,
- int file)
+ struct mem_cgroup_zone *mz, struct list_head *dst,
+ unsigned long *nr_scanned, int order, isolate_mode_t mode,
+ int active, int file)
{
+ struct lruvec *lruvec;
+ struct list_head *src;
unsigned long nr_taken = 0;
unsigned long nr_lumpy_taken = 0;
unsigned long nr_lumpy_dirty = 0;
unsigned long nr_lumpy_failed = 0;
unsigned long scan;
+ int lru = LRU_BASE;
+
+ lruvec = mem_cgroup_zone_lruvec(mz->zone, mz->mem_cgroup);
+ if (active)
+ lru += LRU_ACTIVE;
+ if (file)
+ lru += LRU_FILE;
+ src = &lruvec->lists[lru];
for (scan = 0; scan < nr_to_scan && !list_empty(src); scan++) {
struct page *page;
nr_lumpy_failed++;
}
- *scanned = scan;
+ *nr_scanned = scan;
trace_mm_vmscan_lru_isolate(order,
nr_to_scan, scan,
return nr_taken;
}
-static unsigned long isolate_pages(unsigned long nr, struct mem_cgroup_zone *mz,
- struct list_head *dst,
- unsigned long *scanned, int order,
- isolate_mode_t mode, int active, int file)
-{
- struct lruvec *lruvec;
- int lru = LRU_BASE;
-
- lruvec = mem_cgroup_zone_lruvec(mz->zone, mz->mem_cgroup);
- if (active)
- lru += LRU_ACTIVE;
- if (file)
- lru += LRU_FILE;
- return isolate_lru_pages(nr, &lruvec->lists[lru], dst,
- scanned, order, mode, file);
-}
-
-/*
- * clear_active_flags() is a helper for shrink_active_list(), clearing
- * any active bits from the pages in the list.
- */
-static unsigned long clear_active_flags(struct list_head *page_list,
- unsigned int *count)
-{
- int nr_active = 0;
- int lru;
- struct page *page;
-
- list_for_each_entry(page, page_list, lru) {
- int numpages = hpage_nr_pages(page);
- lru = page_lru_base_type(page);
- if (PageActive(page)) {
- lru += LRU_ACTIVE;
- ClearPageActive(page);
- nr_active += numpages;
- }
- if (count)
- count[lru] += numpages;
- }
-
- return nr_active;
-}
-
/**
* isolate_lru_page - tries to isolate a page from its LRU list
* @page: page to isolate from its LRU list
return isolated > inactive;
}
-/*
- * TODO: Try merging with migrations version of putback_lru_pages
- */
static noinline_for_stack void
-putback_lru_pages(struct mem_cgroup_zone *mz, struct scan_control *sc,
- unsigned long nr_anon, unsigned long nr_file,
- struct list_head *page_list)
+putback_inactive_pages(struct mem_cgroup_zone *mz,
+ struct list_head *page_list)
{
- struct page *page;
- struct pagevec pvec;
- struct zone *zone = mz->zone;
struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz);
-
- pagevec_init(&pvec, 1);
+ struct zone *zone = mz->zone;
+ LIST_HEAD(pages_to_free);
/*
* Put back any unfreeable pages.
*/
- spin_lock(&zone->lru_lock);
while (!list_empty(page_list)) {
+ struct page *page = lru_to_page(page_list);
int lru;
- page = lru_to_page(page_list);
+
VM_BUG_ON(PageLRU(page));
list_del(&page->lru);
if (unlikely(!page_evictable(page, NULL))) {
int numpages = hpage_nr_pages(page);
reclaim_stat->recent_rotated[file] += numpages;
}
- if (!pagevec_add(&pvec, page)) {
- spin_unlock_irq(&zone->lru_lock);
- __pagevec_release(&pvec);
- spin_lock_irq(&zone->lru_lock);
+ if (put_page_testzero(page)) {
+ __ClearPageLRU(page);
+ __ClearPageActive(page);
+ del_page_from_lru_list(zone, page, lru);
+
+ if (unlikely(PageCompound(page))) {
+ spin_unlock_irq(&zone->lru_lock);
+ (*get_compound_page_dtor(page))(page);
+ spin_lock_irq(&zone->lru_lock);
+ } else
+ list_add(&page->lru, &pages_to_free);
}
}
- __mod_zone_page_state(zone, NR_ISOLATED_ANON, -nr_anon);
- __mod_zone_page_state(zone, NR_ISOLATED_FILE, -nr_file);
- spin_unlock_irq(&zone->lru_lock);
- pagevec_release(&pvec);
+ /*
+ * To save our caller's stack, now use input list for pages to free.
+ */
+ list_splice(&pages_to_free, page_list);
}
static noinline_for_stack void
update_isolated_counts(struct mem_cgroup_zone *mz,
- struct scan_control *sc,
+ struct list_head *page_list,
unsigned long *nr_anon,
- unsigned long *nr_file,
- struct list_head *isolated_list)
+ unsigned long *nr_file)
{
- unsigned long nr_active;
+ struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz);
struct zone *zone = mz->zone;
unsigned int count[NR_LRU_LISTS] = { 0, };
- struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz);
+ unsigned long nr_active = 0;
+ struct page *page;
+ int lru;
+
+ /*
+ * Count pages and clear active flags
+ */
+ list_for_each_entry(page, page_list, lru) {
+ int numpages = hpage_nr_pages(page);
+ lru = page_lru_base_type(page);
+ if (PageActive(page)) {
+ lru += LRU_ACTIVE;
+ ClearPageActive(page);
+ nr_active += numpages;
+ }
+ count[lru] += numpages;
+ }
- nr_active = clear_active_flags(isolated_list, count);
__count_vm_events(PGDEACTIVATE, nr_active);
__mod_zone_page_state(zone, NR_ACTIVE_FILE,
*nr_anon = count[LRU_ACTIVE_ANON] + count[LRU_INACTIVE_ANON];
*nr_file = count[LRU_ACTIVE_FILE] + count[LRU_INACTIVE_FILE];
- __mod_zone_page_state(zone, NR_ISOLATED_ANON, *nr_anon);
- __mod_zone_page_state(zone, NR_ISOLATED_FILE, *nr_file);
reclaim_stat->recent_scanned[0] += *nr_anon;
reclaim_stat->recent_scanned[1] += *nr_file;
spin_lock_irq(&zone->lru_lock);
- nr_taken = isolate_pages(nr_to_scan, mz, &page_list,
- &nr_scanned, sc->order,
- reclaim_mode, 0, file);
+ nr_taken = isolate_lru_pages(nr_to_scan, mz, &page_list,
+ &nr_scanned, sc->order,
+ reclaim_mode, 0, file);
if (global_reclaim(sc)) {
zone->pages_scanned += nr_scanned;
if (current_is_kswapd())
return 0;
}
- update_isolated_counts(mz, sc, &nr_anon, &nr_file, &page_list);
+ update_isolated_counts(mz, &page_list, &nr_anon, &nr_file);
+
+ __mod_zone_page_state(zone, NR_ISOLATED_ANON, nr_anon);
+ __mod_zone_page_state(zone, NR_ISOLATED_FILE, nr_file);
spin_unlock_irq(&zone->lru_lock);
priority, &nr_dirty, &nr_writeback);
}
- local_irq_disable();
+ spin_lock_irq(&zone->lru_lock);
+
if (current_is_kswapd())
__count_vm_events(KSWAPD_STEAL, nr_reclaimed);
__count_zone_vm_events(PGSTEAL, zone, nr_reclaimed);
- putback_lru_pages(mz, sc, nr_anon, nr_file, &page_list);
+ putback_inactive_pages(mz, &page_list);
+
+ __mod_zone_page_state(zone, NR_ISOLATED_ANON, -nr_anon);
+ __mod_zone_page_state(zone, NR_ISOLATED_FILE, -nr_file);
+
+ spin_unlock_irq(&zone->lru_lock);
+
+ free_hot_cold_page_list(&page_list, 1);
/*
* If reclaim is isolating dirty pages under writeback, it implies
static void move_active_pages_to_lru(struct zone *zone,
struct list_head *list,
+ struct list_head *pages_to_free,
enum lru_list lru)
{
unsigned long pgmoved = 0;
- struct pagevec pvec;
struct page *page;
- pagevec_init(&pvec, 1);
+ if (buffer_heads_over_limit) {
+ spin_unlock_irq(&zone->lru_lock);
+ list_for_each_entry(page, list, lru) {
+ if (page_has_private(page) && trylock_page(page)) {
+ if (page_has_private(page))
+ try_to_release_page(page, 0);
+ unlock_page(page);
+ }
+ }
+ spin_lock_irq(&zone->lru_lock);
+ }
while (!list_empty(list)) {
struct lruvec *lruvec;
list_move(&page->lru, &lruvec->lists[lru]);
pgmoved += hpage_nr_pages(page);
- if (!pagevec_add(&pvec, page) || list_empty(list)) {
- spin_unlock_irq(&zone->lru_lock);
- if (buffer_heads_over_limit)
- pagevec_strip(&pvec);
- __pagevec_release(&pvec);
- spin_lock_irq(&zone->lru_lock);
+ if (put_page_testzero(page)) {
+ __ClearPageLRU(page);
+ __ClearPageActive(page);
+ del_page_from_lru_list(zone, page, lru);
+
+ if (unlikely(PageCompound(page))) {
+ spin_unlock_irq(&zone->lru_lock);
+ (*get_compound_page_dtor(page))(page);
+ spin_lock_irq(&zone->lru_lock);
+ } else
+ list_add(&page->lru, pages_to_free);
}
}
__mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved);
__count_vm_events(PGDEACTIVATE, pgmoved);
}
-static void shrink_active_list(unsigned long nr_pages,
+static void shrink_active_list(unsigned long nr_to_scan,
struct mem_cgroup_zone *mz,
struct scan_control *sc,
int priority, int file)
{
unsigned long nr_taken;
- unsigned long pgscanned;
+ unsigned long nr_scanned;
unsigned long vm_flags;
LIST_HEAD(l_hold); /* The pages which were snipped off */
LIST_HEAD(l_active);
spin_lock_irq(&zone->lru_lock);
- nr_taken = isolate_pages(nr_pages, mz, &l_hold,
- &pgscanned, sc->order,
- reclaim_mode, 1, file);
-
+ nr_taken = isolate_lru_pages(nr_to_scan, mz, &l_hold,
+ &nr_scanned, sc->order,
+ reclaim_mode, 1, file);
if (global_reclaim(sc))
- zone->pages_scanned += pgscanned;
+ zone->pages_scanned += nr_scanned;
reclaim_stat->recent_scanned[file] += nr_taken;
- __count_zone_vm_events(PGREFILL, zone, pgscanned);
+ __count_zone_vm_events(PGREFILL, zone, nr_scanned);
if (file)
__mod_zone_page_state(zone, NR_ACTIVE_FILE, -nr_taken);
else
*/
reclaim_stat->recent_rotated[file] += nr_rotated;
- move_active_pages_to_lru(zone, &l_active,
+ move_active_pages_to_lru(zone, &l_active, &l_hold,
LRU_ACTIVE + file * LRU_FILE);
- move_active_pages_to_lru(zone, &l_inactive,
+ move_active_pages_to_lru(zone, &l_inactive, &l_hold,
LRU_BASE + file * LRU_FILE);
__mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken);
spin_unlock_irq(&zone->lru_lock);
+
+ free_hot_cold_page_list(&l_hold, 1);
}
#ifdef CONFIG_SWAP
unsigned long ap, fp;
struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz);
u64 fraction[2], denominator;
- enum lru_list l;
+ enum lru_list lru;
int noswap = 0;
bool force_scan = false;
fraction[1] = fp;
denominator = ap + fp + 1;
out:
- for_each_evictable_lru(l) {
- int file = is_file_lru(l);
+ for_each_evictable_lru(lru) {
+ int file = is_file_lru(lru);
unsigned long scan;
- scan = zone_nr_lru_pages(mz, l);
+ scan = zone_nr_lru_pages(mz, lru);
if (priority || noswap) {
scan >>= priority;
if (!scan && force_scan)
scan = SWAP_CLUSTER_MAX;
scan = div64_u64(scan * fraction[file], denominator);
}
- nr[l] = scan;
+ nr[lru] = scan;
}
}
{
unsigned long nr[NR_LRU_LISTS];
unsigned long nr_to_scan;
- enum lru_list l;
+ enum lru_list lru;
unsigned long nr_reclaimed, nr_scanned;
unsigned long nr_to_reclaim = sc->nr_to_reclaim;
struct blk_plug plug;
blk_start_plug(&plug);
while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
nr[LRU_INACTIVE_FILE]) {
- for_each_evictable_lru(l) {
- if (nr[l]) {
+ for_each_evictable_lru(lru) {
+ if (nr[lru]) {
nr_to_scan = min_t(unsigned long,
- nr[l], SWAP_CLUSTER_MAX);
- nr[l] -= nr_to_scan;
+ nr[lru], SWAP_CLUSTER_MAX);
+ nr[lru] -= nr_to_scan;
- nr_reclaimed += shrink_list(l, nr_to_scan,
+ nr_reclaimed += shrink_list(lru, nr_to_scan,
mz, sc, priority);
}
}
} while (memcg);
}
+/*
+ * Returns true if compaction should go ahead for a high-order request.
+ *
+ * Always returns false when sc->order is at or below
+ * PAGE_ALLOC_COSTLY_ORDER. For larger orders the answer is whether the zone
+ * passes zone_watermark_ok_safe() against a raised watermark (high
+ * watermark, plus a balance gap, plus 2UL << sc->order). The one exception:
+ * when compaction is not deferred and compaction_suitable() reports that it
+ * cannot start, the result is false regardless of the watermark check.
+ */
+static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
+{
+ unsigned long balance_gap, watermark;
+ bool watermark_ok;
+
+ /* Do not consider compaction for orders reclaim is meant to satisfy */
+ if (sc->order <= PAGE_ALLOC_COSTLY_ORDER)
+ return false;
+
+ /*
+ * Compaction takes time to run and there are potentially other
+ * callers using the pages just freed. Continue reclaiming until
+ * there is a buffer of free pages available to give compaction
+ * a reasonable chance of completing and allocating the page.
+ *
+ * The balance gap is the smaller of the zone's low watermark and
+ * present_pages / KSWAPD_ZONE_BALANCE_GAP_RATIO (rounded up).
+ */
+ balance_gap = min(low_wmark_pages(zone),
+ (zone->present_pages + KSWAPD_ZONE_BALANCE_GAP_RATIO-1) /
+ KSWAPD_ZONE_BALANCE_GAP_RATIO);
+ watermark = high_wmark_pages(zone) + balance_gap + (2UL << sc->order);
+ watermark_ok = zone_watermark_ok_safe(zone, 0, watermark, 0, 0);
+
+ /*
+ * If compaction is deferred, reclaim up to a point where
+ * compaction will have a chance of success when re-enabled
+ */
+ if (compaction_deferred(zone))
+ return watermark_ok;
+
+ /* If compaction is not ready to start, keep reclaiming */
+ if (!compaction_suitable(zone, sc->order))
+ return false;
+
+ /* Compaction can start; go ahead only if the free-page buffer exists */
+ return watermark_ok;
+}
+
/*
* This is the direct reclaim path, for page-allocating processes. We only
* try to reclaim pages from zones which will satisfy the caller's allocation
* scan then give up on it.
*
* This function returns true if a zone is being reclaimed for a costly
- * high-order allocation and compaction is either ready to begin or deferred.
- * This indicates to the caller that it should retry the allocation or fail.
+ * high-order allocation and compaction is ready to begin. This indicates to
+ * the caller that it should consider retrying the allocation instead of
+ * further reclaim.
*/
static bool shrink_zones(int priority, struct zonelist *zonelist,
struct scan_control *sc)
struct zone *zone;
unsigned long nr_soft_reclaimed;
unsigned long nr_soft_scanned;
- bool should_abort_reclaim = false;
+ bool aborted_reclaim = false;
for_each_zone_zonelist_nodemask(zone, z, zonelist,
gfp_zone(sc->gfp_mask), sc->nodemask) {
* noticable problem, like transparent huge page
* allocations.
*/
- if (sc->order > PAGE_ALLOC_COSTLY_ORDER &&
- (compaction_suitable(zone, sc->order) ||
- compaction_deferred(zone))) {
- should_abort_reclaim = true;
+ if (compaction_ready(zone, sc)) {
+ aborted_reclaim = true;
continue;
}
}
shrink_zone(priority, zone, sc);
}
- return should_abort_reclaim;
+ return aborted_reclaim;
}
static bool zone_reclaimable(struct zone *zone)
struct zoneref *z;
struct zone *zone;
unsigned long writeback_threshold;
+ bool aborted_reclaim;
get_mems_allowed();
delayacct_freepages_start();
sc->nr_scanned = 0;
if (!priority)
disable_swap_token(sc->target_mem_cgroup);
- if (shrink_zones(priority, zonelist, sc))
- break;
+ aborted_reclaim = shrink_zones(priority, zonelist, sc);
/*
* Don't shrink slabs when reclaiming memory from
if (oom_killer_disabled)
return 0;
+ /* Aborted reclaim to try compaction? don't OOM, then */
+ if (aborted_reclaim)
+ return 1;
+
/* top priority shrink_zones still had more to do? don't OOM, then */
if (global_reclaim(sc) && !all_unreclaimable(zonelist, sc))
return 1;