diff --git a/mm/vmscan.c b/mm/vmscan.c
index f54a05b..7c8757f 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -636,7 +636,7 @@ redo:
                 * When racing with an mlock or AS_UNEVICTABLE clearing
                 * (page is unlocked) make sure that if the other thread
                 * does not observe our setting of PG_lru and fails
-                * isolation/check_move_unevictable_page,
+                * isolation/check_move_unevictable_pages,
                 * we see PG_mlocked/AS_UNEVICTABLE cleared below and move
                 * the page back to the evictable list.
                 *
@@ -697,7 +697,7 @@ static enum page_references page_check_references(struct page *page,
                return PAGEREF_RECLAIM;
 
        if (referenced_ptes) {
-               if (PageAnon(page))
+               if (PageSwapBacked(page))
                        return PAGEREF_ACTIVATE;
                /*
                 * All mapped pages start out with page table
@@ -715,7 +715,13 @@ static enum page_references page_check_references(struct page *page,
                 */
                SetPageReferenced(page);
 
-               if (referenced_page)
+               if (referenced_page || referenced_ptes > 1)
+                       return PAGEREF_ACTIVATE;
+
+               /*
+                * Activate file-backed executable pages after first usage.
+                */
+               if (vm_flags & VM_EXEC)
                        return PAGEREF_ACTIVATE;
 
                return PAGEREF_KEEP;
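
To make the new ordering easier to follow, here is a minimal userspace model of
the decision sequence this hunk produces. The function name and its boolean
parameters are invented for illustration; they stand in for PageSwapBacked(),
the rmap results and VM_EXEC, and the dirty-page handling of the final
PAGEREF_RECLAIM case is left out.

#include <stdbool.h>

enum page_references { PAGEREF_RECLAIM, PAGEREF_KEEP, PAGEREF_ACTIVATE };

/* Simplified model of the patched page_check_references() ordering. */
static enum page_references
page_refs_model(int referenced_ptes, bool referenced_page,
                bool swap_backed, bool vm_exec)
{
        if (!referenced_ptes)
                return PAGEREF_RECLAIM;
        if (swap_backed)                 /* anon and shmem: activate on any pte reference */
                return PAGEREF_ACTIVATE;
        if (referenced_page || referenced_ptes > 1)
                return PAGEREF_ACTIVATE; /* referenced again, or mapped more than once */
        if (vm_exec)
                return PAGEREF_ACTIVATE; /* executable file pages promote on first use */
        return PAGEREF_KEEP;             /* give ordinary file pages a second trip */
}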
@@ -728,24 +734,6 @@ static enum page_references page_check_references(struct page *page,
        return PAGEREF_RECLAIM;
 }
 
-static noinline_for_stack void free_page_list(struct list_head *free_pages)
-{
-       struct pagevec freed_pvec;
-       struct page *page, *tmp;
-
-       pagevec_init(&freed_pvec, 1);
-
-       list_for_each_entry_safe(page, tmp, free_pages, lru) {
-               list_del(&page->lru);
-               if (!pagevec_add(&freed_pvec, page)) {
-                       __pagevec_free(&freed_pvec);
-                       pagevec_reinit(&freed_pvec);
-               }
-       }
-
-       pagevec_free(&freed_pvec);
-}
-
 /*
  * shrink_page_list() returns the number of reclaimed pages
  */
@@ -1009,7 +997,7 @@ keep_lumpy:
        if (nr_dirty && nr_dirty == nr_congested && scanning_global_lru(sc))
                zone_set_flag(zone, ZONE_CONGESTED);
 
-       free_page_list(&free_pages);
+       free_hot_cold_page_list(&free_pages, 1);
 
        list_splice(&ret_pages, page_list);
        count_vm_events(PGACTIVATE, pgactivate);
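
The batching that the removed free_page_list() did by hand through a pagevec is
now delegated to free_hot_cold_page_list(), which walks the whole list and frees
the pages as cold (the second argument). For reference, the generic shape of the
old pattern (drain a list in fixed-size batches, then flush the remainder) looks
like this in plain C; the type, constant and callback names are invented.

#include <stddef.h>

#define BATCH_SIZE 14                    /* same spirit as PAGEVEC_SIZE */

struct node { struct node *next; };

/*
 * Drain a singly linked list in fixed-size batches, flushing each full
 * batch and finally the remainder, the way free_page_list() did before
 * this patch handed the whole list to the allocator in one call.
 */
static void drain_in_batches(struct node *head,
                             void (*flush)(struct node **batch, size_t n))
{
        struct node *batch[BATCH_SIZE];
        size_t n = 0;

        while (head) {
                struct node *next = head->next;

                batch[n++] = head;
                if (n == BATCH_SIZE) {
                        flush(batch, n);
                        n = 0;
                }
                head = next;
        }
        if (n)
                flush(batch, n);
}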
@@ -1061,8 +1049,39 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode, int file)
 
        ret = -EBUSY;
 
-       if ((mode & ISOLATE_CLEAN) && (PageDirty(page) || PageWriteback(page)))
-               return ret;
+       /*
+        * To minimise LRU disruption, the caller can indicate that it only
+        * wants to isolate pages it will be able to operate on without
+        * blocking - clean pages for the most part.
+        *
+        * ISOLATE_CLEAN means that only clean pages should be isolated. This
+        * is used by reclaim when it cannot write to backing storage
+        *
+        * ISOLATE_ASYNC_MIGRATE is used to indicate that the caller only wants
+        * pages that it is possible to migrate without blocking
+        */
+       if (mode & (ISOLATE_CLEAN|ISOLATE_ASYNC_MIGRATE)) {
+               /* All the caller can do on PageWriteback is block */
+               if (PageWriteback(page))
+                       return ret;
+
+               if (PageDirty(page)) {
+                       struct address_space *mapping;
+
+                       /* ISOLATE_CLEAN means only clean pages */
+                       if (mode & ISOLATE_CLEAN)
+                               return ret;
+
+                       /*
+                        * Only pages without mappings or that have a
+                        * ->migratepage callback are possible to migrate
+                        * without blocking
+                        */
+                       mapping = page_mapping(page);
+                       if (mapping && !mapping->a_ops->migratepage)
+                               return ret;
+               }
+       }
 
        if ((mode & ISOLATE_UNMAPPED) && page_mapped(page))
                return ret;
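
The added checks form a short decision tree. Below is a standalone sketch of the
same ordering; may_isolate(), its booleans and the flag values are invented
stand-ins for PageWriteback(), PageDirty(), page_mapping() and the ->migratepage
hook, not the kernel definitions.

#include <stdbool.h>

#define ISOLATE_CLEAN         0x1        /* illustrative values only */
#define ISOLATE_ASYNC_MIGRATE 0x2

/* Mirror of the blocking-avoidance checks added to __isolate_lru_page(). */
static bool may_isolate(unsigned int mode, bool writeback, bool dirty,
                        bool has_mapping, bool has_migratepage)
{
        if (!(mode & (ISOLATE_CLEAN | ISOLATE_ASYNC_MIGRATE)))
                return true;

        if (writeback)                   /* the only option would be to block */
                return false;

        if (dirty) {
                if (mode & ISOLATE_CLEAN)
                        return false;    /* reclaim that cannot write back */
                if (has_mapping && !has_migratepage)
                        return false;    /* migration would have to block */
        }
        return true;
}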
@@ -1178,7 +1197,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
                         * anon pages which don't already have a swap slot is
                         * pointless.
                         */
-                       if (nr_swap_pages <= 0 && PageAnon(cursor_page) &&
+                       if (nr_swap_pages <= 0 && PageSwapBacked(cursor_page) &&
                            !PageSwapCache(cursor_page))
                                break;
 
@@ -1874,7 +1893,8 @@ static void get_scan_count(struct zone *zone, struct scan_control *sc,
         * latencies, so it's better to scan a minimum amount there as
         * well.
         */
-       if (scanning_global_lru(sc) && current_is_kswapd())
+       if (scanning_global_lru(sc) && current_is_kswapd() &&
+           zone->all_unreclaimable)
                force_scan = true;
        if (!scanning_global_lru(sc))
                force_scan = true;
@@ -1939,10 +1959,10 @@ static void get_scan_count(struct zone *zone, struct scan_control *sc,
         * proportional to the fraction of recently scanned pages on
         * each list that were recently referenced and in active use.
         */
-       ap = (anon_prio + 1) * (reclaim_stat->recent_scanned[0] + 1);
+       ap = anon_prio * (reclaim_stat->recent_scanned[0] + 1);
        ap /= reclaim_stat->recent_rotated[0] + 1;
 
-       fp = (file_prio + 1) * (reclaim_stat->recent_scanned[1] + 1);
+       fp = file_prio * (reclaim_stat->recent_scanned[1] + 1);
        fp /= reclaim_stat->recent_rotated[1] + 1;
        spin_unlock_irq(&zone->lru_lock);
 
@@ -1955,7 +1975,7 @@ out:
                unsigned long scan;
 
                scan = zone_nr_lru_pages(zone, sc, l);
-               if (priority || noswap) {
+               if (priority || noswap || !vmscan_swappiness(sc)) {
                        scan >>= priority;
                        if (!scan && force_scan)
                                scan = SWAP_CLUSTER_MAX;
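
Dropping the '+ 1' from the priority factors two hunks above matters when
swappiness is 0: anon_prio becomes 0, so ap works out to 0 and the anon lists
are skipped entirely, which is what the !vmscan_swappiness(sc) test added here
relies on. A quick userspace check of the arithmetic, with invented
recent_scanned/recent_rotated figures:

#include <stdio.h>

int main(void)
{
        unsigned long swappiness = 0;                 /* e.g. vm.swappiness = 0 */
        unsigned long anon_prio = swappiness;         /* as in get_scan_count() */
        unsigned long file_prio = 200 - swappiness;

        /* recent_scanned / recent_rotated: illustrative numbers only */
        unsigned long recent_scanned[2] = { 5000, 120000 };
        unsigned long recent_rotated[2] = { 4000, 10000 };

        unsigned long ap = anon_prio * (recent_scanned[0] + 1) /
                           (recent_rotated[0] + 1);
        unsigned long fp = file_prio * (recent_scanned[1] + 1) /
                           (recent_rotated[1] + 1);

        /* Before the patch, (anon_prio + 1) * ... still produced a nonzero ap. */
        printf("ap=%lu fp=%lu\n", ap, fp);            /* ap == 0 when swappiness == 0 */
        return 0;
}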
@@ -2012,8 +2032,9 @@ static inline bool should_continue_reclaim(struct zone *zone,
         * inactive lists are large enough, continue reclaiming
         */
        pages_for_compaction = (2UL << sc->order);
-       inactive_lru_pages = zone_nr_lru_pages(zone, sc, LRU_INACTIVE_ANON) +
-                               zone_nr_lru_pages(zone, sc, LRU_INACTIVE_FILE);
+       inactive_lru_pages = zone_nr_lru_pages(zone, sc, LRU_INACTIVE_FILE);
+       if (nr_swap_pages > 0)
+               inactive_lru_pages += zone_nr_lru_pages(zone, sc, LRU_INACTIVE_ANON);
        if (sc->nr_reclaimed < pages_for_compaction &&
                        inactive_lru_pages > pages_for_compaction)
                return true;
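
The inactive anon list only counts toward the compaction target when swap is
available, since without swap those pages cannot be reclaimed anyway. A sketch
of the patched test; continue_for_compaction() is an invented wrapper, and for
an order-9 THP request the target is 2 << 9 = 1024 pages.

#include <stdbool.h>

/*
 * Sketch of the patched "keep reclaiming so compaction can run?" test.
 * With nr_swap_pages <= 0 the inactive anon list cannot be reclaimed,
 * so it no longer counts toward the pages_for_compaction target.
 */
static bool continue_for_compaction(int order, unsigned long nr_reclaimed,
                                    unsigned long inactive_file,
                                    unsigned long inactive_anon,
                                    long nr_swap_pages)
{
        unsigned long pages_for_compaction = 2UL << order;
        unsigned long inactive_lru_pages = inactive_file;

        if (nr_swap_pages > 0)
                inactive_lru_pages += inactive_anon;

        return nr_reclaimed < pages_for_compaction &&
               inactive_lru_pages > pages_for_compaction;
}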
@@ -2088,6 +2109,42 @@ restart:
        throttle_vm_writeout(sc->gfp_mask);
 }
 
+/* Returns true if compaction should go ahead for a high-order request */
+static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
+{
+       unsigned long balance_gap, watermark;
+       bool watermark_ok;
+
+       /* Do not consider compaction for orders reclaim is meant to satisfy */
+       if (sc->order <= PAGE_ALLOC_COSTLY_ORDER)
+               return false;
+
+       /*
+        * Compaction takes time to run and there are potentially other
+        * callers using the pages just freed. Continue reclaiming until
+        * there is a buffer of free pages available to give compaction
+        * a reasonable chance of completing and allocating the page
+        */
+       balance_gap = min(low_wmark_pages(zone),
+               (zone->present_pages + KSWAPD_ZONE_BALANCE_GAP_RATIO-1) /
+                       KSWAPD_ZONE_BALANCE_GAP_RATIO);
+       watermark = high_wmark_pages(zone) + balance_gap + (2UL << sc->order);
+       watermark_ok = zone_watermark_ok_safe(zone, 0, watermark, 0, 0);
+
+       /*
+        * If compaction is deferred, reclaim up to a point where
+        * compaction will have a chance of success when re-enabled
+        */
+       if (compaction_deferred(zone))
+               return watermark_ok;
+
+       /* If compaction is not ready to start, keep reclaiming */
+       if (!compaction_suitable(zone, sc->order))
+               return false;
+
+       return watermark_ok;
+}
+
 /*
  * This is the direct reclaim path, for page-allocating processes.  We only
  * try to reclaim pages from zones which will satisfy the caller's allocation
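
To put numbers on the watermark computed in compaction_ready(): for an order-9
request the reclaim target is the high watermark plus the balance gap plus 1024
pages. A userspace version of the same arithmetic; the zone size and watermarks
are invented, while KSWAPD_ZONE_BALANCE_GAP_RATIO is 100 in this tree.

#include <stdio.h>

#define KSWAPD_ZONE_BALANCE_GAP_RATIO 100

int main(void)
{
        /* Illustrative zone: 1 GiB of 4 KiB pages, invented watermarks. */
        unsigned long present_pages = 262144;
        unsigned long low_wmark = 4096, high_wmark = 5120;
        int order = 9;                                   /* THP-sized request */

        unsigned long ratio_gap = (present_pages +
                        KSWAPD_ZONE_BALANCE_GAP_RATIO - 1) /
                        KSWAPD_ZONE_BALANCE_GAP_RATIO;   /* 2622 pages */
        unsigned long balance_gap = low_wmark < ratio_gap ? low_wmark : ratio_gap;
        unsigned long watermark = high_wmark + balance_gap + (2UL << order);

        /* 5120 + 2622 + 1024 = 8766 free pages before compaction is "ready" */
        printf("watermark = %lu pages\n", watermark);
        return 0;
}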
@@ -2105,8 +2162,9 @@ restart:
  * scan then give up on it.
  *
  * This function returns true if a zone is being reclaimed for a costly
- * high-order allocation and compaction is either ready to begin or deferred.
- * This indicates to the caller that it should retry the allocation or fail.
+ * high-order allocation and compaction is ready to begin. This indicates to
+ * the caller that it should consider retrying the allocation instead of
+ * further reclaim.
  */
 static bool shrink_zones(int priority, struct zonelist *zonelist,
                                        struct scan_control *sc)
@@ -2115,7 +2173,7 @@ static bool shrink_zones(int priority, struct zonelist *zonelist,
        struct zone *zone;
        unsigned long nr_soft_reclaimed;
        unsigned long nr_soft_scanned;
-       bool should_abort_reclaim = false;
+       bool aborted_reclaim = false;
 
        for_each_zone_zonelist_nodemask(zone, z, zonelist,
                                        gfp_zone(sc->gfp_mask), sc->nodemask) {
@@ -2140,10 +2198,8 @@ static bool shrink_zones(int priority, struct zonelist *zonelist,
                                 * noticeable problem, like transparent huge page
                                 * allocations.
                                 */
-                               if (sc->order > PAGE_ALLOC_COSTLY_ORDER &&
-                                       (compaction_suitable(zone, sc->order) ||
-                                        compaction_deferred(zone))) {
-                                       should_abort_reclaim = true;
+                               if (compaction_ready(zone, sc)) {
+                                       aborted_reclaim = true;
                                        continue;
                                }
                        }
@@ -2165,7 +2221,7 @@ static bool shrink_zones(int priority, struct zonelist *zonelist,
                shrink_zone(priority, zone, sc);
        }
 
-       return should_abort_reclaim;
+       return aborted_reclaim;
 }
 
 static bool zone_reclaimable(struct zone *zone)
@@ -2219,8 +2275,8 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
        struct zoneref *z;
        struct zone *zone;
        unsigned long writeback_threshold;
+       bool aborted_reclaim;
 
-       get_mems_allowed();
        delayacct_freepages_start();
 
        if (scanning_global_lru(sc))
@@ -2230,8 +2286,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
                sc->nr_scanned = 0;
                if (!priority)
                        disable_swap_token(sc->mem_cgroup);
-               if (shrink_zones(priority, zonelist, sc))
-                       break;
+               aborted_reclaim = shrink_zones(priority, zonelist, sc);
 
                /*
                 * Don't shrink slabs when reclaiming memory from
@@ -2285,7 +2340,6 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 
 out:
        delayacct_freepages_end();
-       put_mems_allowed();
 
        if (sc->nr_reclaimed)
                return sc->nr_reclaimed;
@@ -2298,6 +2352,10 @@ out:
        if (oom_killer_disabled)
                return 0;
 
+       /* Aborted reclaim to try compaction? don't OOM, then */
+       if (aborted_reclaim)
+               return 1;
+
        /* top priority shrink_zones still had more to do? don't OOM, then */
        if (scanning_global_lru(sc) && !all_unreclaimable(zonelist, sc))
                return 1;
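
Together with the shrink_zones() change above, an aborted pass no longer breaks
out of the priority loop; it is remembered and reported as token progress so the
allocator retries with compaction instead of falling through to the OOM killer.
A condensed model of the new return logic; reclaim_result() is an invented name,
and the scanning_global_lru() qualifier is folded into the all_unreclaimable
flag.

#include <stdbool.h>

/*
 * Condensed model of the return logic at the end of the patched
 * do_try_to_free_pages(): an aborted pass (compaction ready) is
 * reported as progress so the caller retries rather than OOM-killing.
 */
static unsigned long reclaim_result(unsigned long nr_reclaimed,
                                    bool oom_killer_disabled,
                                    bool aborted_reclaim,
                                    bool all_unreclaimable)
{
        if (nr_reclaimed)
                return nr_reclaimed;
        if (oom_killer_disabled)
                return 0;
        if (aborted_reclaim)
                return 1;                /* retry the allocation, don't OOM */
        if (!all_unreclaimable)
                return 1;                /* top priority still had more to do */
        return 0;
}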
@@ -2824,7 +2882,10 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int order, int classzone_idx)
                 * them before going back to sleep.
                 */
                set_pgdat_percpu_threshold(pgdat, calculate_normal_threshold);
-               schedule();
+
+               if (!kthread_should_stop())
+                       schedule();
+
                set_pgdat_percpu_threshold(pgdat, calculate_pressure_threshold);
        } else {
                if (remaining)
@@ -2936,6 +2997,8 @@ static int kswapd(void *p)
                                                &balanced_classzone_idx);
                }
        }
+
+       current->reclaim_state = NULL;
        return 0;
 }
 
@@ -3090,14 +3153,17 @@ int kswapd_run(int nid)
 }
 
 /*
- * Called by memory hotplug when all memory in a node is offlined.
+ * Called by memory hotplug when all memory in a node is offlined.  Caller must
+ * hold lock_memory_hotplug().
  */
 void kswapd_stop(int nid)
 {
        struct task_struct *kswapd = NODE_DATA(nid)->kswapd;
 
-       if (kswapd)
+       if (kswapd) {
                kthread_stop(kswapd);
+               NODE_DATA(nid)->kswapd = NULL;
+       }
 }
 
 static int __init kswapd_init(void)
@@ -3353,97 +3419,59 @@ int page_evictable(struct page *page, struct vm_area_struct *vma)
        return 1;
 }
 
+#ifdef CONFIG_SHMEM
 /**
- * check_move_unevictable_page - check page for evictability and move to appropriate zone lru list
- * @page: page to check evictability and move to appropriate lru list
- * @zone: zone page is in
+ * check_move_unevictable_pages - check pages for evictability and move to appropriate zone lru list
+ * @pages:     array of pages to check
+ * @nr_pages:  number of pages to check
  *
- * Checks a page for evictability and moves the page to the appropriate
- * zone lru list.
+ * Checks pages for evictability and moves them to the appropriate lru list.
  *
- * Restrictions: zone->lru_lock must be held, page must be on LRU and must
- * have PageUnevictable set.
+ * This function is only used for SysV IPC SHM_UNLOCK.
  */
-static void check_move_unevictable_page(struct page *page, struct zone *zone)
+void check_move_unevictable_pages(struct page **pages, int nr_pages)
 {
-       VM_BUG_ON(PageActive(page));
-
-retry:
-       ClearPageUnevictable(page);
-       if (page_evictable(page, NULL)) {
-               enum lru_list l = page_lru_base_type(page);
+       struct zone *zone = NULL;
+       int pgscanned = 0;
+       int pgrescued = 0;
+       int i;
 
-               __dec_zone_state(zone, NR_UNEVICTABLE);
-               list_move(&page->lru, &zone->lru[l].list);
-               mem_cgroup_move_lists(page, LRU_UNEVICTABLE, l);
-               __inc_zone_state(zone, NR_INACTIVE_ANON + l);
-               __count_vm_event(UNEVICTABLE_PGRESCUED);
-       } else {
-               /*
-                * rotate unevictable list
-                */
-               SetPageUnevictable(page);
-               list_move(&page->lru, &zone->lru[LRU_UNEVICTABLE].list);
-               mem_cgroup_rotate_lru_list(page, LRU_UNEVICTABLE);
-               if (page_evictable(page, NULL))
-                       goto retry;
-       }
-}
+       for (i = 0; i < nr_pages; i++) {
+               struct page *page = pages[i];
+               struct zone *pagezone;
 
-/**
- * scan_mapping_unevictable_pages - scan an address space for evictable pages
- * @mapping: struct address_space to scan for evictable pages
- *
- * Scan all pages in mapping.  Check unevictable pages for
- * evictability and move them to the appropriate zone lru list.
- */
-void scan_mapping_unevictable_pages(struct address_space *mapping)
-{
-       pgoff_t next = 0;
-       pgoff_t end   = (i_size_read(mapping->host) + PAGE_CACHE_SIZE - 1) >>
-                        PAGE_CACHE_SHIFT;
-       struct zone *zone;
-       struct pagevec pvec;
+               pgscanned++;
+               pagezone = page_zone(page);
+               if (pagezone != zone) {
+                       if (zone)
+                               spin_unlock_irq(&zone->lru_lock);
+                       zone = pagezone;
+                       spin_lock_irq(&zone->lru_lock);
+               }
 
-       if (mapping->nrpages == 0)
-               return;
+               if (!PageLRU(page) || !PageUnevictable(page))
+                       continue;
 
-       pagevec_init(&pvec, 0);
-       while (next < end &&
-               pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
-               int i;
-               int pg_scanned = 0;
-
-               zone = NULL;
-
-               for (i = 0; i < pagevec_count(&pvec); i++) {
-                       struct page *page = pvec.pages[i];
-                       pgoff_t page_index = page->index;
-                       struct zone *pagezone = page_zone(page);
-
-                       pg_scanned++;
-                       if (page_index > next)
-                               next = page_index;
-                       next++;
-
-                       if (pagezone != zone) {
-                               if (zone)
-                                       spin_unlock_irq(&zone->lru_lock);
-                               zone = pagezone;
-                               spin_lock_irq(&zone->lru_lock);
-                       }
+               if (page_evictable(page, NULL)) {
+                       enum lru_list lru = page_lru_base_type(page);
 
-                       if (PageLRU(page) && PageUnevictable(page))
-                               check_move_unevictable_page(page, zone);
+                       VM_BUG_ON(PageActive(page));
+                       ClearPageUnevictable(page);
+                       __dec_zone_state(zone, NR_UNEVICTABLE);
+                       list_move(&page->lru, &zone->lru[lru].list);
+                       mem_cgroup_move_lists(page, LRU_UNEVICTABLE, lru);
+                       __inc_zone_state(zone, NR_INACTIVE_ANON + lru);
+                       pgrescued++;
                }
-               if (zone)
-                       spin_unlock_irq(&zone->lru_lock);
-               pagevec_release(&pvec);
-
-               count_vm_events(UNEVICTABLE_PGSCANNED, pg_scanned);
        }
 
+       if (zone) {
+               __count_vm_events(UNEVICTABLE_PGRESCUED, pgrescued);
+               __count_vm_events(UNEVICTABLE_PGSCANNED, pgscanned);
+               spin_unlock_irq(&zone->lru_lock);
+       }
 }
+#endif /* CONFIG_SHMEM */
 
 static void warn_scan_unevictable_pages(void)
 {
@@ -3475,16 +3503,16 @@ int scan_unevictable_handler(struct ctl_table *table, int write,
  * a specified node's per zone unevictable lists for evictable pages.
  */
 
-static ssize_t read_scan_unevictable_node(struct sys_device *dev,
-                                         struct sysdev_attribute *attr,
+static ssize_t read_scan_unevictable_node(struct device *dev,
+                                         struct device_attribute *attr,
                                          char *buf)
 {
        warn_scan_unevictable_pages();
        return sprintf(buf, "0\n");     /* always zero; should fit... */
 }
 
-static ssize_t write_scan_unevictable_node(struct sys_device *dev,
-                                          struct sysdev_attribute *attr,
+static ssize_t write_scan_unevictable_node(struct device *dev,
+                                          struct device_attribute *attr,
                                        const char *buf, size_t count)
 {
        warn_scan_unevictable_pages();
@@ -3492,17 +3520,17 @@ static ssize_t write_scan_unevictable_node(struct sys_device *dev,
 }
 
 
-static SYSDEV_ATTR(scan_unevictable_pages, S_IRUGO | S_IWUSR,
+static DEVICE_ATTR(scan_unevictable_pages, S_IRUGO | S_IWUSR,
                        read_scan_unevictable_node,
                        write_scan_unevictable_node);
 
 int scan_unevictable_register_node(struct node *node)
 {
-       return sysdev_create_file(&node->sysdev, &attr_scan_unevictable_pages);
+       return device_create_file(&node->dev, &dev_attr_scan_unevictable_pages);
 }
 
 void scan_unevictable_unregister_node(struct node *node)
 {
-       sysdev_remove_file(&node->sysdev, &attr_scan_unevictable_pages);
+       device_remove_file(&node->dev, &dev_attr_scan_unevictable_pages);
 }
 #endif
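
One pattern worth noting from the check_move_unevictable_pages() rewrite above:
the zone lru_lock is dropped and retaken only when consecutive pages belong to
different zones, and the PGSCANNED/PGRESCUED counters are accumulated and
flushed once at the end. A userspace model of that lock batching, with a pthread
mutex standing in for the zone spinlock and invented type and function names:

#include <pthread.h>

struct zone_model {
        pthread_mutex_t lru_lock;
        /* ... lru lists would live here ... */
};

/*
 * Walk an array of items, taking the per-zone lock lazily: it is only
 * released and reacquired when the zone changes between neighbours,
 * which is the pattern the rewritten check_move_unevictable_pages() uses.
 */
static void walk_batched(struct zone_model **item_zone, int nr)
{
        struct zone_model *zone = NULL;

        for (int i = 0; i < nr; i++) {
                struct zone_model *pagezone = item_zone[i];

                if (pagezone != zone) {
                        if (zone)
                                pthread_mutex_unlock(&zone->lru_lock);
                        zone = pagezone;
                        pthread_mutex_lock(&zone->lru_lock);
                }
                /* ... inspect and move the item under zone->lru_lock ... */
        }
        if (zone)
                pthread_mutex_unlock(&zone->lru_lock);
}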