page allocator: calculate the preferred zone for allocation only once
author Mel Gorman <mel@csn.ul.ie>
Tue, 16 Jun 2009 22:31:59 +0000 (15:31 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 17 Jun 2009 02:47:33 +0000 (19:47 -0700)
get_page_from_freelist() can be called multiple times for an allocation.
Each call recalculates preferred_zone, which is the first usable zone in
the zonelist, but that zone depends on the GFP flags specified at the
beginning of the allocation call, so recalculating it on every call is
redundant.  This patch calculates preferred_zone once.  It's safe to do
this because if preferred_zone is NULL at the start of the call, no amount
of direct reclaim or other actions will change the fact that the
allocation will fail.

[akpm@linux-foundation.org: remove (void) casts]
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Reviewed-by: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Dave Hansen <dave@linux.vnet.ibm.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
mm/page_alloc.c

index b098596..8fc6d1f 100644 (file)
@@ -1388,26 +1388,21 @@ static void zlc_mark_zone_full(struct zonelist *zonelist, struct zoneref *z)
  */
 static struct page *
 get_page_from_freelist(gfp_t gfp_mask, nodemask_t *nodemask, unsigned int order,
-               struct zonelist *zonelist, int high_zoneidx, int alloc_flags)
+               struct zonelist *zonelist, int high_zoneidx, int alloc_flags,
+               struct zone *preferred_zone)
 {
        struct zoneref *z;
        struct page *page = NULL;
        int classzone_idx;
-       struct zone *zone, *preferred_zone;
+       struct zone *zone;
        nodemask_t *allowednodes = NULL;/* zonelist_cache approximation */
        int zlc_active = 0;             /* set if using zonelist_cache */
        int did_zlc_setup = 0;          /* just call zlc_setup() one time */
 
-       (void)first_zones_zonelist(zonelist, high_zoneidx, nodemask,
-                                                       &preferred_zone);
-       if (!preferred_zone)
-               return NULL;
-
-       classzone_idx = zone_idx(preferred_zone);
-
        if (WARN_ON_ONCE(order >= MAX_ORDER))
                return NULL;
 
+       classzone_idx = zone_idx(preferred_zone);
 zonelist_scan:
        /*
         * Scan zonelist, looking for a zone with enough free.
@@ -1500,7 +1495,7 @@ should_alloc_retry(gfp_t gfp_mask, unsigned int order,
 static inline struct page *
 __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
        struct zonelist *zonelist, enum zone_type high_zoneidx,
-       nodemask_t *nodemask)
+       nodemask_t *nodemask, struct zone *preferred_zone)
 {
        struct page *page;
 
@@ -1517,7 +1512,8 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
         */
        page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask,
                order, zonelist, high_zoneidx,
-               ALLOC_WMARK_HIGH|ALLOC_CPUSET);
+               ALLOC_WMARK_HIGH|ALLOC_CPUSET,
+               preferred_zone);
        if (page)
                goto out;
 
@@ -1537,7 +1533,8 @@ out:
 static inline struct page *
 __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order,
        struct zonelist *zonelist, enum zone_type high_zoneidx,
-       nodemask_t *nodemask, int alloc_flags, unsigned long *did_some_progress)
+       nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
+       unsigned long *did_some_progress)
 {
        struct page *page = NULL;
        struct reclaim_state reclaim_state;
@@ -1569,7 +1566,8 @@ __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order,
 
        if (likely(*did_some_progress))
                page = get_page_from_freelist(gfp_mask, nodemask, order,
-                                       zonelist, high_zoneidx, alloc_flags);
+                                       zonelist, high_zoneidx,
+                                       alloc_flags, preferred_zone);
        return page;
 }
 
@@ -1589,13 +1587,14 @@ is_allocation_high_priority(struct task_struct *p, gfp_t gfp_mask)
 static inline struct page *
 __alloc_pages_high_priority(gfp_t gfp_mask, unsigned int order,
        struct zonelist *zonelist, enum zone_type high_zoneidx,
-       nodemask_t *nodemask)
+       nodemask_t *nodemask, struct zone *preferred_zone)
 {
        struct page *page;
 
        do {
                page = get_page_from_freelist(gfp_mask, nodemask, order,
-                       zonelist, high_zoneidx, ALLOC_NO_WATERMARKS);
+                       zonelist, high_zoneidx, ALLOC_NO_WATERMARKS,
+                       preferred_zone);
 
                if (!page && gfp_mask & __GFP_NOFAIL)
                        congestion_wait(WRITE, HZ/50);
@@ -1618,7 +1617,7 @@ void wake_all_kswapd(unsigned int order, struct zonelist *zonelist,
 static inline struct page *
 __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
        struct zonelist *zonelist, enum zone_type high_zoneidx,
-       nodemask_t *nodemask)
+       nodemask_t *nodemask, struct zone *preferred_zone)
 {
        const gfp_t wait = gfp_mask & __GFP_WAIT;
        struct page *page = NULL;
@@ -1668,7 +1667,8 @@ restart:
         * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
         */
        page = get_page_from_freelist(gfp_mask, nodemask, order, zonelist,
-                                               high_zoneidx, alloc_flags);
+                                               high_zoneidx, alloc_flags,
+                                               preferred_zone);
        if (page)
                goto got_pg;
 
@@ -1678,7 +1678,7 @@ rebalance:
                /* Do not dip into emergency reserves if specified */
                if (!(gfp_mask & __GFP_NOMEMALLOC)) {
                        page = __alloc_pages_high_priority(gfp_mask, order,
-                               zonelist, high_zoneidx, nodemask);
+                               zonelist, high_zoneidx, nodemask, preferred_zone);
                        if (page)
                                goto got_pg;
                }
@@ -1695,7 +1695,8 @@ rebalance:
        page = __alloc_pages_direct_reclaim(gfp_mask, order,
                                        zonelist, high_zoneidx,
                                        nodemask,
-                                       alloc_flags, &did_some_progress);
+                                       alloc_flags, preferred_zone,
+                                       &did_some_progress);
        if (page)
                goto got_pg;
 
@@ -1707,7 +1708,7 @@ rebalance:
                if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) {
                        page = __alloc_pages_may_oom(gfp_mask, order,
                                        zonelist, high_zoneidx,
-                                       nodemask);
+                                       nodemask, preferred_zone);
                        if (page)
                                goto got_pg;
 
@@ -1752,6 +1753,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
                        struct zonelist *zonelist, nodemask_t *nodemask)
 {
        enum zone_type high_zoneidx = gfp_zone(gfp_mask);
+       struct zone *preferred_zone;
        struct page *page;
 
        lockdep_trace_alloc(gfp_mask);
@@ -1769,11 +1771,19 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
        if (unlikely(!zonelist->_zonerefs->zone))
                return NULL;
 
+       /* The preferred zone is used for statistics later */
+       first_zones_zonelist(zonelist, high_zoneidx, nodemask, &preferred_zone);
+       if (!preferred_zone)
+               return NULL;
+
+       /* First allocation attempt */
        page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
-                       zonelist, high_zoneidx, ALLOC_WMARK_LOW|ALLOC_CPUSET);
+                       zonelist, high_zoneidx, ALLOC_WMARK_LOW|ALLOC_CPUSET,
+                       preferred_zone);
        if (unlikely(!page))
                page = __alloc_pages_slowpath(gfp_mask, order,
-                               zonelist, high_zoneidx, nodemask);
+                               zonelist, high_zoneidx, nodemask,
+                               preferred_zone);
 
        return page;
 }