cpuset: mm: reduce large amounts of memory barrier related damage v3
[pandora-kernel.git] / mm / page_alloc.c
index ef6e1a1..065dbe8 100644 (file)
@@ -2282,8 +2282,9 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 {
        enum zone_type high_zoneidx = gfp_zone(gfp_mask);
        struct zone *preferred_zone;
-       struct page *page;
+       struct page *page = NULL;
        int migratetype = allocflags_to_migratetype(gfp_mask);
+       unsigned int cpuset_mems_cookie;
 
        gfp_mask &= gfp_allowed_mask;
 
@@ -2302,15 +2303,15 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
        if (unlikely(!zonelist->_zonerefs->zone))
                return NULL;
 
-       get_mems_allowed();
+retry_cpuset:
+       cpuset_mems_cookie = get_mems_allowed();
+
        /* The preferred zone is used for statistics later */
        first_zones_zonelist(zonelist, high_zoneidx,
                                nodemask ? : &cpuset_current_mems_allowed,
                                &preferred_zone);
-       if (!preferred_zone) {
-               put_mems_allowed();
-               return NULL;
-       }
+       if (!preferred_zone)
+               goto out;
 
        /* First allocation attempt */
        page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
@@ -2320,9 +2321,19 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
                page = __alloc_pages_slowpath(gfp_mask, order,
                                zonelist, high_zoneidx, nodemask,
                                preferred_zone, migratetype);
-       put_mems_allowed();
 
        trace_mm_page_alloc(page, order, gfp_mask, migratetype);
+
+out:
+       /*
+        * When updating a task's mems_allowed, it is possible to race with
+        * parallel threads in such a way that an allocation can fail while
+        * the mask is being updated. If a page allocation is about to fail,
+        * check if the cpuset changed during allocation and if so, retry.
+        */
+       if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page))
+               goto retry_cpuset;
+
        return page;
 }
 EXPORT_SYMBOL(__alloc_pages_nodemask);
@@ -2546,13 +2557,15 @@ void si_meminfo_node(struct sysinfo *val, int nid)
 bool skip_free_areas_node(unsigned int flags, int nid)
 {
        bool ret = false;
+       unsigned int cpuset_mems_cookie;
 
        if (!(flags & SHOW_MEM_FILTER_NODES))
                goto out;
 
-       get_mems_allowed();
-       ret = !node_isset(nid, cpuset_current_mems_allowed);
-       put_mems_allowed();
+       do {
+               cpuset_mems_cookie = get_mems_allowed();
+               ret = !node_isset(nid, cpuset_current_mems_allowed);
+       } while (!put_mems_allowed(cpuset_mems_cookie));
 out:
        return ret;
 }