cpuset: PF_SPREAD_PAGE and PF_SPREAD_SLAB should be atomic flags
[pandora-kernel.git] / mm / slab.c
diff --git a/mm/slab.c b/mm/slab.c
index 3070b92..881951e 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -191,7 +191,6 @@ struct array_cache {
        unsigned int limit;
        unsigned int batchcount;
        unsigned int touched;
-       spinlock_t lock;
        void *entry[];  /*
                         * Must have this definition in here for the proper
                         * alignment of array_cache. Also simplifies accessing
@@ -203,6 +202,11 @@ struct array_cache {
                         */
 };
 
+struct alien_cache {
+       spinlock_t lock;
+       struct array_cache ac;
+};
+
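The new alien_cache wrapper pulls the spinlock out of struct array_cache: the per-CPU and shared arrays are already serialised by local irq-disabling and the node's list_lock, so only the remote-node ("alien") caches still need a lock of their own. A minimal userspace model of the resulting layout and locking rule (all model_* names are illustrative, not the kernel's):

#include <pthread.h>
#include <stdio.h>

#define MODEL_LIMIT 4

struct model_array_cache {
	unsigned int avail;
	unsigned int limit;
	void *entry[MODEL_LIMIT];	/* the kernel uses a flexible array member */
};

struct model_alien_cache {
	pthread_mutex_t lock;		/* lock lives in the wrapper, not the cache */
	struct model_array_cache ac;
};

/* Remote ("alien") frees take the wrapper lock before touching the array. */
static int alien_push(struct model_alien_cache *alc, void *obj)
{
	int queued = 0;

	pthread_mutex_lock(&alc->lock);
	if (alc->ac.avail < alc->ac.limit) {
		alc->ac.entry[alc->ac.avail++] = obj;
		queued = 1;
	}
	pthread_mutex_unlock(&alc->lock);
	return queued;
}

int main(void)
{
	struct model_alien_cache alc = {
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.ac = { .avail = 0, .limit = MODEL_LIMIT },
	};
	int obj, queued;

	queued = alien_push(&alc, &obj);
	printf("queued=%d avail=%u\n", queued, alc.ac.avail);
	return 0;
}
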
 #define SLAB_OBJ_PFMEMALLOC    1
 static inline bool is_obj_pfmemalloc(void *objp)
 {
@@ -242,7 +246,8 @@ static struct kmem_cache_node __initdata init_kmem_cache_node[NUM_INIT_LISTS];
 static int drain_freelist(struct kmem_cache *cache,
                        struct kmem_cache_node *n, int tofree);
 static void free_block(struct kmem_cache *cachep, void **objpp, int len,
-                       int node);
+                       int node, struct list_head *list);
+static void slabs_destroy(struct kmem_cache *cachep, struct list_head *list);
 static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp);
 static void cache_reap(struct work_struct *unused);
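
free_block() gains a list parameter and slabs_destroy() is introduced to go with it: empty slab pages that exceed the node's free limit are no longer destroyed while the kmem_cache_node list_lock is held, but detached onto a caller-supplied list and released after the lock is dropped, which keeps slab_destroy() (and any off-slab freelist free) out of the locked region. A stripped-down userspace sketch of that collect-then-destroy pattern (a plain singly linked list stands in for the kernel's list_head):

#include <pthread.h>
#include <stdlib.h>

struct model_page {
	struct model_page *next;
};

static pthread_mutex_t node_lock = PTHREAD_MUTEX_INITIALIZER;

/* Called with node_lock held: detach the page onto 'list' instead of freeing it. */
static void model_free_block(struct model_page *page, struct model_page **list)
{
	page->next = *list;
	*list = page;
}

/* Called after node_lock is dropped: do the expensive destruction unlocked. */
static void model_slabs_destroy(struct model_page *list)
{
	while (list) {
		struct model_page *page = list;

		list = list->next;
		free(page);
	}
}

int main(void)
{
	struct model_page *list = NULL;
	int i;

	pthread_mutex_lock(&node_lock);
	for (i = 0; i < 3; i++) {
		struct model_page *page = malloc(sizeof(*page));

		if (page)
			model_free_block(page, &list);
	}
	pthread_mutex_unlock(&node_lock);

	model_slabs_destroy(list);	/* mirrors slabs_destroy() after spin_unlock() */
	return 0;
}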
 
@@ -267,7 +272,7 @@ static void kmem_cache_node_init(struct kmem_cache_node *parent)
 #define MAKE_LIST(cachep, listp, slab, nodeid)                         \
        do {                                                            \
                INIT_LIST_HEAD(listp);                                  \
-               list_splice(&(cachep->node[nodeid]->slab), listp);      \
+               list_splice(&get_node(cachep, nodeid)->slab, listp);    \
        } while (0)
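
MAKE_LIST now goes through get_node(), and later hunks iterate with for_each_kmem_cache_node(); neither helper is defined in this file (they presumably live in mm/slab.h in the same series). Reconstructed from how they are used here, they are likely close to the following sketch; treat it as an assumption rather than a quote of the header:

/* Presumed mm/slab.h helpers, inferred from their use in this diff. */
static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
{
	return s->node[node];
}

/* Visit only the nodes for which a kmem_cache_node has been allocated. */
#define for_each_kmem_cache_node(__s, __node, __n)		\
	for (__node = 0; __node < nr_node_ids; __node++)	\
		if ((__n = get_node(__s, __node)))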
 
 #define        MAKE_ALL_LISTS(cachep, ptr, nodeid)                             \
@@ -467,141 +472,6 @@ static struct kmem_cache kmem_cache_boot = {
 
 #define BAD_ALIEN_MAGIC 0x01020304ul
 
-#ifdef CONFIG_LOCKDEP
-
-/*
- * Slab sometimes uses the kmalloc slabs to store the slab headers
- * for other slabs "off slab".
- * The locking for this is tricky in that it nests within the locks
- * of all other slabs in a few places; to deal with this special
- * locking we put on-slab caches into a separate lock-class.
- *
- * We set lock class for alien array caches which are up during init.
- * The lock annotation will be lost if all cpus of a node goes down and
- * then comes back up during hotplug
- */
-static struct lock_class_key on_slab_l3_key;
-static struct lock_class_key on_slab_alc_key;
-
-static struct lock_class_key debugobj_l3_key;
-static struct lock_class_key debugobj_alc_key;
-
-static void slab_set_lock_classes(struct kmem_cache *cachep,
-               struct lock_class_key *l3_key, struct lock_class_key *alc_key,
-               int q)
-{
-       struct array_cache **alc;
-       struct kmem_cache_node *n;
-       int r;
-
-       n = cachep->node[q];
-       if (!n)
-               return;
-
-       lockdep_set_class(&n->list_lock, l3_key);
-       alc = n->alien;
-       /*
-        * FIXME: This check for BAD_ALIEN_MAGIC
-        * should go away when common slab code is taught to
-        * work even without alien caches.
-        * Currently, non NUMA code returns BAD_ALIEN_MAGIC
-        * for alloc_alien_cache,
-        */
-       if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC)
-               return;
-       for_each_node(r) {
-               if (alc[r])
-                       lockdep_set_class(&alc[r]->lock, alc_key);
-       }
-}
-
-static void slab_set_debugobj_lock_classes_node(struct kmem_cache *cachep, int node)
-{
-       slab_set_lock_classes(cachep, &debugobj_l3_key, &debugobj_alc_key, node);
-}
-
-static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep)
-{
-       int node;
-
-       for_each_online_node(node)
-               slab_set_debugobj_lock_classes_node(cachep, node);
-}
-
-static void init_node_lock_keys(int q)
-{
-       int i;
-
-       if (slab_state < UP)
-               return;
-
-       for (i = 1; i <= KMALLOC_SHIFT_HIGH; i++) {
-               struct kmem_cache_node *n;
-               struct kmem_cache *cache = kmalloc_caches[i];
-
-               if (!cache)
-                       continue;
-
-               n = cache->node[q];
-               if (!n || OFF_SLAB(cache))
-                       continue;
-
-               slab_set_lock_classes(cache, &on_slab_l3_key,
-                               &on_slab_alc_key, q);
-       }
-}
-
-static void on_slab_lock_classes_node(struct kmem_cache *cachep, int q)
-{
-       if (!cachep->node[q])
-               return;
-
-       slab_set_lock_classes(cachep, &on_slab_l3_key,
-                       &on_slab_alc_key, q);
-}
-
-static inline void on_slab_lock_classes(struct kmem_cache *cachep)
-{
-       int node;
-
-       VM_BUG_ON(OFF_SLAB(cachep));
-       for_each_node(node)
-               on_slab_lock_classes_node(cachep, node);
-}
-
-static inline void init_lock_keys(void)
-{
-       int node;
-
-       for_each_node(node)
-               init_node_lock_keys(node);
-}
-#else
-static void init_node_lock_keys(int q)
-{
-}
-
-static inline void init_lock_keys(void)
-{
-}
-
-static inline void on_slab_lock_classes(struct kmem_cache *cachep)
-{
-}
-
-static inline void on_slab_lock_classes_node(struct kmem_cache *cachep, int node)
-{
-}
-
-static void slab_set_debugobj_lock_classes_node(struct kmem_cache *cachep, int node)
-{
-}
-
-static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep)
-{
-}
-#endif
-
 static DEFINE_PER_CPU(struct delayed_work, slab_reap_work);
 
 static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
@@ -792,13 +662,8 @@ static void start_cpu_timer(int cpu)
        }
 }
 
-static struct array_cache *alloc_arraycache(int node, int entries,
-                                           int batchcount, gfp_t gfp)
+static void init_arraycache(struct array_cache *ac, int limit, int batch)
 {
-       int memsize = sizeof(void *) * entries + sizeof(struct array_cache);
-       struct array_cache *nc = NULL;
-
-       nc = kmalloc_node(memsize, gfp, node);
        /*
         * The array_cache structures contain pointers to free object.
         * However, when such objects are allocated or transferred to another
@@ -806,15 +671,24 @@ static struct array_cache *alloc_arraycache(int node, int entries,
         * valid references during a kmemleak scan. Therefore, kmemleak must
         * not scan such objects.
         */
-       kmemleak_no_scan(nc);
-       if (nc) {
-               nc->avail = 0;
-               nc->limit = entries;
-               nc->batchcount = batchcount;
-               nc->touched = 0;
-               spin_lock_init(&nc->lock);
+       kmemleak_no_scan(ac);
+       if (ac) {
+               ac->avail = 0;
+               ac->limit = limit;
+               ac->batchcount = batch;
+               ac->touched = 0;
        }
-       return nc;
+}
+
+static struct array_cache *alloc_arraycache(int node, int entries,
+                                           int batchcount, gfp_t gfp)
+{
+       size_t memsize = sizeof(void *) * entries + sizeof(struct array_cache);
+       struct array_cache *ac = NULL;
+
+       ac = kmalloc_node(memsize, gfp, node);
+       init_arraycache(ac, entries, batchcount);
+       return ac;
 }
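
alloc_arraycache() is split so the field setup lives in init_arraycache(), which the new __alloc_alien_cache() below reuses on the array_cache embedded in struct alien_cache. A compact userspace model of the allocate-then-initialise split (model_* names and sizes are illustrative):

#include <stdlib.h>

struct model_ac {
	unsigned int avail, limit, batchcount, touched;
	void *entry[];			/* trailing storage sized at allocation time */
};

/* Initialisation only; works on an embedded or a standalone cache. */
static void model_init_ac(struct model_ac *ac, int limit, int batch)
{
	if (ac) {
		ac->avail = 0;
		ac->limit = limit;
		ac->batchcount = batch;
		ac->touched = 0;
	}
}

/* Allocation path shared (in spirit) with the alien-cache variant. */
static struct model_ac *model_alloc_ac(int entries, int batch)
{
	size_t size = sizeof(struct model_ac) + sizeof(void *) * entries;
	struct model_ac *ac = malloc(size);

	model_init_ac(ac, entries, batch);
	return ac;
}

int main(void)
{
	struct model_ac *ac = model_alloc_ac(16, 4);

	free(ac);
	return 0;
}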
 
 static inline bool is_slab_pfmemalloc(struct page *page)
@@ -826,7 +700,7 @@ static inline bool is_slab_pfmemalloc(struct page *page)
 static void recheck_pfmemalloc_active(struct kmem_cache *cachep,
                                                struct array_cache *ac)
 {
-       struct kmem_cache_node *n = cachep->node[numa_mem_id()];
+       struct kmem_cache_node *n = get_node(cachep, numa_mem_id());
        struct page *page;
        unsigned long flags;
 
@@ -881,7 +755,7 @@ static void *__ac_get_obj(struct kmem_cache *cachep, struct array_cache *ac,
                 * If there are empty slabs on the slabs_free list and we are
                 * being forced to refill the cache, mark this one !pfmemalloc.
                 */
-               n = cachep->node[numa_mem_id()];
+               n = get_node(cachep, numa_mem_id());
                if (!list_empty(&n->slabs_free) && force_refill) {
                        struct page *page = virt_to_head_page(objp);
                        ClearPageSlabPfmemalloc(page);
@@ -961,12 +835,13 @@ static int transfer_objects(struct array_cache *to,
 #define drain_alien_cache(cachep, alien) do { } while (0)
 #define reap_alien(cachep, n) do { } while (0)
 
-static inline struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp)
+static inline struct alien_cache **alloc_alien_cache(int node,
+                                               int limit, gfp_t gfp)
 {
-       return (struct array_cache **)BAD_ALIEN_MAGIC;
+       return (struct alien_cache **)BAD_ALIEN_MAGIC;
 }
 
-static inline void free_alien_cache(struct array_cache **ac_ptr)
+static inline void free_alien_cache(struct alien_cache **ac_ptr)
 {
 }
 
@@ -992,46 +867,60 @@ static inline void *____cache_alloc_node(struct kmem_cache *cachep,
 static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int);
 static void *alternate_node_alloc(struct kmem_cache *, gfp_t);
 
-static struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp)
+static struct alien_cache *__alloc_alien_cache(int node, int entries,
+                                               int batch, gfp_t gfp)
 {
-       struct array_cache **ac_ptr;
-       int memsize = sizeof(void *) * nr_node_ids;
+       size_t memsize = sizeof(void *) * entries + sizeof(struct alien_cache);
+       struct alien_cache *alc = NULL;
+
+       alc = kmalloc_node(memsize, gfp, node);
+       init_arraycache(&alc->ac, entries, batch);
+       spin_lock_init(&alc->lock);
+       return alc;
+}
+
+static struct alien_cache **alloc_alien_cache(int node, int limit, gfp_t gfp)
+{
+       struct alien_cache **alc_ptr;
+       size_t memsize = sizeof(void *) * nr_node_ids;
        int i;
 
        if (limit > 1)
                limit = 12;
-       ac_ptr = kzalloc_node(memsize, gfp, node);
-       if (ac_ptr) {
-               for_each_node(i) {
-                       if (i == node || !node_online(i))
-                               continue;
-                       ac_ptr[i] = alloc_arraycache(node, limit, 0xbaadf00d, gfp);
-                       if (!ac_ptr[i]) {
-                               for (i--; i >= 0; i--)
-                                       kfree(ac_ptr[i]);
-                               kfree(ac_ptr);
-                               return NULL;
-                       }
+       alc_ptr = kzalloc_node(memsize, gfp, node);
+       if (!alc_ptr)
+               return NULL;
+
+       for_each_node(i) {
+               if (i == node || !node_online(i))
+                       continue;
+               alc_ptr[i] = __alloc_alien_cache(node, limit, 0xbaadf00d, gfp);
+               if (!alc_ptr[i]) {
+                       for (i--; i >= 0; i--)
+                               kfree(alc_ptr[i]);
+                       kfree(alc_ptr);
+                       return NULL;
                }
        }
-       return ac_ptr;
+       return alc_ptr;
 }
 
-static void free_alien_cache(struct array_cache **ac_ptr)
+static void free_alien_cache(struct alien_cache **alc_ptr)
 {
        int i;
 
-       if (!ac_ptr)
+       if (!alc_ptr)
                return;
        for_each_node(i)
-           kfree(ac_ptr[i]);
-       kfree(ac_ptr);
+           kfree(alc_ptr[i]);
+       kfree(alc_ptr);
 }
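
alloc_alien_cache() now returns early when the per-node pointer array cannot be allocated and, as before, unwinds the caches it has already allocated when a later allocation fails (the filtering on i == node and !node_online(i) is omitted below). The unwind idiom in isolation, as a userspace sketch with a fixed node count standing in for nr_node_ids:

#include <stdlib.h>

#define MODEL_NR_NODES 4

/* Allocate one buffer per node; on failure, free what was already allocated. */
static void **model_alloc_per_node(size_t size)
{
	void **ptrs = calloc(MODEL_NR_NODES, sizeof(void *));
	int i;

	if (!ptrs)
		return NULL;

	for (i = 0; i < MODEL_NR_NODES; i++) {
		ptrs[i] = malloc(size);
		if (!ptrs[i]) {
			for (i--; i >= 0; i--)	/* roll back, newest first */
				free(ptrs[i]);
			free(ptrs);
			return NULL;
		}
	}
	return ptrs;
}

int main(void)
{
	void **ptrs = model_alloc_per_node(64);
	int i;

	if (ptrs) {
		for (i = 0; i < MODEL_NR_NODES; i++)
			free(ptrs[i]);
		free(ptrs);
	}
	return 0;
}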
 
 static void __drain_alien_cache(struct kmem_cache *cachep,
-                               struct array_cache *ac, int node)
+                               struct array_cache *ac, int node,
+                               struct list_head *list)
 {
-       struct kmem_cache_node *n = cachep->node[node];
+       struct kmem_cache_node *n = get_node(cachep, node);
 
        if (ac->avail) {
                spin_lock(&n->list_lock);
@@ -1043,7 +932,7 @@ static void __drain_alien_cache(struct kmem_cache *cachep,
                if (n->shared)
                        transfer_objects(n->shared, ac, ac->limit);
 
-               free_block(cachep, ac->entry, ac->avail, node);
+               free_block(cachep, ac->entry, ac->avail, node, list);
                ac->avail = 0;
                spin_unlock(&n->list_lock);
        }
@@ -1057,28 +946,40 @@ static void reap_alien(struct kmem_cache *cachep, struct kmem_cache_node *n)
        int node = __this_cpu_read(slab_reap_node);
 
        if (n->alien) {
-               struct array_cache *ac = n->alien[node];
+               struct alien_cache *alc = n->alien[node];
+               struct array_cache *ac;
 
-               if (ac && ac->avail && spin_trylock_irq(&ac->lock)) {
-                       __drain_alien_cache(cachep, ac, node);
-                       spin_unlock_irq(&ac->lock);
+               if (alc) {
+                       ac = &alc->ac;
+                       if (ac->avail && spin_trylock_irq(&alc->lock)) {
+                               LIST_HEAD(list);
+
+                               __drain_alien_cache(cachep, ac, node, &list);
+                               spin_unlock_irq(&alc->lock);
+                               slabs_destroy(cachep, &list);
+                       }
                }
        }
 }
 
 static void drain_alien_cache(struct kmem_cache *cachep,
-                               struct array_cache **alien)
+                               struct alien_cache **alien)
 {
        int i = 0;
+       struct alien_cache *alc;
        struct array_cache *ac;
        unsigned long flags;
 
        for_each_online_node(i) {
-               ac = alien[i];
-               if (ac) {
-                       spin_lock_irqsave(&ac->lock, flags);
-                       __drain_alien_cache(cachep, ac, i);
-                       spin_unlock_irqrestore(&ac->lock, flags);
+               alc = alien[i];
+               if (alc) {
+                       LIST_HEAD(list);
+
+                       ac = &alc->ac;
+                       spin_lock_irqsave(&alc->lock, flags);
+                       __drain_alien_cache(cachep, ac, i, &list);
+                       spin_unlock_irqrestore(&alc->lock, flags);
+                       slabs_destroy(cachep, &list);
                }
        }
 }
@@ -1087,8 +988,10 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
 {
        int nodeid = page_to_nid(virt_to_page(objp));
        struct kmem_cache_node *n;
-       struct array_cache *alien = NULL;
+       struct alien_cache *alien = NULL;
+       struct array_cache *ac;
        int node;
+       LIST_HEAD(list);
 
        node = numa_mem_id();
 
@@ -1099,21 +1002,25 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
        if (likely(nodeid == node))
                return 0;
 
-       n = cachep->node[node];
+       n = get_node(cachep, node);
        STATS_INC_NODEFREES(cachep);
        if (n->alien && n->alien[nodeid]) {
                alien = n->alien[nodeid];
+               ac = &alien->ac;
                spin_lock(&alien->lock);
-               if (unlikely(alien->avail == alien->limit)) {
+               if (unlikely(ac->avail == ac->limit)) {
                        STATS_INC_ACOVERFLOW(cachep);
-                       __drain_alien_cache(cachep, alien, nodeid);
+                       __drain_alien_cache(cachep, ac, nodeid, &list);
                }
-               ac_put_obj(cachep, alien, objp);
+               ac_put_obj(cachep, ac, objp);
                spin_unlock(&alien->lock);
+               slabs_destroy(cachep, &list);
        } else {
-               spin_lock(&(cachep->node[nodeid])->list_lock);
-               free_block(cachep, &objp, 1, nodeid);
-               spin_unlock(&(cachep->node[nodeid])->list_lock);
+               n = get_node(cachep, nodeid);
+               spin_lock(&n->list_lock);
+               free_block(cachep, &objp, 1, nodeid, &list);
+               spin_unlock(&n->list_lock);
+               slabs_destroy(cachep, &list);
        }
        return 1;
 }
@@ -1132,7 +1039,7 @@ static int init_cache_node_node(int node)
 {
        struct kmem_cache *cachep;
        struct kmem_cache_node *n;
-       const int memsize = sizeof(struct kmem_cache_node);
+       const size_t memsize = sizeof(struct kmem_cache_node);
 
        list_for_each_entry(cachep, &slab_caches, list) {
                /*
@@ -1140,7 +1047,8 @@ static int init_cache_node_node(int node)
                 * begin anything. Make sure some other cpu on this
                 * node has not already allocated this
                 */
-               if (!cachep->node[node]) {
+               n = get_node(cachep, node);
+               if (!n) {
                        n = kmalloc_node(memsize, GFP_KERNEL, node);
                        if (!n)
                                return -ENOMEM;
@@ -1156,11 +1064,11 @@ static int init_cache_node_node(int node)
                        cachep->node[node] = n;
                }
 
-               spin_lock_irq(&cachep->node[node]->list_lock);
-               cachep->node[node]->free_limit =
+               spin_lock_irq(&n->list_lock);
+               n->free_limit =
                        (1 + nr_cpus_node(node)) *
                        cachep->batchcount + cachep->num;
-               spin_unlock_irq(&cachep->node[node]->list_lock);
+               spin_unlock_irq(&n->list_lock);
        }
        return 0;
 }
@@ -1181,12 +1089,13 @@ static void cpuup_canceled(long cpu)
        list_for_each_entry(cachep, &slab_caches, list) {
                struct array_cache *nc;
                struct array_cache *shared;
-               struct array_cache **alien;
+               struct alien_cache **alien;
+               LIST_HEAD(list);
 
                /* cpu is dead; no one can alloc from it. */
                nc = cachep->array[cpu];
                cachep->array[cpu] = NULL;
-               n = cachep->node[node];
+               n = get_node(cachep, node);
 
                if (!n)
                        goto free_array_cache;
@@ -1196,7 +1105,7 @@ static void cpuup_canceled(long cpu)
                /* Free limit for this kmem_cache_node */
                n->free_limit -= cachep->batchcount;
                if (nc)
-                       free_block(cachep, nc->entry, nc->avail, node);
+                       free_block(cachep, nc->entry, nc->avail, node, &list);
 
                if (!cpumask_empty(mask)) {
                        spin_unlock_irq(&n->list_lock);
@@ -1206,7 +1115,7 @@ static void cpuup_canceled(long cpu)
                shared = n->shared;
                if (shared) {
                        free_block(cachep, shared->entry,
-                                  shared->avail, node);
+                                  shared->avail, node, &list);
                        n->shared = NULL;
                }
 
@@ -1221,6 +1130,7 @@ static void cpuup_canceled(long cpu)
                        free_alien_cache(alien);
                }
 free_array_cache:
+               slabs_destroy(cachep, &list);
                kfree(nc);
        }
        /*
@@ -1229,7 +1139,7 @@ free_array_cache:
         * shrink each nodelist to its limit.
         */
        list_for_each_entry(cachep, &slab_caches, list) {
-               n = cachep->node[node];
+               n = get_node(cachep, node);
                if (!n)
                        continue;
                drain_freelist(cachep, n, slabs_tofree(cachep, n));
@@ -1260,7 +1170,7 @@ static int cpuup_prepare(long cpu)
        list_for_each_entry(cachep, &slab_caches, list) {
                struct array_cache *nc;
                struct array_cache *shared = NULL;
-               struct array_cache **alien = NULL;
+               struct alien_cache **alien = NULL;
 
                nc = alloc_arraycache(node, cachep->limit,
                                        cachep->batchcount, GFP_KERNEL);
@@ -1284,7 +1194,7 @@ static int cpuup_prepare(long cpu)
                        }
                }
                cachep->array[cpu] = nc;
-               n = cachep->node[node];
+               n = get_node(cachep, node);
                BUG_ON(!n);
 
                spin_lock_irq(&n->list_lock);
@@ -1305,13 +1215,7 @@ static int cpuup_prepare(long cpu)
                spin_unlock_irq(&n->list_lock);
                kfree(shared);
                free_alien_cache(alien);
-               if (cachep->flags & SLAB_DEBUG_OBJECTS)
-                       slab_set_debugobj_lock_classes_node(cachep, node);
-               else if (!OFF_SLAB(cachep) &&
-                        !(cachep->flags & SLAB_DESTROY_BY_RCU))
-                       on_slab_lock_classes_node(cachep, node);
        }
-       init_node_lock_keys(node);
 
        return 0;
 bad:
@@ -1395,7 +1299,7 @@ static int __meminit drain_cache_node_node(int node)
        list_for_each_entry(cachep, &slab_caches, list) {
                struct kmem_cache_node *n;
 
-               n = cachep->node[node];
+               n = get_node(cachep, node);
                if (!n)
                        continue;
 
@@ -1575,10 +1479,6 @@ void __init kmem_cache_init(void)
 
                memcpy(ptr, cpu_cache_get(kmem_cache),
                       sizeof(struct arraycache_init));
-               /*
-                * Do not assume that spinlocks can be initialized via memcpy:
-                */
-               spin_lock_init(&ptr->lock);
 
                kmem_cache->array[smp_processor_id()] = ptr;
 
@@ -1588,10 +1488,6 @@ void __init kmem_cache_init(void)
                       != &initarray_generic.cache);
                memcpy(ptr, cpu_cache_get(kmalloc_caches[INDEX_AC]),
                       sizeof(struct arraycache_init));
-               /*
-                * Do not assume that spinlocks can be initialized via memcpy:
-                */
-               spin_lock_init(&ptr->lock);
 
                kmalloc_caches[INDEX_AC]->array[smp_processor_id()] = ptr;
        }
@@ -1628,9 +1524,6 @@ void __init kmem_cache_init_late(void)
                        BUG();
        mutex_unlock(&slab_mutex);
 
-       /* Annotate slab for lockdep -- annotate the malloc caches */
-       init_lock_keys();
-
        /* Done! */
        slab_state = FULL;
 
@@ -1690,14 +1583,10 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
        printk(KERN_WARNING "  cache: %s, object size: %d, order: %d\n",
                cachep->name, cachep->size, cachep->gfporder);
 
-       for_each_online_node(node) {
+       for_each_kmem_cache_node(cachep, node, n) {
                unsigned long active_objs = 0, num_objs = 0, free_objects = 0;
                unsigned long active_slabs = 0, num_slabs = 0;
 
-               n = cachep->node[node];
-               if (!n)
-                       continue;
-
                spin_lock_irqsave(&n->list_lock, flags);
                list_for_each_entry(page, &n->slabs_full, lru) {
                        active_objs += cachep->num;
@@ -1724,7 +1613,8 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
 }
 
 /*
- * Interface to system's page allocator. No need to hold the cache-lock.
+ * Interface to system's page allocator. No need to hold the
+ * kmem_cache_node ->list_lock.
  *
  * If we requested dmaable memory, we will get it. Even if we
  * did not request dmaable memory, we might get it, but that
@@ -2026,9 +1916,9 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep,
  * @cachep: cache pointer being destroyed
  * @page: page pointer being destroyed
  *
- * Destroy all the objs in a slab, and release the mem back to the system.
- * Before calling the slab must have been unlinked from the cache.  The
- * cache-lock is not held/needed.
+ * Destroy all the objs in a slab page, and release the mem back to the system.
+ * Before calling, the slab page must have been unlinked from the cache. The
+ * kmem_cache_node ->list_lock is not held/needed.
  */
 static void slab_destroy(struct kmem_cache *cachep, struct page *page)
 {
@@ -2060,6 +1950,16 @@ static void slab_destroy(struct kmem_cache *cachep, struct page *page)
                kmem_cache_free(cachep->freelist_cache, freelist);
 }
 
+static void slabs_destroy(struct kmem_cache *cachep, struct list_head *list)
+{
+       struct page *page, *n;
+
+       list_for_each_entry_safe(page, n, list, lru) {
+               list_del(&page->lru);
+               slab_destroy(cachep, page);
+       }
+}
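
slabs_destroy() walks the detached pages with list_for_each_entry_safe() because each iteration unlinks and frees the current entry; the _safe variant caches the next pointer before the loop body runs. The same concern in a self-contained userspace form, using a plain next pointer rather than list_head:

#include <stdlib.h>

struct model_node {
	struct model_node *next;
};

/* Safe deletion while walking: save 'next' before freeing the current node. */
static void model_destroy_all(struct model_node **head)
{
	struct model_node *pos = *head;

	while (pos) {
		struct model_node *next = pos->next;

		free(pos);
		pos = next;
	}
	*head = NULL;
}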
+
 /**
  * calculate_slab_order - calculate size (page order) of slabs
  * @cachep: pointer to the cache that is being created
@@ -2405,17 +2305,6 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
                return err;
        }
 
-       if (flags & SLAB_DEBUG_OBJECTS) {
-               /*
-                * Would deadlock through slab_destroy()->call_rcu()->
-                * debug_object_activate()->kmem_cache_alloc().
-                */
-               WARN_ON_ONCE(flags & SLAB_DESTROY_BY_RCU);
-
-               slab_set_debugobj_lock_classes(cachep);
-       } else if (!OFF_SLAB(cachep) && !(flags & SLAB_DESTROY_BY_RCU))
-               on_slab_lock_classes(cachep);
-
        return 0;
 }
 
@@ -2434,7 +2323,7 @@ static void check_spinlock_acquired(struct kmem_cache *cachep)
 {
 #ifdef CONFIG_SMP
        check_irq_off();
-       assert_spin_locked(&cachep->node[numa_mem_id()]->list_lock);
+       assert_spin_locked(&get_node(cachep, numa_mem_id())->list_lock);
 #endif
 }
 
@@ -2442,7 +2331,7 @@ static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node)
 {
 #ifdef CONFIG_SMP
        check_irq_off();
-       assert_spin_locked(&cachep->node[node]->list_lock);
+       assert_spin_locked(&get_node(cachep, node)->list_lock);
 #endif
 }
 
@@ -2462,12 +2351,16 @@ static void do_drain(void *arg)
        struct kmem_cache *cachep = arg;
        struct array_cache *ac;
        int node = numa_mem_id();
+       struct kmem_cache_node *n;
+       LIST_HEAD(list);
 
        check_irq_off();
        ac = cpu_cache_get(cachep);
-       spin_lock(&cachep->node[node]->list_lock);
-       free_block(cachep, ac->entry, ac->avail, node);
-       spin_unlock(&cachep->node[node]->list_lock);
+       n = get_node(cachep, node);
+       spin_lock(&n->list_lock);
+       free_block(cachep, ac->entry, ac->avail, node, &list);
+       spin_unlock(&n->list_lock);
+       slabs_destroy(cachep, &list);
        ac->avail = 0;
 }
 
@@ -2478,17 +2371,12 @@ static void drain_cpu_caches(struct kmem_cache *cachep)
 
        on_each_cpu(do_drain, cachep, 1);
        check_irq_on();
-       for_each_online_node(node) {
-               n = cachep->node[node];
-               if (n && n->alien)
+       for_each_kmem_cache_node(cachep, node, n)
+               if (n->alien)
                        drain_alien_cache(cachep, n->alien);
-       }
 
-       for_each_online_node(node) {
-               n = cachep->node[node];
-               if (n)
-                       drain_array(cachep, n, n->shared, 1, node);
-       }
+       for_each_kmem_cache_node(cachep, node, n)
+               drain_array(cachep, n, n->shared, 1, node);
 }
 
 /*
@@ -2534,17 +2422,14 @@ out:
 
 int __kmem_cache_shrink(struct kmem_cache *cachep)
 {
-       int ret = 0, i = 0;
+       int ret = 0;
+       int node;
        struct kmem_cache_node *n;
 
        drain_cpu_caches(cachep);
 
        check_irq_on();
-       for_each_online_node(i) {
-               n = cachep->node[i];
-               if (!n)
-                       continue;
-
+       for_each_kmem_cache_node(cachep, node, n) {
                drain_freelist(cachep, n, slabs_tofree(cachep, n));
 
                ret += !list_empty(&n->slabs_full) ||
@@ -2566,13 +2451,11 @@ int __kmem_cache_shutdown(struct kmem_cache *cachep)
            kfree(cachep->array[i]);
 
        /* NUMA: free the node structures */
-       for_each_online_node(i) {
-               n = cachep->node[i];
-               if (n) {
-                       kfree(n->shared);
-                       free_alien_cache(n->alien);
-                       kfree(n);
-               }
+       for_each_kmem_cache_node(cachep, i, n) {
+               kfree(n->shared);
+               free_alien_cache(n->alien);
+               kfree(n);
+               cachep->node[i] = NULL;
        }
        return 0;
 }
@@ -2751,7 +2634,7 @@ static int cache_grow(struct kmem_cache *cachep,
 
        /* Take the node list lock to change the colour_next on this node */
        check_irq_off();
-       n = cachep->node[nodeid];
+       n = get_node(cachep, nodeid);
        spin_lock(&n->list_lock);
 
        /* Get colour for the slab, and cal the next value. */
@@ -2920,7 +2803,7 @@ retry:
                 */
                batchcount = BATCHREFILL_LIMIT;
        }
-       n = cachep->node[node];
+       n = get_node(cachep, node);
 
        BUG_ON(ac->avail > 0 || !n);
        spin_lock(&n->list_lock);
@@ -3060,7 +2943,7 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
 
 static bool slab_should_failslab(struct kmem_cache *cachep, gfp_t flags)
 {
-       if (cachep == kmem_cache)
+       if (unlikely(cachep == kmem_cache))
                return false;
 
        return should_failslab(cachep->object_size, flags, cachep->flags);
@@ -3111,7 +2994,7 @@ out:
 
 #ifdef CONFIG_NUMA
 /*
- * Try allocating on another node if PF_SPREAD_SLAB is a mempolicy is set.
+ * Try allocating on another node if PFA_SPREAD_SLAB is set or a mempolicy is in effect.
  *
  * If we are in_interrupt, then process context, including cpusets and
  * mempolicy, may not apply and should not be used for allocation policy.
@@ -3169,8 +3052,8 @@ retry:
                nid = zone_to_nid(zone);
 
                if (cpuset_zone_allowed_hardwall(zone, flags) &&
-                       cache->node[nid] &&
-                       cache->node[nid]->free_objects) {
+                       get_node(cache, nid) &&
+                       get_node(cache, nid)->free_objects) {
                                obj = ____cache_alloc_node(cache,
                                        flags | GFP_THISNODE, nid);
                                if (obj)
@@ -3233,7 +3116,7 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
        int x;
 
        VM_BUG_ON(nodeid > num_online_nodes());
-       n = cachep->node[nodeid];
+       n = get_node(cachep, nodeid);
        BUG_ON(!n);
 
 retry:
@@ -3304,7 +3187,7 @@ slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
        if (nodeid == NUMA_NO_NODE)
                nodeid = slab_node;
 
-       if (unlikely(!cachep->node[nodeid])) {
+       if (unlikely(!get_node(cachep, nodeid))) {
                /* Node not bootstrapped yet */
                ptr = fallback_alloc(cachep, flags);
                goto out;
@@ -3343,7 +3226,7 @@ __do_cache_alloc(struct kmem_cache *cache, gfp_t flags)
 {
        void *objp;
 
-       if (current->mempolicy || unlikely(current->flags & PF_SPREAD_SLAB)) {
+       if (current->mempolicy || cpuset_do_slab_mem_spread()) {
                objp = alternate_node_alloc(cache, flags);
                if (objp)
                        goto out;
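
The PF_SPREAD_SLAB test becomes cpuset_do_slab_mem_spread(), which is where this file meets the commit subject: the memory-spread hints move from current->flags into per-task atomic flags (PFA_SPREAD_SLAB), so cpuset can flip them on another task without racing other updates to task->flags. The cpuset/sched side is outside this file; it presumably boils down to something like the sketch below, which is an assumption based on the new call, not a quote of the headers:

/* Assumed shape of the helpers behind cpuset_do_slab_mem_spread(). */
static inline bool task_spread_slab(struct task_struct *p)
{
	return test_bit(PFA_SPREAD_SLAB, &p->atomic_flags);	/* atomic bitop */
}

static inline int cpuset_do_slab_mem_spread(void)
{
	return task_spread_slab(current);
}
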
@@ -3405,12 +3288,13 @@ slab_alloc(struct kmem_cache *cachep, gfp_t flags, unsigned long caller)
 
 /*
  * Caller needs to acquire correct kmem_cache_node's list_lock
+ * @list: List of detached free slabs, to be freed by the caller
  */
-static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
-                      int node)
+static void free_block(struct kmem_cache *cachep, void **objpp,
+                       int nr_objects, int node, struct list_head *list)
 {
        int i;
-       struct kmem_cache_node *n;
+       struct kmem_cache_node *n = get_node(cachep, node);
 
        for (i = 0; i < nr_objects; i++) {
                void *objp;
@@ -3420,7 +3304,6 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
                objp = objpp[i];
 
                page = virt_to_head_page(objp);
-               n = cachep->node[node];
                list_del(&page->lru);
                check_spinlock_acquired_node(cachep, node);
                slab_put_obj(cachep, page, objp, node);
@@ -3431,13 +3314,7 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
                if (page->active == 0) {
                        if (n->free_objects > n->free_limit) {
                                n->free_objects -= cachep->num;
-                               /* No need to drop any previously held
-                                * lock here, even if we have a off-slab slab
-                                * descriptor it is guaranteed to come from
-                                * a different cache, refer to comments before
-                                * alloc_slabmgmt.
-                                */
-                               slab_destroy(cachep, page);
+                               list_add_tail(&page->lru, list);
                        } else {
                                list_add(&page->lru, &n->slabs_free);
                        }
@@ -3456,13 +3333,14 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
        int batchcount;
        struct kmem_cache_node *n;
        int node = numa_mem_id();
+       LIST_HEAD(list);
 
        batchcount = ac->batchcount;
 #if DEBUG
        BUG_ON(!batchcount || batchcount > ac->avail);
 #endif
        check_irq_off();
-       n = cachep->node[node];
+       n = get_node(cachep, node);
        spin_lock(&n->list_lock);
        if (n->shared) {
                struct array_cache *shared_array = n->shared;
@@ -3477,7 +3355,7 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
                }
        }
 
-       free_block(cachep, ac->entry, batchcount, node);
+       free_block(cachep, ac->entry, batchcount, node, &list);
 free_done:
 #if STATS
        {
@@ -3498,6 +3376,7 @@ free_done:
        }
 #endif
        spin_unlock(&n->list_lock);
+       slabs_destroy(cachep, &list);
        ac->avail -= batchcount;
        memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail);
 }
@@ -3754,7 +3633,7 @@ static int alloc_kmem_cache_node(struct kmem_cache *cachep, gfp_t gfp)
        int node;
        struct kmem_cache_node *n;
        struct array_cache *new_shared;
-       struct array_cache **new_alien = NULL;
+       struct alien_cache **new_alien = NULL;
 
        for_each_online_node(node) {
 
@@ -3775,15 +3654,16 @@ static int alloc_kmem_cache_node(struct kmem_cache *cachep, gfp_t gfp)
                        }
                }
 
-               n = cachep->node[node];
+               n = get_node(cachep, node);
                if (n) {
                        struct array_cache *shared = n->shared;
+                       LIST_HEAD(list);
 
                        spin_lock_irq(&n->list_lock);
 
                        if (shared)
                                free_block(cachep, shared->entry,
-                                               shared->avail, node);
+                                               shared->avail, node, &list);
 
                        n->shared = new_shared;
                        if (!n->alien) {
@@ -3793,6 +3673,7 @@ static int alloc_kmem_cache_node(struct kmem_cache *cachep, gfp_t gfp)
                        n->free_limit = (1 + nr_cpus_node(node)) *
                                        cachep->batchcount + cachep->num;
                        spin_unlock_irq(&n->list_lock);
+                       slabs_destroy(cachep, &list);
                        kfree(shared);
                        free_alien_cache(new_alien);
                        continue;
@@ -3820,9 +3701,8 @@ fail:
                /* Cache is not active yet. Roll back what we did */
                node--;
                while (node >= 0) {
-                       if (cachep->node[node]) {
-                               n = cachep->node[node];
-
+                       n = get_node(cachep, node);
+                       if (n) {
                                kfree(n->shared);
                                free_alien_cache(n->alien);
                                kfree(n);
@@ -3883,12 +3763,20 @@ static int __do_tune_cpucache(struct kmem_cache *cachep, int limit,
        cachep->shared = shared;
 
        for_each_online_cpu(i) {
+               LIST_HEAD(list);
                struct array_cache *ccold = new->new[i];
+               int node;
+               struct kmem_cache_node *n;
+
                if (!ccold)
                        continue;
-               spin_lock_irq(&cachep->node[cpu_to_mem(i)]->list_lock);
-               free_block(cachep, ccold->entry, ccold->avail, cpu_to_mem(i));
-               spin_unlock_irq(&cachep->node[cpu_to_mem(i)]->list_lock);
+
+               node = cpu_to_mem(i);
+               n = get_node(cachep, node);
+               spin_lock_irq(&n->list_lock);
+               free_block(cachep, ccold->entry, ccold->avail, node, &list);
+               spin_unlock_irq(&n->list_lock);
+               slabs_destroy(cachep, &list);
                kfree(ccold);
        }
        kfree(new);
@@ -3996,6 +3884,7 @@ skip_setup:
 static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *n,
                         struct array_cache *ac, int force, int node)
 {
+       LIST_HEAD(list);
        int tofree;
 
        if (!ac || !ac->avail)
@@ -4008,12 +3897,13 @@ static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *n,
                        tofree = force ? ac->avail : (ac->limit + 4) / 5;
                        if (tofree > ac->avail)
                                tofree = (ac->avail + 1) / 2;
-                       free_block(cachep, ac->entry, tofree, node);
+                       free_block(cachep, ac->entry, tofree, node, &list);
                        ac->avail -= tofree;
                        memmove(ac->entry, &(ac->entry[tofree]),
                                sizeof(void *) * ac->avail);
                }
                spin_unlock_irq(&n->list_lock);
+               slabs_destroy(cachep, &list);
        }
 }
 
@@ -4048,7 +3938,7 @@ static void cache_reap(struct work_struct *w)
                 * have established with reasonable certainty that
                 * we can do some work if the lock was obtained.
                 */
-               n = searchp->node[node];
+               n = get_node(searchp, node);
 
                reap_alien(searchp, n);
 
@@ -4100,10 +3990,7 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
 
        active_objs = 0;
        num_slabs = 0;
-       for_each_online_node(node) {
-               n = cachep->node[node];
-               if (!n)
-                       continue;
+       for_each_kmem_cache_node(cachep, node, n) {
 
                check_irq_on();
                spin_lock_irq(&n->list_lock);
@@ -4328,10 +4215,7 @@ static int leaks_show(struct seq_file *m, void *p)
 
        x[1] = 0;
 
-       for_each_online_node(node) {
-               n = cachep->node[node];
-               if (!n)
-                       continue;
+       for_each_kmem_cache_node(cachep, node, n) {
 
                check_irq_on();
                spin_lock_irq(&n->list_lock);