Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/deller...

[pandora-kernel.git] / mm / slab.c
diff --git a/mm/slab.c b/mm/slab.c

index 6ebb951..e7667a3 100644 (file)
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -87,7 +87,6 @@
   */
  
  #include       <linux/slab.h>
-#include       "slab.h"
  #include       <linux/mm.h>
  #include       <linux/poison.h>
  #include       <linux/swap.h>
@@ -128,6 +127,8 @@
  
  #include       "internal.h"
  
+#include       "slab.h"
+
  /*
   * DEBUG       - 1 for kmem_cache_create() to honour; SLAB_RED_ZONE & SLAB_POISON.
   *               0 for faster, smaller code (especially in the critical paths).
@@ -547,15 +548,11 @@ static struct cache_names __initdata cache_names[] = {
  #undef CACHE
  };
  
-static struct arraycache_init initarray_cache __initdata =
-    { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
  static struct arraycache_init initarray_generic =
      { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
  
  /* internal cache of cache description objs */
-static struct kmem_list3 *kmem_cache_nodelists[MAX_NUMNODES];
  static struct kmem_cache kmem_cache_boot = {
-       .nodelists = kmem_cache_nodelists,
         .batchcount = 1,
         .limit = BOOT_CPUCACHE_ENTRIES,
         .shared = 1,
@@ -645,6 +642,26 @@ static void init_node_lock_keys(int q)
         }
  }
  
+static void on_slab_lock_classes_node(struct kmem_cache *cachep, int q)
+{
+       struct kmem_list3 *l3;
+       l3 = cachep->nodelists[q];
+       if (!l3)
+               return;
+
+       slab_set_lock_classes(cachep, &on_slab_l3_key,
+                       &on_slab_alc_key, q);
+}
+
+static inline void on_slab_lock_classes(struct kmem_cache *cachep)
+{
+       int node;
+
+       VM_BUG_ON(OFF_SLAB(cachep));
+       for_each_node(node)
+               on_slab_lock_classes_node(cachep, node);
+}
+
  static inline void init_lock_keys(void)
  {
         int node;
@@ -661,6 +678,14 @@ static inline void init_lock_keys(void)
  {
  }
  
+static inline void on_slab_lock_classes(struct kmem_cache *cachep)
+{
+}
+
+static inline void on_slab_lock_classes_node(struct kmem_cache *cachep, int node)
+{
+}
+
  static void slab_set_debugobj_lock_classes_node(struct kmem_cache *cachep, int node)
  {
  }
@@ -1389,6 +1414,9 @@ static int __cpuinit cpuup_prepare(long cpu)
                 free_alien_cache(alien);
                 if (cachep->flags & SLAB_DEBUG_OBJECTS)
                         slab_set_debugobj_lock_classes_node(cachep, node);
+               else if (!OFF_SLAB(cachep) &&
+                        !(cachep->flags & SLAB_DESTROY_BY_RCU))
+                       on_slab_lock_classes_node(cachep, node);
         }
         init_node_lock_keys(node);
  
@@ -1559,29 +1587,34 @@ static void __init set_up_list3s(struct kmem_cache *cachep, int index)
         }
  }
  
+/*
+ * The memory after the last cpu cache pointer is used for the
+ * nodelists pointer.
+ */
+static void setup_nodelists_pointer(struct kmem_cache *cachep)
+{
+       cachep->nodelists = (struct kmem_list3 **)&cachep->array[nr_cpu_ids];
+}
+
  /*
   * Initialisation.  Called after the page allocator have been initialised and
   * before smp_init().
   */
  void __init kmem_cache_init(void)
  {
-       size_t left_over;
         struct cache_sizes *sizes;
         struct cache_names *names;
         int i;
-       int order;
-       int node;
  
         kmem_cache = &kmem_cache_boot;
+       setup_nodelists_pointer(kmem_cache);
  
         if (num_possible_nodes() == 1)
                 use_alien_caches = 0;
  
-       for (i = 0; i < NUM_INIT_LISTS; i++) {
+       for (i = 0; i < NUM_INIT_LISTS; i++)
                 kmem_list3_init(&initkmem_list3[i]);
-               if (i < MAX_NUMNODES)
-                       kmem_cache->nodelists[i] = NULL;
-       }
+
         set_up_list3s(kmem_cache, CACHE_CACHE);
  
         /*
@@ -1612,37 +1645,16 @@ void __init kmem_cache_init(void)
          * 6) Resize the head arrays of the kmalloc caches to their final sizes.
          */
  
-       node = numa_mem_id();
-
         /* 1) create the kmem_cache */
-       INIT_LIST_HEAD(&slab_caches);
-       list_add(&kmem_cache->list, &slab_caches);
-       kmem_cache->colour_off = cache_line_size();
-       kmem_cache->array[smp_processor_id()] = &initarray_cache.cache;
-       kmem_cache->nodelists[node] = &initkmem_list3[CACHE_CACHE + node];
  
         /*
          * struct kmem_cache size depends on nr_node_ids & nr_cpu_ids
          */
-       kmem_cache->size = offsetof(struct kmem_cache, array[nr_cpu_ids]) +
-                                 nr_node_ids * sizeof(struct kmem_list3 *);
-       kmem_cache->object_size = kmem_cache->size;
-       kmem_cache->size = ALIGN(kmem_cache->object_size,
-                                       cache_line_size());
-       kmem_cache->reciprocal_buffer_size =
-               reciprocal_value(kmem_cache->size);
-
-       for (order = 0; order < MAX_ORDER; order++) {
-               cache_estimate(order, kmem_cache->size,
-                       cache_line_size(), 0, &left_over, &kmem_cache->num);
-               if (kmem_cache->num)
-                       break;
-       }
-       BUG_ON(!kmem_cache->num);
-       kmem_cache->gfporder = order;
-       kmem_cache->colour = left_over / kmem_cache->colour_off;
-       kmem_cache->slab_size = ALIGN(kmem_cache->num * sizeof(kmem_bufctl_t) +
-                                     sizeof(struct slab), cache_line_size());
+       create_boot_cache(kmem_cache, "kmem_cache",
+               offsetof(struct kmem_cache, array[nr_cpu_ids]) +
+                                 nr_node_ids * sizeof(struct kmem_list3 *),
+                                 SLAB_HWCACHE_ALIGN);
+       list_add(&kmem_cache->list, &slab_caches);
  
         /* 2+3) create the kmalloc caches */
         sizes = malloc_sizes;
@@ -1654,23 +1666,13 @@ void __init kmem_cache_init(void)
          * bug.
          */
  
-       sizes[INDEX_AC].cs_cachep = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
-       sizes[INDEX_AC].cs_cachep->name = names[INDEX_AC].name;
-       sizes[INDEX_AC].cs_cachep->size = sizes[INDEX_AC].cs_size;
-       sizes[INDEX_AC].cs_cachep->object_size = sizes[INDEX_AC].cs_size;
-       sizes[INDEX_AC].cs_cachep->align = ARCH_KMALLOC_MINALIGN;
-       __kmem_cache_create(sizes[INDEX_AC].cs_cachep, ARCH_KMALLOC_FLAGS|SLAB_PANIC);
-       list_add(&sizes[INDEX_AC].cs_cachep->list, &slab_caches);
-
-       if (INDEX_AC != INDEX_L3) {
-               sizes[INDEX_L3].cs_cachep = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
-               sizes[INDEX_L3].cs_cachep->name = names[INDEX_L3].name;
-               sizes[INDEX_L3].cs_cachep->size = sizes[INDEX_L3].cs_size;
-               sizes[INDEX_L3].cs_cachep->object_size = sizes[INDEX_L3].cs_size;
-               sizes[INDEX_L3].cs_cachep->align = ARCH_KMALLOC_MINALIGN;
-               __kmem_cache_create(sizes[INDEX_L3].cs_cachep, ARCH_KMALLOC_FLAGS|SLAB_PANIC);
-               list_add(&sizes[INDEX_L3].cs_cachep->list, &slab_caches);
-       }
+       sizes[INDEX_AC].cs_cachep = create_kmalloc_cache(names[INDEX_AC].name,
+                                       sizes[INDEX_AC].cs_size, ARCH_KMALLOC_FLAGS);
+
+       if (INDEX_AC != INDEX_L3)
+               sizes[INDEX_L3].cs_cachep =
+                       create_kmalloc_cache(names[INDEX_L3].name,
+                               sizes[INDEX_L3].cs_size, ARCH_KMALLOC_FLAGS);
  
         slab_early_init = 0;
  
@@ -1682,24 +1684,14 @@ void __init kmem_cache_init(void)
                  * Note for systems short on memory removing the alignment will
                  * allow tighter packing of the smaller caches.
                  */
-               if (!sizes->cs_cachep) {
-                       sizes->cs_cachep = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
-                       sizes->cs_cachep->name = names->name;
-                       sizes->cs_cachep->size = sizes->cs_size;
-                       sizes->cs_cachep->object_size = sizes->cs_size;
-                       sizes->cs_cachep->align = ARCH_KMALLOC_MINALIGN;
-                       __kmem_cache_create(sizes->cs_cachep, ARCH_KMALLOC_FLAGS|SLAB_PANIC);
-                       list_add(&sizes->cs_cachep->list, &slab_caches);
-               }
+               if (!sizes->cs_cachep)
+                       sizes->cs_cachep = create_kmalloc_cache(names->name,
+                                       sizes->cs_size, ARCH_KMALLOC_FLAGS);
+
  #ifdef CONFIG_ZONE_DMA
-               sizes->cs_dmacachep = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
-               sizes->cs_dmacachep->name = names->name_dma;
-               sizes->cs_dmacachep->size = sizes->cs_size;
-               sizes->cs_dmacachep->object_size = sizes->cs_size;
-               sizes->cs_dmacachep->align = ARCH_KMALLOC_MINALIGN;
-               __kmem_cache_create(sizes->cs_dmacachep,
-                              ARCH_KMALLOC_FLAGS|SLAB_CACHE_DMA| SLAB_PANIC);
-               list_add(&sizes->cs_dmacachep->list, &slab_caches);
+               sizes->cs_dmacachep = create_kmalloc_cache(
+                       names->name_dma, sizes->cs_size,
+                       SLAB_CACHE_DMA|ARCH_KMALLOC_FLAGS);
  #endif
                 sizes++;
                 names++;
@@ -1710,7 +1702,6 @@ void __init kmem_cache_init(void)
  
                 ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);
  
-               BUG_ON(cpu_cache_get(kmem_cache) != &initarray_cache.cache);
                 memcpy(ptr, cpu_cache_get(kmem_cache),
                        sizeof(struct arraycache_init));
                 /*
@@ -1904,6 +1895,7 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
                 if (page->pfmemalloc)
                         SetPageSlabPfmemalloc(page + i);
         }
+       memcg_bind_pages(cachep, cachep->gfporder);
  
         if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) {
                 kmemcheck_alloc_shadow(page, cachep->gfporder, flags, nodeid);
@@ -1940,9 +1932,11 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr)
                 __ClearPageSlab(page);
                 page++;
         }
+
+       memcg_release_pages(cachep, cachep->gfporder);
         if (current->reclaim_state)
                 current->reclaim_state->reclaimed_slab += nr_freed;
-       free_pages((unsigned long)addr, cachep->gfporder);
+       free_memcg_kmem_pages((unsigned long)addr, cachep->gfporder);
  }
  
  static void kmem_rcu_free(struct rcu_head *head)
@@ -2265,7 +2259,15 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
  
         if (slab_state == DOWN) {
                 /*
-                * Note: the first kmem_cache_create must create the cache
+                * Note: Creation of first cache (kmem_cache).
+                * The set_up_list3s() call is taken care
+                * of by the caller of __kmem_cache_create().
+                */
+               cachep->array[smp_processor_id()] = &initarray_generic.cache;
+               slab_state = PARTIAL;
+       } else if (slab_state == PARTIAL) {
+               /*
+                * Note: the second kmem_cache_create must create the cache
                  * that's used by kmalloc(24), otherwise the creation of
                  * further caches will BUG().
                  */
@@ -2273,7 +2275,7 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
  
                 /*
                  * If the cache that's used by kmalloc(sizeof(kmem_list3)) is
-                * the first cache, then we need to set up all its list3s,
+                * the second cache, then we need to set up all its list3s,
                  * otherwise the creation of further caches will BUG().
                  */
                 set_up_list3s(cachep, SIZE_AC);
@@ -2282,6 +2284,7 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
                 else
                         slab_state = PARTIAL_ARRAYCACHE;
         } else {
+               /* Remaining boot caches */
                 cachep->array[smp_processor_id()] =
                         kmalloc(sizeof(struct arraycache_init), gfp);
  
@@ -2314,11 +2317,8 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
  
  /**
   * __kmem_cache_create - Create a cache.
- * @name: A string which is used in /proc/slabinfo to identify this cache.
- * @size: The size of objects to be created in this cache.
- * @align: The required alignment for the objects.
+ * @cachep: cache management descriptor
   * @flags: SLAB flags
- * @ctor: A constructor for the objects.
   *
   * Returns a ptr to the cache on success, NULL on failure.
   * Cannot be called within a int, but can be interrupted.
@@ -2372,22 +2372,6 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
                 size &= ~(BYTES_PER_WORD - 1);
         }
  
-       /* calculate the final buffer alignment: */
-
-       /* 1) arch recommendation: can be overridden for debug */
-       if (flags & SLAB_HWCACHE_ALIGN) {
-               /*
-                * Default alignment: as specified by the arch code.  Except if
-                * an object is really small, then squeeze multiple objects into
-                * one cacheline.
-                */
-               ralign = cache_line_size();
-               while (size <= ralign / 2)
-                       ralign /= 2;
-       } else {
-               ralign = BYTES_PER_WORD;
-       }
-
         /*
          * Redzoning and user store require word alignment or possibly larger.
          * Note this will be overridden by architecture or caller mandated
@@ -2404,10 +2388,6 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
                 size &= ~(REDZONE_ALIGN - 1);
         }
  
-       /* 2) arch mandated alignment */
-       if (ralign < ARCH_SLAB_MINALIGN) {
-               ralign = ARCH_SLAB_MINALIGN;
-       }
         /* 3) caller mandated alignment */
         if (ralign < cachep->align) {
                 ralign = cachep->align;
@@ -2425,7 +2405,7 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
         else
                 gfp = GFP_NOWAIT;
  
-       cachep->nodelists = (struct kmem_list3 **)&cachep->array[nr_cpu_ids];
+       setup_nodelists_pointer(cachep);
  #if DEBUG
  
         /*
@@ -2544,7 +2524,8 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
                 WARN_ON_ONCE(flags & SLAB_DESTROY_BY_RCU);
  
                 slab_set_debugobj_lock_classes(cachep);
-       }
+       } else if (!OFF_SLAB(cachep) && !(flags & SLAB_DESTROY_BY_RCU))
+               on_slab_lock_classes(cachep);
  
         return 0;
  }
@@ -3508,6 +3489,8 @@ slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
         if (slab_should_failslab(cachep, flags))
                 return NULL;
  
+       cachep = memcg_kmem_get_cache(cachep, flags);
+
         cache_alloc_debugcheck_before(cachep, flags);
         local_irq_save(save_flags);
  
@@ -3593,6 +3576,8 @@ slab_alloc(struct kmem_cache *cachep, gfp_t flags, unsigned long caller)
         if (slab_should_failslab(cachep, flags))
                 return NULL;
  
+       cachep = memcg_kmem_get_cache(cachep, flags);
+
         cache_alloc_debugcheck_before(cachep, flags);
         local_irq_save(save_flags);
         objp = __do_cache_alloc(cachep, flags);
@@ -3906,6 +3891,9 @@ EXPORT_SYMBOL(__kmalloc);
  void kmem_cache_free(struct kmem_cache *cachep, void *objp)
  {
         unsigned long flags;
+       cachep = cache_from_obj(cachep, objp);
+       if (!cachep)
+               return;
  
         local_irq_save(flags);
         debug_check_no_locks_freed(objp, cachep->object_size);
@@ -4053,7 +4041,7 @@ static void do_ccupdate_local(void *info)
  }
  
  /* Always called with the slab_mutex held */
-static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
+static int __do_tune_cpucache(struct kmem_cache *cachep, int limit,
                                 int batchcount, int shared, gfp_t gfp)
  {
         struct ccupdate_struct *new;
@@ -4096,12 +4084,49 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
         return alloc_kmemlist(cachep, gfp);
  }
  
+static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
+                               int batchcount, int shared, gfp_t gfp)
+{
+       int ret;
+       struct kmem_cache *c = NULL;
+       int i = 0;
+
+       ret = __do_tune_cpucache(cachep, limit, batchcount, shared, gfp);
+
+       if (slab_state < FULL)
+               return ret;
+
+       if ((ret < 0) || !is_root_cache(cachep))
+               return ret;
+
+       VM_BUG_ON(!mutex_is_locked(&slab_mutex));
+       for_each_memcg_cache_index(i) {
+               c = cache_from_memcg(cachep, i);
+               if (c)
+                       /* return value determined by the parent cache only */
+                       __do_tune_cpucache(c, limit, batchcount, shared, gfp);
+       }
+
+       return ret;
+}
+
  /* Called with slab_mutex held always */
  static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)
  {
         int err;
-       int limit, shared;
+       int limit = 0;
+       int shared = 0;
+       int batchcount = 0;
+
+       if (!is_root_cache(cachep)) {
+               struct kmem_cache *root = memcg_root_cache(cachep);
+               limit = root->limit;
+               shared = root->shared;
+               batchcount = root->batchcount;
+       }
  
+       if (limit && shared && batchcount)
+               goto skip_setup;
         /*
          * The head array serves three purposes:
          * - create a LIFO ordering, i.e. return objects that are cache-warm
@@ -4143,7 +4168,9 @@ static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)
         if (limit > 32)
                 limit = 32;
  #endif
-       err = do_tune_cpucache(cachep, limit, (limit + 1) / 2, shared, gfp);
+       batchcount = (limit + 1) / 2;
+skip_setup:
+       err = do_tune_cpucache(cachep, limit, batchcount, shared, gfp);
         if (err)
                 printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n",
                        cachep->name, -err);