Merge branches 'slab/align', 'slab/cleanups', 'slab/fixes', 'slab/memhotadd' and...

author Pekka Enberg <penberg@cs.helsinki.fi>

Sat, 22 May 2010 07:57:52 +0000 (10:57 +0300)

committer Pekka Enberg <penberg@cs.helsinki.fi>

Sat, 22 May 2010 07:57:52 +0000 (10:57 +0300)
author Pekka Enberg <penberg@cs.helsinki.fi>
Sat, 22 May 2010 07:57:52 +0000 (10:57 +0300)
committer Pekka Enberg <penberg@cs.helsinki.fi>
Sat, 22 May 2010 07:57:52 +0000 (10:57 +0300)
diff --git a/include/linux/crypto.h b/include/linux/crypto.h

index 24d2e30..a6a7a1c 100644 (file)
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -99,13 +99,7 @@
   * as arm where pointers are 32-bit aligned but there are data types such as
   * u64 which require 64-bit alignment.
   */
-#if defined(ARCH_KMALLOC_MINALIGN)
  #define CRYPTO_MINALIGN ARCH_KMALLOC_MINALIGN
-#elif defined(ARCH_SLAB_MINALIGN)
-#define CRYPTO_MINALIGN ARCH_SLAB_MINALIGN
-#else
-#define CRYPTO_MINALIGN __alignof__(unsigned long long)
-#endif
  
  #define CRYPTO_MINALIGN_ATTR __attribute__ ((__aligned__(CRYPTO_MINALIGN)))
  
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h

index ca6b2b3..1812dac 100644 (file)
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -16,6 +16,30 @@
  #include <linux/compiler.h>
  #include <linux/kmemtrace.h>
  
+#ifndef ARCH_KMALLOC_MINALIGN
+/*
+ * Enforce a minimum alignment for the kmalloc caches.
+ * Usually, the kmalloc caches are cache_line_size() aligned, except when
+ * DEBUG and FORCED_DEBUG are enabled, then they are BYTES_PER_WORD aligned.
+ * Some archs want to perform DMA into kmalloc caches and need a guaranteed
+ * alignment larger than the alignment of a 64-bit integer.
+ * ARCH_KMALLOC_MINALIGN allows that.
+ * Note that increasing this value may disable some debug features.
+ */
+#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
+#endif
+
+#ifndef ARCH_SLAB_MINALIGN
+/*
+ * Enforce a minimum alignment for all caches.
+ * Intended for archs that get misalignment faults even for BYTES_PER_WORD
+ * aligned buffers. Includes ARCH_KMALLOC_MINALIGN.
+ * If possible: Do not enable this flag for CONFIG_DEBUG_SLAB, it disables
+ * some debug features.
+ */
+#define ARCH_SLAB_MINALIGN 0
+#endif
+
  /*
   * struct kmem_cache
   *
diff --git a/include/linux/slob_def.h b/include/linux/slob_def.h

index 0ec00b3..62667f7 100644 (file)
--- a/include/linux/slob_def.h
+++ b/include/linux/slob_def.h
@@ -1,6 +1,14 @@
  #ifndef __LINUX_SLOB_DEF_H
  #define __LINUX_SLOB_DEF_H
  
+#ifndef ARCH_KMALLOC_MINALIGN
+#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long)
+#endif
+
+#ifndef ARCH_SLAB_MINALIGN
+#define ARCH_SLAB_MINALIGN __alignof__(unsigned long)
+#endif
+
  void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node);
  
  static __always_inline void *kmem_cache_alloc(struct kmem_cache *cachep,
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h

index 0249d41..55695c8 100644 (file)
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -116,6 +116,14 @@ struct kmem_cache {
  
  #define KMALLOC_SHIFT_LOW ilog2(KMALLOC_MIN_SIZE)
  
+#ifndef ARCH_KMALLOC_MINALIGN
+#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
+#endif
+
+#ifndef ARCH_SLAB_MINALIGN
+#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
+#endif
+
  /*
   * Maximum kmalloc object size handled by SLUB. Larger object allocations
   * are passed through to the page allocator. The page allocator "fastpath"
diff --git a/mm/slab.c b/mm/slab.c

index bac0f4f..50a73fc 100644 (file)
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -115,6 +115,7 @@
  #include       <linux/reciprocal_div.h>
  #include       <linux/debugobjects.h>
  #include       <linux/kmemcheck.h>
+#include       <linux/memory.h>
  
  #include       <asm/cacheflush.h>
  #include       <asm/tlbflush.h>
@@ -144,30 +145,6 @@
  #define        BYTES_PER_WORD          sizeof(void *)
  #define        REDZONE_ALIGN           max(BYTES_PER_WORD, __alignof__(unsigned long long))
  
-#ifndef ARCH_KMALLOC_MINALIGN
-/*
- * Enforce a minimum alignment for the kmalloc caches.
- * Usually, the kmalloc caches are cache_line_size() aligned, except when
- * DEBUG and FORCED_DEBUG are enabled, then they are BYTES_PER_WORD aligned.
- * Some archs want to perform DMA into kmalloc caches and need a guaranteed
- * alignment larger than the alignment of a 64-bit integer.
- * ARCH_KMALLOC_MINALIGN allows that.
- * Note that increasing this value may disable some debug features.
- */
-#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
-#endif
-
-#ifndef ARCH_SLAB_MINALIGN
-/*
- * Enforce a minimum alignment for all caches.
- * Intended for archs that get misalignment faults even for BYTES_PER_WORD
- * aligned buffers. Includes ARCH_KMALLOC_MINALIGN.
- * If possible: Do not enable this flag for CONFIG_DEBUG_SLAB, it disables
- * some debug features.
- */
-#define ARCH_SLAB_MINALIGN 0
-#endif
-
  #ifndef ARCH_KMALLOC_FLAGS
  #define ARCH_KMALLOC_FLAGS SLAB_HWCACHE_ALIGN
  #endif
@@ -1102,6 +1079,52 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
  }
  #endif
  
+/*
+ * Allocates and initializes nodelists for a node on each slab cache, used for
+ * either memory or cpu hotplug.  If memory is being hot-added, the kmem_list3
+ * will be allocated off-node since memory is not yet online for the new node.
+ * When hotplugging memory or a cpu, existing nodelists are not replaced if
+ * already in use.
+ *
+ * For every cache on cache_chain the node's free_limit is recomputed under
+ * the nodelist's list_lock, whether or not a new kmem_list3 had to be
+ * allocated for it.
+ *
+ * Returns 0 on success, or -ENOMEM as soon as one kmem_list3 allocation
+ * fails.  In the failure case the walk stops immediately: nodelists already
+ * installed for earlier caches in the chain are left in place, nothing is
+ * rolled back here.
+ *
+ * Must hold cache_chain_mutex.
+ */
+static int init_cache_nodelists_node(int node)
+{
+       struct kmem_cache *cachep;
+       struct kmem_list3 *l3;
+       const int memsize = sizeof(struct kmem_list3);
+
+       list_for_each_entry(cachep, &cache_chain, next) {
+               /*
+                * Set up the size64 kmemlist for cpu before we can
+                * begin anything. Make sure some other cpu on this
+                * node has not already allocated this
+                */
+               if (!cachep->nodelists[node]) {
+                       l3 = kmalloc_node(memsize, GFP_KERNEL, node);
+                       if (!l3)
+                               return -ENOMEM;
+                       kmem_list3_init(l3);
+                       l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
+                           ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
+
+                       /*
+                        * The l3s don't come and go as CPUs come and
+                        * go.  cache_chain_mutex is sufficient
+                        * protection here.
+                        */
+                       cachep->nodelists[node] = l3;
+               }
+
+               spin_lock_irq(&cachep->nodelists[node]->list_lock);
+               cachep->nodelists[node]->free_limit =
+                       (1 + nr_cpus_node(node)) *
+                       cachep->batchcount + cachep->num;
+               spin_unlock_irq(&cachep->nodelists[node]->list_lock);
+       }
+       return 0;
+}
+
  static void __cpuinit cpuup_canceled(long cpu)
  {
         struct kmem_cache *cachep;
@@ -1172,7 +1195,7 @@ static int __cpuinit cpuup_prepare(long cpu)
         struct kmem_cache *cachep;
         struct kmem_list3 *l3 = NULL;
         int node = cpu_to_node(cpu);
-       const int memsize = sizeof(struct kmem_list3);
+       int err;
  
         /*
          * We need to do this right in the beginning since
@@ -1180,35 +1203,9 @@ static int __cpuinit cpuup_prepare(long cpu)
          * kmalloc_node allows us to add the slab to the right
          * kmem_list3 and not this cpu's kmem_list3
          */
-
-       list_for_each_entry(cachep, &cache_chain, next) {
-               /*
-                * Set up the size64 kmemlist for cpu before we can
-                * begin anything. Make sure some other cpu on this
-                * node has not already allocated this
-                */
-               if (!cachep->nodelists[node]) {
-                       l3 = kmalloc_node(memsize, GFP_KERNEL, node);
-                       if (!l3)
-                               goto bad;
-                       kmem_list3_init(l3);
-                       l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
-                           ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
-
-                       /*
-                        * The l3s don't come and go as CPUs come and
-                        * go.  cache_chain_mutex is sufficient
-                        * protection here.
-                        */
-                       cachep->nodelists[node] = l3;
-               }
-
-               spin_lock_irq(&cachep->nodelists[node]->list_lock);
-               cachep->nodelists[node]->free_limit =
-                       (1 + nr_cpus_node(node)) *
-                       cachep->batchcount + cachep->num;
-               spin_unlock_irq(&cachep->nodelists[node]->list_lock);
-       }
+       err = init_cache_nodelists_node(node);
+       if (err < 0)
+               goto bad;
  
         /*
          * Now we can go ahead with allocating the shared arrays and
@@ -1331,11 +1328,75 @@ static struct notifier_block __cpuinitdata cpucache_notifier = {
         &cpuup_callback, NULL, 0
  };
  
+#if defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG)
+/*
+ * Drains freelist for a node on each slab cache, used for memory hot-remove.
+ * Returns -EBUSY if all objects cannot be drained so that the node is not
+ * removed.
+ *
+ * Caches that have no nodelist for this node are skipped.  For the others,
+ * drain_freelist() is called with l3->free_objects; if the nodelist still
+ * has full or partial slabs afterwards, the walk stops at that cache and
+ * -EBUSY is returned without visiting the remaining caches.
+ * Returns 0 when no visited cache has full or partial slabs left on the node.
+ *
+ * Must hold cache_chain_mutex.
+ */
+static int __meminit drain_cache_nodelists_node(int node)
+{
+       struct kmem_cache *cachep;
+       int ret = 0;
+
+       list_for_each_entry(cachep, &cache_chain, next) {
+               struct kmem_list3 *l3;
+
+               l3 = cachep->nodelists[node];
+               if (!l3)
+                       continue;
+
+               drain_freelist(cachep, l3, l3->free_objects);
+
+               if (!list_empty(&l3->slabs_full) ||
+                   !list_empty(&l3->slabs_partial)) {
+                       ret = -EBUSY;
+                       break;
+               }
+       }
+       return ret;
+}
+
+static int __meminit slab_memory_callback(struct notifier_block *self,
+                                       unsigned long action, void *arg)
+{
+       struct memory_notify *mnb = arg;
+       int ret = 0;
+       int nid;
+
+       nid = mnb->status_change_nid;
+       if (nid < 0)            /* negative nid: skip the switch, report NOTIFY_OK */
+               goto out;
+
+       switch (action) {
+       case MEM_GOING_ONLINE:  /* allocate/initialize nodelists for nid */
+               mutex_lock(&cache_chain_mutex);
+               ret = init_cache_nodelists_node(nid);
+               mutex_unlock(&cache_chain_mutex);
+               break;
+       case MEM_GOING_OFFLINE: /* drain nid; -EBUSY if slabs remain in use */
+               mutex_lock(&cache_chain_mutex);
+               ret = drain_cache_nodelists_node(nid);
+               mutex_unlock(&cache_chain_mutex);
+               break;
+       case MEM_ONLINE:
+       case MEM_OFFLINE:
+       case MEM_CANCEL_ONLINE:
+       case MEM_CANCEL_OFFLINE:
+               break;          /* these events need no slab work */
+       }
+out:
+       return ret ? notifier_from_errno(ret) : NOTIFY_OK; /* errno -> notifier code */
+}
+#endif /* CONFIG_NUMA && CONFIG_MEMORY_HOTPLUG */
+
  /*
   * swap the static kmem_list3 with kmalloced memory
   */
-static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list,
-                       int nodeid)
+static void __init init_list(struct kmem_cache *cachep, struct kmem_list3 *list,
+                               int nodeid)
  {
         struct kmem_list3 *ptr;
  
@@ -1580,6 +1641,14 @@ void __init kmem_cache_init_late(void)
          */
         register_cpu_notifier(&cpucache_notifier);
  
+#ifdef CONFIG_NUMA
+       /*
+        * Register a memory hotplug callback that initializes and frees
+        * nodelists.
+        */
+       hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);
+#endif
+
         /*
          * The reap timers are started later, with a module init call: That part
          * of the kernel is not yet operational.
@@ -2220,8 +2289,8 @@ kmem_cache_create (const char *name, size_t size, size_t align,
         if (ralign < align) {
                 ralign = align;
         }
-       /* disable debug if necessary */
-       if (ralign > __alignof__(unsigned long long))
+       /* disable debug if not aligning with REDZONE_ALIGN */
+       if (ralign & (__alignof__(unsigned long long) - 1))
                 flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
         /*
          * 4) Store it.
@@ -2247,8 +2316,8 @@ kmem_cache_create (const char *name, size_t size, size_t align,
          */
         if (flags & SLAB_RED_ZONE) {
                 /* add space for red zone words */
-               cachep->obj_offset += sizeof(unsigned long long);
-               size += 2 * sizeof(unsigned long long);
+               cachep->obj_offset += align;
+               size += align + sizeof(unsigned long long);
         }
         if (flags & SLAB_STORE_USER) {
                 /* user store requires one word storage behind the end of
@@ -4216,10 +4285,11 @@ static int s_show(struct seq_file *m, void *p)
                 unsigned long node_frees = cachep->node_frees;
                 unsigned long overflows = cachep->node_overflow;
  
-               seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu \
-                               %4lu %4lu %4lu %4lu %4lu", allocs, high, grown,
-                               reaped, errors, max_freeable, node_allocs,
-                               node_frees, overflows);
+               seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu "
+                          "%4lu %4lu %4lu %4lu %4lu",
+                          allocs, high, grown,
+                          reaped, errors, max_freeable, node_allocs,
+                          node_frees, overflows);
         }
         /* cpu stats */
         {
diff --git a/mm/slob.c b/mm/slob.c

index 837ebd6..23631e2 100644 (file)
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -467,14 +467,6 @@ out:
   * End of slob allocator proper. Begin kmem_cache_alloc and kmalloc frontend.
   */
  
-#ifndef ARCH_KMALLOC_MINALIGN
-#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long)
-#endif
-
-#ifndef ARCH_SLAB_MINALIGN
-#define ARCH_SLAB_MINALIGN __alignof__(unsigned long)
-#endif
-
  void *__kmalloc_node(size_t size, gfp_t gfp, int node)
  {
         unsigned int *m;
diff --git a/mm/slub.c b/mm/slub.c

index d2a54fe..e46e312 100644 (file)
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -157,14 +157,6 @@
  #define SLUB_MERGE_SAME (SLAB_DEBUG_FREE | SLAB_RECLAIM_ACCOUNT | \
                 SLAB_CACHE_DMA | SLAB_NOTRACK)
  
-#ifndef ARCH_KMALLOC_MINALIGN
-#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
-#endif
-
-#ifndef ARCH_SLAB_MINALIGN
-#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
-#endif
-
  #define OO_SHIFT       16
  #define OO_MASK                ((1 << OO_SHIFT) - 1)
  #define MAX_OBJS_PER_PAGE      65535 /* since page.objects is u16 */
@@ -1084,7 +1076,7 @@ static inline struct page *alloc_slab_page(gfp_t flags, int node,
         if (node == -1)
                 return alloc_pages(flags, order);
         else
-               return alloc_pages_node(node, flags, order);
+               return alloc_pages_exact_node(node, flags, order);
  }
  
  static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
@@ -2429,9 +2421,11 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page,
  #ifdef CONFIG_SLUB_DEBUG
         void *addr = page_address(page);
         void *p;
-       DECLARE_BITMAP(map, page->objects);
+       long *map = kzalloc(BITS_TO_LONGS(page->objects) * sizeof(long),
+                           GFP_ATOMIC);
  
-       bitmap_zero(map, page->objects);
+       if (!map)
+               return;
         slab_err(s, page, "%s", text);
         slab_lock(page);
         for_each_free_object(p, s, page->freelist)
@@ -2446,6 +2440,7 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page,
                 }
         }
         slab_unlock(page);
+       kfree(map);
  #endif
  }
  
@@ -3338,8 +3333,15 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
         struct kmem_cache *s;
         void *ret;
  
-       if (unlikely(size > SLUB_MAX_SIZE))
-               return kmalloc_large_node(size, gfpflags, node);
+       if (unlikely(size > SLUB_MAX_SIZE)) {
+               ret = kmalloc_large_node(size, gfpflags, node);
+
+               trace_kmalloc_node(caller, ret,
+                                  size, PAGE_SIZE << get_order(size),
+                                  gfpflags, node);
+
+               return ret;
+       }
  
         s = get_slab(size, gfpflags);
  
@@ -3651,10 +3653,10 @@ static int add_location(struct loc_track *t, struct kmem_cache *s,
  }
  
  static void process_slab(struct loc_track *t, struct kmem_cache *s,
-               struct page *page, enum track_item alloc)
+               struct page *page, enum track_item alloc,
+               long *map)
  {
         void *addr = page_address(page);
-       DECLARE_BITMAP(map, page->objects);
         void *p;
  
         bitmap_zero(map, page->objects);
@@ -3673,11 +3675,14 @@ static int list_locations(struct kmem_cache *s, char *buf,
         unsigned long i;
         struct loc_track t = { 0, 0, NULL };
         int node;
+       unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) *
+                                    sizeof(unsigned long), GFP_KERNEL);
  
-       if (!alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
-                       GFP_TEMPORARY))
+       if (!map || !alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
+                                    GFP_TEMPORARY)) {
+               kfree(map);
                 return sprintf(buf, "Out of memory\n");
-
+       }
         /* Push back cpu slabs */
         flush_all(s);
  
@@ -3691,9 +3696,9 @@ static int list_locations(struct kmem_cache *s, char *buf,
  
                 spin_lock_irqsave(&n->list_lock, flags);
                 list_for_each_entry(page, &n->partial, lru)
-                       process_slab(&t, s, page, alloc);
+                       process_slab(&t, s, page, alloc, map);
                 list_for_each_entry(page, &n->full, lru)
-                       process_slab(&t, s, page, alloc);
+                       process_slab(&t, s, page, alloc, map);
                 spin_unlock_irqrestore(&n->list_lock, flags);
         }
  
@@ -3744,6 +3749,7 @@ static int list_locations(struct kmem_cache *s, char *buf,
         }
  
         free_loc_track(&t);
+       kfree(map);
         if (!t.count)
                 len += sprintf(buf, "No data\n");
         return len;
author	Pekka Enberg <penberg@cs.helsinki.fi>
	Sat, 22 May 2010 07:57:52 +0000 (10:57 +0300)
committer	Pekka Enberg <penberg@cs.helsinki.fi>
	Sat, 22 May 2010 07:57:52 +0000 (10:57 +0300)
include/linux/crypto.h		patch \| blob \| history
include/linux/slab_def.h		patch \| blob \| history
include/linux/slob_def.h		patch \| blob \| history
include/linux/slub_def.h		patch \| blob \| history
mm/slab.c		patch \| blob \| history
mm/slob.c		patch \| blob \| history
mm/slub.c		patch \| blob \| history