Merge branches 'slab/align', 'slab/cleanups', 'slab/fixes', 'slab/memhotadd' and...

author Pekka Enberg <penberg@cs.helsinki.fi>
Sat, 22 May 2010 07:57:52 +0000 (10:57 +0300)
committer Pekka Enberg <penberg@cs.helsinki.fi>
Sat, 22 May 2010 07:57:52 +0000 (10:57 +0300)
diff --combined mm/slab.c

index bac0f4f,7401ddc,ceb4e3a,525c664,3230cd2,bac0f4f..50a73fc
--- 1/mm/slab.c
--- 2/mm/slab.c
--- 3/mm/slab.c
--- 4/mm/slab.c
--- 5/mm/slab.c
--- 6/mm/slab.c
+++ b/mm/slab.c
@@@@@@@ -115,6 -115,6 -115,6 -115,6 -115,7 -115,6 +115,7 @@@@@@@
       #include  <linux/reciprocal_div.h>
       #include  <linux/debugobjects.h>
       #include  <linux/kmemcheck.h>
++++ +#include  <linux/memory.h>
       
       #include  <asm/cacheflush.h>
       #include  <asm/tlbflush.h>
@@@@@@@ -144,30 -144,6 -144,30 -144,30 -145,30 -144,30 +145,6 @@@@@@@
       #define   BYTES_PER_WORD          sizeof(void *)
       #define   REDZONE_ALIGN           max(BYTES_PER_WORD, __alignof__(unsigned long long))
       
- ----#ifndef ARCH_KMALLOC_MINALIGN
- ----/*
- ---- * Enforce a minimum alignment for the kmalloc caches.
- ---- * Usually, the kmalloc caches are cache_line_size() aligned, except when
- ---- * DEBUG and FORCED_DEBUG are enabled, then they are BYTES_PER_WORD aligned.
- ---- * Some archs want to perform DMA into kmalloc caches and need a guaranteed
- ---- * alignment larger than the alignment of a 64-bit integer.
- ---- * ARCH_KMALLOC_MINALIGN allows that.
- ---- * Note that increasing this value may disable some debug features.
- ---- */
- ----#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
- ----#endif
- ----
- ----#ifndef ARCH_SLAB_MINALIGN
- ----/*
- ---- * Enforce a minimum alignment for all caches.
- ---- * Intended for archs that get misalignment faults even for BYTES_PER_WORD
- ---- * aligned buffers. Includes ARCH_KMALLOC_MINALIGN.
- ---- * If possible: Do not enable this flag for CONFIG_DEBUG_SLAB, it disables
- ---- * some debug features.
- ---- */
- ----#define ARCH_SLAB_MINALIGN 0
- ----#endif
- ----
       #ifndef ARCH_KMALLOC_FLAGS
       #define ARCH_KMALLOC_FLAGS SLAB_HWCACHE_ALIGN
       #endif
@@@@@@@ -1102,6 -1078,6 -1102,6 -1102,6 -1103,52 -1102,6 +1079,52 @@@@@@@ static inline int cache_free_alien(stru
       }
       #endif
       
++++ +/*
++++ + * Allocates and initializes nodelists for a node on each slab cache, used for
++++ + * either memory or cpu hotplug.  If memory is being hot-added, the kmem_list3
++++ + * will be allocated off-node since memory is not yet online for the new node.
++++ + * When hotplugging memory or a cpu, existing nodelists are not replaced if
++++ + * already in use.
++++ + *
++++ + * Must hold cache_chain_mutex.
++++ + */
++++ +static int init_cache_nodelists_node(int node)
++++ +{
++++ +  struct kmem_cache *cachep;
++++ +  struct kmem_list3 *l3;
++++ +  const int memsize = sizeof(struct kmem_list3);
++++ +
++++ +  list_for_each_entry(cachep, &cache_chain, next) {
++++ +          /*
++++ +           * Set up the size64 kmemlist for cpu before we can
++++ +           * begin anything. Make sure some other cpu on this
++++ +           * node has not already allocated this
++++ +           */
++++ +          if (!cachep->nodelists[node]) {
++++ +                  l3 = kmalloc_node(memsize, GFP_KERNEL, node);
++++ +                  if (!l3)
++++ +                          return -ENOMEM;
++++ +                  kmem_list3_init(l3);
++++ +                  l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
++++ +                      ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
++++ +
++++ +                  /*
++++ +                   * The l3s don't come and go as CPUs come and
++++ +                   * go.  cache_chain_mutex is sufficient
++++ +                   * protection here.
++++ +                   */
++++ +                  cachep->nodelists[node] = l3;
++++ +          }
++++ +
++++ +          spin_lock_irq(&cachep->nodelists[node]->list_lock);
++++ +          cachep->nodelists[node]->free_limit =
++++ +                  (1 + nr_cpus_node(node)) *
++++ +                  cachep->batchcount + cachep->num;
++++ +          spin_unlock_irq(&cachep->nodelists[node]->list_lock);
++++ +  }
++++ +  return 0;
++++ +}
++++ +
       static void __cpuinit cpuup_canceled(long cpu)
       {
         struct kmem_cache *cachep;
@@@@@@@ -1172,7 -1148,7 -1172,7 -1172,7 -1219,7 -1172,7 +1195,7 @@@@@@@ static int __cpuinit cpuup_prepare(lon
         struct kmem_cache *cachep;
         struct kmem_list3 *l3 = NULL;
         int node = cpu_to_node(cpu);
---- -  const int memsize = sizeof(struct kmem_list3);
++++ +  int err;
       
         /*
          * We need to do this right in the beginning since
@@@@@@@ -1180,35 -1156,35 -1180,35 -1180,35 -1227,9 -1180,35 +1203,9 @@@@@@@
          * kmalloc_node allows us to add the slab to the right
          * kmem_list3 and not this cpu's kmem_list3
          */
---- -
---- -  list_for_each_entry(cachep, &cache_chain, next) {
---- -          /*
---- -           * Set up the size64 kmemlist for cpu before we can
---- -           * begin anything. Make sure some other cpu on this
---- -           * node has not already allocated this
---- -           */
---- -          if (!cachep->nodelists[node]) {
---- -                  l3 = kmalloc_node(memsize, GFP_KERNEL, node);
---- -                  if (!l3)
---- -                          goto bad;
---- -                  kmem_list3_init(l3);
---- -                  l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
---- -                      ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
---- -
---- -                  /*
---- -                   * The l3s don't come and go as CPUs come and
---- -                   * go.  cache_chain_mutex is sufficient
---- -                   * protection here.
---- -                   */
---- -                  cachep->nodelists[node] = l3;
---- -          }
---- -
---- -          spin_lock_irq(&cachep->nodelists[node]->list_lock);
---- -          cachep->nodelists[node]->free_limit =
---- -                  (1 + nr_cpus_node(node)) *
---- -                  cachep->batchcount + cachep->num;
---- -          spin_unlock_irq(&cachep->nodelists[node]->list_lock);
---- -  }
++++ +  err = init_cache_nodelists_node(node);
++++ +  if (err < 0)
++++ +          goto bad;
       
         /*
          * Now we can go ahead with allocating the shared arrays and
@@@@@@@ -1331,11 -1307,11 -1331,11 -1331,11 -1352,75 -1331,11 +1328,75 @@@@@@@ static struct notifier_block __cpuinitd
         &cpuup_callback, NULL, 0
       };
       
++++ +#if defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG)
++++ +/*
++++ + * Drains freelist for a node on each slab cache, used for memory hot-remove.
++++ + * Returns -EBUSY if all objects cannot be drained so that the node is not
++++ + * removed.
++++ + *
++++ + * Must hold cache_chain_mutex.
++++ + */
++++ +static int __meminit drain_cache_nodelists_node(int node)
++++ +{
++++ +  struct kmem_cache *cachep;
++++ +  int ret = 0;
++++ +
++++ +  list_for_each_entry(cachep, &cache_chain, next) {
++++ +          struct kmem_list3 *l3;
++++ +
++++ +          l3 = cachep->nodelists[node];
++++ +          if (!l3)
++++ +                  continue;
++++ +
++++ +          drain_freelist(cachep, l3, l3->free_objects);
++++ +
++++ +          if (!list_empty(&l3->slabs_full) ||
++++ +              !list_empty(&l3->slabs_partial)) {
++++ +                  ret = -EBUSY;
++++ +                  break;
++++ +          }
++++ +  }
++++ +  return ret;
++++ +}
++++ +
++++ +static int __meminit slab_memory_callback(struct notifier_block *self,
++++ +                                  unsigned long action, void *arg)
++++ +{
++++ +  struct memory_notify *mnb = arg;
++++ +  int ret = 0;
++++ +  int nid;
++++ +
++++ +  nid = mnb->status_change_nid;
++++ +  if (nid < 0)
++++ +          goto out;
++++ +
++++ +  switch (action) {
++++ +  case MEM_GOING_ONLINE:
++++ +          mutex_lock(&cache_chain_mutex);
++++ +          ret = init_cache_nodelists_node(nid);
++++ +          mutex_unlock(&cache_chain_mutex);
++++ +          break;
++++ +  case MEM_GOING_OFFLINE:
++++ +          mutex_lock(&cache_chain_mutex);
++++ +          ret = drain_cache_nodelists_node(nid);
++++ +          mutex_unlock(&cache_chain_mutex);
++++ +          break;
++++ +  case MEM_ONLINE:
++++ +  case MEM_OFFLINE:
++++ +  case MEM_CANCEL_ONLINE:
++++ +  case MEM_CANCEL_OFFLINE:
++++ +          break;
++++ +  }
++++ +out:
++++ +  return ret ? notifier_from_errno(ret) : NOTIFY_OK;
++++ +}
++++ +#endif /* CONFIG_NUMA && CONFIG_MEMORY_HOTPLUG */
++++ +
       /*
        * swap the static kmem_list3 with kmalloced memory
        */
---- -static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list,
---- -                  int nodeid)
++++ +static void __init init_list(struct kmem_cache *cachep, struct kmem_list3 *list,
++++ +                          int nodeid)
       {
         struct kmem_list3 *ptr;
       
@@@@@@@ -1580,6 -1556,6 -1580,6 -1580,6 -1665,14 -1580,6 +1641,14 @@@@@@@ void __init kmem_cache_init_late(void
          */
         register_cpu_notifier(&cpucache_notifier);
       
++++ +#ifdef CONFIG_NUMA
++++ +  /*
++++ +   * Register a memory hotplug callback that initializes and frees
++++ +   * nodelists.
++++ +   */
++++ +  hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);
++++ +#endif
++++ +
         /*
          * The reap timers are started later, with a module init call: That part
          * of the kernel is not yet operational.
@@@@@@@ -2220,8 -2196,8 -2220,8 -2220,8 -2313,8 -2220,8 +2289,8 @@@@@@@ kmem_cache_create (const char *name, si
         if (ralign < align) {
                 ralign = align;
         }
--- --  /* disable debug if necessary */
--- --  if (ralign > __alignof__(unsigned long long))
+++ ++  /* disable debug if not aligning with REDZONE_ALIGN */
+++ ++  if (ralign & (__alignof__(unsigned long long) - 1))
                 flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
         /*
          * 4) Store it.
@@@@@@@ -2247,8 -2223,8 -2247,8 -2247,8 -2340,8 -2247,8 +2316,8 @@@@@@@
          */
         if (flags & SLAB_RED_ZONE) {
                 /* add space for red zone words */
--- --          cachep->obj_offset += sizeof(unsigned long long);
--- --          size += 2 * sizeof(unsigned long long);
+++ ++          cachep->obj_offset += align;
+++ ++          size += align + sizeof(unsigned long long);
         }
         if (flags & SLAB_STORE_USER) {
                 /* user store requires one word storage behind the end of
@@@@@@@ -3602,10 -3578,10 -3602,21 -3602,10 -3695,21 -3602,10 +3671,10 @@@@@@@ EXPORT_SYMBOL(kmem_cache_alloc_notrace)
        */
       int kmem_ptr_validate(struct kmem_cache *cachep, const void *ptr)
       {
-  - -   unsigned long addr = (unsigned long)ptr;
-  - -   unsigned long min_addr = PAGE_OFFSET;
-  - -   unsigned long align_mask = BYTES_PER_WORD - 1;
         unsigned long size = cachep->buffer_size;
         struct page *page;
       
-  - -   if (unlikely(addr < min_addr))
-  - -           goto out;
-  - -   if (unlikely(addr > (unsigned long)high_memory - size))
-  - -           goto out;
-  - -   if (unlikely(addr & align_mask))
-  - -           goto out;
-  - -   if (unlikely(!kern_addr_valid(addr)))
-  - -           goto out;
-  - -   if (unlikely(!kern_addr_valid(addr + size - 1)))
+  + +   if (unlikely(!kern_ptr_validate(ptr, size)))
                 goto out;
         page = virt_to_page(ptr);
         if (unlikely(!PageSlab(page)))
@@@@@@@ -4216,10 -4192,10 -4227,11 -4216,10 -4320,10 -4216,10 +4285,11 @@@@@@@ static int s_show(struct seq_file *m, v
                 unsigned long node_frees = cachep->node_frees;
                 unsigned long overflows = cachep->node_overflow;
       
-- ---          seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu \
-- ---                          %4lu %4lu %4lu %4lu %4lu", allocs, high, grown,
-- ---                          reaped, errors, max_freeable, node_allocs,
-- ---                          node_frees, overflows);
++ +++          seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu "
++ +++                     "%4lu %4lu %4lu %4lu %4lu",
++ +++                     allocs, high, grown,
++ +++                     reaped, errors, max_freeable, node_allocs,
++ +++                     node_frees, overflows);
         }
         /* cpu stats */
         {
diff --combined mm/slub.c

index d2a54fe,c874c3e,b364844,7d6c8b1,b364844,2cdd235..e46e312
--- 1/mm/slub.c
--- 2/mm/slub.c
--- 3/mm/slub.c
--- 4/mm/slub.c
--- 5/mm/slub.c
--- 6/mm/slub.c
+++ b/mm/slub.c
@@@@@@@ -157,14 -157,6 -157,14 -157,14 -157,14 -157,14 +157,6 @@@@@@@
       #define SLUB_MERGE_SAME (SLAB_DEBUG_FREE | SLAB_RECLAIM_ACCOUNT | \
                 SLAB_CACHE_DMA | SLAB_NOTRACK)
       
- ----#ifndef ARCH_KMALLOC_MINALIGN
- ----#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
- ----#endif
- ----
- ----#ifndef ARCH_SLAB_MINALIGN
- ----#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
- ----#endif
- ----
       #define OO_SHIFT  16
       #define OO_MASK           ((1 << OO_SHIFT) - 1)
       #define MAX_OBJS_PER_PAGE 65535 /* since page.objects is u16 */
@@@@@@@ -1084,7 -1076,7 -1084,7 -1084,7 -1084,7 -1084,7 +1076,7 @@@@@@@ static inline struct page *alloc_slab_p
         if (node == -1)
                 return alloc_pages(flags, order);
         else
-----           return alloc_pages_node(node, flags, order);
+++++           return alloc_pages_exact_node(node, flags, order);
       }
       
       static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
@@@@@@@ -2153,7 -2145,7 -2153,7 -2153,7 -2153,7 -2153,7 +2145,7 @@@@@@@ static int init_kmem_cache_nodes(struc
         int local_node;
       
         if (slab_state >= UP && (s < kmalloc_caches ||
-  ---                   s > kmalloc_caches + KMALLOC_CACHES))
+  +++                   s >= kmalloc_caches + KMALLOC_CACHES))
                 local_node = page_to_nid(virt_to_page(s));
         else
                 local_node = 0;
@@@@@@@ -2386,9 -2378,9 -2386,6 -2386,9 -2386,6 -2386,9 +2378,9 @@@@@@@ int kmem_ptr_validate(struct kmem_cach
       {
         struct page *page;
       
+  + +   if (!kern_ptr_validate(object, s->size))
+  + +           return 0;
+  + + 
         page = get_object_page(object);
       
         if (!page || s != page->slab)
@@@@@@@ -2429,9 -2421,9 -2426,9 -2429,9 -2426,9 -2429,11 +2421,11 @@@@@@@ static void list_slab_objects(struct km
       #ifdef CONFIG_SLUB_DEBUG
         void *addr = page_address(page);
         void *p;
-----   DECLARE_BITMAP(map, page->objects);
+++++   long *map = kzalloc(BITS_TO_LONGS(page->objects) * sizeof(long),
+++++                       GFP_ATOMIC);
       
-----   bitmap_zero(map, page->objects);
+++++   if (!map)
+++++           return;
         slab_err(s, page, "%s", text);
         slab_lock(page);
         for_each_free_object(p, s, page->freelist)
@@@@@@@ -2446,6 -2438,6 -2443,6 -2446,6 -2443,6 -2448,7 +2440,7 @@@@@@@
                 }
         }
         slab_unlock(page);
+++++   kfree(map);
       #endif
       }
       
@@@@@@@ -3338,8 -3330,8 -3335,8 -3338,8 -3335,8 -3341,15 +3333,15 @@@@@@@ void *__kmalloc_node_track_caller(size_
         struct kmem_cache *s;
         void *ret;
       
-----   if (unlikely(size > SLUB_MAX_SIZE))
-----           return kmalloc_large_node(size, gfpflags, node);
+++++   if (unlikely(size > SLUB_MAX_SIZE)) {
+++++           ret = kmalloc_large_node(size, gfpflags, node);
+++++ 
+++++           trace_kmalloc_node(caller, ret,
+++++                              size, PAGE_SIZE << get_order(size),
+++++                              gfpflags, node);
+++++ 
+++++           return ret;
+++++   }
       
         s = get_slab(size, gfpflags);
       
@@@@@@@ -3651,10 -3643,10 -3648,10 -3651,10 -3648,10 -3661,10 +3653,10 @@@@@@@ static int add_location(struct loc_trac
       }
       
       static void process_slab(struct loc_track *t, struct kmem_cache *s,
-----           struct page *page, enum track_item alloc)
+++++           struct page *page, enum track_item alloc,
+++++           long *map)
       {
         void *addr = page_address(page);
-----   DECLARE_BITMAP(map, page->objects);
         void *p;
       
         bitmap_zero(map, page->objects);
@@@@@@@ -3673,11 -3665,11 -3670,11 -3673,11 -3670,11 -3683,14 +3675,14 @@@@@@@ static int list_locations(struct kmem_c
         unsigned long i;
         struct loc_track t = { 0, 0, NULL };
         int node;
+++++   unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->max)) *
+++++                                sizeof(unsigned long), GFP_KERNEL);
       
-----   if (!alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
-----                   GFP_TEMPORARY))
+++++   if (!map || !alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
+++++                                GFP_TEMPORARY)) {
+++++           kfree(map);
                 return sprintf(buf, "Out of memory\n");
----- 
+++++   }
         /* Push back cpu slabs */
         flush_all(s);
       
@@@@@@@ -3691,9 -3683,9 -3688,9 -3691,9 -3688,9 -3704,9 +3696,9 @@@@@@@
       
                 spin_lock_irqsave(&n->list_lock, flags);
                 list_for_each_entry(page, &n->partial, lru)
-----                   process_slab(&t, s, page, alloc);
+++++                   process_slab(&t, s, page, alloc, map);
                 list_for_each_entry(page, &n->full, lru)
-----                   process_slab(&t, s, page, alloc);
+++++                   process_slab(&t, s, page, alloc, map);
                 spin_unlock_irqrestore(&n->list_lock, flags);
         }
       
@@@@@@@ -3744,6 -3736,6 -3741,6 -3744,6 -3741,6 -3757,7 +3749,7 @@@@@@@
         }
       
         free_loc_track(&t);
+++++   kfree(map);
         if (!t.count)
                 len += sprintf(buf, "No data\n");
         return len;
author	Pekka Enberg <penberg@cs.helsinki.fi>
	Sat, 22 May 2010 07:57:52 +0000 (10:57 +0300)
committer	Pekka Enberg <penberg@cs.helsinki.fi>
	Sat, 22 May 2010 07:57:52 +0000 (10:57 +0300)
		1	2	3	4	5	6
mm/slab.c	patch \|	diff1 \|	diff2 \|	diff3 \|	diff4 \|	diff5 \|	diff6 \|	blob \| history
mm/slub.c	patch \|	diff1 \|	diff2 \|	diff3 \|	diff4 \|	diff5 \|	diff6 \|	blob \| history