SLUB: Don't pass __GFP_FAIL for the initial allocation

[pandora-kernel.git] / mm / slub.c
diff --git a/mm/slub.c b/mm/slub.c

index 2701419..819f056 100644 (file)
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -179,12 +179,6 @@ static enum {
         SYSFS           /* Sysfs up */
  } slab_state = DOWN;
  
-/*
- * The slab allocator is initialized with interrupts disabled. Therefore, make
- * sure early boot allocations don't accidentally enable interrupts.
- */
-static gfp_t slab_gfp_mask __read_mostly = SLAB_GFP_BOOT_MASK;
-
  /* A list of all slab caches on the system */
  static DECLARE_RWSEM(slub_lock);
  static LIST_HEAD(slab_caches);
@@ -840,6 +834,11 @@ static inline unsigned long slabs_node(struct kmem_cache *s, int node)
         return atomic_long_read(&n->nr_slabs);
  }
  
+static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
+{
+       return atomic_long_read(&n->nr_slabs);
+}
+
  static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)
  {
         struct kmem_cache_node *n = get_node(s, node);
@@ -1058,6 +1057,8 @@ static inline unsigned long kmem_cache_flags(unsigned long objsize,
  
  static inline unsigned long slabs_node(struct kmem_cache *s, int node)
                                                         { return 0; }
+static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
+                                                       { return 0; }
  static inline void inc_slabs_node(struct kmem_cache *s, int node,
                                                         int objects) {}
  static inline void dec_slabs_node(struct kmem_cache *s, int node,
@@ -1084,11 +1085,17 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
  {
         struct page *page;
         struct kmem_cache_order_objects oo = s->oo;
+       gfp_t alloc_gfp;
  
         flags |= s->allocflags;
  
-       page = alloc_slab_page(flags | __GFP_NOWARN | __GFP_NORETRY, node,
-                                                                       oo);
+       /*
+        * Let the initial higher-order allocation fail under memory pressure
+        * so we fall-back to the minimum order allocation.
+        */
+       alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL;
+
+       page = alloc_slab_page(alloc_gfp, node, oo);
         if (unlikely(!page)) {
                 oo = s->min;
                 /*
@@ -1514,6 +1521,65 @@ static inline int node_match(struct kmem_cache_cpu *c, int node)
         return 1;
  }
  
+static int count_free(struct page *page)
+{
+       return page->objects - page->inuse;
+}
+
+static unsigned long count_partial(struct kmem_cache_node *n,
+                                       int (*get_count)(struct page *))
+{
+       unsigned long flags;
+       unsigned long x = 0;
+       struct page *page;
+
+       spin_lock_irqsave(&n->list_lock, flags);
+       list_for_each_entry(page, &n->partial, lru)
+               x += get_count(page);
+       spin_unlock_irqrestore(&n->list_lock, flags);
+       return x;
+}
+
+static inline unsigned long node_nr_objs(struct kmem_cache_node *n)
+{
+#ifdef CONFIG_SLUB_DEBUG
+       return atomic_long_read(&n->total_objects);
+#else
+       return 0;
+#endif
+}
+
+static noinline void
+slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
+{
+       int node;
+
+       printk(KERN_WARNING
+               "SLUB: Unable to allocate memory on node %d (gfp=0x%x)\n",
+               nid, gfpflags);
+       printk(KERN_WARNING "  cache: %s, object size: %d, buffer size: %d, "
+               "default order: %d, min order: %d\n", s->name, s->objsize,
+               s->size, oo_order(s->oo), oo_order(s->min));
+
+       for_each_online_node(node) {
+               struct kmem_cache_node *n = get_node(s, node);
+               unsigned long nr_slabs;
+               unsigned long nr_objs;
+               unsigned long nr_free;
+
+               if (!n)
+                       continue;
+
+               nr_free  = count_partial(n, count_free);
+               nr_slabs = node_nr_slabs(n);
+               nr_objs  = node_nr_objs(n);
+
+               printk(KERN_WARNING
+                       "  node %d: slabs: %ld, objs: %ld, free: %ld\n",
+                       node, nr_slabs, nr_objs, nr_free);
+       }
+}
+
  /*
   * Slow path. The lockless freelist is empty or we need to perform
   * debugging duties.
@@ -1595,6 +1661,8 @@ new_slab:
                 c->page = new;
                 goto load_freelist;
         }
+       if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit())
+               slab_out_of_memory(s, gfpflags, node);
         return NULL;
  debug:
         if (!alloc_debug_processing(s, c->page, object, addr))
@@ -1624,7 +1692,7 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
         unsigned long flags;
         unsigned int objsize;
  
-       gfpflags &= slab_gfp_mask;
+       gfpflags &= gfp_allowed_mask;
  
         lockdep_trace_alloc(gfpflags);
         might_sleep_if(gfpflags & __GFP_WAIT);
@@ -2636,6 +2704,7 @@ static noinline struct kmem_cache *dma_kmalloc_cache(int index, gfp_t flags)
         struct kmem_cache *s;
         char *text;
         size_t realsize;
+       unsigned long slabflags;
  
         s = kmalloc_caches_dma[index];
         if (s)
@@ -2657,10 +2726,18 @@ static noinline struct kmem_cache *dma_kmalloc_cache(int index, gfp_t flags)
                          (unsigned int)realsize);
         s = kmalloc(kmem_size, flags & ~SLUB_DMA);
  
+       /*
+        * Must defer sysfs creation to a workqueue because we don't know
+        * what context we are called from. Before sysfs comes up, we don't
+        * need to do anything because our sysfs initcall will start by
+        * adding all existing slabs to sysfs.
+        */
+       slabflags = SLAB_CACHE_DMA|SLAB_NOTRACK;
+       if (slab_state >= SYSFS)
+               slabflags |= __SYSFS_ADD_DEFERRED;
+
         if (!s || !text || !kmem_cache_open(s, flags, text,
-                       realsize, ARCH_KMALLOC_MINALIGN,
-                       SLAB_CACHE_DMA|SLAB_NOTRACK|__SYSFS_ADD_DEFERRED,
-                       NULL)) {
+                       realsize, ARCH_KMALLOC_MINALIGN, slabflags, NULL)) {
                 kfree(s);
                 kfree(text);
                 goto unlock_out;
@@ -2669,7 +2746,8 @@ static noinline struct kmem_cache *dma_kmalloc_cache(int index, gfp_t flags)
         list_add(&s->list, &slab_caches);
         kmalloc_caches_dma[index] = s;
  
-       schedule_work(&sysfs_add_work);
+       if (slab_state >= SYSFS)
+               schedule_work(&sysfs_add_work);
  
  unlock_out:
         up_write(&slub_lock);
@@ -3142,10 +3220,6 @@ void __init kmem_cache_init(void)
  
  void __init kmem_cache_init_late(void)
  {
-       /*
-        * Interrupts are enabled now so all GFP allocations are safe.
-        */
-       slab_gfp_mask = __GFP_BITS_MASK;
  }
  
  /*
@@ -3368,20 +3442,6 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
  }
  
  #ifdef CONFIG_SLUB_DEBUG
-static unsigned long count_partial(struct kmem_cache_node *n,
-                                       int (*get_count)(struct page *))
-{
-       unsigned long flags;
-       unsigned long x = 0;
-       struct page *page;
-
-       spin_lock_irqsave(&n->list_lock, flags);
-       list_for_each_entry(page, &n->partial, lru)
-               x += get_count(page);
-       spin_unlock_irqrestore(&n->list_lock, flags);
-       return x;
-}
-
  static int count_inuse(struct page *page)
  {
         return page->inuse;
@@ -3392,11 +3452,6 @@ static int count_total(struct page *page)
         return page->objects;
  }
  
-static int count_free(struct page *page)
-{
-       return page->objects - page->inuse;
-}
-
  static int validate_slab(struct kmem_cache *s, struct page *page,
                                                 unsigned long *map)
  {