X-Git-Url: https://git.openpandora.org/cgi-bin/gitweb.cgi?p=pandora-kernel.git;a=blobdiff_plain;f=mm%2Fslab.c;h=0f20843beffdb504574dc471bfaacf12abfd902d;hp=2046da2aa46e41b4189b8b1deba6618d97f88e86;hb=e9e9290f5c85887baf1123a36ec9fdf56a10cf4b;hpb=58ce1fd5805647a58a050bbbbd2252ea5ecb47b3

diff --git a/mm/slab.c b/mm/slab.c
index 2046da2aa46e..0f20843beffd 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -89,6 +89,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -106,6 +107,7 @@
 #include
 #include
 #include
+#include
 
 #include
 #include
@@ -307,6 +309,13 @@ struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS];
 #define	SIZE_AC 1
 #define	SIZE_L3 (1 + MAX_NUMNODES)
 
+static int drain_freelist(struct kmem_cache *cache,
+			struct kmem_list3 *l3, int tofree);
+static void free_block(struct kmem_cache *cachep, void **objpp, int len,
+			int node);
+static void enable_cpucache(struct kmem_cache *cachep);
+static void cache_reap(void *unused);
+
 /*
  * This function must be completely optimized away if a constant is passed to
  * it. Mostly the same as what is in linux/slab.h except it returns an index.
@@ -331,6 +340,8 @@ static __always_inline int index_of(const size_t size)
 	return 0;
 }
 
+static int slab_early_init = 1;
+
 #define INDEX_AC index_of(sizeof(struct arraycache_init))
 #define INDEX_L3 index_of(sizeof(struct kmem_list3))
 
@@ -452,7 +463,7 @@ struct kmem_cache {
 #define	STATS_DEC_ACTIVE(x)	((x)->num_active--)
 #define	STATS_INC_ALLOCED(x)	((x)->num_allocations++)
 #define	STATS_INC_GROWN(x)	((x)->grown++)
-#define	STATS_INC_REAPED(x)	((x)->reaped++)
+#define	STATS_ADD_REAPED(x,y)	((x)->reaped += (y))
 #define	STATS_SET_HIGH(x)						\
 	do {								\
 		if ((x)->num_active > (x)->high_mark)			\
@@ -476,7 +487,7 @@ struct kmem_cache {
 #define	STATS_DEC_ACTIVE(x)	do { } while (0)
 #define	STATS_INC_ALLOCED(x)	do { } while (0)
 #define	STATS_INC_GROWN(x)	do { } while (0)
-#define	STATS_INC_REAPED(x)	do { } while (0)
+#define	STATS_ADD_REAPED(x,y)	do { } while (0)
 #define	STATS_SET_HIGH(x)	do { } while (0)
 #define	STATS_INC_ERR(x)	do { } while (0)
 #define	STATS_INC_NODEALLOCS(x)	do { } while (0)
@@ -490,17 +501,6 @@ struct kmem_cache {
 #endif
 
 #if DEBUG
-/*
- * Magic nums for obj red zoning.
- * Placed in the first word before and the first word after an obj.
- */
-#define	RED_INACTIVE	0x5A2CF071UL	/* when obj is inactive */
-#define	RED_ACTIVE	0x170FC2A5UL	/* when obj is active */
-
-/* ...and for poisoning */
-#define	POISON_INUSE	0x5a	/* for use-uninitialised poisoning */
-#define	POISON_FREE	0x6b	/* for use-after-free poisoning */
-#define	POISON_END	0xa5	/* end-byte of poisoning */
 
 /*
  * memory layout of objects:
@@ -592,6 +592,7 @@ static inline struct kmem_cache *page_get_cache(struct page *page)
 {
 	if (unlikely(PageCompound(page)))
 		page = (struct page *)page_private(page);
+	BUG_ON(!PageSlab(page));
 	return (struct kmem_cache *)page->lru.next;
 }
 
@@ -604,6 +605,7 @@ static inline struct slab *page_get_slab(struct page *page)
 {
 	if (unlikely(PageCompound(page)))
 		page = (struct page *)page_private(page);
+	BUG_ON(!PageSlab(page));
 	return (struct slab *)page->lru.prev;
 }
 
@@ -672,6 +674,37 @@ static struct kmem_cache cache_cache = {
 #endif
 };
 
+#ifdef CONFIG_LOCKDEP
+
+/*
+ * Slab sometimes uses the kmalloc slabs to store the slab headers
+ * for other slabs "off slab".
+ * The locking for this is tricky in that it nests within the locks
+ * of all other slabs in a few places; to deal with this special
+ * locking we put on-slab caches into a separate lock-class.
+ */
+static struct lock_class_key on_slab_key;
+
+static inline void init_lock_keys(struct cache_sizes *s)
+{
+	int q;
+
+	for (q = 0; q < MAX_NUMNODES; q++) {
+		if (!s->cs_cachep->nodelists[q] || OFF_SLAB(s->cs_cachep))
+			continue;
+		lockdep_set_class(&s->cs_cachep->nodelists[q]->list_lock,
+				  &on_slab_key);
+	}
+}
+
+#else
+static inline void init_lock_keys(struct cache_sizes *s)
+{
+}
+#endif
+
+
+
 /* Guard access to the cache-chain. */
 static DEFINE_MUTEX(cache_chain_mutex);
 static struct list_head cache_chain;
@@ -705,12 +738,6 @@ int slab_is_available(void)
 static DEFINE_PER_CPU(struct work_struct, reap_work);
 
-static void free_block(struct kmem_cache *cachep, void **objpp, int len,
-			int node);
-static void enable_cpucache(struct kmem_cache *cachep);
-static void cache_reap(void *unused);
-static int __node_shrink(struct kmem_cache *cachep, int node);
-
 static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
 {
 	return cachep->array[smp_processor_id()];
 }
@@ -1079,7 +1106,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
 
 #endif
 
-static int cpuup_callback(struct notifier_block *nfb,
+static int __devinit cpuup_callback(struct notifier_block *nfb,
 				    unsigned long action, void *hcpu)
 {
 	long cpu = (long)hcpu;
@@ -1246,10 +1273,7 @@ free_array_cache:
 		l3 = cachep->nodelists[node];
 		if (!l3)
 			continue;
-		spin_lock_irq(&l3->list_lock);
-		/* free slabs belonging to this node */
-		__node_shrink(cachep, node);
-		spin_unlock_irq(&l3->list_lock);
+		drain_freelist(cachep, l3, l3->free_objects);
 	}
 	mutex_unlock(&cache_chain_mutex);
 	break;
@@ -1261,7 +1285,9 @@ bad:
 	return NOTIFY_BAD;
 }
 
-static struct notifier_block cpucache_notifier = { &cpuup_callback, NULL, 0 };
+static struct notifier_block __cpuinitdata cpucache_notifier = {
+	&cpuup_callback, NULL, 0
+};
 
 /*
  * swap the static kmem_list3 with kmalloced memory
@@ -1277,6 +1303,11 @@ static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list,
 
 	local_irq_disable();
 	memcpy(ptr, list, sizeof(struct kmem_list3));
+	/*
+	 * Do not assume that spinlocks can be initialized via memcpy:
+	 */
+	spin_lock_init(&ptr->list_lock);
+
 	MAKE_ALL_LISTS(cachep, ptr, nodeid);
 	cachep->nodelists[nodeid] = ptr;
 	local_irq_enable();
@@ -1374,6 +1405,8 @@ void __init kmem_cache_init(void)
 					NULL, NULL);
 	}
 
+	slab_early_init = 0;
+
 	while (sizes->cs_size != ULONG_MAX) {
 		/*
 		 * For performance, all the general caches are L1 aligned.
@@ -1389,6 +1422,7 @@ void __init kmem_cache_init(void)
 					ARCH_KMALLOC_FLAGS|SLAB_PANIC,
 					NULL, NULL);
 		}
+		init_lock_keys(sizes);
 
 		sizes->cs_dmacachep = kmem_cache_create(names->name_dma,
 					sizes->cs_size,
@@ -1401,7 +1435,7 @@ void __init kmem_cache_init(void)
 	}
 	/* 4) Replace the bootstrap head arrays */
 	{
-		void *ptr;
+		struct array_cache *ptr;
 
 		ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL);
 
@@ -1409,6 +1443,11 @@ void __init kmem_cache_init(void)
 		BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache);
 		memcpy(ptr, cpu_cache_get(&cache_cache),
 		       sizeof(struct arraycache_init));
+		/*
+		 * Do not assume that spinlocks can be initialized via memcpy:
+		 */
+		spin_lock_init(&ptr->lock);
+
 		cache_cache.array[smp_processor_id()] = ptr;
 		local_irq_enable();
 
@@ -1419,6 +1458,11 @@ void __init kmem_cache_init(void)
 		       != &initarray_generic.cache);
 		memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep),
 		       sizeof(struct arraycache_init));
+		/*
+		 * Do not assume that spinlocks can be initialized via memcpy:
+		 */
+		spin_lock_init(&ptr->lock);
+
 		malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] =
 		    ptr;
 		local_irq_enable();
@@ -1508,7 +1552,7 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 	nr_pages = (1 << cachep->gfporder);
 	if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
 		atomic_add(nr_pages, &slab_reclaim_pages);
-	add_page_state(nr_slab, nr_pages);
+	add_zone_page_state(page_zone(page), NR_SLAB, nr_pages);
 	for (i = 0; i < nr_pages; i++)
 		__SetPageSlab(page + i);
 	return page_address(page);
@@ -1523,12 +1567,12 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr)
 	struct page *page = virt_to_page(addr);
 	const unsigned long nr_freed = i;
 
+	sub_zone_page_state(page_zone(page), NR_SLAB, nr_freed);
 	while (i--) {
 		BUG_ON(!PageSlab(page));
 		__ClearPageSlab(page);
 		page++;
 	}
-	sub_page_state(nr_slab, nr_freed);
 	if (current->reclaim_state)
 		current->reclaim_state->reclaimed_slab += nr_freed;
 	free_pages((unsigned long)addr, cachep->gfporder);
@@ -2104,8 +2148,12 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 #endif
 #endif
 
-	/* Determine if the slab management is 'on' or 'off' slab. */
-	if (size >= (PAGE_SIZE >> 3))
+	/*
+	 * Determine if the slab management is 'on' or 'off' slab.
+	 * (bootstrapping cannot cope with offslab caches so don't do
+	 * it too early on.)
+	 */
+	if ((size >= (PAGE_SIZE >> 3)) && !slab_early_init)
 		/*
 		 * Size is large, assume best to place the slab management obj
 		 * off-slab (should allow better packing of objs).
@@ -2245,32 +2293,45 @@ static void drain_cpu_caches(struct kmem_cache *cachep)
 	}
 }
 
-static int __node_shrink(struct kmem_cache *cachep, int node)
+/*
+ * Remove slabs from the list of free slabs.
+ * Specify the number of slabs to drain in tofree.
+ *
+ * Returns the actual number of slabs released.
+ */
+static int drain_freelist(struct kmem_cache *cache,
+			struct kmem_list3 *l3, int tofree)
 {
+	struct list_head *p;
+	int nr_freed;
 	struct slab *slabp;
-	struct kmem_list3 *l3 = cachep->nodelists[node];
-	int ret;
 
-	for (;;) {
-		struct list_head *p;
+	nr_freed = 0;
+	while (nr_freed < tofree && !list_empty(&l3->slabs_free)) {
 
+		spin_lock_irq(&l3->list_lock);
 		p = l3->slabs_free.prev;
-		if (p == &l3->slabs_free)
-			break;
+		if (p == &l3->slabs_free) {
+			spin_unlock_irq(&l3->list_lock);
+			goto out;
+		}
 
-		slabp = list_entry(l3->slabs_free.prev, struct slab, list);
+		slabp = list_entry(p, struct slab, list);
 #if DEBUG
 		BUG_ON(slabp->inuse);
 #endif
 		list_del(&slabp->list);
-
-		l3->free_objects -= cachep->num;
+		/*
+		 * Safe to drop the lock. The slab is no longer linked
+		 * to the cache.
+		 */
+		l3->free_objects -= cache->num;
 		spin_unlock_irq(&l3->list_lock);
-		slab_destroy(cachep, slabp);
-		spin_lock_irq(&l3->list_lock);
+		slab_destroy(cache, slabp);
+		nr_freed++;
 	}
-	ret = !list_empty(&l3->slabs_full) || !list_empty(&l3->slabs_partial);
-	return ret;
+out:
+	return nr_freed;
 }
 
 static int __cache_shrink(struct kmem_cache *cachep)
@@ -2283,11 +2344,13 @@ static int __cache_shrink(struct kmem_cache *cachep)
 	check_irq_on();
 	for_each_online_node(i) {
 		l3 = cachep->nodelists[i];
-		if (l3) {
-			spin_lock_irq(&l3->list_lock);
-			ret += __node_shrink(cachep, i);
-			spin_unlock_irq(&l3->list_lock);
-		}
+		if (!l3)
+			continue;
+
+		drain_freelist(cachep, l3, l3->free_objects);
+
+		ret += !list_empty(&l3->slabs_full) ||
+			!list_empty(&l3->slabs_partial);
 	}
 	return (ret ? 1 : 0);
 }
@@ -2669,15 +2732,6 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
 	kfree_debugcheck(objp);
 	page = virt_to_page(objp);
 
-	if (page_get_cache(page) != cachep) {
-		printk(KERN_ERR "mismatch in kmem_cache_free: expected "
-				"cache %p, got %p\n",
-		       page_get_cache(page), cachep);
-		printk(KERN_ERR "%p is %s.\n", cachep, cachep->name);
-		printk(KERN_ERR "%p is %s.\n", page_get_cache(page),
-		       page_get_cache(page)->name);
-		WARN_ON(1);
-	}
 	slabp = page_get_slab(page);
 
 	if (cachep->flags & SLAB_RED_ZONE) {
@@ -3276,26 +3330,10 @@ EXPORT_SYMBOL(kmalloc_node);
 #endif
 
 /**
- * kmalloc - allocate memory
+ * __do_kmalloc - allocate memory
  * @size: how many bytes of memory are required.
- * @flags: the type of memory to allocate.
+ * @flags: the type of memory to allocate (see kmalloc).
 * @caller: function caller for debug tracking of the caller
- *
- * kmalloc is the normal method of allocating memory
- * in the kernel.
- *
- * The @flags argument may be one of:
- *
- * %GFP_USER - Allocate memory on behalf of user. May sleep.
- *
- * %GFP_KERNEL - Allocate normal kernel ram. May sleep.
- *
- * %GFP_ATOMIC - Allocation will not sleep. Use inside interrupt handlers.
- *
- * Additionally, the %GFP_DMA flag may be set to indicate the memory
- * must be suitable for DMA. This can mean different things on different
- * platforms. For example, on i386, it means that the memory must come
- * from the first 16MB.
 */
 static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
 					  void *caller)
@@ -3393,6 +3431,8 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp)
 {
 	unsigned long flags;
 
+	BUG_ON(virt_to_cache(objp) != cachep);
+
 	local_irq_save(flags);
 	__cache_free(cachep, objp);
 	local_irq_restore(flags);
@@ -3418,7 +3458,7 @@ void kfree(const void *objp)
 	local_irq_save(flags);
 	kfree_debugcheck(objp);
 	c = virt_to_cache(objp);
-	mutex_debug_check_no_locks_freed(objp, obj_size(c));
+	debug_check_no_locks_freed(objp, obj_size(c));
 	__cache_free(c, (void *)objp);
 	local_irq_restore(flags);
 }
@@ -3714,10 +3754,6 @@ static void cache_reap(void *unused)
 	}
 
 	list_for_each_entry(searchp, &cache_chain, next) {
-		struct list_head *p;
-		int tofree;
-		struct slab *slabp;
-
 		check_irq_on();
 
 		/*
@@ -3742,47 +3778,22 @@ static void cache_reap(void *unused)
 
 		drain_array(searchp, l3, l3->shared, 0, node);
 
-		if (l3->free_touched) {
+		if (l3->free_touched)
 			l3->free_touched = 0;
-			goto next;
-		}
-
-		tofree = (l3->free_limit + 5 * searchp->num - 1) /
-			(5 * searchp->num);
-		do {
-			/*
-			 * Do not lock if there are no free blocks.
-			 */
-			if (list_empty(&l3->slabs_free))
-				break;
+		else {
+			int freed;
 
-			spin_lock_irq(&l3->list_lock);
-			p = l3->slabs_free.next;
-			if (p == &(l3->slabs_free)) {
-				spin_unlock_irq(&l3->list_lock);
-				break;
-			}
-
-			slabp = list_entry(p, struct slab, list);
-			BUG_ON(slabp->inuse);
-			list_del(&slabp->list);
-			STATS_INC_REAPED(searchp);
-
-			/*
-			 * Safe to drop the lock. The slab is no longer linked
-			 * to the cache. searchp cannot disappear, we hold
-			 * cache_chain_lock
-			 */
-			l3->free_objects -= searchp->num;
-			spin_unlock_irq(&l3->list_lock);
-			slab_destroy(searchp, slabp);
-		} while (--tofree > 0);
+			freed = drain_freelist(searchp, l3, (l3->free_limit +
+				5 * searchp->num - 1) / (5 * searchp->num));
+			STATS_ADD_REAPED(searchp, freed);
+		}
 next:
 		cond_resched();
 	}
 	check_irq_on();
 	mutex_unlock(&cache_chain_mutex);
 	next_reap_node();
+	refresh_cpu_vm_stats(smp_processor_id());
 	/* Set up the next iteration */
 	schedule_delayed_work(&__get_cpu_var(reap_work), REAPTIMEOUT_CPUC);
 }
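The central refactor in this patch is the new drain_freelist() helper: it unlinks one free slab at a time while holding list_lock, drops the lock, destroys the slab, and reports how many slabs it actually released, so cache_reap() can feed STATS_ADD_REAPED() and the cpu-hotplug and __cache_shrink() paths can reuse the same code. The standalone sketch below illustrates that unlink-under-lock, free-outside-lock pattern in plain userspace C; the struct names, the pthread mutex, and destroy_slab() are hypothetical stand-ins for illustration, not the kernel's own types or API.

/*
 * Illustration of the drain_freelist() pattern from the patch above:
 * detach an entry while holding the list lock, then do the slow
 * destruction with the lock dropped, and return the count freed.
 * All names here are made up for the example.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct free_slab {
	struct free_slab *next;
	void *mem;			/* pretend payload to release */
};

struct node_list {
	pthread_mutex_t list_lock;	/* protects slabs_free and free_objects */
	struct free_slab *slabs_free;
	int free_objects;
	int objs_per_slab;
};

/* Slow work done outside the lock, like slab_destroy() in the patch. */
static void destroy_slab(struct free_slab *slabp)
{
	free(slabp->mem);
	free(slabp);
}

/* Free at most tofree slabs; return how many were actually released. */
static int drain_freelist(struct node_list *l3, int tofree)
{
	int nr_freed = 0;

	while (nr_freed < tofree) {
		struct free_slab *slabp;

		pthread_mutex_lock(&l3->list_lock);
		slabp = l3->slabs_free;
		if (!slabp) {
			/* Ran out early: report the partial count. */
			pthread_mutex_unlock(&l3->list_lock);
			break;
		}
		/* Unlink and adjust counters under the lock ... */
		l3->slabs_free = slabp->next;
		l3->free_objects -= l3->objs_per_slab;
		pthread_mutex_unlock(&l3->list_lock);

		/* ... destroy with the lock dropped. */
		destroy_slab(slabp);
		nr_freed++;
	}
	return nr_freed;
}

int main(void)
{
	struct node_list l3 = {
		.list_lock = PTHREAD_MUTEX_INITIALIZER,
		.objs_per_slab = 8,
	};

	/* Populate a few fake free slabs. */
	for (int i = 0; i < 5; i++) {
		struct free_slab *s = malloc(sizeof(*s));
		s->mem = malloc(64);
		s->next = l3.slabs_free;
		l3.slabs_free = s;
		l3.free_objects += l3.objs_per_slab;
	}

	printf("freed %d slabs, %d free objects left\n",
	       drain_freelist(&l3, 3), l3.free_objects);
	printf("freed %d more\n", drain_freelist(&l3, 10));
	return 0;
}

As in the patch, returning the number actually freed (rather than looping a fixed count inside the caller) is what lets the periodic reaper account the work with STATS_ADD_REAPED() while the shrink and hotplug paths simply pass the whole free_objects count to drain everything.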