From 9dfc6e68bfe6ee452efb1a4e9ca26a9007f2b864 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Fri, 18 Dec 2009 16:26:20 -0600 Subject: SLUB: Use this_cpu operations in slub Using per cpu allocations removes the needs for the per cpu arrays in the kmem_cache struct. These could get quite big if we have to support systems with thousands of cpus. The use of this_cpu_xx operations results in: 1. The size of kmem_cache for SMP configuration shrinks since we will only need 1 pointer instead of NR_CPUS. The same pointer can be used by all processors. Reduces cache footprint of the allocator. 2. We can dynamically size kmem_cache according to the actual nodes in the system meaning less memory overhead for configurations that may potentially support up to 1k NUMA nodes / 4k cpus. 3. We can remove the diddle widdle with allocating and releasing of kmem_cache_cpu structures when bringing up and shutting down cpus. The cpu alloc logic will do it all for us. Removes some portions of the cpu hotplug functionality. 4. Fastpath performance increases since per cpu pointer lookups and address calculations are avoided. V7-V8 - Convert missed get_cpu_slab() under CONFIG_SLUB_STATS Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- mm/slub.c | 202 +++++++++++++++----------------------------------------------- 1 file changed, 48 insertions(+), 154 deletions(-) (limited to 'mm/slub.c') diff --git a/mm/slub.c b/mm/slub.c index 8d71aaf888d7..d6c9ecf629d5 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -242,15 +242,6 @@ static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node) #endif } -static inline struct kmem_cache_cpu *get_cpu_slab(struct kmem_cache *s, int cpu) -{ -#ifdef CONFIG_SMP - return s->cpu_slab[cpu]; -#else - return &s->cpu_slab; -#endif -} - /* Verify that a pointer has an address that is valid within a slab page */ static inline int check_valid_pointer(struct kmem_cache *s, struct page *page, const void *object) @@ -1124,7 +1115,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) if (!page) return NULL; - stat(get_cpu_slab(s, raw_smp_processor_id()), ORDER_FALLBACK); + stat(this_cpu_ptr(s->cpu_slab), ORDER_FALLBACK); } if (kmemcheck_enabled @@ -1422,7 +1413,7 @@ static struct page *get_partial(struct kmem_cache *s, gfp_t flags, int node) static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail) { struct kmem_cache_node *n = get_node(s, page_to_nid(page)); - struct kmem_cache_cpu *c = get_cpu_slab(s, smp_processor_id()); + struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab); __ClearPageSlubFrozen(page); if (page->inuse) { @@ -1454,7 +1445,7 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail) slab_unlock(page); } else { slab_unlock(page); - stat(get_cpu_slab(s, raw_smp_processor_id()), FREE_SLAB); + stat(__this_cpu_ptr(s->cpu_slab), FREE_SLAB); discard_slab(s, page); } } @@ -1507,7 +1498,7 @@ static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) */ static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu) { - struct kmem_cache_cpu *c = get_cpu_slab(s, cpu); + struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); if (likely(c && c->page)) flush_slab(s, c); @@ -1673,7 +1664,7 @@ new_slab: local_irq_disable(); if (new) { - c = get_cpu_slab(s, smp_processor_id()); + c = __this_cpu_ptr(s->cpu_slab); stat(c, ALLOC_SLAB); if (c->page) flush_slab(s, c); @@ -1711,7 +1702,7 @@ static __always_inline void *slab_alloc(struct kmem_cache *s, void **object; struct kmem_cache_cpu *c; unsigned long flags; - unsigned int objsize; + unsigned long objsize; gfpflags &= gfp_allowed_mask; @@ -1722,14 +1713,14 @@ static __always_inline void *slab_alloc(struct kmem_cache *s, return NULL; local_irq_save(flags); - c = get_cpu_slab(s, smp_processor_id()); + c = __this_cpu_ptr(s->cpu_slab); + object = c->freelist; objsize = c->objsize; - if (unlikely(!c->freelist || !node_match(c, node))) + if (unlikely(!object || !node_match(c, node))) object = __slab_alloc(s, gfpflags, node, addr, c); else { - object = c->freelist; c->freelist = object[c->offset]; stat(c, ALLOC_FASTPATH); } @@ -1800,7 +1791,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, void **object = (void *)x; struct kmem_cache_cpu *c; - c = get_cpu_slab(s, raw_smp_processor_id()); + c = __this_cpu_ptr(s->cpu_slab); stat(c, FREE_SLOWPATH); slab_lock(page); @@ -1872,7 +1863,7 @@ static __always_inline void slab_free(struct kmem_cache *s, kmemleak_free_recursive(x, s->flags); local_irq_save(flags); - c = get_cpu_slab(s, smp_processor_id()); + c = __this_cpu_ptr(s->cpu_slab); kmemcheck_slab_free(s, object, c->objsize); debug_check_no_locks_freed(object, c->objsize); if (!(s->flags & SLAB_DEBUG_OBJECTS)) @@ -2095,130 +2086,28 @@ init_kmem_cache_node(struct kmem_cache_node *n, struct kmem_cache *s) #endif } -#ifdef CONFIG_SMP -/* - * Per cpu array for per cpu structures. - * - * The per cpu array places all kmem_cache_cpu structures from one processor - * close together meaning that it becomes possible that multiple per cpu - * structures are contained in one cacheline. This may be particularly - * beneficial for the kmalloc caches. - * - * A desktop system typically has around 60-80 slabs. With 100 here we are - * likely able to get per cpu structures for all caches from the array defined - * here. We must be able to cover all kmalloc caches during bootstrap. - * - * If the per cpu array is exhausted then fall back to kmalloc - * of individual cachelines. No sharing is possible then. - */ -#define NR_KMEM_CACHE_CPU 100 - -static DEFINE_PER_CPU(struct kmem_cache_cpu [NR_KMEM_CACHE_CPU], - kmem_cache_cpu); - -static DEFINE_PER_CPU(struct kmem_cache_cpu *, kmem_cache_cpu_free); -static DECLARE_BITMAP(kmem_cach_cpu_free_init_once, CONFIG_NR_CPUS); - -static struct kmem_cache_cpu *alloc_kmem_cache_cpu(struct kmem_cache *s, - int cpu, gfp_t flags) -{ - struct kmem_cache_cpu *c = per_cpu(kmem_cache_cpu_free, cpu); - - if (c) - per_cpu(kmem_cache_cpu_free, cpu) = - (void *)c->freelist; - else { - /* Table overflow: So allocate ourselves */ - c = kmalloc_node( - ALIGN(sizeof(struct kmem_cache_cpu), cache_line_size()), - flags, cpu_to_node(cpu)); - if (!c) - return NULL; - } - - init_kmem_cache_cpu(s, c); - return c; -} - -static void free_kmem_cache_cpu(struct kmem_cache_cpu *c, int cpu) -{ - if (c < per_cpu(kmem_cache_cpu, cpu) || - c >= per_cpu(kmem_cache_cpu, cpu) + NR_KMEM_CACHE_CPU) { - kfree(c); - return; - } - c->freelist = (void *)per_cpu(kmem_cache_cpu_free, cpu); - per_cpu(kmem_cache_cpu_free, cpu) = c; -} - -static void free_kmem_cache_cpus(struct kmem_cache *s) -{ - int cpu; - - for_each_online_cpu(cpu) { - struct kmem_cache_cpu *c = get_cpu_slab(s, cpu); - - if (c) { - s->cpu_slab[cpu] = NULL; - free_kmem_cache_cpu(c, cpu); - } - } -} - -static int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags) -{ - int cpu; - - for_each_online_cpu(cpu) { - struct kmem_cache_cpu *c = get_cpu_slab(s, cpu); +static DEFINE_PER_CPU(struct kmem_cache_cpu, kmalloc_percpu[SLUB_PAGE_SHIFT]); - if (c) - continue; - - c = alloc_kmem_cache_cpu(s, cpu, flags); - if (!c) { - free_kmem_cache_cpus(s); - return 0; - } - s->cpu_slab[cpu] = c; - } - return 1; -} - -/* - * Initialize the per cpu array. - */ -static void init_alloc_cpu_cpu(int cpu) -{ - int i; - - if (cpumask_test_cpu(cpu, to_cpumask(kmem_cach_cpu_free_init_once))) - return; - - for (i = NR_KMEM_CACHE_CPU - 1; i >= 0; i--) - free_kmem_cache_cpu(&per_cpu(kmem_cache_cpu, cpu)[i], cpu); - - cpumask_set_cpu(cpu, to_cpumask(kmem_cach_cpu_free_init_once)); -} - -static void __init init_alloc_cpu(void) +static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags) { int cpu; - for_each_online_cpu(cpu) - init_alloc_cpu_cpu(cpu); - } + if (s < kmalloc_caches + SLUB_PAGE_SHIFT && s >= kmalloc_caches) + /* + * Boot time creation of the kmalloc array. Use static per cpu data + * since the per cpu allocator is not available yet. + */ + s->cpu_slab = per_cpu_var(kmalloc_percpu) + (s - kmalloc_caches); + else + s->cpu_slab = alloc_percpu(struct kmem_cache_cpu); -#else -static inline void free_kmem_cache_cpus(struct kmem_cache *s) {} -static inline void init_alloc_cpu(void) {} + if (!s->cpu_slab) + return 0; -static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags) -{ - init_kmem_cache_cpu(s, &s->cpu_slab); + for_each_possible_cpu(cpu) + init_kmem_cache_cpu(s, per_cpu_ptr(s->cpu_slab, cpu)); return 1; } -#endif #ifdef CONFIG_NUMA /* @@ -2609,9 +2498,8 @@ static inline int kmem_cache_close(struct kmem_cache *s) int node; flush_all(s); - + free_percpu(s->cpu_slab); /* Attempt to free all objects */ - free_kmem_cache_cpus(s); for_each_node_state(node, N_NORMAL_MEMORY) { struct kmem_cache_node *n = get_node(s, node); @@ -2760,7 +2648,19 @@ static noinline struct kmem_cache *dma_kmalloc_cache(int index, gfp_t flags) realsize = kmalloc_caches[index].objsize; text = kasprintf(flags & ~SLUB_DMA, "kmalloc_dma-%d", (unsigned int)realsize); - s = kmalloc(kmem_size, flags & ~SLUB_DMA); + + if (flags & __GFP_WAIT) + s = kmalloc(kmem_size, flags & ~SLUB_DMA); + else { + int i; + + s = NULL; + for (i = 0; i < SLUB_PAGE_SHIFT; i++) + if (kmalloc_caches[i].size) { + s = kmalloc_caches + i; + break; + } + } /* * Must defer sysfs creation to a workqueue because we don't know @@ -3176,8 +3076,6 @@ void __init kmem_cache_init(void) int i; int caches = 0; - init_alloc_cpu(); - #ifdef CONFIG_NUMA /* * Must first have the slab cache available for the allocations of the @@ -3261,8 +3159,10 @@ void __init kmem_cache_init(void) #ifdef CONFIG_SMP register_cpu_notifier(&slab_notifier); - kmem_size = offsetof(struct kmem_cache, cpu_slab) + - nr_cpu_ids * sizeof(struct kmem_cache_cpu *); +#endif +#ifdef CONFIG_NUMA + kmem_size = offsetof(struct kmem_cache, node) + + nr_node_ids * sizeof(struct kmem_cache_node *); #else kmem_size = sizeof(struct kmem_cache); #endif @@ -3365,7 +3265,7 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size, * per cpu structures */ for_each_online_cpu(cpu) - get_cpu_slab(s, cpu)->objsize = s->objsize; + per_cpu_ptr(s->cpu_slab, cpu)->objsize = s->objsize; s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *))); up_write(&slub_lock); @@ -3422,11 +3322,9 @@ static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb, switch (action) { case CPU_UP_PREPARE: case CPU_UP_PREPARE_FROZEN: - init_alloc_cpu_cpu(cpu); down_read(&slub_lock); list_for_each_entry(s, &slab_caches, list) - s->cpu_slab[cpu] = alloc_kmem_cache_cpu(s, cpu, - GFP_KERNEL); + init_kmem_cache_cpu(s, per_cpu_ptr(s->cpu_slab, cpu)); up_read(&slub_lock); break; @@ -3436,13 +3334,9 @@ static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb, case CPU_DEAD_FROZEN: down_read(&slub_lock); list_for_each_entry(s, &slab_caches, list) { - struct kmem_cache_cpu *c = get_cpu_slab(s, cpu); - local_irq_save(flags); __flush_cpu_slab(s, cpu); local_irq_restore(flags); - free_kmem_cache_cpu(c, cpu); - s->cpu_slab[cpu] = NULL; } up_read(&slub_lock); break; @@ -3928,7 +3822,7 @@ static ssize_t show_slab_objects(struct kmem_cache *s, int cpu; for_each_possible_cpu(cpu) { - struct kmem_cache_cpu *c = get_cpu_slab(s, cpu); + struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); if (!c || c->node < 0) continue; @@ -4353,7 +4247,7 @@ static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si) return -ENOMEM; for_each_online_cpu(cpu) { - unsigned x = get_cpu_slab(s, cpu)->stat[si]; + unsigned x = per_cpu_ptr(s->cpu_slab, cpu)->stat[si]; data[cpu] = x; sum += x; @@ -4376,7 +4270,7 @@ static void clear_stat(struct kmem_cache *s, enum stat_item si) int cpu; for_each_online_cpu(cpu) - get_cpu_slab(s, cpu)->stat[si] = 0; + per_cpu_ptr(s->cpu_slab, cpu)->stat[si] = 0; } #define STAT_ATTR(si, text) \ -- cgit v1.2.3 From 756dee75872a2a764b478e18076360b8a4ec9045 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Fri, 18 Dec 2009 16:26:21 -0600 Subject: SLUB: Get rid of dynamic DMA kmalloc cache allocation Dynamic DMA kmalloc cache allocation is troublesome since the new percpu allocator does not support allocations in atomic contexts. Reserve some statically allocated kmalloc_cpu structures instead. Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- include/linux/slub_def.h | 19 +++++++++++-------- mm/slub.c | 24 ++++++++++-------------- 2 files changed, 21 insertions(+), 22 deletions(-) (limited to 'mm/slub.c') diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 17ebe0f89bf3..a78fb4ac2015 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -131,11 +131,21 @@ struct kmem_cache { #define SLUB_PAGE_SHIFT (PAGE_SHIFT + 2) +#ifdef CONFIG_ZONE_DMA +#define SLUB_DMA __GFP_DMA +/* Reserve extra caches for potential DMA use */ +#define KMALLOC_CACHES (2 * SLUB_PAGE_SHIFT - 6) +#else +/* Disable DMA functionality */ +#define SLUB_DMA (__force gfp_t)0 +#define KMALLOC_CACHES SLUB_PAGE_SHIFT +#endif + /* * We keep the general caches in an array of slab caches that are used for * 2^x bytes of allocations. */ -extern struct kmem_cache kmalloc_caches[SLUB_PAGE_SHIFT]; +extern struct kmem_cache kmalloc_caches[KMALLOC_CACHES]; /* * Sorry that the following has to be that ugly but some versions of GCC @@ -203,13 +213,6 @@ static __always_inline struct kmem_cache *kmalloc_slab(size_t size) return &kmalloc_caches[index]; } -#ifdef CONFIG_ZONE_DMA -#define SLUB_DMA __GFP_DMA -#else -/* Disable DMA functionality */ -#define SLUB_DMA (__force gfp_t)0 -#endif - void *kmem_cache_alloc(struct kmem_cache *, gfp_t); void *__kmalloc(size_t size, gfp_t flags); diff --git a/mm/slub.c b/mm/slub.c index d6c9ecf629d5..cdb7f0214af0 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2092,7 +2092,7 @@ static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags) { int cpu; - if (s < kmalloc_caches + SLUB_PAGE_SHIFT && s >= kmalloc_caches) + if (s < kmalloc_caches + KMALLOC_CACHES && s >= kmalloc_caches) /* * Boot time creation of the kmalloc array. Use static per cpu data * since the per cpu allocator is not available yet. @@ -2539,7 +2539,7 @@ EXPORT_SYMBOL(kmem_cache_destroy); * Kmalloc subsystem *******************************************************************/ -struct kmem_cache kmalloc_caches[SLUB_PAGE_SHIFT] __cacheline_aligned; +struct kmem_cache kmalloc_caches[KMALLOC_CACHES] __cacheline_aligned; EXPORT_SYMBOL(kmalloc_caches); static int __init setup_slub_min_order(char *str) @@ -2629,6 +2629,7 @@ static noinline struct kmem_cache *dma_kmalloc_cache(int index, gfp_t flags) char *text; size_t realsize; unsigned long slabflags; + int i; s = kmalloc_caches_dma[index]; if (s) @@ -2649,18 +2650,13 @@ static noinline struct kmem_cache *dma_kmalloc_cache(int index, gfp_t flags) text = kasprintf(flags & ~SLUB_DMA, "kmalloc_dma-%d", (unsigned int)realsize); - if (flags & __GFP_WAIT) - s = kmalloc(kmem_size, flags & ~SLUB_DMA); - else { - int i; + s = NULL; + for (i = 0; i < KMALLOC_CACHES; i++) + if (!kmalloc_caches[i].size) + break; - s = NULL; - for (i = 0; i < SLUB_PAGE_SHIFT; i++) - if (kmalloc_caches[i].size) { - s = kmalloc_caches + i; - break; - } - } + BUG_ON(i >= KMALLOC_CACHES); + s = kmalloc_caches + i; /* * Must defer sysfs creation to a workqueue because we don't know @@ -2674,7 +2670,7 @@ static noinline struct kmem_cache *dma_kmalloc_cache(int index, gfp_t flags) if (!s || !text || !kmem_cache_open(s, flags, text, realsize, ARCH_KMALLOC_MINALIGN, slabflags, NULL)) { - kfree(s); + s->size = 0; kfree(text); goto unlock_out; } -- cgit v1.2.3 From ff12059ed14b0773d7bbef86f98218ada6c20770 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Fri, 18 Dec 2009 16:26:22 -0600 Subject: SLUB: this_cpu: Remove slub kmem_cache fields Remove the fields in struct kmem_cache_cpu that were used to cache data from struct kmem_cache when they were in different cachelines. The cacheline that holds the per cpu array pointer now also holds these values. We can cut down the struct kmem_cache_cpu size to almost half. The get_freepointer() and set_freepointer() functions that used to be only intended for the slow path now are also useful for the hot path since access to the size field does not require accessing an additional cacheline anymore. This results in consistent use of functions for setting the freepointer of objects throughout SLUB. Also we initialize all possible kmem_cache_cpu structures when a slab is created. No need to initialize them when a processor or node comes online. Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- include/linux/slub_def.h | 2 -- mm/slub.c | 76 +++++++++++------------------------------------- 2 files changed, 17 insertions(+), 61 deletions(-) (limited to 'mm/slub.c') diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index a78fb4ac2015..0249d4175bac 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -38,8 +38,6 @@ struct kmem_cache_cpu { void **freelist; /* Pointer to first free per cpu object */ struct page *page; /* The slab from which we are allocating */ int node; /* The node of the page (or -1 for debug) */ - unsigned int offset; /* Freepointer offset (in word units) */ - unsigned int objsize; /* Size of an object (from kmem_cache) */ #ifdef CONFIG_SLUB_STATS unsigned stat[NR_SLUB_STAT_ITEMS]; #endif diff --git a/mm/slub.c b/mm/slub.c index cdb7f0214af0..30d2dde27563 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -260,13 +260,6 @@ static inline int check_valid_pointer(struct kmem_cache *s, return 1; } -/* - * Slow version of get and set free pointer. - * - * This version requires touching the cache lines of kmem_cache which - * we avoid to do in the fast alloc free paths. There we obtain the offset - * from the page struct. - */ static inline void *get_freepointer(struct kmem_cache *s, void *object) { return *(void **)(object + s->offset); @@ -1473,10 +1466,10 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) /* Retrieve object from cpu_freelist */ object = c->freelist; - c->freelist = c->freelist[c->offset]; + c->freelist = get_freepointer(s, c->freelist); /* And put onto the regular freelist */ - object[c->offset] = page->freelist; + set_freepointer(s, object, page->freelist); page->freelist = object; page->inuse--; } @@ -1635,7 +1628,7 @@ load_freelist: if (unlikely(SLABDEBUG && PageSlubDebug(c->page))) goto debug; - c->freelist = object[c->offset]; + c->freelist = get_freepointer(s, object); c->page->inuse = c->page->objects; c->page->freelist = NULL; c->node = page_to_nid(c->page); @@ -1681,7 +1674,7 @@ debug: goto another_slab; c->page->inuse++; - c->page->freelist = object[c->offset]; + c->page->freelist = get_freepointer(s, object); c->node = -1; goto unlock_out; } @@ -1702,7 +1695,6 @@ static __always_inline void *slab_alloc(struct kmem_cache *s, void **object; struct kmem_cache_cpu *c; unsigned long flags; - unsigned long objsize; gfpflags &= gfp_allowed_mask; @@ -1715,22 +1707,21 @@ static __always_inline void *slab_alloc(struct kmem_cache *s, local_irq_save(flags); c = __this_cpu_ptr(s->cpu_slab); object = c->freelist; - objsize = c->objsize; if (unlikely(!object || !node_match(c, node))) object = __slab_alloc(s, gfpflags, node, addr, c); else { - c->freelist = object[c->offset]; + c->freelist = get_freepointer(s, object); stat(c, ALLOC_FASTPATH); } local_irq_restore(flags); if (unlikely(gfpflags & __GFP_ZERO) && object) - memset(object, 0, objsize); + memset(object, 0, s->objsize); - kmemcheck_slab_alloc(s, gfpflags, object, c->objsize); - kmemleak_alloc_recursive(object, objsize, 1, s->flags, gfpflags); + kmemcheck_slab_alloc(s, gfpflags, object, s->objsize); + kmemleak_alloc_recursive(object, s->objsize, 1, s->flags, gfpflags); return object; } @@ -1785,7 +1776,7 @@ EXPORT_SYMBOL(kmem_cache_alloc_node_notrace); * handling required then we can return immediately. */ static void __slab_free(struct kmem_cache *s, struct page *page, - void *x, unsigned long addr, unsigned int offset) + void *x, unsigned long addr) { void *prior; void **object = (void *)x; @@ -1799,7 +1790,8 @@ static void __slab_free(struct kmem_cache *s, struct page *page, goto debug; checks_ok: - prior = object[offset] = page->freelist; + prior = page->freelist; + set_freepointer(s, object, prior); page->freelist = object; page->inuse--; @@ -1864,16 +1856,16 @@ static __always_inline void slab_free(struct kmem_cache *s, kmemleak_free_recursive(x, s->flags); local_irq_save(flags); c = __this_cpu_ptr(s->cpu_slab); - kmemcheck_slab_free(s, object, c->objsize); - debug_check_no_locks_freed(object, c->objsize); + kmemcheck_slab_free(s, object, s->objsize); + debug_check_no_locks_freed(object, s->objsize); if (!(s->flags & SLAB_DEBUG_OBJECTS)) - debug_check_no_obj_freed(object, c->objsize); + debug_check_no_obj_freed(object, s->objsize); if (likely(page == c->page && c->node >= 0)) { - object[c->offset] = c->freelist; + set_freepointer(s, object, c->freelist); c->freelist = object; stat(c, FREE_FASTPATH); } else - __slab_free(s, page, x, addr, c->offset); + __slab_free(s, page, x, addr); local_irq_restore(flags); } @@ -2060,19 +2052,6 @@ static unsigned long calculate_alignment(unsigned long flags, return ALIGN(align, sizeof(void *)); } -static void init_kmem_cache_cpu(struct kmem_cache *s, - struct kmem_cache_cpu *c) -{ - c->page = NULL; - c->freelist = NULL; - c->node = 0; - c->offset = s->offset / sizeof(void *); - c->objsize = s->objsize; -#ifdef CONFIG_SLUB_STATS - memset(c->stat, 0, NR_SLUB_STAT_ITEMS * sizeof(unsigned)); -#endif -} - static void init_kmem_cache_node(struct kmem_cache_node *n, struct kmem_cache *s) { @@ -2090,8 +2069,6 @@ static DEFINE_PER_CPU(struct kmem_cache_cpu, kmalloc_percpu[SLUB_PAGE_SHIFT]); static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags) { - int cpu; - if (s < kmalloc_caches + KMALLOC_CACHES && s >= kmalloc_caches) /* * Boot time creation of the kmalloc array. Use static per cpu data @@ -2104,8 +2081,6 @@ static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags) if (!s->cpu_slab) return 0; - for_each_possible_cpu(cpu) - init_kmem_cache_cpu(s, per_cpu_ptr(s->cpu_slab, cpu)); return 1; } @@ -2391,6 +2366,7 @@ static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags, if (alloc_kmem_cache_cpus(s, gfpflags & ~SLUB_DMA)) return 1; + free_kmem_cache_nodes(s); error: if (flags & SLAB_PANIC) @@ -3247,22 +3223,12 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size, down_write(&slub_lock); s = find_mergeable(size, align, flags, name, ctor); if (s) { - int cpu; - s->refcount++; /* * Adjust the object sizes so that we clear * the complete object on kzalloc. */ s->objsize = max(s->objsize, (int)size); - - /* - * And then we need to update the object size in the - * per cpu structures - */ - for_each_online_cpu(cpu) - per_cpu_ptr(s->cpu_slab, cpu)->objsize = s->objsize; - s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *))); up_write(&slub_lock); @@ -3316,14 +3282,6 @@ static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb, unsigned long flags; switch (action) { - case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: - down_read(&slub_lock); - list_for_each_entry(s, &slab_caches, list) - init_kmem_cache_cpu(s, per_cpu_ptr(s->cpu_slab, cpu)); - up_read(&slub_lock); - break; - case CPU_UP_CANCELED: case CPU_UP_CANCELED_FROZEN: case CPU_DEAD: -- cgit v1.2.3 From 84e554e6865c4f4ae84d38800cf270b9a67901cc Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Fri, 18 Dec 2009 16:26:23 -0600 Subject: SLUB: Make slub statistics use this_cpu_inc this_cpu_inc() translates into a single instruction on x86 and does not need any register. So use it in stat(). We also want to avoid the calculation of the per cpu kmem_cache_cpu structure pointer. So pass a kmem_cache pointer instead of a kmem_cache_cpu pointer. Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- mm/slub.c | 43 ++++++++++++++++++++----------------------- 1 file changed, 20 insertions(+), 23 deletions(-) (limited to 'mm/slub.c') diff --git a/mm/slub.c b/mm/slub.c index 30d2dde27563..bddae72f6f49 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -217,10 +217,10 @@ static inline void sysfs_slab_remove(struct kmem_cache *s) #endif -static inline void stat(struct kmem_cache_cpu *c, enum stat_item si) +static inline void stat(struct kmem_cache *s, enum stat_item si) { #ifdef CONFIG_SLUB_STATS - c->stat[si]++; + __this_cpu_inc(s->cpu_slab->stat[si]); #endif } @@ -1108,7 +1108,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) if (!page) return NULL; - stat(this_cpu_ptr(s->cpu_slab), ORDER_FALLBACK); + stat(s, ORDER_FALLBACK); } if (kmemcheck_enabled @@ -1406,23 +1406,22 @@ static struct page *get_partial(struct kmem_cache *s, gfp_t flags, int node) static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail) { struct kmem_cache_node *n = get_node(s, page_to_nid(page)); - struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab); __ClearPageSlubFrozen(page); if (page->inuse) { if (page->freelist) { add_partial(n, page, tail); - stat(c, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD); + stat(s, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD); } else { - stat(c, DEACTIVATE_FULL); + stat(s, DEACTIVATE_FULL); if (SLABDEBUG && PageSlubDebug(page) && (s->flags & SLAB_STORE_USER)) add_full(n, page); } slab_unlock(page); } else { - stat(c, DEACTIVATE_EMPTY); + stat(s, DEACTIVATE_EMPTY); if (n->nr_partial < s->min_partial) { /* * Adding an empty slab to the partial slabs in order @@ -1438,7 +1437,7 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail) slab_unlock(page); } else { slab_unlock(page); - stat(__this_cpu_ptr(s->cpu_slab), FREE_SLAB); + stat(s, FREE_SLAB); discard_slab(s, page); } } @@ -1453,7 +1452,7 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) int tail = 1; if (page->freelist) - stat(c, DEACTIVATE_REMOTE_FREES); + stat(s, DEACTIVATE_REMOTE_FREES); /* * Merge cpu freelist into slab freelist. Typically we get here * because both freelists are empty. So this is unlikely @@ -1479,7 +1478,7 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) { - stat(c, CPUSLAB_FLUSH); + stat(s, CPUSLAB_FLUSH); slab_lock(c->page); deactivate_slab(s, c); } @@ -1619,7 +1618,7 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, if (unlikely(!node_match(c, node))) goto another_slab; - stat(c, ALLOC_REFILL); + stat(s, ALLOC_REFILL); load_freelist: object = c->page->freelist; @@ -1634,7 +1633,7 @@ load_freelist: c->node = page_to_nid(c->page); unlock_out: slab_unlock(c->page); - stat(c, ALLOC_SLOWPATH); + stat(s, ALLOC_SLOWPATH); return object; another_slab: @@ -1644,7 +1643,7 @@ new_slab: new = get_partial(s, gfpflags, node); if (new) { c->page = new; - stat(c, ALLOC_FROM_PARTIAL); + stat(s, ALLOC_FROM_PARTIAL); goto load_freelist; } @@ -1658,7 +1657,7 @@ new_slab: if (new) { c = __this_cpu_ptr(s->cpu_slab); - stat(c, ALLOC_SLAB); + stat(s, ALLOC_SLAB); if (c->page) flush_slab(s, c); slab_lock(new); @@ -1713,7 +1712,7 @@ static __always_inline void *slab_alloc(struct kmem_cache *s, else { c->freelist = get_freepointer(s, object); - stat(c, ALLOC_FASTPATH); + stat(s, ALLOC_FASTPATH); } local_irq_restore(flags); @@ -1780,10 +1779,8 @@ static void __slab_free(struct kmem_cache *s, struct page *page, { void *prior; void **object = (void *)x; - struct kmem_cache_cpu *c; - c = __this_cpu_ptr(s->cpu_slab); - stat(c, FREE_SLOWPATH); + stat(s, FREE_SLOWPATH); slab_lock(page); if (unlikely(SLABDEBUG && PageSlubDebug(page))) @@ -1796,7 +1793,7 @@ checks_ok: page->inuse--; if (unlikely(PageSlubFrozen(page))) { - stat(c, FREE_FROZEN); + stat(s, FREE_FROZEN); goto out_unlock; } @@ -1809,7 +1806,7 @@ checks_ok: */ if (unlikely(!prior)) { add_partial(get_node(s, page_to_nid(page)), page, 1); - stat(c, FREE_ADD_PARTIAL); + stat(s, FREE_ADD_PARTIAL); } out_unlock: @@ -1822,10 +1819,10 @@ slab_empty: * Slab still on the partial list. */ remove_partial(s, page); - stat(c, FREE_REMOVE_PARTIAL); + stat(s, FREE_REMOVE_PARTIAL); } slab_unlock(page); - stat(c, FREE_SLAB); + stat(s, FREE_SLAB); discard_slab(s, page); return; @@ -1863,7 +1860,7 @@ static __always_inline void slab_free(struct kmem_cache *s, if (likely(page == c->page && c->node >= 0)) { set_freepointer(s, object, c->freelist); c->freelist = object; - stat(c, FREE_FASTPATH); + stat(s, FREE_FASTPATH); } else __slab_free(s, page, x, addr); -- cgit v1.2.3 From 7738dd9e8f2bc1c249e00c9c20e018448fac0084 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Fri, 15 Jan 2010 12:49:56 -0800 Subject: slub: remove impossible condition `s' cannot be NULL if kmalloc_caches is not NULL. This conditional would trigger a NULL pointer on `s', anyway, since it is immediately derefernced if true. Acked-by: Christoph Lameter Signed-off-by: David Rientjes Signed-off-by: Pekka Enberg --- mm/slub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm/slub.c') diff --git a/mm/slub.c b/mm/slub.c index bddae72f6f49..8fbb2fd70b64 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2641,7 +2641,7 @@ static noinline struct kmem_cache *dma_kmalloc_cache(int index, gfp_t flags) if (slab_state >= SYSFS) slabflags |= __SYSFS_ADD_DEFERRED; - if (!s || !text || !kmem_cache_open(s, flags, text, + if (!text || !kmem_cache_open(s, flags, text, realsize, ARCH_KMALLOC_MINALIGN, slabflags, NULL)) { s->size = 0; kfree(text); -- cgit v1.2.3 From 91efd773c74bb26b5409c85ad755d536448e229c Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Thu, 21 Jan 2010 17:43:35 -0600 Subject: dma kmalloc handling fixes 1. We need kmalloc_percpu for all of the now extended kmalloc caches array not just for each shift value. 2. init_kmem_cache_nodes() must assume node 0 locality for statically allocated dma kmem_cache structures even after boot is complete. Reported-and-tested-by: Alex Chiang Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- mm/slub.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'mm/slub.c') diff --git a/mm/slub.c b/mm/slub.c index 8fbb2fd70b64..bd4a9e942ace 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2062,7 +2062,7 @@ init_kmem_cache_node(struct kmem_cache_node *n, struct kmem_cache *s) #endif } -static DEFINE_PER_CPU(struct kmem_cache_cpu, kmalloc_percpu[SLUB_PAGE_SHIFT]); +static DEFINE_PER_CPU(struct kmem_cache_cpu, kmalloc_percpu[KMALLOC_CACHES]); static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags) { @@ -2148,7 +2148,8 @@ static int init_kmem_cache_nodes(struct kmem_cache *s, gfp_t gfpflags) int node; int local_node; - if (slab_state >= UP) + if (slab_state >= UP && (s < kmalloc_caches || + s > kmalloc_caches + KMALLOC_CACHES)) local_node = page_to_nid(virt_to_page(s)); else local_node = 0; -- cgit v1.2.3 From 4c13dd3b48fcb6fbe44f241eb11a057ecd1cba75 Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Fri, 26 Feb 2010 09:36:12 +0300 Subject: failslab: add ability to filter slab caches This patch allow to inject faults only for specific slabs. In order to preserve default behavior cache filter is off by default (all caches are faulty). One may define specific set of slabs like this: # mark skbuff_head_cache as faulty echo 1 > /sys/kernel/slab/skbuff_head_cache/failslab # Turn on cache filter (off by default) echo 1 > /sys/kernel/debug/failslab/cache-filter # Turn on fault injection echo 1 > /sys/kernel/debug/failslab/times echo 1 > /sys/kernel/debug/failslab/probability Acked-by: David Rientjes Acked-by: Akinobu Mita Acked-by: Christoph Lameter Signed-off-by: Dmitry Monakhov Signed-off-by: Pekka Enberg --- Documentation/vm/slub.txt | 1 + include/linux/fault-inject.h | 5 +++-- include/linux/slab.h | 5 +++++ mm/failslab.c | 18 +++++++++++++++--- mm/slab.c | 2 +- mm/slub.c | 29 +++++++++++++++++++++++++++-- 6 files changed, 52 insertions(+), 8 deletions(-) (limited to 'mm/slub.c') diff --git a/Documentation/vm/slub.txt b/Documentation/vm/slub.txt index b37300edf27c..07375e73981a 100644 --- a/Documentation/vm/slub.txt +++ b/Documentation/vm/slub.txt @@ -41,6 +41,7 @@ Possible debug options are P Poisoning (object and padding) U User tracking (free and alloc) T Trace (please only use on single slabs) + A Toggle failslab filter mark for the cache O Switch debugging off for caches that would have caused higher minimum slab orders - Switch all debugging off (useful if the kernel is diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h index 06ca9b21dad2..7b64ad40e4ce 100644 --- a/include/linux/fault-inject.h +++ b/include/linux/fault-inject.h @@ -82,9 +82,10 @@ static inline void cleanup_fault_attr_dentries(struct fault_attr *attr) #endif /* CONFIG_FAULT_INJECTION */ #ifdef CONFIG_FAILSLAB -extern bool should_failslab(size_t size, gfp_t gfpflags); +extern bool should_failslab(size_t size, gfp_t gfpflags, unsigned long flags); #else -static inline bool should_failslab(size_t size, gfp_t gfpflags) +static inline bool should_failslab(size_t size, gfp_t gfpflags, + unsigned long flags) { return false; } diff --git a/include/linux/slab.h b/include/linux/slab.h index 2da8372519f5..488446289cab 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -70,6 +70,11 @@ #else # define SLAB_NOTRACK 0x00000000UL #endif +#ifdef CONFIG_FAILSLAB +# define SLAB_FAILSLAB 0x02000000UL /* Fault injection mark */ +#else +# define SLAB_FAILSLAB 0x00000000UL +#endif /* The following flags affect the page allocator grouping pages by mobility */ #define SLAB_RECLAIM_ACCOUNT 0x00020000UL /* Objects are reclaimable */ diff --git a/mm/failslab.c b/mm/failslab.c index 9339de5f0a91..bb41f98dd8b7 100644 --- a/mm/failslab.c +++ b/mm/failslab.c @@ -1,18 +1,22 @@ #include #include +#include static struct { struct fault_attr attr; u32 ignore_gfp_wait; + int cache_filter; #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS struct dentry *ignore_gfp_wait_file; + struct dentry *cache_filter_file; #endif } failslab = { .attr = FAULT_ATTR_INITIALIZER, .ignore_gfp_wait = 1, + .cache_filter = 0, }; -bool should_failslab(size_t size, gfp_t gfpflags) +bool should_failslab(size_t size, gfp_t gfpflags, unsigned long cache_flags) { if (gfpflags & __GFP_NOFAIL) return false; @@ -20,6 +24,9 @@ bool should_failslab(size_t size, gfp_t gfpflags) if (failslab.ignore_gfp_wait && (gfpflags & __GFP_WAIT)) return false; + if (failslab.cache_filter && !(cache_flags & SLAB_FAILSLAB)) + return false; + return should_fail(&failslab.attr, size); } @@ -30,7 +37,6 @@ static int __init setup_failslab(char *str) __setup("failslab=", setup_failslab); #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS - static int __init failslab_debugfs_init(void) { mode_t mode = S_IFREG | S_IRUSR | S_IWUSR; @@ -46,8 +52,14 @@ static int __init failslab_debugfs_init(void) debugfs_create_bool("ignore-gfp-wait", mode, dir, &failslab.ignore_gfp_wait); - if (!failslab.ignore_gfp_wait_file) { + failslab.cache_filter_file = + debugfs_create_bool("cache-filter", mode, dir, + &failslab.cache_filter); + + if (!failslab.ignore_gfp_wait_file || + !failslab.cache_filter_file) { err = -ENOMEM; + debugfs_remove(failslab.cache_filter_file); debugfs_remove(failslab.ignore_gfp_wait_file); cleanup_fault_attr_dentries(&failslab.attr); } diff --git a/mm/slab.c b/mm/slab.c index 7451bdacaf18..33496b704859 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3101,7 +3101,7 @@ static bool slab_should_failslab(struct kmem_cache *cachep, gfp_t flags) if (cachep == &cache_cache) return false; - return should_failslab(obj_size(cachep), flags); + return should_failslab(obj_size(cachep), flags, cachep->flags); } static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags) diff --git a/mm/slub.c b/mm/slub.c index 8d71aaf888d7..cab5288736c8 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -151,7 +151,8 @@ * Set of flags that will prevent slab merging */ #define SLUB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \ - SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE) + SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE | \ + SLAB_FAILSLAB) #define SLUB_MERGE_SAME (SLAB_DEBUG_FREE | SLAB_RECLAIM_ACCOUNT | \ SLAB_CACHE_DMA | SLAB_NOTRACK) @@ -1020,6 +1021,9 @@ static int __init setup_slub_debug(char *str) case 't': slub_debug |= SLAB_TRACE; break; + case 'a': + slub_debug |= SLAB_FAILSLAB; + break; default: printk(KERN_ERR "slub_debug option '%c' " "unknown. skipped\n", *str); @@ -1718,7 +1722,7 @@ static __always_inline void *slab_alloc(struct kmem_cache *s, lockdep_trace_alloc(gfpflags); might_sleep_if(gfpflags & __GFP_WAIT); - if (should_failslab(s->objsize, gfpflags)) + if (should_failslab(s->objsize, gfpflags, s->flags)) return NULL; local_irq_save(flags); @@ -4171,6 +4175,23 @@ static ssize_t trace_store(struct kmem_cache *s, const char *buf, } SLAB_ATTR(trace); +#ifdef CONFIG_FAILSLAB +static ssize_t failslab_show(struct kmem_cache *s, char *buf) +{ + return sprintf(buf, "%d\n", !!(s->flags & SLAB_FAILSLAB)); +} + +static ssize_t failslab_store(struct kmem_cache *s, const char *buf, + size_t length) +{ + s->flags &= ~SLAB_FAILSLAB; + if (buf[0] == '1') + s->flags |= SLAB_FAILSLAB; + return length; +} +SLAB_ATTR(failslab); +#endif + static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf) { return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT)); @@ -4467,6 +4488,10 @@ static struct attribute *slab_attrs[] = { &deactivate_remote_frees_attr.attr, &order_fallback_attr.attr, #endif +#ifdef CONFIG_FAILSLAB + &failslab_attr.attr, +#endif + NULL }; -- cgit v1.2.3 From 1154fab73ccbab010cfaa272b6987c624cfd63c6 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Mon, 1 Mar 2010 16:04:45 +1100 Subject: SLUB: Fix per-cpu merge conflict MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The slab tree adds a percpu variable usage case (commit 9dfc6e68bfe6ee452efb1a4e9ca26a9007f2b864 "SLUB: Use this_cpu operations in slub"), but the percpu tree removes the prefixing of percpu variables (commit dd17c8f72993f9461e9c19250e3f155d6d99df22 "percpu: remove per_cpu__ prefix"), thus causing the following compilation error: CC mm/slub.o mm/slub.c: In function ‘alloc_kmem_cache_cpus’: mm/slub.c:2078: error: implicit declaration of function ‘per_cpu_var’ mm/slub.c:2078: warning: assignment makes pointer from integer without a cast make[1]: *** [mm/slub.o] Error 1 Signed-off-by: Pekka Enberg --- mm/slub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm/slub.c') diff --git a/mm/slub.c b/mm/slub.c index 3525a4ec9794..0bfd3863d521 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2075,7 +2075,7 @@ static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags) * Boot time creation of the kmalloc array. Use static per cpu data * since the per cpu allocator is not available yet. */ - s->cpu_slab = per_cpu_var(kmalloc_percpu) + (s - kmalloc_caches); + s->cpu_slab = kmalloc_percpu + (s - kmalloc_caches); else s->cpu_slab = alloc_percpu(struct kmem_cache_cpu); -- cgit v1.2.3