diff options
Diffstat (limited to '')
-rw-r--r-- | mm/slab.c | 192 |
1 files changed, 174 insertions, 18 deletions
diff --git a/mm/slab.c b/mm/slab.c index 1fcf3ac94b6c..55d84a22ad96 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -124,6 +124,8 @@ #include <trace/events/kmem.h> +#include "internal.h" + /* * DEBUG - 1 for kmem_cache_create() to honour; SLAB_RED_ZONE & SLAB_POISON. * 0 for faster, smaller code (especially in the critical paths). @@ -152,6 +154,12 @@ #define ARCH_KMALLOC_FLAGS SLAB_HWCACHE_ALIGN #endif +/* + * true if a page was allocated from pfmemalloc reserves for network-based + * swap + */ +static bool pfmemalloc_active __read_mostly; + /* Legal flag mask for kmem_cache_create(). */ #if DEBUG # define CREATE_MASK (SLAB_RED_ZONE | \ @@ -257,9 +265,30 @@ struct array_cache { * Must have this definition in here for the proper * alignment of array_cache. Also simplifies accessing * the entries. + * + * Entries should not be directly dereferenced as + * entries belonging to slabs marked pfmemalloc will + * have the lower bits set SLAB_OBJ_PFMEMALLOC */ }; +#define SLAB_OBJ_PFMEMALLOC 1 +static inline bool is_obj_pfmemalloc(void *objp) +{ + return (unsigned long)objp & SLAB_OBJ_PFMEMALLOC; +} + +static inline void set_obj_pfmemalloc(void **objp) +{ + *objp = (void *)((unsigned long)*objp | SLAB_OBJ_PFMEMALLOC); + return; +} + +static inline void clear_obj_pfmemalloc(void **objp) +{ + *objp = (void *)((unsigned long)*objp & ~SLAB_OBJ_PFMEMALLOC); +} + /* * bootstrap: The caches do not work without cpuarrays anymore, but the * cpuarrays are allocated from the generic caches... @@ -900,6 +929,102 @@ static struct array_cache *alloc_arraycache(int node, int entries, return nc; } +static inline bool is_slab_pfmemalloc(struct slab *slabp) +{ + struct page *page = virt_to_page(slabp->s_mem); + + return PageSlabPfmemalloc(page); +} + +/* Clears pfmemalloc_active if no slabs have pfmalloc set */ +static void recheck_pfmemalloc_active(struct kmem_cache *cachep, + struct array_cache *ac) +{ + struct kmem_list3 *l3 = cachep->nodelists[numa_mem_id()]; + struct slab *slabp; + unsigned long flags; + + if (!pfmemalloc_active) + return; + + spin_lock_irqsave(&l3->list_lock, flags); + list_for_each_entry(slabp, &l3->slabs_full, list) + if (is_slab_pfmemalloc(slabp)) + goto out; + + list_for_each_entry(slabp, &l3->slabs_partial, list) + if (is_slab_pfmemalloc(slabp)) + goto out; + + list_for_each_entry(slabp, &l3->slabs_free, list) + if (is_slab_pfmemalloc(slabp)) + goto out; + + pfmemalloc_active = false; +out: + spin_unlock_irqrestore(&l3->list_lock, flags); +} + +static void *ac_get_obj(struct kmem_cache *cachep, struct array_cache *ac, + gfp_t flags, bool force_refill) +{ + int i; + void *objp = ac->entry[--ac->avail]; + + /* Ensure the caller is allowed to use objects from PFMEMALLOC slab */ + if (unlikely(is_obj_pfmemalloc(objp))) { + struct kmem_list3 *l3; + + if (gfp_pfmemalloc_allowed(flags)) { + clear_obj_pfmemalloc(&objp); + return objp; + } + + /* The caller cannot use PFMEMALLOC objects, find another one */ + for (i = 1; i < ac->avail; i++) { + /* If a !PFMEMALLOC object is found, swap them */ + if (!is_obj_pfmemalloc(ac->entry[i])) { + objp = ac->entry[i]; + ac->entry[i] = ac->entry[ac->avail]; + ac->entry[ac->avail] = objp; + return objp; + } + } + + /* + * If there are empty slabs on the slabs_free list and we are + * being forced to refill the cache, mark this one !pfmemalloc. + */ + l3 = cachep->nodelists[numa_mem_id()]; + if (!list_empty(&l3->slabs_free) && force_refill) { + struct slab *slabp = virt_to_slab(objp); + ClearPageSlabPfmemalloc(virt_to_page(slabp->s_mem)); + clear_obj_pfmemalloc(&objp); + recheck_pfmemalloc_active(cachep, ac); + return objp; + } + + /* No !PFMEMALLOC objects available */ + ac->avail++; + objp = NULL; + } + + return objp; +} + +static void ac_put_obj(struct kmem_cache *cachep, struct array_cache *ac, + void *objp) +{ + if (unlikely(pfmemalloc_active)) { + /* Some pfmemalloc slabs exist, check if this is one */ + struct page *page = virt_to_page(objp); + if (PageSlabPfmemalloc(page)) + set_obj_pfmemalloc(&objp); + } + + ac->entry[ac->avail++] = objp; +} + /* * Transfer objects in one arraycache to another. * Locking must be handled by the caller. @@ -1076,7 +1201,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) STATS_INC_ACOVERFLOW(cachep); __drain_alien_cache(cachep, alien, nodeid); } - alien->entry[alien->avail++] = objp; + ac_put_obj(cachep, alien, objp); spin_unlock(&alien->lock); } else { spin_lock(&(cachep->nodelists[nodeid])->list_lock); @@ -1759,6 +1884,10 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid) return NULL; } + /* Record if ALLOC_PFMEMALLOC was set when allocating the slab */ + if (unlikely(page->pfmemalloc)) + pfmemalloc_active = true; + nr_pages = (1 << cachep->gfporder); if (cachep->flags & SLAB_RECLAIM_ACCOUNT) add_zone_page_state(page_zone(page), @@ -1766,9 +1895,13 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid) else add_zone_page_state(page_zone(page), NR_SLAB_UNRECLAIMABLE, nr_pages); - for (i = 0; i < nr_pages; i++) + for (i = 0; i < nr_pages; i++) { __SetPageSlab(page + i); + if (page->pfmemalloc) + SetPageSlabPfmemalloc(page + i); + } + if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) { kmemcheck_alloc_shadow(page, cachep->gfporder, flags, nodeid); @@ -1800,6 +1933,7 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr) NR_SLAB_UNRECLAIMABLE, nr_freed); while (i--) { BUG_ON(!PageSlab(page)); + __ClearPageSlabPfmemalloc(page); __ClearPageSlab(page); page++; } @@ -3015,16 +3149,19 @@ bad: #define check_slabp(x,y) do { } while(0) #endif -static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags) +static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags, + bool force_refill) { int batchcount; struct kmem_list3 *l3; struct array_cache *ac; int node; -retry: check_irq_off(); node = numa_mem_id(); + if (unlikely(force_refill)) + goto force_grow; +retry: ac = cpu_cache_get(cachep); batchcount = ac->batchcount; if (!ac->touched && batchcount > BATCHREFILL_LIMIT) { @@ -3074,8 +3211,8 @@ retry: STATS_INC_ACTIVE(cachep); STATS_SET_HIGH(cachep); - ac->entry[ac->avail++] = slab_get_obj(cachep, slabp, - node); + ac_put_obj(cachep, ac, slab_get_obj(cachep, slabp, + node)); } check_slabp(cachep, slabp); @@ -3094,18 +3231,22 @@ alloc_done: if (unlikely(!ac->avail)) { int x; +force_grow: x = cache_grow(cachep, flags | GFP_THISNODE, node, NULL); /* cache_grow can reenable interrupts, then ac could change. */ ac = cpu_cache_get(cachep); - if (!x && ac->avail == 0) /* no objects in sight? abort */ + + /* no objects in sight? abort */ + if (!x && (ac->avail == 0 || force_refill)) return NULL; if (!ac->avail) /* objects refilled by interrupt? */ goto retry; } ac->touched = 1; - return ac->entry[--ac->avail]; + + return ac_get_obj(cachep, ac, flags, force_refill); } static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep, @@ -3187,23 +3328,35 @@ static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags) { void *objp; struct array_cache *ac; + bool force_refill = false; check_irq_off(); ac = cpu_cache_get(cachep); if (likely(ac->avail)) { - STATS_INC_ALLOCHIT(cachep); ac->touched = 1; - objp = ac->entry[--ac->avail]; - } else { - STATS_INC_ALLOCMISS(cachep); - objp = cache_alloc_refill(cachep, flags); + objp = ac_get_obj(cachep, ac, flags, false); + /* - * the 'ac' may be updated by cache_alloc_refill(), - * and kmemleak_erase() requires its correct value. + * Allow for the possibility all avail objects are not allowed + * by the current flags */ - ac = cpu_cache_get(cachep); + if (objp) { + STATS_INC_ALLOCHIT(cachep); + goto out; + } + force_refill = true; } + + STATS_INC_ALLOCMISS(cachep); + objp = cache_alloc_refill(cachep, flags, force_refill); + /* + * the 'ac' may be updated by cache_alloc_refill(), + * and kmemleak_erase() requires its correct value. + */ + ac = cpu_cache_get(cachep); + +out: /* * To avoid a false negative, if an object that is in one of the * per-CPU caches is leaked, we need to make sure kmemleak doesn't @@ -3525,9 +3678,12 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects, struct kmem_list3 *l3; for (i = 0; i < nr_objects; i++) { - void *objp = objpp[i]; + void *objp; struct slab *slabp; + clear_obj_pfmemalloc(&objpp[i]); + objp = objpp[i]; + slabp = virt_to_slab(objp); l3 = cachep->nodelists[node]; list_del(&slabp->list); @@ -3645,7 +3801,7 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp, cache_flusharray(cachep, ac); } - ac->entry[ac->avail++] = objp; + ac_put_obj(cachep, ac, objp); } /** |