diff options
author | Vlastimil Babka <vbabka@suse.cz> | 2023-12-28 19:19:50 +0100 |
---|---|---|
committer | Vlastimil Babka <vbabka@suse.cz> | 2024-01-04 12:32:19 +0100 |
commit | 61d7e367f8bcc8083f02dcc5ce89b98b1480929d (patch) | |
tree | 3252503bb4edf51ca281f20d50836743e065b563 /mm/slub.c | |
parent | slub: Update frozen slabs documentations in the source (diff) | |
parent | mm/slub: free KFENCE objects in slab_free_hook() (diff) | |
download | linux-61d7e367f8bcc8083f02dcc5ce89b98b1480929d.tar.xz linux-61d7e367f8bcc8083f02dcc5ce89b98b1480929d.zip |
Merge branch 'slab/for-6.8/slub-hook-cleanups' into slab/for-next
Merge the SLAB allocator removal and a number of subsequent SLUB
cleanups and optimizations.
Diffstat (limited to 'mm/slub.c')
-rw-r--r-- | mm/slub.c | 743 |
1 file changed, 679 insertions, 64 deletions
diff --git a/mm/slub.c b/mm/slub.c index 4fc203a4fa03..fac07382d3a6 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -34,6 +34,7 @@ #include <linux/memory.h> #include <linux/math64.h> #include <linux/fault-inject.h> +#include <linux/kmemleak.h> #include <linux/stacktrace.h> #include <linux/prefetch.h> #include <linux/memcontrol.h> @@ -345,6 +346,60 @@ static void debugfs_slab_add(struct kmem_cache *); static inline void debugfs_slab_add(struct kmem_cache *s) { } #endif +enum stat_item { + ALLOC_FASTPATH, /* Allocation from cpu slab */ + ALLOC_SLOWPATH, /* Allocation by getting a new cpu slab */ + FREE_FASTPATH, /* Free to cpu slab */ + FREE_SLOWPATH, /* Freeing not to cpu slab */ + FREE_FROZEN, /* Freeing to frozen slab */ + FREE_ADD_PARTIAL, /* Freeing moves slab to partial list */ + FREE_REMOVE_PARTIAL, /* Freeing removes last object */ + ALLOC_FROM_PARTIAL, /* Cpu slab acquired from node partial list */ + ALLOC_SLAB, /* Cpu slab acquired from page allocator */ + ALLOC_REFILL, /* Refill cpu slab from slab freelist */ + ALLOC_NODE_MISMATCH, /* Switching cpu slab */ + FREE_SLAB, /* Slab freed to the page allocator */ + CPUSLAB_FLUSH, /* Abandoning of the cpu slab */ + DEACTIVATE_FULL, /* Cpu slab was full when deactivated */ + DEACTIVATE_EMPTY, /* Cpu slab was empty when deactivated */ + DEACTIVATE_TO_HEAD, /* Cpu slab was moved to the head of partials */ + DEACTIVATE_TO_TAIL, /* Cpu slab was moved to the tail of partials */ + DEACTIVATE_REMOTE_FREES,/* Slab contained remotely freed objects */ + DEACTIVATE_BYPASS, /* Implicit deactivation */ + ORDER_FALLBACK, /* Number of times fallback was necessary */ + CMPXCHG_DOUBLE_CPU_FAIL,/* Failures of this_cpu_cmpxchg_double */ + CMPXCHG_DOUBLE_FAIL, /* Failures of slab freelist update */ + CPU_PARTIAL_ALLOC, /* Used cpu partial on alloc */ + CPU_PARTIAL_FREE, /* Refill cpu partial on free */ + CPU_PARTIAL_NODE, /* Refill cpu partial from node partial */ + CPU_PARTIAL_DRAIN, /* Drain cpu partial to node partial */ + 
NR_SLUB_STAT_ITEMS +}; + +#ifndef CONFIG_SLUB_TINY +/* + * When changing the layout, make sure freelist and tid are still compatible + * with this_cpu_cmpxchg_double() alignment requirements. + */ +struct kmem_cache_cpu { + union { + struct { + void **freelist; /* Pointer to next available object */ + unsigned long tid; /* Globally unique transaction id */ + }; + freelist_aba_t freelist_tid; + }; + struct slab *slab; /* The slab from which we are allocating */ +#ifdef CONFIG_SLUB_CPU_PARTIAL + struct slab *partial; /* Partially allocated frozen slabs */ +#endif + local_lock_t lock; /* Protects the fields above */ +#ifdef CONFIG_SLUB_STATS + unsigned int stat[NR_SLUB_STAT_ITEMS]; +#endif +}; +#endif /* CONFIG_SLUB_TINY */ + static inline void stat(const struct kmem_cache *s, enum stat_item si) { #ifdef CONFIG_SLUB_STATS @@ -356,6 +411,41 @@ static inline void stat(const struct kmem_cache *s, enum stat_item si) #endif } +static inline +void stat_add(const struct kmem_cache *s, enum stat_item si, int v) +{ +#ifdef CONFIG_SLUB_STATS + raw_cpu_add(s->cpu_slab->stat[si], v); +#endif +} + +/* + * The slab lists for all objects. + */ +struct kmem_cache_node { + spinlock_t list_lock; + unsigned long nr_partial; + struct list_head partial; +#ifdef CONFIG_SLUB_DEBUG + atomic_long_t nr_slabs; + atomic_long_t total_objects; + struct list_head full; +#endif +}; + +static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node) +{ + return s->node[node]; +} + +/* + * Iterator over all nodes. The body will be executed for each node that has + * a kmem_cache_node structure allocated (which is true for all online nodes) + */ +#define for_each_kmem_cache_node(__s, __node, __n) \ + for (__node = 0; __node < nr_node_ids; __node++) \ + if ((__n = get_node(__s, __node))) + /* * Tracks for which NUMA nodes we have kmem_cache_nodes allocated. 
* Corresponds to node_state[N_NORMAL_MEMORY], but can temporarily @@ -1774,12 +1864,214 @@ static bool freelist_corrupted(struct kmem_cache *s, struct slab *slab, #endif #endif /* CONFIG_SLUB_DEBUG */ +static inline enum node_stat_item cache_vmstat_idx(struct kmem_cache *s) +{ + return (s->flags & SLAB_RECLAIM_ACCOUNT) ? + NR_SLAB_RECLAIMABLE_B : NR_SLAB_UNRECLAIMABLE_B; +} + +#ifdef CONFIG_MEMCG_KMEM +static inline void memcg_free_slab_cgroups(struct slab *slab) +{ + kfree(slab_objcgs(slab)); + slab->memcg_data = 0; +} + +static inline size_t obj_full_size(struct kmem_cache *s) +{ + /* + * For each accounted object there is an extra space which is used + * to store obj_cgroup membership. Charge it too. + */ + return s->size + sizeof(struct obj_cgroup *); +} + +/* + * Returns false if the allocation should fail. + */ +static bool __memcg_slab_pre_alloc_hook(struct kmem_cache *s, + struct list_lru *lru, + struct obj_cgroup **objcgp, + size_t objects, gfp_t flags) +{ + /* + * The obtained objcg pointer is safe to use within the current scope, + * defined by current task or set_active_memcg() pair. + * obj_cgroup_get() is used to get a permanent reference. + */ + struct obj_cgroup *objcg = current_obj_cgroup(); + if (!objcg) + return true; + + if (lru) { + int ret; + struct mem_cgroup *memcg; + + memcg = get_mem_cgroup_from_objcg(objcg); + ret = memcg_list_lru_alloc(memcg, lru, flags); + css_put(&memcg->css); + + if (ret) + return false; + } + + if (obj_cgroup_charge(objcg, flags, objects * obj_full_size(s))) + return false; + + *objcgp = objcg; + return true; +} + +/* + * Returns false if the allocation should fail. 
+ */ +static __fastpath_inline +bool memcg_slab_pre_alloc_hook(struct kmem_cache *s, struct list_lru *lru, + struct obj_cgroup **objcgp, size_t objects, + gfp_t flags) +{ + if (!memcg_kmem_online()) + return true; + + if (likely(!(flags & __GFP_ACCOUNT) && !(s->flags & SLAB_ACCOUNT))) + return true; + + return likely(__memcg_slab_pre_alloc_hook(s, lru, objcgp, objects, + flags)); +} + +static void __memcg_slab_post_alloc_hook(struct kmem_cache *s, + struct obj_cgroup *objcg, + gfp_t flags, size_t size, + void **p) +{ + struct slab *slab; + unsigned long off; + size_t i; + + flags &= gfp_allowed_mask; + + for (i = 0; i < size; i++) { + if (likely(p[i])) { + slab = virt_to_slab(p[i]); + + if (!slab_objcgs(slab) && + memcg_alloc_slab_cgroups(slab, s, flags, false)) { + obj_cgroup_uncharge(objcg, obj_full_size(s)); + continue; + } + + off = obj_to_index(s, slab, p[i]); + obj_cgroup_get(objcg); + slab_objcgs(slab)[off] = objcg; + mod_objcg_state(objcg, slab_pgdat(slab), + cache_vmstat_idx(s), obj_full_size(s)); + } else { + obj_cgroup_uncharge(objcg, obj_full_size(s)); + } + } +} + +static __fastpath_inline +void memcg_slab_post_alloc_hook(struct kmem_cache *s, struct obj_cgroup *objcg, + gfp_t flags, size_t size, void **p) +{ + if (likely(!memcg_kmem_online() || !objcg)) + return; + + return __memcg_slab_post_alloc_hook(s, objcg, flags, size, p); +} + +static void __memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab, + void **p, int objects, + struct obj_cgroup **objcgs) +{ + for (int i = 0; i < objects; i++) { + struct obj_cgroup *objcg; + unsigned int off; + + off = obj_to_index(s, slab, p[i]); + objcg = objcgs[off]; + if (!objcg) + continue; + + objcgs[off] = NULL; + obj_cgroup_uncharge(objcg, obj_full_size(s)); + mod_objcg_state(objcg, slab_pgdat(slab), cache_vmstat_idx(s), + -obj_full_size(s)); + obj_cgroup_put(objcg); + } +} + +static __fastpath_inline +void memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab, void **p, + int objects) +{ + 
struct obj_cgroup **objcgs; + + if (!memcg_kmem_online()) + return; + + objcgs = slab_objcgs(slab); + if (likely(!objcgs)) + return; + + __memcg_slab_free_hook(s, slab, p, objects, objcgs); +} + +static inline +void memcg_slab_alloc_error_hook(struct kmem_cache *s, int objects, + struct obj_cgroup *objcg) +{ + if (objcg) + obj_cgroup_uncharge(objcg, objects * obj_full_size(s)); +} +#else /* CONFIG_MEMCG_KMEM */ +static inline struct mem_cgroup *memcg_from_slab_obj(void *ptr) +{ + return NULL; +} + +static inline void memcg_free_slab_cgroups(struct slab *slab) +{ +} + +static inline bool memcg_slab_pre_alloc_hook(struct kmem_cache *s, + struct list_lru *lru, + struct obj_cgroup **objcgp, + size_t objects, gfp_t flags) +{ + return true; +} + +static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s, + struct obj_cgroup *objcg, + gfp_t flags, size_t size, + void **p) +{ +} + +static inline void memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab, + void **p, int objects) +{ +} + +static inline +void memcg_slab_alloc_error_hook(struct kmem_cache *s, int objects, + struct obj_cgroup *objcg) +{ +} +#endif /* CONFIG_MEMCG_KMEM */ + /* * Hooks for other subsystems that check memory allocations. In a typical * production configuration these hooks all should produce no code at all. + * + * Returns true if freeing of the object can proceed, false if its reuse + * was delayed by KASAN quarantine, or it was returned to KFENCE. 
*/ -static __always_inline bool slab_free_hook(struct kmem_cache *s, - void *x, bool init) +static __always_inline +bool slab_free_hook(struct kmem_cache *s, void *x, bool init) { kmemleak_free_recursive(x, s->flags); kmsan_slab_free(s, x); @@ -1794,6 +2086,9 @@ static __always_inline bool slab_free_hook(struct kmem_cache *s, __kcsan_check_access(x, s->object_size, KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ASSERT); + if (kfence_free(x)) + return false; + /* * As memory initialization might be integrated into KASAN, * kasan_slab_free and initialization memset's must be @@ -1802,7 +2097,7 @@ static __always_inline bool slab_free_hook(struct kmem_cache *s, * The initialization memset's clear the object and the metadata, * but don't touch the SLAB redzone. */ - if (init) { + if (unlikely(init)) { int rsize; if (!kasan_has_integrated_init()) @@ -1812,7 +2107,7 @@ static __always_inline bool slab_free_hook(struct kmem_cache *s, s->size - s->inuse - rsize); } /* KASAN might put x into memory quarantine, delaying its reuse. */ - return kasan_slab_free(s, x, init); + return !kasan_slab_free(s, x, init); } static inline bool slab_free_freelist_hook(struct kmem_cache *s, @@ -1822,23 +2117,26 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s, void *object; void *next = *head; - void *old_tail = *tail ? 
*tail : *head; + void *old_tail = *tail; + bool init; if (is_kfence_address(next)) { slab_free_hook(s, next, false); - return true; + return false; } /* Head and tail of the reconstructed freelist */ *head = NULL; *tail = NULL; + init = slab_want_init_on_free(s); + do { object = next; next = get_freepointer(s, object); /* If object's reuse doesn't have to be delayed */ - if (!slab_free_hook(s, object, slab_want_init_on_free(s))) { + if (likely(slab_free_hook(s, object, init))) { /* Move object to the new freelist */ set_freepointer(s, object, *head); *head = object; @@ -1853,9 +2151,6 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s, } } while (object != old_tail); - if (*head == *tail) - *tail = NULL; - return *head != NULL; } @@ -2008,6 +2303,26 @@ static inline bool shuffle_freelist(struct kmem_cache *s, struct slab *slab) } #endif /* CONFIG_SLAB_FREELIST_RANDOM */ +static __always_inline void account_slab(struct slab *slab, int order, + struct kmem_cache *s, gfp_t gfp) +{ + if (memcg_kmem_online() && (s->flags & SLAB_ACCOUNT)) + memcg_alloc_slab_cgroups(slab, s, gfp, true); + + mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s), + PAGE_SIZE << order); +} + +static __always_inline void unaccount_slab(struct slab *slab, int order, + struct kmem_cache *s) +{ + if (memcg_kmem_online()) + memcg_free_slab_cgroups(slab); + + mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s), + -(PAGE_SIZE << order)); +} + static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) { struct slab *slab; @@ -3420,6 +3735,86 @@ static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s, 0, sizeof(void *)); } +noinline int should_failslab(struct kmem_cache *s, gfp_t gfpflags) +{ + if (__should_failslab(s, gfpflags)) + return -ENOMEM; + return 0; +} +ALLOW_ERROR_INJECTION(should_failslab, ERRNO); + +static __fastpath_inline +struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s, + struct list_lru *lru, + struct 
obj_cgroup **objcgp, + size_t size, gfp_t flags) +{ + flags &= gfp_allowed_mask; + + might_alloc(flags); + + if (unlikely(should_failslab(s, flags))) + return NULL; + + if (unlikely(!memcg_slab_pre_alloc_hook(s, lru, objcgp, size, flags))) + return NULL; + + return s; +} + +static __fastpath_inline +void slab_post_alloc_hook(struct kmem_cache *s, struct obj_cgroup *objcg, + gfp_t flags, size_t size, void **p, bool init, + unsigned int orig_size) +{ + unsigned int zero_size = s->object_size; + bool kasan_init = init; + size_t i; + gfp_t init_flags = flags & gfp_allowed_mask; + + /* + * For kmalloc object, the allocated memory size(object_size) is likely + * larger than the requested size(orig_size). If redzone check is + * enabled for the extra space, don't zero it, as it will be redzoned + * soon. The redzone operation for this extra space could be seen as a + * replacement of current poisoning under certain debug option, and + * won't break other sanity checks. + */ + if (kmem_cache_debug_flags(s, SLAB_STORE_USER | SLAB_RED_ZONE) && + (s->flags & SLAB_KMALLOC)) + zero_size = orig_size; + + /* + * When slub_debug is enabled, avoid memory initialization integrated + * into KASAN and instead zero out the memory via the memset below with + * the proper size. Otherwise, KASAN might overwrite SLUB redzones and + * cause false-positive reports. This does not lead to a performance + * penalty on production builds, as slub_debug is not intended to be + * enabled there. + */ + if (__slub_debug_enabled()) + kasan_init = false; + + /* + * As memory initialization might be integrated into KASAN, + * kasan_slab_alloc and initialization memset must be + * kept together to avoid discrepancies in behavior. + * + * As p[i] might get tagged, memset and kmemleak hook come after KASAN. 
+ */ + for (i = 0; i < size; i++) { + p[i] = kasan_slab_alloc(s, p[i], init_flags, kasan_init); + if (p[i] && init && (!kasan_init || + !kasan_has_integrated_init())) + memset(p[i], 0, zero_size); + kmemleak_alloc_recursive(p[i], s->object_size, 1, + s->flags, init_flags); + kmsan_slab_alloc(s, p[i], init_flags); + } + + memcg_slab_post_alloc_hook(s, objcg, flags, size, p); +} + /* * Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc) * have the fastpath folded into their functions. So no function call @@ -3438,7 +3833,7 @@ static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s, struct list bool init = false; s = slab_pre_alloc_hook(s, lru, &objcg, 1, gfpflags); - if (!s) + if (unlikely(!s)) return NULL; object = kfence_alloc(s, orig_size, gfpflags); @@ -3460,53 +3855,169 @@ out: return object; } -static __fastpath_inline void *slab_alloc(struct kmem_cache *s, struct list_lru *lru, - gfp_t gfpflags, unsigned long addr, size_t orig_size) +void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags) { - return slab_alloc_node(s, lru, gfpflags, NUMA_NO_NODE, addr, orig_size); + void *ret = slab_alloc_node(s, NULL, gfpflags, NUMA_NO_NODE, _RET_IP_, + s->object_size); + + trace_kmem_cache_alloc(_RET_IP_, ret, s, gfpflags, NUMA_NO_NODE); + + return ret; } +EXPORT_SYMBOL(kmem_cache_alloc); -static __fastpath_inline -void *__kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru, - gfp_t gfpflags) +void *kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru, + gfp_t gfpflags) { - void *ret = slab_alloc(s, lru, gfpflags, _RET_IP_, s->object_size); + void *ret = slab_alloc_node(s, lru, gfpflags, NUMA_NO_NODE, _RET_IP_, + s->object_size); trace_kmem_cache_alloc(_RET_IP_, ret, s, gfpflags, NUMA_NO_NODE); return ret; } +EXPORT_SYMBOL(kmem_cache_alloc_lru); -void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags) +/** + * kmem_cache_alloc_node - Allocate an object on the specified node + * @s: The cache to allocate from. 
+ * @gfpflags: See kmalloc(). + * @node: node number of the target node. + * + * Identical to kmem_cache_alloc but it will allocate memory on the given + * node, which can improve the performance for cpu bound structures. + * + * Fallback to other node is possible if __GFP_THISNODE is not set. + * + * Return: pointer to the new object or %NULL in case of error + */ +void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node) { - return __kmem_cache_alloc_lru(s, NULL, gfpflags); + void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, s->object_size); + + trace_kmem_cache_alloc(_RET_IP_, ret, s, gfpflags, node); + + return ret; } -EXPORT_SYMBOL(kmem_cache_alloc); +EXPORT_SYMBOL(kmem_cache_alloc_node); -void *kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru, - gfp_t gfpflags) +/* + * To avoid unnecessary overhead, we pass through large allocation requests + * directly to the page allocator. We use __GFP_COMP, because we will need to + * know the allocation order to free the pages properly in kfree. + */ +static void *__kmalloc_large_node(size_t size, gfp_t flags, int node) { - return __kmem_cache_alloc_lru(s, lru, gfpflags); + struct page *page; + void *ptr = NULL; + unsigned int order = get_order(size); + + if (unlikely(flags & GFP_SLAB_BUG_MASK)) + flags = kmalloc_fix_flags(flags); + + flags |= __GFP_COMP; + page = alloc_pages_node(node, flags, order); + if (page) { + ptr = page_address(page); + mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B, + PAGE_SIZE << order); + } + + ptr = kasan_kmalloc_large(ptr, size, flags); + /* As ptr might get tagged, call kmemleak hook after KASAN. 
*/ + kmemleak_alloc(ptr, size, 1, flags); + kmsan_kmalloc_large(ptr, size, flags); + + return ptr; } -EXPORT_SYMBOL(kmem_cache_alloc_lru); -void *__kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, - int node, size_t orig_size, - unsigned long caller) +void *kmalloc_large(size_t size, gfp_t flags) +{ + void *ret = __kmalloc_large_node(size, flags, NUMA_NO_NODE); + + trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << get_order(size), + flags, NUMA_NO_NODE); + return ret; +} +EXPORT_SYMBOL(kmalloc_large); + +void *kmalloc_large_node(size_t size, gfp_t flags, int node) { - return slab_alloc_node(s, NULL, gfpflags, node, - caller, orig_size); + void *ret = __kmalloc_large_node(size, flags, node); + + trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << get_order(size), + flags, node); + return ret; } +EXPORT_SYMBOL(kmalloc_large_node); -void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node) +static __always_inline +void *__do_kmalloc_node(size_t size, gfp_t flags, int node, + unsigned long caller) { - void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, s->object_size); + struct kmem_cache *s; + void *ret; - trace_kmem_cache_alloc(_RET_IP_, ret, s, gfpflags, node); + if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) { + ret = __kmalloc_large_node(size, flags, node); + trace_kmalloc(caller, ret, size, + PAGE_SIZE << get_order(size), flags, node); + return ret; + } + + if (unlikely(!size)) + return ZERO_SIZE_PTR; + + s = kmalloc_slab(size, flags, caller); + ret = slab_alloc_node(s, NULL, flags, node, caller, size); + ret = kasan_kmalloc(s, ret, size, flags); + trace_kmalloc(caller, ret, size, s->size, flags, node); return ret; } -EXPORT_SYMBOL(kmem_cache_alloc_node); + +void *__kmalloc_node(size_t size, gfp_t flags, int node) +{ + return __do_kmalloc_node(size, flags, node, _RET_IP_); +} +EXPORT_SYMBOL(__kmalloc_node); + +void *__kmalloc(size_t size, gfp_t flags) +{ + return __do_kmalloc_node(size, flags, NUMA_NO_NODE, _RET_IP_); +} 
+EXPORT_SYMBOL(__kmalloc); + +void *__kmalloc_node_track_caller(size_t size, gfp_t flags, + int node, unsigned long caller) +{ + return __do_kmalloc_node(size, flags, node, caller); +} +EXPORT_SYMBOL(__kmalloc_node_track_caller); + +void *kmalloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size) +{ + void *ret = slab_alloc_node(s, NULL, gfpflags, NUMA_NO_NODE, + _RET_IP_, size); + + trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags, NUMA_NO_NODE); + + ret = kasan_kmalloc(s, ret, size, gfpflags); + return ret; +} +EXPORT_SYMBOL(kmalloc_trace); + +void *kmalloc_node_trace(struct kmem_cache *s, gfp_t gfpflags, + int node, size_t size) +{ + void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, size); + + trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags, node); + + ret = kasan_kmalloc(s, ret, size, gfpflags); + return ret; +} +EXPORT_SYMBOL(kmalloc_node_trace); static noinline void free_to_partial_list( struct kmem_cache *s, struct slab *slab, @@ -3592,9 +4103,6 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab, stat(s, FREE_SLOWPATH); - if (kfence_free(head)) - return; - if (IS_ENABLED(CONFIG_SLUB_TINY) || kmem_cache_debug(s)) { free_to_partial_list(s, slab, head, tail, cnt, addr); return; @@ -3716,7 +4224,6 @@ static __always_inline void do_slab_free(struct kmem_cache *s, struct slab *slab, void *head, void *tail, int cnt, unsigned long addr) { - void *tail_obj = tail ? 
: head; struct kmem_cache_cpu *c; unsigned long tid; void **freelist; @@ -3735,14 +4242,14 @@ redo: barrier(); if (unlikely(slab != c->slab)) { - __slab_free(s, slab, head, tail_obj, cnt, addr); + __slab_free(s, slab, head, tail, cnt, addr); return; } if (USE_LOCKLESS_FAST_PATH()) { freelist = READ_ONCE(c->freelist); - set_freepointer(s, tail_obj, freelist); + set_freepointer(s, tail, freelist); if (unlikely(!__update_cpu_freelist_fast(s, freelist, head, tid))) { note_cmpxchg_failure("slab_free", s, tid); @@ -3759,60 +4266,143 @@ redo: tid = c->tid; freelist = c->freelist; - set_freepointer(s, tail_obj, freelist); + set_freepointer(s, tail, freelist); c->freelist = head; c->tid = next_tid(tid); local_unlock(&s->cpu_slab->lock); } - stat(s, FREE_FASTPATH); + stat_add(s, FREE_FASTPATH, cnt); } #else /* CONFIG_SLUB_TINY */ static void do_slab_free(struct kmem_cache *s, struct slab *slab, void *head, void *tail, int cnt, unsigned long addr) { - void *tail_obj = tail ? : head; - - __slab_free(s, slab, head, tail_obj, cnt, addr); + __slab_free(s, slab, head, tail, cnt, addr); } #endif /* CONFIG_SLUB_TINY */ -static __fastpath_inline void slab_free(struct kmem_cache *s, struct slab *slab, - void *head, void *tail, void **p, int cnt, - unsigned long addr) +static __fastpath_inline +void slab_free(struct kmem_cache *s, struct slab *slab, void *object, + unsigned long addr) +{ + memcg_slab_free_hook(s, slab, &object, 1); + + if (likely(slab_free_hook(s, object, slab_want_init_on_free(s)))) + do_slab_free(s, slab, object, object, 1, addr); +} + +static __fastpath_inline +void slab_free_bulk(struct kmem_cache *s, struct slab *slab, void *head, + void *tail, void **p, int cnt, unsigned long addr) { memcg_slab_free_hook(s, slab, p, cnt); /* * With KASAN enabled slab_free_freelist_hook modifies the freelist * to remove objects, whose reuse must be delayed. 
*/ - if (slab_free_freelist_hook(s, &head, &tail, &cnt)) + if (likely(slab_free_freelist_hook(s, &head, &tail, &cnt))) do_slab_free(s, slab, head, tail, cnt, addr); } #ifdef CONFIG_KASAN_GENERIC void ___cache_free(struct kmem_cache *cache, void *x, unsigned long addr) { - do_slab_free(cache, virt_to_slab(x), x, NULL, 1, addr); + do_slab_free(cache, virt_to_slab(x), x, x, 1, addr); } #endif -void __kmem_cache_free(struct kmem_cache *s, void *x, unsigned long caller) +static inline struct kmem_cache *virt_to_cache(const void *obj) +{ + struct slab *slab; + + slab = virt_to_slab(obj); + if (WARN_ONCE(!slab, "%s: Object is not a Slab page!\n", __func__)) + return NULL; + return slab->slab_cache; +} + +static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x) { - slab_free(s, virt_to_slab(x), x, NULL, &x, 1, caller); + struct kmem_cache *cachep; + + if (!IS_ENABLED(CONFIG_SLAB_FREELIST_HARDENED) && + !kmem_cache_debug_flags(s, SLAB_CONSISTENCY_CHECKS)) + return s; + + cachep = virt_to_cache(x); + if (WARN(cachep && cachep != s, + "%s: Wrong slab cache. %s but object is from %s\n", + __func__, s->name, cachep->name)) + print_tracking(cachep, x); + return cachep; } +/** + * kmem_cache_free - Deallocate an object + * @s: The cache the allocation was from. + * @x: The previously allocated object. + * + * Free an object which was previously allocated from this + * cache. 
+ */ void kmem_cache_free(struct kmem_cache *s, void *x) { s = cache_from_obj(s, x); if (!s) return; trace_kmem_cache_free(_RET_IP_, x, s); - slab_free(s, virt_to_slab(x), x, NULL, &x, 1, _RET_IP_); + slab_free(s, virt_to_slab(x), x, _RET_IP_); } EXPORT_SYMBOL(kmem_cache_free); +static void free_large_kmalloc(struct folio *folio, void *object) +{ + unsigned int order = folio_order(folio); + + if (WARN_ON_ONCE(order == 0)) + pr_warn_once("object pointer: 0x%p\n", object); + + kmemleak_free(object); + kasan_kfree_large(object); + kmsan_kfree_large(object); + + mod_lruvec_page_state(folio_page(folio, 0), NR_SLAB_UNRECLAIMABLE_B, + -(PAGE_SIZE << order)); + __free_pages(folio_page(folio, 0), order); +} + +/** + * kfree - free previously allocated memory + * @object: pointer returned by kmalloc() or kmem_cache_alloc() + * + * If @object is NULL, no operation is performed. + */ +void kfree(const void *object) +{ + struct folio *folio; + struct slab *slab; + struct kmem_cache *s; + void *x = (void *)object; + + trace_kfree(_RET_IP_, object); + + if (unlikely(ZERO_OR_NULL_PTR(object))) + return; + + folio = virt_to_folio(object); + if (unlikely(!folio_test_slab(folio))) { + free_large_kmalloc(folio, (void *)object); + return; + } + + slab = folio_slab(folio); + s = slab->slab_cache; + slab_free(s, slab, x, _RET_IP_); +} +EXPORT_SYMBOL(kfree); + struct detached_freelist { struct slab *slab; void *tail; @@ -3892,6 +4482,27 @@ int build_detached_freelist(struct kmem_cache *s, size_t size, return same; } +/* + * Internal bulk free of objects that were not initialised by the post alloc + * hooks and thus should not be processed by the free hooks + */ +static void __kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p) +{ + if (!size) + return; + + do { + struct detached_freelist df; + + size = build_detached_freelist(s, size, p, &df); + if (!df.slab) + continue; + + do_slab_free(df.s, df.slab, df.freelist, df.tail, df.cnt, + _RET_IP_); + } while (likely(size)); +} + 
/* Note that interrupts must be enabled when calling this function. */ void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p) { @@ -3905,15 +4516,16 @@ void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p) if (!df.slab) continue; - slab_free(df.s, df.slab, df.freelist, df.tail, &p[size], df.cnt, - _RET_IP_); + slab_free_bulk(df.s, df.slab, df.freelist, df.tail, &p[size], + df.cnt, _RET_IP_); } while (likely(size)); } EXPORT_SYMBOL(kmem_cache_free_bulk); #ifndef CONFIG_SLUB_TINY -static inline int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, - size_t size, void **p, struct obj_cgroup *objcg) +static inline +int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, + void **p) { struct kmem_cache_cpu *c; unsigned long irqflags; @@ -3967,6 +4579,7 @@ static inline int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, c->freelist = get_freepointer(s, object); p[i] = object; maybe_wipe_obj_freeptr(s, p[i]); + stat(s, ALLOC_FASTPATH); } c->tid = next_tid(c->tid); local_unlock_irqrestore(&s->cpu_slab->lock, irqflags); @@ -3976,14 +4589,13 @@ static inline int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, error: slub_put_cpu_ptr(s->cpu_slab); - slab_post_alloc_hook(s, objcg, flags, i, p, false, s->object_size); - kmem_cache_free_bulk(s, i, p); + __kmem_cache_free_bulk(s, i, p); return 0; } #else /* CONFIG_SLUB_TINY */ static int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, - size_t size, void **p, struct obj_cgroup *objcg) + size_t size, void **p) { int i; @@ -4006,8 +4618,7 @@ static int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, return i; error: - slab_post_alloc_hook(s, objcg, flags, i, p, false, s->object_size); - kmem_cache_free_bulk(s, i, p); + __kmem_cache_free_bulk(s, i, p); return 0; } #endif /* CONFIG_SLUB_TINY */ @@ -4027,15 +4638,19 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, if (unlikely(!s)) return 0; - i = 
__kmem_cache_alloc_bulk(s, flags, size, p, objcg); + i = __kmem_cache_alloc_bulk(s, flags, size, p); /* * memcg and kmem_cache debug support and memory initialization. * Done outside of the IRQ disabled fastpath loop. */ - if (i != 0) + if (likely(i != 0)) { slab_post_alloc_hook(s, objcg, flags, size, p, slab_want_init_on_alloc(flags, s), s->object_size); + } else { + memcg_slab_alloc_error_hook(s, size, objcg); + } + return i; } EXPORT_SYMBOL(kmem_cache_alloc_bulk); |