From a0a44d9175b349df2462089140fb7f292100bd7c Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Mon, 27 May 2024 11:01:28 +0200 Subject: mm, slab: don't wrap internal functions with alloc_hooks() The functions __kmalloc_noprof(), kmalloc_large_noprof(), kmalloc_trace_noprof() and their _node variants are all internal to the implementations of kmalloc_noprof() and kmalloc_node_noprof() and are only declared in the "public" slab.h and exported so that those implementations can be static inline and distinguish the build-time constant size variants. The only other users for some of the internal functions are slub_kunit and fortify_kunit tests which make very short-lived allocations. Therefore we can stop wrapping them with the alloc_hooks() macro. Instead add a __ prefix to all of them and a comment documenting these as internal. Also rename __kmalloc_trace() to __kmalloc_cache() which is more descriptive - it is a variant of __kmalloc() where the exact kmalloc cache has been already determined. The usage in fortify_kunit can be removed completely, as the internal functions should be tested already through kmalloc() tests in the test variant that passes non-constant allocation size. Reported-by: Kent Overstreet Cc: Suren Baghdasaryan Cc: Kees Cook Reviewed-by: Kent Overstreet Acked-by: David Rientjes Signed-off-by: Vlastimil Babka --- mm/slub.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'mm/slub.c') diff --git a/mm/slub.c b/mm/slub.c index 0809760cf789..95e0a3332c44 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -4053,7 +4053,7 @@ EXPORT_SYMBOL(kmem_cache_alloc_node_noprof); * directly to the page allocator. We use __GFP_COMP, because we will need to * know the allocation order to free the pages properly in kfree. 
*/ -static void *__kmalloc_large_node(size_t size, gfp_t flags, int node) +static void *___kmalloc_large_node(size_t size, gfp_t flags, int node) { struct folio *folio; void *ptr = NULL; @@ -4078,25 +4078,25 @@ static void *__kmalloc_large_node(size_t size, gfp_t flags, int node) return ptr; } -void *kmalloc_large_noprof(size_t size, gfp_t flags) +void *__kmalloc_large_noprof(size_t size, gfp_t flags) { - void *ret = __kmalloc_large_node(size, flags, NUMA_NO_NODE); + void *ret = ___kmalloc_large_node(size, flags, NUMA_NO_NODE); trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << get_order(size), flags, NUMA_NO_NODE); return ret; } -EXPORT_SYMBOL(kmalloc_large_noprof); +EXPORT_SYMBOL(__kmalloc_large_noprof); -void *kmalloc_large_node_noprof(size_t size, gfp_t flags, int node) +void *__kmalloc_large_node_noprof(size_t size, gfp_t flags, int node) { - void *ret = __kmalloc_large_node(size, flags, node); + void *ret = ___kmalloc_large_node(size, flags, node); trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << get_order(size), flags, node); return ret; } -EXPORT_SYMBOL(kmalloc_large_node_noprof); +EXPORT_SYMBOL(__kmalloc_large_node_noprof); static __always_inline void *__do_kmalloc_node(size_t size, gfp_t flags, int node, @@ -4106,7 +4106,7 @@ void *__do_kmalloc_node(size_t size, gfp_t flags, int node, void *ret; if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) { - ret = __kmalloc_large_node(size, flags, node); + ret = __kmalloc_large_node_noprof(size, flags, node); trace_kmalloc(caller, ret, size, PAGE_SIZE << get_order(size), flags, node); return ret; @@ -4142,7 +4142,7 @@ void *kmalloc_node_track_caller_noprof(size_t size, gfp_t flags, } EXPORT_SYMBOL(kmalloc_node_track_caller_noprof); -void *kmalloc_trace_noprof(struct kmem_cache *s, gfp_t gfpflags, size_t size) +void *__kmalloc_cache_noprof(struct kmem_cache *s, gfp_t gfpflags, size_t size) { void *ret = slab_alloc_node(s, NULL, gfpflags, NUMA_NO_NODE, _RET_IP_, size); @@ -4152,10 +4152,10 @@ void *kmalloc_trace_noprof(struct kmem_cache *s, gfp_t gfpflags, size_t size) ret = kasan_kmalloc(s, ret, size, gfpflags); return ret; } -EXPORT_SYMBOL(kmalloc_trace_noprof); +EXPORT_SYMBOL(__kmalloc_cache_noprof); -void *kmalloc_node_trace_noprof(struct kmem_cache *s, gfp_t gfpflags, - int node, size_t size) +void *__kmalloc_cache_node_noprof(struct kmem_cache *s, gfp_t gfpflags, + int node, size_t size) { void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, size); @@ -4164,7 +4164,7 @@ void *kmalloc_node_trace_noprof(struct kmem_cache *s, gfp_t gfpflags, ret = kasan_kmalloc(s, ret, size, gfpflags); return ret; } -EXPORT_SYMBOL(kmalloc_node_trace_noprof); +EXPORT_SYMBOL(__kmalloc_cache_node_noprof); static noinline void free_to_partial_list( struct kmem_cache *s, struct slab *slab, -- cgit v1.2.3 From 4d2bcefa965b06a1f2be6912456bcfa86a34f184 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 31 May 2024 13:29:02 +0100 Subject: mm: Reduce the number of slab->folio casts Mark a few more folio functions as taking a const folio pointer, which allows us to remove a few places in slab which cast away the const. 
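As an aside for readers outside the kernel tree, here is a minimal, self-contained sketch of the pattern this change enables. The struct layouts and the flag bit below are simplified stand-ins, not the real kernel definitions: the point is only that once a read-only accessor takes a const pointer, the const-taking slab helpers can call it directly instead of casting the const away.

#include <stdbool.h>
#include <stdio.h>

struct folio {
	unsigned long flags;
};

/* Read-only accessor: taking const documents that it never writes. */
static bool folio_test_active(const struct folio *folio)
{
	return folio->flags & 0x1UL;		/* placeholder flag bit */
}

struct slab {
	struct folio folio;	/* toy stand-in for the real page overlay */
};

static const struct folio *slab_folio(const struct slab *slab)
{
	return &slab->folio;
}

static bool slab_test_pfmemalloc(const struct slab *slab)
{
	/* Before the change this needed a "(struct folio *)" cast. */
	return folio_test_active(slab_folio(slab));
}

int main(void)
{
	struct slab s = { .folio = { .flags = 0x1UL } };

	printf("pfmemalloc: %d\n", slab_test_pfmemalloc(&s));
	return 0;
}

Nothing else changes; the existing casts in mm/slab.h and mm/slub.c simply become unnecessary.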
Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Vlastimil Babka --- include/linux/mm.h | 6 +++--- mm/slab.h | 4 ++-- mm/slub.c | 6 ++---- 3 files changed, 7 insertions(+), 9 deletions(-) (limited to 'mm/slub.c') diff --git a/include/linux/mm.h b/include/linux/mm.h index 9849dfda44d4..a983d371fafa 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1105,7 +1105,7 @@ static inline unsigned int compound_order(struct page *page) * * Return: The order of the folio. */ -static inline unsigned int folio_order(struct folio *folio) +static inline unsigned int folio_order(const struct folio *folio) { if (!folio_test_large(folio)) return 0; @@ -2145,7 +2145,7 @@ static inline struct folio *folio_next(struct folio *folio) * it from being split. It is not necessary for the folio to be locked. * Return: The base-2 logarithm of the size of this folio. */ -static inline unsigned int folio_shift(struct folio *folio) +static inline unsigned int folio_shift(const struct folio *folio) { return PAGE_SHIFT + folio_order(folio); } @@ -2158,7 +2158,7 @@ static inline unsigned int folio_shift(struct folio *folio) * it from being split. It is not necessary for the folio to be locked. * Return: The number of bytes in this folio. */ -static inline size_t folio_size(struct folio *folio) +static inline size_t folio_size(const struct folio *folio) { return PAGE_SIZE << folio_order(folio); } diff --git a/mm/slab.h b/mm/slab.h index 5f8f47c5bee0..b16e63191578 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -166,7 +166,7 @@ static_assert(IS_ALIGNED(offsetof(struct slab, freelist), sizeof(freelist_aba_t) */ static inline bool slab_test_pfmemalloc(const struct slab *slab) { - return folio_test_active((struct folio *)slab_folio(slab)); + return folio_test_active(slab_folio(slab)); } static inline void slab_set_pfmemalloc(struct slab *slab) @@ -211,7 +211,7 @@ static inline struct slab *virt_to_slab(const void *addr) static inline int slab_order(const struct slab *slab) { - return folio_order((struct folio *)slab_folio(slab)); + return folio_order(slab_folio(slab)); } static inline size_t slab_size(const struct slab *slab) diff --git a/mm/slub.c b/mm/slub.c index 95e0a3332c44..b8ba068ca079 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -962,11 +962,9 @@ void print_tracking(struct kmem_cache *s, void *object) static void print_slab_info(const struct slab *slab) { - struct folio *folio = (struct folio *)slab_folio(slab); - pr_err("Slab 0x%p objects=%u used=%u fp=0x%p flags=%pGp\n", slab, slab->objects, slab->inuse, slab->freelist, - folio_flags(folio, 0)); + &slab->__page_flags); } /* @@ -2532,7 +2530,7 @@ static void discard_slab(struct kmem_cache *s, struct slab *slab) */ static inline bool slab_test_node_partial(const struct slab *slab) { - return folio_test_workingset((struct folio *)slab_folio(slab)); + return folio_test_workingset(slab_folio(slab)); } static inline void slab_set_node_partial(struct slab *slab) -- cgit v1.2.3 From 47d911b02cbe61494bb066ad84cc66d25091d506 Mon Sep 17 00:00:00 2001 From: Chengming Zhou Date: Fri, 7 Jun 2024 16:40:12 +0800 Subject: slab: make check_object() more consistent Now check_object() calls check_bytes_and_report() multiple times to check every section of the object it cares about, like left and right redzones, object poison, paddings poison and freepointer. It will abort the checking process and return 0 once it finds an error. There are two inconsistencies in check_object(), which are alignment padding checking and object padding checking. 
We only print the error messages but don't return 0 to tell callers that something is wrong and needs to be handled. Please see alloc_debug_processing() and free_debug_processing() for details. We want to do all checks without skipping, so use a local variable "ret" to save each check result and change check_bytes_and_report() to only report specific error findings. Then at end of check_object(), print the trailer once if any found an error. Suggested-by: Vlastimil Babka Signed-off-by: Chengming Zhou Reviewed-by: Vlastimil Babka Signed-off-by: Vlastimil Babka --- mm/slub.c | 62 +++++++++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 41 insertions(+), 21 deletions(-) (limited to 'mm/slub.c') diff --git a/mm/slub.c b/mm/slub.c index b8ba068ca079..5df8c8302784 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -788,8 +788,24 @@ static bool slab_add_kunit_errors(void) kunit_put_resource(resource); return true; } + +static bool slab_in_kunit_test(void) +{ + struct kunit_resource *resource; + + if (!kunit_get_current_test()) + return false; + + resource = kunit_find_named_resource(current->kunit_test, "slab_errors"); + if (!resource) + return false; + + kunit_put_resource(resource); + return true; +} #else static inline bool slab_add_kunit_errors(void) { return false; } +static inline bool slab_in_kunit_test(void) { return false; } #endif static inline unsigned int size_from_object(struct kmem_cache *s) @@ -1190,8 +1206,6 @@ static int check_bytes_and_report(struct kmem_cache *s, struct slab *slab, pr_err("0x%p-0x%p @offset=%tu. First byte 0x%x instead of 0x%x\n", fault, end - 1, fault - addr, fault[0], value); - print_trailer(s, slab, object); - add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE); skip_bug_print: restore_bytes(s, what, value, fault, end); @@ -1300,15 +1314,16 @@ static int check_object(struct kmem_cache *s, struct slab *slab, u8 *p = object; u8 *endobject = object + s->object_size; unsigned int orig_size, kasan_meta_size; + int ret = 1; if (s->flags & SLAB_RED_ZONE) { if (!check_bytes_and_report(s, slab, object, "Left Redzone", object - s->red_left_pad, val, s->red_left_pad)) - return 0; + ret = 0; if (!check_bytes_and_report(s, slab, object, "Right Redzone", endobject, val, s->inuse - s->object_size)) - return 0; + ret = 0; if (slub_debug_orig_size(s) && val == SLUB_RED_ACTIVE) { orig_size = get_orig_size(s, object); @@ -1317,14 +1332,15 @@ static int check_object(struct kmem_cache *s, struct slab *slab, !check_bytes_and_report(s, slab, object, "kmalloc Redzone", p + orig_size, val, s->object_size - orig_size)) { - return 0; + ret = 0; } } } else { if ((s->flags & SLAB_POISON) && s->object_size < s->inuse) { - check_bytes_and_report(s, slab, p, "Alignment padding", + if (!check_bytes_and_report(s, slab, p, "Alignment padding", endobject, POISON_INUSE, - s->inuse - s->object_size); + s->inuse - s->object_size)) + ret = 0; } } @@ -1340,27 +1356,25 @@ static int check_object(struct kmem_cache *s, struct slab *slab, !check_bytes_and_report(s, slab, p, "Poison", p + kasan_meta_size, POISON_FREE, s->object_size - kasan_meta_size - 1)) - return 0; + ret = 0; if (kasan_meta_size < s->object_size && !check_bytes_and_report(s, slab, p, "End Poison", p + s->object_size - 1, POISON_END, 1)) - return 0; + ret = 0; } /* * check_pad_bytes cleans up on its own. */ - check_pad_bytes(s, slab, p); + if (!check_pad_bytes(s, slab, p)) + ret = 0; } - if (!freeptr_outside_object(s) && val == SLUB_RED_ACTIVE) - /* - * Object and freepointer overlap. 
Cannot check - * freepointer while object is allocated. - */ - return 1; - - /* Check free pointer validity */ - if (!check_valid_pointer(s, slab, get_freepointer(s, p))) { + /* + * Cannot check freepointer while object is allocated if + * object and freepointer overlap. + */ + if ((freeptr_outside_object(s) || val != SLUB_RED_ACTIVE) && + !check_valid_pointer(s, slab, get_freepointer(s, p))) { object_err(s, slab, p, "Freepointer corrupt"); /* * No choice but to zap it and thus lose the remainder @@ -1368,9 +1382,15 @@ static int check_object(struct kmem_cache *s, struct slab *slab, * another error because the object count is now wrong. */ set_freepointer(s, p, NULL); - return 0; + ret = 0; } - return 1; + + if (!ret && !slab_in_kunit_test()) { + print_trailer(s, slab, object); + add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE); + } + + return ret; } static int check_slab(struct kmem_cache *s, struct slab *slab) -- cgit v1.2.3 From adef2aeaa2b936c97865d56c59be2cb7266acbb7 Mon Sep 17 00:00:00 2001 From: Chengming Zhou Date: Fri, 7 Jun 2024 16:40:13 +0800 Subject: slab: don't put freepointer outside of object if only orig_size The commit 946fa0dbf2d8 ("mm/slub: extend redzone check to extra allocated kmalloc space than requested") will extend right redzone when allocating for orig_size < object_size. So we can't overlay the freepointer in the object space in this case. But the code looks like it forgot to check SLAB_RED_ZONE, since there won't be extended right redzone if only orig_size enabled. As we are here, make this complex conditional expressions a little prettier and add some comments about extending right redzone when slub_debug_orig_size() enabled. Reviewed-by: Feng Tang Reviewed-by: Vlastimil Babka Signed-off-by: Chengming Zhou Signed-off-by: Vlastimil Babka --- mm/slub.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'mm/slub.c') diff --git a/mm/slub.c b/mm/slub.c index 5df8c8302784..5fc052a4f38f 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -5167,10 +5167,9 @@ static int calculate_sizes(struct kmem_cache *s) */ s->inuse = size; - if (slub_debug_orig_size(s) || - (flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) || - ((flags & SLAB_RED_ZONE) && s->object_size < sizeof(void *)) || - s->ctor) { + if ((flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) || s->ctor || + ((flags & SLAB_RED_ZONE) && + (s->object_size < sizeof(void *) || slub_debug_orig_size(s)))) { /* * Relocate free pointer after the object if it is not * permitted to overwrite the first word of the object on @@ -5178,7 +5177,9 @@ static int calculate_sizes(struct kmem_cache *s) * * This is the case if we do RCU, have a constructor or * destructor, are poisoning the objects, or are - * redzoning an object smaller than sizeof(void *). + * redzoning an object smaller than sizeof(void *) or are + * redzoning an object with slub_debug_orig_size() enabled, + * in which case the right redzone may be extended. * * The assumption that s->offset >= s->inuse means free * pointer is outside of the object is used in the -- cgit v1.2.3 From 4a24bbabc826eaba6f94a9741712117b8e2b0587 Mon Sep 17 00:00:00 2001 From: Chengming Zhou Date: Fri, 7 Jun 2024 16:40:14 +0800 Subject: slab: delete useless RED_INACTIVE and RED_ACTIVE These seem useless since we use the SLUB_RED_INACTIVE and SLUB_RED_ACTIVE, so just delete them, no functional change. 
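For readers following along outside the tree, a simplified, self-contained illustration of the two ideas above: the single-byte 0xbb/0xcc redzone patterns that SLUB keeps around objects, and the check_object() style of accumulating a result so that every section gets reported rather than only the first bad one. The functions below are toy code, not the kernel implementations.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define SLUB_RED_INACTIVE	0xbb	/* when obj is inactive */
#define SLUB_RED_ACTIVE		0xcc	/* when obj is active */

/* Report the first mismatching byte in [start, start + len). */
static bool check_bytes(const char *what, const unsigned char *start,
			unsigned char value, size_t len)
{
	for (size_t i = 0; i < len; i++) {
		if (start[i] != value) {
			printf("%s overwritten at +%zu: 0x%02x instead of 0x%02x\n",
			       what, i, (unsigned int)start[i], (unsigned int)value);
			return false;
		}
	}
	return true;
}

/* Check both redzones; record failures instead of returning on the first. */
static bool check_object(const unsigned char *left, const unsigned char *right,
			 size_t redzone, unsigned char val)
{
	bool ret = true;

	if (!check_bytes("Left Redzone", left, val, redzone))
		ret = false;
	if (!check_bytes("Right Redzone", right, val, redzone))
		ret = false;
	return ret;
}

int main(void)
{
	unsigned char left[8], right[8];

	for (size_t i = 0; i < sizeof(left); i++)
		left[i] = right[i] = SLUB_RED_ACTIVE;
	right[3] = 0x00;	/* simulate an overflow into the right redzone */

	if (!check_object(left, right, sizeof(left), SLUB_RED_ACTIVE))
		printf("object corrupted\n");
	return 0;
}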
Reviewed-by: Vlastimil Babka Signed-off-by: Chengming Zhou Reviewed-by: Christoph Lameter (Ampere) Signed-off-by: Vlastimil Babka --- include/linux/poison.h | 7 ++----- mm/slub.c | 4 ++-- tools/include/linux/poison.h | 7 ++----- 3 files changed, 6 insertions(+), 12 deletions(-) (limited to 'mm/slub.c') diff --git a/include/linux/poison.h b/include/linux/poison.h index 1f0ee2459f2a..9c1a035af97c 100644 --- a/include/linux/poison.h +++ b/include/linux/poison.h @@ -38,11 +38,8 @@ * Magic nums for obj red zoning. * Placed in the first word before and the first word after an obj. */ -#define RED_INACTIVE 0x09F911029D74E35BULL /* when obj is inactive */ -#define RED_ACTIVE 0xD84156C5635688C0ULL /* when obj is active */ - -#define SLUB_RED_INACTIVE 0xbb -#define SLUB_RED_ACTIVE 0xcc +#define SLUB_RED_INACTIVE 0xbb /* when obj is inactive */ +#define SLUB_RED_ACTIVE 0xcc /* when obj is active */ /* ...and for poisoning */ #define POISON_INUSE 0x5a /* for use-uninitialised poisoning */ diff --git a/mm/slub.c b/mm/slub.c index 5fc052a4f38f..3d19a0ee411f 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1228,8 +1228,8 @@ skip_bug_print: * Padding is extended by another word if Redzoning is enabled and * object_size == inuse. * - * We fill with 0xbb (RED_INACTIVE) for inactive objects and with - * 0xcc (RED_ACTIVE) for objects in use. + * We fill with 0xbb (SLUB_RED_INACTIVE) for inactive objects and with + * 0xcc (SLUB_RED_ACTIVE) for objects in use. * * object + s->inuse * Meta data starts here. diff --git a/tools/include/linux/poison.h b/tools/include/linux/poison.h index 2e6338ac5eed..e530e54046c9 100644 --- a/tools/include/linux/poison.h +++ b/tools/include/linux/poison.h @@ -47,11 +47,8 @@ * Magic nums for obj red zoning. * Placed in the first word before and the first word after an obj. */ -#define RED_INACTIVE 0x09F911029D74E35BULL /* when obj is inactive */ -#define RED_ACTIVE 0xD84156C5635688C0ULL /* when obj is active */ - -#define SLUB_RED_INACTIVE 0xbb -#define SLUB_RED_ACTIVE 0xcc +#define SLUB_RED_INACTIVE 0xbb /* when obj is inactive */ +#define SLUB_RED_ACTIVE 0xcc /* when obj is active */ /* ...and for poisoning */ #define POISON_INUSE 0x5a /* for use-uninitialised poisoning */ -- cgit v1.2.3 From 67f2df3b82d091ed095d0e47e1f3a9d3e18e4e41 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 1 Jul 2024 12:12:59 -0700 Subject: mm/slab: Plumb kmem_buckets into __do_kmalloc_node() Introduce CONFIG_SLAB_BUCKETS which provides the infrastructure to support separated kmalloc buckets (in the following kmem_buckets_create() patches and future codetag-based separation). Since this will provide a mitigation for a very common case of exploits, it is recommended to enable this feature for general purpose distros. By default, the new Kconfig will be enabled if CONFIG_SLAB_FREELIST_HARDENED is enabled (and it is added to the hardening.config Kconfig fragment). To be able to choose which buckets to allocate from, make the buckets available to the internal kmalloc interfaces by adding them as the second argument, rather than depending on the buckets being chosen from the fixed set of global buckets. Where the bucket is not available, pass NULL, which means "use the default system kmalloc bucket set" (the prior existing behavior), as implemented in kmalloc_slab(). To avoid adding the extra argument when !CONFIG_SLAB_BUCKETS, only the top-level macros and static inlines use the buckets argument (where they are stripped out and compiled out respectively). 
The actual extern functions can then be built without the argument, and the internals fall back to the global kmalloc buckets unconditionally. Co-developed-by: Vlastimil Babka Signed-off-by: Kees Cook Signed-off-by: Vlastimil Babka --- include/linux/slab.h | 27 ++++++++++++++++++++++----- kernel/configs/hardening.config | 1 + mm/Kconfig | 17 +++++++++++++++++ mm/slab.h | 6 ++++-- mm/slab_common.c | 2 +- mm/slub.c | 20 ++++++++++---------- scripts/kernel-doc | 1 + 7 files changed, 56 insertions(+), 18 deletions(-) (limited to 'mm/slub.c') diff --git a/include/linux/slab.h b/include/linux/slab.h index 922bf15794f7..a9200d453087 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -570,6 +570,21 @@ void *kmem_cache_alloc_node_noprof(struct kmem_cache *s, gfp_t flags, int node) __assume_slab_alignment __malloc; #define kmem_cache_alloc_node(...) alloc_hooks(kmem_cache_alloc_node_noprof(__VA_ARGS__)) +/* + * These macros allow declaring a kmem_buckets * parameter alongside size, which + * can be compiled out with CONFIG_SLAB_BUCKETS=n so that a large number of call + * sites don't have to pass NULL. + */ +#ifdef CONFIG_SLAB_BUCKETS +#define DECL_BUCKET_PARAMS(_size, _b) size_t (_size), kmem_buckets *(_b) +#define PASS_BUCKET_PARAMS(_size, _b) (_size), (_b) +#define PASS_BUCKET_PARAM(_b) (_b) +#else +#define DECL_BUCKET_PARAMS(_size, _b) size_t (_size) +#define PASS_BUCKET_PARAMS(_size, _b) (_size) +#define PASS_BUCKET_PARAM(_b) NULL +#endif + /* * The following functions are not to be used directly and are intended only * for internal use from kmalloc() and kmalloc_node() @@ -579,7 +594,7 @@ void *kmem_cache_alloc_node_noprof(struct kmem_cache *s, gfp_t flags, void *__kmalloc_noprof(size_t size, gfp_t flags) __assume_kmalloc_alignment __alloc_size(1); -void *__kmalloc_node_noprof(size_t size, gfp_t flags, int node) +void *__kmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node) __assume_kmalloc_alignment __alloc_size(1); void *__kmalloc_cache_noprof(struct kmem_cache *s, gfp_t flags, size_t size) @@ -680,7 +695,7 @@ static __always_inline __alloc_size(1) void *kmalloc_node_noprof(size_t size, gf kmalloc_caches[kmalloc_type(flags, _RET_IP_)][index], flags, node, size); } - return __kmalloc_node_noprof(size, flags, node); + return __kmalloc_node_noprof(PASS_BUCKET_PARAMS(size, NULL), flags, node); } #define kmalloc_node(...) alloc_hooks(kmalloc_node_noprof(__VA_ARGS__)) @@ -731,8 +746,10 @@ static inline __realloc_size(2, 3) void * __must_check krealloc_array_noprof(voi */ #define kcalloc(n, size, flags) kmalloc_array(n, size, (flags) | __GFP_ZERO) -void *kmalloc_node_track_caller_noprof(size_t size, gfp_t flags, int node, - unsigned long caller) __alloc_size(1); +void *__kmalloc_node_track_caller_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node, + unsigned long caller) __alloc_size(1); +#define kmalloc_node_track_caller_noprof(size, flags, node, caller) \ + __kmalloc_node_track_caller_noprof(PASS_BUCKET_PARAMS(size, NULL), flags, node, caller) #define kmalloc_node_track_caller(...) \ alloc_hooks(kmalloc_node_track_caller_noprof(__VA_ARGS__, _RET_IP_)) @@ -758,7 +775,7 @@ static inline __alloc_size(1, 2) void *kmalloc_array_node_noprof(size_t n, size_ return NULL; if (__builtin_constant_p(n) && __builtin_constant_p(size)) return kmalloc_node_noprof(bytes, flags, node); - return __kmalloc_node_noprof(bytes, flags, node); + return __kmalloc_node_noprof(PASS_BUCKET_PARAMS(bytes, NULL), flags, node); } #define kmalloc_array_node(...) 
alloc_hooks(kmalloc_array_node_noprof(__VA_ARGS__)) diff --git a/kernel/configs/hardening.config b/kernel/configs/hardening.config index 8a7ce7a6b3ab..3fabb8f55ef6 100644 --- a/kernel/configs/hardening.config +++ b/kernel/configs/hardening.config @@ -20,6 +20,7 @@ CONFIG_RANDOMIZE_MEMORY=y # Randomize allocator freelists, harden metadata. CONFIG_SLAB_FREELIST_RANDOM=y CONFIG_SLAB_FREELIST_HARDENED=y +CONFIG_SLAB_BUCKETS=y CONFIG_SHUFFLE_PAGE_ALLOCATOR=y CONFIG_RANDOM_KMALLOC_CACHES=y diff --git a/mm/Kconfig b/mm/Kconfig index b4cb45255a54..e0dfb268717c 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -273,6 +273,23 @@ config SLAB_FREELIST_HARDENED sacrifices to harden the kernel slab allocator against common freelist exploit methods. +config SLAB_BUCKETS + bool "Support allocation from separate kmalloc buckets" + depends on !SLUB_TINY + default SLAB_FREELIST_HARDENED + help + Kernel heap attacks frequently depend on being able to create + specifically-sized allocations with user-controlled contents + that will be allocated into the same kmalloc bucket as a + target object. To avoid sharing these allocation buckets, + provide an explicitly separated set of buckets to be used for + user-controlled allocations. This may very slightly increase + memory fragmentation, though in practice it's only a handful + of extra pages since the bulk of user-controlled allocations + are relatively long-lived. + + If unsure, say Y. + config SLUB_STATS default n bool "Enable performance statistics" diff --git a/mm/slab.h b/mm/slab.h index b16e63191578..d5e8034af9d5 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -403,16 +403,18 @@ static inline unsigned int size_index_elem(unsigned int bytes) * KMALLOC_MAX_CACHE_SIZE and the caller must check that. */ static inline struct kmem_cache * -kmalloc_slab(size_t size, gfp_t flags, unsigned long caller) +kmalloc_slab(size_t size, kmem_buckets *b, gfp_t flags, unsigned long caller) { unsigned int index; + if (!b) + b = &kmalloc_caches[kmalloc_type(flags, caller)]; if (size <= 192) index = kmalloc_size_index[size_index_elem(size)]; else index = fls(size - 1); - return kmalloc_caches[kmalloc_type(flags, caller)][index]; + return (*b)[index]; } gfp_t kmalloc_fix_flags(gfp_t flags); diff --git a/mm/slab_common.c b/mm/slab_common.c index ff60f91e4edc..bcc1e13d7f86 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -703,7 +703,7 @@ size_t kmalloc_size_roundup(size_t size) * The flags don't matter since size_index is common to all. * Neither does the caller for just getting ->object_size. */ - return kmalloc_slab(size, GFP_KERNEL, 0)->object_size; + return kmalloc_slab(size, NULL, GFP_KERNEL, 0)->object_size; } /* Above the smaller buckets, size is a multiple of page size. 
*/ diff --git a/mm/slub.c b/mm/slub.c index 3d19a0ee411f..80f0a51242d1 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -4117,7 +4117,7 @@ void *__kmalloc_large_node_noprof(size_t size, gfp_t flags, int node) EXPORT_SYMBOL(__kmalloc_large_node_noprof); static __always_inline -void *__do_kmalloc_node(size_t size, gfp_t flags, int node, +void *__do_kmalloc_node(size_t size, kmem_buckets *b, gfp_t flags, int node, unsigned long caller) { struct kmem_cache *s; @@ -4133,32 +4133,32 @@ void *__do_kmalloc_node(size_t size, gfp_t flags, int node, if (unlikely(!size)) return ZERO_SIZE_PTR; - s = kmalloc_slab(size, flags, caller); + s = kmalloc_slab(size, b, flags, caller); ret = slab_alloc_node(s, NULL, flags, node, caller, size); ret = kasan_kmalloc(s, ret, size, flags); trace_kmalloc(caller, ret, size, s->size, flags, node); return ret; } - -void *__kmalloc_node_noprof(size_t size, gfp_t flags, int node) +void *__kmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node) { - return __do_kmalloc_node(size, flags, node, _RET_IP_); + return __do_kmalloc_node(size, PASS_BUCKET_PARAM(b), flags, node, _RET_IP_); } EXPORT_SYMBOL(__kmalloc_node_noprof); void *__kmalloc_noprof(size_t size, gfp_t flags) { - return __do_kmalloc_node(size, flags, NUMA_NO_NODE, _RET_IP_); + return __do_kmalloc_node(size, NULL, flags, NUMA_NO_NODE, _RET_IP_); } EXPORT_SYMBOL(__kmalloc_noprof); -void *kmalloc_node_track_caller_noprof(size_t size, gfp_t flags, - int node, unsigned long caller) +void *__kmalloc_node_track_caller_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, + int node, unsigned long caller) { - return __do_kmalloc_node(size, flags, node, caller); + return __do_kmalloc_node(size, PASS_BUCKET_PARAM(b), flags, node, caller); + } -EXPORT_SYMBOL(kmalloc_node_track_caller_noprof); +EXPORT_SYMBOL(__kmalloc_node_track_caller_noprof); void *__kmalloc_cache_noprof(struct kmem_cache *s, gfp_t gfpflags, size_t size) { diff --git a/scripts/kernel-doc b/scripts/kernel-doc index 95a59ac78f82..2791f8195203 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -1729,6 +1729,7 @@ sub dump_function($$) { $prototype =~ s/__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +//; $prototype =~ s/__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +//; $prototype =~ s/__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +//; + $prototype =~ s/DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)/$1, $2/; my $define = $prototype =~ s/^#\s*define\s+//; #ak added $prototype =~ s/__attribute_const__ +//; $prototype =~ s/__attribute__\s*\(\( -- cgit v1.2.3
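To close, a standalone sketch of the parameter-stripping macro pattern introduced above, compilable outside the kernel. The kmem_buckets typedef and the allocation body are toy stand-ins; only the three macros mirror the ones added to include/linux/slab.h.

#include <stddef.h>
#include <stdio.h>

typedef struct {
	const char *name;	/* toy stand-in for the real bucket array */
} kmem_buckets;

#define CONFIG_SLAB_BUCKETS	/* comment out to build the stripped variant */

#ifdef CONFIG_SLAB_BUCKETS
#define DECL_BUCKET_PARAMS(_size, _b)	size_t (_size), kmem_buckets *(_b)
#define PASS_BUCKET_PARAMS(_size, _b)	(_size), (_b)
#define PASS_BUCKET_PARAM(_b)		(_b)
#else
#define DECL_BUCKET_PARAMS(_size, _b)	size_t (_size)
#define PASS_BUCKET_PARAMS(_size, _b)	(_size)
#define PASS_BUCKET_PARAM(_b)		NULL
#endif

static kmem_buckets default_buckets = { "default" };

/* "Internal" function: sees the bucket argument only when configured in. */
static void *toy_kmalloc_node(DECL_BUCKET_PARAMS(size, b), int node)
{
	kmem_buckets *buckets = PASS_BUCKET_PARAM(b);

	if (!buckets)
		buckets = &default_buckets;	/* fall back to the global set */
	printf("alloc %zu bytes on node %d from %s buckets\n",
	       size, node, buckets->name);
	return NULL;				/* the allocation itself is omitted */
}

int main(void)
{
	/* Call sites expand to one or two arguments depending on the config. */
	toy_kmalloc_node(PASS_BUCKET_PARAMS(32, NULL), -1);
	return 0;
}

With the macro defined, the call expands to toy_kmalloc_node(32, NULL, -1); with it commented out, both the parameter and the argument vanish, which is why ordinary callers never have to pass NULL by hand.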