diff options
Diffstat (limited to 'mm/slub.c')
-rw-r--r-- | mm/slub.c | 1192 |
1 files changed, 600 insertions, 592 deletions
diff --git a/mm/slub.c b/mm/slub.c index a8626825a829..261474092e43 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -48,7 +48,7 @@ * 1. slab_mutex (Global Mutex) * 2. node->list_lock (Spinlock) * 3. kmem_cache->cpu_slab->lock (Local lock) - * 4. slab_lock(page) (Only on some arches or for debugging) + * 4. slab_lock(slab) (Only on some arches or for debugging) * 5. object_map_lock (Only for debugging) * * slab_mutex @@ -64,19 +64,19 @@ * * The slab_lock is only used for debugging and on arches that do not * have the ability to do a cmpxchg_double. It only protects: - * A. page->freelist -> List of object free in a page - * B. page->inuse -> Number of objects in use - * C. page->objects -> Number of objects in page - * D. page->frozen -> frozen state + * A. slab->freelist -> List of free objects in a slab + * B. slab->inuse -> Number of objects in use + * C. slab->objects -> Number of objects in slab + * D. slab->frozen -> frozen state * * Frozen slabs * * If a slab is frozen then it is exempt from list management. It is not * on any list except per cpu partial list. The processor that froze the - * slab is the one who can perform list operations on the page. Other + * slab is the one who can perform list operations on the slab. Other * processors may put objects onto the freelist but the processor that * froze the slab is the only one that can retrieve the objects from the - * page's freelist. + * slab's freelist. * * list_lock * @@ -135,7 +135,7 @@ * minimal so we rely on the page allocators per cpu caches for * fast frees and allocs. * - * page->frozen The slab is frozen and exempt from list processing. + * slab->frozen The slab is frozen and exempt from list processing. * This means that the slab is dedicated to a purpose * such as satisfying allocations for a specific * processor. Objects may be freed in the slab while @@ -250,7 +250,7 @@ static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s) #define OO_SHIFT 16 #define OO_MASK ((1 << OO_SHIFT) - 1) -#define MAX_OBJS_PER_PAGE 32767 /* since page.objects is u15 */ +#define MAX_OBJS_PER_PAGE 32767 /* since slab.objects is u15 */ /* Internal SLUB flags */ /* Poison object */ @@ -417,18 +417,18 @@ static inline unsigned int oo_objects(struct kmem_cache_order_objects x) #ifdef CONFIG_SLUB_CPU_PARTIAL static void slub_set_cpu_partial(struct kmem_cache *s, unsigned int nr_objects) { - unsigned int nr_pages; + unsigned int nr_slabs; s->cpu_partial = nr_objects; /* * We take the number of objects but actually limit the number of - * pages on the per cpu partial list, in order to limit excessive - * growth of the list. For simplicity we assume that the pages will + * slabs on the per cpu partial list, in order to limit excessive + * growth of the list. For simplicity we assume that the slabs will * be half-full. */ - nr_pages = DIV_ROUND_UP(nr_objects * 2, oo_objects(s->oo)); - s->cpu_partial_pages = nr_pages; + nr_slabs = DIV_ROUND_UP(nr_objects * 2, oo_objects(s->oo)); + s->cpu_partial_slabs = nr_slabs; } #else static inline void @@ -440,28 +440,32 @@ slub_set_cpu_partial(struct kmem_cache *s, unsigned int nr_objects) /* * Per slab locking using the pagelock */ -static __always_inline void __slab_lock(struct page *page) +static __always_inline void __slab_lock(struct slab *slab) { + struct page *page = slab_page(slab); + VM_BUG_ON_PAGE(PageTail(page), page); bit_spin_lock(PG_locked, &page->flags); } -static __always_inline void __slab_unlock(struct page *page) +static __always_inline void __slab_unlock(struct slab *slab) { + struct page *page = slab_page(slab); + VM_BUG_ON_PAGE(PageTail(page), page); __bit_spin_unlock(PG_locked, &page->flags); } -static __always_inline void slab_lock(struct page *page, unsigned long *flags) +static __always_inline void slab_lock(struct slab *slab, unsigned long *flags) { if (IS_ENABLED(CONFIG_PREEMPT_RT)) local_irq_save(*flags); - __slab_lock(page); + __slab_lock(slab); } -static __always_inline void slab_unlock(struct page *page, unsigned long *flags) +static __always_inline void slab_unlock(struct slab *slab, unsigned long *flags) { - __slab_unlock(page); + __slab_unlock(slab); if (IS_ENABLED(CONFIG_PREEMPT_RT)) local_irq_restore(*flags); } @@ -471,7 +475,7 @@ static __always_inline void slab_unlock(struct page *page, unsigned long *flags) * by an _irqsave() lock variant. Except on PREEMPT_RT where locks are different * so we disable interrupts as part of slab_[un]lock(). */ -static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page, +static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct slab *slab, void *freelist_old, unsigned long counters_old, void *freelist_new, unsigned long counters_new, const char *n) @@ -481,7 +485,7 @@ static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page #if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \ defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE) if (s->flags & __CMPXCHG_DOUBLE) { - if (cmpxchg_double(&page->freelist, &page->counters, + if (cmpxchg_double(&slab->freelist, &slab->counters, freelist_old, counters_old, freelist_new, counters_new)) return true; @@ -491,15 +495,15 @@ static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page /* init to 0 to prevent spurious warnings */ unsigned long flags = 0; - slab_lock(page, &flags); - if (page->freelist == freelist_old && - page->counters == counters_old) { - page->freelist = freelist_new; - page->counters = counters_new; - slab_unlock(page, &flags); + slab_lock(slab, &flags); + if (slab->freelist == freelist_old && + slab->counters == counters_old) { + slab->freelist = freelist_new; + slab->counters = counters_new; + slab_unlock(slab, &flags); return true; } - slab_unlock(page, &flags); + slab_unlock(slab, &flags); } cpu_relax(); @@ -512,7 +516,7 @@ static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page return false; } -static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page, +static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct slab *slab, void *freelist_old, unsigned long counters_old, void *freelist_new, unsigned long counters_new, const char *n) @@ -520,7 +524,7 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page, #if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \ defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE) if (s->flags & __CMPXCHG_DOUBLE) { - if (cmpxchg_double(&page->freelist, &page->counters, + if (cmpxchg_double(&slab->freelist, &slab->counters, freelist_old, counters_old, freelist_new, counters_new)) return true; @@ -530,16 +534,16 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page, unsigned long flags; local_irq_save(flags); - __slab_lock(page); - if (page->freelist == freelist_old && - page->counters == counters_old) { - page->freelist = freelist_new; - page->counters = counters_new; - __slab_unlock(page); + __slab_lock(slab); + if (slab->freelist == freelist_old && + slab->counters == counters_old) { + slab->freelist = freelist_new; + slab->counters = counters_new; + __slab_unlock(slab); local_irq_restore(flags); return true; } - __slab_unlock(page); + __slab_unlock(slab); local_irq_restore(flags); } @@ -558,14 +562,14 @@ static unsigned long object_map[BITS_TO_LONGS(MAX_OBJS_PER_PAGE)]; static DEFINE_RAW_SPINLOCK(object_map_lock); static void __fill_map(unsigned long *obj_map, struct kmem_cache *s, - struct page *page) + struct slab *slab) { - void *addr = page_address(page); + void *addr = slab_address(slab); void *p; - bitmap_zero(obj_map, page->objects); + bitmap_zero(obj_map, slab->objects); - for (p = page->freelist; p; p = get_freepointer(s, p)) + for (p = slab->freelist; p; p = get_freepointer(s, p)) set_bit(__obj_to_index(s, addr, p), obj_map); } @@ -590,19 +594,19 @@ static inline bool slab_add_kunit_errors(void) { return false; } #endif /* - * Determine a map of object in use on a page. + * Determine a map of objects in use in a slab. * - * Node listlock must be held to guarantee that the page does + * Node listlock must be held to guarantee that the slab does * not vanish from under us. */ -static unsigned long *get_map(struct kmem_cache *s, struct page *page) +static unsigned long *get_map(struct kmem_cache *s, struct slab *slab) __acquires(&object_map_lock) { VM_BUG_ON(!irqs_disabled()); raw_spin_lock(&object_map_lock); - __fill_map(object_map, s, page); + __fill_map(object_map, s, slab); return object_map; } @@ -663,17 +667,17 @@ static inline void metadata_access_disable(void) /* Verify that a pointer has an address that is valid within a slab page */ static inline int check_valid_pointer(struct kmem_cache *s, - struct page *page, void *object) + struct slab *slab, void *object) { void *base; if (!object) return 1; - base = page_address(page); + base = slab_address(slab); object = kasan_reset_tag(object); object = restore_red_left(s, object); - if (object < base || object >= base + page->objects * s->size || + if (object < base || object >= base + slab->objects * s->size || (object - base) % s->size) { return 0; } @@ -784,12 +788,13 @@ void print_tracking(struct kmem_cache *s, void *object) print_track("Freed", get_track(s, object, TRACK_FREE), pr_time); } -static void print_page_info(struct page *page) +static void print_slab_info(const struct slab *slab) { - pr_err("Slab 0x%p objects=%u used=%u fp=0x%p flags=%pGp\n", - page, page->objects, page->inuse, page->freelist, - &page->flags); + struct folio *folio = (struct folio *)slab_folio(slab); + pr_err("Slab 0x%p objects=%u used=%u fp=0x%p flags=%pGp\n", + slab, slab->objects, slab->inuse, slab->freelist, + folio_flags(folio, 0)); } static void slab_bug(struct kmem_cache *s, char *fmt, ...) @@ -822,28 +827,14 @@ static void slab_fix(struct kmem_cache *s, char *fmt, ...) va_end(args); } -static bool freelist_corrupted(struct kmem_cache *s, struct page *page, - void **freelist, void *nextfree) -{ - if ((s->flags & SLAB_CONSISTENCY_CHECKS) && - !check_valid_pointer(s, page, nextfree) && freelist) { - object_err(s, page, *freelist, "Freechain corrupt"); - *freelist = NULL; - slab_fix(s, "Isolate corrupted freechain"); - return true; - } - - return false; -} - -static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) +static void print_trailer(struct kmem_cache *s, struct slab *slab, u8 *p) { unsigned int off; /* Offset of last byte */ - u8 *addr = page_address(page); + u8 *addr = slab_address(slab); print_tracking(s, p); - print_page_info(page); + print_slab_info(slab); pr_err("Object 0x%p @offset=%tu fp=0x%p\n\n", p, p - addr, get_freepointer(s, p)); @@ -875,18 +866,32 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) dump_stack(); } -void object_err(struct kmem_cache *s, struct page *page, +static void object_err(struct kmem_cache *s, struct slab *slab, u8 *object, char *reason) { if (slab_add_kunit_errors()) return; slab_bug(s, "%s", reason); - print_trailer(s, page, object); + print_trailer(s, slab, object); add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE); } -static __printf(3, 4) void slab_err(struct kmem_cache *s, struct page *page, +static bool freelist_corrupted(struct kmem_cache *s, struct slab *slab, + void **freelist, void *nextfree) +{ + if ((s->flags & SLAB_CONSISTENCY_CHECKS) && + !check_valid_pointer(s, slab, nextfree) && freelist) { + object_err(s, slab, *freelist, "Freechain corrupt"); + *freelist = NULL; + slab_fix(s, "Isolate corrupted freechain"); + return true; + } + + return false; +} + +static __printf(3, 4) void slab_err(struct kmem_cache *s, struct slab *slab, const char *fmt, ...) { va_list args; @@ -899,7 +904,7 @@ static __printf(3, 4) void slab_err(struct kmem_cache *s, struct page *page, vsnprintf(buf, sizeof(buf), fmt, args); va_end(args); slab_bug(s, "%s", buf); - print_page_info(page); + print_slab_info(slab); dump_stack(); add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE); } @@ -927,13 +932,13 @@ static void restore_bytes(struct kmem_cache *s, char *message, u8 data, memset(from, data, to - from); } -static int check_bytes_and_report(struct kmem_cache *s, struct page *page, +static int check_bytes_and_report(struct kmem_cache *s, struct slab *slab, u8 *object, char *what, u8 *start, unsigned int value, unsigned int bytes) { u8 *fault; u8 *end; - u8 *addr = page_address(page); + u8 *addr = slab_address(slab); metadata_access_enable(); fault = memchr_inv(kasan_reset_tag(start), value, bytes); @@ -952,7 +957,7 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page, pr_err("0x%p-0x%p @offset=%tu. First byte 0x%x instead of 0x%x\n", fault, end - 1, fault - addr, fault[0], value); - print_trailer(s, page, object); + print_trailer(s, slab, object); add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE); skip_bug_print: @@ -998,7 +1003,7 @@ skip_bug_print: * may be used with merged slabcaches. */ -static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p) +static int check_pad_bytes(struct kmem_cache *s, struct slab *slab, u8 *p) { unsigned long off = get_info_end(s); /* The end of info */ @@ -1011,12 +1016,12 @@ static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p) if (size_from_object(s) == off) return 1; - return check_bytes_and_report(s, page, p, "Object padding", + return check_bytes_and_report(s, slab, p, "Object padding", p + off, POISON_INUSE, size_from_object(s) - off); } /* Check the pad bytes at the end of a slab page */ -static int slab_pad_check(struct kmem_cache *s, struct page *page) +static int slab_pad_check(struct kmem_cache *s, struct slab *slab) { u8 *start; u8 *fault; @@ -1028,8 +1033,8 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page) if (!(s->flags & SLAB_POISON)) return 1; - start = page_address(page); - length = page_size(page); + start = slab_address(slab); + length = slab_size(slab); end = start + length; remainder = length % s->size; if (!remainder) @@ -1044,7 +1049,7 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page) while (end > fault && end[-1] == POISON_INUSE) end--; - slab_err(s, page, "Padding overwritten. 0x%p-0x%p @offset=%tu", + slab_err(s, slab, "Padding overwritten. 0x%p-0x%p @offset=%tu", fault, end - 1, fault - start); print_section(KERN_ERR, "Padding ", pad, remainder); @@ -1052,23 +1057,23 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page) return 0; } -static int check_object(struct kmem_cache *s, struct page *page, +static int check_object(struct kmem_cache *s, struct slab *slab, void *object, u8 val) { u8 *p = object; u8 *endobject = object + s->object_size; if (s->flags & SLAB_RED_ZONE) { - if (!check_bytes_and_report(s, page, object, "Left Redzone", + if (!check_bytes_and_report(s, slab, object, "Left Redzone", object - s->red_left_pad, val, s->red_left_pad)) return 0; - if (!check_bytes_and_report(s, page, object, "Right Redzone", + if (!check_bytes_and_report(s, slab, object, "Right Redzone", endobject, val, s->inuse - s->object_size)) return 0; } else { if ((s->flags & SLAB_POISON) && s->object_size < s->inuse) { - check_bytes_and_report(s, page, p, "Alignment padding", + check_bytes_and_report(s, slab, p, "Alignment padding", endobject, POISON_INUSE, s->inuse - s->object_size); } @@ -1076,15 +1081,15 @@ static int check_object(struct kmem_cache *s, struct page *page, if (s->flags & SLAB_POISON) { if (val != SLUB_RED_ACTIVE && (s->flags & __OBJECT_POISON) && - (!check_bytes_and_report(s, page, p, "Poison", p, + (!check_bytes_and_report(s, slab, p, "Poison", p, POISON_FREE, s->object_size - 1) || - !check_bytes_and_report(s, page, p, "End Poison", + !check_bytes_and_report(s, slab, p, "End Poison", p + s->object_size - 1, POISON_END, 1))) return 0; /* * check_pad_bytes cleans up on its own. */ - check_pad_bytes(s, page, p); + check_pad_bytes(s, slab, p); } if (!freeptr_outside_object(s) && val == SLUB_RED_ACTIVE) @@ -1095,8 +1100,8 @@ static int check_object(struct kmem_cache *s, struct page *page, return 1; /* Check free pointer validity */ - if (!check_valid_pointer(s, page, get_freepointer(s, p))) { - object_err(s, page, p, "Freepointer corrupt"); + if (!check_valid_pointer(s, slab, get_freepointer(s, p))) { + object_err(s, slab, p, "Freepointer corrupt"); /* * No choice but to zap it and thus lose the remainder * of the free objects in this slab. May cause @@ -1108,55 +1113,55 @@ static int check_object(struct kmem_cache *s, struct page *page, return 1; } -static int check_slab(struct kmem_cache *s, struct page *page) +static int check_slab(struct kmem_cache *s, struct slab *slab) { int maxobj; - if (!PageSlab(page)) { - slab_err(s, page, "Not a valid slab page"); + if (!folio_test_slab(slab_folio(slab))) { + slab_err(s, slab, "Not a valid slab page"); return 0; } - maxobj = order_objects(compound_order(page), s->size); - if (page->objects > maxobj) { - slab_err(s, page, "objects %u > max %u", - page->objects, maxobj); + maxobj = order_objects(slab_order(slab), s->size); + if (slab->objects > maxobj) { + slab_err(s, slab, "objects %u > max %u", + slab->objects, maxobj); return 0; } - if (page->inuse > page->objects) { - slab_err(s, page, "inuse %u > max %u", - page->inuse, page->objects); + if (slab->inuse > slab->objects) { + slab_err(s, slab, "inuse %u > max %u", + slab->inuse, slab->objects); return 0; } /* Slab_pad_check fixes things up after itself */ - slab_pad_check(s, page); + slab_pad_check(s, slab); return 1; } /* - * Determine if a certain object on a page is on the freelist. Must hold the + * Determine if a certain object in a slab is on the freelist. Must hold the * slab lock to guarantee that the chains are in a consistent state. */ -static int on_freelist(struct kmem_cache *s, struct page *page, void *search) +static int on_freelist(struct kmem_cache *s, struct slab *slab, void *search) { int nr = 0; void *fp; void *object = NULL; int max_objects; - fp = page->freelist; - while (fp && nr <= page->objects) { + fp = slab->freelist; + while (fp && nr <= slab->objects) { if (fp == search) return 1; - if (!check_valid_pointer(s, page, fp)) { + if (!check_valid_pointer(s, slab, fp)) { if (object) { - object_err(s, page, object, + object_err(s, slab, object, "Freechain corrupt"); set_freepointer(s, object, NULL); } else { - slab_err(s, page, "Freepointer corrupt"); - page->freelist = NULL; - page->inuse = page->objects; + slab_err(s, slab, "Freepointer corrupt"); + slab->freelist = NULL; + slab->inuse = slab->objects; slab_fix(s, "Freelist cleared"); return 0; } @@ -1167,34 +1172,34 @@ static int on_freelist(struct kmem_cache *s, struct page *page, void *search) nr++; } - max_objects = order_objects(compound_order(page), s->size); + max_objects = order_objects(slab_order(slab), s->size); if (max_objects > MAX_OBJS_PER_PAGE) max_objects = MAX_OBJS_PER_PAGE; - if (page->objects != max_objects) { - slab_err(s, page, "Wrong number of objects. Found %d but should be %d", - page->objects, max_objects); - page->objects = max_objects; + if (slab->objects != max_objects) { + slab_err(s, slab, "Wrong number of objects. Found %d but should be %d", + slab->objects, max_objects); + slab->objects = max_objects; slab_fix(s, "Number of objects adjusted"); } - if (page->inuse != page->objects - nr) { - slab_err(s, page, "Wrong object count. Counter is %d but counted were %d", - page->inuse, page->objects - nr); - page->inuse = page->objects - nr; + if (slab->inuse != slab->objects - nr) { + slab_err(s, slab, "Wrong object count. Counter is %d but counted were %d", + slab->inuse, slab->objects - nr); + slab->inuse = slab->objects - nr; slab_fix(s, "Object count adjusted"); } return search == NULL; } -static void trace(struct kmem_cache *s, struct page *page, void *object, +static void trace(struct kmem_cache *s, struct slab *slab, void *object, int alloc) { if (s->flags & SLAB_TRACE) { pr_info("TRACE %s %s 0x%p inuse=%d fp=0x%p\n", s->name, alloc ? "alloc" : "free", - object, page->inuse, - page->freelist); + object, slab->inuse, + slab->freelist); if (!alloc) print_section(KERN_INFO, "Object ", (void *)object, @@ -1208,22 +1213,22 @@ static void trace(struct kmem_cache *s, struct page *page, void *object, * Tracking of fully allocated slabs for debugging purposes. */ static void add_full(struct kmem_cache *s, - struct kmem_cache_node *n, struct page *page) + struct kmem_cache_node *n, struct slab *slab) { if (!(s->flags & SLAB_STORE_USER)) return; lockdep_assert_held(&n->list_lock); - list_add(&page->slab_list, &n->full); + list_add(&slab->slab_list, &n->full); } -static void remove_full(struct kmem_cache *s, struct kmem_cache_node *n, struct page *page) +static void remove_full(struct kmem_cache *s, struct kmem_cache_node *n, struct slab *slab) { if (!(s->flags & SLAB_STORE_USER)) return; lockdep_assert_held(&n->list_lock); - list_del(&page->slab_list); + list_del(&slab->slab_list); } /* Tracking of the number of slabs for debugging purposes */ @@ -1263,7 +1268,7 @@ static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects) } /* Object debug checks for alloc/free paths */ -static void setup_object_debug(struct kmem_cache *s, struct page *page, +static void setup_object_debug(struct kmem_cache *s, struct slab *slab, void *object) { if (!kmem_cache_debug_flags(s, SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON)) @@ -1274,89 +1279,89 @@ static void setup_object_debug(struct kmem_cache *s, struct page *page, } static -void setup_page_debug(struct kmem_cache *s, struct page *page, void *addr) +void setup_slab_debug(struct kmem_cache *s, struct slab *slab, void *addr) { if (!kmem_cache_debug_flags(s, SLAB_POISON)) return; metadata_access_enable(); - memset(kasan_reset_tag(addr), POISON_INUSE, page_size(page)); + memset(kasan_reset_tag(addr), POISON_INUSE, slab_size(slab)); metadata_access_disable(); } static inline int alloc_consistency_checks(struct kmem_cache *s, - struct page *page, void *object) + struct slab *slab, void *object) { - if (!check_slab(s, page)) + if (!check_slab(s, slab)) return 0; - if (!check_valid_pointer(s, page, object)) { - object_err(s, page, object, "Freelist Pointer check fails"); + if (!check_valid_pointer(s, slab, object)) { + object_err(s, slab, object, "Freelist Pointer check fails"); return 0; } - if (!check_object(s, page, object, SLUB_RED_INACTIVE)) + if (!check_object(s, slab, object, SLUB_RED_INACTIVE)) return 0; return 1; } static noinline int alloc_debug_processing(struct kmem_cache *s, - struct page *page, + struct slab *slab, void *object, unsigned long addr) { if (s->flags & SLAB_CONSISTENCY_CHECKS) { - if (!alloc_consistency_checks(s, page, object)) + if (!alloc_consistency_checks(s, slab, object)) goto bad; } /* Success perform special debug activities for allocs */ if (s->flags & SLAB_STORE_USER) set_track(s, object, TRACK_ALLOC, addr); - trace(s, page, object, 1); + trace(s, slab, object, 1); init_object(s, object, SLUB_RED_ACTIVE); return 1; bad: - if (PageSlab(page)) { + if (folio_test_slab(slab_folio(slab))) { /* * If this is a slab page then lets do the best we can * to avoid issues in the future. Marking all objects * as used avoids touching the remaining objects. */ slab_fix(s, "Marking all objects used"); - page->inuse = page->objects; - page->freelist = NULL; + slab->inuse = slab->objects; + slab->freelist = NULL; } return 0; } static inline int free_consistency_checks(struct kmem_cache *s, - struct page *page, void *object, unsigned long addr) + struct slab *slab, void *object, unsigned long addr) { - if (!check_valid_pointer(s, page, object)) { - slab_err(s, page, "Invalid object pointer 0x%p", object); + if (!check_valid_pointer(s, slab, object)) { + slab_err(s, slab, "Invalid object pointer 0x%p", object); return 0; } - if (on_freelist(s, page, object)) { - object_err(s, page, object, "Object already free"); + if (on_freelist(s, slab, object)) { + object_err(s, slab, object, "Object already free"); return 0; } - if (!check_object(s, page, object, SLUB_RED_ACTIVE)) + if (!check_object(s, slab, object, SLUB_RED_ACTIVE)) return 0; - if (unlikely(s != page->slab_cache)) { - if (!PageSlab(page)) { - slab_err(s, page, "Attempt to free object(0x%p) outside of slab", + if (unlikely(s != slab->slab_cache)) { + if (!folio_test_slab(slab_folio(slab))) { + slab_err(s, slab, "Attempt to free object(0x%p) outside of slab", object); - } else if (!page->slab_cache) { + } else if (!slab->slab_cache) { pr_err("SLUB <none>: no slab for object 0x%p.\n", object); dump_stack(); } else - object_err(s, page, object, + object_err(s, slab, object, "page slab pointer corrupt."); return 0; } @@ -1365,21 +1370,21 @@ static inline int free_consistency_checks(struct kmem_cache *s, /* Supports checking bulk free of a constructed freelist */ static noinline int free_debug_processing( - struct kmem_cache *s, struct page *page, + struct kmem_cache *s, struct slab *slab, void *head, void *tail, int bulk_cnt, unsigned long addr) { - struct kmem_cache_node *n = get_node(s, page_to_nid(page)); + struct kmem_cache_node *n = get_node(s, slab_nid(slab)); void *object = head; int cnt = 0; unsigned long flags, flags2; int ret = 0; spin_lock_irqsave(&n->list_lock, flags); - slab_lock(page, &flags2); + slab_lock(slab, &flags2); if (s->flags & SLAB_CONSISTENCY_CHECKS) { - if (!check_slab(s, page)) + if (!check_slab(s, slab)) goto out; } @@ -1387,13 +1392,13 @@ next_object: cnt++; if (s->flags & SLAB_CONSISTENCY_CHECKS) { - if (!free_consistency_checks(s, page, object, addr)) + if (!free_consistency_checks(s, slab, object, addr)) goto out; } if (s->flags & SLAB_STORE_USER) set_track(s, object, TRACK_FREE, addr); - trace(s, page, object, 0); + trace(s, slab, object, 0); /* Freepointer not overwritten by init_object(), SLAB_POISON moved it */ init_object(s, object, SLUB_RED_INACTIVE); @@ -1406,10 +1411,10 @@ next_object: out: if (cnt != bulk_cnt) - slab_err(s, page, "Bulk freelist count(%d) invalid(%d)\n", + slab_err(s, slab, "Bulk freelist count(%d) invalid(%d)\n", bulk_cnt, cnt); - slab_unlock(page, &flags2); + slab_unlock(slab, &flags2); spin_unlock_irqrestore(&n->list_lock, flags); if (!ret) slab_fix(s, "Object at 0x%p not freed", object); @@ -1624,26 +1629,26 @@ slab_flags_t kmem_cache_flags(unsigned int object_size, } #else /* !CONFIG_SLUB_DEBUG */ static inline void setup_object_debug(struct kmem_cache *s, - struct page *page, void *object) {} + struct slab *slab, void *object) {} static inline -void setup_page_debug(struct kmem_cache *s, struct page *page, void *addr) {} +void setup_slab_debug(struct kmem_cache *s, struct slab *slab, void *addr) {} static inline int alloc_debug_processing(struct kmem_cache *s, - struct page *page, void *object, unsigned long addr) { return 0; } + struct slab *slab, void *object, unsigned long addr) { return 0; } static inline int free_debug_processing( - struct kmem_cache *s, struct page *page, + struct kmem_cache *s, struct slab *slab, void *head, void *tail, int bulk_cnt, unsigned long addr) { return 0; } -static inline int slab_pad_check(struct kmem_cache *s, struct page *page) +static inline int slab_pad_check(struct kmem_cache *s, struct slab *slab) { return 1; } -static inline int check_object(struct kmem_cache *s, struct page *page, +static inline int check_object(struct kmem_cache *s, struct slab *slab, void *object, u8 val) { return 1; } static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n, - struct page *page) {} + struct slab *slab) {} static inline void remove_full(struct kmem_cache *s, struct kmem_cache_node *n, - struct page *page) {} + struct slab *slab) {} slab_flags_t kmem_cache_flags(unsigned int object_size, slab_flags_t flags, const char *name) { @@ -1662,7 +1667,7 @@ static inline void inc_slabs_node(struct kmem_cache *s, int node, static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects) {} -static bool freelist_corrupted(struct kmem_cache *s, struct page *page, +static bool freelist_corrupted(struct kmem_cache *s, struct slab *slab, void **freelist, void *nextfree) { return false; @@ -1767,10 +1772,10 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s, return *head != NULL; } -static void *setup_object(struct kmem_cache *s, struct page *page, +static void *setup_object(struct kmem_cache *s, struct slab *slab, void *object) { - setup_object_debug(s, page, object); + setup_object_debug(s, slab, object); object = kasan_init_slab_obj(s, object); if (unlikely(s->ctor)) { kasan_unpoison_object_data(s, object); @@ -1783,18 +1788,27 @@ static void *setup_object(struct kmem_cache *s, struct page *page, /* * Slab allocation and freeing */ -static inline struct page *alloc_slab_page(struct kmem_cache *s, +static inline struct slab *alloc_slab_page(struct kmem_cache *s, gfp_t flags, int node, struct kmem_cache_order_objects oo) { - struct page *page; + struct folio *folio; + struct slab *slab; unsigned int order = oo_order(oo); if (node == NUMA_NO_NODE) - page = alloc_pages(flags, order); + folio = (struct folio *)alloc_pages(flags, order); else - page = __alloc_pages_node(node, flags, order); + folio = (struct folio *)__alloc_pages_node(node, flags, order); + + if (!folio) + return NULL; + + slab = folio_slab(folio); + __folio_set_slab(folio); + if (page_is_pfmemalloc(folio_page(folio, 0))) + slab_set_pfmemalloc(slab); - return page; + return slab; } #ifdef CONFIG_SLAB_FREELIST_RANDOM @@ -1839,7 +1853,7 @@ static void __init init_freelist_randomization(void) } /* Get the next entry on the pre-computed freelist randomized */ -static void *next_freelist_entry(struct kmem_cache *s, struct page *page, +static void *next_freelist_entry(struct kmem_cache *s, struct slab *slab, unsigned long *pos, void *start, unsigned long page_limit, unsigned long freelist_count) @@ -1861,32 +1875,32 @@ static void *next_freelist_entry(struct kmem_cache *s, struct page *page, } /* Shuffle the single linked freelist based on a random pre-computed sequence */ -static bool shuffle_freelist(struct kmem_cache *s, struct page *page) +static bool shuffle_freelist(struct kmem_cache *s, struct slab *slab) { void *start; void *cur; void *next; unsigned long idx, pos, page_limit, freelist_count; - if (page->objects < 2 || !s->random_seq) + if (slab->objects < 2 || !s->random_seq) return false; freelist_count = oo_objects(s->oo); pos = get_random_int() % freelist_count; - page_limit = page->objects * s->size; - start = fixup_red_left(s, page_address(page)); + page_limit = slab->objects * s->size; + start = fixup_red_left(s, slab_address(slab)); /* First entry is used as the base of the freelist */ - cur = next_freelist_entry(s, page, &pos, start, page_limit, + cur = next_freelist_entry(s, slab, &pos, start, page_limit, freelist_count); - cur = setup_object(s, page, cur); - page->freelist = cur; + cur = setup_object(s, slab, cur); + slab->freelist = cur; - for (idx = 1; idx < page->objects; idx++) { - next = next_freelist_entry(s, page, &pos, start, page_limit, + for (idx = 1; idx < slab->objects; idx++) { + next = next_freelist_entry(s, slab, &pos, start, page_limit, freelist_count); - next = setup_object(s, page, next); + next = setup_object(s, slab, next); set_freepointer(s, cur, next); cur = next; } @@ -1900,15 +1914,15 @@ static inline int init_cache_random_seq(struct kmem_cache *s) return 0; } static inline void init_freelist_randomization(void) { } -static inline bool shuffle_freelist(struct kmem_cache *s, struct page *page) +static inline bool shuffle_freelist(struct kmem_cache *s, struct slab *slab) { return false; } #endif /* CONFIG_SLAB_FREELIST_RANDOM */ -static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) +static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) { - struct page *page; + struct slab *slab; struct kmem_cache_order_objects oo = s->oo; gfp_t alloc_gfp; void *start, *p, *next; @@ -1927,63 +1941,60 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) if ((alloc_gfp & __GFP_DIRECT_RECLAIM) && oo_order(oo) > oo_order(s->min)) alloc_gfp = (alloc_gfp | __GFP_NOMEMALLOC) & ~(__GFP_RECLAIM|__GFP_NOFAIL); - page = alloc_slab_page(s, alloc_gfp, node, oo); - if (unlikely(!page)) { + slab = alloc_slab_page(s, alloc_gfp, node, oo); + if (unlikely(!slab)) { oo = s->min; alloc_gfp = flags; /* * Allocation may have failed due to fragmentation. * Try a lower order alloc if possible */ - page = alloc_slab_page(s, alloc_gfp, node, oo); - if (unlikely(!page)) + slab = alloc_slab_page(s, alloc_gfp, node, oo); + if (unlikely(!slab)) goto out; stat(s, ORDER_FALLBACK); } - page->objects = oo_objects(oo); + slab->objects = oo_objects(oo); - account_slab_page(page, oo_order(oo), s, flags); + account_slab(slab, oo_order(oo), s, flags); - page->slab_cache = s; - __SetPageSlab(page); - if (page_is_pfmemalloc(page)) - SetPageSlabPfmemalloc(page); + slab->slab_cache = s; - kasan_poison_slab(page); + kasan_poison_slab(slab); - start = page_address(page); + start = slab_address(slab); - setup_page_debug(s, page, start); + setup_slab_debug(s, slab, start); - shuffle = shuffle_freelist(s, page); + shuffle = shuffle_freelist(s, slab); if (!shuffle) { start = fixup_red_left(s, start); - start = setup_object(s, page, start); - page->freelist = start; - for (idx = 0, p = start; idx < page->objects - 1; idx++) { + start = setup_object(s, slab, start); + slab->freelist = start; + for (idx = 0, p = start; idx < slab->objects - 1; idx++) { next = p + s->size; - next = setup_object(s, page, next); + next = setup_object(s, slab, next); set_freepointer(s, p, next); p = next; } set_freepointer(s, p, NULL); } - page->inuse = page->objects; - page->frozen = 1; + slab->inuse = slab->objects; + slab->frozen = 1; out: - if (!page) + if (!slab) return NULL; - inc_slabs_node(s, page_to_nid(page), page->objects); + inc_slabs_node(s, slab_nid(slab), slab->objects); - return page; + return slab; } -static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) +static struct slab *new_slab(struct kmem_cache *s, gfp_t flags, int node) { if (unlikely(flags & GFP_SLAB_BUG_MASK)) flags = kmalloc_fix_flags(flags); @@ -1994,76 +2005,75 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node); } -static void __free_slab(struct kmem_cache *s, struct page *page) +static void __free_slab(struct kmem_cache *s, struct slab *slab) { - int order = compound_order(page); + struct folio *folio = slab_folio(slab); + int order = folio_order(folio); int pages = 1 << order; if (kmem_cache_debug_flags(s, SLAB_CONSISTENCY_CHECKS)) { void *p; - slab_pad_check(s, page); - for_each_object(p, s, page_address(page), - page->objects) - check_object(s, page, p, SLUB_RED_INACTIVE); + slab_pad_check(s, slab); + for_each_object(p, s, slab_address(slab), slab->objects) + check_object(s, slab, p, SLUB_RED_INACTIVE); } - __ClearPageSlabPfmemalloc(page); - __ClearPageSlab(page); - /* In union with page->mapping where page allocator expects NULL */ - page->slab_cache = NULL; + __slab_clear_pfmemalloc(slab); + __folio_clear_slab(folio); + folio->mapping = NULL; if (current->reclaim_state) current->reclaim_state->reclaimed_slab += pages; - unaccount_slab_page(page, order, s); - __free_pages(page, order); + unaccount_slab(slab, order, s); + __free_pages(folio_page(folio, 0), order); } static void rcu_free_slab(struct rcu_head *h) { - struct page *page = container_of(h, struct page, rcu_head); + struct slab *slab = container_of(h, struct slab, rcu_head); - __free_slab(page->slab_cache, page); + __free_slab(slab->slab_cache, slab); } -static void free_slab(struct kmem_cache *s, struct page *page) +static void free_slab(struct kmem_cache *s, struct slab *slab) { if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU)) { - call_rcu(&page->rcu_head, rcu_free_slab); + call_rcu(&slab->rcu_head, rcu_free_slab); } else - __free_slab(s, page); + __free_slab(s, slab); } -static void discard_slab(struct kmem_cache *s, struct page *page) +static void discard_slab(struct kmem_cache *s, struct slab *slab) { - dec_slabs_node(s, page_to_nid(page), page->objects); - free_slab(s, page); + dec_slabs_node(s, slab_nid(slab), slab->objects); + free_slab(s, slab); } /* * Management of partially allocated slabs. */ static inline void -__add_partial(struct kmem_cache_node *n, struct page *page, int tail) +__add_partial(struct kmem_cache_node *n, struct slab *slab, int tail) { n->nr_partial++; if (tail == DEACTIVATE_TO_TAIL) - list_add_tail(&page->slab_list, &n->partial); + list_add_tail(&slab->slab_list, &n->partial); else - list_add(&page->slab_list, &n->partial); + list_add(&slab->slab_list, &n->partial); } static inline void add_partial(struct kmem_cache_node *n, - struct page *page, int tail) + struct slab *slab, int tail) { lockdep_assert_held(&n->list_lock); - __add_partial(n, page, tail); + __add_partial(n, slab, tail); } static inline void remove_partial(struct kmem_cache_node *n, - struct page *page) + struct slab *slab) { lockdep_assert_held(&n->list_lock); - list_del(&page->slab_list); + list_del(&slab->slab_list); n->nr_partial--; } @@ -2074,12 +2084,12 @@ static inline void remove_partial(struct kmem_cache_node *n, * Returns a list of objects or NULL if it fails. */ static inline void *acquire_slab(struct kmem_cache *s, - struct kmem_cache_node *n, struct page *page, + struct kmem_cache_node *n, struct slab *slab, int mode) { void *freelist; unsigned long counters; - struct page new; + struct slab new; lockdep_assert_held(&n->list_lock); @@ -2088,11 +2098,11 @@ static inline void *acquire_slab(struct kmem_cache *s, * The old freelist is the list of objects for the * per cpu allocation list. */ - freelist = page->freelist; - counters = page->counters; + freelist = slab->freelist; + counters = slab->counters; new.counters = counters; if (mode) { - new.inuse = page->objects; + new.inuse = slab->objects; new.freelist = NULL; } else { new.freelist = freelist; @@ -2101,35 +2111,35 @@ static inline void *acquire_slab(struct kmem_cache *s, VM_BUG_ON(new.frozen); new.frozen = 1; - if (!__cmpxchg_double_slab(s, page, + if (!__cmpxchg_double_slab(s, slab, freelist, counters, new.freelist, new.counters, "acquire_slab")) return NULL; - remove_partial(n, page); + remove_partial(n, slab); WARN_ON(!freelist); return freelist; } #ifdef CONFIG_SLUB_CPU_PARTIAL -static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain); +static void put_cpu_partial(struct kmem_cache *s, struct slab *slab, int drain); #else -static inline void put_cpu_partial(struct kmem_cache *s, struct page *page, +static inline void put_cpu_partial(struct kmem_cache *s, struct slab *slab, int drain) { } #endif -static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags); +static inline bool pfmemalloc_match(struct slab *slab, gfp_t gfpflags); /* * Try to allocate a partial slab from a specific node. */ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n, - struct page **ret_page, gfp_t gfpflags) + struct slab **ret_slab, gfp_t gfpflags) { - struct page *page, *page2; + struct slab *slab, *slab2; void *object = NULL; unsigned long flags; - unsigned int partial_pages = 0; + unsigned int partial_slabs = 0; /* * Racy check. If we mistakenly see no partial slabs then we @@ -2141,28 +2151,28 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n, return NULL; spin_lock_irqsave(&n->list_lock, flags); - list_for_each_entry_safe(page, page2, &n->partial, slab_list) { + list_for_each_entry_safe(slab, slab2, &n->partial, slab_list) { void *t; - if (!pfmemalloc_match(page, gfpflags)) + if (!pfmemalloc_match(slab, gfpflags)) continue; - t = acquire_slab(s, n, page, object == NULL); + t = acquire_slab(s, n, slab, object == NULL); if (!t) break; if (!object) { - *ret_page = page; + *ret_slab = slab; stat(s, ALLOC_FROM_PARTIAL); object = t; } else { - put_cpu_partial(s, page, 0); + put_cpu_partial(s, slab, 0); stat(s, CPU_PARTIAL_NODE); - partial_pages++; + partial_slabs++; } #ifdef CONFIG_SLUB_CPU_PARTIAL if (!kmem_cache_has_cpu_partial(s) - || partial_pages > s->cpu_partial_pages / 2) + || partial_slabs > s->cpu_partial_slabs / 2) break; #else break; @@ -2174,10 +2184,10 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n, } /* - * Get a page from somewhere. Search in increasing NUMA distances. + * Get a slab from somewhere. Search in increasing NUMA distances. */ static void *get_any_partial(struct kmem_cache *s, gfp_t flags, - struct page **ret_page) + struct slab **ret_slab) { #ifdef CONFIG_NUMA struct zonelist *zonelist; @@ -2219,7 +2229,7 @@ static void *get_any_partial(struct kmem_cache *s, gfp_t flags, if (n && cpuset_zone_allowed(zone, flags) && n->nr_partial > s->min_partial) { - object = get_partial_node(s, n, ret_page, flags); + object = get_partial_node(s, n, ret_slab, flags); if (object) { /* * Don't check read_mems_allowed_retry() @@ -2238,10 +2248,10 @@ static void *get_any_partial(struct kmem_cache *s, gfp_t flags, } /* - * Get a partial page, lock it and return it. + * Get a partial slab, lock it and return it. */ static void *get_partial(struct kmem_cache *s, gfp_t flags, int node, - struct page **ret_page) + struct slab **ret_slab) { void *object; int searchnode = node; @@ -2249,11 +2259,11 @@ static void *get_partial(struct kmem_cache *s, gfp_t flags, int node, if (node == NUMA_NO_NODE) searchnode = numa_mem_id(); - object = get_partial_node(s, get_node(s, searchnode), ret_page, flags); + object = get_partial_node(s, get_node(s, searchnode), ret_slab, flags); if (object || node != NUMA_NO_NODE) return object; - return get_any_partial(s, flags, ret_page); + return get_any_partial(s, flags, ret_slab); } #ifdef CONFIG_PREEMPTION @@ -2330,25 +2340,25 @@ static void init_kmem_cache_cpus(struct kmem_cache *s) } /* - * Finishes removing the cpu slab. Merges cpu's freelist with page's freelist, + * Finishes removing the cpu slab. Merges cpu's freelist with slab's freelist, * unfreezes the slabs and puts it on the proper list. * Assumes the slab has been already safely taken away from kmem_cache_cpu * by the caller. */ -static void deactivate_slab(struct kmem_cache *s, struct page *page, +static void deactivate_slab(struct kmem_cache *s, struct slab *slab, void *freelist) { enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE }; - struct kmem_cache_node *n = get_node(s, page_to_nid(page)); + struct kmem_cache_node *n = get_node(s, slab_nid(slab)); int lock = 0, free_delta = 0; enum slab_modes l = M_NONE, m = M_NONE; void *nextfree, *freelist_iter, *freelist_tail; int tail = DEACTIVATE_TO_HEAD; unsigned long flags = 0; - struct page new; - struct page old; + struct slab new; + struct slab old; - if (page->freelist) { + if (slab->freelist) { stat(s, DEACTIVATE_REMOTE_FREES); tail = DEACTIVATE_TO_TAIL; } @@ -2367,7 +2377,7 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page, * 'freelist_iter' is already corrupted. So isolate all objects * starting at 'freelist_iter' by skipping them. */ - if (freelist_corrupted(s, page, &freelist_iter, nextfree)) + if (freelist_corrupted(s, slab, &freelist_iter, nextfree)) break; freelist_tail = freelist_iter; @@ -2377,25 +2387,25 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page, } /* - * Stage two: Unfreeze the page while splicing the per-cpu - * freelist to the head of page's freelist. + * Stage two: Unfreeze the slab while splicing the per-cpu + * freelist to the head of slab's freelist. * - * Ensure that the page is unfrozen while the list presence + * Ensure that the slab is unfrozen while the list presence * reflects the actual number of objects during unfreeze. * * We setup the list membership and then perform a cmpxchg - * with the count. If there is a mismatch then the page - * is not unfrozen but the page is on the wrong list. + * with the count. If there is a mismatch then the slab + * is not unfrozen but the slab is on the wrong list. * * Then we restart the process which may have to remove - * the page from the list that we just put it on again + * the slab from the list that we just put it on again * because the number of objects in the slab may have * changed. */ redo: - old.freelist = READ_ONCE(page->freelist); - old.counters = READ_ONCE(page->counters); + old.freelist = READ_ONCE(slab->freelist); + old.counters = READ_ONCE(slab->counters); VM_BUG_ON(!old.frozen); /* Determine target state of the slab */ @@ -2416,9 +2426,8 @@ redo: if (!lock) { lock = 1; /* - * Taking the spinlock removes the possibility - * that acquire_slab() will see a slab page that - * is frozen + * Taking the spinlock removes the possibility that + * acquire_slab() will see a slab that is frozen */ spin_lock_irqsave(&n->list_lock, flags); } @@ -2437,18 +2446,18 @@ redo: if (l != m) { if (l == M_PARTIAL) - remove_partial(n, page); + remove_partial(n, slab); else if (l == M_FULL) - remove_full(s, n, page); + remove_full(s, n, slab); if (m == M_PARTIAL) - add_partial(n, page, tail); + add_partial(n, slab, tail); else if (m == M_FULL) - add_full(s, n, page); + add_full(s, n, slab); } l = m; - if (!cmpxchg_double_slab(s, page, + if (!cmpxchg_double_slab(s, slab, old.freelist, old.counters, new.freelist, new.counters, "unfreezing slab")) @@ -2463,26 +2472,26 @@ redo: stat(s, DEACTIVATE_FULL); else if (m == M_FREE) { stat(s, DEACTIVATE_EMPTY); - discard_slab(s, page); + discard_slab(s, slab); stat(s, FREE_SLAB); } } #ifdef CONFIG_SLUB_CPU_PARTIAL -static void __unfreeze_partials(struct kmem_cache *s, struct page *partial_page) +static void __unfreeze_partials(struct kmem_cache *s, struct slab *partial_slab) { struct kmem_cache_node *n = NULL, *n2 = NULL; - struct page *page, *discard_page = NULL; + struct slab *slab, *slab_to_discard = NULL; unsigned long flags = 0; - while (partial_page) { - struct page new; - struct page old; + while (partial_slab) { + struct slab new; + struct slab old; - page = partial_page; - partial_page = page->next; + slab = partial_slab; + partial_slab = slab->next; - n2 = get_node(s, page_to_nid(page)); + n2 = get_node(s, slab_nid(slab)); if (n != n2) { if (n) spin_unlock_irqrestore(&n->list_lock, flags); @@ -2493,8 +2502,8 @@ static void __unfreeze_partials(struct kmem_cache *s, struct page *partial_page) do { - old.freelist = page->freelist; - old.counters = page->counters; + old.freelist = slab->freelist; + old.counters = slab->counters; VM_BUG_ON(!old.frozen); new.counters = old.counters; @@ -2502,16 +2511,16 @@ static void __unfreeze_partials(struct kmem_cache *s, struct page *partial_page) new.frozen = 0; - } while (!__cmpxchg_double_slab(s, page, + } while (!__cmpxchg_double_slab(s, slab, old.freelist, old.counters, new.freelist, new.counters, "unfreezing slab")); if (unlikely(!new.inuse && n->nr_partial >= s->min_partial)) { - page->next = discard_page; - discard_page = page; + slab->next = slab_to_discard; + slab_to_discard = slab; } else { - add_partial(n, page, DEACTIVATE_TO_TAIL); + add_partial(n, slab, DEACTIVATE_TO_TAIL); stat(s, FREE_ADD_PARTIAL); } } @@ -2519,12 +2528,12 @@ static void __unfreeze_partials(struct kmem_cache *s, struct page *partial_page) if (n) spin_unlock_irqrestore(&n->list_lock, flags); - while (discard_page) { - page = discard_page; - discard_page = discard_page->next; + while (slab_to_discard) { + slab = slab_to_discard; + slab_to_discard = slab_to_discard->next; stat(s, DEACTIVATE_EMPTY); - discard_slab(s, page); + discard_slab(s, slab); stat(s, FREE_SLAB); } } @@ -2534,73 +2543,73 @@ static void __unfreeze_partials(struct kmem_cache *s, struct page *partial_page) */ static void unfreeze_partials(struct kmem_cache *s) { - struct page *partial_page; + struct slab *partial_slab; unsigned long flags; local_lock_irqsave(&s->cpu_slab->lock, flags); - partial_page = this_cpu_read(s->cpu_slab->partial); + partial_slab = this_cpu_read(s->cpu_slab->partial); this_cpu_write(s->cpu_slab->partial, NULL); local_unlock_irqrestore(&s->cpu_slab->lock, flags); - if (partial_page) - __unfreeze_partials(s, partial_page); + if (partial_slab) + __unfreeze_partials(s, partial_slab); } static void unfreeze_partials_cpu(struct kmem_cache *s, struct kmem_cache_cpu *c) { - struct page *partial_page; + struct slab *partial_slab; - partial_page = slub_percpu_partial(c); + partial_slab = slub_percpu_partial(c); c->partial = NULL; - if (partial_page) - __unfreeze_partials(s, partial_page); + if (partial_slab) + __unfreeze_partials(s, partial_slab); } /* - * Put a page that was just frozen (in __slab_free|get_partial_node) into a - * partial page slot if available. + * Put a slab that was just frozen (in __slab_free|get_partial_node) into a + * partial slab slot if available. * * If we did not find a slot then simply move all the partials to the * per node partial list. */ -static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain) +static void put_cpu_partial(struct kmem_cache *s, struct slab *slab, int drain) { - struct page *oldpage; - struct page *page_to_unfreeze = NULL; + struct slab *oldslab; + struct slab *slab_to_unfreeze = NULL; unsigned long flags; - int pages = 0; + int slabs = 0; local_lock_irqsave(&s->cpu_slab->lock, flags); - oldpage = this_cpu_read(s->cpu_slab->partial); + oldslab = this_cpu_read(s->cpu_slab->partial); - if (oldpage) { - if (drain && oldpage->pages >= s->cpu_partial_pages) { + if (oldslab) { + if (drain && oldslab->slabs >= s->cpu_partial_slabs) { /* * Partial array is full. Move the existing set to the * per node partial list. Postpone the actual unfreezing * outside of the critical section. */ - page_to_unfreeze = oldpage; - oldpage = NULL; + slab_to_unfreeze = oldslab; + oldslab = NULL; } else { - pages = oldpage->pages; + slabs = oldslab->slabs; } } - pages++; + slabs++; - page->pages = pages; - page->next = oldpage; + slab->slabs = slabs; + slab->next = oldslab; - this_cpu_write(s->cpu_slab->partial, page); + this_cpu_write(s->cpu_slab->partial, slab); local_unlock_irqrestore(&s->cpu_slab->lock, flags); - if (page_to_unfreeze) { - __unfreeze_partials(s, page_to_unfreeze); + if (slab_to_unfreeze) { + __unfreeze_partials(s, slab_to_unfreeze); stat(s, CPU_PARTIAL_DRAIN); } } @@ -2616,22 +2625,22 @@ static inline void unfreeze_partials_cpu(struct kmem_cache *s, static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) { unsigned long flags; - struct page *page; + struct slab *slab; void *freelist; local_lock_irqsave(&s->cpu_slab->lock, flags); - page = c->page; + slab = c->slab; freelist = c->freelist; - c->page = NULL; + c->slab = NULL; c->freelist = NULL; c->tid = next_tid(c->tid); local_unlock_irqrestore(&s->cpu_slab->lock, flags); - if (page) { - deactivate_slab(s, page, freelist); + if (slab) { + deactivate_slab(s, slab, freelist); stat(s, CPUSLAB_FLUSH); } } @@ -2640,14 +2649,14 @@ static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu) { struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); void *freelist = c->freelist; - struct page *page = c->page; + struct slab *slab = c->slab; - c->page = NULL; + c->slab = NULL; c->freelist = NULL; c->tid = next_tid(c->tid); - if (page) { - deactivate_slab(s, page, freelist); + if (slab) { + deactivate_slab(s, slab, freelist); stat(s, CPUSLAB_FLUSH); } @@ -2676,7 +2685,7 @@ static void flush_cpu_slab(struct work_struct *w) s = sfw->s; c = this_cpu_ptr(s->cpu_slab); - if (c->page) + if (c->slab) flush_slab(s, c); unfreeze_partials(s); @@ -2686,7 +2695,7 @@ static bool has_cpu_slab(int cpu, struct kmem_cache *s) { struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); - return c->page || slub_percpu_partial(c); + return c->slab || slub_percpu_partial(c); } static DEFINE_MUTEX(flush_lock); @@ -2748,19 +2757,19 @@ static int slub_cpu_dead(unsigned int cpu) * Check if the objects in a per cpu structure fit numa * locality expectations. */ -static inline int node_match(struct page *page, int node) +static inline int node_match(struct slab *slab, int node) { #ifdef CONFIG_NUMA - if (node != NUMA_NO_NODE && page_to_nid(page) != node) + if (node != NUMA_NO_NODE && slab_nid(slab) != node) return 0; #endif return 1; } #ifdef CONFIG_SLUB_DEBUG -static int count_free(struct page *page) +static int count_free(struct slab *slab) { - return page->objects - page->inuse; + return slab->objects - slab->inuse; } static inline unsigned long node_nr_objs(struct kmem_cache_node *n) @@ -2771,15 +2780,15 @@ static inline unsigned long node_nr_objs(struct kmem_cache_node *n) #if defined(CONFIG_SLUB_DEBUG) || defined(CONFIG_SYSFS) static unsigned long count_partial(struct kmem_cache_node *n, - int (*get_count)(struct page *)) + int (*get_count)(struct slab *)) { unsigned long flags; unsigned long x = 0; - struct page *page; + struct slab *slab; spin_lock_irqsave(&n->list_lock, flags); - list_for_each_entry(page, &n->partial, slab_list) - x += get_count(page); + list_for_each_entry(slab, &n->partial, slab_list) + x += get_count(slab); spin_unlock_irqrestore(&n->list_lock, flags); return x; } @@ -2822,54 +2831,41 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid) #endif } -static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags) -{ - if (unlikely(PageSlabPfmemalloc(page))) - return gfp_pfmemalloc_allowed(gfpflags); - - return true; -} - -/* - * A variant of pfmemalloc_match() that tests page flags without asserting - * PageSlab. Intended for opportunistic checks before taking a lock and - * rechecking that nobody else freed the page under us. - */ -static inline bool pfmemalloc_match_unsafe(struct page *page, gfp_t gfpflags) +static inline bool pfmemalloc_match(struct slab *slab, gfp_t gfpflags) { - if (unlikely(__PageSlabPfmemalloc(page))) + if (unlikely(slab_test_pfmemalloc(slab))) return gfp_pfmemalloc_allowed(gfpflags); return true; } /* - * Check the page->freelist of a page and either transfer the freelist to the - * per cpu freelist or deactivate the page. + * Check the slab->freelist and either transfer the freelist to the + * per cpu freelist or deactivate the slab. * - * The page is still frozen if the return value is not NULL. + * The slab is still frozen if the return value is not NULL. * - * If this function returns NULL then the page has been unfrozen. + * If this function returns NULL then the slab has been unfrozen. */ -static inline void *get_freelist(struct kmem_cache *s, struct page *page) +static inline void *get_freelist(struct kmem_cache *s, struct slab *slab) { - struct page new; + struct slab new; unsigned long counters; void *freelist; lockdep_assert_held(this_cpu_ptr(&s->cpu_slab->lock)); do { - freelist = page->freelist; - counters = page->counters; + freelist = slab->freelist; + counters = slab->counters; new.counters = counters; VM_BUG_ON(!new.frozen); - new.inuse = page->objects; + new.inuse = slab->objects; new.frozen = freelist != NULL; - } while (!__cmpxchg_double_slab(s, page, + } while (!__cmpxchg_double_slab(s, slab, freelist, counters, NULL, new.counters, "get_freelist")); @@ -2900,15 +2896,15 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, unsigned long addr, struct kmem_cache_cpu *c) { void *freelist; - struct page *page; + struct slab *slab; unsigned long flags; stat(s, ALLOC_SLOWPATH); -reread_page: +reread_slab: - page = READ_ONCE(c->page); - if (!page) { + slab = READ_ONCE(c->slab); + if (!slab) { /* * if the node is not online or has no normal memory, just * ignore the node constraint @@ -2920,7 +2916,7 @@ reread_page: } redo: - if (unlikely(!node_match(page, node))) { + if (unlikely(!node_match(slab, node))) { /* * same as above but node_match() being false already * implies node != NUMA_NO_NODE @@ -2939,23 +2935,23 @@ redo: * PFMEMALLOC but right now, we are losing the pfmemalloc * information when the page leaves the per-cpu allocator */ - if (unlikely(!pfmemalloc_match_unsafe(page, gfpflags))) + if (unlikely(!pfmemalloc_match(slab, gfpflags))) goto deactivate_slab; - /* must check again c->page in case we got preempted and it changed */ + /* must check again c->slab in case we got preempted and it changed */ local_lock_irqsave(&s->cpu_slab->lock, flags); - if (unlikely(page != c->page)) { + if (unlikely(slab != c->slab)) { local_unlock_irqrestore(&s->cpu_slab->lock, flags); - goto reread_page; + goto reread_slab; } freelist = c->freelist; if (freelist) goto load_freelist; - freelist = get_freelist(s, page); + freelist = get_freelist(s, slab); if (!freelist) { - c->page = NULL; + c->slab = NULL; local_unlock_irqrestore(&s->cpu_slab->lock, flags); stat(s, DEACTIVATE_BYPASS); goto new_slab; @@ -2969,10 +2965,10 @@ load_freelist: /* * freelist is pointing to the list of objects to be used. - * page is pointing to the page from which the objects are obtained. - * That page must be frozen for per cpu allocations to work. + * slab is pointing to the slab from which the objects are obtained. + * That slab must be frozen for per cpu allocations to work. */ - VM_BUG_ON(!c->page->frozen); + VM_BUG_ON(!c->slab->frozen); c->freelist = get_freepointer(s, freelist); c->tid = next_tid(c->tid); local_unlock_irqrestore(&s->cpu_slab->lock, flags); @@ -2981,23 +2977,23 @@ load_freelist: deactivate_slab: local_lock_irqsave(&s->cpu_slab->lock, flags); - if (page != c->page) { + if (slab != c->slab) { local_unlock_irqrestore(&s->cpu_slab->lock, flags); - goto reread_page; + goto reread_slab; } freelist = c->freelist; - c->page = NULL; + c->slab = NULL; c->freelist = NULL; local_unlock_irqrestore(&s->cpu_slab->lock, flags); - deactivate_slab(s, page, freelist); + deactivate_slab(s, slab, freelist); new_slab: if (slub_percpu_partial(c)) { local_lock_irqsave(&s->cpu_slab->lock, flags); - if (unlikely(c->page)) { + if (unlikely(c->slab)) { local_unlock_irqrestore(&s->cpu_slab->lock, flags); - goto reread_page; + goto reread_slab; } if (unlikely(!slub_percpu_partial(c))) { local_unlock_irqrestore(&s->cpu_slab->lock, flags); @@ -3005,8 +3001,8 @@ new_slab: goto new_objects; } - page = c->page = slub_percpu_partial(c); - slub_set_percpu_partial(c, page); + slab = c->slab = slub_percpu_partial(c); + slub_set_percpu_partial(c, slab); local_unlock_irqrestore(&s->cpu_slab->lock, flags); stat(s, CPU_PARTIAL_ALLOC); goto redo; @@ -3014,32 +3010,32 @@ new_slab: new_objects: - freelist = get_partial(s, gfpflags, node, &page); + freelist = get_partial(s, gfpflags, node, &slab); if (freelist) - goto check_new_page; + goto check_new_slab; slub_put_cpu_ptr(s->cpu_slab); - page = new_slab(s, gfpflags, node); + slab = new_slab(s, gfpflags, node); c = slub_get_cpu_ptr(s->cpu_slab); - if (unlikely(!page)) { + if (unlikely(!slab)) { slab_out_of_memory(s, gfpflags, node); return NULL; } /* - * No other reference to the page yet so we can + * No other reference to the slab yet so we can * muck around with it freely without cmpxchg */ - freelist = page->freelist; - page->freelist = NULL; + freelist = slab->freelist; + slab->freelist = NULL; stat(s, ALLOC_SLAB); -check_new_page: +check_new_slab: if (kmem_cache_debug(s)) { - if (!alloc_debug_processing(s, page, freelist, addr)) { + if (!alloc_debug_processing(s, slab, freelist, addr)) { /* Slab failed checks. Next slab needed */ goto new_slab; } else { @@ -3051,39 +3047,39 @@ check_new_page: } } - if (unlikely(!pfmemalloc_match(page, gfpflags))) + if (unlikely(!pfmemalloc_match(slab, gfpflags))) /* * For !pfmemalloc_match() case we don't load freelist so that * we don't make further mismatched allocations easier. */ goto return_single; -retry_load_page: +retry_load_slab: local_lock_irqsave(&s->cpu_slab->lock, flags); - if (unlikely(c->page)) { + if (unlikely(c->slab)) { void *flush_freelist = c->freelist; - struct page *flush_page = c->page; + struct slab *flush_slab = c->slab; - c->page = NULL; + c->slab = NULL; c->freelist = NULL; c->tid = next_tid(c->tid); local_unlock_irqrestore(&s->cpu_slab->lock, flags); - deactivate_slab(s, flush_page, flush_freelist); + deactivate_slab(s, flush_slab, flush_freelist); stat(s, CPUSLAB_FLUSH); - goto retry_load_page; + goto retry_load_slab; } - c->page = page; + c->slab = slab; goto load_freelist; return_single: - deactivate_slab(s, page, get_freepointer(s, freelist)); + deactivate_slab(s, slab, get_freepointer(s, freelist)); return freelist; } @@ -3140,7 +3136,7 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s, { void *object; struct kmem_cache_cpu *c; - struct page *page; + struct slab *slab; unsigned long tid; struct obj_cgroup *objcg = NULL; bool init = false; @@ -3172,9 +3168,9 @@ redo: /* * Irqless object alloc/free algorithm used here depends on sequence * of fetching cpu_slab's data. tid should be fetched before anything - * on c to guarantee that object and page associated with previous tid + * on c to guarantee that object and slab associated with previous tid * won't be used with current tid. If we fetch tid first, object and - * page could be one associated with next tid and our alloc/free + * slab could be one associated with next tid and our alloc/free * request will be failed. In this case, we will retry. So, no problem. */ barrier(); @@ -3187,7 +3183,7 @@ redo: */ object = c->freelist; - page = c->page; + slab = c->slab; /* * We cannot use the lockless fastpath on PREEMPT_RT because if a * slowpath has taken the local_lock_irqsave(), it is not protected @@ -3196,7 +3192,7 @@ redo: * there is a suitable cpu freelist. */ if (IS_ENABLED(CONFIG_PREEMPT_RT) || - unlikely(!object || !page || !node_match(page, node))) { + unlikely(!object || !slab || !node_match(slab, node))) { object = __slab_alloc(s, gfpflags, node, addr, c); } else { void *next_object = get_freepointer_safe(s, object); @@ -3298,17 +3294,17 @@ EXPORT_SYMBOL(kmem_cache_alloc_node_trace); * have a longer lifetime than the cpu slabs in most processing loads. * * So we still attempt to reduce cache line usage. Just take the slab - * lock and free the item. If there is no additional partial page + * lock and free the item. If there is no additional partial slab * handling required then we can return immediately. */ -static void __slab_free(struct kmem_cache *s, struct page *page, +static void __slab_free(struct kmem_cache *s, struct slab *slab, void *head, void *tail, int cnt, unsigned long addr) { void *prior; int was_frozen; - struct page new; + struct slab new; unsigned long counters; struct kmem_cache_node *n = NULL; unsigned long flags; @@ -3319,7 +3315,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, return; if (kmem_cache_debug(s) && - !free_debug_processing(s, page, head, tail, cnt, addr)) + !free_debug_processing(s, slab, head, tail, cnt, addr)) return; do { @@ -3327,8 +3323,8 @@ static void __slab_free(struct kmem_cache *s, struct page *page, spin_unlock_irqrestore(&n->list_lock, flags); n = NULL; } - prior = page->freelist; - counters = page->counters; + prior = slab->freelist; + counters = slab->counters; set_freepointer(s, tail, prior); new.counters = counters; was_frozen = new.frozen; @@ -3347,7 +3343,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, } else { /* Needs to be taken off a list */ - n = get_node(s, page_to_nid(page)); + n = get_node(s, slab_nid(slab)); /* * Speculatively acquire the list_lock. * If the cmpxchg does not succeed then we may @@ -3361,7 +3357,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, } } - } while (!cmpxchg_double_slab(s, page, + } while (!cmpxchg_double_slab(s, slab, prior, counters, head, new.counters, "__slab_free")); @@ -3376,10 +3372,10 @@ static void __slab_free(struct kmem_cache *s, struct page *page, stat(s, FREE_FROZEN); } else if (new.frozen) { /* - * If we just froze the page then put it onto the + * If we just froze the slab then put it onto the * per cpu partial list. */ - put_cpu_partial(s, page, 1); + put_cpu_partial(s, slab, 1); stat(s, CPU_PARTIAL_FREE); } @@ -3394,8 +3390,8 @@ static void __slab_free(struct kmem_cache *s, struct page *page, * then add it. */ if (!kmem_cache_has_cpu_partial(s) && unlikely(!prior)) { - remove_full(s, n, page); - add_partial(n, page, DEACTIVATE_TO_TAIL); + remove_full(s, n, slab); + add_partial(n, slab, DEACTIVATE_TO_TAIL); stat(s, FREE_ADD_PARTIAL); } spin_unlock_irqrestore(&n->list_lock, flags); @@ -3406,16 +3402,16 @@ slab_empty: /* * Slab on the partial list. */ - remove_partial(n, page); + remove_partial(n, slab); stat(s, FREE_REMOVE_PARTIAL); } else { /* Slab must be on the full list */ - remove_full(s, n, page); + remove_full(s, n, slab); } spin_unlock_irqrestore(&n->list_lock, flags); stat(s, FREE_SLAB); - discard_slab(s, page); + discard_slab(s, slab); } /* @@ -3430,11 +3426,11 @@ slab_empty: * with all sorts of special processing. * * Bulk free of a freelist with several objects (all pointing to the - * same page) possible by specifying head and tail ptr, plus objects + * same slab) possible by specifying head and tail ptr, plus objects * count (cnt). Bulk free indicated by tail pointer being set. */ static __always_inline void do_slab_free(struct kmem_cache *s, - struct page *page, void *head, void *tail, + struct slab *slab, void *head, void *tail, int cnt, unsigned long addr) { void *tail_obj = tail ? : head; @@ -3457,7 +3453,7 @@ redo: /* Same with comment on barrier() in slab_alloc_node() */ barrier(); - if (likely(page == c->page)) { + if (likely(slab == c->slab)) { #ifndef CONFIG_PREEMPT_RT void **freelist = READ_ONCE(c->freelist); @@ -3483,7 +3479,7 @@ redo: local_lock(&s->cpu_slab->lock); c = this_cpu_ptr(s->cpu_slab); - if (unlikely(page != c->page)) { + if (unlikely(slab != c->slab)) { local_unlock(&s->cpu_slab->lock); goto redo; } @@ -3498,11 +3494,11 @@ redo: #endif stat(s, FREE_FASTPATH); } else - __slab_free(s, page, head, tail_obj, cnt, addr); + __slab_free(s, slab, head, tail_obj, cnt, addr); } -static __always_inline void slab_free(struct kmem_cache *s, struct page *page, +static __always_inline void slab_free(struct kmem_cache *s, struct slab *slab, void *head, void *tail, int cnt, unsigned long addr) { @@ -3511,13 +3507,13 @@ static __always_inline void slab_free(struct kmem_cache *s, struct page *page, * to remove objects, whose reuse must be delayed. */ if (slab_free_freelist_hook(s, &head, &tail, &cnt)) - do_slab_free(s, page, head, tail, cnt, addr); + do_slab_free(s, slab, head, tail, cnt, addr); } #ifdef CONFIG_KASAN_GENERIC void ___cache_free(struct kmem_cache *cache, void *x, unsigned long addr) { - do_slab_free(cache, virt_to_head_page(x), x, NULL, 1, addr); + do_slab_free(cache, virt_to_slab(x), x, NULL, 1, addr); } #endif @@ -3527,35 +3523,36 @@ void kmem_cache_free(struct kmem_cache *s, void *x) if (!s) return; trace_kmem_cache_free(_RET_IP_, x, s->name); - slab_free(s, virt_to_head_page(x), x, NULL, 1, _RET_IP_); + slab_free(s, virt_to_slab(x), x, NULL, 1, _RET_IP_); } EXPORT_SYMBOL(kmem_cache_free); struct detached_freelist { - struct page *page; + struct slab *slab; void *tail; void *freelist; int cnt; struct kmem_cache *s; }; -static inline void free_nonslab_page(struct page *page, void *object) +static inline void free_large_kmalloc(struct folio *folio, void *object) { - unsigned int order = compound_order(page); + unsigned int order = folio_order(folio); - if (WARN_ON_ONCE(!PageCompound(page))) + if (WARN_ON_ONCE(order == 0)) pr_warn_once("object pointer: 0x%p\n", object); kfree_hook(object); - mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B, -(PAGE_SIZE << order)); - __free_pages(page, order); + mod_lruvec_page_state(folio_page(folio, 0), NR_SLAB_UNRECLAIMABLE_B, + -(PAGE_SIZE << order)); + __free_pages(folio_page(folio, 0), order); } /* * This function progressively scans the array with free objects (with * a limited look ahead) and extract objects belonging to the same - * page. It builds a detached freelist directly within the given - * page/objects. This can happen without any need for + * slab. It builds a detached freelist directly within the given + * slab/objects. This can happen without any need for * synchronization, because the objects are owned by running process. * The freelist is build up as a single linked list in the objects. * The idea is, that this detached freelist can then be bulk @@ -3570,10 +3567,11 @@ int build_detached_freelist(struct kmem_cache *s, size_t size, size_t first_skipped_index = 0; int lookahead = 3; void *object; - struct page *page; + struct folio *folio; + struct slab *slab; /* Always re-init detached_freelist */ - df->page = NULL; + df->slab = NULL; do { object = p[--size]; @@ -3583,17 +3581,19 @@ int build_detached_freelist(struct kmem_cache *s, size_t size, if (!object) return 0; - page = virt_to_head_page(object); + folio = virt_to_folio(object); if (!s) { /* Handle kalloc'ed objects */ - if (unlikely(!PageSlab(page))) { - free_nonslab_page(page, object); + if (unlikely(!folio_test_slab(folio))) { + free_large_kmalloc(folio, object); p[size] = NULL; /* mark object processed */ return size; } /* Derive kmem_cache from object */ - df->s = page->slab_cache; + slab = folio_slab(folio); + df->s = slab->slab_cache; } else { + slab = folio_slab(folio); df->s = cache_from_obj(s, object); /* Support for memcg */ } @@ -3605,7 +3605,7 @@ int build_detached_freelist(struct kmem_cache *s, size_t size, } /* Start new detached freelist */ - df->page = page; + df->slab = slab; set_freepointer(df->s, object, NULL); df->tail = object; df->freelist = object; @@ -3617,8 +3617,8 @@ int build_detached_freelist(struct kmem_cache *s, size_t size, if (!object) continue; /* Skip processed objects */ - /* df->page is always set at this point */ - if (df->page == virt_to_head_page(object)) { + /* df->slab is always set at this point */ + if (df->slab == virt_to_slab(object)) { /* Opportunity build freelist */ set_freepointer(df->s, object, df->freelist); df->freelist = object; @@ -3650,10 +3650,10 @@ void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p) struct detached_freelist df; size = build_detached_freelist(s, size, p, &df); - if (!df.page) + if (!df.slab) continue; - slab_free(df.s, df.page, df.freelist, df.tail, df.cnt, _RET_IP_); + slab_free(df.s, df.slab, df.freelist, df.tail, df.cnt, _RET_IP_); } while (likely(size)); } EXPORT_SYMBOL(kmem_cache_free_bulk); @@ -3787,7 +3787,7 @@ static unsigned int slub_min_objects; * requested a higher minimum order then we start with that one instead of * the smallest order which will fit the object. */ -static inline unsigned int slab_order(unsigned int size, +static inline unsigned int calc_slab_order(unsigned int size, unsigned int min_objects, unsigned int max_order, unsigned int fract_leftover) { @@ -3851,7 +3851,7 @@ static inline int calculate_order(unsigned int size) fraction = 16; while (fraction >= 4) { - order = slab_order(size, min_objects, + order = calc_slab_order(size, min_objects, slub_max_order, fraction); if (order <= slub_max_order) return order; @@ -3864,14 +3864,14 @@ static inline int calculate_order(unsigned int size) * We were unable to place multiple objects in a slab. Now * lets see if we can place a single object there. */ - order = slab_order(size, 1, slub_max_order, 1); + order = calc_slab_order(size, 1, slub_max_order, 1); if (order <= slub_max_order) return order; /* * Doh this slab cannot be placed using slub_max_order. */ - order = slab_order(size, 1, MAX_ORDER, 1); + order = calc_slab_order(size, 1, MAX_ORDER, 1); if (order < MAX_ORDER) return order; return -ENOSYS; @@ -3923,38 +3923,38 @@ static struct kmem_cache *kmem_cache_node; */ static void early_kmem_cache_node_alloc(int node) { - struct page *page; + struct slab *slab; struct kmem_cache_node *n; BUG_ON(kmem_cache_node->size < sizeof(struct kmem_cache_node)); - page = new_slab(kmem_cache_node, GFP_NOWAIT, node); + slab = new_slab(kmem_cache_node, GFP_NOWAIT, node); - BUG_ON(!page); - if (page_to_nid(page) != node) { + BUG_ON(!slab); + if (slab_nid(slab) != node) { pr_err("SLUB: Unable to allocate memory from node %d\n", node); pr_err("SLUB: Allocating a useless per node structure in order to be able to continue\n"); } - n = page->freelist; + n = slab->freelist; BUG_ON(!n); #ifdef CONFIG_SLUB_DEBUG init_object(kmem_cache_node, n, SLUB_RED_ACTIVE); init_tracking(kmem_cache_node, n); #endif n = kasan_slab_alloc(kmem_cache_node, n, GFP_KERNEL, false); - page->freelist = get_freepointer(kmem_cache_node, n); - page->inuse = 1; - page->frozen = 0; + slab->freelist = get_freepointer(kmem_cache_node, n); + slab->inuse = 1; + slab->frozen = 0; kmem_cache_node->node[node] = n; init_kmem_cache_node(n); - inc_slabs_node(kmem_cache_node, node, page->objects); + inc_slabs_node(kmem_cache_node, node, slab->objects); /* * No locks need to be taken here as it has just been * initialized and there is no concurrent access. */ - __add_partial(n, page, DEACTIVATE_TO_HEAD); + __add_partial(n, slab, DEACTIVATE_TO_HEAD); } static void free_kmem_cache_nodes(struct kmem_cache *s) @@ -4212,7 +4212,7 @@ static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags) #endif /* - * The larger the object size is, the more pages we want on the partial + * The larger the object size is, the more slabs we want on the partial * list to avoid pounding the page allocator excessively. */ set_min_partial(s, ilog2(s->size) / 2); @@ -4240,20 +4240,20 @@ error: return -EINVAL; } -static void list_slab_objects(struct kmem_cache *s, struct page *page, +static void list_slab_objects(struct kmem_cache *s, struct slab *slab, const char *text) { #ifdef CONFIG_SLUB_DEBUG - void *addr = page_address(page); + void *addr = slab_address(slab); unsigned long flags; unsigned long *map; void *p; - slab_err(s, page, text, s->name); - slab_lock(page, &flags); + slab_err(s, slab, text, s->name); + slab_lock(slab, &flags); - map = get_map(s, page); - for_each_object(p, s, addr, page->objects) { + map = get_map(s, slab); + for_each_object(p, s, addr, slab->objects) { if (!test_bit(__obj_to_index(s, addr, p), map)) { pr_err("Object 0x%p @offset=%tu\n", p, p - addr); @@ -4261,7 +4261,7 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page, } } put_map(map); - slab_unlock(page, &flags); + slab_unlock(slab, &flags); #endif } @@ -4273,23 +4273,23 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page, static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n) { LIST_HEAD(discard); - struct page *page, *h; + struct slab *slab, *h; BUG_ON(irqs_disabled()); spin_lock_irq(&n->list_lock); - list_for_each_entry_safe(page, h, &n->partial, slab_list) { - if (!page->inuse) { - remove_partial(n, page); - list_add(&page->slab_list, &discard); + list_for_each_entry_safe(slab, h, &n->partial, slab_list) { + if (!slab->inuse) { + remove_partial(n, slab); + list_add(&slab->slab_list, &discard); } else { - list_slab_objects(s, page, + list_slab_objects(s, slab, "Objects remaining in %s on __kmem_cache_shutdown()"); } } spin_unlock_irq(&n->list_lock); - list_for_each_entry_safe(page, h, &discard, slab_list) - discard_slab(s, page); + list_for_each_entry_safe(slab, h, &discard, slab_list) + discard_slab(s, slab); } bool __kmem_cache_empty(struct kmem_cache *s) @@ -4322,31 +4322,32 @@ int __kmem_cache_shutdown(struct kmem_cache *s) } #ifdef CONFIG_PRINTK -void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct page *page) +void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab) { void *base; int __maybe_unused i; unsigned int objnr; void *objp; void *objp0; - struct kmem_cache *s = page->slab_cache; + struct kmem_cache *s = slab->slab_cache; struct track __maybe_unused *trackp; kpp->kp_ptr = object; - kpp->kp_page = page; + kpp->kp_slab = slab; kpp->kp_slab_cache = s; - base = page_address(page); + base = slab_address(slab); objp0 = kasan_reset_tag(object); #ifdef CONFIG_SLUB_DEBUG objp = restore_red_left(s, objp0); #else objp = objp0; #endif - objnr = obj_to_index(s, page, objp); + objnr = obj_to_index(s, slab, objp); kpp->kp_data_offset = (unsigned long)((char *)objp0 - (char *)objp); objp = base + s->size * objnr; kpp->kp_objp = objp; - if (WARN_ON_ONCE(objp < base || objp >= base + page->objects * s->size || (objp - base) % s->size) || + if (WARN_ON_ONCE(objp < base || objp >= base + slab->objects * s->size + || (objp - base) % s->size) || !(s->flags & SLAB_STORE_USER)) return; #ifdef CONFIG_SLUB_DEBUG @@ -4484,8 +4485,8 @@ EXPORT_SYMBOL(__kmalloc_node); * Returns NULL if check passes, otherwise const char * to name of cache * to indicate an error. */ -void __check_heap_object(const void *ptr, unsigned long n, struct page *page, - bool to_user) +void __check_heap_object(const void *ptr, unsigned long n, + const struct slab *slab, bool to_user) { struct kmem_cache *s; unsigned int offset; @@ -4494,10 +4495,10 @@ void __check_heap_object(const void *ptr, unsigned long n, struct page *page, ptr = kasan_reset_tag(ptr); /* Find object and usable object size. */ - s = page->slab_cache; + s = slab->slab_cache; /* Reject impossible pointers. */ - if (ptr < page_address(page)) + if (ptr < slab_address(slab)) usercopy_abort("SLUB object not in SLUB page?!", NULL, to_user, 0, n); @@ -4505,7 +4506,7 @@ void __check_heap_object(const void *ptr, unsigned long n, struct page *page, if (is_kfence) offset = ptr - kfence_object_start(ptr); else - offset = (ptr - page_address(page)) % s->size; + offset = (ptr - slab_address(slab)) % s->size; /* Adjust for redzone and reject if within the redzone. */ if (!is_kfence && kmem_cache_debug_flags(s, SLAB_RED_ZONE)) { @@ -4527,25 +4528,24 @@ void __check_heap_object(const void *ptr, unsigned long n, struct page *page, size_t __ksize(const void *object) { - struct page *page; + struct folio *folio; if (unlikely(object == ZERO_SIZE_PTR)) return 0; - page = virt_to_head_page(object); + folio = virt_to_folio(object); - if (unlikely(!PageSlab(page))) { - WARN_ON(!PageCompound(page)); - return page_size(page); - } + if (unlikely(!folio_test_slab(folio))) + return folio_size(folio); - return slab_ksize(page->slab_cache); + return slab_ksize(folio_slab(folio)->slab_cache); } EXPORT_SYMBOL(__ksize); void kfree(const void *x) { - struct page *page; + struct folio *folio; + struct slab *slab; void *object = (void *)x; trace_kfree(_RET_IP_, x); @@ -4553,12 +4553,13 @@ void kfree(const void *x) if (unlikely(ZERO_OR_NULL_PTR(x))) return; - page = virt_to_head_page(x); - if (unlikely(!PageSlab(page))) { - free_nonslab_page(page, object); + folio = virt_to_folio(x); + if (unlikely(!folio_test_slab(folio))) { + free_large_kmalloc(folio, object); return; } - slab_free(page->slab_cache, page, object, NULL, 1, _RET_IP_); + slab = folio_slab(folio); + slab_free(slab->slab_cache, slab, object, NULL, 1, _RET_IP_); } EXPORT_SYMBOL(kfree); @@ -4578,8 +4579,8 @@ static int __kmem_cache_do_shrink(struct kmem_cache *s) int node; int i; struct kmem_cache_node *n; - struct page *page; - struct page *t; + struct slab *slab; + struct slab *t; struct list_head discard; struct list_head promote[SHRINK_PROMOTE_MAX]; unsigned long flags; @@ -4596,22 +4597,22 @@ static int __kmem_cache_do_shrink(struct kmem_cache *s) * Build lists of slabs to discard or promote. * * Note that concurrent frees may occur while we hold the - * list_lock. page->inuse here is the upper limit. + * list_lock. slab->inuse here is the upper limit. */ - list_for_each_entry_safe(page, t, &n->partial, slab_list) { - int free = page->objects - page->inuse; + list_for_each_entry_safe(slab, t, &n->partial, slab_list) { + int free = slab->objects - slab->inuse; - /* Do not reread page->inuse */ + /* Do not reread slab->inuse */ barrier(); /* We do not keep full slabs on the list */ BUG_ON(free <= 0); - if (free == page->objects) { - list_move(&page->slab_list, &discard); + if (free == slab->objects) { + list_move(&slab->slab_list, &discard); n->nr_partial--; } else if (free <= SHRINK_PROMOTE_MAX) - list_move(&page->slab_list, promote + free - 1); + list_move(&slab->slab_list, promote + free - 1); } /* @@ -4624,8 +4625,8 @@ static int __kmem_cache_do_shrink(struct kmem_cache *s) spin_unlock_irqrestore(&n->list_lock, flags); /* Release empty slabs */ - list_for_each_entry_safe(page, t, &discard, slab_list) - discard_slab(s, page); + list_for_each_entry_safe(slab, t, &discard, slab_list) + discard_slab(s, slab); if (slabs_node(s, node)) ret = 1; @@ -4786,7 +4787,7 @@ static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache) */ __flush_cpu_slab(s, smp_processor_id()); for_each_kmem_cache_node(s, node, n) { - struct page *p; + struct slab *p; list_for_each_entry(p, &n->partial, slab_list) p->slab_cache = s; @@ -4964,54 +4965,54 @@ EXPORT_SYMBOL(__kmalloc_node_track_caller); #endif #ifdef CONFIG_SYSFS -static int count_inuse(struct page *page) +static int count_inuse(struct slab *slab) { - return page->inuse; + return slab->inuse; } -static int count_total(struct page *page) +static int count_total(struct slab *slab) { - return page->objects; + return slab->objects; } #endif #ifdef CONFIG_SLUB_DEBUG -static void validate_slab(struct kmem_cache *s, struct page *page, +static void validate_slab(struct kmem_cache *s, struct slab *slab, unsigned long *obj_map) { void *p; - void *addr = page_address(page); + void *addr = slab_address(slab); unsigned long flags; - slab_lock(page, &flags); + slab_lock(slab, &flags); - if (!check_slab(s, page) || !on_freelist(s, page, NULL)) + if (!check_slab(s, slab) || !on_freelist(s, slab, NULL)) goto unlock; /* Now we know that a valid freelist exists */ - __fill_map(obj_map, s, page); - for_each_object(p, s, addr, page->objects) { + __fill_map(obj_map, s, slab); + for_each_object(p, s, addr, slab->objects) { u8 val = test_bit(__obj_to_index(s, addr, p), obj_map) ? SLUB_RED_INACTIVE : SLUB_RED_ACTIVE; - if (!check_object(s, page, p, val)) + if (!check_object(s, slab, p, val)) break; } unlock: - slab_unlock(page, &flags); + slab_unlock(slab, &flags); } static int validate_slab_node(struct kmem_cache *s, struct kmem_cache_node *n, unsigned long *obj_map) { unsigned long count = 0; - struct page *page; + struct slab *slab; unsigned long flags; spin_lock_irqsave(&n->list_lock, flags); - list_for_each_entry(page, &n->partial, slab_list) { - validate_slab(s, page, obj_map); + list_for_each_entry(slab, &n->partial, slab_list) { + validate_slab(s, slab, obj_map); count++; } if (count != n->nr_partial) { @@ -5023,8 +5024,8 @@ static int validate_slab_node(struct kmem_cache *s, if (!(s->flags & SLAB_STORE_USER)) goto out; - list_for_each_entry(page, &n->full, slab_list) { - validate_slab(s, page, obj_map); + list_for_each_entry(slab, &n->full, slab_list) { + validate_slab(s, slab, obj_map); count++; } if (count != atomic_long_read(&n->nr_slabs)) { @@ -5081,6 +5082,7 @@ struct loc_track { unsigned long max; unsigned long count; struct location *loc; + loff_t idx; }; static struct dentry *slab_debugfs_root; @@ -5189,15 +5191,15 @@ static int add_location(struct loc_track *t, struct kmem_cache *s, } static void process_slab(struct loc_track *t, struct kmem_cache *s, - struct page *page, enum track_item alloc, + struct slab *slab, enum track_item alloc, unsigned long *obj_map) { - void *addr = page_address(page); + void *addr = slab_address(slab); void *p; - __fill_map(obj_map, s, page); + __fill_map(obj_map, s, slab); - for_each_object(p, s, addr, page->objects) + for_each_object(p, s, addr, slab->objects) if (!test_bit(__obj_to_index(s, addr, p), obj_map)) add_location(t, s, get_track(s, p, alloc)); } @@ -5239,35 +5241,37 @@ static ssize_t show_slab_objects(struct kmem_cache *s, struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); int node; - struct page *page; + struct slab *slab; - page = READ_ONCE(c->page); - if (!page) + slab = READ_ONCE(c->slab); + if (!slab) continue; - node = page_to_nid(page); + node = slab_nid(slab); if (flags & SO_TOTAL) - x = page->objects; + x = slab->objects; else if (flags & SO_OBJECTS) - x = page->inuse; + x = slab->inuse; else x = 1; total += x; nodes[node] += x; - page = slub_percpu_partial_read_once(c); - if (page) { - node = page_to_nid(page); +#ifdef CONFIG_SLUB_CPU_PARTIAL + slab = slub_percpu_partial_read_once(c); + if (slab) { + node = slab_nid(slab); if (flags & SO_TOTAL) WARN_ON_ONCE(1); else if (flags & SO_OBJECTS) WARN_ON_ONCE(1); else - x = page->pages; + x = slab->slabs; total += x; nodes[node] += x; } +#endif } } @@ -5466,33 +5470,35 @@ SLAB_ATTR_RO(objects_partial); static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf) { int objects = 0; - int pages = 0; - int cpu; + int slabs = 0; + int cpu __maybe_unused; int len = 0; +#ifdef CONFIG_SLUB_CPU_PARTIAL for_each_online_cpu(cpu) { - struct page *page; + struct slab *slab; - page = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu)); + slab = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu)); - if (page) - pages += page->pages; + if (slab) + slabs += slab->slabs; } +#endif - /* Approximate half-full pages , see slub_set_cpu_partial() */ - objects = (pages * oo_objects(s->oo)) / 2; - len += sysfs_emit_at(buf, len, "%d(%d)", objects, pages); + /* Approximate half-full slabs, see slub_set_cpu_partial() */ + objects = (slabs * oo_objects(s->oo)) / 2; + len += sysfs_emit_at(buf, len, "%d(%d)", objects, slabs); -#ifdef CONFIG_SMP +#if defined(CONFIG_SLUB_CPU_PARTIAL) && defined(CONFIG_SMP) for_each_online_cpu(cpu) { - struct page *page; + struct slab *slab; - page = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu)); - if (page) { - pages = READ_ONCE(page->pages); - objects = (pages * oo_objects(s->oo)) / 2; + slab = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu)); + if (slab) { + slabs = READ_ONCE(slab->slabs); + objects = (slabs * oo_objects(s->oo)) / 2; len += sysfs_emit_at(buf, len, " C%d=%d(%d)", - cpu, objects, pages); + cpu, objects, slabs); } } #endif @@ -6052,11 +6058,11 @@ __initcall(slab_sysfs_init); #if defined(CONFIG_SLUB_DEBUG) && defined(CONFIG_DEBUG_FS) static int slab_debugfs_show(struct seq_file *seq, void *v) { - - struct location *l; - unsigned int idx = *(unsigned int *)v; struct loc_track *t = seq->private; + struct location *l; + unsigned long idx; + idx = (unsigned long) t->idx; if (idx < t->count) { l = &t->loc[idx]; @@ -6105,16 +6111,18 @@ static void *slab_debugfs_next(struct seq_file *seq, void *v, loff_t *ppos) { struct loc_track *t = seq->private; - v = ppos; - ++*ppos; + t->idx = ++(*ppos); if (*ppos <= t->count) - return v; + return ppos; return NULL; } static void *slab_debugfs_start(struct seq_file *seq, loff_t *ppos) { + struct loc_track *t = seq->private; + + t->idx = *ppos; return ppos; } @@ -6158,16 +6166,16 @@ static int slab_debug_trace_open(struct inode *inode, struct file *filep) for_each_kmem_cache_node(s, node, n) { unsigned long flags; - struct page *page; + struct slab *slab; if (!atomic_long_read(&n->nr_slabs)) continue; spin_lock_irqsave(&n->list_lock, flags); - list_for_each_entry(page, &n->partial, slab_list) - process_slab(t, s, page, alloc, obj_map); - list_for_each_entry(page, &n->full, slab_list) - process_slab(t, s, page, alloc, obj_map); + list_for_each_entry(slab, &n->partial, slab_list) + process_slab(t, s, slab, alloc, obj_map); + list_for_each_entry(slab, &n->full, slab_list) + process_slab(t, s, slab, alloc, obj_map); spin_unlock_irqrestore(&n->list_lock, flags); } |