Diffstat (limited to 'mm')
-rw-r--r--  mm/Kconfig                   2
-rw-r--r--  mm/Makefile                  2
-rw-r--r--  mm/backing-dev.c             7
-rw-r--r--  mm/bootmem_info.c            7
-rw-r--r--  mm/damon/core.c             20
-rw-r--r--  mm/damon/dbgfs.c            15
-rw-r--r--  mm/damon/vaddr-test.h       79
-rw-r--r--  mm/damon/vaddr.c             2
-rw-r--r--  mm/filemap.c                34
-rw-r--r--  mm/hugetlb.c                 2
-rw-r--r--  mm/kasan/common.c           27
-rw-r--r--  mm/kasan/generic.c           8
-rw-r--r--  mm/kasan/kasan.h             1
-rw-r--r--  mm/kasan/quarantine.c        2
-rw-r--r--  mm/kasan/report.c           13
-rw-r--r--  mm/kasan/report_tags.c      10
-rw-r--r--  mm/kfence/core.c            18
-rw-r--r--  mm/kfence/kfence_test.c      6
-rw-r--r--  mm/memcontrol.c            161
-rw-r--r--  mm/memory-failure.c         33
-rw-r--r--  mm/memory_hotplug.c          1
-rw-r--r--  mm/mempolicy.c               3
-rw-r--r--  mm/slab.c                  456
-rw-r--r--  mm/slab.h                  302
-rw-r--r--  mm/slab_common.c            14
-rw-r--r--  mm/slob.c                   62
-rw-r--r--  mm/slub.c                 1192
-rw-r--r--  mm/sparse.c                  2
-rw-r--r--  mm/swap_slots.c              1
-rw-r--r--  mm/usercopy.c               13
-rw-r--r--  mm/vmscan.c                 65
-rw-r--r--  mm/zsmalloc.c               18
32 files changed, 1428 insertions(+), 1150 deletions(-)
diff --git a/mm/Kconfig b/mm/Kconfig
index 28edafc820ad..356f4f2c779e 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -428,7 +428,7 @@ config THP_SWAP
# UP and nommu archs use km based percpu allocator
#
config NEED_PER_CPU_KM
- depends on !SMP
+ depends on !SMP || !MMU
bool
default y
diff --git a/mm/Makefile b/mm/Makefile
index d6c0042e3aa0..7919cd7f13f2 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -15,6 +15,8 @@ KCSAN_SANITIZE_slab_common.o := n
KCSAN_SANITIZE_slab.o := n
KCSAN_SANITIZE_slub.o := n
KCSAN_SANITIZE_page_alloc.o := n
+# But enable explicit instrumentation for memory barriers.
+KCSAN_INSTRUMENT_BARRIERS := y
# These files are disabled because they produce non-interesting and/or
# flaky coverage that is not a function of syscall inputs. E.g. slab is out of
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 1eead4761011..eae96dfe0261 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -945,6 +945,13 @@ void bdi_unregister(struct backing_dev_info *bdi)
wb_shutdown(&bdi->wb);
cgwb_bdi_unregister(bdi);
+ /*
+ * If this BDI's min ratio has been set, use bdi_set_min_ratio() to
+ * update the global bdi_min_ratio.
+ */
+ if (bdi->min_ratio)
+ bdi_set_min_ratio(bdi, 0);
+
if (bdi->dev) {
bdi_debug_unregister(bdi);
device_unregister(bdi->dev);
diff --git a/mm/bootmem_info.c b/mm/bootmem_info.c
index f03f42f426f6..f18a631e7479 100644
--- a/mm/bootmem_info.c
+++ b/mm/bootmem_info.c
@@ -15,7 +15,7 @@
void get_page_bootmem(unsigned long info, struct page *page, unsigned long type)
{
- page->freelist = (void *)type;
+ page->index = type;
SetPagePrivate(page);
set_page_private(page, info);
page_ref_inc(page);
@@ -23,14 +23,13 @@ void get_page_bootmem(unsigned long info, struct page *page, unsigned long type)
void put_page_bootmem(struct page *page)
{
- unsigned long type;
+ unsigned long type = page->index;
- type = (unsigned long) page->freelist;
BUG_ON(type < MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE ||
type > MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE);
if (page_ref_dec_return(page) == 1) {
- page->freelist = NULL;
+ page->index = 0;
ClearPagePrivate(page);
set_page_private(page, 0);
INIT_LIST_HEAD(&page->lru);
diff --git a/mm/damon/core.c b/mm/damon/core.c
index c381b3c525d0..e92497895202 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -282,7 +282,6 @@ int damon_set_targets(struct damon_ctx *ctx,
for (i = 0; i < nr_ids; i++) {
t = damon_new_target(ids[i]);
if (!t) {
- pr_err("Failed to alloc damon_target\n");
/* The caller should do cleanup of the ids itself */
damon_for_each_target_safe(t, next, ctx)
damon_destroy_target(t);
@@ -312,16 +311,10 @@ int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int,
unsigned long aggr_int, unsigned long primitive_upd_int,
unsigned long min_nr_reg, unsigned long max_nr_reg)
{
- if (min_nr_reg < 3) {
- pr_err("min_nr_regions (%lu) must be at least 3\n",
- min_nr_reg);
+ if (min_nr_reg < 3)
return -EINVAL;
- }
- if (min_nr_reg > max_nr_reg) {
- pr_err("invalid nr_regions. min (%lu) > max (%lu)\n",
- min_nr_reg, max_nr_reg);
+ if (min_nr_reg > max_nr_reg)
return -EINVAL;
- }
ctx->sample_interval = sample_int;
ctx->aggr_interval = aggr_int;
@@ -980,10 +973,11 @@ static unsigned long damos_wmark_wait_us(struct damos *scheme)
static void kdamond_usleep(unsigned long usecs)
{
- if (usecs > 100 * 1000)
- schedule_timeout_interruptible(usecs_to_jiffies(usecs));
+ /* See Documentation/timers/timers-howto.rst for the thresholds */
+ if (usecs > 20 * USEC_PER_MSEC)
+ schedule_timeout_idle(usecs_to_jiffies(usecs));
else
- usleep_range(usecs, usecs + 1);
+ usleep_idle_range(usecs, usecs + 1);
}
/* Returns negative error code if it's not activated but should return */
@@ -1038,7 +1032,7 @@ static int kdamond_fn(void *data)
ctx->callback.after_sampling(ctx))
done = true;
- usleep_range(ctx->sample_interval, ctx->sample_interval + 1);
+ kdamond_usleep(ctx->sample_interval);
if (ctx->primitive.check_accesses)
max_nr_accesses = ctx->primitive.check_accesses(ctx);
diff --git a/mm/damon/dbgfs.c b/mm/damon/dbgfs.c
index 9b520bb4a3e7..ad65436756af 100644
--- a/mm/damon/dbgfs.c
+++ b/mm/damon/dbgfs.c
@@ -210,10 +210,8 @@ static struct damos **str_to_schemes(const char *str, ssize_t len,
&wmarks.low, &parsed);
if (ret != 18)
break;
- if (!damos_action_valid(action)) {
- pr_err("wrong action %d\n", action);
+ if (!damos_action_valid(action))
goto fail;
- }
pos += parsed;
scheme = damon_new_scheme(min_sz, max_sz, min_nr_a, max_nr_a,
@@ -355,6 +353,7 @@ static ssize_t dbgfs_target_ids_write(struct file *file,
const char __user *buf, size_t count, loff_t *ppos)
{
struct damon_ctx *ctx = file->private_data;
+ struct damon_target *t, *next_t;
bool id_is_pid = true;
char *kbuf, *nrs;
unsigned long *targets;
@@ -399,8 +398,12 @@ static ssize_t dbgfs_target_ids_write(struct file *file,
goto unlock_out;
}
- /* remove targets with previously-set primitive */
- damon_set_targets(ctx, NULL, 0);
+ /* remove previously set targets */
+ damon_for_each_target_safe(t, next_t, ctx) {
+ if (targetid_is_pid(ctx))
+ put_pid((struct pid *)t->id);
+ damon_destroy_target(t);
+ }
/* Configure the context for the address space type */
if (id_is_pid)
@@ -652,10 +655,12 @@ static void dbgfs_before_terminate(struct damon_ctx *ctx)
if (!targetid_is_pid(ctx))
return;
+ mutex_lock(&ctx->kdamond_lock);
damon_for_each_target_safe(t, next, ctx) {
put_pid((struct pid *)t->id);
damon_destroy_target(t);
}
+ mutex_unlock(&ctx->kdamond_lock);
}
static struct damon_ctx *dbgfs_new_ctx(void)
diff --git a/mm/damon/vaddr-test.h b/mm/damon/vaddr-test.h
index ecfd0b2ed222..6a1b9272ea12 100644
--- a/mm/damon/vaddr-test.h
+++ b/mm/damon/vaddr-test.h
@@ -135,7 +135,6 @@ static void damon_do_test_apply_three_regions(struct kunit *test,
struct damon_addr_range *three_regions,
unsigned long *expected, int nr_expected)
{
- struct damon_ctx *ctx = damon_new_ctx();
struct damon_target *t;
struct damon_region *r;
int i;
@@ -145,7 +144,6 @@ static void damon_do_test_apply_three_regions(struct kunit *test,
r = damon_new_region(regions[i * 2], regions[i * 2 + 1]);
damon_add_region(r, t);
}
- damon_add_target(ctx, t);
damon_va_apply_three_regions(t, three_regions);
@@ -154,8 +152,6 @@ static void damon_do_test_apply_three_regions(struct kunit *test,
KUNIT_EXPECT_EQ(test, r->ar.start, expected[i * 2]);
KUNIT_EXPECT_EQ(test, r->ar.end, expected[i * 2 + 1]);
}
-
- damon_destroy_ctx(ctx);
}
/*
@@ -252,60 +248,59 @@ static void damon_test_apply_three_regions4(struct kunit *test)
new_three_regions, expected, ARRAY_SIZE(expected));
}
-static void damon_test_split_evenly(struct kunit *test)
+static void damon_test_split_evenly_fail(struct kunit *test,
+ unsigned long start, unsigned long end, unsigned int nr_pieces)
{
- struct damon_ctx *c = damon_new_ctx();
- struct damon_target *t;
- struct damon_region *r;
- unsigned long i;
-
- KUNIT_EXPECT_EQ(test, damon_va_evenly_split_region(NULL, NULL, 5),
- -EINVAL);
-
- t = damon_new_target(42);
- r = damon_new_region(0, 100);
- KUNIT_EXPECT_EQ(test, damon_va_evenly_split_region(t, r, 0), -EINVAL);
+ struct damon_target *t = damon_new_target(42);
+ struct damon_region *r = damon_new_region(start, end);
damon_add_region(r, t);
- KUNIT_EXPECT_EQ(test, damon_va_evenly_split_region(t, r, 10), 0);
- KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 10u);
+ KUNIT_EXPECT_EQ(test,
+ damon_va_evenly_split_region(t, r, nr_pieces), -EINVAL);
+ KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 1u);
- i = 0;
damon_for_each_region(r, t) {
- KUNIT_EXPECT_EQ(test, r->ar.start, i++ * 10);
- KUNIT_EXPECT_EQ(test, r->ar.end, i * 10);
+ KUNIT_EXPECT_EQ(test, r->ar.start, start);
+ KUNIT_EXPECT_EQ(test, r->ar.end, end);
}
+
damon_free_target(t);
+}
+
+static void damon_test_split_evenly_succ(struct kunit *test,
+ unsigned long start, unsigned long end, unsigned int nr_pieces)
+{
+ struct damon_target *t = damon_new_target(42);
+ struct damon_region *r = damon_new_region(start, end);
+ unsigned long expected_width = (end - start) / nr_pieces;
+ unsigned long i = 0;
- t = damon_new_target(42);
- r = damon_new_region(5, 59);
damon_add_region(r, t);
- KUNIT_EXPECT_EQ(test, damon_va_evenly_split_region(t, r, 5), 0);
- KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 5u);
+ KUNIT_EXPECT_EQ(test,
+ damon_va_evenly_split_region(t, r, nr_pieces), 0);
+ KUNIT_EXPECT_EQ(test, damon_nr_regions(t), nr_pieces);
- i = 0;
damon_for_each_region(r, t) {
- if (i == 4)
+ if (i == nr_pieces - 1)
break;
- KUNIT_EXPECT_EQ(test, r->ar.start, 5 + 10 * i++);
- KUNIT_EXPECT_EQ(test, r->ar.end, 5 + 10 * i);
+ KUNIT_EXPECT_EQ(test,
+ r->ar.start, start + i++ * expected_width);
+ KUNIT_EXPECT_EQ(test, r->ar.end, start + i * expected_width);
}
- KUNIT_EXPECT_EQ(test, r->ar.start, 5 + 10 * i);
- KUNIT_EXPECT_EQ(test, r->ar.end, 59ul);
+ KUNIT_EXPECT_EQ(test, r->ar.start, start + i * expected_width);
+ KUNIT_EXPECT_EQ(test, r->ar.end, end);
damon_free_target(t);
+}
- t = damon_new_target(42);
- r = damon_new_region(5, 6);
- damon_add_region(r, t);
- KUNIT_EXPECT_EQ(test, damon_va_evenly_split_region(t, r, 2), -EINVAL);
- KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 1u);
+static void damon_test_split_evenly(struct kunit *test)
+{
+ KUNIT_EXPECT_EQ(test, damon_va_evenly_split_region(NULL, NULL, 5),
+ -EINVAL);
- damon_for_each_region(r, t) {
- KUNIT_EXPECT_EQ(test, r->ar.start, 5ul);
- KUNIT_EXPECT_EQ(test, r->ar.end, 6ul);
- }
- damon_free_target(t);
- damon_destroy_ctx(c);
+ damon_test_split_evenly_fail(test, 0, 100, 0);
+ damon_test_split_evenly_succ(test, 0, 100, 10);
+ damon_test_split_evenly_succ(test, 5, 59, 5);
+ damon_test_split_evenly_fail(test, 5, 6, 2);
}
static struct kunit_case damon_test_cases[] = {
diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c
index 35fe49080ee9..20a9a9d69eb1 100644
--- a/mm/damon/vaddr.c
+++ b/mm/damon/vaddr.c
@@ -13,6 +13,7 @@
#include <linux/mmu_notifier.h>
#include <linux/page_idle.h>
#include <linux/pagewalk.h>
+#include <linux/sched/mm.h>
#include "prmtv-common.h"
@@ -626,7 +627,6 @@ int damon_va_apply_scheme(struct damon_ctx *ctx, struct damon_target *t,
case DAMOS_STAT:
return 0;
default:
- pr_warn("Wrong action %d\n", scheme->action);
return -EINVAL;
}
diff --git a/mm/filemap.c b/mm/filemap.c
index daa0e23a6ee6..3baf03c0f608 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -646,8 +646,8 @@ static bool mapping_needs_writeback(struct address_space *mapping)
return mapping->nrpages;
}
-static bool filemap_range_has_writeback(struct address_space *mapping,
- loff_t start_byte, loff_t end_byte)
+bool filemap_range_has_writeback(struct address_space *mapping,
+ loff_t start_byte, loff_t end_byte)
{
XA_STATE(xas, &mapping->i_pages, start_byte >> PAGE_SHIFT);
pgoff_t max = end_byte >> PAGE_SHIFT;
@@ -667,34 +667,8 @@ static bool filemap_range_has_writeback(struct address_space *mapping,
}
rcu_read_unlock();
return page != NULL;
-
-}
-
-/**
- * filemap_range_needs_writeback - check if range potentially needs writeback
- * @mapping: address space within which to check
- * @start_byte: offset in bytes where the range starts
- * @end_byte: offset in bytes where the range ends (inclusive)
- *
- * Find at least one page in the range supplied, usually used to check if
- * direct writing in this range will trigger a writeback. Used by O_DIRECT
- * read/write with IOCB_NOWAIT, to see if the caller needs to do
- * filemap_write_and_wait_range() before proceeding.
- *
- * Return: %true if the caller should do filemap_write_and_wait_range() before
- * doing O_DIRECT to a page in this range, %false otherwise.
- */
-bool filemap_range_needs_writeback(struct address_space *mapping,
- loff_t start_byte, loff_t end_byte)
-{
- if (!mapping_needs_writeback(mapping))
- return false;
- if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) &&
- !mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK))
- return false;
- return filemap_range_has_writeback(mapping, start_byte, end_byte);
}
-EXPORT_SYMBOL_GPL(filemap_range_needs_writeback);
+EXPORT_SYMBOL_GPL(filemap_range_has_writeback);
/**
* filemap_write_and_wait_range - write out & wait on a file range
@@ -3253,8 +3227,6 @@ static struct page *next_uptodate_page(struct page *page,
goto skip;
if (!PageUptodate(page) || PageReadahead(page))
goto skip;
- if (PageHWPoison(page))
- goto skip;
if (!trylock_page(page))
goto skip;
if (page->mapping != mapping)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index abcd1785c629..a1baa198519a 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2973,7 +2973,7 @@ int __alloc_bootmem_huge_page(struct hstate *h, int nid)
struct huge_bootmem_page *m = NULL; /* initialize for clang */
int nr_nodes, node;
- if (nid >= nr_online_nodes)
+ if (nid != NUMA_NO_NODE && nid >= nr_online_nodes)
return 0;
/* do node specific alloc */
if (nid != NUMA_NO_NODE) {
diff --git a/mm/kasan/common.c b/mm/kasan/common.c
index 8428da2aaf17..7c06db78a76c 100644
--- a/mm/kasan/common.c
+++ b/mm/kasan/common.c
@@ -247,8 +247,9 @@ struct kasan_free_meta *kasan_get_free_meta(struct kmem_cache *cache,
}
#endif
-void __kasan_poison_slab(struct page *page)
+void __kasan_poison_slab(struct slab *slab)
{
+ struct page *page = slab_page(slab);
unsigned long i;
for (i = 0; i < compound_nr(page); i++)
@@ -298,7 +299,7 @@ static inline u8 assign_tag(struct kmem_cache *cache,
/* For caches that either have a constructor or SLAB_TYPESAFE_BY_RCU: */
#ifdef CONFIG_SLAB
/* For SLAB assign tags based on the object index in the freelist. */
- return (u8)obj_to_index(cache, virt_to_head_page(object), (void *)object);
+ return (u8)obj_to_index(cache, virt_to_slab(object), (void *)object);
#else
/*
* For SLUB assign a random tag during slab creation, otherwise reuse
@@ -341,7 +342,7 @@ static inline bool ____kasan_slab_free(struct kmem_cache *cache, void *object,
if (is_kfence_address(object))
return false;
- if (unlikely(nearest_obj(cache, virt_to_head_page(object), object) !=
+ if (unlikely(nearest_obj(cache, virt_to_slab(object), object) !=
object)) {
kasan_report_invalid_free(tagged_object, ip);
return true;
@@ -401,9 +402,9 @@ void __kasan_kfree_large(void *ptr, unsigned long ip)
void __kasan_slab_free_mempool(void *ptr, unsigned long ip)
{
- struct page *page;
+ struct folio *folio;
- page = virt_to_head_page(ptr);
+ folio = virt_to_folio(ptr);
/*
* Even though this function is only called for kmem_cache_alloc and
@@ -411,12 +412,14 @@ void __kasan_slab_free_mempool(void *ptr, unsigned long ip)
* !PageSlab() when the size provided to kmalloc is larger than
* KMALLOC_MAX_SIZE, and kmalloc falls back onto page_alloc.
*/
- if (unlikely(!PageSlab(page))) {
+ if (unlikely(!folio_test_slab(folio))) {
if (____kasan_kfree_large(ptr, ip))
return;
- kasan_poison(ptr, page_size(page), KASAN_FREE_PAGE, false);
+ kasan_poison(ptr, folio_size(folio), KASAN_FREE_PAGE, false);
} else {
- ____kasan_slab_free(page->slab_cache, ptr, ip, false, false);
+ struct slab *slab = folio_slab(folio);
+
+ ____kasan_slab_free(slab->slab_cache, ptr, ip, false, false);
}
}
@@ -560,7 +563,7 @@ void * __must_check __kasan_kmalloc_large(const void *ptr, size_t size,
void * __must_check __kasan_krealloc(const void *object, size_t size, gfp_t flags)
{
- struct page *page;
+ struct slab *slab;
if (unlikely(object == ZERO_SIZE_PTR))
return (void *)object;
@@ -572,13 +575,13 @@ void * __must_check __kasan_krealloc(const void *object, size_t size, gfp_t flag
*/
kasan_unpoison(object, size, false);
- page = virt_to_head_page(object);
+ slab = virt_to_slab(object);
/* Piggy-back on kmalloc() instrumentation to poison the redzone. */
- if (unlikely(!PageSlab(page)))
+ if (unlikely(!slab))
return __kasan_kmalloc_large(object, size, flags);
else
- return ____kasan_kmalloc(page->slab_cache, object, size, flags);
+ return ____kasan_kmalloc(slab->slab_cache, object, size, flags);
}
bool __kasan_check_byte(const void *address, unsigned long ip)
diff --git a/mm/kasan/generic.c b/mm/kasan/generic.c
index 84a038b07c6f..a25ad4090615 100644
--- a/mm/kasan/generic.c
+++ b/mm/kasan/generic.c
@@ -330,16 +330,16 @@ DEFINE_ASAN_SET_SHADOW(f8);
static void __kasan_record_aux_stack(void *addr, bool can_alloc)
{
- struct page *page = kasan_addr_to_page(addr);
+ struct slab *slab = kasan_addr_to_slab(addr);
struct kmem_cache *cache;
struct kasan_alloc_meta *alloc_meta;
void *object;
- if (is_kfence_address(addr) || !(page && PageSlab(page)))
+ if (is_kfence_address(addr) || !slab)
return;
- cache = page->slab_cache;
- object = nearest_obj(cache, page, addr);
+ cache = slab->slab_cache;
+ object = nearest_obj(cache, slab, addr);
alloc_meta = kasan_get_alloc_meta(cache, object);
if (!alloc_meta)
return;
diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h
index aebd8df86a1f..c17fa8d26ffe 100644
--- a/mm/kasan/kasan.h
+++ b/mm/kasan/kasan.h
@@ -265,6 +265,7 @@ bool kasan_report(unsigned long addr, size_t size,
void kasan_report_invalid_free(void *object, unsigned long ip);
struct page *kasan_addr_to_page(const void *addr);
+struct slab *kasan_addr_to_slab(const void *addr);
depot_stack_handle_t kasan_save_stack(gfp_t flags, bool can_alloc);
void kasan_set_track(struct kasan_track *track, gfp_t flags);
diff --git a/mm/kasan/quarantine.c b/mm/kasan/quarantine.c
index d8ccff4c1275..587da8995f2d 100644
--- a/mm/kasan/quarantine.c
+++ b/mm/kasan/quarantine.c
@@ -117,7 +117,7 @@ static unsigned long quarantine_batch_size;
static struct kmem_cache *qlink_to_cache(struct qlist_node *qlink)
{
- return virt_to_head_page(qlink)->slab_cache;
+ return virt_to_slab(qlink)->slab_cache;
}
static void *qlink_to_object(struct qlist_node *qlink, struct kmem_cache *cache)
diff --git a/mm/kasan/report.c b/mm/kasan/report.c
index 0bc10f452f7e..3ad9624dcc56 100644
--- a/mm/kasan/report.c
+++ b/mm/kasan/report.c
@@ -150,6 +150,14 @@ struct page *kasan_addr_to_page(const void *addr)
return NULL;
}
+struct slab *kasan_addr_to_slab(const void *addr)
+{
+ if ((addr >= (void *)PAGE_OFFSET) &&
+ (addr < high_memory))
+ return virt_to_slab(addr);
+ return NULL;
+}
+
static void describe_object_addr(struct kmem_cache *cache, void *object,
const void *addr)
{
@@ -248,8 +256,9 @@ static void print_address_description(void *addr, u8 tag)
pr_err("\n");
if (page && PageSlab(page)) {
- struct kmem_cache *cache = page->slab_cache;
- void *object = nearest_obj(cache, page, addr);
+ struct slab *slab = page_slab(page);
+ struct kmem_cache *cache = slab->slab_cache;
+ void *object = nearest_obj(cache, slab, addr);
describe_object(cache, object, addr, tag);
}
diff --git a/mm/kasan/report_tags.c b/mm/kasan/report_tags.c
index 8a319fc16dab..1b41de88c53e 100644
--- a/mm/kasan/report_tags.c
+++ b/mm/kasan/report_tags.c
@@ -12,7 +12,7 @@ const char *kasan_get_bug_type(struct kasan_access_info *info)
#ifdef CONFIG_KASAN_TAGS_IDENTIFY
struct kasan_alloc_meta *alloc_meta;
struct kmem_cache *cache;
- struct page *page;
+ struct slab *slab;
const void *addr;
void *object;
u8 tag;
@@ -20,10 +20,10 @@ const char *kasan_get_bug_type(struct kasan_access_info *info)
tag = get_tag(info->access_addr);
addr = kasan_reset_tag(info->access_addr);
- page = kasan_addr_to_page(addr);
- if (page && PageSlab(page)) {
- cache = page->slab_cache;
- object = nearest_obj(cache, page, (void *)addr);
+ slab = kasan_addr_to_slab(addr);
+ if (slab) {
+ cache = slab->slab_cache;
+ object = nearest_obj(cache, slab, (void *)addr);
alloc_meta = kasan_get_alloc_meta(cache, object);
if (alloc_meta) {
diff --git a/mm/kfence/core.c b/mm/kfence/core.c
index 09945784df9e..5ad40e3add45 100644
--- a/mm/kfence/core.c
+++ b/mm/kfence/core.c
@@ -360,7 +360,7 @@ static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t g
{
struct kfence_metadata *meta = NULL;
unsigned long flags;
- struct page *page;
+ struct slab *slab;
void *addr;
/* Try to obtain a free object. */
@@ -424,13 +424,14 @@ static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t g
alloc_covered_add(alloc_stack_hash, 1);
- /* Set required struct page fields. */
- page = virt_to_page(meta->addr);
- page->slab_cache = cache;
- if (IS_ENABLED(CONFIG_SLUB))
- page->objects = 1;
- if (IS_ENABLED(CONFIG_SLAB))
- page->s_mem = addr;
+ /* Set required slab fields. */
+ slab = virt_to_slab((void *)meta->addr);
+ slab->slab_cache = cache;
+#if defined(CONFIG_SLUB)
+ slab->objects = 1;
+#elif defined(CONFIG_SLAB)
+ slab->s_mem = addr;
+#endif
/* Memory initialization. */
for_each_canary(meta, set_canary_byte);
@@ -683,6 +684,7 @@ static const struct file_operations objects_fops = {
.open = open_objects,
.read = seq_read,
.llseek = seq_lseek,
+ .release = seq_release,
};
static int __init kfence_debugfs_init(void)
diff --git a/mm/kfence/kfence_test.c b/mm/kfence/kfence_test.c
index 695030c1fff8..a22b1af85577 100644
--- a/mm/kfence/kfence_test.c
+++ b/mm/kfence/kfence_test.c
@@ -282,7 +282,7 @@ static void *test_alloc(struct kunit *test, size_t size, gfp_t gfp, enum allocat
alloc = kmalloc(size, gfp);
if (is_kfence_address(alloc)) {
- struct page *page = virt_to_head_page(alloc);
+ struct slab *slab = virt_to_slab(alloc);
struct kmem_cache *s = test_cache ?:
kmalloc_caches[kmalloc_type(GFP_KERNEL)][__kmalloc_index(size, false)];
@@ -291,8 +291,8 @@ static void *test_alloc(struct kunit *test, size_t size, gfp_t gfp, enum allocat
* even for KFENCE objects; these are required so that
* memcg accounting works correctly.
*/
- KUNIT_EXPECT_EQ(test, obj_to_index(s, page, alloc), 0U);
- KUNIT_EXPECT_EQ(test, objs_per_slab_page(s, page), 1);
+ KUNIT_EXPECT_EQ(test, obj_to_index(s, slab, alloc), 0U);
+ KUNIT_EXPECT_EQ(test, objs_per_slab(s, slab), 1);
if (policy == ALLOCATE_ANY)
return alloc;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 6863a834ed42..4a7b3ebf8e48 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -776,24 +776,6 @@ void __mod_lruvec_kmem_state(void *p, enum node_stat_item idx, int val)
rcu_read_unlock();
}
-/*
- * mod_objcg_mlstate() may be called with irq enabled, so
- * mod_memcg_lruvec_state() should be used.
- */
-static inline void mod_objcg_mlstate(struct obj_cgroup *objcg,
- struct pglist_data *pgdat,
- enum node_stat_item idx, int nr)
-{
- struct mem_cgroup *memcg;
- struct lruvec *lruvec;
-
- rcu_read_lock();
- memcg = obj_cgroup_memcg(objcg);
- lruvec = mem_cgroup_lruvec(memcg, pgdat);
- mod_memcg_lruvec_state(lruvec, idx, nr);
- rcu_read_unlock();
-}
-
/**
* __count_memcg_events - account VM events in a cgroup
* @memcg: the memory cgroup
@@ -2137,41 +2119,6 @@ static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
}
#endif
-/*
- * Most kmem_cache_alloc() calls are from user context. The irq disable/enable
- * sequence used in this case to access content from object stock is slow.
- * To optimize for user context access, there are now two object stocks for
- * task context and interrupt context access respectively.
- *
- * The task context object stock can be accessed by disabling preemption only
- * which is cheap in non-preempt kernel. The interrupt context object stock
- * can only be accessed after disabling interrupt. User context code can
- * access interrupt object stock, but not vice versa.
- */
-static inline struct obj_stock *get_obj_stock(unsigned long *pflags)
-{
- struct memcg_stock_pcp *stock;
-
- if (likely(in_task())) {
- *pflags = 0UL;
- preempt_disable();
- stock = this_cpu_ptr(&memcg_stock);
- return &stock->task_obj;
- }
-
- local_irq_save(*pflags);
- stock = this_cpu_ptr(&memcg_stock);
- return &stock->irq_obj;
-}
-
-static inline void put_obj_stock(unsigned long flags)
-{
- if (likely(in_task()))
- preempt_enable();
- else
- local_irq_restore(flags);
-}
-
/**
* consume_stock: Try to consume stocked charge on this cpu.
* @memcg: memcg to consume from.
@@ -2816,31 +2763,84 @@ retry:
*/
#define OBJCGS_CLEAR_MASK (__GFP_DMA | __GFP_RECLAIMABLE | __GFP_ACCOUNT)
-int memcg_alloc_page_obj_cgroups(struct page *page, struct kmem_cache *s,
- gfp_t gfp, bool new_page)
+/*
+ * Most kmem_cache_alloc() calls are from user context. The irq disable/enable
+ * sequence used in this case to access content from object stock is slow.
+ * To optimize for user context access, there are now two object stocks for
+ * task context and interrupt context access respectively.
+ *
+ * The task context object stock can be accessed by disabling preemption only
+ * which is cheap in non-preempt kernel. The interrupt context object stock
+ * can only be accessed after disabling interrupt. User context code can
+ * access interrupt object stock, but not vice versa.
+ */
+static inline struct obj_stock *get_obj_stock(unsigned long *pflags)
+{
+ struct memcg_stock_pcp *stock;
+
+ if (likely(in_task())) {
+ *pflags = 0UL;
+ preempt_disable();
+ stock = this_cpu_ptr(&memcg_stock);
+ return &stock->task_obj;
+ }
+
+ local_irq_save(*pflags);
+ stock = this_cpu_ptr(&memcg_stock);
+ return &stock->irq_obj;
+}
+
+static inline void put_obj_stock(unsigned long flags)
{
- unsigned int objects = objs_per_slab_page(s, page);
+ if (likely(in_task()))
+ preempt_enable();
+ else
+ local_irq_restore(flags);
+}
+
+/*
+ * mod_objcg_mlstate() may be called with irq enabled, so
+ * mod_memcg_lruvec_state() should be used.
+ */
+static inline void mod_objcg_mlstate(struct obj_cgroup *objcg,
+ struct pglist_data *pgdat,
+ enum node_stat_item idx, int nr)
+{
+ struct mem_cgroup *memcg;
+ struct lruvec *lruvec;
+
+ rcu_read_lock();
+ memcg = obj_cgroup_memcg(objcg);
+ lruvec = mem_cgroup_lruvec(memcg, pgdat);
+ mod_memcg_lruvec_state(lruvec, idx, nr);
+ rcu_read_unlock();
+}
+
+int memcg_alloc_slab_cgroups(struct slab *slab, struct kmem_cache *s,
+ gfp_t gfp, bool new_slab)
+{
+ unsigned int objects = objs_per_slab(s, slab);
unsigned long memcg_data;
void *vec;
gfp &= ~OBJCGS_CLEAR_MASK;
vec = kcalloc_node(objects, sizeof(struct obj_cgroup *), gfp,
- page_to_nid(page));
+ slab_nid(slab));
if (!vec)
return -ENOMEM;
memcg_data = (unsigned long) vec | MEMCG_DATA_OBJCGS;
- if (new_page) {
+ if (new_slab) {
/*
- * If the slab page is brand new and nobody can yet access
- * it's memcg_data, no synchronization is required and
- * memcg_data can be simply assigned.
+ * If the slab is brand new and nobody can yet access its
+ * memcg_data, no synchronization is required and memcg_data can
+ * be simply assigned.
*/
- page->memcg_data = memcg_data;
- } else if (cmpxchg(&page->memcg_data, 0, memcg_data)) {
+ slab->memcg_data = memcg_data;
+ } else if (cmpxchg(&slab->memcg_data, 0, memcg_data)) {
/*
- * If the slab page is already in use, somebody can allocate
- * and assign obj_cgroups in parallel. In this case the existing
+ * If the slab is already in use, somebody can allocate and
+ * assign obj_cgroups in parallel. In this case the existing
* objcg vector should be reused.
*/
kfree(vec);
@@ -2865,38 +2865,43 @@ int memcg_alloc_page_obj_cgroups(struct page *page, struct kmem_cache *s,
*/
struct mem_cgroup *mem_cgroup_from_obj(void *p)
{
- struct page *page;
+ struct folio *folio;
if (mem_cgroup_disabled())
return NULL;
- page = virt_to_head_page(p);
+ folio = virt_to_folio(p);
/*
* Slab objects are accounted individually, not per-page.
* Memcg membership data for each individual object is saved in
- * the page->obj_cgroups.
+ * slab->memcg_data.
*/
- if (page_objcgs_check(page)) {
- struct obj_cgroup *objcg;
+ if (folio_test_slab(folio)) {
+ struct obj_cgroup **objcgs;
+ struct slab *slab;
unsigned int off;
- off = obj_to_index(page->slab_cache, page, p);
- objcg = page_objcgs(page)[off];
- if (objcg)
- return obj_cgroup_memcg(objcg);
+ slab = folio_slab(folio);
+ objcgs = slab_objcgs(slab);
+ if (!objcgs)
+ return NULL;
+
+ off = obj_to_index(slab->slab_cache, slab, p);
+ if (objcgs[off])
+ return obj_cgroup_memcg(objcgs[off]);
return NULL;
}
/*
- * page_memcg_check() is used here, because page_has_obj_cgroups()
- * check above could fail because the object cgroups vector wasn't set
- * at that moment, but it can be set concurrently.
+ * page_memcg_check() is used here, because in theory we can encounter
+ * a folio where the slab flag has been cleared already, but
+ * slab->memcg_data has not been freed yet
* page_memcg_check(page) will guarantee that a proper memory
* cgroup pointer or NULL will be returned.
*/
- return page_memcg_check(page);
+ return page_memcg_check(folio_page(folio, 0));
}
__always_inline struct obj_cgroup *get_obj_cgroup_from_current(void)
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 07c875fdeaf0..f1c389f7e669 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1470,17 +1470,12 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)
if (!(flags & MF_COUNT_INCREASED)) {
res = get_hwpoison_page(p, flags);
if (!res) {
- /*
- * Check "filter hit" and "race with other subpage."
- */
lock_page(head);
- if (PageHWPoison(head)) {
- if ((hwpoison_filter(p) && TestClearPageHWPoison(p))
- || (p != head && TestSetPageHWPoison(head))) {
+ if (hwpoison_filter(p)) {
+ if (TestClearPageHWPoison(head))
num_poisoned_pages_dec();
- unlock_page(head);
- return 0;
- }
+ unlock_page(head);
+ return 0;
}
unlock_page(head);
res = MF_FAILED;
@@ -1651,21 +1646,28 @@ int memory_failure(unsigned long pfn, int flags)
if (!sysctl_memory_failure_recovery)
panic("Memory failure on page %lx", pfn);
+ mutex_lock(&mf_mutex);
+
p = pfn_to_online_page(pfn);
if (!p) {
+ res = arch_memory_failure(pfn, flags);
+ if (res == 0)
+ goto unlock_mutex;
+
if (pfn_valid(pfn)) {
pgmap = get_dev_pagemap(pfn, NULL);
- if (pgmap)
- return memory_failure_dev_pagemap(pfn, flags,
- pgmap);
+ if (pgmap) {
+ res = memory_failure_dev_pagemap(pfn, flags,
+ pgmap);
+ goto unlock_mutex;
+ }
}
pr_err("Memory failure: %#lx: memory outside kernel control\n",
pfn);
- return -ENXIO;
+ res = -ENXIO;
+ goto unlock_mutex;
}
- mutex_lock(&mf_mutex);
-
try_again:
if (PageHuge(p)) {
res = memory_failure_hugetlb(pfn, flags);
@@ -2239,6 +2241,7 @@ retry:
} else if (ret == 0) {
if (soft_offline_free_page(page) && try_again) {
try_again = false;
+ flags &= ~MF_COUNT_INCREASED;
goto retry;
}
}
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 852041f6be41..2a9627dc784c 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -35,6 +35,7 @@
#include <linux/memblock.h>
#include <linux/compaction.h>
#include <linux/rmap.h>
+#include <linux/module.h>
#include <asm/tlbflush.h>
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 10e9c87260ed..f6248affaf38 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2140,8 +2140,7 @@ struct page *alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
* memory with both reclaim and compact as well.
*/
if (!page && (gfp & __GFP_DIRECT_RECLAIM))
- page = __alloc_pages_node(hpage_node,
- gfp, order);
+ page = __alloc_pages(gfp, order, hpage_node, nmask);
goto out;
}
diff --git a/mm/slab.c b/mm/slab.c
index ca4822f6b2b6..ddf5737c63d9 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -218,7 +218,7 @@ static void cache_reap(struct work_struct *unused);
static inline void fixup_objfreelist_debug(struct kmem_cache *cachep,
void **list);
static inline void fixup_slab_list(struct kmem_cache *cachep,
- struct kmem_cache_node *n, struct page *page,
+ struct kmem_cache_node *n, struct slab *slab,
void **list);
static int slab_early_init = 1;
@@ -372,10 +372,10 @@ static void **dbg_userword(struct kmem_cache *cachep, void *objp)
static int slab_max_order = SLAB_MAX_ORDER_LO;
static bool slab_max_order_set __initdata;
-static inline void *index_to_obj(struct kmem_cache *cache, struct page *page,
- unsigned int idx)
+static inline void *index_to_obj(struct kmem_cache *cache,
+ const struct slab *slab, unsigned int idx)
{
- return page->s_mem + cache->size * idx;
+ return slab->s_mem + cache->size * idx;
}
#define BOOT_CPUCACHE_ENTRIES 1
@@ -550,17 +550,17 @@ static struct array_cache *alloc_arraycache(int node, int entries,
}
static noinline void cache_free_pfmemalloc(struct kmem_cache *cachep,
- struct page *page, void *objp)
+ struct slab *slab, void *objp)
{
struct kmem_cache_node *n;
- int page_node;
+ int slab_node;
LIST_HEAD(list);
- page_node = page_to_nid(page);
- n = get_node(cachep, page_node);
+ slab_node = slab_nid(slab);
+ n = get_node(cachep, slab_node);
spin_lock(&n->list_lock);
- free_block(cachep, &objp, 1, page_node, &list);
+ free_block(cachep, &objp, 1, slab_node, &list);
spin_unlock(&n->list_lock);
slabs_destroy(cachep, &list);
@@ -761,7 +761,7 @@ static void drain_alien_cache(struct kmem_cache *cachep,
}
static int __cache_free_alien(struct kmem_cache *cachep, void *objp,
- int node, int page_node)
+ int node, int slab_node)
{
struct kmem_cache_node *n;
struct alien_cache *alien = NULL;
@@ -770,21 +770,21 @@ static int __cache_free_alien(struct kmem_cache *cachep, void *objp,
n = get_node(cachep, node);
STATS_INC_NODEFREES(cachep);
- if (n->alien && n->alien[page_node]) {
- alien = n->alien[page_node];
+ if (n->alien && n->alien[slab_node]) {
+ alien = n->alien[slab_node];
ac = &alien->ac;
spin_lock(&alien->lock);
if (unlikely(ac->avail == ac->limit)) {
STATS_INC_ACOVERFLOW(cachep);
- __drain_alien_cache(cachep, ac, page_node, &list);
+ __drain_alien_cache(cachep, ac, slab_node, &list);
}
__free_one(ac, objp);
spin_unlock(&alien->lock);
slabs_destroy(cachep, &list);
} else {
- n = get_node(cachep, page_node);
+ n = get_node(cachep, slab_node);
spin_lock(&n->list_lock);
- free_block(cachep, &objp, 1, page_node, &list);
+ free_block(cachep, &objp, 1, slab_node, &list);
spin_unlock(&n->list_lock);
slabs_destroy(cachep, &list);
}
@@ -793,16 +793,16 @@ static int __cache_free_alien(struct kmem_cache *cachep, void *objp,
static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
{
- int page_node = page_to_nid(virt_to_page(objp));
+ int slab_node = slab_nid(virt_to_slab(objp));
int node = numa_mem_id();
/*
* Make sure we are not freeing a object from another node to the array
* cache on this cpu.
*/
- if (likely(node == page_node))
+ if (likely(node == slab_node))
return 0;
- return __cache_free_alien(cachep, objp, node, page_node);
+ return __cache_free_alien(cachep, objp, node, slab_node);
}
/*
@@ -1367,57 +1367,60 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
* did not request dmaable memory, we might get it, but that
* would be relatively rare and ignorable.
*/
-static struct page *kmem_getpages(struct kmem_cache *cachep, gfp_t flags,
+static struct slab *kmem_getpages(struct kmem_cache *cachep, gfp_t flags,
int nodeid)
{
- struct page *page;
+ struct folio *folio;
+ struct slab *slab;
flags |= cachep->allocflags;
- page = __alloc_pages_node(nodeid, flags, cachep->gfporder);
- if (!page) {
+ folio = (struct folio *) __alloc_pages_node(nodeid, flags, cachep->gfporder);
+ if (!folio) {
slab_out_of_memory(cachep, flags, nodeid);
return NULL;
}
- account_slab_page(page, cachep->gfporder, cachep, flags);
- __SetPageSlab(page);
+ slab = folio_slab(folio);
+
+ account_slab(slab, cachep->gfporder, cachep, flags);
+ __folio_set_slab(folio);
/* Record if ALLOC_NO_WATERMARKS was set when allocating the slab */
- if (sk_memalloc_socks() && page_is_pfmemalloc(page))
- SetPageSlabPfmemalloc(page);
+ if (sk_memalloc_socks() && page_is_pfmemalloc(folio_page(folio, 0)))
+ slab_set_pfmemalloc(slab);
- return page;
+ return slab;
}
/*
* Interface to system's page release.
*/
-static void kmem_freepages(struct kmem_cache *cachep, struct page *page)
+static void kmem_freepages(struct kmem_cache *cachep, struct slab *slab)
{
int order = cachep->gfporder;
+ struct folio *folio = slab_folio(slab);
- BUG_ON(!PageSlab(page));
- __ClearPageSlabPfmemalloc(page);
- __ClearPageSlab(page);
- page_mapcount_reset(page);
- /* In union with page->mapping where page allocator expects NULL */
- page->slab_cache = NULL;
+ BUG_ON(!folio_test_slab(folio));
+ __slab_clear_pfmemalloc(slab);
+ __folio_clear_slab(folio);
+ page_mapcount_reset(folio_page(folio, 0));
+ folio->mapping = NULL;
if (current->reclaim_state)
current->reclaim_state->reclaimed_slab += 1 << order;
- unaccount_slab_page(page, order, cachep);
- __free_pages(page, order);
+ unaccount_slab(slab, order, cachep);
+ __free_pages(folio_page(folio, 0), order);
}
static void kmem_rcu_free(struct rcu_head *head)
{
struct kmem_cache *cachep;
- struct page *page;
+ struct slab *slab;
- page = container_of(head, struct page, rcu_head);
- cachep = page->slab_cache;
+ slab = container_of(head, struct slab, rcu_head);
+ cachep = slab->slab_cache;
- kmem_freepages(cachep, page);
+ kmem_freepages(cachep, slab);
}
#if DEBUG
@@ -1553,18 +1556,18 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp)
/* Print some data about the neighboring objects, if they
* exist:
*/
- struct page *page = virt_to_head_page(objp);
+ struct slab *slab = virt_to_slab(objp);
unsigned int objnr;
- objnr = obj_to_index(cachep, page, objp);
+ objnr = obj_to_index(cachep, slab, objp);
if (objnr) {
- objp = index_to_obj(cachep, page, objnr - 1);
+ objp = index_to_obj(cachep, slab, objnr - 1);
realobj = (char *)objp + obj_offset(cachep);
pr_err("Prev obj: start=%px, len=%d\n", realobj, size);
print_objinfo(cachep, objp, 2);
}
if (objnr + 1 < cachep->num) {
- objp = index_to_obj(cachep, page, objnr + 1);
+ objp = index_to_obj(cachep, slab, objnr + 1);
realobj = (char *)objp + obj_offset(cachep);
pr_err("Next obj: start=%px, len=%d\n", realobj, size);
print_objinfo(cachep, objp, 2);
@@ -1575,17 +1578,17 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp)
#if DEBUG
static void slab_destroy_debugcheck(struct kmem_cache *cachep,
- struct page *page)
+ struct slab *slab)
{
int i;
if (OBJFREELIST_SLAB(cachep) && cachep->flags & SLAB_POISON) {
- poison_obj(cachep, page->freelist - obj_offset(cachep),
+ poison_obj(cachep, slab->freelist - obj_offset(cachep),
POISON_FREE);
}
for (i = 0; i < cachep->num; i++) {
- void *objp = index_to_obj(cachep, page, i);
+ void *objp = index_to_obj(cachep, slab, i);
if (cachep->flags & SLAB_POISON) {
check_poison_obj(cachep, objp);
@@ -1601,7 +1604,7 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep,
}
#else
static void slab_destroy_debugcheck(struct kmem_cache *cachep,
- struct page *page)
+ struct slab *slab)
{
}
#endif
@@ -1609,22 +1612,22 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep,
/**
* slab_destroy - destroy and release all objects in a slab
* @cachep: cache pointer being destroyed
- * @page: page pointer being destroyed
+ * @slab: slab being destroyed
*
- * Destroy all the objs in a slab page, and release the mem back to the system.
- * Before calling the slab page must have been unlinked from the cache. The
+ * Destroy all the objs in a slab, and release the mem back to the system.
+ * Before calling the slab must have been unlinked from the cache. The
* kmem_cache_node ->list_lock is not held/needed.
*/
-static void slab_destroy(struct kmem_cache *cachep, struct page *page)
+static void slab_destroy(struct kmem_cache *cachep, struct slab *slab)
{
void *freelist;
- freelist = page->freelist;
- slab_destroy_debugcheck(cachep, page);
+ freelist = slab->freelist;
+ slab_destroy_debugcheck(cachep, slab);
if (unlikely(cachep->flags & SLAB_TYPESAFE_BY_RCU))
- call_rcu(&page->rcu_head, kmem_rcu_free);
+ call_rcu(&slab->rcu_head, kmem_rcu_free);
else
- kmem_freepages(cachep, page);
+ kmem_freepages(cachep, slab);
/*
* From now on, we don't use freelist
@@ -1640,11 +1643,11 @@ static void slab_destroy(struct kmem_cache *cachep, struct page *page)
*/
static void slabs_destroy(struct kmem_cache *cachep, struct list_head *list)
{
- struct page *page, *n;
+ struct slab *slab, *n;
- list_for_each_entry_safe(page, n, list, slab_list) {
- list_del(&page->slab_list);
- slab_destroy(cachep, page);
+ list_for_each_entry_safe(slab, n, list, slab_list) {
+ list_del(&slab->slab_list);
+ slab_destroy(cachep, slab);
}
}
@@ -2194,7 +2197,7 @@ static int drain_freelist(struct kmem_cache *cache,
{
struct list_head *p;
int nr_freed;
- struct page *page;
+ struct slab *slab;
nr_freed = 0;
while (nr_freed < tofree && !list_empty(&n->slabs_free)) {
@@ -2206,8 +2209,8 @@ static int drain_freelist(struct kmem_cache *cache,
goto out;
}
- page = list_entry(p, struct page, slab_list);
- list_del(&page->slab_list);
+ slab = list_entry(p, struct slab, slab_list);
+ list_del(&slab->slab_list);
n->free_slabs--;
n->total_slabs--;
/*
@@ -2216,7 +2219,7 @@ static int drain_freelist(struct kmem_cache *cache,
*/
n->free_objects -= cache->num;
spin_unlock_irq(&n->list_lock);
- slab_destroy(cache, page);
+ slab_destroy(cache, slab);
nr_freed++;
}
out:
@@ -2291,14 +2294,14 @@ void __kmem_cache_release(struct kmem_cache *cachep)
* which are all initialized during kmem_cache_init().
*/
static void *alloc_slabmgmt(struct kmem_cache *cachep,
- struct page *page, int colour_off,
+ struct slab *slab, int colour_off,
gfp_t local_flags, int nodeid)
{
void *freelist;
- void *addr = page_address(page);
+ void *addr = slab_address(slab);
- page->s_mem = addr + colour_off;
- page->active = 0;
+ slab->s_mem = addr + colour_off;
+ slab->active = 0;
if (OBJFREELIST_SLAB(cachep))
freelist = NULL;
@@ -2315,24 +2318,24 @@ static void *alloc_slabmgmt(struct kmem_cache *cachep,
return freelist;
}
-static inline freelist_idx_t get_free_obj(struct page *page, unsigned int idx)
+static inline freelist_idx_t get_free_obj(struct slab *slab, unsigned int idx)
{
- return ((freelist_idx_t *)page->freelist)[idx];
+ return ((freelist_idx_t *) slab->freelist)[idx];
}
-static inline void set_free_obj(struct page *page,
+static inline void set_free_obj(struct slab *slab,
unsigned int idx, freelist_idx_t val)
{
- ((freelist_idx_t *)(page->freelist))[idx] = val;
+ ((freelist_idx_t *)(slab->freelist))[idx] = val;
}
-static void cache_init_objs_debug(struct kmem_cache *cachep, struct page *page)
+static void cache_init_objs_debug(struct kmem_cache *cachep, struct slab *slab)
{
#if DEBUG
int i;
for (i = 0; i < cachep->num; i++) {
- void *objp = index_to_obj(cachep, page, i);
+ void *objp = index_to_obj(cachep, slab, i);
if (cachep->flags & SLAB_STORE_USER)
*dbg_userword(cachep, objp) = NULL;
@@ -2416,17 +2419,17 @@ static freelist_idx_t next_random_slot(union freelist_init_state *state)
}
/* Swap two freelist entries */
-static void swap_free_obj(struct page *page, unsigned int a, unsigned int b)
+static void swap_free_obj(struct slab *slab, unsigned int a, unsigned int b)
{
- swap(((freelist_idx_t *)page->freelist)[a],
- ((freelist_idx_t *)page->freelist)[b]);
+ swap(((freelist_idx_t *) slab->freelist)[a],
+ ((freelist_idx_t *) slab->freelist)[b]);
}
/*
* Shuffle the freelist initialization state based on pre-computed lists.
* return true if the list was successfully shuffled, false otherwise.
*/
-static bool shuffle_freelist(struct kmem_cache *cachep, struct page *page)
+static bool shuffle_freelist(struct kmem_cache *cachep, struct slab *slab)
{
unsigned int objfreelist = 0, i, rand, count = cachep->num;
union freelist_init_state state;
@@ -2443,7 +2446,7 @@ static bool shuffle_freelist(struct kmem_cache *cachep, struct page *page)
objfreelist = count - 1;
else
objfreelist = next_random_slot(&state);
- page->freelist = index_to_obj(cachep, page, objfreelist) +
+ slab->freelist = index_to_obj(cachep, slab, objfreelist) +
obj_offset(cachep);
count--;
}
@@ -2454,51 +2457,51 @@ static bool shuffle_freelist(struct kmem_cache *cachep, struct page *page)
*/
if (!precomputed) {
for (i = 0; i < count; i++)
- set_free_obj(page, i, i);
+ set_free_obj(slab, i, i);
/* Fisher-Yates shuffle */
for (i = count - 1; i > 0; i--) {
rand = prandom_u32_state(&state.rnd_state);
rand %= (i + 1);
- swap_free_obj(page, i, rand);
+ swap_free_obj(slab, i, rand);
}
} else {
for (i = 0; i < count; i++)
- set_free_obj(page, i, next_random_slot(&state));
+ set_free_obj(slab, i, next_random_slot(&state));
}
if (OBJFREELIST_SLAB(cachep))
- set_free_obj(page, cachep->num - 1, objfreelist);
+ set_free_obj(slab, cachep->num - 1, objfreelist);
return true;
}
#else
static inline bool shuffle_freelist(struct kmem_cache *cachep,
- struct page *page)
+ struct slab *slab)
{
return false;
}
#endif /* CONFIG_SLAB_FREELIST_RANDOM */
static void cache_init_objs(struct kmem_cache *cachep,
- struct page *page)
+ struct slab *slab)
{
int i;
void *objp;
bool shuffled;
- cache_init_objs_debug(cachep, page);
+ cache_init_objs_debug(cachep, slab);
/* Try to randomize the freelist if enabled */
- shuffled = shuffle_freelist(cachep, page);
+ shuffled = shuffle_freelist(cachep, slab);
if (!shuffled && OBJFREELIST_SLAB(cachep)) {
- page->freelist = index_to_obj(cachep, page, cachep->num - 1) +
+ slab->freelist = index_to_obj(cachep, slab, cachep->num - 1) +
obj_offset(cachep);
}
for (i = 0; i < cachep->num; i++) {
- objp = index_to_obj(cachep, page, i);
+ objp = index_to_obj(cachep, slab, i);
objp = kasan_init_slab_obj(cachep, objp);
/* constructor could break poison info */
@@ -2509,68 +2512,56 @@ static void cache_init_objs(struct kmem_cache *cachep,
}
if (!shuffled)
- set_free_obj(page, i, i);
+ set_free_obj(slab, i, i);
}
}
-static void *slab_get_obj(struct kmem_cache *cachep, struct page *page)
+static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slab)
{
void *objp;
- objp = index_to_obj(cachep, page, get_free_obj(page, page->active));
- page->active++;
+ objp = index_to_obj(cachep, slab, get_free_obj(slab, slab->active));
+ slab->active++;
return objp;
}
static void slab_put_obj(struct kmem_cache *cachep,
- struct page *page, void *objp)
+ struct slab *slab, void *objp)
{
- unsigned int objnr = obj_to_index(cachep, page, objp);
+ unsigned int objnr = obj_to_index(cachep, slab, objp);
#if DEBUG
unsigned int i;
/* Verify double free bug */
- for (i = page->active; i < cachep->num; i++) {
- if (get_free_obj(page, i) == objnr) {
+ for (i = slab->active; i < cachep->num; i++) {
+ if (get_free_obj(slab, i) == objnr) {
pr_err("slab: double free detected in cache '%s', objp %px\n",
cachep->name, objp);
BUG();
}
}
#endif
- page->active--;
- if (!page->freelist)
- page->freelist = objp + obj_offset(cachep);
-
- set_free_obj(page, page->active, objnr);
-}
+ slab->active--;
+ if (!slab->freelist)
+ slab->freelist = objp + obj_offset(cachep);
-/*
- * Map pages beginning at addr to the given cache and slab. This is required
- * for the slab allocator to be able to lookup the cache and slab of a
- * virtual address for kfree, ksize, and slab debugging.
- */
-static void slab_map_pages(struct kmem_cache *cache, struct page *page,
- void *freelist)
-{
- page->slab_cache = cache;
- page->freelist = freelist;
+ set_free_obj(slab, slab->active, objnr);
}
/*
* Grow (by 1) the number of slabs within a cache. This is called by
* kmem_cache_alloc() when there are no active objs left in a cache.
*/
-static struct page *cache_grow_begin(struct kmem_cache *cachep,
+static struct slab *cache_grow_begin(struct kmem_cache *cachep,
gfp_t flags, int nodeid)
{
void *freelist;
size_t offset;
gfp_t local_flags;
- int page_node;
+ int slab_node;
struct kmem_cache_node *n;
- struct page *page;
+ struct slab *slab;
/*
* Be lazy and only check for valid flags here, keeping it out of the
@@ -2590,12 +2581,12 @@ static struct page *cache_grow_begin(struct kmem_cache *cachep,
* Get mem for the objs. Attempt to allocate a physical page from
* 'nodeid'.
*/
- page = kmem_getpages(cachep, local_flags, nodeid);
- if (!page)
+ slab = kmem_getpages(cachep, local_flags, nodeid);
+ if (!slab)
goto failed;
- page_node = page_to_nid(page);
- n = get_node(cachep, page_node);
+ slab_node = slab_nid(slab);
+ n = get_node(cachep, slab_node);
/* Get colour for the slab, and cal the next value. */
n->colour_next++;
@@ -2613,54 +2604,55 @@ static struct page *cache_grow_begin(struct kmem_cache *cachep,
* page_address() in the latter returns a non-tagged pointer,
* as it should be for slab pages.
*/
- kasan_poison_slab(page);
+ kasan_poison_slab(slab);
/* Get slab management. */
- freelist = alloc_slabmgmt(cachep, page, offset,
- local_flags & ~GFP_CONSTRAINT_MASK, page_node);
+ freelist = alloc_slabmgmt(cachep, slab, offset,
+ local_flags & ~GFP_CONSTRAINT_MASK, slab_node);
if (OFF_SLAB(cachep) && !freelist)
goto opps1;
- slab_map_pages(cachep, page, freelist);
+ slab->slab_cache = cachep;
+ slab->freelist = freelist;
- cache_init_objs(cachep, page);
+ cache_init_objs(cachep, slab);
if (gfpflags_allow_blocking(local_flags))
local_irq_disable();
- return page;
+ return slab;
opps1:
- kmem_freepages(cachep, page);
+ kmem_freepages(cachep, slab);
failed:
if (gfpflags_allow_blocking(local_flags))
local_irq_disable();
return NULL;
}
-static void cache_grow_end(struct kmem_cache *cachep, struct page *page)
+static void cache_grow_end(struct kmem_cache *cachep, struct slab *slab)
{
struct kmem_cache_node *n;
void *list = NULL;
check_irq_off();
- if (!page)
+ if (!slab)
return;
- INIT_LIST_HEAD(&page->slab_list);
- n = get_node(cachep, page_to_nid(page));
+ INIT_LIST_HEAD(&slab->slab_list);
+ n = get_node(cachep, slab_nid(slab));
spin_lock(&n->list_lock);
n->total_slabs++;
- if (!page->active) {
- list_add_tail(&page->slab_list, &n->slabs_free);
+ if (!slab->active) {
+ list_add_tail(&slab->slab_list, &n->slabs_free);
n->free_slabs++;
} else
- fixup_slab_list(cachep, n, page, &list);
+ fixup_slab_list(cachep, n, slab, &list);
STATS_INC_GROWN(cachep);
- n->free_objects += cachep->num - page->active;
+ n->free_objects += cachep->num - slab->active;
spin_unlock(&n->list_lock);
fixup_objfreelist_debug(cachep, &list);
@@ -2708,13 +2700,13 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
unsigned long caller)
{
unsigned int objnr;
- struct page *page;
+ struct slab *slab;
BUG_ON(virt_to_cache(objp) != cachep);
objp -= obj_offset(cachep);
kfree_debugcheck(objp);
- page = virt_to_head_page(objp);
+ slab = virt_to_slab(objp);
if (cachep->flags & SLAB_RED_ZONE) {
verify_redzone_free(cachep, objp);
@@ -2724,10 +2716,10 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
if (cachep->flags & SLAB_STORE_USER)
*dbg_userword(cachep, objp) = (void *)caller;
- objnr = obj_to_index(cachep, page, objp);
+ objnr = obj_to_index(cachep, slab, objp);
BUG_ON(objnr >= cachep->num);
- BUG_ON(objp != index_to_obj(cachep, page, objnr));
+ BUG_ON(objp != index_to_obj(cachep, slab, objnr));
if (cachep->flags & SLAB_POISON) {
poison_obj(cachep, objp, POISON_FREE);
@@ -2757,97 +2749,97 @@ static inline void fixup_objfreelist_debug(struct kmem_cache *cachep,
}
static inline void fixup_slab_list(struct kmem_cache *cachep,
- struct kmem_cache_node *n, struct page *page,
+ struct kmem_cache_node *n, struct slab *slab,
void **list)
{
/* move slabp to correct slabp list: */
- list_del(&page->slab_list);
- if (page->active == cachep->num) {
- list_add(&page->slab_list, &n->slabs_full);
+ list_del(&slab->slab_list);
+ if (slab->active == cachep->num) {
+ list_add(&slab->slab_list, &n->slabs_full);
if (OBJFREELIST_SLAB(cachep)) {
#if DEBUG
/* Poisoning will be done without holding the lock */
if (cachep->flags & SLAB_POISON) {
- void **objp = page->freelist;
+ void **objp = slab->freelist;
*objp = *list;
*list = objp;
}
#endif
- page->freelist = NULL;
+ slab->freelist = NULL;
}
} else
- list_add(&page->slab_list, &n->slabs_partial);
+ list_add(&slab->slab_list, &n->slabs_partial);
}
/* Try to find non-pfmemalloc slab if needed */
-static noinline struct page *get_valid_first_slab(struct kmem_cache_node *n,
- struct page *page, bool pfmemalloc)
+static noinline struct slab *get_valid_first_slab(struct kmem_cache_node *n,
+ struct slab *slab, bool pfmemalloc)
{
- if (!page)
+ if (!slab)
return NULL;
if (pfmemalloc)
- return page;
+ return slab;
- if (!PageSlabPfmemalloc(page))
- return page;
+ if (!slab_test_pfmemalloc(slab))
+ return slab;
/* No need to keep pfmemalloc slab if we have enough free objects */
if (n->free_objects > n->free_limit) {
- ClearPageSlabPfmemalloc(page);
- return page;
+ slab_clear_pfmemalloc(slab);
+ return slab;
}
/* Move pfmemalloc slab to the end of list to speed up next search */
- list_del(&page->slab_list);
- if (!page->active) {
- list_add_tail(&page->slab_list, &n->slabs_free);
+ list_del(&slab->slab_list);
+ if (!slab->active) {
+ list_add_tail(&slab->slab_list, &n->slabs_free);
n->free_slabs++;
} else
- list_add_tail(&page->slab_list, &n->slabs_partial);
+ list_add_tail(&slab->slab_list, &n->slabs_partial);
- list_for_each_entry(page, &n->slabs_partial, slab_list) {
- if (!PageSlabPfmemalloc(page))
- return page;
+ list_for_each_entry(slab, &n->slabs_partial, slab_list) {
+ if (!slab_test_pfmemalloc(slab))
+ return slab;
}
n->free_touched = 1;
- list_for_each_entry(page, &n->slabs_free, slab_list) {
- if (!PageSlabPfmemalloc(page)) {
+ list_for_each_entry(slab, &n->slabs_free, slab_list) {
+ if (!slab_test_pfmemalloc(slab)) {
n->free_slabs--;
- return page;
+ return slab;
}
}
return NULL;
}
-static struct page *get_first_slab(struct kmem_cache_node *n, bool pfmemalloc)
+static struct slab *get_first_slab(struct kmem_cache_node *n, bool pfmemalloc)
{
- struct page *page;
+ struct slab *slab;
assert_spin_locked(&n->list_lock);
- page = list_first_entry_or_null(&n->slabs_partial, struct page,
+ slab = list_first_entry_or_null(&n->slabs_partial, struct slab,
slab_list);
- if (!page) {
+ if (!slab) {
n->free_touched = 1;
- page = list_first_entry_or_null(&n->slabs_free, struct page,
+ slab = list_first_entry_or_null(&n->slabs_free, struct slab,
slab_list);
- if (page)
+ if (slab)
n->free_slabs--;
}
if (sk_memalloc_socks())
- page = get_valid_first_slab(n, page, pfmemalloc);
+ slab = get_valid_first_slab(n, slab, pfmemalloc);
- return page;
+ return slab;
}
static noinline void *cache_alloc_pfmemalloc(struct kmem_cache *cachep,
struct kmem_cache_node *n, gfp_t flags)
{
- struct page *page;
+ struct slab *slab;
void *obj;
void *list = NULL;
@@ -2855,16 +2847,16 @@ static noinline void *cache_alloc_pfmemalloc(struct kmem_cache *cachep,
return NULL;
spin_lock(&n->list_lock);
- page = get_first_slab(n, true);
- if (!page) {
+ slab = get_first_slab(n, true);
+ if (!slab) {
spin_unlock(&n->list_lock);
return NULL;
}
- obj = slab_get_obj(cachep, page);
+ obj = slab_get_obj(cachep, slab);
n->free_objects--;
- fixup_slab_list(cachep, n, page, &list);
+ fixup_slab_list(cachep, n, slab, &list);
spin_unlock(&n->list_lock);
fixup_objfreelist_debug(cachep, &list);
@@ -2877,20 +2869,20 @@ static noinline void *cache_alloc_pfmemalloc(struct kmem_cache *cachep,
* or cache_grow_end() for new slab
*/
static __always_inline int alloc_block(struct kmem_cache *cachep,
- struct array_cache *ac, struct page *page, int batchcount)
+ struct array_cache *ac, struct slab *slab, int batchcount)
{
/*
* There must be at least one object available for
* allocation.
*/
- BUG_ON(page->active >= cachep->num);
+ BUG_ON(slab->active >= cachep->num);
- while (page->active < cachep->num && batchcount--) {
+ while (slab->active < cachep->num && batchcount--) {
STATS_INC_ALLOCED(cachep);
STATS_INC_ACTIVE(cachep);
STATS_SET_HIGH(cachep);
- ac->entry[ac->avail++] = slab_get_obj(cachep, page);
+ ac->entry[ac->avail++] = slab_get_obj(cachep, slab);
}
return batchcount;
@@ -2903,7 +2895,7 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
struct array_cache *ac, *shared;
int node;
void *list = NULL;
- struct page *page;
+ struct slab *slab;
check_irq_off();
node = numa_mem_id();
@@ -2936,14 +2928,14 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
while (batchcount > 0) {
/* Get slab alloc is to come from. */
- page = get_first_slab(n, false);
- if (!page)
+ slab = get_first_slab(n, false);
+ if (!slab)
goto must_grow;
check_spinlock_acquired(cachep);
- batchcount = alloc_block(cachep, ac, page, batchcount);
- fixup_slab_list(cachep, n, page, &list);
+ batchcount = alloc_block(cachep, ac, slab, batchcount);
+ fixup_slab_list(cachep, n, slab, &list);
}
must_grow:
@@ -2962,16 +2954,16 @@ direct_grow:
return obj;
}
- page = cache_grow_begin(cachep, gfp_exact_node(flags), node);
+ slab = cache_grow_begin(cachep, gfp_exact_node(flags), node);
/*
* cache_grow_begin() can reenable interrupts,
* then ac could change.
*/
ac = cpu_cache_get(cachep);
- if (!ac->avail && page)
- alloc_block(cachep, ac, page, batchcount);
- cache_grow_end(cachep, page);
+ if (!ac->avail && slab)
+ alloc_block(cachep, ac, slab, batchcount);
+ cache_grow_end(cachep, slab);
if (!ac->avail)
return NULL;
@@ -3101,7 +3093,7 @@ static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
struct zone *zone;
enum zone_type highest_zoneidx = gfp_zone(flags);
void *obj = NULL;
- struct page *page;
+ struct slab *slab;
int nid;
unsigned int cpuset_mems_cookie;
@@ -3137,10 +3129,10 @@ retry:
* We may trigger various forms of reclaim on the allowed
* set and go into memory reserves if necessary.
*/
- page = cache_grow_begin(cache, flags, numa_mem_id());
- cache_grow_end(cache, page);
- if (page) {
- nid = page_to_nid(page);
+ slab = cache_grow_begin(cache, flags, numa_mem_id());
+ cache_grow_end(cache, slab);
+ if (slab) {
+ nid = slab_nid(slab);
obj = ____cache_alloc_node(cache,
gfp_exact_node(flags), nid);
@@ -3164,7 +3156,7 @@ retry:
static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
int nodeid)
{
- struct page *page;
+ struct slab *slab;
struct kmem_cache_node *n;
void *obj = NULL;
void *list = NULL;
@@ -3175,8 +3167,8 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
check_irq_off();
spin_lock(&n->list_lock);
- page = get_first_slab(n, false);
- if (!page)
+ slab = get_first_slab(n, false);
+ if (!slab)
goto must_grow;
check_spinlock_acquired_node(cachep, nodeid);
@@ -3185,12 +3177,12 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
STATS_INC_ACTIVE(cachep);
STATS_SET_HIGH(cachep);
- BUG_ON(page->active == cachep->num);
+ BUG_ON(slab->active == cachep->num);
- obj = slab_get_obj(cachep, page);
+ obj = slab_get_obj(cachep, slab);
n->free_objects--;
- fixup_slab_list(cachep, n, page, &list);
+ fixup_slab_list(cachep, n, slab, &list);
spin_unlock(&n->list_lock);
fixup_objfreelist_debug(cachep, &list);
@@ -3198,12 +3190,12 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
must_grow:
spin_unlock(&n->list_lock);
- page = cache_grow_begin(cachep, gfp_exact_node(flags), nodeid);
- if (page) {
+ slab = cache_grow_begin(cachep, gfp_exact_node(flags), nodeid);
+ if (slab) {
/* This slab isn't counted yet so don't update free_objects */
- obj = slab_get_obj(cachep, page);
+ obj = slab_get_obj(cachep, slab);
}
- cache_grow_end(cachep, page);
+ cache_grow_end(cachep, slab);
return obj ? obj : fallback_alloc(cachep, flags);
}
@@ -3333,40 +3325,40 @@ static void free_block(struct kmem_cache *cachep, void **objpp,
{
int i;
struct kmem_cache_node *n = get_node(cachep, node);
- struct page *page;
+ struct slab *slab;
n->free_objects += nr_objects;
for (i = 0; i < nr_objects; i++) {
void *objp;
- struct page *page;
+ struct slab *slab;
objp = objpp[i];
- page = virt_to_head_page(objp);
- list_del(&page->slab_list);
+ slab = virt_to_slab(objp);
+ list_del(&slab->slab_list);
check_spinlock_acquired_node(cachep, node);
- slab_put_obj(cachep, page, objp);
+ slab_put_obj(cachep, slab, objp);
STATS_DEC_ACTIVE(cachep);
/* fixup slab chains */
- if (page->active == 0) {
- list_add(&page->slab_list, &n->slabs_free);
+ if (slab->active == 0) {
+ list_add(&slab->slab_list, &n->slabs_free);
n->free_slabs++;
} else {
/* Unconditionally move a slab to the end of the
* partial list on free - maximum time for the
* other objects to be freed, too.
*/
- list_add_tail(&page->slab_list, &n->slabs_partial);
+ list_add_tail(&slab->slab_list, &n->slabs_partial);
}
}
while (n->free_objects > n->free_limit && !list_empty(&n->slabs_free)) {
n->free_objects -= cachep->num;
- page = list_last_entry(&n->slabs_free, struct page, slab_list);
- list_move(&page->slab_list, list);
+ slab = list_last_entry(&n->slabs_free, struct slab, slab_list);
+ list_move(&slab->slab_list, list);
n->free_slabs--;
n->total_slabs--;
}
@@ -3402,10 +3394,10 @@ free_done:
#if STATS
{
int i = 0;
- struct page *page;
+ struct slab *slab;
- list_for_each_entry(page, &n->slabs_free, slab_list) {
- BUG_ON(page->active);
+ list_for_each_entry(slab, &n->slabs_free, slab_list) {
+ BUG_ON(slab->active);
i++;
}
@@ -3481,10 +3473,10 @@ void ___cache_free(struct kmem_cache *cachep, void *objp,
}
if (sk_memalloc_socks()) {
- struct page *page = virt_to_head_page(objp);
+ struct slab *slab = virt_to_slab(objp);
- if (unlikely(PageSlabPfmemalloc(page))) {
- cache_free_pfmemalloc(cachep, page, objp);
+ if (unlikely(slab_test_pfmemalloc(slab))) {
+ cache_free_pfmemalloc(cachep, slab, objp);
return;
}
}
@@ -3657,21 +3649,21 @@ EXPORT_SYMBOL(__kmalloc_node_track_caller);
#endif /* CONFIG_NUMA */
#ifdef CONFIG_PRINTK
-void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct page *page)
+void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab)
{
struct kmem_cache *cachep;
unsigned int objnr;
void *objp;
kpp->kp_ptr = object;
- kpp->kp_page = page;
- cachep = page->slab_cache;
+ kpp->kp_slab = slab;
+ cachep = slab->slab_cache;
kpp->kp_slab_cache = cachep;
objp = object - obj_offset(cachep);
kpp->kp_data_offset = obj_offset(cachep);
- page = virt_to_head_page(objp);
- objnr = obj_to_index(cachep, page, objp);
- objp = index_to_obj(cachep, page, objnr);
+ slab = virt_to_slab(objp);
+ objnr = obj_to_index(cachep, slab, objp);
+ objp = index_to_obj(cachep, slab, objnr);
kpp->kp_objp = objp;
if (DEBUG && cachep->flags & SLAB_STORE_USER)
kpp->kp_ret = *dbg_userword(cachep, objp);
@@ -4177,8 +4169,8 @@ ssize_t slabinfo_write(struct file *file, const char __user *buffer,
 * The check does not return a value; a failing check is reported through the
 * usercopy hardening code instead.
*/
-void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
- bool to_user)
+void __check_heap_object(const void *ptr, unsigned long n,
+ const struct slab *slab, bool to_user)
{
struct kmem_cache *cachep;
unsigned int objnr;
@@ -4187,15 +4179,15 @@ void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
ptr = kasan_reset_tag(ptr);
/* Find and validate object. */
- cachep = page->slab_cache;
- objnr = obj_to_index(cachep, page, (void *)ptr);
+ cachep = slab->slab_cache;
+ objnr = obj_to_index(cachep, slab, (void *)ptr);
BUG_ON(objnr >= cachep->num);
/* Find offset within object. */
if (is_kfence_address(ptr))
offset = ptr - kfence_object_start(ptr);
else
- offset = ptr - index_to_obj(cachep, page, objnr) - obj_offset(cachep);
+ offset = ptr - index_to_obj(cachep, slab, objnr) - obj_offset(cachep);
/* Allow address range falling entirely within usercopy region. */
if (offset >= cachep->useroffset &&
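The check above reduces to index/offset arithmetic over the slab's object array: find which object the pointer falls into, then where inside that object it points. A standalone sketch of that arithmetic (illustration only, not kernel code; obj_index() and the sizes are invented for the example):

#include <assert.h>
#include <stddef.h>
#include <stdio.h>

/* Model of obj_to_index()/index_to_obj(): objects of obj_size bytes are
 * laid out back to back from the slab base address. */
static unsigned int obj_index(const char *base, size_t obj_size, const char *ptr)
{
	return (unsigned int)((ptr - base) / obj_size);
}

int main(void)
{
	char slab[4096];
	size_t obj_size = 256;
	char *ptr = slab + 3 * obj_size + 40;	/* 40 bytes into object 3 */
	unsigned int objnr = obj_index(slab, obj_size, ptr);
	size_t offset = (size_t)(ptr - (slab + (size_t)objnr * obj_size));

	/* The usercopy check then verifies that [offset, offset + n) lies
	 * inside the cache's usercopy region. */
	assert(objnr == 3 && offset == 40);
	printf("object %u, offset %zu\n", objnr, offset);
	return 0;
}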
diff --git a/mm/slab.h b/mm/slab.h
index 56ad7eea3ddf..95b9a74a2d51 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -5,6 +5,197 @@
* Internal slab definitions
*/
+/* Reuses the bits in struct page */
+struct slab {
+ unsigned long __page_flags;
+
+#if defined(CONFIG_SLAB)
+
+ union {
+ struct list_head slab_list;
+ struct rcu_head rcu_head;
+ };
+ struct kmem_cache *slab_cache;
+ void *freelist; /* array of free object indexes */
+ void *s_mem; /* first object */
+ unsigned int active;
+
+#elif defined(CONFIG_SLUB)
+
+ union {
+ struct list_head slab_list;
+ struct rcu_head rcu_head;
+#ifdef CONFIG_SLUB_CPU_PARTIAL
+ struct {
+ struct slab *next;
+ int slabs; /* Nr of slabs left */
+ };
+#endif
+ };
+ struct kmem_cache *slab_cache;
+ /* Double-word boundary */
+ void *freelist; /* first free object */
+ union {
+ unsigned long counters;
+ struct {
+ unsigned inuse:16;
+ unsigned objects:15;
+ unsigned frozen:1;
+ };
+ };
+ unsigned int __unused;
+
+#elif defined(CONFIG_SLOB)
+
+ struct list_head slab_list;
+ void *__unused_1;
+ void *freelist; /* first free block */
+ long units;
+ unsigned int __unused_2;
+
+#else
+#error "Unexpected slab allocator configured"
+#endif
+
+ atomic_t __page_refcount;
+#ifdef CONFIG_MEMCG
+ unsigned long memcg_data;
+#endif
+};
+
+#define SLAB_MATCH(pg, sl) \
+ static_assert(offsetof(struct page, pg) == offsetof(struct slab, sl))
+SLAB_MATCH(flags, __page_flags);
+SLAB_MATCH(compound_head, slab_list); /* Ensure bit 0 is clear */
+SLAB_MATCH(slab_list, slab_list);
+#ifndef CONFIG_SLOB
+SLAB_MATCH(rcu_head, rcu_head);
+SLAB_MATCH(slab_cache, slab_cache);
+#endif
+#ifdef CONFIG_SLAB
+SLAB_MATCH(s_mem, s_mem);
+SLAB_MATCH(active, active);
+#endif
+SLAB_MATCH(_refcount, __page_refcount);
+#ifdef CONFIG_MEMCG
+SLAB_MATCH(memcg_data, memcg_data);
+#endif
+#undef SLAB_MATCH
+static_assert(sizeof(struct slab) <= sizeof(struct page));
+
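The SLAB_MATCH() assertions above use a general C technique: when one struct is meant to overlay another, offsetof() plus static_assert() pins the shared fields to identical offsets at compile time. A minimal standalone illustration of the same idea (struct and field names here are invented, not the kernel's):

#include <assert.h>	/* static_assert (C11) */
#include <stddef.h>	/* offsetof */

struct wide {			/* stands in for struct page */
	unsigned long flags;
	void *a;
	void *b;
	unsigned long spare;
};

struct narrow {			/* stands in for struct slab */
	unsigned long flags;
	void *first;
	void *second;
};

#define MATCH(wf, nf) \
	static_assert(offsetof(struct wide, wf) == offsetof(struct narrow, nf), \
		      "field layout mismatch")

MATCH(flags, flags);
MATCH(a, first);
MATCH(b, second);
static_assert(sizeof(struct narrow) <= sizeof(struct wide),
	      "the overlay must not outgrow the original");

int main(void)
{
	return 0;	/* all checks happen at compile time */
}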
+/**
+ * folio_slab - Converts from folio to slab.
+ * @folio: The folio.
+ *
+ * Currently struct slab is a different representation of a folio where
+ * folio_test_slab() is true.
+ *
+ * Return: The slab which contains this folio.
+ */
+#define folio_slab(folio) (_Generic((folio), \
+ const struct folio *: (const struct slab *)(folio), \
+ struct folio *: (struct slab *)(folio)))
+
+/**
+ * slab_folio - The folio allocated for a slab
+ * @slab: The slab.
+ *
+ * Slabs are allocated as folios that contain the individual objects and use
+ * some fields in the first struct page of the folio; those fields are now
+ * accessed through struct slab. It is occasionally necessary to convert back
+ * to a folio in order to communicate with the rest of the mm. Please use this
+ * helper function instead of casting yourself, as the implementation may
+ * change in the future.
+ */
+#define slab_folio(s) (_Generic((s), \
+ const struct slab *: (const struct folio *)s, \
+ struct slab *: (struct folio *)s))
+
+/**
+ * page_slab - Converts from first struct page to slab.
+ * @p: The first (either head of compound or single) page of slab.
+ *
+ * A temporary wrapper to convert struct page to struct slab in situations
+ * where we know the page is either a compound head or a single order-0 page.
+ *
+ * Long-term, ideally everything would work with struct slab directly or go
+ * through folio to struct slab.
+ *
+ * Return: The slab which contains this page
+ */
+#define page_slab(p) (_Generic((p), \
+ const struct page *: (const struct slab *)(p), \
+ struct page *: (struct slab *)(p)))
+
+/**
+ * slab_page - The first struct page allocated for a slab
+ * @slab: The slab.
+ *
+ * A convenience wrapper for converting slab to the first struct page of the
+ * underlying folio, to communicate with code not yet converted to folio or
+ * struct slab.
+ */
+#define slab_page(s) folio_page(slab_folio(s), 0)
+
+/*
+ * If network-based swap is enabled, sl*b must keep track of whether pages
+ * were allocated from pfmemalloc reserves.
+ */
+static inline bool slab_test_pfmemalloc(const struct slab *slab)
+{
+ return folio_test_active((struct folio *)slab_folio(slab));
+}
+
+static inline void slab_set_pfmemalloc(struct slab *slab)
+{
+ folio_set_active(slab_folio(slab));
+}
+
+static inline void slab_clear_pfmemalloc(struct slab *slab)
+{
+ folio_clear_active(slab_folio(slab));
+}
+
+static inline void __slab_clear_pfmemalloc(struct slab *slab)
+{
+ __folio_clear_active(slab_folio(slab));
+}
+
+static inline void *slab_address(const struct slab *slab)
+{
+ return folio_address(slab_folio(slab));
+}
+
+static inline int slab_nid(const struct slab *slab)
+{
+ return folio_nid(slab_folio(slab));
+}
+
+static inline pg_data_t *slab_pgdat(const struct slab *slab)
+{
+ return folio_pgdat(slab_folio(slab));
+}
+
+static inline struct slab *virt_to_slab(const void *addr)
+{
+ struct folio *folio = virt_to_folio(addr);
+
+ if (!folio_test_slab(folio))
+ return NULL;
+
+ return folio_slab(folio);
+}
+
+static inline int slab_order(const struct slab *slab)
+{
+ return folio_order((struct folio *)slab_folio(slab));
+}
+
+static inline size_t slab_size(const struct slab *slab)
+{
+ return PAGE_SIZE << slab_order(slab);
+}
+
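Taken together, these helpers keep converted call sites entirely in terms of struct slab and only drop to folio/page at the boundary with the rest of mm. A hedged sketch of the typical usage pattern (the function below is hypothetical, not part of the patch, and assumes SLAB or SLUB, where struct slab carries a slab_cache pointer):

/* Hypothetical example only: look up the slab behind an object and report
 * where it lives, using the helpers defined above. */
static void report_slab_home(const void *object)
{
	struct slab *slab = virt_to_slab(object);

	if (!slab)		/* not a slab object, e.g. kmalloc_large() */
		return;

	pr_info("cache %s, node %d, %zu bytes, pfmemalloc=%d\n",
		slab->slab_cache->name, slab_nid(slab), slab_size(slab),
		slab_test_pfmemalloc(slab));
}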
#ifdef CONFIG_SLOB
/*
* Common fields provided in kmem_cache by all slab allocators
@@ -245,15 +436,33 @@ static inline bool kmem_cache_debug_flags(struct kmem_cache *s, slab_flags_t fla
}
#ifdef CONFIG_MEMCG_KMEM
-int memcg_alloc_page_obj_cgroups(struct page *page, struct kmem_cache *s,
- gfp_t gfp, bool new_page);
+/*
+ * slab_objcgs - get the object cgroups vector associated with a slab
+ * @slab: a pointer to the slab struct
+ *
+ * Returns a pointer to the object cgroups vector associated with the slab,
+ * or NULL if no such vector has been associated yet.
+ */
+static inline struct obj_cgroup **slab_objcgs(struct slab *slab)
+{
+ unsigned long memcg_data = READ_ONCE(slab->memcg_data);
+
+ VM_BUG_ON_PAGE(memcg_data && !(memcg_data & MEMCG_DATA_OBJCGS),
+ slab_page(slab));
+ VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, slab_page(slab));
+
+ return (struct obj_cgroup **)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
+}
+
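slab_objcgs() works because memcg_data packs a pointer and a few low-bit type flags into one word; masking with ~MEMCG_DATA_FLAGS_MASK recovers the vector pointer, and the VM_BUG_ONs assert the flag bits are what a slab expects. A standalone model of that encoding (flag names and bit values are invented for illustration):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define FLAG_OBJCGS	0x1UL		/* stand-in for MEMCG_DATA_OBJCGS */
#define FLAGS_MASK	0x3UL		/* stand-in for MEMCG_DATA_FLAGS_MASK */

int main(void)
{
	/* A 16-byte aligned allocation leaves the low bits free for flags. */
	void **vec = aligned_alloc(16, 8 * sizeof(void *));
	if (!vec)
		return 1;

	uintptr_t memcg_data = (uintptr_t)vec | FLAG_OBJCGS;

	assert(memcg_data & FLAG_OBJCGS);			/* type check */
	void **decoded = (void **)(memcg_data & ~FLAGS_MASK);	/* recover ptr */

	assert(decoded == vec);
	printf("encoded %#lx, decoded %p\n",
	       (unsigned long)memcg_data, (void *)decoded);
	free(vec);
	return 0;
}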
+int memcg_alloc_slab_cgroups(struct slab *slab, struct kmem_cache *s,
+ gfp_t gfp, bool new_slab);
void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
enum node_stat_item idx, int nr);
-static inline void memcg_free_page_obj_cgroups(struct page *page)
+static inline void memcg_free_slab_cgroups(struct slab *slab)
{
- kfree(page_objcgs(page));
- page->memcg_data = 0;
+ kfree(slab_objcgs(slab));
+ slab->memcg_data = 0;
}
static inline size_t obj_full_size(struct kmem_cache *s)
@@ -298,7 +507,7 @@ static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s,
gfp_t flags, size_t size,
void **p)
{
- struct page *page;
+ struct slab *slab;
unsigned long off;
size_t i;
@@ -307,19 +516,19 @@ static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s,
for (i = 0; i < size; i++) {
if (likely(p[i])) {
- page = virt_to_head_page(p[i]);
+ slab = virt_to_slab(p[i]);
- if (!page_objcgs(page) &&
- memcg_alloc_page_obj_cgroups(page, s, flags,
+ if (!slab_objcgs(slab) &&
+ memcg_alloc_slab_cgroups(slab, s, flags,
false)) {
obj_cgroup_uncharge(objcg, obj_full_size(s));
continue;
}
- off = obj_to_index(s, page, p[i]);
+ off = obj_to_index(s, slab, p[i]);
obj_cgroup_get(objcg);
- page_objcgs(page)[off] = objcg;
- mod_objcg_state(objcg, page_pgdat(page),
+ slab_objcgs(slab)[off] = objcg;
+ mod_objcg_state(objcg, slab_pgdat(slab),
cache_vmstat_idx(s), obj_full_size(s));
} else {
obj_cgroup_uncharge(objcg, obj_full_size(s));
@@ -334,7 +543,7 @@ static inline void memcg_slab_free_hook(struct kmem_cache *s_orig,
struct kmem_cache *s;
struct obj_cgroup **objcgs;
struct obj_cgroup *objcg;
- struct page *page;
+ struct slab *slab;
unsigned int off;
int i;
@@ -345,43 +554,52 @@ static inline void memcg_slab_free_hook(struct kmem_cache *s_orig,
if (unlikely(!p[i]))
continue;
- page = virt_to_head_page(p[i]);
- objcgs = page_objcgs_check(page);
+ slab = virt_to_slab(p[i]);
+ /* we could be given a kmalloc_large() object, skip those */
+ if (!slab)
+ continue;
+
+ objcgs = slab_objcgs(slab);
if (!objcgs)
continue;
if (!s_orig)
- s = page->slab_cache;
+ s = slab->slab_cache;
else
s = s_orig;
- off = obj_to_index(s, page, p[i]);
+ off = obj_to_index(s, slab, p[i]);
objcg = objcgs[off];
if (!objcg)
continue;
objcgs[off] = NULL;
obj_cgroup_uncharge(objcg, obj_full_size(s));
- mod_objcg_state(objcg, page_pgdat(page), cache_vmstat_idx(s),
+ mod_objcg_state(objcg, slab_pgdat(slab), cache_vmstat_idx(s),
-obj_full_size(s));
obj_cgroup_put(objcg);
}
}
#else /* CONFIG_MEMCG_KMEM */
+static inline struct obj_cgroup **slab_objcgs(struct slab *slab)
+{
+ return NULL;
+}
+
static inline struct mem_cgroup *memcg_from_slab_obj(void *ptr)
{
return NULL;
}
-static inline int memcg_alloc_page_obj_cgroups(struct page *page,
+static inline int memcg_alloc_slab_cgroups(struct slab *slab,
struct kmem_cache *s, gfp_t gfp,
- bool new_page)
+ bool new_slab)
{
return 0;
}
-static inline void memcg_free_page_obj_cgroups(struct page *page)
+static inline void memcg_free_slab_cgroups(struct slab *slab)
{
}
@@ -405,35 +623,35 @@ static inline void memcg_slab_free_hook(struct kmem_cache *s,
}
#endif /* CONFIG_MEMCG_KMEM */
+#ifndef CONFIG_SLOB
static inline struct kmem_cache *virt_to_cache(const void *obj)
{
- struct page *page;
+ struct slab *slab;
- page = virt_to_head_page(obj);
- if (WARN_ONCE(!PageSlab(page), "%s: Object is not a Slab page!\n",
+ slab = virt_to_slab(obj);
+ if (WARN_ONCE(!slab, "%s: Object is not a Slab page!\n",
__func__))
return NULL;
- return page->slab_cache;
+ return slab->slab_cache;
}
-static __always_inline void account_slab_page(struct page *page, int order,
- struct kmem_cache *s,
- gfp_t gfp)
+static __always_inline void account_slab(struct slab *slab, int order,
+ struct kmem_cache *s, gfp_t gfp)
{
if (memcg_kmem_enabled() && (s->flags & SLAB_ACCOUNT))
- memcg_alloc_page_obj_cgroups(page, s, gfp, true);
+ memcg_alloc_slab_cgroups(slab, s, gfp, true);
- mod_node_page_state(page_pgdat(page), cache_vmstat_idx(s),
+ mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s),
PAGE_SIZE << order);
}
-static __always_inline void unaccount_slab_page(struct page *page, int order,
- struct kmem_cache *s)
+static __always_inline void unaccount_slab(struct slab *slab, int order,
+ struct kmem_cache *s)
{
if (memcg_kmem_enabled())
- memcg_free_page_obj_cgroups(page);
+ memcg_free_slab_cgroups(slab);
- mod_node_page_state(page_pgdat(page), cache_vmstat_idx(s),
+ mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s),
-(PAGE_SIZE << order));
}
@@ -452,6 +670,7 @@ static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
print_tracking(cachep, x);
return cachep;
}
+#endif /* CONFIG_SLOB */
static inline size_t slab_ksize(const struct kmem_cache *s)
{
@@ -635,7 +854,7 @@ static inline void debugfs_slab_release(struct kmem_cache *s) { }
#define KS_ADDRS_COUNT 16
struct kmem_obj_info {
void *kp_ptr;
- struct page *kp_page;
+ struct slab *kp_slab;
void *kp_objp;
unsigned long kp_data_offset;
struct kmem_cache *kp_slab_cache;
@@ -643,7 +862,18 @@ struct kmem_obj_info {
void *kp_stack[KS_ADDRS_COUNT];
void *kp_free_stack[KS_ADDRS_COUNT];
};
-void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct page *page);
+void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab);
+#endif
+
+#ifdef CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR
+void __check_heap_object(const void *ptr, unsigned long n,
+ const struct slab *slab, bool to_user);
+#else
+static inline
+void __check_heap_object(const void *ptr, unsigned long n,
+ const struct slab *slab, bool to_user)
+{
+}
#endif
#endif /* MM_SLAB_H */
diff --git a/mm/slab_common.c b/mm/slab_common.c
index e5d080a93009..dc15566141d4 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -550,13 +550,13 @@ bool slab_is_available(void)
*/
bool kmem_valid_obj(void *object)
{
- struct page *page;
+ struct folio *folio;
/* Some arches consider ZERO_SIZE_PTR to be a valid address. */
if (object < (void *)PAGE_SIZE || !virt_addr_valid(object))
return false;
- page = virt_to_head_page(object);
- return PageSlab(page);
+ folio = virt_to_folio(object);
+ return folio_test_slab(folio);
}
EXPORT_SYMBOL_GPL(kmem_valid_obj);
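For callers nothing changes: kmem_valid_obj() still answers whether a pointer is backed by a slab, and pairs naturally with kmem_dump_obj() below. A usage sketch (the helper is hypothetical, not from this patch):

/* Hypothetical debugging helper: dump allocator details for a suspect
 * pointer if, and only if, it is backed by a slab. */
static void debug_dump_if_slab(void *ptr)
{
	if (kmem_valid_obj(ptr))
		kmem_dump_obj(ptr);
	else
		pr_info("%p is not a slab-backed object\n", ptr);
}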
@@ -579,18 +579,18 @@ void kmem_dump_obj(void *object)
{
char *cp = IS_ENABLED(CONFIG_MMU) ? "" : "/vmalloc";
int i;
- struct page *page;
+ struct slab *slab;
unsigned long ptroffset;
struct kmem_obj_info kp = { };
if (WARN_ON_ONCE(!virt_addr_valid(object)))
return;
- page = virt_to_head_page(object);
- if (WARN_ON_ONCE(!PageSlab(page))) {
+ slab = virt_to_slab(object);
+ if (WARN_ON_ONCE(!slab)) {
pr_cont(" non-slab memory.\n");
return;
}
- kmem_obj_info(&kp, object, page);
+ kmem_obj_info(&kp, object, slab);
if (kp.kp_slab_cache)
pr_cont(" slab%s %s", cp, kp.kp_slab_cache->name);
else
diff --git a/mm/slob.c b/mm/slob.c
index 03deee1e6a94..60c5842215f1 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -30,7 +30,7 @@
* If kmalloc is asked for objects of PAGE_SIZE or larger, it calls
* alloc_pages() directly, allocating compound pages so the page order
* does not have to be separately tracked.
- * These objects are detected in kfree() because PageSlab()
+ * These objects are detected in kfree() because folio_test_slab()
* is false for them.
*
* SLAB is emulated on top of SLOB by simply calling constructors and
@@ -105,21 +105,21 @@ static LIST_HEAD(free_slob_large);
/*
* slob_page_free: true for pages on free_slob_pages list.
*/
-static inline int slob_page_free(struct page *sp)
+static inline int slob_page_free(struct slab *slab)
{
- return PageSlobFree(sp);
+ return PageSlobFree(slab_page(slab));
}
-static void set_slob_page_free(struct page *sp, struct list_head *list)
+static void set_slob_page_free(struct slab *slab, struct list_head *list)
{
- list_add(&sp->slab_list, list);
- __SetPageSlobFree(sp);
+ list_add(&slab->slab_list, list);
+ __SetPageSlobFree(slab_page(slab));
}
-static inline void clear_slob_page_free(struct page *sp)
+static inline void clear_slob_page_free(struct slab *slab)
{
- list_del(&sp->slab_list);
- __ClearPageSlobFree(sp);
+ list_del(&slab->slab_list);
+ __ClearPageSlobFree(slab_page(slab));
}
#define SLOB_UNIT sizeof(slob_t)
@@ -234,7 +234,7 @@ static void slob_free_pages(void *b, int order)
* freelist, in this case @page_removed_from_list will be set to
* true (set to false otherwise).
*/
-static void *slob_page_alloc(struct page *sp, size_t size, int align,
+static void *slob_page_alloc(struct slab *sp, size_t size, int align,
int align_offset, bool *page_removed_from_list)
{
slob_t *prev, *cur, *aligned = NULL;
@@ -301,7 +301,8 @@ static void *slob_page_alloc(struct page *sp, size_t size, int align,
static void *slob_alloc(size_t size, gfp_t gfp, int align, int node,
int align_offset)
{
- struct page *sp;
+ struct folio *folio;
+ struct slab *sp;
struct list_head *slob_list;
slob_t *b = NULL;
unsigned long flags;
@@ -323,7 +324,7 @@ static void *slob_alloc(size_t size, gfp_t gfp, int align, int node,
* If there's a node specification, search for a partial
* page with a matching node id in the freelist.
*/
- if (node != NUMA_NO_NODE && page_to_nid(sp) != node)
+ if (node != NUMA_NO_NODE && slab_nid(sp) != node)
continue;
#endif
/* Enough room on this page? */
@@ -358,8 +359,9 @@ static void *slob_alloc(size_t size, gfp_t gfp, int align, int node,
b = slob_new_pages(gfp & ~__GFP_ZERO, 0, node);
if (!b)
return NULL;
- sp = virt_to_page(b);
- __SetPageSlab(sp);
+ folio = virt_to_folio(b);
+ __folio_set_slab(folio);
+ sp = folio_slab(folio);
spin_lock_irqsave(&slob_lock, flags);
sp->units = SLOB_UNITS(PAGE_SIZE);
@@ -381,7 +383,7 @@ static void *slob_alloc(size_t size, gfp_t gfp, int align, int node,
*/
static void slob_free(void *block, int size)
{
- struct page *sp;
+ struct slab *sp;
slob_t *prev, *next, *b = (slob_t *)block;
slobidx_t units;
unsigned long flags;
@@ -391,7 +393,7 @@ static void slob_free(void *block, int size)
return;
BUG_ON(!size);
- sp = virt_to_page(block);
+ sp = virt_to_slab(block);
units = SLOB_UNITS(size);
spin_lock_irqsave(&slob_lock, flags);
@@ -401,8 +403,7 @@ static void slob_free(void *block, int size)
if (slob_page_free(sp))
clear_slob_page_free(sp);
spin_unlock_irqrestore(&slob_lock, flags);
- __ClearPageSlab(sp);
- page_mapcount_reset(sp);
+ __folio_clear_slab(slab_folio(sp));
slob_free_pages(b, 0);
return;
}
@@ -462,10 +463,10 @@ out:
}
#ifdef CONFIG_PRINTK
-void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct page *page)
+void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab)
{
kpp->kp_ptr = object;
- kpp->kp_page = page;
+ kpp->kp_slab = slab;
}
#endif
@@ -544,7 +545,7 @@ EXPORT_SYMBOL(__kmalloc_node_track_caller);
void kfree(const void *block)
{
- struct page *sp;
+ struct folio *sp;
trace_kfree(_RET_IP_, block);
@@ -552,16 +553,17 @@ void kfree(const void *block)
return;
kmemleak_free(block);
- sp = virt_to_page(block);
- if (PageSlab(sp)) {
+ sp = virt_to_folio(block);
+ if (folio_test_slab(sp)) {
int align = max_t(size_t, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
unsigned int *m = (unsigned int *)(block - align);
slob_free(m, *m + align);
} else {
- unsigned int order = compound_order(sp);
- mod_node_page_state(page_pgdat(sp), NR_SLAB_UNRECLAIMABLE_B,
+ unsigned int order = folio_order(sp);
+
+ mod_node_page_state(folio_pgdat(sp), NR_SLAB_UNRECLAIMABLE_B,
-(PAGE_SIZE << order));
- __free_pages(sp, order);
+ __free_pages(folio_page(sp, 0), order);
}
}
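This kfree() path relies on SLOB's size-prefix convention: kmalloc() over-allocates by the minimum alignment and stores the requested size just before the pointer it returns, so kfree() and __ksize() can recover it. A simplified standalone model of that convention (names are invented and the real code rounds sizes to SLOB units):

#include <stdio.h>
#include <stdlib.h>

#define MIN_ALIGN	sizeof(unsigned int)	/* stand-in for the kmalloc alignment */

static void *model_kmalloc(unsigned int size)
{
	unsigned int *m = malloc(size + MIN_ALIGN);

	if (!m)
		return NULL;
	*m = size;			/* remember the requested size */
	return (char *)m + MIN_ALIGN;	/* caller only ever sees the payload */
}

static unsigned int model_ksize(const void *block)
{
	const unsigned int *m = (const void *)((const char *)block - MIN_ALIGN);

	return *m;
}

static void model_kfree(void *block)
{
	unsigned int *m = (void *)((char *)block - MIN_ALIGN);

	free(m);			/* the real code frees *m + align units */
}

int main(void)
{
	void *p = model_kmalloc(100);

	printf("ksize = %u\n", model_ksize(p));		/* prints 100 */
	model_kfree(p);
	return 0;
}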
@@ -570,7 +572,7 @@ EXPORT_SYMBOL(kfree);
/* can't use ksize for kmem_cache_alloc memory, only kmalloc */
size_t __ksize(const void *block)
{
- struct page *sp;
+ struct folio *folio;
int align;
unsigned int *m;
@@ -578,9 +580,9 @@ size_t __ksize(const void *block)
if (unlikely(block == ZERO_SIZE_PTR))
return 0;
- sp = virt_to_page(block);
- if (unlikely(!PageSlab(sp)))
- return page_size(sp);
+ folio = virt_to_folio(block);
+ if (unlikely(!folio_test_slab(folio)))
+ return folio_size(folio);
align = max_t(size_t, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
m = (unsigned int *)(block - align);
diff --git a/mm/slub.c b/mm/slub.c
index a8626825a829..261474092e43 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -48,7 +48,7 @@
* 1. slab_mutex (Global Mutex)
* 2. node->list_lock (Spinlock)
* 3. kmem_cache->cpu_slab->lock (Local lock)
- * 4. slab_lock(page) (Only on some arches or for debugging)
+ * 4. slab_lock(slab) (Only on some arches or for debugging)
* 5. object_map_lock (Only for debugging)
*
* slab_mutex
@@ -64,19 +64,19 @@
*
* The slab_lock is only used for debugging and on arches that do not
* have the ability to do a cmpxchg_double. It only protects:
- * A. page->freelist -> List of object free in a page
- * B. page->inuse -> Number of objects in use
- * C. page->objects -> Number of objects in page
- * D. page->frozen -> frozen state
+ * A. slab->freelist -> List of free objects in a slab
+ * B. slab->inuse -> Number of objects in use
+ * C. slab->objects -> Number of objects in slab
+ * D. slab->frozen -> frozen state
*
* Frozen slabs
*
* If a slab is frozen then it is exempt from list management. It is not
* on any list except per cpu partial list. The processor that froze the
- * slab is the one who can perform list operations on the page. Other
+ * slab is the one who can perform list operations on the slab. Other
* processors may put objects onto the freelist but the processor that
* froze the slab is the only one that can retrieve the objects from the
- * page's freelist.
+ * slab's freelist.
*
* list_lock
*
@@ -135,7 +135,7 @@
 * minimal so we rely on the page allocator's per cpu caches for
* fast frees and allocs.
*
- * page->frozen The slab is frozen and exempt from list processing.
+ * slab->frozen The slab is frozen and exempt from list processing.
* This means that the slab is dedicated to a purpose
* such as satisfying allocations for a specific
* processor. Objects may be freed in the slab while
@@ -250,7 +250,7 @@ static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
#define OO_SHIFT 16
#define OO_MASK ((1 << OO_SHIFT) - 1)
-#define MAX_OBJS_PER_PAGE 32767 /* since page.objects is u15 */
+#define MAX_OBJS_PER_PAGE 32767 /* since slab.objects is u15 */
/* Internal SLUB flags */
/* Poison object */
@@ -417,18 +417,18 @@ static inline unsigned int oo_objects(struct kmem_cache_order_objects x)
#ifdef CONFIG_SLUB_CPU_PARTIAL
static void slub_set_cpu_partial(struct kmem_cache *s, unsigned int nr_objects)
{
- unsigned int nr_pages;
+ unsigned int nr_slabs;
s->cpu_partial = nr_objects;
/*
* We take the number of objects but actually limit the number of
- * pages on the per cpu partial list, in order to limit excessive
- * growth of the list. For simplicity we assume that the pages will
+ * slabs on the per cpu partial list, in order to limit excessive
+ * growth of the list. For simplicity we assume that the slabs will
* be half-full.
*/
- nr_pages = DIV_ROUND_UP(nr_objects * 2, oo_objects(s->oo));
- s->cpu_partial_pages = nr_pages;
+ nr_slabs = DIV_ROUND_UP(nr_objects * 2, oo_objects(s->oo));
+ s->cpu_partial_slabs = nr_slabs;
}
#else
static inline void
@@ -440,28 +440,32 @@ slub_set_cpu_partial(struct kmem_cache *s, unsigned int nr_objects)
/*
* Per slab locking using the pagelock
*/
-static __always_inline void __slab_lock(struct page *page)
+static __always_inline void __slab_lock(struct slab *slab)
{
+ struct page *page = slab_page(slab);
+
VM_BUG_ON_PAGE(PageTail(page), page);
bit_spin_lock(PG_locked, &page->flags);
}
-static __always_inline void __slab_unlock(struct page *page)
+static __always_inline void __slab_unlock(struct slab *slab)
{
+ struct page *page = slab_page(slab);
+
VM_BUG_ON_PAGE(PageTail(page), page);
__bit_spin_unlock(PG_locked, &page->flags);
}
-static __always_inline void slab_lock(struct page *page, unsigned long *flags)
+static __always_inline void slab_lock(struct slab *slab, unsigned long *flags)
{
if (IS_ENABLED(CONFIG_PREEMPT_RT))
local_irq_save(*flags);
- __slab_lock(page);
+ __slab_lock(slab);
}
-static __always_inline void slab_unlock(struct page *page, unsigned long *flags)
+static __always_inline void slab_unlock(struct slab *slab, unsigned long *flags)
{
- __slab_unlock(page);
+ __slab_unlock(slab);
if (IS_ENABLED(CONFIG_PREEMPT_RT))
local_irq_restore(*flags);
}
@@ -471,7 +475,7 @@ static __always_inline void slab_unlock(struct page *page, unsigned long *flags)
* by an _irqsave() lock variant. Except on PREEMPT_RT where locks are different
* so we disable interrupts as part of slab_[un]lock().
*/
-static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
+static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct slab *slab,
void *freelist_old, unsigned long counters_old,
void *freelist_new, unsigned long counters_new,
const char *n)
@@ -481,7 +485,7 @@ static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page
#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
if (s->flags & __CMPXCHG_DOUBLE) {
- if (cmpxchg_double(&page->freelist, &page->counters,
+ if (cmpxchg_double(&slab->freelist, &slab->counters,
freelist_old, counters_old,
freelist_new, counters_new))
return true;
@@ -491,15 +495,15 @@ static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page
/* init to 0 to prevent spurious warnings */
unsigned long flags = 0;
- slab_lock(page, &flags);
- if (page->freelist == freelist_old &&
- page->counters == counters_old) {
- page->freelist = freelist_new;
- page->counters = counters_new;
- slab_unlock(page, &flags);
+ slab_lock(slab, &flags);
+ if (slab->freelist == freelist_old &&
+ slab->counters == counters_old) {
+ slab->freelist = freelist_new;
+ slab->counters = counters_new;
+ slab_unlock(slab, &flags);
return true;
}
- slab_unlock(page, &flags);
+ slab_unlock(slab, &flags);
}
cpu_relax();
@@ -512,7 +516,7 @@ static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page
return false;
}
-static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
+static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct slab *slab,
void *freelist_old, unsigned long counters_old,
void *freelist_new, unsigned long counters_new,
const char *n)
@@ -520,7 +524,7 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
if (s->flags & __CMPXCHG_DOUBLE) {
- if (cmpxchg_double(&page->freelist, &page->counters,
+ if (cmpxchg_double(&slab->freelist, &slab->counters,
freelist_old, counters_old,
freelist_new, counters_new))
return true;
@@ -530,16 +534,16 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
unsigned long flags;
local_irq_save(flags);
- __slab_lock(page);
- if (page->freelist == freelist_old &&
- page->counters == counters_old) {
- page->freelist = freelist_new;
- page->counters = counters_new;
- __slab_unlock(page);
+ __slab_lock(slab);
+ if (slab->freelist == freelist_old &&
+ slab->counters == counters_old) {
+ slab->freelist = freelist_new;
+ slab->counters = counters_new;
+ __slab_unlock(slab);
local_irq_restore(flags);
return true;
}
- __slab_unlock(page);
+ __slab_unlock(slab);
local_irq_restore(flags);
}
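When the architecture cannot do a double-word cmpxchg, the code falls back to taking the slab lock, comparing both the freelist pointer and the packed counters, and only then installing the new values. A simplified userspace model of that fallback (illustration only; a pthread mutex stands in for slab_lock()):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct model_slab {
	pthread_mutex_t lock;		/* stands in for the per-slab lock */
	void *freelist;
	unsigned long counters;
};

/* Compare both words under the lock and only then install the new values,
 * mirroring the slow path of the cmpxchg_double helpers above. */
static bool cmpxchg_double_locked(struct model_slab *s,
				  void *freelist_old, unsigned long counters_old,
				  void *freelist_new, unsigned long counters_new)
{
	bool ok = false;

	pthread_mutex_lock(&s->lock);
	if (s->freelist == freelist_old && s->counters == counters_old) {
		s->freelist = freelist_new;
		s->counters = counters_new;
		ok = true;
	}
	pthread_mutex_unlock(&s->lock);
	return ok;
}

int main(void)
{
	static char obj_a, obj_b;
	struct model_slab s = { PTHREAD_MUTEX_INITIALIZER, &obj_a, 42 };

	/* Succeeds: both fields still hold the expected old values. */
	printf("swap %s\n",
	       cmpxchg_double_locked(&s, &obj_a, 42, &obj_b, 43) ?
	       "succeeded" : "failed");
	return 0;
}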
@@ -558,14 +562,14 @@ static unsigned long object_map[BITS_TO_LONGS(MAX_OBJS_PER_PAGE)];
static DEFINE_RAW_SPINLOCK(object_map_lock);
static void __fill_map(unsigned long *obj_map, struct kmem_cache *s,
- struct page *page)
+ struct slab *slab)
{
- void *addr = page_address(page);
+ void *addr = slab_address(slab);
void *p;
- bitmap_zero(obj_map, page->objects);
+ bitmap_zero(obj_map, slab->objects);
- for (p = page->freelist; p; p = get_freepointer(s, p))
+ for (p = slab->freelist; p; p = get_freepointer(s, p))
set_bit(__obj_to_index(s, addr, p), obj_map);
}
@@ -590,19 +594,19 @@ static inline bool slab_add_kunit_errors(void) { return false; }
#endif
/*
- * Determine a map of object in use on a page.
+ * Determine a map of objects in use in a slab.
*
- * Node listlock must be held to guarantee that the page does
+ * Node listlock must be held to guarantee that the slab does
* not vanish from under us.
*/
-static unsigned long *get_map(struct kmem_cache *s, struct page *page)
+static unsigned long *get_map(struct kmem_cache *s, struct slab *slab)
__acquires(&object_map_lock)
{
VM_BUG_ON(!irqs_disabled());
raw_spin_lock(&object_map_lock);
- __fill_map(object_map, s, page);
+ __fill_map(object_map, s, slab);
return object_map;
}
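__fill_map() builds a bitmap with one bit per object by walking the slab's freelist, so a set bit means "free" and a clear bit means "in use". A standalone model of that walk (sizes and the on-object free pointer layout are invented for illustration):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define NR_OBJECTS	16
#define OBJ_SIZE	32

int main(void)
{
	static char slab[NR_OBJECTS * OBJ_SIZE];
	const int free_objs[] = { 2, 5, 9 };
	char *freelist = NULL;
	uint64_t obj_map = 0;

	/* Chain a few "free" objects through a pointer stored at their start. */
	for (int i = 0; i < 3; i++) {
		char *obj = slab + free_objs[i] * OBJ_SIZE;

		memcpy(obj, &freelist, sizeof(freelist));
		freelist = obj;
	}

	/* The __fill_map() analogue: one bit per free object. */
	for (char *p = freelist; p; memcpy(&p, p, sizeof(p)))
		obj_map |= UINT64_C(1) << ((p - slab) / OBJ_SIZE);

	printf("free map: %#llx\n", (unsigned long long)obj_map);	/* 0x224 */
	return 0;
}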
@@ -663,17 +667,17 @@ static inline void metadata_access_disable(void)
/* Verify that a pointer has an address that is valid within a slab page */
static inline int check_valid_pointer(struct kmem_cache *s,
- struct page *page, void *object)
+ struct slab *slab, void *object)
{
void *base;
if (!object)
return 1;
- base = page_address(page);
+ base = slab_address(slab);
object = kasan_reset_tag(object);
object = restore_red_left(s, object);
- if (object < base || object >= base + page->objects * s->size ||
+ if (object < base || object >= base + slab->objects * s->size ||
(object - base) % s->size) {
return 0;
}
@@ -784,12 +788,13 @@ void print_tracking(struct kmem_cache *s, void *object)
print_track("Freed", get_track(s, object, TRACK_FREE), pr_time);
}
-static void print_page_info(struct page *page)
+static void print_slab_info(const struct slab *slab)
{
- pr_err("Slab 0x%p objects=%u used=%u fp=0x%p flags=%pGp\n",
- page, page->objects, page->inuse, page->freelist,
- &page->flags);
+ struct folio *folio = (struct folio *)slab_folio(slab);
+ pr_err("Slab 0x%p objects=%u used=%u fp=0x%p flags=%pGp\n",
+ slab, slab->objects, slab->inuse, slab->freelist,
+ folio_flags(folio, 0));
}
static void slab_bug(struct kmem_cache *s, char *fmt, ...)
@@ -822,28 +827,14 @@ static void slab_fix(struct kmem_cache *s, char *fmt, ...)
va_end(args);
}
-static bool freelist_corrupted(struct kmem_cache *s, struct page *page,
- void **freelist, void *nextfree)
-{
- if ((s->flags & SLAB_CONSISTENCY_CHECKS) &&
- !check_valid_pointer(s, page, nextfree) && freelist) {
- object_err(s, page, *freelist, "Freechain corrupt");
- *freelist = NULL;
- slab_fix(s, "Isolate corrupted freechain");
- return true;
- }
-
- return false;
-}
-
-static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
+static void print_trailer(struct kmem_cache *s, struct slab *slab, u8 *p)
{
unsigned int off; /* Offset of last byte */
- u8 *addr = page_address(page);
+ u8 *addr = slab_address(slab);
print_tracking(s, p);
- print_page_info(page);
+ print_slab_info(slab);
pr_err("Object 0x%p @offset=%tu fp=0x%p\n\n",
p, p - addr, get_freepointer(s, p));
@@ -875,18 +866,32 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
dump_stack();
}
-void object_err(struct kmem_cache *s, struct page *page,
+static void object_err(struct kmem_cache *s, struct slab *slab,
u8 *object, char *reason)
{
if (slab_add_kunit_errors())
return;
slab_bug(s, "%s", reason);
- print_trailer(s, page, object);
+ print_trailer(s, slab, object);
add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
}
-static __printf(3, 4) void slab_err(struct kmem_cache *s, struct page *page,
+static bool freelist_corrupted(struct kmem_cache *s, struct slab *slab,
+ void **freelist, void *nextfree)
+{
+ if ((s->flags & SLAB_CONSISTENCY_CHECKS) &&
+ !check_valid_pointer(s, slab, nextfree) && freelist) {
+ object_err(s, slab, *freelist, "Freechain corrupt");
+ *freelist = NULL;
+ slab_fix(s, "Isolate corrupted freechain");
+ return true;
+ }
+
+ return false;
+}
+
+static __printf(3, 4) void slab_err(struct kmem_cache *s, struct slab *slab,
const char *fmt, ...)
{
va_list args;
@@ -899,7 +904,7 @@ static __printf(3, 4) void slab_err(struct kmem_cache *s, struct page *page,
vsnprintf(buf, sizeof(buf), fmt, args);
va_end(args);
slab_bug(s, "%s", buf);
- print_page_info(page);
+ print_slab_info(slab);
dump_stack();
add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
}
@@ -927,13 +932,13 @@ static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
memset(from, data, to - from);
}
-static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
+static int check_bytes_and_report(struct kmem_cache *s, struct slab *slab,
u8 *object, char *what,
u8 *start, unsigned int value, unsigned int bytes)
{
u8 *fault;
u8 *end;
- u8 *addr = page_address(page);
+ u8 *addr = slab_address(slab);
metadata_access_enable();
fault = memchr_inv(kasan_reset_tag(start), value, bytes);
@@ -952,7 +957,7 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
pr_err("0x%p-0x%p @offset=%tu. First byte 0x%x instead of 0x%x\n",
fault, end - 1, fault - addr,
fault[0], value);
- print_trailer(s, page, object);
+ print_trailer(s, slab, object);
add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
skip_bug_print:
@@ -998,7 +1003,7 @@ skip_bug_print:
* may be used with merged slabcaches.
*/
-static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p)
+static int check_pad_bytes(struct kmem_cache *s, struct slab *slab, u8 *p)
{
unsigned long off = get_info_end(s); /* The end of info */
@@ -1011,12 +1016,12 @@ static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p)
if (size_from_object(s) == off)
return 1;
- return check_bytes_and_report(s, page, p, "Object padding",
+ return check_bytes_and_report(s, slab, p, "Object padding",
p + off, POISON_INUSE, size_from_object(s) - off);
}
/* Check the pad bytes at the end of a slab page */
-static int slab_pad_check(struct kmem_cache *s, struct page *page)
+static int slab_pad_check(struct kmem_cache *s, struct slab *slab)
{
u8 *start;
u8 *fault;
@@ -1028,8 +1033,8 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page)
if (!(s->flags & SLAB_POISON))
return 1;
- start = page_address(page);
- length = page_size(page);
+ start = slab_address(slab);
+ length = slab_size(slab);
end = start + length;
remainder = length % s->size;
if (!remainder)
@@ -1044,7 +1049,7 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page)
while (end > fault && end[-1] == POISON_INUSE)
end--;
- slab_err(s, page, "Padding overwritten. 0x%p-0x%p @offset=%tu",
+ slab_err(s, slab, "Padding overwritten. 0x%p-0x%p @offset=%tu",
fault, end - 1, fault - start);
print_section(KERN_ERR, "Padding ", pad, remainder);
@@ -1052,23 +1057,23 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page)
return 0;
}
-static int check_object(struct kmem_cache *s, struct page *page,
+static int check_object(struct kmem_cache *s, struct slab *slab,
void *object, u8 val)
{
u8 *p = object;
u8 *endobject = object + s->object_size;
if (s->flags & SLAB_RED_ZONE) {
- if (!check_bytes_and_report(s, page, object, "Left Redzone",
+ if (!check_bytes_and_report(s, slab, object, "Left Redzone",
object - s->red_left_pad, val, s->red_left_pad))
return 0;
- if (!check_bytes_and_report(s, page, object, "Right Redzone",
+ if (!check_bytes_and_report(s, slab, object, "Right Redzone",
endobject, val, s->inuse - s->object_size))
return 0;
} else {
if ((s->flags & SLAB_POISON) && s->object_size < s->inuse) {
- check_bytes_and_report(s, page, p, "Alignment padding",
+ check_bytes_and_report(s, slab, p, "Alignment padding",
endobject, POISON_INUSE,
s->inuse - s->object_size);
}
@@ -1076,15 +1081,15 @@ static int check_object(struct kmem_cache *s, struct page *page,
if (s->flags & SLAB_POISON) {
if (val != SLUB_RED_ACTIVE && (s->flags & __OBJECT_POISON) &&
- (!check_bytes_and_report(s, page, p, "Poison", p,
+ (!check_bytes_and_report(s, slab, p, "Poison", p,
POISON_FREE, s->object_size - 1) ||
- !check_bytes_and_report(s, page, p, "End Poison",
+ !check_bytes_and_report(s, slab, p, "End Poison",
p + s->object_size - 1, POISON_END, 1)))
return 0;
/*
* check_pad_bytes cleans up on its own.
*/
- check_pad_bytes(s, page, p);
+ check_pad_bytes(s, slab, p);
}
if (!freeptr_outside_object(s) && val == SLUB_RED_ACTIVE)
@@ -1095,8 +1100,8 @@ static int check_object(struct kmem_cache *s, struct page *page,
return 1;
/* Check free pointer validity */
- if (!check_valid_pointer(s, page, get_freepointer(s, p))) {
- object_err(s, page, p, "Freepointer corrupt");
+ if (!check_valid_pointer(s, slab, get_freepointer(s, p))) {
+ object_err(s, slab, p, "Freepointer corrupt");
/*
* No choice but to zap it and thus lose the remainder
* of the free objects in this slab. May cause
@@ -1108,55 +1113,55 @@ static int check_object(struct kmem_cache *s, struct page *page,
return 1;
}
-static int check_slab(struct kmem_cache *s, struct page *page)
+static int check_slab(struct kmem_cache *s, struct slab *slab)
{
int maxobj;
- if (!PageSlab(page)) {
- slab_err(s, page, "Not a valid slab page");
+ if (!folio_test_slab(slab_folio(slab))) {
+ slab_err(s, slab, "Not a valid slab page");
return 0;
}
- maxobj = order_objects(compound_order(page), s->size);
- if (page->objects > maxobj) {
- slab_err(s, page, "objects %u > max %u",
- page->objects, maxobj);
+ maxobj = order_objects(slab_order(slab), s->size);
+ if (slab->objects > maxobj) {
+ slab_err(s, slab, "objects %u > max %u",
+ slab->objects, maxobj);
return 0;
}
- if (page->inuse > page->objects) {
- slab_err(s, page, "inuse %u > max %u",
- page->inuse, page->objects);
+ if (slab->inuse > slab->objects) {
+ slab_err(s, slab, "inuse %u > max %u",
+ slab->inuse, slab->objects);
return 0;
}
/* Slab_pad_check fixes things up after itself */
- slab_pad_check(s, page);
+ slab_pad_check(s, slab);
return 1;
}
/*
- * Determine if a certain object on a page is on the freelist. Must hold the
+ * Determine if a certain object in a slab is on the freelist. Must hold the
* slab lock to guarantee that the chains are in a consistent state.
*/
-static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
+static int on_freelist(struct kmem_cache *s, struct slab *slab, void *search)
{
int nr = 0;
void *fp;
void *object = NULL;
int max_objects;
- fp = page->freelist;
- while (fp && nr <= page->objects) {
+ fp = slab->freelist;
+ while (fp && nr <= slab->objects) {
if (fp == search)
return 1;
- if (!check_valid_pointer(s, page, fp)) {
+ if (!check_valid_pointer(s, slab, fp)) {
if (object) {
- object_err(s, page, object,
+ object_err(s, slab, object,
"Freechain corrupt");
set_freepointer(s, object, NULL);
} else {
- slab_err(s, page, "Freepointer corrupt");
- page->freelist = NULL;
- page->inuse = page->objects;
+ slab_err(s, slab, "Freepointer corrupt");
+ slab->freelist = NULL;
+ slab->inuse = slab->objects;
slab_fix(s, "Freelist cleared");
return 0;
}
@@ -1167,34 +1172,34 @@ static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
nr++;
}
- max_objects = order_objects(compound_order(page), s->size);
+ max_objects = order_objects(slab_order(slab), s->size);
if (max_objects > MAX_OBJS_PER_PAGE)
max_objects = MAX_OBJS_PER_PAGE;
- if (page->objects != max_objects) {
- slab_err(s, page, "Wrong number of objects. Found %d but should be %d",
- page->objects, max_objects);
- page->objects = max_objects;
+ if (slab->objects != max_objects) {
+ slab_err(s, slab, "Wrong number of objects. Found %d but should be %d",
+ slab->objects, max_objects);
+ slab->objects = max_objects;
slab_fix(s, "Number of objects adjusted");
}
- if (page->inuse != page->objects - nr) {
- slab_err(s, page, "Wrong object count. Counter is %d but counted were %d",
- page->inuse, page->objects - nr);
- page->inuse = page->objects - nr;
+ if (slab->inuse != slab->objects - nr) {
+ slab_err(s, slab, "Wrong object count. Counter is %d but counted were %d",
+ slab->inuse, slab->objects - nr);
+ slab->inuse = slab->objects - nr;
slab_fix(s, "Object count adjusted");
}
return search == NULL;
}
-static void trace(struct kmem_cache *s, struct page *page, void *object,
+static void trace(struct kmem_cache *s, struct slab *slab, void *object,
int alloc)
{
if (s->flags & SLAB_TRACE) {
pr_info("TRACE %s %s 0x%p inuse=%d fp=0x%p\n",
s->name,
alloc ? "alloc" : "free",
- object, page->inuse,
- page->freelist);
+ object, slab->inuse,
+ slab->freelist);
if (!alloc)
print_section(KERN_INFO, "Object ", (void *)object,
@@ -1208,22 +1213,22 @@ static void trace(struct kmem_cache *s, struct page *page, void *object,
* Tracking of fully allocated slabs for debugging purposes.
*/
static void add_full(struct kmem_cache *s,
- struct kmem_cache_node *n, struct page *page)
+ struct kmem_cache_node *n, struct slab *slab)
{
if (!(s->flags & SLAB_STORE_USER))
return;
lockdep_assert_held(&n->list_lock);
- list_add(&page->slab_list, &n->full);
+ list_add(&slab->slab_list, &n->full);
}
-static void remove_full(struct kmem_cache *s, struct kmem_cache_node *n, struct page *page)
+static void remove_full(struct kmem_cache *s, struct kmem_cache_node *n, struct slab *slab)
{
if (!(s->flags & SLAB_STORE_USER))
return;
lockdep_assert_held(&n->list_lock);
- list_del(&page->slab_list);
+ list_del(&slab->slab_list);
}
/* Tracking of the number of slabs for debugging purposes */
@@ -1263,7 +1268,7 @@ static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects)
}
/* Object debug checks for alloc/free paths */
-static void setup_object_debug(struct kmem_cache *s, struct page *page,
+static void setup_object_debug(struct kmem_cache *s, struct slab *slab,
void *object)
{
if (!kmem_cache_debug_flags(s, SLAB_STORE_USER|SLAB_RED_ZONE|__OBJECT_POISON))
@@ -1274,89 +1279,89 @@ static void setup_object_debug(struct kmem_cache *s, struct page *page,
}
static
-void setup_page_debug(struct kmem_cache *s, struct page *page, void *addr)
+void setup_slab_debug(struct kmem_cache *s, struct slab *slab, void *addr)
{
if (!kmem_cache_debug_flags(s, SLAB_POISON))
return;
metadata_access_enable();
- memset(kasan_reset_tag(addr), POISON_INUSE, page_size(page));
+ memset(kasan_reset_tag(addr), POISON_INUSE, slab_size(slab));
metadata_access_disable();
}
static inline int alloc_consistency_checks(struct kmem_cache *s,
- struct page *page, void *object)
+ struct slab *slab, void *object)
{
- if (!check_slab(s, page))
+ if (!check_slab(s, slab))
return 0;
- if (!check_valid_pointer(s, page, object)) {
- object_err(s, page, object, "Freelist Pointer check fails");
+ if (!check_valid_pointer(s, slab, object)) {
+ object_err(s, slab, object, "Freelist Pointer check fails");
return 0;
}
- if (!check_object(s, page, object, SLUB_RED_INACTIVE))
+ if (!check_object(s, slab, object, SLUB_RED_INACTIVE))
return 0;
return 1;
}
static noinline int alloc_debug_processing(struct kmem_cache *s,
- struct page *page,
+ struct slab *slab,
void *object, unsigned long addr)
{
if (s->flags & SLAB_CONSISTENCY_CHECKS) {
- if (!alloc_consistency_checks(s, page, object))
+ if (!alloc_consistency_checks(s, slab, object))
goto bad;
}
	/* Success: perform special debug activities for allocs */
if (s->flags & SLAB_STORE_USER)
set_track(s, object, TRACK_ALLOC, addr);
- trace(s, page, object, 1);
+ trace(s, slab, object, 1);
init_object(s, object, SLUB_RED_ACTIVE);
return 1;
bad:
- if (PageSlab(page)) {
+ if (folio_test_slab(slab_folio(slab))) {
/*
		 * If this is a slab page then let's do the best we can
* to avoid issues in the future. Marking all objects
* as used avoids touching the remaining objects.
*/
slab_fix(s, "Marking all objects used");
- page->inuse = page->objects;
- page->freelist = NULL;
+ slab->inuse = slab->objects;
+ slab->freelist = NULL;
}
return 0;
}
static inline int free_consistency_checks(struct kmem_cache *s,
- struct page *page, void *object, unsigned long addr)
+ struct slab *slab, void *object, unsigned long addr)
{
- if (!check_valid_pointer(s, page, object)) {
- slab_err(s, page, "Invalid object pointer 0x%p", object);
+ if (!check_valid_pointer(s, slab, object)) {
+ slab_err(s, slab, "Invalid object pointer 0x%p", object);
return 0;
}
- if (on_freelist(s, page, object)) {
- object_err(s, page, object, "Object already free");
+ if (on_freelist(s, slab, object)) {
+ object_err(s, slab, object, "Object already free");
return 0;
}
- if (!check_object(s, page, object, SLUB_RED_ACTIVE))
+ if (!check_object(s, slab, object, SLUB_RED_ACTIVE))
return 0;
- if (unlikely(s != page->slab_cache)) {
- if (!PageSlab(page)) {
- slab_err(s, page, "Attempt to free object(0x%p) outside of slab",
+ if (unlikely(s != slab->slab_cache)) {
+ if (!folio_test_slab(slab_folio(slab))) {
+ slab_err(s, slab, "Attempt to free object(0x%p) outside of slab",
object);
- } else if (!page->slab_cache) {
+ } else if (!slab->slab_cache) {
pr_err("SLUB <none>: no slab for object 0x%p.\n",
object);
dump_stack();
} else
- object_err(s, page, object,
+ object_err(s, slab, object,
"page slab pointer corrupt.");
return 0;
}
@@ -1365,21 +1370,21 @@ static inline int free_consistency_checks(struct kmem_cache *s,
/* Supports checking bulk free of a constructed freelist */
static noinline int free_debug_processing(
- struct kmem_cache *s, struct page *page,
+ struct kmem_cache *s, struct slab *slab,
void *head, void *tail, int bulk_cnt,
unsigned long addr)
{
- struct kmem_cache_node *n = get_node(s, page_to_nid(page));
+ struct kmem_cache_node *n = get_node(s, slab_nid(slab));
void *object = head;
int cnt = 0;
unsigned long flags, flags2;
int ret = 0;
spin_lock_irqsave(&n->list_lock, flags);
- slab_lock(page, &flags2);
+ slab_lock(slab, &flags2);
if (s->flags & SLAB_CONSISTENCY_CHECKS) {
- if (!check_slab(s, page))
+ if (!check_slab(s, slab))
goto out;
}
@@ -1387,13 +1392,13 @@ next_object:
cnt++;
if (s->flags & SLAB_CONSISTENCY_CHECKS) {
- if (!free_consistency_checks(s, page, object, addr))
+ if (!free_consistency_checks(s, slab, object, addr))
goto out;
}
if (s->flags & SLAB_STORE_USER)
set_track(s, object, TRACK_FREE, addr);
- trace(s, page, object, 0);
+ trace(s, slab, object, 0);
/* Freepointer not overwritten by init_object(), SLAB_POISON moved it */
init_object(s, object, SLUB_RED_INACTIVE);
@@ -1406,10 +1411,10 @@ next_object:
out:
if (cnt != bulk_cnt)
- slab_err(s, page, "Bulk freelist count(%d) invalid(%d)\n",
+ slab_err(s, slab, "Bulk freelist count(%d) invalid(%d)\n",
bulk_cnt, cnt);
- slab_unlock(page, &flags2);
+ slab_unlock(slab, &flags2);
spin_unlock_irqrestore(&n->list_lock, flags);
if (!ret)
slab_fix(s, "Object at 0x%p not freed", object);
@@ -1624,26 +1629,26 @@ slab_flags_t kmem_cache_flags(unsigned int object_size,
}
#else /* !CONFIG_SLUB_DEBUG */
static inline void setup_object_debug(struct kmem_cache *s,
- struct page *page, void *object) {}
+ struct slab *slab, void *object) {}
static inline
-void setup_page_debug(struct kmem_cache *s, struct page *page, void *addr) {}
+void setup_slab_debug(struct kmem_cache *s, struct slab *slab, void *addr) {}
static inline int alloc_debug_processing(struct kmem_cache *s,
- struct page *page, void *object, unsigned long addr) { return 0; }
+ struct slab *slab, void *object, unsigned long addr) { return 0; }
static inline int free_debug_processing(
- struct kmem_cache *s, struct page *page,
+ struct kmem_cache *s, struct slab *slab,
void *head, void *tail, int bulk_cnt,
unsigned long addr) { return 0; }
-static inline int slab_pad_check(struct kmem_cache *s, struct page *page)
+static inline int slab_pad_check(struct kmem_cache *s, struct slab *slab)
{ return 1; }
-static inline int check_object(struct kmem_cache *s, struct page *page,
+static inline int check_object(struct kmem_cache *s, struct slab *slab,
void *object, u8 val) { return 1; }
static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n,
- struct page *page) {}
+ struct slab *slab) {}
static inline void remove_full(struct kmem_cache *s, struct kmem_cache_node *n,
- struct page *page) {}
+ struct slab *slab) {}
slab_flags_t kmem_cache_flags(unsigned int object_size,
slab_flags_t flags, const char *name)
{
@@ -1662,7 +1667,7 @@ static inline void inc_slabs_node(struct kmem_cache *s, int node,
static inline void dec_slabs_node(struct kmem_cache *s, int node,
int objects) {}
-static bool freelist_corrupted(struct kmem_cache *s, struct page *page,
+static bool freelist_corrupted(struct kmem_cache *s, struct slab *slab,
void **freelist, void *nextfree)
{
return false;
@@ -1767,10 +1772,10 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s,
return *head != NULL;
}
-static void *setup_object(struct kmem_cache *s, struct page *page,
+static void *setup_object(struct kmem_cache *s, struct slab *slab,
void *object)
{
- setup_object_debug(s, page, object);
+ setup_object_debug(s, slab, object);
object = kasan_init_slab_obj(s, object);
if (unlikely(s->ctor)) {
kasan_unpoison_object_data(s, object);
@@ -1783,18 +1788,27 @@ static void *setup_object(struct kmem_cache *s, struct page *page,
/*
* Slab allocation and freeing
*/
-static inline struct page *alloc_slab_page(struct kmem_cache *s,
+static inline struct slab *alloc_slab_page(struct kmem_cache *s,
gfp_t flags, int node, struct kmem_cache_order_objects oo)
{
- struct page *page;
+ struct folio *folio;
+ struct slab *slab;
unsigned int order = oo_order(oo);
if (node == NUMA_NO_NODE)
- page = alloc_pages(flags, order);
+ folio = (struct folio *)alloc_pages(flags, order);
else
- page = __alloc_pages_node(node, flags, order);
+ folio = (struct folio *)__alloc_pages_node(node, flags, order);
+
+ if (!folio)
+ return NULL;
+
+ slab = folio_slab(folio);
+ __folio_set_slab(folio);
+ if (page_is_pfmemalloc(folio_page(folio, 0)))
+ slab_set_pfmemalloc(slab);
- return page;
+ return slab;
}
#ifdef CONFIG_SLAB_FREELIST_RANDOM
@@ -1839,7 +1853,7 @@ static void __init init_freelist_randomization(void)
}
/* Get the next entry on the pre-computed freelist randomized */
-static void *next_freelist_entry(struct kmem_cache *s, struct page *page,
+static void *next_freelist_entry(struct kmem_cache *s, struct slab *slab,
unsigned long *pos, void *start,
unsigned long page_limit,
unsigned long freelist_count)
@@ -1861,32 +1875,32 @@ static void *next_freelist_entry(struct kmem_cache *s, struct page *page,
}
/* Shuffle the single linked freelist based on a random pre-computed sequence */
-static bool shuffle_freelist(struct kmem_cache *s, struct page *page)
+static bool shuffle_freelist(struct kmem_cache *s, struct slab *slab)
{
void *start;
void *cur;
void *next;
unsigned long idx, pos, page_limit, freelist_count;
- if (page->objects < 2 || !s->random_seq)
+ if (slab->objects < 2 || !s->random_seq)
return false;
freelist_count = oo_objects(s->oo);
pos = get_random_int() % freelist_count;
- page_limit = page->objects * s->size;
- start = fixup_red_left(s, page_address(page));
+ page_limit = slab->objects * s->size;
+ start = fixup_red_left(s, slab_address(slab));
/* First entry is used as the base of the freelist */
- cur = next_freelist_entry(s, page, &pos, start, page_limit,
+ cur = next_freelist_entry(s, slab, &pos, start, page_limit,
freelist_count);
- cur = setup_object(s, page, cur);
- page->freelist = cur;
+ cur = setup_object(s, slab, cur);
+ slab->freelist = cur;
- for (idx = 1; idx < page->objects; idx++) {
- next = next_freelist_entry(s, page, &pos, start, page_limit,
+ for (idx = 1; idx < slab->objects; idx++) {
+ next = next_freelist_entry(s, slab, &pos, start, page_limit,
freelist_count);
- next = setup_object(s, page, next);
+ next = setup_object(s, slab, next);
set_freepointer(s, cur, next);
cur = next;
}
@@ -1900,15 +1914,15 @@ static inline int init_cache_random_seq(struct kmem_cache *s)
return 0;
}
static inline void init_freelist_randomization(void) { }
-static inline bool shuffle_freelist(struct kmem_cache *s, struct page *page)
+static inline bool shuffle_freelist(struct kmem_cache *s, struct slab *slab)
{
return false;
}
#endif /* CONFIG_SLAB_FREELIST_RANDOM */
-static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
+static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
{
- struct page *page;
+ struct slab *slab;
struct kmem_cache_order_objects oo = s->oo;
gfp_t alloc_gfp;
void *start, *p, *next;
@@ -1927,63 +1941,60 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
if ((alloc_gfp & __GFP_DIRECT_RECLAIM) && oo_order(oo) > oo_order(s->min))
alloc_gfp = (alloc_gfp | __GFP_NOMEMALLOC) & ~(__GFP_RECLAIM|__GFP_NOFAIL);
- page = alloc_slab_page(s, alloc_gfp, node, oo);
- if (unlikely(!page)) {
+ slab = alloc_slab_page(s, alloc_gfp, node, oo);
+ if (unlikely(!slab)) {
oo = s->min;
alloc_gfp = flags;
/*
* Allocation may have failed due to fragmentation.
* Try a lower order alloc if possible
*/
- page = alloc_slab_page(s, alloc_gfp, node, oo);
- if (unlikely(!page))
+ slab = alloc_slab_page(s, alloc_gfp, node, oo);
+ if (unlikely(!slab))
goto out;
stat(s, ORDER_FALLBACK);
}
- page->objects = oo_objects(oo);
+ slab->objects = oo_objects(oo);
- account_slab_page(page, oo_order(oo), s, flags);
+ account_slab(slab, oo_order(oo), s, flags);
- page->slab_cache = s;
- __SetPageSlab(page);
- if (page_is_pfmemalloc(page))
- SetPageSlabPfmemalloc(page);
+ slab->slab_cache = s;
- kasan_poison_slab(page);
+ kasan_poison_slab(slab);
- start = page_address(page);
+ start = slab_address(slab);
- setup_page_debug(s, page, start);
+ setup_slab_debug(s, slab, start);
- shuffle = shuffle_freelist(s, page);
+ shuffle = shuffle_freelist(s, slab);
if (!shuffle) {
start = fixup_red_left(s, start);
- start = setup_object(s, page, start);
- page->freelist = start;
- for (idx = 0, p = start; idx < page->objects - 1; idx++) {
+ start = setup_object(s, slab, start);
+ slab->freelist = start;
+ for (idx = 0, p = start; idx < slab->objects - 1; idx++) {
next = p + s->size;
- next = setup_object(s, page, next);
+ next = setup_object(s, slab, next);
set_freepointer(s, p, next);
p = next;
}
set_freepointer(s, p, NULL);
}
- page->inuse = page->objects;
- page->frozen = 1;
+ slab->inuse = slab->objects;
+ slab->frozen = 1;
out:
- if (!page)
+ if (!slab)
return NULL;
- inc_slabs_node(s, page_to_nid(page), page->objects);
+ inc_slabs_node(s, slab_nid(slab), slab->objects);
- return page;
+ return slab;
}
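allocate_slab() first attempts the preferred, higher page order with reclaim suppressed, and only falls back to the cache's minimum order when that fails. A user-space sketch of that fallback pattern, with try_alloc_order() standing in for the page allocator and failure at higher orders simulated at random:

#include <stdio.h>
#include <stdlib.h>

/* Stand-in for the page allocator: pretend higher orders often fail. */
static void *try_alloc_order(unsigned int order)
{
	if (order > 1 && (rand() & 1))
		return NULL;	/* simulate failure due to fragmentation */
	return malloc((size_t)4096 << order);
}

static void *alloc_with_fallback(unsigned int preferred_order,
				 unsigned int min_order,
				 unsigned int *got_order)
{
	void *p = try_alloc_order(preferred_order);

	*got_order = preferred_order;
	if (!p) {
		/* Allocation may have failed due to fragmentation;
		 * retry at the minimum acceptable order. */
		p = try_alloc_order(min_order);
		*got_order = min_order;
	}
	return p;
}

int main(void)
{
	unsigned int order;
	void *p = alloc_with_fallback(3, 0, &order);

	if (p)
		printf("got an order-%u allocation\n", order);
	else
		printf("even the fallback failed\n");
	free(p);
	return 0;
}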
-static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
+static struct slab *new_slab(struct kmem_cache *s, gfp_t flags, int node)
{
if (unlikely(flags & GFP_SLAB_BUG_MASK))
flags = kmalloc_fix_flags(flags);
@@ -1994,76 +2005,75 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node);
}
-static void __free_slab(struct kmem_cache *s, struct page *page)
+static void __free_slab(struct kmem_cache *s, struct slab *slab)
{
- int order = compound_order(page);
+ struct folio *folio = slab_folio(slab);
+ int order = folio_order(folio);
int pages = 1 << order;
if (kmem_cache_debug_flags(s, SLAB_CONSISTENCY_CHECKS)) {
void *p;
- slab_pad_check(s, page);
- for_each_object(p, s, page_address(page),
- page->objects)
- check_object(s, page, p, SLUB_RED_INACTIVE);
+ slab_pad_check(s, slab);
+ for_each_object(p, s, slab_address(slab), slab->objects)
+ check_object(s, slab, p, SLUB_RED_INACTIVE);
}
- __ClearPageSlabPfmemalloc(page);
- __ClearPageSlab(page);
- /* In union with page->mapping where page allocator expects NULL */
- page->slab_cache = NULL;
+ __slab_clear_pfmemalloc(slab);
+ __folio_clear_slab(folio);
+ folio->mapping = NULL;
if (current->reclaim_state)
current->reclaim_state->reclaimed_slab += pages;
- unaccount_slab_page(page, order, s);
- __free_pages(page, order);
+ unaccount_slab(slab, order, s);
+ __free_pages(folio_page(folio, 0), order);
}
static void rcu_free_slab(struct rcu_head *h)
{
- struct page *page = container_of(h, struct page, rcu_head);
+ struct slab *slab = container_of(h, struct slab, rcu_head);
- __free_slab(page->slab_cache, page);
+ __free_slab(slab->slab_cache, slab);
}
-static void free_slab(struct kmem_cache *s, struct page *page)
+static void free_slab(struct kmem_cache *s, struct slab *slab)
{
if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU)) {
- call_rcu(&page->rcu_head, rcu_free_slab);
+ call_rcu(&slab->rcu_head, rcu_free_slab);
} else
- __free_slab(s, page);
+ __free_slab(s, slab);
}
-static void discard_slab(struct kmem_cache *s, struct page *page)
+static void discard_slab(struct kmem_cache *s, struct slab *slab)
{
- dec_slabs_node(s, page_to_nid(page), page->objects);
- free_slab(s, page);
+ dec_slabs_node(s, slab_nid(slab), slab->objects);
+ free_slab(s, slab);
}
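For SLAB_TYPESAFE_BY_RCU caches, free_slab() defers the real work through call_rcu(), and rcu_free_slab() recovers the struct slab from the embedded rcu_head with container_of(). A user-space sketch of that embedded-callback pattern, using a plain deferred-free queue in place of RCU; defer_head, fake_slab and defer_free() are invented for illustration:

#include <stdio.h>
#include <stddef.h>
#include <stdlib.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct defer_head {
	struct defer_head *next;
	void (*func)(struct defer_head *);
};

struct fake_slab {
	int objects;
	struct defer_head dh;	/* embedded, like rcu_head in struct slab */
};

static struct defer_head *defer_queue;

static void defer_free(struct defer_head *dh, void (*func)(struct defer_head *))
{
	dh->func = func;
	dh->next = defer_queue;
	defer_queue = dh;
}

/* Callback: recover the enclosing object from the embedded head. */
static void free_fake_slab(struct defer_head *dh)
{
	struct fake_slab *slab = container_of(dh, struct fake_slab, dh);

	printf("freeing slab with %d objects\n", slab->objects);
	free(slab);
}

int main(void)
{
	struct fake_slab *slab = malloc(sizeof(*slab));

	slab->objects = 16;
	defer_free(&slab->dh, free_fake_slab);

	/* "Grace period" over: run the deferred callbacks. */
	while (defer_queue) {
		struct defer_head *dh = defer_queue;
		defer_queue = dh->next;
		dh->func(dh);
	}
	return 0;
}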
/*
* Management of partially allocated slabs.
*/
static inline void
-__add_partial(struct kmem_cache_node *n, struct page *page, int tail)
+__add_partial(struct kmem_cache_node *n, struct slab *slab, int tail)
{
n->nr_partial++;
if (tail == DEACTIVATE_TO_TAIL)
- list_add_tail(&page->slab_list, &n->partial);
+ list_add_tail(&slab->slab_list, &n->partial);
else
- list_add(&page->slab_list, &n->partial);
+ list_add(&slab->slab_list, &n->partial);
}
static inline void add_partial(struct kmem_cache_node *n,
- struct page *page, int tail)
+ struct slab *slab, int tail)
{
lockdep_assert_held(&n->list_lock);
- __add_partial(n, page, tail);
+ __add_partial(n, slab, tail);
}
static inline void remove_partial(struct kmem_cache_node *n,
- struct page *page)
+ struct slab *slab)
{
lockdep_assert_held(&n->list_lock);
- list_del(&page->slab_list);
+ list_del(&slab->slab_list);
n->nr_partial--;
}
@@ -2074,12 +2084,12 @@ static inline void remove_partial(struct kmem_cache_node *n,
* Returns a list of objects or NULL if it fails.
*/
static inline void *acquire_slab(struct kmem_cache *s,
- struct kmem_cache_node *n, struct page *page,
+ struct kmem_cache_node *n, struct slab *slab,
int mode)
{
void *freelist;
unsigned long counters;
- struct page new;
+ struct slab new;
lockdep_assert_held(&n->list_lock);
@@ -2088,11 +2098,11 @@ static inline void *acquire_slab(struct kmem_cache *s,
* The old freelist is the list of objects for the
* per cpu allocation list.
*/
- freelist = page->freelist;
- counters = page->counters;
+ freelist = slab->freelist;
+ counters = slab->counters;
new.counters = counters;
if (mode) {
- new.inuse = page->objects;
+ new.inuse = slab->objects;
new.freelist = NULL;
} else {
new.freelist = freelist;
@@ -2101,35 +2111,35 @@ static inline void *acquire_slab(struct kmem_cache *s,
VM_BUG_ON(new.frozen);
new.frozen = 1;
- if (!__cmpxchg_double_slab(s, page,
+ if (!__cmpxchg_double_slab(s, slab,
freelist, counters,
new.freelist, new.counters,
"acquire_slab"))
return NULL;
- remove_partial(n, page);
+ remove_partial(n, slab);
WARN_ON(!freelist);
return freelist;
}
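acquire_slab() freezes a slab by preparing a new (freelist, counters) pair on the stack and committing it with a double-word cmpxchg; inuse, objects and frozen live packed inside the counters word, so the whole transition either lands atomically or is retried. A simplified user-space sketch that packs the counters into a single 64-bit word and retries with a compiler CAS; the bit layout here is invented, not the kernel's:

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

/* Invented layout: low 16 bits = inuse, next 16 = objects, bit 32 = frozen. */
#define INUSE(c)   ((uint32_t)((c) & 0xffff))
#define OBJECTS(c) ((uint32_t)(((c) >> 16) & 0xffff))
#define FROZEN(c)  ((uint32_t)(((c) >> 32) & 1))
#define PACK(inuse, objects, frozen) \
	(((uint64_t)(frozen) << 32) | ((uint64_t)(objects) << 16) | (inuse))

static uint64_t slab_counters = PACK(3, 16, 0);	/* 3 of 16 in use, not frozen */

/* Try to freeze the slab: mark all objects in use and set frozen atomically. */
static bool try_freeze(void)
{
	uint64_t old, new;

	do {
		old = __atomic_load_n(&slab_counters, __ATOMIC_RELAXED);
		if (FROZEN(old))
			return false;	/* someone else froze it first */
		new = PACK(OBJECTS(old), OBJECTS(old), 1);
	} while (!__atomic_compare_exchange_n(&slab_counters, &old, new,
					      false, __ATOMIC_ACQ_REL,
					      __ATOMIC_RELAXED));
	return true;
}

int main(void)
{
	if (try_freeze())
		printf("frozen: inuse=%u objects=%u frozen=%u\n",
		       INUSE(slab_counters), OBJECTS(slab_counters),
		       FROZEN(slab_counters));
	return 0;
}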
#ifdef CONFIG_SLUB_CPU_PARTIAL
-static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain);
+static void put_cpu_partial(struct kmem_cache *s, struct slab *slab, int drain);
#else
-static inline void put_cpu_partial(struct kmem_cache *s, struct page *page,
+static inline void put_cpu_partial(struct kmem_cache *s, struct slab *slab,
int drain) { }
#endif
-static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags);
+static inline bool pfmemalloc_match(struct slab *slab, gfp_t gfpflags);
/*
* Try to allocate a partial slab from a specific node.
*/
static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
- struct page **ret_page, gfp_t gfpflags)
+ struct slab **ret_slab, gfp_t gfpflags)
{
- struct page *page, *page2;
+ struct slab *slab, *slab2;
void *object = NULL;
unsigned long flags;
- unsigned int partial_pages = 0;
+ unsigned int partial_slabs = 0;
/*
* Racy check. If we mistakenly see no partial slabs then we
@@ -2141,28 +2151,28 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
return NULL;
spin_lock_irqsave(&n->list_lock, flags);
- list_for_each_entry_safe(page, page2, &n->partial, slab_list) {
+ list_for_each_entry_safe(slab, slab2, &n->partial, slab_list) {
void *t;
- if (!pfmemalloc_match(page, gfpflags))
+ if (!pfmemalloc_match(slab, gfpflags))
continue;
- t = acquire_slab(s, n, page, object == NULL);
+ t = acquire_slab(s, n, slab, object == NULL);
if (!t)
break;
if (!object) {
- *ret_page = page;
+ *ret_slab = slab;
stat(s, ALLOC_FROM_PARTIAL);
object = t;
} else {
- put_cpu_partial(s, page, 0);
+ put_cpu_partial(s, slab, 0);
stat(s, CPU_PARTIAL_NODE);
- partial_pages++;
+ partial_slabs++;
}
#ifdef CONFIG_SLUB_CPU_PARTIAL
if (!kmem_cache_has_cpu_partial(s)
- || partial_pages > s->cpu_partial_pages / 2)
+ || partial_slabs > s->cpu_partial_slabs / 2)
break;
#else
break;
@@ -2174,10 +2184,10 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
}
/*
- * Get a page from somewhere. Search in increasing NUMA distances.
+ * Get a slab from somewhere. Search in increasing NUMA distances.
*/
static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
- struct page **ret_page)
+ struct slab **ret_slab)
{
#ifdef CONFIG_NUMA
struct zonelist *zonelist;
@@ -2219,7 +2229,7 @@ static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
if (n && cpuset_zone_allowed(zone, flags) &&
n->nr_partial > s->min_partial) {
- object = get_partial_node(s, n, ret_page, flags);
+ object = get_partial_node(s, n, ret_slab, flags);
if (object) {
/*
* Don't check read_mems_allowed_retry()
@@ -2238,10 +2248,10 @@ static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
}
/*
- * Get a partial page, lock it and return it.
+ * Get a partial slab, lock it and return it.
*/
static void *get_partial(struct kmem_cache *s, gfp_t flags, int node,
- struct page **ret_page)
+ struct slab **ret_slab)
{
void *object;
int searchnode = node;
@@ -2249,11 +2259,11 @@ static void *get_partial(struct kmem_cache *s, gfp_t flags, int node,
if (node == NUMA_NO_NODE)
searchnode = numa_mem_id();
- object = get_partial_node(s, get_node(s, searchnode), ret_page, flags);
+ object = get_partial_node(s, get_node(s, searchnode), ret_slab, flags);
if (object || node != NUMA_NO_NODE)
return object;
- return get_any_partial(s, flags, ret_page);
+ return get_any_partial(s, flags, ret_slab);
}
#ifdef CONFIG_PREEMPTION
@@ -2330,25 +2340,25 @@ static void init_kmem_cache_cpus(struct kmem_cache *s)
}
/*
- * Finishes removing the cpu slab. Merges cpu's freelist with page's freelist,
+ * Finishes removing the cpu slab. Merges cpu's freelist with slab's freelist,
* unfreezes the slab and puts it on the proper list.
* Assumes the slab has been already safely taken away from kmem_cache_cpu
* by the caller.
*/
-static void deactivate_slab(struct kmem_cache *s, struct page *page,
+static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
void *freelist)
{
enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE };
- struct kmem_cache_node *n = get_node(s, page_to_nid(page));
+ struct kmem_cache_node *n = get_node(s, slab_nid(slab));
int lock = 0, free_delta = 0;
enum slab_modes l = M_NONE, m = M_NONE;
void *nextfree, *freelist_iter, *freelist_tail;
int tail = DEACTIVATE_TO_HEAD;
unsigned long flags = 0;
- struct page new;
- struct page old;
+ struct slab new;
+ struct slab old;
- if (page->freelist) {
+ if (slab->freelist) {
stat(s, DEACTIVATE_REMOTE_FREES);
tail = DEACTIVATE_TO_TAIL;
}
@@ -2367,7 +2377,7 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page,
* 'freelist_iter' is already corrupted. So isolate all objects
* starting at 'freelist_iter' by skipping them.
*/
- if (freelist_corrupted(s, page, &freelist_iter, nextfree))
+ if (freelist_corrupted(s, slab, &freelist_iter, nextfree))
break;
freelist_tail = freelist_iter;
@@ -2377,25 +2387,25 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page,
}
/*
- * Stage two: Unfreeze the page while splicing the per-cpu
- * freelist to the head of page's freelist.
+ * Stage two: Unfreeze the slab while splicing the per-cpu
+ * freelist to the head of slab's freelist.
*
- * Ensure that the page is unfrozen while the list presence
+ * Ensure that the slab is unfrozen while the list presence
* reflects the actual number of objects during unfreeze.
*
* We setup the list membership and then perform a cmpxchg
- * with the count. If there is a mismatch then the page
- * is not unfrozen but the page is on the wrong list.
+ * with the count. If there is a mismatch then the slab
+ * is not unfrozen but the slab is on the wrong list.
*
* Then we restart the process which may have to remove
- * the page from the list that we just put it on again
+ * the slab from the list that we just put it on again
* because the number of objects in the slab may have
* changed.
*/
redo:
- old.freelist = READ_ONCE(page->freelist);
- old.counters = READ_ONCE(page->counters);
+ old.freelist = READ_ONCE(slab->freelist);
+ old.counters = READ_ONCE(slab->counters);
VM_BUG_ON(!old.frozen);
/* Determine target state of the slab */
@@ -2416,9 +2426,8 @@ redo:
if (!lock) {
lock = 1;
/*
- * Taking the spinlock removes the possibility
- * that acquire_slab() will see a slab page that
- * is frozen
+ * Taking the spinlock removes the possibility that
+ * acquire_slab() will see a slab that is frozen
*/
spin_lock_irqsave(&n->list_lock, flags);
}
@@ -2437,18 +2446,18 @@ redo:
if (l != m) {
if (l == M_PARTIAL)
- remove_partial(n, page);
+ remove_partial(n, slab);
else if (l == M_FULL)
- remove_full(s, n, page);
+ remove_full(s, n, slab);
if (m == M_PARTIAL)
- add_partial(n, page, tail);
+ add_partial(n, slab, tail);
else if (m == M_FULL)
- add_full(s, n, page);
+ add_full(s, n, slab);
}
l = m;
- if (!cmpxchg_double_slab(s, page,
+ if (!cmpxchg_double_slab(s, slab,
old.freelist, old.counters,
new.freelist, new.counters,
"unfreezing slab"))
@@ -2463,26 +2472,26 @@ redo:
stat(s, DEACTIVATE_FULL);
else if (m == M_FREE) {
stat(s, DEACTIVATE_EMPTY);
- discard_slab(s, page);
+ discard_slab(s, slab);
stat(s, FREE_SLAB);
}
}
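deactivate_slab() chooses the slab's destination from its merged state: an empty slab is freed once the node already holds enough partial slabs, a slab with free objects goes back on the partial list, and a fully used slab goes on the full list only for debug caches. A sketch of just that decision, with the fields and thresholds reduced to plain integers:

#include <stdio.h>

enum slab_mode { M_NONE, M_PARTIAL, M_FULL, M_FREE };

struct slab_state {
	unsigned int inuse;	/* allocated objects after merging the freelists */
	unsigned int objects;	/* total objects in the slab */
};

/* Simplified version of the target-state choice in deactivate_slab(). */
static enum slab_mode target_state(const struct slab_state *s,
				   unsigned int node_nr_partial,
				   unsigned int min_partial,
				   int debug_full_list)
{
	if (!s->inuse && node_nr_partial >= min_partial)
		return M_FREE;		/* empty, and the node has enough partials */
	if (s->inuse < s->objects)
		return M_PARTIAL;	/* still has free objects: keep it around */
	return debug_full_list ? M_FULL : M_NONE;
}

int main(void)
{
	struct slab_state empty = { 0, 16 }, half = { 8, 16 }, full = { 16, 16 };

	printf("%d %d %d\n",
	       target_state(&empty, 10, 5, 0),	/* M_FREE (3) */
	       target_state(&half, 10, 5, 0),	/* M_PARTIAL (1) */
	       target_state(&full, 10, 5, 0));	/* M_NONE (0) */
	return 0;
}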
#ifdef CONFIG_SLUB_CPU_PARTIAL
-static void __unfreeze_partials(struct kmem_cache *s, struct page *partial_page)
+static void __unfreeze_partials(struct kmem_cache *s, struct slab *partial_slab)
{
struct kmem_cache_node *n = NULL, *n2 = NULL;
- struct page *page, *discard_page = NULL;
+ struct slab *slab, *slab_to_discard = NULL;
unsigned long flags = 0;
- while (partial_page) {
- struct page new;
- struct page old;
+ while (partial_slab) {
+ struct slab new;
+ struct slab old;
- page = partial_page;
- partial_page = page->next;
+ slab = partial_slab;
+ partial_slab = slab->next;
- n2 = get_node(s, page_to_nid(page));
+ n2 = get_node(s, slab_nid(slab));
if (n != n2) {
if (n)
spin_unlock_irqrestore(&n->list_lock, flags);
@@ -2493,8 +2502,8 @@ static void __unfreeze_partials(struct kmem_cache *s, struct page *partial_page)
do {
- old.freelist = page->freelist;
- old.counters = page->counters;
+ old.freelist = slab->freelist;
+ old.counters = slab->counters;
VM_BUG_ON(!old.frozen);
new.counters = old.counters;
@@ -2502,16 +2511,16 @@ static void __unfreeze_partials(struct kmem_cache *s, struct page *partial_page)
new.frozen = 0;
- } while (!__cmpxchg_double_slab(s, page,
+ } while (!__cmpxchg_double_slab(s, slab,
old.freelist, old.counters,
new.freelist, new.counters,
"unfreezing slab"));
if (unlikely(!new.inuse && n->nr_partial >= s->min_partial)) {
- page->next = discard_page;
- discard_page = page;
+ slab->next = slab_to_discard;
+ slab_to_discard = slab;
} else {
- add_partial(n, page, DEACTIVATE_TO_TAIL);
+ add_partial(n, slab, DEACTIVATE_TO_TAIL);
stat(s, FREE_ADD_PARTIAL);
}
}
@@ -2519,12 +2528,12 @@ static void __unfreeze_partials(struct kmem_cache *s, struct page *partial_page)
if (n)
spin_unlock_irqrestore(&n->list_lock, flags);
- while (discard_page) {
- page = discard_page;
- discard_page = discard_page->next;
+ while (slab_to_discard) {
+ slab = slab_to_discard;
+ slab_to_discard = slab_to_discard->next;
stat(s, DEACTIVATE_EMPTY);
- discard_slab(s, page);
+ discard_slab(s, slab);
stat(s, FREE_SLAB);
}
}
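__unfreeze_partials() walks a singly linked chain of slabs that may belong to different NUMA nodes and only drops and retakes a node's list_lock when the node actually changes, rather than once per slab. A user-space sketch of that per-node lock batching with pthread mutexes; NR_NODES and struct item are made up for the example:

#include <stdio.h>
#include <pthread.h>

#define NR_NODES 2

struct item {
	struct item *next;
	int nid;		/* which node's lock protects this item */
};

static pthread_mutex_t node_lock[NR_NODES] = {
	PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER
};

/* Process a list, retaking the per-node lock only when the node changes. */
static void process_list(struct item *head)
{
	int locked_nid = -1;

	while (head) {
		struct item *it = head;

		head = it->next;
		if (it->nid != locked_nid) {
			if (locked_nid >= 0)
				pthread_mutex_unlock(&node_lock[locked_nid]);
			locked_nid = it->nid;
			pthread_mutex_lock(&node_lock[locked_nid]);
		}
		printf("handling item on node %d under its lock\n", it->nid);
	}
	if (locked_nid >= 0)
		pthread_mutex_unlock(&node_lock[locked_nid]);
}

int main(void)
{
	struct item c = { NULL, 1 }, b = { &c, 1 }, a = { &b, 0 };

	process_list(&a);	/* takes node 0's lock once and node 1's lock once */
	return 0;
}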
@@ -2534,73 +2543,73 @@ static void __unfreeze_partials(struct kmem_cache *s, struct page *partial_page)
*/
static void unfreeze_partials(struct kmem_cache *s)
{
- struct page *partial_page;
+ struct slab *partial_slab;
unsigned long flags;
local_lock_irqsave(&s->cpu_slab->lock, flags);
- partial_page = this_cpu_read(s->cpu_slab->partial);
+ partial_slab = this_cpu_read(s->cpu_slab->partial);
this_cpu_write(s->cpu_slab->partial, NULL);
local_unlock_irqrestore(&s->cpu_slab->lock, flags);
- if (partial_page)
- __unfreeze_partials(s, partial_page);
+ if (partial_slab)
+ __unfreeze_partials(s, partial_slab);
}
static void unfreeze_partials_cpu(struct kmem_cache *s,
struct kmem_cache_cpu *c)
{
- struct page *partial_page;
+ struct slab *partial_slab;
- partial_page = slub_percpu_partial(c);
+ partial_slab = slub_percpu_partial(c);
c->partial = NULL;
- if (partial_page)
- __unfreeze_partials(s, partial_page);
+ if (partial_slab)
+ __unfreeze_partials(s, partial_slab);
}
/*
- * Put a page that was just frozen (in __slab_free|get_partial_node) into a
- * partial page slot if available.
+ * Put a slab that was just frozen (in __slab_free|get_partial_node) into a
+ * partial slab slot if available.
*
* If we did not find a slot then simply move all the partials to the
* per node partial list.
*/
-static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
+static void put_cpu_partial(struct kmem_cache *s, struct slab *slab, int drain)
{
- struct page *oldpage;
- struct page *page_to_unfreeze = NULL;
+ struct slab *oldslab;
+ struct slab *slab_to_unfreeze = NULL;
unsigned long flags;
- int pages = 0;
+ int slabs = 0;
local_lock_irqsave(&s->cpu_slab->lock, flags);
- oldpage = this_cpu_read(s->cpu_slab->partial);
+ oldslab = this_cpu_read(s->cpu_slab->partial);
- if (oldpage) {
- if (drain && oldpage->pages >= s->cpu_partial_pages) {
+ if (oldslab) {
+ if (drain && oldslab->slabs >= s->cpu_partial_slabs) {
/*
* Partial array is full. Move the existing set to the
* per node partial list. Postpone the actual unfreezing
* outside of the critical section.
*/
- page_to_unfreeze = oldpage;
- oldpage = NULL;
+ slab_to_unfreeze = oldslab;
+ oldslab = NULL;
} else {
- pages = oldpage->pages;
+ slabs = oldslab->slabs;
}
}
- pages++;
+ slabs++;
- page->pages = pages;
- page->next = oldpage;
+ slab->slabs = slabs;
+ slab->next = oldslab;
- this_cpu_write(s->cpu_slab->partial, page);
+ this_cpu_write(s->cpu_slab->partial, slab);
local_unlock_irqrestore(&s->cpu_slab->lock, flags);
- if (page_to_unfreeze) {
- __unfreeze_partials(s, page_to_unfreeze);
+ if (slab_to_unfreeze) {
+ __unfreeze_partials(s, slab_to_unfreeze);
stat(s, CPU_PARTIAL_DRAIN);
}
}
@@ -2616,22 +2625,22 @@ static inline void unfreeze_partials_cpu(struct kmem_cache *s,
static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
{
unsigned long flags;
- struct page *page;
+ struct slab *slab;
void *freelist;
local_lock_irqsave(&s->cpu_slab->lock, flags);
- page = c->page;
+ slab = c->slab;
freelist = c->freelist;
- c->page = NULL;
+ c->slab = NULL;
c->freelist = NULL;
c->tid = next_tid(c->tid);
local_unlock_irqrestore(&s->cpu_slab->lock, flags);
- if (page) {
- deactivate_slab(s, page, freelist);
+ if (slab) {
+ deactivate_slab(s, slab, freelist);
stat(s, CPUSLAB_FLUSH);
}
}
@@ -2640,14 +2649,14 @@ static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
{
struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
void *freelist = c->freelist;
- struct page *page = c->page;
+ struct slab *slab = c->slab;
- c->page = NULL;
+ c->slab = NULL;
c->freelist = NULL;
c->tid = next_tid(c->tid);
- if (page) {
- deactivate_slab(s, page, freelist);
+ if (slab) {
+ deactivate_slab(s, slab, freelist);
stat(s, CPUSLAB_FLUSH);
}
@@ -2676,7 +2685,7 @@ static void flush_cpu_slab(struct work_struct *w)
s = sfw->s;
c = this_cpu_ptr(s->cpu_slab);
- if (c->page)
+ if (c->slab)
flush_slab(s, c);
unfreeze_partials(s);
@@ -2686,7 +2695,7 @@ static bool has_cpu_slab(int cpu, struct kmem_cache *s)
{
struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
- return c->page || slub_percpu_partial(c);
+ return c->slab || slub_percpu_partial(c);
}
static DEFINE_MUTEX(flush_lock);
@@ -2748,19 +2757,19 @@ static int slub_cpu_dead(unsigned int cpu)
* Check if the objects in a per cpu structure fit numa
* locality expectations.
*/
-static inline int node_match(struct page *page, int node)
+static inline int node_match(struct slab *slab, int node)
{
#ifdef CONFIG_NUMA
- if (node != NUMA_NO_NODE && page_to_nid(page) != node)
+ if (node != NUMA_NO_NODE && slab_nid(slab) != node)
return 0;
#endif
return 1;
}
#ifdef CONFIG_SLUB_DEBUG
-static int count_free(struct page *page)
+static int count_free(struct slab *slab)
{
- return page->objects - page->inuse;
+ return slab->objects - slab->inuse;
}
static inline unsigned long node_nr_objs(struct kmem_cache_node *n)
@@ -2771,15 +2780,15 @@ static inline unsigned long node_nr_objs(struct kmem_cache_node *n)
#if defined(CONFIG_SLUB_DEBUG) || defined(CONFIG_SYSFS)
static unsigned long count_partial(struct kmem_cache_node *n,
- int (*get_count)(struct page *))
+ int (*get_count)(struct slab *))
{
unsigned long flags;
unsigned long x = 0;
- struct page *page;
+ struct slab *slab;
spin_lock_irqsave(&n->list_lock, flags);
- list_for_each_entry(page, &n->partial, slab_list)
- x += get_count(page);
+ list_for_each_entry(slab, &n->partial, slab_list)
+ x += get_count(slab);
spin_unlock_irqrestore(&n->list_lock, flags);
return x;
}
@@ -2822,54 +2831,41 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
#endif
}
-static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags)
-{
- if (unlikely(PageSlabPfmemalloc(page)))
- return gfp_pfmemalloc_allowed(gfpflags);
-
- return true;
-}
-
-/*
- * A variant of pfmemalloc_match() that tests page flags without asserting
- * PageSlab. Intended for opportunistic checks before taking a lock and
- * rechecking that nobody else freed the page under us.
- */
-static inline bool pfmemalloc_match_unsafe(struct page *page, gfp_t gfpflags)
+static inline bool pfmemalloc_match(struct slab *slab, gfp_t gfpflags)
{
- if (unlikely(__PageSlabPfmemalloc(page)))
+ if (unlikely(slab_test_pfmemalloc(slab)))
return gfp_pfmemalloc_allowed(gfpflags);
return true;
}
/*
- * Check the page->freelist of a page and either transfer the freelist to the
- * per cpu freelist or deactivate the page.
+ * Check the slab->freelist and either transfer the freelist to the
+ * per cpu freelist or deactivate the slab.
*
- * The page is still frozen if the return value is not NULL.
+ * The slab is still frozen if the return value is not NULL.
*
- * If this function returns NULL then the page has been unfrozen.
+ * If this function returns NULL then the slab has been unfrozen.
*/
-static inline void *get_freelist(struct kmem_cache *s, struct page *page)
+static inline void *get_freelist(struct kmem_cache *s, struct slab *slab)
{
- struct page new;
+ struct slab new;
unsigned long counters;
void *freelist;
lockdep_assert_held(this_cpu_ptr(&s->cpu_slab->lock));
do {
- freelist = page->freelist;
- counters = page->counters;
+ freelist = slab->freelist;
+ counters = slab->counters;
new.counters = counters;
VM_BUG_ON(!new.frozen);
- new.inuse = page->objects;
+ new.inuse = slab->objects;
new.frozen = freelist != NULL;
- } while (!__cmpxchg_double_slab(s, page,
+ } while (!__cmpxchg_double_slab(s, slab,
freelist, counters,
NULL, new.counters,
"get_freelist"));
@@ -2900,15 +2896,15 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
unsigned long addr, struct kmem_cache_cpu *c)
{
void *freelist;
- struct page *page;
+ struct slab *slab;
unsigned long flags;
stat(s, ALLOC_SLOWPATH);
-reread_page:
+reread_slab:
- page = READ_ONCE(c->page);
- if (!page) {
+ slab = READ_ONCE(c->slab);
+ if (!slab) {
/*
* if the node is not online or has no normal memory, just
* ignore the node constraint
@@ -2920,7 +2916,7 @@ reread_page:
}
redo:
- if (unlikely(!node_match(page, node))) {
+ if (unlikely(!node_match(slab, node))) {
/*
* same as above but node_match() being false already
* implies node != NUMA_NO_NODE
@@ -2939,23 +2935,23 @@ redo:
* PFMEMALLOC but right now, we are losing the pfmemalloc
* information when the page leaves the per-cpu allocator
*/
- if (unlikely(!pfmemalloc_match_unsafe(page, gfpflags)))
+ if (unlikely(!pfmemalloc_match(slab, gfpflags)))
goto deactivate_slab;
- /* must check again c->page in case we got preempted and it changed */
+ /* must check again c->slab in case we got preempted and it changed */
local_lock_irqsave(&s->cpu_slab->lock, flags);
- if (unlikely(page != c->page)) {
+ if (unlikely(slab != c->slab)) {
local_unlock_irqrestore(&s->cpu_slab->lock, flags);
- goto reread_page;
+ goto reread_slab;
}
freelist = c->freelist;
if (freelist)
goto load_freelist;
- freelist = get_freelist(s, page);
+ freelist = get_freelist(s, slab);
if (!freelist) {
- c->page = NULL;
+ c->slab = NULL;
local_unlock_irqrestore(&s->cpu_slab->lock, flags);
stat(s, DEACTIVATE_BYPASS);
goto new_slab;
@@ -2969,10 +2965,10 @@ load_freelist:
/*
* freelist is pointing to the list of objects to be used.
- * page is pointing to the page from which the objects are obtained.
- * That page must be frozen for per cpu allocations to work.
+ * slab is pointing to the slab from which the objects are obtained.
+ * That slab must be frozen for per cpu allocations to work.
*/
- VM_BUG_ON(!c->page->frozen);
+ VM_BUG_ON(!c->slab->frozen);
c->freelist = get_freepointer(s, freelist);
c->tid = next_tid(c->tid);
local_unlock_irqrestore(&s->cpu_slab->lock, flags);
@@ -2981,23 +2977,23 @@ load_freelist:
deactivate_slab:
local_lock_irqsave(&s->cpu_slab->lock, flags);
- if (page != c->page) {
+ if (slab != c->slab) {
local_unlock_irqrestore(&s->cpu_slab->lock, flags);
- goto reread_page;
+ goto reread_slab;
}
freelist = c->freelist;
- c->page = NULL;
+ c->slab = NULL;
c->freelist = NULL;
local_unlock_irqrestore(&s->cpu_slab->lock, flags);
- deactivate_slab(s, page, freelist);
+ deactivate_slab(s, slab, freelist);
new_slab:
if (slub_percpu_partial(c)) {
local_lock_irqsave(&s->cpu_slab->lock, flags);
- if (unlikely(c->page)) {
+ if (unlikely(c->slab)) {
local_unlock_irqrestore(&s->cpu_slab->lock, flags);
- goto reread_page;
+ goto reread_slab;
}
if (unlikely(!slub_percpu_partial(c))) {
local_unlock_irqrestore(&s->cpu_slab->lock, flags);
@@ -3005,8 +3001,8 @@ new_slab:
goto new_objects;
}
- page = c->page = slub_percpu_partial(c);
- slub_set_percpu_partial(c, page);
+ slab = c->slab = slub_percpu_partial(c);
+ slub_set_percpu_partial(c, slab);
local_unlock_irqrestore(&s->cpu_slab->lock, flags);
stat(s, CPU_PARTIAL_ALLOC);
goto redo;
@@ -3014,32 +3010,32 @@ new_slab:
new_objects:
- freelist = get_partial(s, gfpflags, node, &page);
+ freelist = get_partial(s, gfpflags, node, &slab);
if (freelist)
- goto check_new_page;
+ goto check_new_slab;
slub_put_cpu_ptr(s->cpu_slab);
- page = new_slab(s, gfpflags, node);
+ slab = new_slab(s, gfpflags, node);
c = slub_get_cpu_ptr(s->cpu_slab);
- if (unlikely(!page)) {
+ if (unlikely(!slab)) {
slab_out_of_memory(s, gfpflags, node);
return NULL;
}
/*
- * No other reference to the page yet so we can
+ * No other reference to the slab yet so we can
* muck around with it freely without cmpxchg
*/
- freelist = page->freelist;
- page->freelist = NULL;
+ freelist = slab->freelist;
+ slab->freelist = NULL;
stat(s, ALLOC_SLAB);
-check_new_page:
+check_new_slab:
if (kmem_cache_debug(s)) {
- if (!alloc_debug_processing(s, page, freelist, addr)) {
+ if (!alloc_debug_processing(s, slab, freelist, addr)) {
/* Slab failed checks. Next slab needed */
goto new_slab;
} else {
@@ -3051,39 +3047,39 @@ check_new_page:
}
}
- if (unlikely(!pfmemalloc_match(page, gfpflags)))
+ if (unlikely(!pfmemalloc_match(slab, gfpflags)))
/*
* For !pfmemalloc_match() case we don't load freelist so that
* we don't make further mismatched allocations easier.
*/
goto return_single;
-retry_load_page:
+retry_load_slab:
local_lock_irqsave(&s->cpu_slab->lock, flags);
- if (unlikely(c->page)) {
+ if (unlikely(c->slab)) {
void *flush_freelist = c->freelist;
- struct page *flush_page = c->page;
+ struct slab *flush_slab = c->slab;
- c->page = NULL;
+ c->slab = NULL;
c->freelist = NULL;
c->tid = next_tid(c->tid);
local_unlock_irqrestore(&s->cpu_slab->lock, flags);
- deactivate_slab(s, flush_page, flush_freelist);
+ deactivate_slab(s, flush_slab, flush_freelist);
stat(s, CPUSLAB_FLUSH);
- goto retry_load_page;
+ goto retry_load_slab;
}
- c->page = page;
+ c->slab = slab;
goto load_freelist;
return_single:
- deactivate_slab(s, page, get_freepointer(s, freelist));
+ deactivate_slab(s, slab, get_freepointer(s, freelist));
return freelist;
}
@@ -3140,7 +3136,7 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s,
{
void *object;
struct kmem_cache_cpu *c;
- struct page *page;
+ struct slab *slab;
unsigned long tid;
struct obj_cgroup *objcg = NULL;
bool init = false;
@@ -3172,9 +3168,9 @@ redo:
/*
* Irqless object alloc/free algorithm used here depends on sequence
* of fetching cpu_slab's data. tid should be fetched before anything
- * on c to guarantee that object and page associated with previous tid
+ * on c to guarantee that object and slab associated with previous tid
* won't be used with current tid. If we fetch tid first, object and
- * page could be one associated with next tid and our alloc/free
+ * slab could be one associated with next tid and our alloc/free
* request will fail. In this case, we will retry. So, no problem.
*/
barrier();
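The comment above depends on reading tid before the freelist and committing with a cmpxchg over the (freelist, tid) pair, so that any alloc or free slipping in between bumps tid and forces a retry instead of reusing stale state. A heavily simplified single-word sketch that packs an object index and a tid into one 64-bit value; the packing and fastpath_pop() are invented for illustration and ignore the per-CPU aspect entirely:

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

/* Invented packing: high 32 bits = head object index, low 32 bits = tid. */
static uint64_t pack(uint32_t head, uint32_t tid)
{
	return ((uint64_t)head << 32) | tid;
}

static uint64_t cpu_state;	/* head = 0, tid = 0 */

/* Pop the head "object"; retry if the tid moved underneath us. */
static bool fastpath_pop(uint32_t *got)
{
	uint64_t old, new;
	uint32_t head, tid;

	do {
		old = __atomic_load_n(&cpu_state, __ATOMIC_ACQUIRE);
		head = (uint32_t)(old >> 32);
		tid = (uint32_t)old;
		if (head == UINT32_MAX)
			return false;	/* "empty": would take the slow path */
		new = pack(head + 1, tid + 1);	/* next object, next tid */
	} while (!__atomic_compare_exchange_n(&cpu_state, &old, new, false,
					      __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE));
	*got = head;
	return true;
}

int main(void)
{
	uint32_t obj;
	int i;

	for (i = 0; i < 3; i++)
		if (fastpath_pop(&obj))
			printf("allocated object %u, tid now %u\n",
			       obj, (uint32_t)cpu_state);
	return 0;
}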
@@ -3187,7 +3183,7 @@ redo:
*/
object = c->freelist;
- page = c->page;
+ slab = c->slab;
/*
* We cannot use the lockless fastpath on PREEMPT_RT because if a
* slowpath has taken the local_lock_irqsave(), it is not protected
@@ -3196,7 +3192,7 @@ redo:
* there is a suitable cpu freelist.
*/
if (IS_ENABLED(CONFIG_PREEMPT_RT) ||
- unlikely(!object || !page || !node_match(page, node))) {
+ unlikely(!object || !slab || !node_match(slab, node))) {
object = __slab_alloc(s, gfpflags, node, addr, c);
} else {
void *next_object = get_freepointer_safe(s, object);
@@ -3298,17 +3294,17 @@ EXPORT_SYMBOL(kmem_cache_alloc_node_trace);
* have a longer lifetime than the cpu slabs in most processing loads.
*
* So we still attempt to reduce cache line usage. Just take the slab
- * lock and free the item. If there is no additional partial page
+ * lock and free the item. If there is no additional partial slab
* handling required then we can return immediately.
*/
-static void __slab_free(struct kmem_cache *s, struct page *page,
+static void __slab_free(struct kmem_cache *s, struct slab *slab,
void *head, void *tail, int cnt,
unsigned long addr)
{
void *prior;
int was_frozen;
- struct page new;
+ struct slab new;
unsigned long counters;
struct kmem_cache_node *n = NULL;
unsigned long flags;
@@ -3319,7 +3315,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
return;
if (kmem_cache_debug(s) &&
- !free_debug_processing(s, page, head, tail, cnt, addr))
+ !free_debug_processing(s, slab, head, tail, cnt, addr))
return;
do {
@@ -3327,8 +3323,8 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
spin_unlock_irqrestore(&n->list_lock, flags);
n = NULL;
}
- prior = page->freelist;
- counters = page->counters;
+ prior = slab->freelist;
+ counters = slab->counters;
set_freepointer(s, tail, prior);
new.counters = counters;
was_frozen = new.frozen;
@@ -3347,7 +3343,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
} else { /* Needs to be taken off a list */
- n = get_node(s, page_to_nid(page));
+ n = get_node(s, slab_nid(slab));
/*
* Speculatively acquire the list_lock.
* If the cmpxchg does not succeed then we may
@@ -3361,7 +3357,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
}
}
- } while (!cmpxchg_double_slab(s, page,
+ } while (!cmpxchg_double_slab(s, slab,
prior, counters,
head, new.counters,
"__slab_free"));
@@ -3376,10 +3372,10 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
stat(s, FREE_FROZEN);
} else if (new.frozen) {
/*
- * If we just froze the page then put it onto the
+ * If we just froze the slab then put it onto the
* per cpu partial list.
*/
- put_cpu_partial(s, page, 1);
+ put_cpu_partial(s, slab, 1);
stat(s, CPU_PARTIAL_FREE);
}
@@ -3394,8 +3390,8 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
* then add it.
*/
if (!kmem_cache_has_cpu_partial(s) && unlikely(!prior)) {
- remove_full(s, n, page);
- add_partial(n, page, DEACTIVATE_TO_TAIL);
+ remove_full(s, n, slab);
+ add_partial(n, slab, DEACTIVATE_TO_TAIL);
stat(s, FREE_ADD_PARTIAL);
}
spin_unlock_irqrestore(&n->list_lock, flags);
@@ -3406,16 +3402,16 @@ slab_empty:
/*
* Slab on the partial list.
*/
- remove_partial(n, page);
+ remove_partial(n, slab);
stat(s, FREE_REMOVE_PARTIAL);
} else {
/* Slab must be on the full list */
- remove_full(s, n, page);
+ remove_full(s, n, slab);
}
spin_unlock_irqrestore(&n->list_lock, flags);
stat(s, FREE_SLAB);
- discard_slab(s, page);
+ discard_slab(s, slab);
}
/*
@@ -3430,11 +3426,11 @@ slab_empty:
* with all sorts of special processing.
*
* Bulk free of a freelist with several objects (all pointing to the
- * same page) possible by specifying head and tail ptr, plus objects
+ * same slab) is possible by specifying head and tail ptr, plus objects
* count (cnt). Bulk free indicated by tail pointer being set.
*/
static __always_inline void do_slab_free(struct kmem_cache *s,
- struct page *page, void *head, void *tail,
+ struct slab *slab, void *head, void *tail,
int cnt, unsigned long addr)
{
void *tail_obj = tail ? : head;
@@ -3457,7 +3453,7 @@ redo:
/* Same with comment on barrier() in slab_alloc_node() */
barrier();
- if (likely(page == c->page)) {
+ if (likely(slab == c->slab)) {
#ifndef CONFIG_PREEMPT_RT
void **freelist = READ_ONCE(c->freelist);
@@ -3483,7 +3479,7 @@ redo:
local_lock(&s->cpu_slab->lock);
c = this_cpu_ptr(s->cpu_slab);
- if (unlikely(page != c->page)) {
+ if (unlikely(slab != c->slab)) {
local_unlock(&s->cpu_slab->lock);
goto redo;
}
@@ -3498,11 +3494,11 @@ redo:
#endif
stat(s, FREE_FASTPATH);
} else
- __slab_free(s, page, head, tail_obj, cnt, addr);
+ __slab_free(s, slab, head, tail_obj, cnt, addr);
}
-static __always_inline void slab_free(struct kmem_cache *s, struct page *page,
+static __always_inline void slab_free(struct kmem_cache *s, struct slab *slab,
void *head, void *tail, int cnt,
unsigned long addr)
{
@@ -3511,13 +3507,13 @@ static __always_inline void slab_free(struct kmem_cache *s, struct page *page,
* to remove objects, whose reuse must be delayed.
*/
if (slab_free_freelist_hook(s, &head, &tail, &cnt))
- do_slab_free(s, page, head, tail, cnt, addr);
+ do_slab_free(s, slab, head, tail, cnt, addr);
}
#ifdef CONFIG_KASAN_GENERIC
void ___cache_free(struct kmem_cache *cache, void *x, unsigned long addr)
{
- do_slab_free(cache, virt_to_head_page(x), x, NULL, 1, addr);
+ do_slab_free(cache, virt_to_slab(x), x, NULL, 1, addr);
}
#endif
@@ -3527,35 +3523,36 @@ void kmem_cache_free(struct kmem_cache *s, void *x)
if (!s)
return;
trace_kmem_cache_free(_RET_IP_, x, s->name);
- slab_free(s, virt_to_head_page(x), x, NULL, 1, _RET_IP_);
+ slab_free(s, virt_to_slab(x), x, NULL, 1, _RET_IP_);
}
EXPORT_SYMBOL(kmem_cache_free);
struct detached_freelist {
- struct page *page;
+ struct slab *slab;
void *tail;
void *freelist;
int cnt;
struct kmem_cache *s;
};
-static inline void free_nonslab_page(struct page *page, void *object)
+static inline void free_large_kmalloc(struct folio *folio, void *object)
{
- unsigned int order = compound_order(page);
+ unsigned int order = folio_order(folio);
- if (WARN_ON_ONCE(!PageCompound(page)))
+ if (WARN_ON_ONCE(order == 0))
pr_warn_once("object pointer: 0x%p\n", object);
kfree_hook(object);
- mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B, -(PAGE_SIZE << order));
- __free_pages(page, order);
+ mod_lruvec_page_state(folio_page(folio, 0), NR_SLAB_UNRECLAIMABLE_B,
+ -(PAGE_SIZE << order));
+ __free_pages(folio_page(folio, 0), order);
}
/*
* This function progressively scans the array with free objects (with
* a limited look ahead) and extracts objects belonging to the same
- * page. It builds a detached freelist directly within the given
- * page/objects. This can happen without any need for
+ * slab. It builds a detached freelist directly within the given
+ * slab/objects. This can happen without any need for
* synchronization, because the objects are owned by running process.
* The freelist is built up as a singly linked list in the objects.
* The idea is that this detached freelist can then be bulk
@@ -3570,10 +3567,11 @@ int build_detached_freelist(struct kmem_cache *s, size_t size,
size_t first_skipped_index = 0;
int lookahead = 3;
void *object;
- struct page *page;
+ struct folio *folio;
+ struct slab *slab;
/* Always re-init detached_freelist */
- df->page = NULL;
+ df->slab = NULL;
do {
object = p[--size];
@@ -3583,17 +3581,19 @@ int build_detached_freelist(struct kmem_cache *s, size_t size,
if (!object)
return 0;
- page = virt_to_head_page(object);
+ folio = virt_to_folio(object);
if (!s) {
/* Handle kalloc'ed objects */
- if (unlikely(!PageSlab(page))) {
- free_nonslab_page(page, object);
+ if (unlikely(!folio_test_slab(folio))) {
+ free_large_kmalloc(folio, object);
p[size] = NULL; /* mark object processed */
return size;
}
/* Derive kmem_cache from object */
- df->s = page->slab_cache;
+ slab = folio_slab(folio);
+ df->s = slab->slab_cache;
} else {
+ slab = folio_slab(folio);
df->s = cache_from_obj(s, object); /* Support for memcg */
}
@@ -3605,7 +3605,7 @@ int build_detached_freelist(struct kmem_cache *s, size_t size,
}
/* Start new detached freelist */
- df->page = page;
+ df->slab = slab;
set_freepointer(df->s, object, NULL);
df->tail = object;
df->freelist = object;
@@ -3617,8 +3617,8 @@ int build_detached_freelist(struct kmem_cache *s, size_t size,
if (!object)
continue; /* Skip processed objects */
- /* df->page is always set at this point */
- if (df->page == virt_to_head_page(object)) {
+ /* df->slab is always set at this point */
+ if (df->slab == virt_to_slab(object)) {
/* Opportunity build freelist */
set_freepointer(df->s, object, df->freelist);
df->freelist = object;
@@ -3650,10 +3650,10 @@ void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
struct detached_freelist df;
size = build_detached_freelist(s, size, p, &df);
- if (!df.page)
+ if (!df.slab)
continue;
- slab_free(df.s, df.page, df.freelist, df.tail, df.cnt, _RET_IP_);
+ slab_free(df.s, df.slab, df.freelist, df.tail, df.cnt, _RET_IP_);
} while (likely(size));
}
EXPORT_SYMBOL(kmem_cache_free_bulk);
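build_detached_freelist() groups a batch of pointers by the slab that owns them, so the slow path of kmem_cache_free_bulk() runs once per slab instead of once per object. A simplified user-space sketch that derives a pretend "owning slab" by masking each pointer down to an aligned block; SLAB_BYTES and slab_base() are assumptions of the example, not kernel behaviour:

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>

#define SLAB_BYTES 4096UL	/* pretend every slab is one aligned 4K block */

/* Which (pretend) slab owns this object? */
static uintptr_t slab_base(const void *obj)
{
	return (uintptr_t)obj & ~(SLAB_BYTES - 1);
}

int main(void)
{
	void *mem = aligned_alloc(SLAB_BYTES, 2 * SLAB_BYTES);
	void *objs[6];
	int i, j;

	if (!mem)
		return 1;
	/* Carve six "objects" out of two aligned blocks. */
	for (i = 0; i < 6; i++)
		objs[i] = (char *)mem + (i % 2) * SLAB_BYTES + (i / 2) * 64;

	/* Group objects sharing a slab and "free" each group together. */
	for (i = 0; i < 6; i++) {
		int cnt = 1;

		if (!objs[i])
			continue;	/* already part of an earlier group */
		for (j = i + 1; j < 6; j++) {
			if (objs[j] && slab_base(objs[j]) == slab_base(objs[i])) {
				objs[j] = NULL;
				cnt++;
			}
		}
		printf("bulk free: %d objects from slab 0x%lx\n",
		       cnt, (unsigned long)slab_base(objs[i]));
		objs[i] = NULL;
	}
	free(mem);
	return 0;
}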
@@ -3787,7 +3787,7 @@ static unsigned int slub_min_objects;
* requested a higher minimum order then we start with that one instead of
* the smallest order which will fit the object.
*/
-static inline unsigned int slab_order(unsigned int size,
+static inline unsigned int calc_slab_order(unsigned int size,
unsigned int min_objects, unsigned int max_order,
unsigned int fract_leftover)
{
@@ -3851,7 +3851,7 @@ static inline int calculate_order(unsigned int size)
fraction = 16;
while (fraction >= 4) {
- order = slab_order(size, min_objects,
+ order = calc_slab_order(size, min_objects,
slub_max_order, fraction);
if (order <= slub_max_order)
return order;
@@ -3864,14 +3864,14 @@ static inline int calculate_order(unsigned int size)
* We were unable to place multiple objects in a slab. Now
* lets see if we can place a single object there.
*/
- order = slab_order(size, 1, slub_max_order, 1);
+ order = calc_slab_order(size, 1, slub_max_order, 1);
if (order <= slub_max_order)
return order;
/*
* Doh this slab cannot be placed using slub_max_order.
*/
- order = slab_order(size, 1, MAX_ORDER, 1);
+ order = calc_slab_order(size, 1, MAX_ORDER, 1);
if (order < MAX_ORDER)
return order;
return -ENOSYS;
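calc_slab_order() looks for the smallest page order whose leftover space, after packing whole objects, stays below slab_size/fract_leftover, and calculate_order() retries with progressively looser fractions. A user-space sketch of that waste calculation with simplified limits; PAGE_SIZE, MAX_ORDER and pick_order() here are illustrative only:

#include <stdio.h>

#define PAGE_SIZE 4096UL
#define MAX_ORDER 3

/* Smallest order where the wasted tail <= slab_size / fract_leftover. */
static int pick_order(unsigned long size, unsigned int min_objects,
		      unsigned int fract_leftover)
{
	unsigned int order;

	for (order = 0; order <= MAX_ORDER; order++) {
		unsigned long slab_size = PAGE_SIZE << order;
		unsigned long objects = slab_size / size;
		unsigned long waste = slab_size - objects * size;

		if (objects < min_objects)
			continue;
		if (waste * fract_leftover <= slab_size)
			return order;
	}
	return -1;
}

int main(void)
{
	unsigned int fraction;

	/* Mimic calculate_order(): start strict, then relax the waste fraction. */
	for (fraction = 16; fraction >= 4; fraction /= 2) {
		int order = pick_order(700, 8, fraction);

		printf("fraction 1/%u -> order %d\n", fraction, order);
		if (order >= 0)
			break;
	}
	return 0;
}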
@@ -3923,38 +3923,38 @@ static struct kmem_cache *kmem_cache_node;
*/
static void early_kmem_cache_node_alloc(int node)
{
- struct page *page;
+ struct slab *slab;
struct kmem_cache_node *n;
BUG_ON(kmem_cache_node->size < sizeof(struct kmem_cache_node));
- page = new_slab(kmem_cache_node, GFP_NOWAIT, node);
+ slab = new_slab(kmem_cache_node, GFP_NOWAIT, node);
- BUG_ON(!page);
- if (page_to_nid(page) != node) {
+ BUG_ON(!slab);
+ if (slab_nid(slab) != node) {
pr_err("SLUB: Unable to allocate memory from node %d\n", node);
pr_err("SLUB: Allocating a useless per node structure in order to be able to continue\n");
}
- n = page->freelist;
+ n = slab->freelist;
BUG_ON(!n);
#ifdef CONFIG_SLUB_DEBUG
init_object(kmem_cache_node, n, SLUB_RED_ACTIVE);
init_tracking(kmem_cache_node, n);
#endif
n = kasan_slab_alloc(kmem_cache_node, n, GFP_KERNEL, false);
- page->freelist = get_freepointer(kmem_cache_node, n);
- page->inuse = 1;
- page->frozen = 0;
+ slab->freelist = get_freepointer(kmem_cache_node, n);
+ slab->inuse = 1;
+ slab->frozen = 0;
kmem_cache_node->node[node] = n;
init_kmem_cache_node(n);
- inc_slabs_node(kmem_cache_node, node, page->objects);
+ inc_slabs_node(kmem_cache_node, node, slab->objects);
/*
* No locks need to be taken here as it has just been
* initialized and there is no concurrent access.
*/
- __add_partial(n, page, DEACTIVATE_TO_HEAD);
+ __add_partial(n, slab, DEACTIVATE_TO_HEAD);
}
static void free_kmem_cache_nodes(struct kmem_cache *s)
@@ -4212,7 +4212,7 @@ static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags)
#endif
/*
- * The larger the object size is, the more pages we want on the partial
+ * The larger the object size is, the more slabs we want on the partial
* list to avoid pounding the page allocator excessively.
*/
set_min_partial(s, ilog2(s->size) / 2);
@@ -4240,20 +4240,20 @@ error:
return -EINVAL;
}
-static void list_slab_objects(struct kmem_cache *s, struct page *page,
+static void list_slab_objects(struct kmem_cache *s, struct slab *slab,
const char *text)
{
#ifdef CONFIG_SLUB_DEBUG
- void *addr = page_address(page);
+ void *addr = slab_address(slab);
unsigned long flags;
unsigned long *map;
void *p;
- slab_err(s, page, text, s->name);
- slab_lock(page, &flags);
+ slab_err(s, slab, text, s->name);
+ slab_lock(slab, &flags);
- map = get_map(s, page);
- for_each_object(p, s, addr, page->objects) {
+ map = get_map(s, slab);
+ for_each_object(p, s, addr, slab->objects) {
if (!test_bit(__obj_to_index(s, addr, p), map)) {
pr_err("Object 0x%p @offset=%tu\n", p, p - addr);
@@ -4261,7 +4261,7 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page,
}
}
put_map(map);
- slab_unlock(page, &flags);
+ slab_unlock(slab, &flags);
#endif
}
@@ -4273,23 +4273,23 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page,
static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
{
LIST_HEAD(discard);
- struct page *page, *h;
+ struct slab *slab, *h;
BUG_ON(irqs_disabled());
spin_lock_irq(&n->list_lock);
- list_for_each_entry_safe(page, h, &n->partial, slab_list) {
- if (!page->inuse) {
- remove_partial(n, page);
- list_add(&page->slab_list, &discard);
+ list_for_each_entry_safe(slab, h, &n->partial, slab_list) {
+ if (!slab->inuse) {
+ remove_partial(n, slab);
+ list_add(&slab->slab_list, &discard);
} else {
- list_slab_objects(s, page,
+ list_slab_objects(s, slab,
"Objects remaining in %s on __kmem_cache_shutdown()");
}
}
spin_unlock_irq(&n->list_lock);
- list_for_each_entry_safe(page, h, &discard, slab_list)
- discard_slab(s, page);
+ list_for_each_entry_safe(slab, h, &discard, slab_list)
+ discard_slab(s, slab);
}
bool __kmem_cache_empty(struct kmem_cache *s)
@@ -4322,31 +4322,32 @@ int __kmem_cache_shutdown(struct kmem_cache *s)
}
#ifdef CONFIG_PRINTK
-void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct page *page)
+void kmem_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *slab)
{
void *base;
int __maybe_unused i;
unsigned int objnr;
void *objp;
void *objp0;
- struct kmem_cache *s = page->slab_cache;
+ struct kmem_cache *s = slab->slab_cache;
struct track __maybe_unused *trackp;
kpp->kp_ptr = object;
- kpp->kp_page = page;
+ kpp->kp_slab = slab;
kpp->kp_slab_cache = s;
- base = page_address(page);
+ base = slab_address(slab);
objp0 = kasan_reset_tag(object);
#ifdef CONFIG_SLUB_DEBUG
objp = restore_red_left(s, objp0);
#else
objp = objp0;
#endif
- objnr = obj_to_index(s, page, objp);
+ objnr = obj_to_index(s, slab, objp);
kpp->kp_data_offset = (unsigned long)((char *)objp0 - (char *)objp);
objp = base + s->size * objnr;
kpp->kp_objp = objp;
- if (WARN_ON_ONCE(objp < base || objp >= base + page->objects * s->size || (objp - base) % s->size) ||
+ if (WARN_ON_ONCE(objp < base || objp >= base + slab->objects * s->size
+ || (objp - base) % s->size) ||
!(s->flags & SLAB_STORE_USER))
return;
#ifdef CONFIG_SLUB_DEBUG
@@ -4484,8 +4485,8 @@ EXPORT_SYMBOL(__kmalloc_node);
* Returns NULL if check passes, otherwise const char * to name of cache
* to indicate an error.
*/
-void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
- bool to_user)
+void __check_heap_object(const void *ptr, unsigned long n,
+ const struct slab *slab, bool to_user)
{
struct kmem_cache *s;
unsigned int offset;
@@ -4494,10 +4495,10 @@ void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
ptr = kasan_reset_tag(ptr);
/* Find object and usable object size. */
- s = page->slab_cache;
+ s = slab->slab_cache;
/* Reject impossible pointers. */
- if (ptr < page_address(page))
+ if (ptr < slab_address(slab))
usercopy_abort("SLUB object not in SLUB page?!", NULL,
to_user, 0, n);
@@ -4505,7 +4506,7 @@ void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
if (is_kfence)
offset = ptr - kfence_object_start(ptr);
else
- offset = (ptr - page_address(page)) % s->size;
+ offset = (ptr - slab_address(slab)) % s->size;
/* Adjust for redzone and reject if within the redzone. */
if (!is_kfence && kmem_cache_debug_flags(s, SLAB_RED_ZONE)) {
@@ -4527,25 +4528,24 @@ void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
size_t __ksize(const void *object)
{
- struct page *page;
+ struct folio *folio;
if (unlikely(object == ZERO_SIZE_PTR))
return 0;
- page = virt_to_head_page(object);
+ folio = virt_to_folio(object);
- if (unlikely(!PageSlab(page))) {
- WARN_ON(!PageCompound(page));
- return page_size(page);
- }
+ if (unlikely(!folio_test_slab(folio)))
+ return folio_size(folio);
- return slab_ksize(page->slab_cache);
+ return slab_ksize(folio_slab(folio)->slab_cache);
}
EXPORT_SYMBOL(__ksize);
void kfree(const void *x)
{
- struct page *page;
+ struct folio *folio;
+ struct slab *slab;
void *object = (void *)x;
trace_kfree(_RET_IP_, x);
@@ -4553,12 +4553,13 @@ void kfree(const void *x)
if (unlikely(ZERO_OR_NULL_PTR(x)))
return;
- page = virt_to_head_page(x);
- if (unlikely(!PageSlab(page))) {
- free_nonslab_page(page, object);
+ folio = virt_to_folio(x);
+ if (unlikely(!folio_test_slab(folio))) {
+ free_large_kmalloc(folio, object);
return;
}
- slab_free(page->slab_cache, page, object, NULL, 1, _RET_IP_);
+ slab = folio_slab(folio);
+ slab_free(slab->slab_cache, slab, object, NULL, 1, _RET_IP_);
}
EXPORT_SYMBOL(kfree);
@@ -4578,8 +4579,8 @@ static int __kmem_cache_do_shrink(struct kmem_cache *s)
int node;
int i;
struct kmem_cache_node *n;
- struct page *page;
- struct page *t;
+ struct slab *slab;
+ struct slab *t;
struct list_head discard;
struct list_head promote[SHRINK_PROMOTE_MAX];
unsigned long flags;
@@ -4596,22 +4597,22 @@ static int __kmem_cache_do_shrink(struct kmem_cache *s)
* Build lists of slabs to discard or promote.
*
* Note that concurrent frees may occur while we hold the
- * list_lock. page->inuse here is the upper limit.
+ * list_lock. slab->inuse here is the upper limit.
*/
- list_for_each_entry_safe(page, t, &n->partial, slab_list) {
- int free = page->objects - page->inuse;
+ list_for_each_entry_safe(slab, t, &n->partial, slab_list) {
+ int free = slab->objects - slab->inuse;
- /* Do not reread page->inuse */
+ /* Do not reread slab->inuse */
barrier();
/* We do not keep full slabs on the list */
BUG_ON(free <= 0);
- if (free == page->objects) {
- list_move(&page->slab_list, &discard);
+ if (free == slab->objects) {
+ list_move(&slab->slab_list, &discard);
n->nr_partial--;
} else if (free <= SHRINK_PROMOTE_MAX)
- list_move(&page->slab_list, promote + free - 1);
+ list_move(&slab->slab_list, promote + free - 1);
}
/*
@@ -4624,8 +4625,8 @@ static int __kmem_cache_do_shrink(struct kmem_cache *s)
spin_unlock_irqrestore(&n->list_lock, flags);
/* Release empty slabs */
- list_for_each_entry_safe(page, t, &discard, slab_list)
- discard_slab(s, page);
+ list_for_each_entry_safe(slab, t, &discard, slab_list)
+ discard_slab(s, slab);
if (slabs_node(s, node))
ret = 1;
@@ -4786,7 +4787,7 @@ static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
*/
__flush_cpu_slab(s, smp_processor_id());
for_each_kmem_cache_node(s, node, n) {
- struct page *p;
+ struct slab *p;
list_for_each_entry(p, &n->partial, slab_list)
p->slab_cache = s;
@@ -4964,54 +4965,54 @@ EXPORT_SYMBOL(__kmalloc_node_track_caller);
#endif
#ifdef CONFIG_SYSFS
-static int count_inuse(struct page *page)
+static int count_inuse(struct slab *slab)
{
- return page->inuse;
+ return slab->inuse;
}
-static int count_total(struct page *page)
+static int count_total(struct slab *slab)
{
- return page->objects;
+ return slab->objects;
}
#endif
#ifdef CONFIG_SLUB_DEBUG
-static void validate_slab(struct kmem_cache *s, struct page *page,
+static void validate_slab(struct kmem_cache *s, struct slab *slab,
unsigned long *obj_map)
{
void *p;
- void *addr = page_address(page);
+ void *addr = slab_address(slab);
unsigned long flags;
- slab_lock(page, &flags);
+ slab_lock(slab, &flags);
- if (!check_slab(s, page) || !on_freelist(s, page, NULL))
+ if (!check_slab(s, slab) || !on_freelist(s, slab, NULL))
goto unlock;
/* Now we know that a valid freelist exists */
- __fill_map(obj_map, s, page);
- for_each_object(p, s, addr, page->objects) {
+ __fill_map(obj_map, s, slab);
+ for_each_object(p, s, addr, slab->objects) {
u8 val = test_bit(__obj_to_index(s, addr, p), obj_map) ?
SLUB_RED_INACTIVE : SLUB_RED_ACTIVE;
- if (!check_object(s, page, p, val))
+ if (!check_object(s, slab, p, val))
break;
}
unlock:
- slab_unlock(page, &flags);
+ slab_unlock(slab, &flags);
}
static int validate_slab_node(struct kmem_cache *s,
struct kmem_cache_node *n, unsigned long *obj_map)
{
unsigned long count = 0;
- struct page *page;
+ struct slab *slab;
unsigned long flags;
spin_lock_irqsave(&n->list_lock, flags);
- list_for_each_entry(page, &n->partial, slab_list) {
- validate_slab(s, page, obj_map);
+ list_for_each_entry(slab, &n->partial, slab_list) {
+ validate_slab(s, slab, obj_map);
count++;
}
if (count != n->nr_partial) {
@@ -5023,8 +5024,8 @@ static int validate_slab_node(struct kmem_cache *s,
if (!(s->flags & SLAB_STORE_USER))
goto out;
- list_for_each_entry(page, &n->full, slab_list) {
- validate_slab(s, page, obj_map);
+ list_for_each_entry(slab, &n->full, slab_list) {
+ validate_slab(s, slab, obj_map);
count++;
}
if (count != atomic_long_read(&n->nr_slabs)) {
@@ -5081,6 +5082,7 @@ struct loc_track {
unsigned long max;
unsigned long count;
struct location *loc;
+ loff_t idx;
};
static struct dentry *slab_debugfs_root;
@@ -5189,15 +5191,15 @@ static int add_location(struct loc_track *t, struct kmem_cache *s,
}
static void process_slab(struct loc_track *t, struct kmem_cache *s,
- struct page *page, enum track_item alloc,
+ struct slab *slab, enum track_item alloc,
unsigned long *obj_map)
{
- void *addr = page_address(page);
+ void *addr = slab_address(slab);
void *p;
- __fill_map(obj_map, s, page);
+ __fill_map(obj_map, s, slab);
- for_each_object(p, s, addr, page->objects)
+ for_each_object(p, s, addr, slab->objects)
if (!test_bit(__obj_to_index(s, addr, p), obj_map))
add_location(t, s, get_track(s, p, alloc));
}
@@ -5239,35 +5241,37 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab,
cpu);
int node;
- struct page *page;
+ struct slab *slab;
- page = READ_ONCE(c->page);
- if (!page)
+ slab = READ_ONCE(c->slab);
+ if (!slab)
continue;
- node = page_to_nid(page);
+ node = slab_nid(slab);
if (flags & SO_TOTAL)
- x = page->objects;
+ x = slab->objects;
else if (flags & SO_OBJECTS)
- x = page->inuse;
+ x = slab->inuse;
else
x = 1;
total += x;
nodes[node] += x;
- page = slub_percpu_partial_read_once(c);
- if (page) {
- node = page_to_nid(page);
+#ifdef CONFIG_SLUB_CPU_PARTIAL
+ slab = slub_percpu_partial_read_once(c);
+ if (slab) {
+ node = slab_nid(slab);
if (flags & SO_TOTAL)
WARN_ON_ONCE(1);
else if (flags & SO_OBJECTS)
WARN_ON_ONCE(1);
else
- x = page->pages;
+ x = slab->slabs;
total += x;
nodes[node] += x;
}
+#endif
}
}
@@ -5466,33 +5470,35 @@ SLAB_ATTR_RO(objects_partial);
static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf)
{
int objects = 0;
- int pages = 0;
- int cpu;
+ int slabs = 0;
+ int cpu __maybe_unused;
int len = 0;
+#ifdef CONFIG_SLUB_CPU_PARTIAL
for_each_online_cpu(cpu) {
- struct page *page;
+ struct slab *slab;
- page = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
+ slab = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
- if (page)
- pages += page->pages;
+ if (slab)
+ slabs += slab->slabs;
}
+#endif
- /* Approximate half-full pages , see slub_set_cpu_partial() */
- objects = (pages * oo_objects(s->oo)) / 2;
- len += sysfs_emit_at(buf, len, "%d(%d)", objects, pages);
+ /* Approximate half-full slabs, see slub_set_cpu_partial() */
+ objects = (slabs * oo_objects(s->oo)) / 2;
+ len += sysfs_emit_at(buf, len, "%d(%d)", objects, slabs);
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SLUB_CPU_PARTIAL) && defined(CONFIG_SMP)
for_each_online_cpu(cpu) {
- struct page *page;
+ struct slab *slab;
- page = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
- if (page) {
- pages = READ_ONCE(page->pages);
- objects = (pages * oo_objects(s->oo)) / 2;
+ slab = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
+ if (slab) {
+ slabs = READ_ONCE(slab->slabs);
+ objects = (slabs * oo_objects(s->oo)) / 2;
len += sysfs_emit_at(buf, len, " C%d=%d(%d)",
- cpu, objects, pages);
+ cpu, objects, slabs);
}
}
#endif
@@ -6052,11 +6058,11 @@ __initcall(slab_sysfs_init);
#if defined(CONFIG_SLUB_DEBUG) && defined(CONFIG_DEBUG_FS)
static int slab_debugfs_show(struct seq_file *seq, void *v)
{
-
- struct location *l;
- unsigned int idx = *(unsigned int *)v;
struct loc_track *t = seq->private;
+ struct location *l;
+ unsigned long idx;
+ idx = (unsigned long) t->idx;
if (idx < t->count) {
l = &t->loc[idx];
@@ -6105,16 +6111,18 @@ static void *slab_debugfs_next(struct seq_file *seq, void *v, loff_t *ppos)
{
struct loc_track *t = seq->private;
- v = ppos;
- ++*ppos;
+ t->idx = ++(*ppos);
if (*ppos <= t->count)
- return v;
+ return ppos;
return NULL;
}
static void *slab_debugfs_start(struct seq_file *seq, loff_t *ppos)
{
+ struct loc_track *t = seq->private;
+
+ t->idx = *ppos;
return ppos;
}
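The debugfs change above stops deriving the record index from the seq_file position pointer and instead keeps the cursor in loc_track itself, updated by ->start and ->next. A user-space sketch of that start/next/show shape with a private cursor; struct tracker and the iter_* helpers are invented for the example:

#include <stdio.h>

struct tracker {
	const char **records;
	unsigned long count;
	unsigned long idx;	/* private cursor, like loc_track::idx */
};

static void *iter_start(struct tracker *t, unsigned long pos)
{
	t->idx = pos;			/* seek the private cursor */
	return t->idx < t->count ? t : NULL;
}

static void *iter_next(struct tracker *t, unsigned long *pos)
{
	t->idx = ++(*pos);		/* advance cursor and position together */
	return t->idx < t->count ? t : NULL;
}

static void iter_show(struct tracker *t)
{
	printf("%lu: %s\n", t->idx, t->records[t->idx]);
}

int main(void)
{
	const char *recs[] = { "alloc at foo+0x10", "alloc at bar+0x44" };
	struct tracker t = { recs, 2, 0 };
	unsigned long pos = 0;

	for (void *v = iter_start(&t, pos); v; v = iter_next(&t, &pos))
		iter_show(&t);
	return 0;
}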
@@ -6158,16 +6166,16 @@ static int slab_debug_trace_open(struct inode *inode, struct file *filep)
for_each_kmem_cache_node(s, node, n) {
unsigned long flags;
- struct page *page;
+ struct slab *slab;
if (!atomic_long_read(&n->nr_slabs))
continue;
spin_lock_irqsave(&n->list_lock, flags);
- list_for_each_entry(page, &n->partial, slab_list)
- process_slab(t, s, page, alloc, obj_map);
- list_for_each_entry(page, &n->full, slab_list)
- process_slab(t, s, page, alloc, obj_map);
+ list_for_each_entry(slab, &n->partial, slab_list)
+ process_slab(t, s, slab, alloc, obj_map);
+ list_for_each_entry(slab, &n->full, slab_list)
+ process_slab(t, s, slab, alloc, obj_map);
spin_unlock_irqrestore(&n->list_lock, flags);
}
diff --git a/mm/sparse.c b/mm/sparse.c
index e5c84b0cf0c9..d21c6e5910d0 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -722,7 +722,7 @@ static void free_map_bootmem(struct page *memmap)
>> PAGE_SHIFT;
for (i = 0; i < nr_pages; i++, page++) {
- magic = (unsigned long) page->freelist;
+ magic = page->index;
BUG_ON(magic == NODE_INFO);
diff --git a/mm/swap_slots.c b/mm/swap_slots.c
index 16f706c55d92..2b5531840583 100644
--- a/mm/swap_slots.c
+++ b/mm/swap_slots.c
@@ -30,6 +30,7 @@
#include <linux/swap_slots.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
+#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mutex.h>
#include <linux/mm.h>
diff --git a/mm/usercopy.c b/mm/usercopy.c
index b3de3c4eefba..d0d268135d96 100644
--- a/mm/usercopy.c
+++ b/mm/usercopy.c
@@ -20,6 +20,7 @@
#include <linux/atomic.h>
#include <linux/jump_label.h>
#include <asm/sections.h>
+#include "slab.h"
/*
* Checks if a given pointer and length is contained by the current
@@ -223,7 +224,7 @@ static inline void check_page_span(const void *ptr, unsigned long n,
static inline void check_heap_object(const void *ptr, unsigned long n,
bool to_user)
{
- struct page *page;
+ struct folio *folio;
if (!virt_addr_valid(ptr))
return;
@@ -231,16 +232,16 @@ static inline void check_heap_object(const void *ptr, unsigned long n,
/*
* When CONFIG_HIGHMEM=y, kmap_to_page() will give either the
* highmem page or fall back to virt_to_page(). The following
- * is effectively a highmem-aware virt_to_head_page().
+ * is effectively a highmem-aware virt_to_slab().
*/
- page = compound_head(kmap_to_page((void *)ptr));
+ folio = page_folio(kmap_to_page((void *)ptr));
- if (PageSlab(page)) {
+ if (folio_test_slab(folio)) {
/* Check slab allocator for flags and size. */
- __check_heap_object(ptr, n, page, to_user);
+ __check_heap_object(ptr, n, folio_slab(folio), to_user);
} else {
/* Verify object does not incorrectly span multiple pages. */
- check_page_span(ptr, n, page, to_user);
+ check_page_span(ptr, n, folio_page(folio, 0), to_user);
}
}
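
For readability, this is how check_heap_object() reads once the two usercopy hunks above are applied; it is only a reconstruction from the diff (unchanged lines falling outside the hunks' context are elided) and assumes the standard folio helpers page_folio(), folio_test_slab(), folio_slab() and folio_page():

static inline void check_heap_object(const void *ptr, unsigned long n,
				     bool to_user)
{
	struct folio *folio;

	if (!virt_addr_valid(ptr))
		return;

	/* ... any unchanged lines between the two hunks omitted ... */

	/*
	 * When CONFIG_HIGHMEM=y, kmap_to_page() will give either the
	 * highmem page or fall back to virt_to_page(). The following
	 * is effectively a highmem-aware virt_to_slab().
	 */
	folio = page_folio(kmap_to_page((void *)ptr));

	if (folio_test_slab(folio)) {
		/* Check slab allocator for flags and size. */
		__check_heap_object(ptr, n, folio_slab(folio), to_user);
	} else {
		/* Verify object does not incorrectly span multiple pages. */
		check_page_span(ptr, n, folio_page(folio, 0), to_user);
	}
}
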
diff --git a/mm/vmscan.c b/mm/vmscan.c
index fb9584641ac7..700434db5735 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1021,6 +1021,39 @@ static void handle_write_error(struct address_space *mapping,
unlock_page(page);
}
+static bool skip_throttle_noprogress(pg_data_t *pgdat)
+{
+ int reclaimable = 0, write_pending = 0;
+ int i;
+
+ /*
+ * If kswapd is disabled, reschedule if necessary but do not
+ * throttle as the system is likely near OOM.
+ */
+ if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES)
+ return true;
+
+ /*
+ * If there are a lot of dirty/writeback pages then do not
+ * throttle as throttling will occur when the pages cycle
+ * towards the end of the LRU if still under writeback.
+ */
+ for (i = 0; i < MAX_NR_ZONES; i++) {
+ struct zone *zone = pgdat->node_zones + i;
+
+ if (!populated_zone(zone))
+ continue;
+
+ reclaimable += zone_reclaimable_pages(zone);
+ write_pending += zone_page_state_snapshot(zone,
+ NR_ZONE_WRITE_PENDING);
+ }
+ if (2 * write_pending <= reclaimable)
+ return true;
+
+ return false;
+}
+
void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason)
{
wait_queue_head_t *wqh = &pgdat->reclaim_wait[reason];
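
skip_throttle_noprogress() above encodes two escape hatches for NOPROGRESS throttling: never throttle once kswapd has accumulated MAX_RECLAIM_RETRIES failures (the system is likely near OOM), and otherwise only throttle when more than half of the node's reclaimable pages are write-pending, so the stall happens via the writeback path instead. For instance, 40,000 write-pending pages out of 100,000 reclaimable ones (2 * 40,000 <= 100,000) skip the throttle, while 60,000 out of 100,000 do not. A tiny userspace model of just that ratio test (the figures are made up):

#include <stdbool.h>
#include <stdio.h>

/* Illustrative model of the "2 * write_pending <= reclaimable" test above. */
static bool skip_noprogress_throttle(long reclaimable, long write_pending)
{
	return 2 * write_pending <= reclaimable;
}

int main(void)
{
	printf("%d\n", skip_noprogress_throttle(100000, 40000)); /* 1: skip */
	printf("%d\n", skip_noprogress_throttle(100000, 60000)); /* 0: throttle */
	return 0;
}
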
@@ -1056,8 +1089,16 @@ void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason)
}
break;
+ case VMSCAN_THROTTLE_CONGESTED:
+ fallthrough;
case VMSCAN_THROTTLE_NOPROGRESS:
- timeout = HZ/2;
+ if (skip_throttle_noprogress(pgdat)) {
+ cond_resched();
+ return;
+ }
+
+ timeout = 1;
+
break;
case VMSCAN_THROTTLE_ISOLATED:
timeout = HZ/50;
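
With the hunk above, VMSCAN_THROTTLE_NOPROGRESS (and the new CONGESTED fall-through) sleeps for a single jiffy instead of HZ/2, so a non-productive direct reclaimer stalls for roughly one timer tick at a time rather than half a second. A quick back-of-the-envelope check, assuming CONFIG_HZ=1000 (the tick rate is configuration-dependent, commonly 100 to 1000):

#include <stdio.h>

#define HZ 1000	/* assumption: CONFIG_HZ=1000; real kernels vary */

int main(void)
{
	printf("old NOPROGRESS timeout: %d jiffies = %d ms\n",
	       HZ / 2, (HZ / 2) * 1000 / HZ);
	printf("new NOPROGRESS/CONGESTED timeout: 1 jiffy = %d ms\n",
	       1000 / HZ);
	printf("ISOLATED timeout: %d jiffies = %d ms\n",
	       HZ / 50, (HZ / 50) * 1000 / HZ);
	return 0;
}
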
@@ -3321,7 +3362,7 @@ again:
if (!current_is_kswapd() && current_may_throttle() &&
!sc->hibernation_mode &&
test_bit(LRUVEC_CONGESTED, &target_lruvec->flags))
- reclaim_throttle(pgdat, VMSCAN_THROTTLE_WRITEBACK);
+ reclaim_throttle(pgdat, VMSCAN_THROTTLE_CONGESTED);
if (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed,
sc))
@@ -3386,16 +3427,16 @@ static void consider_reclaim_throttle(pg_data_t *pgdat, struct scan_control *sc)
}
/*
- * Do not throttle kswapd on NOPROGRESS as it will throttle on
- * VMSCAN_THROTTLE_WRITEBACK if there are too many pages under
- * writeback and marked for immediate reclaim at the tail of
- * the LRU.
+ * Do not throttle kswapd or cgroup reclaim on NOPROGRESS as it will
+ * throttle on VMSCAN_THROTTLE_WRITEBACK if there are too many pages
+ * under writeback and marked for immediate reclaim at the tail of the
+ * LRU.
*/
- if (current_is_kswapd())
+ if (current_is_kswapd() || cgroup_reclaim(sc))
return;
/* Throttle if making no progress at high priorities. */
- if (sc->priority < DEF_PRIORITY - 2)
+ if (sc->priority == 1 && !sc->nr_reclaimed)
reclaim_throttle(pgdat, VMSCAN_THROTTLE_NOPROGRESS);
}
@@ -3415,6 +3456,7 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
unsigned long nr_soft_scanned;
gfp_t orig_mask;
pg_data_t *last_pgdat = NULL;
+ pg_data_t *first_pgdat = NULL;
/*
* If the number of buffer_heads in the machine exceeds the maximum
@@ -3478,14 +3520,19 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
/* need some check for avoid more shrink_zone() */
}
+ if (!first_pgdat)
+ first_pgdat = zone->zone_pgdat;
+
/* See comment about same check for global reclaim above */
if (zone->zone_pgdat == last_pgdat)
continue;
last_pgdat = zone->zone_pgdat;
shrink_node(zone->zone_pgdat, sc);
- consider_reclaim_throttle(zone->zone_pgdat, sc);
}
+ if (first_pgdat)
+ consider_reclaim_throttle(first_pgdat, sc);
+
/*
* Restore to original mask to avoid the impact on the caller if we
* promoted it to __GFP_HIGHMEM.
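
The shrink_zones() hunks above pull consider_reclaim_throttle() out of the per-zone loop: the pgdat of the first eligible zone is remembered and consulted once after every node has been shrunk, so the throttling decision is tied to the node reclaim started from rather than whichever node happened to be visited last. A toy userspace model of that control-flow change (zone/pgdat details are elided and all names are invented):

#include <stdio.h>

/* Toy model: shrink every node in the "zonelist", but consult the reclaim
 * throttle only once, for the node of the first zone in the list. */
struct toy_zone { int node; };

static void shrink_node_id(int node)	{ printf("shrink node %d\n", node); }
static void consider_throttle(int node)	{ printf("throttle check on node %d\n", node); }

static void shrink_zones_model(const struct toy_zone *zones, int nr)
{
	int last_node = -1, first_node = -1;

	for (int i = 0; i < nr; i++) {
		if (first_node < 0)
			first_node = zones[i].node;
		if (zones[i].node == last_node)	/* same dedup as the last_pgdat check */
			continue;
		last_node = zones[i].node;
		shrink_node_id(zones[i].node);
	}
	if (first_node >= 0)
		consider_throttle(first_node);	/* once, after the loop */
}

int main(void)
{
	const struct toy_zone zonelist[] = { { 0 }, { 0 }, { 1 } };

	shrink_zones_model(zonelist, 3);
	return 0;
}
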
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index b897ce3b399a..0d3b65939016 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -17,10 +17,10 @@
*
* Usage of struct page fields:
* page->private: points to zspage
- * page->freelist(index): links together all component pages of a zspage
+ * page->index: links together all component pages of a zspage
* For the huge page, this is always 0, so we use this field
* to store handle.
- * page->units: first object offset in a subpage of zspage
+ * page->page_type: first object offset in a subpage of zspage
*
* Usage of struct page flags:
* PG_private: identifies the first component page
@@ -489,12 +489,12 @@ static inline struct page *get_first_page(struct zspage *zspage)
static inline int get_first_obj_offset(struct page *page)
{
- return page->units;
+ return page->page_type;
}
static inline void set_first_obj_offset(struct page *page, int offset)
{
- page->units = offset;
+ page->page_type = offset;
}
static inline unsigned int get_freeobj(struct zspage *zspage)
@@ -827,7 +827,7 @@ static struct page *get_next_page(struct page *page)
if (unlikely(PageHugeObject(page)))
return NULL;
- return page->freelist;
+ return (struct page *)page->index;
}
/**
@@ -901,7 +901,7 @@ static void reset_page(struct page *page)
set_page_private(page, 0);
page_mapcount_reset(page);
ClearPageHugeObject(page);
- page->freelist = NULL;
+ page->index = 0;
}
static int trylock_zspage(struct zspage *zspage)
@@ -1027,7 +1027,7 @@ static void create_page_chain(struct size_class *class, struct zspage *zspage,
/*
* Allocate individual pages and link them together as:
- * 1. all pages are linked together using page->freelist
+ * 1. all pages are linked together using page->index
* 2. each sub-page point to zspage using page->private
*
* we set PG_private to identify the first page (i.e. no other sub-page
@@ -1036,7 +1036,7 @@ static void create_page_chain(struct size_class *class, struct zspage *zspage,
for (i = 0; i < nr_pages; i++) {
page = pages[i];
set_page_private(page, (unsigned long)zspage);
- page->freelist = NULL;
+ page->index = 0;
if (i == 0) {
zspage->first_page = page;
SetPagePrivate(page);
@@ -1044,7 +1044,7 @@ static void create_page_chain(struct size_class *class, struct zspage *zspage,
class->pages_per_zspage == 1))
SetPageHugeObject(page);
} else {
- prev_page->freelist = page;
+ prev_page->index = (unsigned long)page;
}
prev_page = page;
}
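
After these zsmalloc hunks, the component pages of a zspage are chained through page->index (an unsigned long) rather than the old page->freelist pointer, which is why the stores and loads above cast between (unsigned long) and (struct page *). A small standalone model of that "pointer kept in an unsigned long field" chain (the struct and helpers below are illustrative, not kernel code; like the kernel, it assumes unsigned long is pointer-sized):

#include <stdio.h>

/* Toy stand-in for struct page: the link lives in an unsigned long field,
 * so walking the chain needs explicit casts, as in the hunks above. */
struct toy_page {
	unsigned long index;	/* next component page, or 0 at the tail */
	int id;
};

static void chain(struct toy_page *prev, struct toy_page *next)
{
	prev->index = (unsigned long)next;
}

static struct toy_page *next_page(const struct toy_page *p)
{
	return (struct toy_page *)p->index;	/* 0 -> NULL ends the walk */
}

int main(void)
{
	struct toy_page a = { .id = 0 }, b = { .id = 1 }, c = { .id = 2 };

	chain(&a, &b);
	chain(&b, &c);
	c.index = 0;	/* tail, as reset_page()/create_page_chain() leave it */

	for (struct toy_page *p = &a; p; p = next_page(p))
		printf("component page %d\n", p->id);
	return 0;
}
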